This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
It's `VMS::Filespec', not `Filespec'
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
3e5c0189 717 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
719a9bb0
NC
718 /* avoid tie/overload weirdness */
719 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 720 if (s[len-1] != ';')
bdc0bf6f 721 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
722 } else {
723 SvTEMP_off(line);
724 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 725 parser->linestr = line;
8990e307 726 }
f06b5848
DM
727 parser->oldoldbufptr =
728 parser->oldbufptr =
729 parser->bufptr =
730 parser->linestart = SvPVX(parser->linestr);
731 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
732 parser->last_lop = parser->last_uni = NULL;
79072805 733}
a687059c 734
e3abe207
DM
735
736/* delete a parser object */
737
738void
739Perl_parser_free(pTHX_ const yy_parser *parser)
740{
7918f24d
NC
741 PERL_ARGS_ASSERT_PARSER_FREE;
742
7c4baf47 743 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
744 SvREFCNT_dec(parser->linestr);
745
2f9285f8
DM
746 if (parser->rsfp == PerlIO_stdin())
747 PerlIO_clearerr(parser->rsfp);
799361c3
SH
748 else if (parser->rsfp && (!parser->old_parser ||
749 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 750 PerlIO_close(parser->rsfp);
5486870f 751 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 752
e3abe207
DM
753 Safefree(parser->stack);
754 Safefree(parser->lex_brackstack);
755 Safefree(parser->lex_casestack);
756 PL_parser = parser->old_parser;
757 Safefree(parser);
758}
759
760
ffb4593c
NT
761/*
762 * Perl_lex_end
9cbb5ea2
GS
763 * Finalizer for lexing operations. Must be called when the parser is
764 * done with the lexer.
ffb4593c
NT
765 */
766
463ee0b2 767void
864dbfa3 768Perl_lex_end(pTHX)
463ee0b2 769{
97aff369 770 dVAR;
3280af22 771 PL_doextract = FALSE;
463ee0b2
LW
772}
773
ffb4593c 774/*
f0e67a1d
Z
775=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
776
777Buffer scalar containing the chunk currently under consideration of the
778text currently being lexed. This is always a plain string scalar (for
779which C<SvPOK> is true). It is not intended to be used as a scalar by
780normal scalar means; instead refer to the buffer directly by the pointer
781variables described below.
782
783The lexer maintains various C<char*> pointers to things in the
784C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
785reallocated, all of these pointers must be updated. Don't attempt to
786do this manually, but rather use L</lex_grow_linestr> if you need to
787reallocate the buffer.
788
789The content of the text chunk in the buffer is commonly exactly one
790complete line of input, up to and including a newline terminator,
791but there are situations where it is otherwise. The octets of the
792buffer may be intended to be interpreted as either UTF-8 or Latin-1.
793The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
794flag on this scalar, which may disagree with it.
795
796For direct examination of the buffer, the variable
797L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
798lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
799of these pointers is usually preferable to examination of the scalar
800through normal scalar means.
801
802=for apidoc AmxU|char *|PL_parser-E<gt>bufend
803
804Direct pointer to the end of the chunk of text currently being lexed, the
805end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
806+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
807always located at the end of the buffer, and does not count as part of
808the buffer's contents.
809
810=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
811
812Points to the current position of lexing inside the lexer buffer.
813Characters around this point may be freely examined, within
814the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
815L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
816interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
817
818Lexing code (whether in the Perl core or not) moves this pointer past
819the characters that it consumes. It is also expected to perform some
820bookkeeping whenever a newline character is consumed. This movement
821can be more conveniently performed by the function L</lex_read_to>,
822which handles newlines appropriately.
823
824Interpretation of the buffer's octets can be abstracted out by
825using the slightly higher-level functions L</lex_peek_unichar> and
826L</lex_read_unichar>.
827
828=for apidoc AmxU|char *|PL_parser-E<gt>linestart
829
830Points to the start of the current line inside the lexer buffer.
831This is useful for indicating at which column an error occurred, and
832not much else. This must be updated by any lexing code that consumes
833a newline; the function L</lex_read_to> handles this detail.
834
835=cut
836*/
837
838/*
839=for apidoc Amx|bool|lex_bufutf8
840
841Indicates whether the octets in the lexer buffer
842(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
843of Unicode characters. If not, they should be interpreted as Latin-1
844characters. This is analogous to the C<SvUTF8> flag for scalars.
845
846In UTF-8 mode, it is not guaranteed that the lexer buffer actually
847contains valid UTF-8. Lexing code must be robust in the face of invalid
848encoding.
849
850The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
851is significant, but not the whole story regarding the input character
852encoding. Normally, when a file is being read, the scalar contains octets
853and its C<SvUTF8> flag is off, but the octets should be interpreted as
854UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
855however, the scalar may have the C<SvUTF8> flag on, and in this case its
856octets should be interpreted as UTF-8 unless the C<use bytes> pragma
857is in effect. This logic may change in the future; use this function
858instead of implementing the logic yourself.
859
860=cut
861*/
862
863bool
864Perl_lex_bufutf8(pTHX)
865{
866 return UTF;
867}
868
869/*
870=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
871
872Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
873at least I<len> octets (including terminating NUL). Returns a
874pointer to the reallocated buffer. This is necessary before making
875any direct modification of the buffer that would increase its length.
876L</lex_stuff_pvn> provides a more convenient way to insert text into
877the buffer.
878
879Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
880this function updates all of the lexer's variables that point directly
881into the buffer.
882
883=cut
884*/
885
886char *
887Perl_lex_grow_linestr(pTHX_ STRLEN len)
888{
889 SV *linestr;
890 char *buf;
891 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
892 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
893 linestr = PL_parser->linestr;
894 buf = SvPVX(linestr);
895 if (len <= SvLEN(linestr))
896 return buf;
897 bufend_pos = PL_parser->bufend - buf;
898 bufptr_pos = PL_parser->bufptr - buf;
899 oldbufptr_pos = PL_parser->oldbufptr - buf;
900 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
901 linestart_pos = PL_parser->linestart - buf;
902 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
903 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
904 buf = sv_grow(linestr, len);
905 PL_parser->bufend = buf + bufend_pos;
906 PL_parser->bufptr = buf + bufptr_pos;
907 PL_parser->oldbufptr = buf + oldbufptr_pos;
908 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
909 PL_parser->linestart = buf + linestart_pos;
910 if (PL_parser->last_uni)
911 PL_parser->last_uni = buf + last_uni_pos;
912 if (PL_parser->last_lop)
913 PL_parser->last_lop = buf + last_lop_pos;
914 return buf;
915}
916
917/*
83aa740e 918=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
919
920Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
921immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
922reallocating the buffer if necessary. This means that lexing code that
923runs later will see the characters as if they had appeared in the input.
924It is not recommended to do this as part of normal parsing, and most
925uses of this facility run the risk of the inserted characters being
926interpreted in an unintended manner.
927
928The string to be inserted is represented by I<len> octets starting
929at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
930according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
931The characters are recoded for the lexer buffer, according to how the
932buffer is currently being interpreted (L</lex_bufutf8>). If a string
933to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
934function is more convenient.
935
936=cut
937*/
938
939void
83aa740e 940Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 941{
749123ff 942 dVAR;
f0e67a1d
Z
943 char *bufptr;
944 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
945 if (flags & ~(LEX_STUFF_UTF8))
946 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
947 if (UTF) {
948 if (flags & LEX_STUFF_UTF8) {
949 goto plain_copy;
950 } else {
951 STRLEN highhalf = 0;
83aa740e 952 const char *p, *e = pv+len;
f0e67a1d
Z
953 for (p = pv; p != e; p++)
954 highhalf += !!(((U8)*p) & 0x80);
955 if (!highhalf)
956 goto plain_copy;
957 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
958 bufptr = PL_parser->bufptr;
959 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
960 SvCUR_set(PL_parser->linestr,
961 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
962 PL_parser->bufend += len+highhalf;
963 for (p = pv; p != e; p++) {
964 U8 c = (U8)*p;
965 if (c & 0x80) {
966 *bufptr++ = (char)(0xc0 | (c >> 6));
967 *bufptr++ = (char)(0x80 | (c & 0x3f));
968 } else {
969 *bufptr++ = (char)c;
970 }
971 }
972 }
973 } else {
974 if (flags & LEX_STUFF_UTF8) {
975 STRLEN highhalf = 0;
83aa740e 976 const char *p, *e = pv+len;
f0e67a1d
Z
977 for (p = pv; p != e; p++) {
978 U8 c = (U8)*p;
979 if (c >= 0xc4) {
980 Perl_croak(aTHX_ "Lexing code attempted to stuff "
981 "non-Latin-1 character into Latin-1 input");
982 } else if (c >= 0xc2 && p+1 != e &&
983 (((U8)p[1]) & 0xc0) == 0x80) {
984 p++;
985 highhalf++;
986 } else if (c >= 0x80) {
987 /* malformed UTF-8 */
988 ENTER;
989 SAVESPTR(PL_warnhook);
990 PL_warnhook = PERL_WARNHOOK_FATAL;
991 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
992 LEAVE;
993 }
994 }
995 if (!highhalf)
996 goto plain_copy;
997 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
998 bufptr = PL_parser->bufptr;
999 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1000 SvCUR_set(PL_parser->linestr,
1001 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1002 PL_parser->bufend += len-highhalf;
1003 for (p = pv; p != e; p++) {
1004 U8 c = (U8)*p;
1005 if (c & 0x80) {
1006 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1007 p++;
1008 } else {
1009 *bufptr++ = (char)c;
1010 }
1011 }
1012 } else {
1013 plain_copy:
1014 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1015 bufptr = PL_parser->bufptr;
1016 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1017 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1018 PL_parser->bufend += len;
1019 Copy(pv, bufptr, len, char);
1020 }
1021 }
1022}
1023
1024/*
1025=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1026
1027Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1028immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1029reallocating the buffer if necessary. This means that lexing code that
1030runs later will see the characters as if they had appeared in the input.
1031It is not recommended to do this as part of normal parsing, and most
1032uses of this facility run the risk of the inserted characters being
1033interpreted in an unintended manner.
1034
1035The string to be inserted is the string value of I<sv>. The characters
1036are recoded for the lexer buffer, according to how the buffer is currently
1037being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1038not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1039need to construct a scalar.
1040
1041=cut
1042*/
1043
1044void
1045Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1046{
1047 char *pv;
1048 STRLEN len;
1049 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1050 if (flags)
1051 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1052 pv = SvPV(sv, len);
1053 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1054}
1055
1056/*
1057=for apidoc Amx|void|lex_unstuff|char *ptr
1058
1059Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1060I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1061This hides the discarded text from any lexing code that runs later,
1062as if the text had never appeared.
1063
1064This is not the normal way to consume lexed text. For that, use
1065L</lex_read_to>.
1066
1067=cut
1068*/
1069
1070void
1071Perl_lex_unstuff(pTHX_ char *ptr)
1072{
1073 char *buf, *bufend;
1074 STRLEN unstuff_len;
1075 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1076 buf = PL_parser->bufptr;
1077 if (ptr < buf)
1078 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1079 if (ptr == buf)
1080 return;
1081 bufend = PL_parser->bufend;
1082 if (ptr > bufend)
1083 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1084 unstuff_len = ptr - buf;
1085 Move(ptr, buf, bufend+1-ptr, char);
1086 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1087 PL_parser->bufend = bufend - unstuff_len;
1088}
1089
1090/*
1091=for apidoc Amx|void|lex_read_to|char *ptr
1092
1093Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1094to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1095performing the correct bookkeeping whenever a newline character is passed.
1096This is the normal way to consume lexed text.
1097
1098Interpretation of the buffer's octets can be abstracted out by
1099using the slightly higher-level functions L</lex_peek_unichar> and
1100L</lex_read_unichar>.
1101
1102=cut
1103*/
1104
1105void
1106Perl_lex_read_to(pTHX_ char *ptr)
1107{
1108 char *s;
1109 PERL_ARGS_ASSERT_LEX_READ_TO;
1110 s = PL_parser->bufptr;
1111 if (ptr < s || ptr > PL_parser->bufend)
1112 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1113 for (; s != ptr; s++)
1114 if (*s == '\n') {
1115 CopLINE_inc(PL_curcop);
1116 PL_parser->linestart = s+1;
1117 }
1118 PL_parser->bufptr = ptr;
1119}
1120
1121/*
1122=for apidoc Amx|void|lex_discard_to|char *ptr
1123
1124Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1125up to I<ptr>. The remaining content of the buffer will be moved, and
1126all pointers into the buffer updated appropriately. I<ptr> must not
1127be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1128it is not permitted to discard text that has yet to be lexed.
1129
1130Normally it is not necessarily to do this directly, because it suffices to
1131use the implicit discarding behaviour of L</lex_next_chunk> and things
1132based on it. However, if a token stretches across multiple lines,
1f317c95 1133and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1134that purpose, then after completion of the token it would be wise to
1135explicitly discard the now-unneeded earlier lines, to avoid future
1136multi-line tokens growing the buffer without bound.
1137
1138=cut
1139*/
1140
1141void
1142Perl_lex_discard_to(pTHX_ char *ptr)
1143{
1144 char *buf;
1145 STRLEN discard_len;
1146 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1147 buf = SvPVX(PL_parser->linestr);
1148 if (ptr < buf)
1149 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1150 if (ptr == buf)
1151 return;
1152 if (ptr > PL_parser->bufptr)
1153 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1154 discard_len = ptr - buf;
1155 if (PL_parser->oldbufptr < ptr)
1156 PL_parser->oldbufptr = ptr;
1157 if (PL_parser->oldoldbufptr < ptr)
1158 PL_parser->oldoldbufptr = ptr;
1159 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1160 PL_parser->last_uni = NULL;
1161 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1162 PL_parser->last_lop = NULL;
1163 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1164 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1165 PL_parser->bufend -= discard_len;
1166 PL_parser->bufptr -= discard_len;
1167 PL_parser->oldbufptr -= discard_len;
1168 PL_parser->oldoldbufptr -= discard_len;
1169 if (PL_parser->last_uni)
1170 PL_parser->last_uni -= discard_len;
1171 if (PL_parser->last_lop)
1172 PL_parser->last_lop -= discard_len;
1173}
1174
1175/*
1176=for apidoc Amx|bool|lex_next_chunk|U32 flags
1177
1178Reads in the next chunk of text to be lexed, appending it to
1179L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1180looked to the end of the current chunk and wants to know more. It is
1181usual, but not necessary, for lexing to have consumed the entirety of
1182the current chunk at this time.
1183
1184If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1185chunk (i.e., the current chunk has been entirely consumed), normally the
1186current chunk will be discarded at the same time that the new chunk is
1187read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1188will not be discarded. If the current chunk has not been entirely
1189consumed, then it will not be discarded regardless of the flag.
1190
1191Returns true if some new text was added to the buffer, or false if the
1192buffer has reached the end of the input text.
1193
1194=cut
1195*/
1196
1197#define LEX_FAKE_EOF 0x80000000
1198
1199bool
1200Perl_lex_next_chunk(pTHX_ U32 flags)
1201{
1202 SV *linestr;
1203 char *buf;
1204 STRLEN old_bufend_pos, new_bufend_pos;
1205 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1206 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1207 bool got_some_for_debugger = 0;
f0e67a1d
Z
1208 bool got_some;
1209 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1211 linestr = PL_parser->linestr;
1212 buf = SvPVX(linestr);
1213 if (!(flags & LEX_KEEP_PREVIOUS) &&
1214 PL_parser->bufptr == PL_parser->bufend) {
1215 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1216 linestart_pos = 0;
1217 if (PL_parser->last_uni != PL_parser->bufend)
1218 PL_parser->last_uni = NULL;
1219 if (PL_parser->last_lop != PL_parser->bufend)
1220 PL_parser->last_lop = NULL;
1221 last_uni_pos = last_lop_pos = 0;
1222 *buf = 0;
1223 SvCUR(linestr) = 0;
1224 } else {
1225 old_bufend_pos = PL_parser->bufend - buf;
1226 bufptr_pos = PL_parser->bufptr - buf;
1227 oldbufptr_pos = PL_parser->oldbufptr - buf;
1228 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1229 linestart_pos = PL_parser->linestart - buf;
1230 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1231 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1232 }
1233 if (flags & LEX_FAKE_EOF) {
1234 goto eof;
1235 } else if (!PL_parser->rsfp) {
1236 got_some = 0;
1237 } else if (filter_gets(linestr, old_bufend_pos)) {
1238 got_some = 1;
17cc9359 1239 got_some_for_debugger = 1;
f0e67a1d 1240 } else {
580561a3
Z
1241 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1242 sv_setpvs(linestr, "");
f0e67a1d
Z
1243 eof:
1244 /* End of real input. Close filehandle (unless it was STDIN),
1245 * then add implicit termination.
1246 */
1247 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1248 PerlIO_clearerr(PL_parser->rsfp);
1249 else if (PL_parser->rsfp)
1250 (void)PerlIO_close(PL_parser->rsfp);
1251 PL_parser->rsfp = NULL;
1252 PL_doextract = FALSE;
1253#ifdef PERL_MAD
1254 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1255 PL_faketokens = 1;
1256#endif
1257 if (!PL_in_eval && PL_minus_p) {
1258 sv_catpvs(linestr,
1259 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1260 PL_minus_n = PL_minus_p = 0;
1261 } else if (!PL_in_eval && PL_minus_n) {
1262 sv_catpvs(linestr, /*{*/";}");
1263 PL_minus_n = 0;
1264 } else
1265 sv_catpvs(linestr, ";");
1266 got_some = 1;
1267 }
1268 buf = SvPVX(linestr);
1269 new_bufend_pos = SvCUR(linestr);
1270 PL_parser->bufend = buf + new_bufend_pos;
1271 PL_parser->bufptr = buf + bufptr_pos;
1272 PL_parser->oldbufptr = buf + oldbufptr_pos;
1273 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1274 PL_parser->linestart = buf + linestart_pos;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni = buf + last_uni_pos;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1279 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1280 PL_curstash != PL_debstash) {
1281 /* debugger active and we're not compiling the debugger code,
1282 * so store the line into the debugger's array of lines
1283 */
1284 update_debugger_info(NULL, buf+old_bufend_pos,
1285 new_bufend_pos-old_bufend_pos);
1286 }
1287 return got_some;
1288}
1289
1290/*
1291=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1292
1293Looks ahead one (Unicode) character in the text currently being lexed.
1294Returns the codepoint (unsigned integer value) of the next character,
1295or -1 if lexing has reached the end of the input text. To consume the
1296peeked character, use L</lex_read_unichar>.
1297
1298If the next character is in (or extends into) the next chunk of input
1299text, the next chunk will be read in. Normally the current chunk will be
1300discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1301then the current chunk will not be discarded.
1302
1303If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1304is encountered, an exception is generated.
1305
1306=cut
1307*/
1308
1309I32
1310Perl_lex_peek_unichar(pTHX_ U32 flags)
1311{
749123ff 1312 dVAR;
f0e67a1d
Z
1313 char *s, *bufend;
1314 if (flags & ~(LEX_KEEP_PREVIOUS))
1315 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1316 s = PL_parser->bufptr;
1317 bufend = PL_parser->bufend;
1318 if (UTF) {
1319 U8 head;
1320 I32 unichar;
1321 STRLEN len, retlen;
1322 if (s == bufend) {
1323 if (!lex_next_chunk(flags))
1324 return -1;
1325 s = PL_parser->bufptr;
1326 bufend = PL_parser->bufend;
1327 }
1328 head = (U8)*s;
1329 if (!(head & 0x80))
1330 return head;
1331 if (head & 0x40) {
1332 len = PL_utf8skip[head];
1333 while ((STRLEN)(bufend-s) < len) {
1334 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1335 break;
1336 s = PL_parser->bufptr;
1337 bufend = PL_parser->bufend;
1338 }
1339 }
1340 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1341 if (retlen == (STRLEN)-1) {
1342 /* malformed UTF-8 */
1343 ENTER;
1344 SAVESPTR(PL_warnhook);
1345 PL_warnhook = PERL_WARNHOOK_FATAL;
1346 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1347 LEAVE;
1348 }
1349 return unichar;
1350 } else {
1351 if (s == bufend) {
1352 if (!lex_next_chunk(flags))
1353 return -1;
1354 s = PL_parser->bufptr;
1355 }
1356 return (U8)*s;
1357 }
1358}
1359
1360/*
1361=for apidoc Amx|I32|lex_read_unichar|U32 flags
1362
1363Reads the next (Unicode) character in the text currently being lexed.
1364Returns the codepoint (unsigned integer value) of the character read,
1365and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1366if lexing has reached the end of the input text. To non-destructively
1367examine the next character, use L</lex_peek_unichar> instead.
1368
1369If the next character is in (or extends into) the next chunk of input
1370text, the next chunk will be read in. Normally the current chunk will be
1371discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1372then the current chunk will not be discarded.
1373
1374If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1375is encountered, an exception is generated.
1376
1377=cut
1378*/
1379
1380I32
1381Perl_lex_read_unichar(pTHX_ U32 flags)
1382{
1383 I32 c;
1384 if (flags & ~(LEX_KEEP_PREVIOUS))
1385 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1386 c = lex_peek_unichar(flags);
1387 if (c != -1) {
1388 if (c == '\n')
1389 CopLINE_inc(PL_curcop);
1390 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1391 }
1392 return c;
1393}
1394
1395/*
1396=for apidoc Amx|void|lex_read_space|U32 flags
1397
1398Reads optional spaces, in Perl style, in the text currently being
1399lexed. The spaces may include ordinary whitespace characters and
1400Perl-style comments. C<#line> directives are processed if encountered.
1401L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1402at a non-space character (or the end of the input text).
1403
1404If spaces extend into the next chunk of input text, the next chunk will
1405be read in. Normally the current chunk will be discarded at the same
1406time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1407chunk will not be discarded.
1408
1409=cut
1410*/
1411
f0998909
Z
1412#define LEX_NO_NEXT_CHUNK 0x80000000
1413
f0e67a1d
Z
1414void
1415Perl_lex_read_space(pTHX_ U32 flags)
1416{
1417 char *s, *bufend;
1418 bool need_incline = 0;
f0998909 1419 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1420 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1421#ifdef PERL_MAD
1422 if (PL_skipwhite) {
1423 sv_free(PL_skipwhite);
1424 PL_skipwhite = NULL;
1425 }
1426 if (PL_madskills)
1427 PL_skipwhite = newSVpvs("");
1428#endif /* PERL_MAD */
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1431 while (1) {
1432 char c = *s;
1433 if (c == '#') {
1434 do {
1435 c = *++s;
1436 } while (!(c == '\n' || (c == 0 && s == bufend)));
1437 } else if (c == '\n') {
1438 s++;
1439 PL_parser->linestart = s;
1440 if (s == bufend)
1441 need_incline = 1;
1442 else
1443 incline(s);
1444 } else if (isSPACE(c)) {
1445 s++;
1446 } else if (c == 0 && s == bufend) {
1447 bool got_more;
1448#ifdef PERL_MAD
1449 if (PL_madskills)
1450 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1451#endif /* PERL_MAD */
f0998909
Z
1452 if (flags & LEX_NO_NEXT_CHUNK)
1453 break;
f0e67a1d
Z
1454 PL_parser->bufptr = s;
1455 CopLINE_inc(PL_curcop);
1456 got_more = lex_next_chunk(flags);
1457 CopLINE_dec(PL_curcop);
1458 s = PL_parser->bufptr;
1459 bufend = PL_parser->bufend;
1460 if (!got_more)
1461 break;
1462 if (need_incline && PL_parser->rsfp) {
1463 incline(s);
1464 need_incline = 0;
1465 }
1466 } else {
1467 break;
1468 }
1469 }
1470#ifdef PERL_MAD
1471 if (PL_madskills)
1472 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473#endif /* PERL_MAD */
1474 PL_parser->bufptr = s;
1475}
1476
1477/*
ffb4593c
NT
1478 * S_incline
1479 * This subroutine has nothing to do with tilting, whether at windmills
1480 * or pinball tables. Its name is short for "increment line". It
57843af0 1481 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1482 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1483 * # line 500 "foo.pm"
1484 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1485 */
1486
76e3520e 1487STATIC void
d9095cec 1488S_incline(pTHX_ const char *s)
463ee0b2 1489{
97aff369 1490 dVAR;
d9095cec
NC
1491 const char *t;
1492 const char *n;
1493 const char *e;
463ee0b2 1494
7918f24d
NC
1495 PERL_ARGS_ASSERT_INCLINE;
1496
57843af0 1497 CopLINE_inc(PL_curcop);
463ee0b2
LW
1498 if (*s++ != '#')
1499 return;
d4c19fe8
AL
1500 while (SPACE_OR_TAB(*s))
1501 s++;
73659bf1
GS
1502 if (strnEQ(s, "line", 4))
1503 s += 4;
1504 else
1505 return;
084592ab 1506 if (SPACE_OR_TAB(*s))
73659bf1 1507 s++;
4e553d73 1508 else
73659bf1 1509 return;
d4c19fe8
AL
1510 while (SPACE_OR_TAB(*s))
1511 s++;
463ee0b2
LW
1512 if (!isDIGIT(*s))
1513 return;
d4c19fe8 1514
463ee0b2
LW
1515 n = s;
1516 while (isDIGIT(*s))
1517 s++;
07714eb4 1518 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1519 return;
bf4acbe4 1520 while (SPACE_OR_TAB(*s))
463ee0b2 1521 s++;
73659bf1 1522 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1523 s++;
73659bf1
GS
1524 e = t + 1;
1525 }
463ee0b2 1526 else {
c35e046a
AL
1527 t = s;
1528 while (!isSPACE(*t))
1529 t++;
73659bf1 1530 e = t;
463ee0b2 1531 }
bf4acbe4 1532 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1533 e++;
1534 if (*e != '\n' && *e != '\0')
1535 return; /* false alarm */
1536
f4dd75d9 1537 if (t - s > 0) {
d9095cec 1538 const STRLEN len = t - s;
8a5ee598 1539#ifndef USE_ITHREADS
19bad673
NC
1540 SV *const temp_sv = CopFILESV(PL_curcop);
1541 const char *cf;
1542 STRLEN tmplen;
1543
1544 if (temp_sv) {
1545 cf = SvPVX(temp_sv);
1546 tmplen = SvCUR(temp_sv);
1547 } else {
1548 cf = NULL;
1549 tmplen = 0;
1550 }
1551
42d9b98d 1552 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1553 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1554 * to *{"::_<newfilename"} */
44867030
NC
1555 /* However, the long form of evals is only turned on by the
1556 debugger - usually they're "(eval %lu)" */
1557 char smallbuf[128];
1558 char *tmpbuf;
1559 GV **gvp;
d9095cec 1560 STRLEN tmplen2 = len;
798b63bc 1561 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1562 tmpbuf = smallbuf;
1563 else
2ae0db35 1564 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1565 tmpbuf[0] = '_';
1566 tmpbuf[1] = '<';
2ae0db35 1567 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1568 tmplen += 2;
8a5ee598
RGS
1569 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1570 if (gvp) {
44867030
NC
1571 char *tmpbuf2;
1572 GV *gv2;
1573
1574 if (tmplen2 + 2 <= sizeof smallbuf)
1575 tmpbuf2 = smallbuf;
1576 else
1577 Newx(tmpbuf2, tmplen2 + 2, char);
1578
1579 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1580 /* Either they malloc'd it, or we malloc'd it,
1581 so no prefix is present in ours. */
1582 tmpbuf2[0] = '_';
1583 tmpbuf2[1] = '<';
1584 }
1585
1586 memcpy(tmpbuf2 + 2, s, tmplen2);
1587 tmplen2 += 2;
1588
8a5ee598 1589 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1590 if (!isGV(gv2)) {
8a5ee598 1591 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1592 /* adjust ${"::_<newfilename"} to store the new file name */
1593 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1594 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1595 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1596 }
44867030
NC
1597
1598 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1599 }
e66cf94c 1600 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1601 }
8a5ee598 1602#endif
05ec9bb3 1603 CopFILE_free(PL_curcop);
d9095cec 1604 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1605 }
57843af0 1606 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1607}
1608
29595ff2 1609#ifdef PERL_MAD
cd81e915 1610/* skip space before PL_thistoken */
29595ff2
NC
1611
1612STATIC char *
1613S_skipspace0(pTHX_ register char *s)
1614{
7918f24d
NC
1615 PERL_ARGS_ASSERT_SKIPSPACE0;
1616
29595ff2
NC
1617 s = skipspace(s);
1618 if (!PL_madskills)
1619 return s;
cd81e915
NC
1620 if (PL_skipwhite) {
1621 if (!PL_thiswhite)
6b29d1f5 1622 PL_thiswhite = newSVpvs("");
cd81e915
NC
1623 sv_catsv(PL_thiswhite, PL_skipwhite);
1624 sv_free(PL_skipwhite);
1625 PL_skipwhite = 0;
1626 }
1627 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1628 return s;
1629}
1630
cd81e915 1631/* skip space after PL_thistoken */
29595ff2
NC
1632
1633STATIC char *
1634S_skipspace1(pTHX_ register char *s)
1635{
d4c19fe8 1636 const char *start = s;
29595ff2
NC
1637 I32 startoff = start - SvPVX(PL_linestr);
1638
7918f24d
NC
1639 PERL_ARGS_ASSERT_SKIPSPACE1;
1640
29595ff2
NC
1641 s = skipspace(s);
1642 if (!PL_madskills)
1643 return s;
1644 start = SvPVX(PL_linestr) + startoff;
cd81e915 1645 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1646 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1647 PL_thistoken = newSVpvn(tstart, start - tstart);
1648 }
1649 PL_realtokenstart = -1;
1650 if (PL_skipwhite) {
1651 if (!PL_nextwhite)
6b29d1f5 1652 PL_nextwhite = newSVpvs("");
cd81e915
NC
1653 sv_catsv(PL_nextwhite, PL_skipwhite);
1654 sv_free(PL_skipwhite);
1655 PL_skipwhite = 0;
29595ff2
NC
1656 }
1657 return s;
1658}
1659
1660STATIC char *
1661S_skipspace2(pTHX_ register char *s, SV **svp)
1662{
c35e046a
AL
1663 char *start;
1664 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1665 const I32 startoff = s - SvPVX(PL_linestr);
1666
7918f24d
NC
1667 PERL_ARGS_ASSERT_SKIPSPACE2;
1668
29595ff2
NC
1669 s = skipspace(s);
1670 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1671 if (!PL_madskills || !svp)
1672 return s;
1673 start = SvPVX(PL_linestr) + startoff;
cd81e915 1674 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1675 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1676 PL_thistoken = newSVpvn(tstart, start - tstart);
1677 PL_realtokenstart = -1;
29595ff2 1678 }
cd81e915 1679 if (PL_skipwhite) {
29595ff2 1680 if (!*svp)
6b29d1f5 1681 *svp = newSVpvs("");
cd81e915
NC
1682 sv_setsv(*svp, PL_skipwhite);
1683 sv_free(PL_skipwhite);
1684 PL_skipwhite = 0;
29595ff2
NC
1685 }
1686
1687 return s;
1688}
1689#endif
1690
80a702cd 1691STATIC void
15f169a1 1692S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1693{
1694 AV *av = CopFILEAVx(PL_curcop);
1695 if (av) {
b9f83d2f 1696 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1697 if (orig_sv)
1698 sv_setsv(sv, orig_sv);
1699 else
1700 sv_setpvn(sv, buf, len);
80a702cd
RGS
1701 (void)SvIOK_on(sv);
1702 SvIV_set(sv, 0);
1703 av_store(av, (I32)CopLINE(PL_curcop), sv);
1704 }
1705}
1706
ffb4593c
NT
1707/*
1708 * S_skipspace
1709 * Called to gobble the appropriate amount and type of whitespace.
1710 * Skips comments as well.
1711 */
1712
76e3520e 1713STATIC char *
cea2e8a9 1714S_skipspace(pTHX_ register char *s)
a687059c 1715{
5db06880 1716#ifdef PERL_MAD
f0e67a1d
Z
1717 char *start = s;
1718#endif /* PERL_MAD */
7918f24d 1719 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1720#ifdef PERL_MAD
cd81e915
NC
1721 if (PL_skipwhite) {
1722 sv_free(PL_skipwhite);
f0e67a1d 1723 PL_skipwhite = NULL;
5db06880 1724 }
f0e67a1d 1725#endif /* PERL_MAD */
3280af22 1726 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1727 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1728 s++;
f0e67a1d
Z
1729 } else {
1730 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1731 PL_bufptr = s;
f0998909
Z
1732 lex_read_space(LEX_KEEP_PREVIOUS |
1733 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1734 LEX_NO_NEXT_CHUNK : 0));
3280af22 1735 s = PL_bufptr;
f0e67a1d
Z
1736 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1737 if (PL_linestart > PL_bufptr)
1738 PL_bufptr = PL_linestart;
1739 return s;
463ee0b2 1740 }
5db06880 1741#ifdef PERL_MAD
f0e67a1d
Z
1742 if (PL_madskills)
1743 PL_skipwhite = newSVpvn(start, s-start);
1744#endif /* PERL_MAD */
5db06880 1745 return s;
a687059c 1746}
378cc40b 1747
ffb4593c
NT
1748/*
1749 * S_check_uni
1750 * Check the unary operators to ensure there's no ambiguity in how they're
1751 * used. An ambiguous piece of code would be:
1752 * rand + 5
1753 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1754 * the +5 is its argument.
1755 */
1756
76e3520e 1757STATIC void
cea2e8a9 1758S_check_uni(pTHX)
ba106d47 1759{
97aff369 1760 dVAR;
d4c19fe8
AL
1761 const char *s;
1762 const char *t;
2f3197b3 1763
3280af22 1764 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1765 return;
3280af22
NIS
1766 while (isSPACE(*PL_last_uni))
1767 PL_last_uni++;
c35e046a
AL
1768 s = PL_last_uni;
1769 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1770 s++;
3280af22 1771 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1772 return;
6136c704 1773
9b387841
NC
1774 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1775 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1776 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1777}
1778
ffb4593c
NT
1779/*
1780 * LOP : macro to build a list operator. Its behaviour has been replaced
1781 * with a subroutine, S_lop() for which LOP is just another name.
1782 */
1783
a0d0e21e
LW
1784#define LOP(f,x) return lop(f,x,s)
1785
ffb4593c
NT
1786/*
1787 * S_lop
1788 * Build a list operator (or something that might be one). The rules:
1789 * - if we have a next token, then it's a list operator [why?]
1790 * - if the next thing is an opening paren, then it's a function
1791 * - else it's a list operator
1792 */
1793
76e3520e 1794STATIC I32
a0be28da 1795S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1796{
97aff369 1797 dVAR;
7918f24d
NC
1798
1799 PERL_ARGS_ASSERT_LOP;
1800
6154021b 1801 pl_yylval.ival = f;
35c8bce7 1802 CLINE;
3280af22
NIS
1803 PL_expect = x;
1804 PL_bufptr = s;
1805 PL_last_lop = PL_oldbufptr;
eb160463 1806 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1807#ifdef PERL_MAD
1808 if (PL_lasttoke)
1809 return REPORT(LSTOP);
1810#else
3280af22 1811 if (PL_nexttoke)
bbf60fe6 1812 return REPORT(LSTOP);
5db06880 1813#endif
79072805 1814 if (*s == '(')
bbf60fe6 1815 return REPORT(FUNC);
29595ff2 1816 s = PEEKSPACE(s);
79072805 1817 if (*s == '(')
bbf60fe6 1818 return REPORT(FUNC);
79072805 1819 else
bbf60fe6 1820 return REPORT(LSTOP);
79072805
LW
1821}
1822
5db06880
NC
1823#ifdef PERL_MAD
1824 /*
1825 * S_start_force
1826 * Sets up for an eventual force_next(). start_force(0) basically does
1827 * an unshift, while start_force(-1) does a push. yylex removes items
1828 * on the "pop" end.
1829 */
1830
1831STATIC void
1832S_start_force(pTHX_ int where)
1833{
1834 int i;
1835
cd81e915 1836 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1837 where = PL_lasttoke;
cd81e915
NC
1838 assert(PL_curforce < 0 || PL_curforce == where);
1839 if (PL_curforce != where) {
5db06880
NC
1840 for (i = PL_lasttoke; i > where; --i) {
1841 PL_nexttoke[i] = PL_nexttoke[i-1];
1842 }
1843 PL_lasttoke++;
1844 }
cd81e915 1845 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1846 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1847 PL_curforce = where;
1848 if (PL_nextwhite) {
5db06880 1849 if (PL_madskills)
6b29d1f5 1850 curmad('^', newSVpvs(""));
cd81e915 1851 CURMAD('_', PL_nextwhite);
5db06880
NC
1852 }
1853}
1854
1855STATIC void
1856S_curmad(pTHX_ char slot, SV *sv)
1857{
1858 MADPROP **where;
1859
1860 if (!sv)
1861 return;
cd81e915
NC
1862 if (PL_curforce < 0)
1863 where = &PL_thismad;
5db06880 1864 else
cd81e915 1865 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1866
cd81e915 1867 if (PL_faketokens)
76f68e9b 1868 sv_setpvs(sv, "");
5db06880
NC
1869 else {
1870 if (!IN_BYTES) {
1871 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1872 SvUTF8_on(sv);
1873 else if (PL_encoding) {
1874 sv_recode_to_utf8(sv, PL_encoding);
1875 }
1876 }
1877 }
1878
1879 /* keep a slot open for the head of the list? */
1880 if (slot != '_' && *where && (*where)->mad_key == '^') {
1881 (*where)->mad_key = slot;
daba3364 1882 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1883 (*where)->mad_val = (void*)sv;
1884 }
1885 else
1886 addmad(newMADsv(slot, sv), where, 0);
1887}
1888#else
b3f24c00
MHM
1889# define start_force(where) NOOP
1890# define curmad(slot, sv) NOOP
5db06880
NC
1891#endif
1892
ffb4593c
NT
1893/*
1894 * S_force_next
9cbb5ea2 1895 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1896 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1897 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1898 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1899 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1900 */
1901
4e553d73 1902STATIC void
cea2e8a9 1903S_force_next(pTHX_ I32 type)
79072805 1904{
97aff369 1905 dVAR;
704d4215
GG
1906#ifdef DEBUGGING
1907 if (DEBUG_T_TEST) {
1908 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1909 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1910 }
1911#endif
5db06880 1912#ifdef PERL_MAD
cd81e915 1913 if (PL_curforce < 0)
5db06880 1914 start_force(PL_lasttoke);
cd81e915 1915 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1916 if (PL_lex_state != LEX_KNOWNEXT)
1917 PL_lex_defer = PL_lex_state;
1918 PL_lex_state = LEX_KNOWNEXT;
1919 PL_lex_expect = PL_expect;
cd81e915 1920 PL_curforce = -1;
5db06880 1921#else
3280af22
NIS
1922 PL_nexttype[PL_nexttoke] = type;
1923 PL_nexttoke++;
1924 if (PL_lex_state != LEX_KNOWNEXT) {
1925 PL_lex_defer = PL_lex_state;
1926 PL_lex_expect = PL_expect;
1927 PL_lex_state = LEX_KNOWNEXT;
79072805 1928 }
5db06880 1929#endif
79072805
LW
1930}
1931
d0a148a6 1932STATIC SV *
15f169a1 1933S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1934{
97aff369 1935 dVAR;
740cce10 1936 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1937 !IN_BYTES
1938 && UTF
1939 && !is_ascii_string((const U8*)start, len)
740cce10 1940 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1941 return sv;
1942}
1943
ffb4593c
NT
1944/*
1945 * S_force_word
1946 * When the lexer knows the next thing is a word (for instance, it has
1947 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1948 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1949 * lookahead.
ffb4593c
NT
1950 *
1951 * Arguments:
b1b65b59 1952 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1953 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1954 * int check_keyword : if true, Perl checks to make sure the word isn't
1955 * a keyword (do this if the word is a label, e.g. goto FOO)
1956 * int allow_pack : if true, : characters will also be allowed (require,
1957 * use, etc. do this)
9cbb5ea2 1958 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1959 */
1960
76e3520e 1961STATIC char *
cea2e8a9 1962S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1963{
97aff369 1964 dVAR;
463ee0b2
LW
1965 register char *s;
1966 STRLEN len;
4e553d73 1967
7918f24d
NC
1968 PERL_ARGS_ASSERT_FORCE_WORD;
1969
29595ff2 1970 start = SKIPSPACE1(start);
463ee0b2 1971 s = start;
7e2040f0 1972 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1973 (allow_pack && *s == ':') ||
15f0808c 1974 (allow_initial_tick && *s == '\'') )
a0d0e21e 1975 {
3280af22 1976 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1977 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1978 return start;
cd81e915 1979 start_force(PL_curforce);
5db06880
NC
1980 if (PL_madskills)
1981 curmad('X', newSVpvn(start,s-start));
463ee0b2 1982 if (token == METHOD) {
29595ff2 1983 s = SKIPSPACE1(s);
463ee0b2 1984 if (*s == '(')
3280af22 1985 PL_expect = XTERM;
463ee0b2 1986 else {
3280af22 1987 PL_expect = XOPERATOR;
463ee0b2 1988 }
79072805 1989 }
e74e6b3d 1990 if (PL_madskills)
63575281 1991 curmad('g', newSVpvs( "forced" ));
9ded7720 1992 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1993 = (OP*)newSVOP(OP_CONST,0,
1994 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1995 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1996 force_next(token);
1997 }
1998 return s;
1999}
2000
ffb4593c
NT
2001/*
2002 * S_force_ident
9cbb5ea2 2003 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2004 * text only contains the "foo" portion. The first argument is a pointer
2005 * to the "foo", and the second argument is the type symbol to prefix.
2006 * Forces the next token to be a "WORD".
9cbb5ea2 2007 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2008 */
2009
76e3520e 2010STATIC void
bfed75c6 2011S_force_ident(pTHX_ register const char *s, int kind)
79072805 2012{
97aff369 2013 dVAR;
7918f24d
NC
2014
2015 PERL_ARGS_ASSERT_FORCE_IDENT;
2016
c35e046a 2017 if (*s) {
90e5519e
NC
2018 const STRLEN len = strlen(s);
2019 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2020 start_force(PL_curforce);
9ded7720 2021 NEXTVAL_NEXTTOKE.opval = o;
79072805 2022 force_next(WORD);
748a9306 2023 if (kind) {
11343788 2024 o->op_private = OPpCONST_ENTERED;
55497cff 2025 /* XXX see note in pp_entereval() for why we forgo typo
2026 warnings if the symbol must be introduced in an eval.
2027 GSAR 96-10-12 */
90e5519e
NC
2028 gv_fetchpvn_flags(s, len,
2029 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2030 : GV_ADD,
2031 kind == '$' ? SVt_PV :
2032 kind == '@' ? SVt_PVAV :
2033 kind == '%' ? SVt_PVHV :
a0d0e21e 2034 SVt_PVGV
90e5519e 2035 );
748a9306 2036 }
79072805
LW
2037 }
2038}
2039
1571675a
GS
2040NV
2041Perl_str_to_version(pTHX_ SV *sv)
2042{
2043 NV retval = 0.0;
2044 NV nshift = 1.0;
2045 STRLEN len;
cfd0369c 2046 const char *start = SvPV_const(sv,len);
9d4ba2ae 2047 const char * const end = start + len;
504618e9 2048 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2049
2050 PERL_ARGS_ASSERT_STR_TO_VERSION;
2051
1571675a 2052 while (start < end) {
ba210ebe 2053 STRLEN skip;
1571675a
GS
2054 UV n;
2055 if (utf)
9041c2e3 2056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2057 else {
2058 n = *(U8*)start;
2059 skip = 1;
2060 }
2061 retval += ((NV)n)/nshift;
2062 start += skip;
2063 nshift *= 1000;
2064 }
2065 return retval;
2066}
2067
4e553d73 2068/*
ffb4593c
NT
2069 * S_force_version
2070 * Forces the next token to be a version number.
e759cc13
RGS
2071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2072 * and if "guessing" is TRUE, then no new token is created (and the caller
2073 * must use an alternative parsing method).
ffb4593c
NT
2074 */
2075
76e3520e 2076STATIC char *
e759cc13 2077S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2078{
97aff369 2079 dVAR;
5f66b61c 2080 OP *version = NULL;
44dcb63b 2081 char *d;
5db06880
NC
2082#ifdef PERL_MAD
2083 I32 startoff = s - SvPVX(PL_linestr);
2084#endif
89bfa8cd 2085
7918f24d
NC
2086 PERL_ARGS_ASSERT_FORCE_VERSION;
2087
29595ff2 2088 s = SKIPSPACE1(s);
89bfa8cd 2089
44dcb63b 2090 d = s;
dd629d5b 2091 if (*d == 'v')
44dcb63b 2092 d++;
44dcb63b 2093 if (isDIGIT(*d)) {
e759cc13
RGS
2094 while (isDIGIT(*d) || *d == '_' || *d == '.')
2095 d++;
5db06880
NC
2096#ifdef PERL_MAD
2097 if (PL_madskills) {
cd81e915 2098 start_force(PL_curforce);
5db06880
NC
2099 curmad('X', newSVpvn(s,d-s));
2100 }
2101#endif
4e4da3ac 2102 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2103 SV *ver;
8d08d9ba
DG
2104#ifdef USE_LOCALE_NUMERIC
2105 char *loc = setlocale(LC_NUMERIC, "C");
2106#endif
6154021b 2107 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2108#ifdef USE_LOCALE_NUMERIC
2109 setlocale(LC_NUMERIC, loc);
2110#endif
6154021b 2111 version = pl_yylval.opval;
dd629d5b
GS
2112 ver = cSVOPx(version)->op_sv;
2113 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2114 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2115 SvNV_set(ver, str_to_version(ver));
1571675a 2116 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2117 }
89bfa8cd 2118 }
5db06880
NC
2119 else if (guessing) {
2120#ifdef PERL_MAD
2121 if (PL_madskills) {
cd81e915
NC
2122 sv_free(PL_nextwhite); /* let next token collect whitespace */
2123 PL_nextwhite = 0;
5db06880
NC
2124 s = SvPVX(PL_linestr) + startoff;
2125 }
2126#endif
e759cc13 2127 return s;
5db06880 2128 }
89bfa8cd 2129 }
2130
5db06880
NC
2131#ifdef PERL_MAD
2132 if (PL_madskills && !version) {
cd81e915
NC
2133 sv_free(PL_nextwhite); /* let next token collect whitespace */
2134 PL_nextwhite = 0;
5db06880
NC
2135 s = SvPVX(PL_linestr) + startoff;
2136 }
2137#endif
89bfa8cd 2138 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2139 start_force(PL_curforce);
9ded7720 2140 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2141 force_next(WORD);
89bfa8cd 2142
e759cc13 2143 return s;
89bfa8cd 2144}
2145
ffb4593c 2146/*
91152fc1
DG
2147 * S_force_strict_version
2148 * Forces the next token to be a version number using strict syntax rules.
2149 */
2150
2151STATIC char *
2152S_force_strict_version(pTHX_ char *s)
2153{
2154 dVAR;
2155 OP *version = NULL;
2156#ifdef PERL_MAD
2157 I32 startoff = s - SvPVX(PL_linestr);
2158#endif
2159 const char *errstr = NULL;
2160
2161 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2162
2163 while (isSPACE(*s)) /* leading whitespace */
2164 s++;
2165
2166 if (is_STRICT_VERSION(s,&errstr)) {
2167 SV *ver = newSV(0);
2168 s = (char *)scan_version(s, ver, 0);
2169 version = newSVOP(OP_CONST, 0, ver);
2170 }
4e4da3ac
Z
2171 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2172 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2173 {
91152fc1
DG
2174 PL_bufptr = s;
2175 if (errstr)
2176 yyerror(errstr); /* version required */
2177 return s;
2178 }
2179
2180#ifdef PERL_MAD
2181 if (PL_madskills && !version) {
2182 sv_free(PL_nextwhite); /* let next token collect whitespace */
2183 PL_nextwhite = 0;
2184 s = SvPVX(PL_linestr) + startoff;
2185 }
2186#endif
2187 /* NOTE: The parser sees the package name and the VERSION swapped */
2188 start_force(PL_curforce);
2189 NEXTVAL_NEXTTOKE.opval = version;
2190 force_next(WORD);
2191
2192 return s;
2193}
2194
2195/*
ffb4593c
NT
2196 * S_tokeq
2197 * Tokenize a quoted string passed in as an SV. It finds the next
2198 * chunk, up to end of string or a backslash. It may make a new
2199 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2200 * turns \\ into \.
2201 */
2202
76e3520e 2203STATIC SV *
cea2e8a9 2204S_tokeq(pTHX_ SV *sv)
79072805 2205{
97aff369 2206 dVAR;
79072805
LW
2207 register char *s;
2208 register char *send;
2209 register char *d;
b3ac6de7
IZ
2210 STRLEN len = 0;
2211 SV *pv = sv;
79072805 2212
7918f24d
NC
2213 PERL_ARGS_ASSERT_TOKEQ;
2214
79072805 2215 if (!SvLEN(sv))
b3ac6de7 2216 goto finish;
79072805 2217
a0d0e21e 2218 s = SvPV_force(sv, len);
21a311ee 2219 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2220 goto finish;
463ee0b2 2221 send = s + len;
79072805
LW
2222 while (s < send && *s != '\\')
2223 s++;
2224 if (s == send)
b3ac6de7 2225 goto finish;
79072805 2226 d = s;
be4731d2 2227 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2228 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2229 }
79072805
LW
2230 while (s < send) {
2231 if (*s == '\\') {
a0d0e21e 2232 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2233 s++; /* all that, just for this */
2234 }
2235 *d++ = *s++;
2236 }
2237 *d = '\0';
95a20fc0 2238 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2239 finish:
3280af22 2240 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2241 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2242 return sv;
2243}
2244
ffb4593c
NT
2245/*
2246 * Now come three functions related to double-quote context,
2247 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2248 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2249 * interact with PL_lex_state, and create fake ( ... ) argument lists
2250 * to handle functions and concatenation.
2251 * They assume that whoever calls them will be setting up a fake
2252 * join call, because each subthing puts a ',' after it. This lets
2253 * "lower \luPpEr"
2254 * become
2255 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2256 *
2257 * (I'm not sure whether the spurious commas at the end of lcfirst's
2258 * arguments and join's arguments are created or not).
2259 */
2260
2261/*
2262 * S_sublex_start
6154021b 2263 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2264 *
2265 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2266 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2267 *
2268 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2269 *
2270 * Everything else becomes a FUNC.
2271 *
2272 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2273 * had an OP_CONST or OP_READLINE). This just sets us up for a
2274 * call to S_sublex_push().
2275 */
2276
76e3520e 2277STATIC I32
cea2e8a9 2278S_sublex_start(pTHX)
79072805 2279{
97aff369 2280 dVAR;
6154021b 2281 register const I32 op_type = pl_yylval.ival;
79072805
LW
2282
2283 if (op_type == OP_NULL) {
6154021b 2284 pl_yylval.opval = PL_lex_op;
5f66b61c 2285 PL_lex_op = NULL;
79072805
LW
2286 return THING;
2287 }
2288 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2289 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2290
2291 if (SvTYPE(sv) == SVt_PVIV) {
2292 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2293 STRLEN len;
96a5add6 2294 const char * const p = SvPV_const(sv, len);
740cce10 2295 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2296 SvREFCNT_dec(sv);
2297 sv = nsv;
4e553d73 2298 }
6154021b 2299 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2300 PL_lex_stuff = NULL;
6f33ba73
RGS
2301 /* Allow <FH> // "foo" */
2302 if (op_type == OP_READLINE)
2303 PL_expect = XTERMORDORDOR;
79072805
LW
2304 return THING;
2305 }
e3f73d4e
RGS
2306 else if (op_type == OP_BACKTICK && PL_lex_op) {
2307 /* readpipe() vas overriden */
2308 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2309 pl_yylval.opval = PL_lex_op;
9b201d7d 2310 PL_lex_op = NULL;
e3f73d4e
RGS
2311 PL_lex_stuff = NULL;
2312 return THING;
2313 }
79072805 2314
3280af22 2315 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2316 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2317 PL_sublex_info.sub_op = PL_lex_op;
2318 PL_lex_state = LEX_INTERPPUSH;
55497cff 2319
3280af22
NIS
2320 PL_expect = XTERM;
2321 if (PL_lex_op) {
6154021b 2322 pl_yylval.opval = PL_lex_op;
5f66b61c 2323 PL_lex_op = NULL;
55497cff 2324 return PMFUNC;
2325 }
2326 else
2327 return FUNC;
2328}
2329
ffb4593c
NT
2330/*
2331 * S_sublex_push
2332 * Create a new scope to save the lexing state. The scope will be
2333 * ended in S_sublex_done. Returns a '(', starting the function arguments
2334 * to the uc, lc, etc. found before.
2335 * Sets PL_lex_state to LEX_INTERPCONCAT.
2336 */
2337
76e3520e 2338STATIC I32
cea2e8a9 2339S_sublex_push(pTHX)
55497cff 2340{
27da23d5 2341 dVAR;
f46d017c 2342 ENTER;
55497cff 2343
3280af22 2344 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2345 SAVEBOOL(PL_lex_dojoin);
3280af22 2346 SAVEI32(PL_lex_brackets);
3280af22
NIS
2347 SAVEI32(PL_lex_casemods);
2348 SAVEI32(PL_lex_starts);
651b5b28 2349 SAVEI8(PL_lex_state);
7766f137 2350 SAVEVPTR(PL_lex_inpat);
98246f1e 2351 SAVEI16(PL_lex_inwhat);
57843af0 2352 SAVECOPLINE(PL_curcop);
3280af22 2353 SAVEPPTR(PL_bufptr);
8452ff4b 2354 SAVEPPTR(PL_bufend);
3280af22
NIS
2355 SAVEPPTR(PL_oldbufptr);
2356 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2357 SAVEPPTR(PL_last_lop);
2358 SAVEPPTR(PL_last_uni);
3280af22
NIS
2359 SAVEPPTR(PL_linestart);
2360 SAVESPTR(PL_linestr);
8edd5f42
RGS
2361 SAVEGENERICPV(PL_lex_brackstack);
2362 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2363
2364 PL_linestr = PL_lex_stuff;
a0714e2c 2365 PL_lex_stuff = NULL;
3280af22 2366
9cbb5ea2
GS
2367 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2368 = SvPVX(PL_linestr);
3280af22 2369 PL_bufend += SvCUR(PL_linestr);
bd61b366 2370 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2371 SAVEFREESV(PL_linestr);
2372
2373 PL_lex_dojoin = FALSE;
2374 PL_lex_brackets = 0;
a02a5408
JC
2375 Newx(PL_lex_brackstack, 120, char);
2376 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2377 PL_lex_casemods = 0;
2378 *PL_lex_casestack = '\0';
2379 PL_lex_starts = 0;
2380 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2381 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2382
2383 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2384 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2385 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2386 else
5f66b61c 2387 PL_lex_inpat = NULL;
79072805 2388
55497cff 2389 return '(';
79072805
LW
2390}
2391
ffb4593c
NT
2392/*
2393 * S_sublex_done
2394 * Restores lexer state after a S_sublex_push.
2395 */
2396
76e3520e 2397STATIC I32
cea2e8a9 2398S_sublex_done(pTHX)
79072805 2399{
27da23d5 2400 dVAR;
3280af22 2401 if (!PL_lex_starts++) {
396482e1 2402 SV * const sv = newSVpvs("");
9aa983d2
JH
2403 if (SvUTF8(PL_linestr))
2404 SvUTF8_on(sv);
3280af22 2405 PL_expect = XOPERATOR;
6154021b 2406 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2407 return THING;
2408 }
2409
3280af22
NIS
2410 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2411 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2412 return yylex();
79072805
LW
2413 }
2414
ffb4593c 2415 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2416 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2417 PL_linestr = PL_lex_repl;
2418 PL_lex_inpat = 0;
2419 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2420 PL_bufend += SvCUR(PL_linestr);
bd61b366 2421 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2422 SAVEFREESV(PL_linestr);
2423 PL_lex_dojoin = FALSE;
2424 PL_lex_brackets = 0;
3280af22
NIS
2425 PL_lex_casemods = 0;
2426 *PL_lex_casestack = '\0';
2427 PL_lex_starts = 0;
25da4f38 2428 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2429 PL_lex_state = LEX_INTERPNORMAL;
2430 PL_lex_starts++;
e9fa98b2
HS
2431 /* we don't clear PL_lex_repl here, so that we can check later
2432 whether this is an evalled subst; that means we rely on the
2433 logic to ensure sublex_done() is called again only via the
2434 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2435 }
e9fa98b2 2436 else {
3280af22 2437 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2438 PL_lex_repl = NULL;
e9fa98b2 2439 }
79072805 2440 return ',';
ffed7fef
LW
2441 }
2442 else {
5db06880
NC
2443#ifdef PERL_MAD
2444 if (PL_madskills) {
cd81e915
NC
2445 if (PL_thiswhite) {
2446 if (!PL_endwhite)
6b29d1f5 2447 PL_endwhite = newSVpvs("");
cd81e915
NC
2448 sv_catsv(PL_endwhite, PL_thiswhite);
2449 PL_thiswhite = 0;
2450 }
2451 if (PL_thistoken)
76f68e9b 2452 sv_setpvs(PL_thistoken,"");
5db06880 2453 else
cd81e915 2454 PL_realtokenstart = -1;
5db06880
NC
2455 }
2456#endif
f46d017c 2457 LEAVE;
3280af22
NIS
2458 PL_bufend = SvPVX(PL_linestr);
2459 PL_bufend += SvCUR(PL_linestr);
2460 PL_expect = XOPERATOR;
09bef843 2461 PL_sublex_info.sub_inwhat = 0;
79072805 2462 return ')';
ffed7fef
LW
2463 }
2464}
2465
02aa26ce
NT
2466/*
2467 scan_const
2468
2469 Extracts a pattern, double-quoted string, or transliteration. This
2470 is terrifying code.
2471
94def140 2472 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2473 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2474 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2475
94def140
TS
2476 Returns a pointer to the character scanned up to. If this is
2477 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2478 successfully parsed), will leave an OP for the substring scanned
6154021b 2479 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2480 by looking at the next characters herself.
2481
02aa26ce
NT
2482 In patterns:
2483 backslashes:
ff3f963a 2484 constants: \N{NAME} only
02aa26ce
NT
2485 case and quoting: \U \Q \E
2486 stops on @ and $, but not for $ as tail anchor
2487
2488 In transliterations:
2489 characters are VERY literal, except for - not at the start or end
94def140
TS
2490 of the string, which indicates a range. If the range is in bytes,
2491 scan_const expands the range to the full set of intermediate
2492 characters. If the range is in utf8, the hyphen is replaced with
2493 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2494
2495 In double-quoted strings:
2496 backslashes:
2497 double-quoted style: \r and \n
ff3f963a 2498 constants: \x31, etc.
94def140 2499 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2500 case and quoting: \U \Q \E
2501 stops on @ and $
2502
2503 scan_const does *not* construct ops to handle interpolated strings.
2504 It stops processing as soon as it finds an embedded $ or @ variable
2505 and leaves it to the caller to work out what's going on.
2506
94def140
TS
2507 embedded arrays (whether in pattern or not) could be:
2508 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2509
2510 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2511
2512 $ in pattern could be $foo or could be tail anchor. Assumption:
2513 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2514 followed by one of "()| \r\n\t"
02aa26ce
NT
2515
2516 \1 (backreferences) are turned into $1
2517
2518 The structure of the code is
2519 while (there's a character to process) {
94def140
TS
2520 handle transliteration ranges
2521 skip regexp comments /(?#comment)/ and codes /(?{code})/
2522 skip #-initiated comments in //x patterns
2523 check for embedded arrays
02aa26ce
NT
2524 check for embedded scalars
2525 if (backslash) {
94def140 2526 deprecate \1 in substitution replacements
02aa26ce
NT
2527 handle string-changing backslashes \l \U \Q \E, etc.
2528 switch (what was escaped) {
94def140 2529 handle \- in a transliteration (becomes a literal -)
ff3f963a 2530 if a pattern and not \N{, go treat as regular character
94def140
TS
2531 handle \132 (octal characters)
2532 handle \x15 and \x{1234} (hex characters)
ff3f963a 2533 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2534 handle \cV (control characters)
2535 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2536 } (end switch)
77a135fe 2537 continue
02aa26ce 2538 } (end if backslash)
77a135fe 2539 handle regular character
02aa26ce 2540 } (end while character to read)
4e553d73 2541
02aa26ce
NT
2542*/
2543
76e3520e 2544STATIC char *
cea2e8a9 2545S_scan_const(pTHX_ char *start)
79072805 2546{
97aff369 2547 dVAR;
3280af22 2548 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2549 SV *sv = newSV(send - start); /* sv for the constant. See
2550 note below on sizing. */
02aa26ce
NT
2551 register char *s = start; /* start of the constant */
2552 register char *d = SvPVX(sv); /* destination for copies */
2553 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2554 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2555 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2556 I32 this_utf8 = UTF; /* Is the source string assumed
2557 to be UTF8? But, this can
2558 show as true when the source
2559 isn't utf8, as for example
2560 when it is entirely composed
2561 of hex constants */
2562
2563 /* Note on sizing: The scanned constant is placed into sv, which is
2564 * initialized by newSV() assuming one byte of output for every byte of
2565 * input. This routine expects newSV() to allocate an extra byte for a
2566 * trailing NUL, which this routine will append if it gets to the end of
2567 * the input. There may be more bytes of input than output (eg., \N{LATIN
2568 * CAPITAL LETTER A}), or more output than input if the constant ends up
2569 * recoded to utf8, but each time a construct is found that might increase
2570 * the needed size, SvGROW() is called. Its size parameter each time is
2571 * based on the best guess estimate at the time, namely the length used so
2572 * far, plus the length the current construct will occupy, plus room for
2573 * the trailing NUL, plus one byte for every input byte still unscanned */
2574
012bcf8d 2575 UV uv;
4c3a8340
TS
2576#ifdef EBCDIC
2577 UV literal_endpoint = 0;
e294cc5d 2578 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2579#endif
012bcf8d 2580
7918f24d
NC
2581 PERL_ARGS_ASSERT_SCAN_CONST;
2582
2b9d42f0
NIS
2583 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2584 /* If we are doing a trans and we know we want UTF8 set expectation */
2585 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2586 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2587 }
2588
2589
79072805 2590 while (s < send || dorange) {
ff3f963a 2591
02aa26ce 2592 /* get transliterations out of the way (they're most literal) */
3280af22 2593 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2594 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2595 if (dorange) {
1ba5c669
JH
2596 I32 i; /* current expanded character */
2597 I32 min; /* first character in range */
2598 I32 max; /* last character in range */
02aa26ce 2599
e294cc5d
JH
2600#ifdef EBCDIC
2601 UV uvmax = 0;
2602#endif
2603
2604 if (has_utf8
2605#ifdef EBCDIC
2606 && !native_range
2607#endif
2608 ) {
9d4ba2ae 2609 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2610 char *e = d++;
2611 while (e-- > c)
2612 *(e + 1) = *e;
25716404 2613 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2614 /* mark the range as done, and continue */
2615 dorange = FALSE;
2616 didrange = TRUE;
2617 continue;
2618 }
2b9d42f0 2619
95a20fc0 2620 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2621#ifdef EBCDIC
2622 SvGROW(sv,
2623 SvLEN(sv) + (has_utf8 ?
2624 (512 - UTF_CONTINUATION_MARK +
2625 UNISKIP(0x100))
2626 : 256));
2627 /* How many two-byte within 0..255: 128 in UTF-8,
2628 * 96 in UTF-8-mod. */
2629#else
9cbb5ea2 2630 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2631#endif
9cbb5ea2 2632 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2633#ifdef EBCDIC
2634 if (has_utf8) {
2635 int j;
2636 for (j = 0; j <= 1; j++) {
2637 char * const c = (char*)utf8_hop((U8*)d, -1);
2638 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2639 if (j)
2640 min = (U8)uv;
2641 else if (uv < 256)
2642 max = (U8)uv;
2643 else {
2644 max = (U8)0xff; /* only to \xff */
2645 uvmax = uv; /* \x{100} to uvmax */
2646 }
2647 d = c; /* eat endpoint chars */
2648 }
2649 }
2650 else {
2651#endif
2652 d -= 2; /* eat the first char and the - */
2653 min = (U8)*d; /* first char in range */
2654 max = (U8)d[1]; /* last char in range */
2655#ifdef EBCDIC
2656 }
2657#endif
8ada0baa 2658
c2e66d9e 2659 if (min > max) {
01ec43d0 2660 Perl_croak(aTHX_
d1573ac7 2661 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2662 (char)min, (char)max);
c2e66d9e
GS
2663 }
2664
c7f1f016 2665#ifdef EBCDIC
4c3a8340
TS
2666 if (literal_endpoint == 2 &&
2667 ((isLOWER(min) && isLOWER(max)) ||
2668 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2669 if (isLOWER(min)) {
2670 for (i = min; i <= max; i++)
2671 if (isLOWER(i))
db42d148 2672 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2673 } else {
2674 for (i = min; i <= max; i++)
2675 if (isUPPER(i))
db42d148 2676 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2677 }
2678 }
2679 else
2680#endif
2681 for (i = min; i <= max; i++)
e294cc5d
JH
2682#ifdef EBCDIC
2683 if (has_utf8) {
2684 const U8 ch = (U8)NATIVE_TO_UTF(i);
2685 if (UNI_IS_INVARIANT(ch))
2686 *d++ = (U8)i;
2687 else {
2688 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2689 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2690 }
2691 }
2692 else
2693#endif
2694 *d++ = (char)i;
2695
2696#ifdef EBCDIC
2697 if (uvmax) {
2698 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2699 if (uvmax > 0x101)
2700 *d++ = (char)UTF_TO_NATIVE(0xff);
2701 if (uvmax > 0x100)
2702 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2703 }
2704#endif
02aa26ce
NT
2705
2706 /* mark the range as done, and continue */
79072805 2707 dorange = FALSE;
01ec43d0 2708 didrange = TRUE;
4c3a8340
TS
2709#ifdef EBCDIC
2710 literal_endpoint = 0;
2711#endif
79072805 2712 continue;
4e553d73 2713 }
02aa26ce
NT
2714
2715 /* range begins (ignore - as first or last char) */
79072805 2716 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2717 if (didrange) {
1fafa243 2718 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2719 }
e294cc5d
JH
2720 if (has_utf8
2721#ifdef EBCDIC
2722 && !native_range
2723#endif
2724 ) {
25716404 2725 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2726 s++;
2727 continue;
2728 }
79072805
LW
2729 dorange = TRUE;
2730 s++;
01ec43d0
GS
2731 }
2732 else {
2733 didrange = FALSE;
4c3a8340
TS
2734#ifdef EBCDIC
2735 literal_endpoint = 0;
e294cc5d 2736 native_range = TRUE;
4c3a8340 2737#endif
01ec43d0 2738 }
79072805 2739 }
02aa26ce
NT
2740
2741 /* if we get here, we're not doing a transliteration */
2742
0f5d15d6
IZ
2743 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2744 except for the last char, which will be done separately. */
3280af22 2745 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2746 if (s[2] == '#') {
e994fd66 2747 while (s+1 < send && *s != ')')
db42d148 2748 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2749 }
2750 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2751 || (s[2] == '?' && s[3] == '{'))
155aba94 2752 {
cc6b7395 2753 I32 count = 1;
0f5d15d6 2754 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2755 char c;
2756
d9f97599
GS
2757 while (count && (c = *regparse)) {
2758 if (c == '\\' && regparse[1])
2759 regparse++;
4e553d73 2760 else if (c == '{')
cc6b7395 2761 count++;
4e553d73 2762 else if (c == '}')
cc6b7395 2763 count--;
d9f97599 2764 regparse++;
cc6b7395 2765 }
e994fd66 2766 if (*regparse != ')')
5bdf89e7 2767 regparse--; /* Leave one char for continuation. */
0f5d15d6 2768 while (s < regparse)
db42d148 2769 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2770 }
748a9306 2771 }
02aa26ce
NT
2772
2773 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2774 else if (*s == '#' && PL_lex_inpat &&
2775 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2776 while (s+1 < send && *s != '\n')
db42d148 2777 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2778 }
02aa26ce 2779
5d1d4326 2780 /* check for embedded arrays
da6eedaa 2781 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2782 */
1749ea0d
TS
2783 else if (*s == '@' && s[1]) {
2784 if (isALNUM_lazy_if(s+1,UTF))
2785 break;
2786 if (strchr(":'{$", s[1]))
2787 break;
2788 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2789 break; /* in regexp, neither @+ nor @- are interpolated */
2790 }
02aa26ce
NT
2791
2792 /* check for embedded scalars. only stop if we're sure it's a
2793 variable.
2794 */
79072805 2795 else if (*s == '$') {
3280af22 2796 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2797 break;
77772344 2798 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2799 if (s[1] == '\\') {
2800 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2801 "Possible unintended interpolation of $\\ in regex");
77772344 2802 }
79072805 2803 break; /* in regexp, $ might be tail anchor */
77772344 2804 }
79072805 2805 }
02aa26ce 2806
2b9d42f0
NIS
2807 /* End of else if chain - OP_TRANS rejoin rest */
2808
02aa26ce 2809 /* backslashes */
79072805 2810 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2811 char* e; /* Can be used for ending '}', etc. */
2812
79072805 2813 s++;
02aa26ce 2814
7d0fc23c
KW
2815 /* warn on \1 - \9 in substitution replacements, but note that \11
2816 * is an octal; and \19 is \1 followed by '9' */
3280af22 2817 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2818 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2819 {
a2a5de95 2820 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2821 *--s = '$';
2822 break;
2823 }
02aa26ce
NT
2824
2825 /* string-change backslash escapes */
3280af22 2826 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2827 --s;
2828 break;
2829 }
ff3f963a
KW
2830 /* In a pattern, process \N, but skip any other backslash escapes.
2831 * This is because we don't want to translate an escape sequence
2832 * into a meta symbol and have the regex compiler use the meta
2833 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2834 * in spite of this, we do have to process \N here while the proper
2835 * charnames handler is in scope. See bugs #56444 and #62056.
2836 * There is a complication because \N in a pattern may also stand
2837 * for 'match a non-nl', and not mean a charname, in which case its
2838 * processing should be deferred to the regex compiler. To be a
2839 * charname it must be followed immediately by a '{', and not look
2840 * like \N followed by a curly quantifier, i.e., not something like
2841 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2842 * quantifier */
2843 else if (PL_lex_inpat
2844 && (*s != 'N'
2845 || s[1] != '{'
2846 || regcurly(s + 1)))
2847 {
cc74c5bd
TS
2848 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2849 goto default_action;
2850 }
02aa26ce 2851
79072805 2852 switch (*s) {
02aa26ce
NT
2853
2854 /* quoted - in transliterations */
79072805 2855 case '-':
3280af22 2856 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2857 *d++ = *s++;
2858 continue;
2859 }
2860 /* FALL THROUGH */
2861 default:
11b8faa4 2862 {
a2a5de95
NC
2863 if ((isALPHA(*s) || isDIGIT(*s)))
2864 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2865 "Unrecognized escape \\%c passed through",
2866 *s);
11b8faa4 2867 /* default action is to copy the quoted character */
f9a63242 2868 goto default_action;
11b8faa4 2869 }
02aa26ce 2870
632403cc 2871 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2872 case '0': case '1': case '2': case '3':
2873 case '4': case '5': case '6': case '7':
ba210ebe 2874 {
53305cf1
NC
2875 I32 flags = 0;
2876 STRLEN len = 3;
77a135fe 2877 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2878 s += len;
2879 }
012bcf8d 2880 goto NUM_ESCAPE_INSERT;
02aa26ce 2881
f0a2b745
KW
2882 /* eg. \o{24} indicates the octal constant \024 */
2883 case 'o':
2884 {
2885 STRLEN len;
454155d9 2886 const char* error;
f0a2b745 2887
454155d9 2888 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2889 s += len;
454155d9 2890 if (! valid) {
f0a2b745
KW
2891 yyerror(error);
2892 continue;
2893 }
2894 goto NUM_ESCAPE_INSERT;
2895 }
2896
77a135fe 2897 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2898 case 'x':
a0ed51b3
LW
2899 ++s;
2900 if (*s == '{') {
9d4ba2ae 2901 char* const e = strchr(s, '}');
a4c04bdc
NC
2902 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2903 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2904 STRLEN len;
355860ce 2905
53305cf1 2906 ++s;
adaeee49 2907 if (!e) {
a0ed51b3 2908 yyerror("Missing right brace on \\x{}");
355860ce 2909 continue;
ba210ebe 2910 }
53305cf1 2911 len = e - s;
77a135fe 2912 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2913 s = e + 1;
a0ed51b3
LW
2914 }
2915 else {
ba210ebe 2916 {
53305cf1 2917 STRLEN len = 2;
a4c04bdc 2918 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2919 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2920 s += len;
2921 }
012bcf8d
GS
2922 }
2923
2924 NUM_ESCAPE_INSERT:
ff3f963a
KW
2925 /* Insert oct or hex escaped character. There will always be
2926 * enough room in sv since such escapes will be longer than any
2927 * UTF-8 sequence they can end up as, except if they force us
2928 * to recode the rest of the string into utf8 */
ba7cea30 2929
77a135fe 2930 /* Here uv is the ordinal of the next character being added in
ff3f963a 2931 * unicode (converted from native). */
77a135fe 2932 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2933 if (!has_utf8 && uv > 255) {
77a135fe
KW
2934 /* Might need to recode whatever we have accumulated so
2935 * far if it contains any chars variant in utf8 or
2936 * utf-ebcdic. */
2937
2938 SvCUR_set(sv, d - SvPVX_const(sv));
2939 SvPOK_on(sv);
2940 *d = '\0';
77a135fe 2941 /* See Note on sizing above. */
7bf79863
KW
2942 sv_utf8_upgrade_flags_grow(sv,
2943 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2944 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2945 d = SvPVX(sv) + SvCUR(sv);
2946 has_utf8 = TRUE;
012bcf8d
GS
2947 }
2948
77a135fe
KW
2949 if (has_utf8) {
2950 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2951 if (PL_lex_inwhat == OP_TRANS &&
2952 PL_sublex_info.sub_op) {
2953 PL_sublex_info.sub_op->op_private |=
2954 (PL_lex_repl ? OPpTRANS_FROM_UTF
2955 : OPpTRANS_TO_UTF);
f9a63242 2956 }
e294cc5d
JH
2957#ifdef EBCDIC
2958 if (uv > 255 && !dorange)
2959 native_range = FALSE;
2960#endif
012bcf8d 2961 }
a0ed51b3 2962 else {
012bcf8d 2963 *d++ = (char)uv;
a0ed51b3 2964 }
012bcf8d
GS
2965 }
2966 else {
c4d5f83a 2967 *d++ = (char) uv;
a0ed51b3 2968 }
79072805 2969 continue;
02aa26ce 2970
4a2d328f 2971 case 'N':
ff3f963a
KW
2972 /* In a non-pattern \N must be a named character, like \N{LATIN
2973 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2974 * mean to match a non-newline. For non-patterns, named
2975 * characters are converted to their string equivalents. In
2976 * patterns, named characters are not converted to their
2977 * ultimate forms for the same reasons that other escapes
2978 * aren't. Instead, they are converted to the \N{U+...} form
2979 * to get the value from the charnames that is in effect right
2980 * now, while preserving the fact that it was a named character
2981 * so that the regex compiler knows this */
2982
2983 /* This section of code doesn't generally use the
2984 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2985 * a close examination of this macro and determined it is a
2986 * no-op except on utfebcdic variant characters. Every
2987 * character generated by this that would normally need to be
2988 * enclosed by this macro is invariant, so the macro is not
2989 * needed, and would complicate use of copy(). There are other
2990 * parts of this file where the macro is used inconsistently,
2991 * but are saved by it being a no-op */
2992
2993 /* The structure of this section of code (besides checking for
2994 * errors and upgrading to utf8) is:
2995 * Further disambiguate between the two meanings of \N, and if
2996 * not a charname, go process it elsewhere
0a96133f
KW
2997 * If of form \N{U+...}, pass it through if a pattern;
2998 * otherwise convert to utf8
2999 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3000 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3001
3002 /* Here, s points to the 'N'; the test below is guaranteed to
3003 * succeed if we are being called on a pattern as we already
3004 * know from a test above that the next character is a '{'.
3005 * On a non-pattern \N must mean 'named sequence, which
3006 * requires braces */
3007 s++;
3008 if (*s != '{') {
3009 yyerror("Missing braces on \\N{}");
3010 continue;
3011 }
3012 s++;
3013
0a96133f 3014 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3015 if (! (e = strchr(s, '}'))) {
3016 if (! PL_lex_inpat) {
5777a3f7 3017 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3018 } else {
3019 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3020 }
0a96133f 3021 continue;
ff3f963a 3022 }
cddc7ef4 3023
ff3f963a 3024 /* Here it looks like a named character */
cddc7ef4 3025
ff3f963a
KW
3026 if (PL_lex_inpat) {
3027
3028 /* XXX This block is temporary code. \N{} implies that the
3029 * pattern is to have Unicode semantics, and therefore
3030 * currently has to be encoded in utf8. By putting it in
3031 * utf8 now, we save a whole pass in the regular expression
3032 * compiler. Once that code is changed so Unicode
3033 * semantics doesn't necessarily have to be in utf8, this
3034 * block should be removed */
3035 if (!has_utf8) {
77a135fe 3036 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3037 SvPOK_on(sv);
e4f3eed8 3038 *d = '\0';
77a135fe 3039 /* See Note on sizing above. */
7bf79863 3040 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3041 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042 /* 5 = '\N{' + cur char + NUL */
3043 (STRLEN)(send - s) + 5);
f08d6ad9 3044 d = SvPVX(sv) + SvCUR(sv);
89491803 3045 has_utf8 = TRUE;
ff3f963a
KW
3046 }
3047 }
423cee85 3048
ff3f963a
KW
3049 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3050 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3051 | PERL_SCAN_DISALLOW_PREFIX;
3052 STRLEN len;
3053
3054 /* For \N{U+...}, the '...' is a unicode value even on
3055 * EBCDIC machines */
3056 s += 2; /* Skip to next char after the 'U+' */
3057 len = e - s;
3058 uv = grok_hex(s, &len, &flags, NULL);
3059 if (len == 0 || len != (STRLEN)(e - s)) {
3060 yyerror("Invalid hexadecimal number in \\N{U+...}");
3061 s = e + 1;
3062 continue;
3063 }
3064
3065 if (PL_lex_inpat) {
3066
3067 /* Pass through to the regex compiler unchanged. The
3068 * reason we evaluated the number above is to make sure
0a96133f 3069 * there wasn't a syntax error. */
ff3f963a
KW
3070 s -= 5; /* Include the '\N{U+' */
3071 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3072 d += e - s + 1;
3073 }
3074 else { /* Not a pattern: convert the hex to string */
3075
3076 /* If destination is not in utf8, unconditionally
3077 * recode it to be so. This is because \N{} implies
3078 * Unicode semantics, and scalars have to be in utf8
3079 * to guarantee those semantics */
3080 if (! has_utf8) {
3081 SvCUR_set(sv, d - SvPVX_const(sv));
3082 SvPOK_on(sv);
3083 *d = '\0';
3084 /* See Note on sizing above. */
3085 sv_utf8_upgrade_flags_grow(
3086 sv,
3087 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3088 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3089 d = SvPVX(sv) + SvCUR(sv);
3090 has_utf8 = TRUE;
3091 }
3092
3093 /* Add the string to the output */
3094 if (UNI_IS_INVARIANT(uv)) {
3095 *d++ = (char) uv;
3096 }
3097 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3098 }
3099 }
3100 else { /* Here is \N{NAME} but not \N{U+...}. */
3101
3102 SV *res; /* result from charnames */
3103 const char *str; /* the string in 'res' */
3104 STRLEN len; /* its length */
3105
3106 /* Get the value for NAME */
3107 res = newSVpvn(s, e - s);
3108 res = new_constant( NULL, 0, "charnames",
3109 /* includes all of: \N{...} */
3110 res, NULL, s - 3, e - s + 4 );
3111
3112 /* Most likely res will be in utf8 already since the
3113 * standard charnames uses pack U, but a custom translator
3114 * can leave it otherwise, so make sure. XXX This can be
3115 * revisited to not have charnames use utf8 for characters
3116 * that don't need it when regexes don't have to be in utf8
3117 * for Unicode semantics. If doing so, remember EBCDIC */
3118 sv_utf8_upgrade(res);
3119 str = SvPV_const(res, len);
3120
3121 /* Don't accept malformed input */
3122 if (! is_utf8_string((U8 *) str, len)) {
3123 yyerror("Malformed UTF-8 returned by \\N");
3124 }
3125 else if (PL_lex_inpat) {
3126
3127 if (! len) { /* The name resolved to an empty string */
3128 Copy("\\N{}", d, 4, char);
3129 d += 4;
3130 }
3131 else {
3132 /* In order to not lose information for the regex
3133 * compiler, pass the result in the specially made
3134 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3135 * the code points in hex of each character
3136 * returned by charnames */
3137
3138 const char *str_end = str + len;
3139 STRLEN char_length; /* cur char's byte length */
3140 STRLEN output_length; /* and the number of bytes
3141 after this is translated
3142 into hex digits */
3143 const STRLEN off = d - SvPVX_const(sv);
3144
3145 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3146 * max('U+', '.'); and 1 for NUL */
3147 char hex_string[2 * UTF8_MAXBYTES + 5];
3148
3149 /* Get the first character of the result. */
3150 U32 uv = utf8n_to_uvuni((U8 *) str,
3151 len,
3152 &char_length,
3153 UTF8_ALLOW_ANYUV);
3154
3155 /* The call to is_utf8_string() above hopefully
3156 * guarantees that there won't be an error. But
3157 * it's easy here to make sure. The function just
3158 * above warns and returns 0 if invalid utf8, but
3159 * it can also return 0 if the input is validly a
3160 * NUL. Disambiguate */
3161 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3162 uv = UNICODE_REPLACEMENT;
3163 }
3164
3165 /* Convert first code point to hex, including the
3166 * boiler plate before it */
3167 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3168 output_length = strlen(hex_string);
3169
3170 /* Make sure there is enough space to hold it */
3171 d = off + SvGROW(sv, off
3172 + output_length
3173 + (STRLEN)(send - e)
3174 + 2); /* '}' + NUL */
3175 /* And output it */
3176 Copy(hex_string, d, output_length, char);
3177 d += output_length;
3178
3179 /* For each subsequent character, append dot and
3180 * its ordinal in hex */
3181 while ((str += char_length) < str_end) {
3182 const STRLEN off = d - SvPVX_const(sv);
3183 U32 uv = utf8n_to_uvuni((U8 *) str,
3184 str_end - str,
3185 &char_length,
3186 UTF8_ALLOW_ANYUV);
3187 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3188 uv = UNICODE_REPLACEMENT;
3189 }
3190
3191 sprintf(hex_string, ".%X", (unsigned int) uv);
3192 output_length = strlen(hex_string);
3193
3194 d = off + SvGROW(sv, off
3195 + output_length
3196 + (STRLEN)(send - e)
3197 + 2); /* '}' + NUL */
3198 Copy(hex_string, d, output_length, char);
3199 d += output_length;
3200 }
3201
3202 *d++ = '}'; /* Done. Add the trailing brace */
3203 }
3204 }
3205 else { /* Here, not in a pattern. Convert the name to a
3206 * string. */
3207
3208 /* If destination is not in utf8, unconditionally
3209 * recode it to be so. This is because \N{} implies
3210 * Unicode semantics, and scalars have to be in utf8
3211 * to guarantee those semantics */
3212 if (! has_utf8) {
3213 SvCUR_set(sv, d - SvPVX_const(sv));
3214 SvPOK_on(sv);
3215 *d = '\0';
3216 /* See Note on sizing above. */
3217 sv_utf8_upgrade_flags_grow(sv,
3218 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3219 len + (STRLEN)(send - s) + 1);
3220 d = SvPVX(sv) + SvCUR(sv);
3221 has_utf8 = TRUE;
3222 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3223
3224 /* See Note on sizing above. (NOTE: SvCUR() is not
3225 * set correctly here). */
3226 const STRLEN off = d - SvPVX_const(sv);
3227 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3228 }
3229 Copy(str, d, len, char);
3230 d += len;
423cee85 3231 }
423cee85 3232 SvREFCNT_dec(res);
cb233ae3
KW
3233
3234 /* Deprecate non-approved name syntax */
3235 if (ckWARN_d(WARN_DEPRECATED)) {
3236 bool problematic = FALSE;
3237 char* i = s;
3238
3239 /* For non-ut8 input, look to see that the first
3240 * character is an alpha, then loop through the rest
3241 * checking that each is a continuation */
3242 if (! this_utf8) {
3243 if (! isALPHAU(*i)) problematic = TRUE;
3244 else for (i = s + 1; i < e; i++) {
3245 if (isCHARNAME_CONT(*i)) continue;
3246 problematic = TRUE;
3247 break;
3248 }
3249 }
3250 else {
3251 /* Similarly for utf8. For invariants can check
3252 * directly. We accept anything above the latin1
3253 * range because it is immaterial to Perl if it is
3254 * correct or not, and is expensive to check. But
3255 * it is fairly easy in the latin1 range to convert
3256 * the variants into a single character and check
3257 * those */
3258 if (UTF8_IS_INVARIANT(*i)) {
3259 if (! isALPHAU(*i)) problematic = TRUE;
3260 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3261 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3262 *(i+1)))))
3263 {
3264 problematic = TRUE;
3265 }
3266 }
3267 if (! problematic) for (i = s + UTF8SKIP(s);
3268 i < e;
3269 i+= UTF8SKIP(i))
3270 {
3271 if (UTF8_IS_INVARIANT(*i)) {
3272 if (isCHARNAME_CONT(*i)) continue;
3273 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3274 continue;
3275 } else if (isCHARNAME_CONT(
3276 UNI_TO_NATIVE(
3277 UTF8_ACCUMULATE(*i, *(i+1)))))
3278 {
3279 continue;
3280 }
3281 problematic = TRUE;
3282 break;
3283 }
3284 }
3285 if (problematic) {
6e1bad6c
KW
3286 /* The e-i passed to the final %.*s makes sure that
3287 * should the trailing NUL be missing that this
3288 * print won't run off the end of the string */
cb233ae3 3289 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3290 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3291 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3292 }
3293 }
3294 } /* End \N{NAME} */
ff3f963a
KW
3295#ifdef EBCDIC
3296 if (!dorange)
3297 native_range = FALSE; /* \N{} is defined to be Unicode */
3298#endif
3299 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3300 continue;
3301
02aa26ce 3302 /* \c is a control character */
79072805
LW
3303 case 'c':
3304 s++;
961ce445 3305 if (s < send) {
f9d13529 3306 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3307 }
961ce445
RGS
3308 else {
3309 yyerror("Missing control char name in \\c");
3310 }
79072805 3311 continue;
02aa26ce
NT
3312
3313 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3314 case 'b':
db42d148 3315 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3316 break;
3317 case 'n':
db42d148 3318 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3319 break;
3320 case 'r':
db42d148 3321 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3322 break;
3323 case 'f':
db42d148 3324 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3325 break;
3326 case 't':
db42d148 3327 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3328 break;
34a3fe2a 3329 case 'e':
db42d148 3330 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3331 break;
3332 case 'a':
db42d148 3333 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3334 break;
02aa26ce
NT
3335 } /* end switch */
3336
79072805
LW
3337 s++;
3338 continue;
02aa26ce 3339 } /* end if (backslash) */
4c3a8340
TS
3340#ifdef EBCDIC
3341 else
3342 literal_endpoint++;
3343#endif
02aa26ce 3344
f9a63242 3345 default_action:
77a135fe
KW
3346 /* If we started with encoded form, or already know we want it,
3347 then encode the next character */
3348 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3349 STRLEN len = 1;
77a135fe
KW
3350
3351
3352 /* One might think that it is wasted effort in the case of the
3353 * source being utf8 (this_utf8 == TRUE) to take the next character
3354 * in the source, convert it to an unsigned value, and then convert
3355 * it back again. But the source has not been validated here. The
3356 * routine that does the conversion checks for errors like
3357 * malformed utf8 */
3358
5f66b61c
AL
3359 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3360 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3361 if (!has_utf8) {
3362 SvCUR_set(sv, d - SvPVX_const(sv));
3363 SvPOK_on(sv);
3364 *d = '\0';
77a135fe 3365 /* See Note on sizing above. */
7bf79863
KW
3366 sv_utf8_upgrade_flags_grow(sv,
3367 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3368 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3369 d = SvPVX(sv) + SvCUR(sv);
3370 has_utf8 = TRUE;
3371 } else if (need > len) {
3372 /* encoded value larger than old, may need extra space (NOTE:
3373 * SvCUR() is not set correctly here). See Note on sizing
3374 * above. */
9d4ba2ae 3375 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3376 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3377 }
77a135fe
KW
3378 s += len;
3379
5f66b61c 3380 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3381#ifdef EBCDIC
3382 if (uv > 255 && !dorange)
3383 native_range = FALSE;
3384#endif
2b9d42f0
NIS
3385 }
3386 else {
3387 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3388 }
02aa26ce
NT
3389 } /* while loop to process each character */
3390
3391 /* terminate the string and set up the sv */
79072805 3392 *d = '\0';
95a20fc0 3393 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3394 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3395 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3396
79072805 3397 SvPOK_on(sv);
9f4817db 3398 if (PL_encoding && !has_utf8) {
d0063567
DK
3399 sv_recode_to_utf8(sv, PL_encoding);
3400 if (SvUTF8(sv))
3401 has_utf8 = TRUE;
9f4817db 3402 }
2b9d42f0 3403 if (has_utf8) {
7e2040f0 3404 SvUTF8_on(sv);
2b9d42f0 3405 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3406 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3407 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3408 }
3409 }
79072805 3410
02aa26ce 3411 /* shrink the sv if we allocated more than we used */
79072805 3412 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3413 SvPV_shrink_to_cur(sv);
79072805 3414 }
02aa26ce 3415
6154021b 3416 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3417 if (s > PL_bufptr) {
eb0d8d16
NC
3418 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3419 const char *const key = PL_lex_inpat ? "qr" : "q";
3420 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3421 const char *type;
3422 STRLEN typelen;
3423
3424 if (PL_lex_inwhat == OP_TRANS) {
3425 type = "tr";
3426 typelen = 2;
3427 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3428 type = "s";
3429 typelen = 1;
3430 } else {
3431 type = "qq";
3432 typelen = 2;
3433 }
3434
3435 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3436 type, typelen);
3437 }
6154021b 3438 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3439 } else
8990e307 3440 SvREFCNT_dec(sv);
79072805
LW
3441 return s;
3442}
3443
ffb4593c
NT
3444/* S_intuit_more
3445 * Returns TRUE if there's more to the expression (e.g., a subscript),
3446 * FALSE otherwise.
ffb4593c
NT
3447 *
3448 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3449 *
3450 * ->[ and ->{ return TRUE
3451 * { and [ outside a pattern are always subscripts, so return TRUE
3452 * if we're outside a pattern and it's not { or [, then return FALSE
3453 * if we're in a pattern and the first char is a {
3454 * {4,5} (any digits around the comma) returns FALSE
3455 * if we're in a pattern and the first char is a [
3456 * [] returns FALSE
3457 * [SOMETHING] has a funky algorithm to decide whether it's a
3458 * character class or not. It has to deal with things like
3459 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3460 * anything else returns TRUE
3461 */
3462
9cbb5ea2
GS
3463/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3464
76e3520e 3465STATIC int
cea2e8a9 3466S_intuit_more(pTHX_ register char *s)
79072805 3467{
97aff369 3468 dVAR;
7918f24d
NC
3469
3470 PERL_ARGS_ASSERT_INTUIT_MORE;
3471
3280af22 3472 if (PL_lex_brackets)
79072805
LW
3473 return TRUE;
3474 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3475 return TRUE;
3476 if (*s != '{' && *s != '[')
3477 return FALSE;
3280af22 3478 if (!PL_lex_inpat)
79072805
LW
3479 return TRUE;
3480
3481 /* In a pattern, so maybe we have {n,m}. */
3482 if (*s == '{') {
3483 s++;
3484 if (!isDIGIT(*s))
3485 return TRUE;
3486 while (isDIGIT(*s))
3487 s++;
3488 if (*s == ',')
3489 s++;
3490 while (isDIGIT(*s))
3491 s++;
3492 if (*s == '}')
3493 return FALSE;
3494 return TRUE;
3495
3496 }
3497
3498 /* On the other hand, maybe we have a character class */
3499
3500 s++;
3501 if (*s == ']' || *s == '^')
3502 return FALSE;
3503 else {
ffb4593c 3504 /* this is terrifying, and it works */
79072805
LW
3505 int weight = 2; /* let's weigh the evidence */
3506 char seen[256];
f27ffc4a 3507 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3508 const char * const send = strchr(s,']');
3280af22 3509 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3510
3511 if (!send) /* has to be an expression */
3512 return TRUE;
3513
3514 Zero(seen,256,char);
3515 if (*s == '$')
3516 weight -= 3;
3517 else if (isDIGIT(*s)) {
3518 if (s[1] != ']') {
3519 if (isDIGIT(s[1]) && s[2] == ']')
3520 weight -= 10;
3521 }
3522 else
3523 weight -= 100;
3524 }
3525 for (; s < send; s++) {
3526 last_un_char = un_char;
3527 un_char = (unsigned char)*s;
3528 switch (*s) {
3529 case '@':
3530 case '&':
3531 case '$':
3532 weight -= seen[un_char] * 10;
7e2040f0 3533 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3534 int len;
8903cb82 3535 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3536 len = (int)strlen(tmpbuf);
3537 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3538 weight -= 100;
3539 else
3540 weight -= 10;
3541 }
3542 else if (*s == '$' && s[1] &&
93a17b20
LW
3543 strchr("[#!%*<>()-=",s[1])) {
3544 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3545 weight -= 10;
3546 else
3547 weight -= 1;
3548 }
3549 break;
3550 case '\\':
3551 un_char = 254;
3552 if (s[1]) {
93a17b20 3553 if (strchr("wds]",s[1]))
79072805 3554 weight += 100;
10edeb5d 3555 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3556 weight += 1;
93a17b20 3557 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3558 weight += 40;
3559 else if (isDIGIT(s[1])) {
3560 weight += 40;
3561 while (s[1] && isDIGIT(s[1]))
3562 s++;
3563 }
3564 }
3565 else
3566 weight += 100;
3567 break;
3568 case '-':
3569 if (s[1] == '\\')
3570 weight += 50;
93a17b20 3571 if (strchr("aA01! ",last_un_char))
79072805 3572 weight += 30;
93a17b20 3573 if (strchr("zZ79~",s[1]))
79072805 3574 weight += 30;
f27ffc4a
GS
3575 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3576 weight -= 5; /* cope with negative subscript */
79072805
LW
3577 break;
3578 default:
3792a11b
NC
3579 if (!isALNUM(last_un_char)
3580 && !(last_un_char == '$' || last_un_char == '@'
3581 || last_un_char == '&')
3582 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3583 char *d = tmpbuf;
3584 while (isALPHA(*s))
3585 *d++ = *s++;
3586 *d = '\0';
5458a98a 3587 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3588 weight -= 150;
3589 }
3590 if (un_char == last_un_char + 1)
3591 weight += 5;
3592 weight -= seen[un_char];
3593 break;
3594 }
3595 seen[un_char]++;
3596 }
3597 if (weight >= 0) /* probably a character class */
3598 return FALSE;
3599 }
3600
3601 return TRUE;
3602}
ffed7fef 3603
ffb4593c
NT
3604/*
3605 * S_intuit_method
3606 *
3607 * Does all the checking to disambiguate
3608 * foo bar
3609 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3610 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3611 *
3612 * First argument is the stuff after the first token, e.g. "bar".
3613 *
3614 * Not a method if bar is a filehandle.
3615 * Not a method if foo is a subroutine prototyped to take a filehandle.
3616 * Not a method if it's really "Foo $bar"
3617 * Method if it's "foo $bar"
3618 * Not a method if it's really "print foo $bar"
3619 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3620 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3621 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3622 * =>
3623 */
3624
76e3520e 3625STATIC int
62d55b22 3626S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3627{
97aff369 3628 dVAR;
a0d0e21e 3629 char *s = start + (*start == '$');
3280af22 3630 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3631 STRLEN len;
3632 GV* indirgv;
5db06880
NC
3633#ifdef PERL_MAD
3634 int soff;
3635#endif
a0d0e21e 3636
7918f24d
NC
3637 PERL_ARGS_ASSERT_INTUIT_METHOD;
3638
a0d0e21e 3639 if (gv) {
62d55b22 3640 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3641 return 0;
62d55b22
NC
3642 if (cv) {
3643 if (SvPOK(cv)) {
3644 const char *proto = SvPVX_const(cv);
3645 if (proto) {
3646 if (*proto == ';')
3647 proto++;
3648 if (*proto == '*')
3649 return 0;
3650 }
b6c543e3
IZ
3651 }
3652 } else
c35e046a 3653 gv = NULL;
a0d0e21e 3654 }
8903cb82 3655 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3656 /* start is the beginning of the possible filehandle/object,
3657 * and s is the end of it
3658 * tmpbuf is a copy of it
3659 */
3660
a0d0e21e 3661 if (*start == '$') {
3ef1310e
RGS
3662 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3663 isUPPER(*PL_tokenbuf))
a0d0e21e 3664 return 0;
5db06880
NC
3665#ifdef PERL_MAD
3666 len = start - SvPVX(PL_linestr);
3667#endif
29595ff2 3668 s = PEEKSPACE(s);
f0092767 3669#ifdef PERL_MAD
5db06880
NC
3670 start = SvPVX(PL_linestr) + len;
3671#endif
3280af22
NIS
3672 PL_bufptr = start;
3673 PL_expect = XREF;
a0d0e21e
LW
3674 return *s == '(' ? FUNCMETH : METHOD;
3675 }
5458a98a 3676 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3677 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3678 len -= 2;
3679 tmpbuf[len] = '\0';
5db06880
NC
3680#ifdef PERL_MAD
3681 soff = s - SvPVX(PL_linestr);
3682#endif
c3e0f903
GS
3683 goto bare_package;
3684 }
90e5519e 3685 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3686 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3687 return 0;
3688 /* filehandle or package name makes it a method */
da51bb9b 3689 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3690#ifdef PERL_MAD
3691 soff = s - SvPVX(PL_linestr);
3692#endif
29595ff2 3693 s = PEEKSPACE(s);
3280af22 3694 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3695 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3696 bare_package:
cd81e915 3697 start_force(PL_curforce);
9ded7720 3698 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3699 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3700 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3701 if (PL_madskills)
3702 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3703 PL_expect = XTERM;
a0d0e21e 3704 force_next(WORD);
3280af22 3705 PL_bufptr = s;
5db06880
NC
3706#ifdef PERL_MAD
3707 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3708#endif
a0d0e21e
LW
3709 return *s == '(' ? FUNCMETH : METHOD;
3710 }
3711 }
3712 return 0;
3713}
3714
16d20bd9 3715/* Encoded script support. filter_add() effectively inserts a
4e553d73 3716 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3717 * Note that the filter function only applies to the current source file
3718 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3719 *
3720 * The datasv parameter (which may be NULL) can be used to pass
3721 * private data to this instance of the filter. The filter function
3722 * can recover the SV using the FILTER_DATA macro and use it to
3723 * store private buffers and state information.
3724 *
3725 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3726 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3727 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3728 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3729 * private use must be set using malloc'd pointers.
3730 */
16d20bd9
AD
3731
3732SV *
864dbfa3 3733Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3734{
97aff369 3735 dVAR;
f4c556ac 3736 if (!funcp)
a0714e2c 3737 return NULL;
f4c556ac 3738
5486870f
DM
3739 if (!PL_parser)
3740 return NULL;
3741
3280af22
NIS
3742 if (!PL_rsfp_filters)
3743 PL_rsfp_filters = newAV();
16d20bd9 3744 if (!datasv)
561b68a9 3745 datasv = newSV(0);
862a34c6 3746 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3747 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3748 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3749 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3750 FPTR2DPTR(void *, IoANY(datasv)),
3751 SvPV_nolen(datasv)));
3280af22
NIS
3752 av_unshift(PL_rsfp_filters, 1);
3753 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3754 return(datasv);
3755}
4e553d73 3756
16d20bd9
AD
3757
3758/* Delete most recently added instance of this filter function. */
a0d0e21e 3759void
864dbfa3 3760Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3761{
97aff369 3762 dVAR;
e0c19803 3763 SV *datasv;
24801a4b 3764
7918f24d
NC
3765 PERL_ARGS_ASSERT_FILTER_DEL;
3766
33073adb 3767#ifdef DEBUGGING
55662e27
JH
3768 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3769 FPTR2DPTR(void*, funcp)));
33073adb 3770#endif
5486870f 3771 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3772 return;
3773 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3774 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3775 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3776 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3777
16d20bd9
AD
3778 return;
3779 }
3780 /* we need to search for the correct entry and clear it */
cea2e8a9 3781 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3782}
3783
3784
1de9afcd
RGS
3785/* Invoke the idxth filter function for the current rsfp. */
3786/* maxlen 0 = read one text line */
16d20bd9 3787I32
864dbfa3 3788Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3789{
97aff369 3790 dVAR;
16d20bd9
AD
3791 filter_t funcp;
3792 SV *datasv = NULL;
f482118e
NC
3793 /* This API is bad. It should have been using unsigned int for maxlen.
3794 Not sure if we want to change the API, but if not we should sanity
3795 check the value here. */
39cd7a59
NC
3796 const unsigned int correct_length
3797 = maxlen < 0 ?
3798#ifdef PERL_MICRO
3799 0x7FFFFFFF
3800#else
3801 INT_MAX
3802#endif
3803 : maxlen;
e50aee73 3804
7918f24d
NC
3805 PERL_ARGS_ASSERT_FILTER_READ;
3806
5486870f 3807 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3808 return -1;
1de9afcd 3809 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3810 /* Provide a default input filter to make life easy. */
3811 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3812 DEBUG_P(PerlIO_printf(Perl_debug_log,
3813 "filter_read %d: from rsfp\n", idx));
f482118e 3814 if (correct_length) {
16d20bd9
AD
3815 /* Want a block */
3816 int len ;
f54cb97a 3817 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3818
3819 /* ensure buf_sv is large enough */
881d8f0a 3820 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3821 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3822 correct_length)) <= 0) {
3280af22 3823 if (PerlIO_error(PL_rsfp))
37120919
AD
3824 return -1; /* error */
3825 else
3826 return 0 ; /* end of file */
3827 }
16d20bd9 3828 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3829 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3830 } else {
3831 /* Want a line */
3280af22
NIS
3832 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3833 if (PerlIO_error(PL_rsfp))
37120919
AD
3834 return -1; /* error */
3835 else
3836 return 0 ; /* end of file */
3837 }
16d20bd9
AD
3838 }
3839 return SvCUR(buf_sv);
3840 }
3841 /* Skip this filter slot if filter has been deleted */
1de9afcd 3842 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3843 DEBUG_P(PerlIO_printf(Perl_debug_log,
3844 "filter_read %d: skipped (filter deleted)\n",
3845 idx));
f482118e 3846 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3847 }
3848 /* Get function pointer hidden within datasv */
8141890a 3849 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3850 DEBUG_P(PerlIO_printf(Perl_debug_log,
3851 "filter_read %d: via function %p (%s)\n",
ca0270c4 3852 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3853 /* Call function. The function is expected to */
3854 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3855 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3856 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3857}
3858
76e3520e 3859STATIC char *
5cc814fd 3860S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3861{
97aff369 3862 dVAR;
7918f24d
NC
3863
3864 PERL_ARGS_ASSERT_FILTER_GETS;
3865
c39cd008 3866#ifdef PERL_CR_FILTER
3280af22 3867 if (!PL_rsfp_filters) {
c39cd008 3868 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3869 }
3870#endif
3280af22 3871 if (PL_rsfp_filters) {
55497cff 3872 if (!append)
3873 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3874 if (FILTER_READ(0, sv, 0) > 0)
3875 return ( SvPVX(sv) ) ;
3876 else
bd61b366 3877 return NULL ;
16d20bd9 3878 }
9d116dd7 3879 else
5cc814fd 3880 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3881}
3882
01ec43d0 3883STATIC HV *
9bde8eb0 3884S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3885{
97aff369 3886 dVAR;
def3634b
GS
3887 GV *gv;
3888
7918f24d
NC
3889 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3890
01ec43d0 3891 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3892 return PL_curstash;
3893
3894 if (len > 2 &&
3895 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3896 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3897 {
3898 return GvHV(gv); /* Foo:: */
def3634b
GS
3899 }
3900
3901 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3902 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3903 if (gv && GvCV(gv)) {
3904 SV * const sv = cv_const_sv(GvCV(gv));
3905 if (sv)
9bde8eb0 3906 pkgname = SvPV_const(sv, len);
def3634b
GS
3907 }
3908
9bde8eb0 3909 return gv_stashpvn(pkgname, len, 0);
def3634b 3910}
a0d0e21e 3911
e3f73d4e
RGS
3912/*
3913 * S_readpipe_override
3914 * Check whether readpipe() is overriden, and generates the appropriate
3915 * optree, provided sublex_start() is called afterwards.
3916 */
3917STATIC void
1d51329b 3918S_readpipe_override(pTHX)
e3f73d4e
RGS
3919{
3920 GV **gvp;
3921 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3922 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3923 if ((gv_readpipe
3924 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3925 ||
3926 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3927 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3928 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3929 {
3930 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3931 append_elem(OP_LIST,
3932 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3933 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3934 }
e3f73d4e
RGS
3935}
3936
5db06880
NC
3937#ifdef PERL_MAD
3938 /*
3939 * Perl_madlex
3940 * The intent of this yylex wrapper is to minimize the changes to the
3941 * tokener when we aren't interested in collecting madprops. It remains
3942 * to be seen how successful this strategy will be...
3943 */
3944
3945int
3946Perl_madlex(pTHX)
3947{
3948 int optype;
3949 char *s = PL_bufptr;
3950
cd81e915
NC
3951 /* make sure PL_thiswhite is initialized */
3952 PL_thiswhite = 0;
3953 PL_thismad = 0;
5db06880 3954
cd81e915 3955 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3956 if (PL_pending_ident)
3957 return S_pending_ident(aTHX);
3958
3959 /* previous token ate up our whitespace? */
cd81e915
NC
3960 if (!PL_lasttoke && PL_nextwhite) {
3961 PL_thiswhite = PL_nextwhite;
3962 PL_nextwhite = 0;
5db06880
NC
3963 }
3964
3965 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3966 PL_realtokenstart = -1;
3967 PL_thistoken = 0;
5db06880
NC
3968 optype = yylex();
3969 s = PL_bufptr;
cd81e915 3970 assert(PL_curforce < 0);
5db06880 3971
cd81e915
NC
3972 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3973 if (!PL_thistoken) {
3974 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3975 PL_thistoken = newSVpvs("");
5db06880 3976 else {
c35e046a 3977 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3978 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3979 }
3980 }
cd81e915
NC
3981 if (PL_thismad) /* install head */
3982 CURMAD('X', PL_thistoken);
5db06880
NC
3983 }
3984
3985 /* last whitespace of a sublex? */
cd81e915
NC
3986 if (optype == ')' && PL_endwhite) {
3987 CURMAD('X', PL_endwhite);
5db06880
NC
3988 }
3989
cd81e915 3990 if (!PL_thismad) {
5db06880
NC
3991
3992 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3993 if (!PL_thiswhite && !PL_endwhite && !optype) {
3994 sv_free(PL_thistoken);
3995 PL_thistoken = 0;
5db06880
NC
3996 return 0;
3997 }
3998
3999 /* put off final whitespace till peg */
4000 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4001 PL_nextwhite = PL_thiswhite;
4002 PL_thiswhite = 0;
5db06880 4003 }
cd81e915
NC
4004 else if (PL_thisopen) {
4005 CURMAD('q', PL_thisopen);
4006 if (PL_thistoken)
4007 sv_free(PL_thistoken);
4008 PL_thistoken = 0;
5db06880
NC
4009 }
4010 else {
4011 /* Store actual token text as madprop X */
cd81e915 4012 CURMAD('X', PL_thistoken);
5db06880
NC
4013 }
4014
cd81e915 4015 if (PL_thiswhite) {
5db06880 4016 /* add preceding whitespace as madprop _ */
cd81e915 4017 CURMAD('_', PL_thiswhite);
5db06880
NC
4018 }
4019
cd81e915 4020 if (PL_thisstuff) {
5db06880 4021 /* add quoted material as madprop = */
cd81e915 4022 CURMAD('=', PL_thisstuff);
5db06880
NC
4023 }
4024
cd81e915 4025 if (PL_thisclose) {
5db06880 4026 /* add terminating quote as madprop Q */
cd81e915 4027 CURMAD('Q', PL_thisclose);
5db06880
NC
4028 }
4029 }
4030
4031 /* special processing based on optype */
4032
4033 switch (optype) {
4034
4035 /* opval doesn't need a TOKEN since it can already store mp */
4036 case WORD:
4037 case METHOD:
4038 case FUNCMETH:
4039 case THING:
4040 case PMFUNC:
4041 case PRIVATEREF:
4042 case FUNC0SUB:
4043 case UNIOPSUB:
4044 case LSTOPSUB:
6154021b
RGS
4045 if (pl_yylval.opval)
4046 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4047 PL_thismad = 0;
5db06880
NC
4048 return optype;
4049
4050 /* fake EOF */
4051 case 0:
4052 optype = PEG;
cd81e915
NC
4053 if (PL_endwhite) {
4054 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4055 PL_endwhite = 0;
5db06880
NC
4056 }
4057 break;
4058
4059 case ']':
4060 case '}':
cd81e915 4061 if (PL_faketokens)
5db06880
NC
4062 break;
4063 /* remember any fake bracket that lexer is about to discard */
4064 if (PL_lex_brackets == 1 &&
4065 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4066 {
4067 s = PL_bufptr;
4068 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4069 s++;
4070 if (*s == '}') {
cd81e915
NC
4071 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4072 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4073 PL_thiswhite = 0;
5db06880
NC
4074 PL_bufptr = s - 1;
4075 break; /* don't bother looking for trailing comment */
4076 }
4077 else
4078 s = PL_bufptr;
4079 }
4080 if (optype == ']')
4081 break;
4082 /* FALLTHROUGH */
4083
4084 /* attach a trailing comment to its statement instead of next token */
4085 case ';':
cd81e915 4086 if (PL_faketokens)
5db06880
NC
4087 break;
4088 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4089 s = PL_bufptr;
4090 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4091 s++;
4092 if (*s == '\n' || *s == '#') {
4093 while (s < PL_bufend && *s != '\n')
4094 s++;
4095 if (s < PL_bufend)
4096 s++;
cd81e915
NC
4097 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4098 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4099 PL_thiswhite = 0;
5db06880
NC
4100 PL_bufptr = s;
4101 }
4102 }
4103 break;
4104
4105 /* pval */
4106 case LABEL:
4107 break;
4108
4109 /* ival */
4110 default:
4111 break;
4112
4113 }
4114
4115 /* Create new token struct. Note: opvals return early above. */
6154021b 4116 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4117 PL_thismad = 0;
5db06880
NC
4118 return optype;
4119}
4120#endif
4121
468aa647 4122STATIC char *
cc6ed77d 4123S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4124 dVAR;
7918f24d
NC
4125
4126 PERL_ARGS_ASSERT_TOKENIZE_USE;
4127
468aa647
RGS
4128 if (PL_expect != XSTATE)
4129 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4130 is_use ? "use" : "no"));
29595ff2 4131 s = SKIPSPACE1(s);
468aa647
RGS
4132 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4133 s = force_version(s, TRUE);
17c59fdf
VP
4134 if (*s == ';' || *s == '}'
4135 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4136 start_force(PL_curforce);
9ded7720 4137 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4138 force_next(WORD);
4139 }
4140 else if (*s == 'v') {
4141 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4142 s = force_version(s, FALSE);
4143 }
4144 }
4145 else {
4146 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4147 s = force_version(s, FALSE);
4148 }
6154021b 4149 pl_yylval.ival = is_use;
468aa647
RGS
4150 return s;
4151}
748a9306 4152#ifdef DEBUGGING
27da23d5 4153 static const char* const exp_name[] =
09bef843 4154 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4155 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4156 };
748a9306 4157#endif
463ee0b2 4158
02aa26ce
NT
4159/*
4160 yylex
4161
4162 Works out what to call the token just pulled out of the input
4163 stream. The yacc parser takes care of taking the ops we return and
4164 stitching them into a tree.
4165
4166 Returns:
4167 PRIVATEREF
4168
4169 Structure:
4170 if read an identifier
4171 if we're in a my declaration
4172 croak if they tried to say my($foo::bar)
4173 build the ops for a my() declaration
4174 if it's an access to a my() variable
4175 are we in a sort block?
4176 croak if my($a); $a <=> $b
4177 build ops for access to a my() variable
4178 if in a dq string, and they've said @foo and we can't find @foo
4179 croak
4180 build ops for a bareword
4181 if we already built the token before, use it.
4182*/
4183
20141f0e 4184
dba4d153
JH
4185#ifdef __SC__
4186#pragma segment Perl_yylex
4187#endif
dba4d153 4188int
dba4d153 4189Perl_yylex(pTHX)
20141f0e 4190{
97aff369 4191 dVAR;
3afc138a 4192 register char *s = PL_bufptr;
378cc40b 4193 register char *d;
463ee0b2 4194 STRLEN len;
aa7440fb 4195 bool bof = FALSE;
580561a3 4196 U32 fake_eof = 0;
a687059c 4197
10edeb5d
JH
4198 /* orig_keyword, gvp, and gv are initialized here because
4199 * jump to the label just_a_word_zero can bypass their
4200 * initialization later. */
4201 I32 orig_keyword = 0;
4202 GV *gv = NULL;
4203 GV **gvp = NULL;
4204
bbf60fe6 4205 DEBUG_T( {
396482e1 4206 SV* tmp = newSVpvs("");
b6007c36
DM
4207 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4208 (IV)CopLINE(PL_curcop),
4209 lex_state_names[PL_lex_state],
4210 exp_name[PL_expect],
4211 pv_display(tmp, s, strlen(s), 0, 60));
4212 SvREFCNT_dec(tmp);
bbf60fe6 4213 } );
02aa26ce 4214 /* check if there's an identifier for us to look at */
ba979b31 4215 if (PL_pending_ident)
bbf60fe6 4216 return REPORT(S_pending_ident(aTHX));
bbce6d69 4217
02aa26ce
NT
4218 /* no identifier pending identification */
4219
3280af22 4220 switch (PL_lex_state) {
79072805
LW
4221#ifdef COMMENTARY
4222 case LEX_NORMAL: /* Some compilers will produce faster */
4223 case LEX_INTERPNORMAL: /* code if we comment these out. */
4224 break;
4225#endif
4226
09bef843 4227 /* when we've already built the next token, just pull it out of the queue */
79072805 4228 case LEX_KNOWNEXT:
5db06880
NC
4229#ifdef PERL_MAD
4230 PL_lasttoke--;
6154021b 4231 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4232 if (PL_madskills) {
cd81e915 4233 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4234 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4235 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4236 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4237 PL_thismad->mad_val = 0;
4238 mad_free(PL_thismad);
4239 PL_thismad = 0;
5db06880
NC
4240 }
4241 }
4242 if (!PL_lasttoke) {
4243 PL_lex_state = PL_lex_defer;
4244 PL_expect = PL_lex_expect;
4245 PL_lex_defer = LEX_NORMAL;
4246 if (!PL_nexttoke[PL_lasttoke].next_type)
4247 return yylex();
4248 }
4249#else
3280af22 4250 PL_nexttoke--;
6154021b 4251 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4252 if (!PL_nexttoke) {
4253 PL_lex_state = PL_lex_defer;
4254 PL_expect = PL_lex_expect;
4255 PL_lex_defer = LEX_NORMAL;
463ee0b2 4256 }
5db06880
NC
4257#endif
4258#ifdef PERL_MAD
4259 /* FIXME - can these be merged? */
4260 return(PL_nexttoke[PL_lasttoke].next_type);
4261#else
bbf60fe6 4262 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4263#endif
79072805 4264
02aa26ce 4265 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4266 when we get here, PL_bufptr is at the \
02aa26ce 4267 */
79072805
LW
4268 case LEX_INTERPCASEMOD:
4269#ifdef DEBUGGING
3280af22 4270 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4271 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4272#endif
02aa26ce 4273 /* handle \E or end of string */
3280af22 4274 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4275 /* if at a \E */
3280af22 4276 if (PL_lex_casemods) {
f54cb97a 4277 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4278 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4279
3792a11b
NC
4280 if (PL_bufptr != PL_bufend
4281 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4282 PL_bufptr += 2;
4283 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4284#ifdef PERL_MAD
4285 if (PL_madskills)
6b29d1f5 4286 PL_thistoken = newSVpvs("\\E");
5db06880 4287#endif
a0d0e21e 4288 }
bbf60fe6 4289 return REPORT(')');
79072805 4290 }
5db06880
NC
4291#ifdef PERL_MAD
4292 while (PL_bufptr != PL_bufend &&
4293 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4294 if (!PL_thiswhite)
6b29d1f5 4295 PL_thiswhite = newSVpvs("");
cd81e915 4296 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4297 PL_bufptr += 2;
4298 }
4299#else
3280af22
NIS
4300 if (PL_bufptr != PL_bufend)
4301 PL_bufptr += 2;
5db06880 4302#endif
3280af22 4303 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4304 return yylex();
79072805
LW
4305 }
4306 else {
607df283 4307 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4308 "### Saw case modifier\n"); });
3280af22 4309 s = PL_bufptr + 1;
6e909404 4310 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4311#ifdef PERL_MAD
cd81e915 4312 if (!PL_thiswhite)
6b29d1f5 4313 PL_thiswhite = newSVpvs("");
cd81e915 4314 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4315#endif
89122651 4316 PL_bufptr = s + 3;
6e909404
JH
4317 PL_lex_state = LEX_INTERPCONCAT;
4318 return yylex();
a0d0e21e 4319 }
6e909404 4320 else {
90771dc0 4321 I32 tmp;
5db06880
NC
4322 if (!PL_madskills) /* when just compiling don't need correct */
4323 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4324 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4325 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4326 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4327 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4328 return REPORT(')');
6e909404
JH
4329 }
4330 if (PL_lex_casemods > 10)
4331 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4332 PL_lex_casestack[PL_lex_casemods++] = *s;
4333 PL_lex_casestack[PL_lex_casemods] = '\0';
4334 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4335 start_force(PL_curforce);
9ded7720 4336 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4337 force_next('(');
cd81e915 4338 start_force(PL_curforce);
6e909404 4339 if (*s == 'l')
9ded7720 4340 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4341 else if (*s == 'u')
9ded7720 4342 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4343 else if (*s == 'L')
9ded7720 4344 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4345 else if (*s == 'U')
9ded7720 4346 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4347 else if (*s == 'Q')
9ded7720 4348 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4349 else
4350 Perl_croak(aTHX_ "panic: yylex");
5db06880 4351 if (PL_madskills) {
a5849ce5
NC
4352 SV* const tmpsv = newSVpvs("\\ ");
4353 /* replace the space with the character we want to escape
4354 */
4355 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4356 curmad('_', tmpsv);
4357 }
6e909404 4358 PL_bufptr = s + 1;
a0d0e21e 4359 }
79072805 4360 force_next(FUNC);
3280af22
NIS
4361 if (PL_lex_starts) {
4362 s = PL_bufptr;
4363 PL_lex_starts = 0;
5db06880
NC
4364#ifdef PERL_MAD
4365 if (PL_madskills) {
cd81e915
NC
4366 if (PL_thistoken)
4367 sv_free(PL_thistoken);
6b29d1f5 4368 PL_thistoken = newSVpvs("");
5db06880
NC
4369 }
4370#endif
131b3ad0
DM
4371 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4372 if (PL_lex_casemods == 1 && PL_lex_inpat)
4373 OPERATOR(',');
4374 else
4375 Aop(OP_CONCAT);
79072805
LW
4376 }
4377 else
cea2e8a9 4378 return yylex();
79072805
LW
4379 }
4380
55497cff 4381 case LEX_INTERPPUSH:
bbf60fe6 4382 return REPORT(sublex_push());
55497cff 4383
79072805 4384 case LEX_INTERPSTART:
3280af22 4385 if (PL_bufptr == PL_bufend)
bbf60fe6 4386 return REPORT(sublex_done());
607df283 4387 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4388 "### Interpolated variable\n"); });
3280af22
NIS
4389 PL_expect = XTERM;
4390 PL_lex_dojoin = (*PL_bufptr == '@');
4391 PL_lex_state = LEX_INTERPNORMAL;
4392 if (PL_lex_dojoin) {
cd81e915 4393 start_force(PL_curforce);
9ded7720 4394 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4395 force_next(',');
cd81e915 4396 start_force(PL_curforce);
a0d0e21e 4397 force_ident("\"", '$');
cd81e915 4398 start_force(PL_curforce);
9ded7720 4399 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4400 force_next('$');
cd81e915 4401 start_force(PL_curforce);
9ded7720 4402 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4403 force_next('(');
cd81e915 4404 start_force(PL_curforce);
9ded7720 4405 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4406 force_next(FUNC);
4407 }
3280af22
NIS
4408 if (PL_lex_starts++) {
4409 s = PL_bufptr;
5db06880
NC
4410#ifdef PERL_MAD
4411 if (PL_madskills) {
cd81e915
NC
4412 if (PL_thistoken)
4413 sv_free(PL_thistoken);
6b29d1f5 4414 PL_thistoken = newSVpvs("");
5db06880
NC
4415 }
4416#endif
131b3ad0
DM
4417 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4418 if (!PL_lex_casemods && PL_lex_inpat)
4419 OPERATOR(',');
4420 else
4421 Aop(OP_CONCAT);
79072805 4422 }
cea2e8a9 4423 return yylex();
79072805
LW
4424
4425 case LEX_INTERPENDMAYBE:
3280af22
NIS
4426 if (intuit_more(PL_bufptr)) {
4427 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4428 break;
4429 }
4430 /* FALL THROUGH */
4431
4432 case LEX_INTERPEND:
3280af22
NIS
4433 if (PL_lex_dojoin) {
4434 PL_lex_dojoin = FALSE;
4435 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4436#ifdef PERL_MAD
4437 if (PL_madskills) {
cd81e915
NC
4438 if (PL_thistoken)
4439 sv_free(PL_thistoken);
6b29d1f5 4440 PL_thistoken = newSVpvs("");
5db06880
NC
4441 }
4442#endif
bbf60fe6 4443 return REPORT(')');
79072805 4444 }
43a16006 4445 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4446 && SvEVALED(PL_lex_repl))
43a16006 4447 {
e9fa98b2 4448 if (PL_bufptr != PL_bufend)
cea2e8a9 4449 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4450 PL_lex_repl = NULL;
e9fa98b2 4451 }
79072805
LW
4452 /* FALLTHROUGH */
4453 case LEX_INTERPCONCAT:
4454#ifdef DEBUGGING
3280af22 4455 if (PL_lex_brackets)
cea2e8a9 4456 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4457#endif
3280af22 4458 if (PL_bufptr == PL_bufend)
bbf60fe6 4459 return REPORT(sublex_done());
79072805 4460
3280af22
NIS
4461 if (SvIVX(PL_linestr) == '\'') {
4462 SV *sv = newSVsv(PL_linestr);
4463 if (!PL_lex_inpat)
76e3520e 4464 sv = tokeq(sv);
3280af22 4465 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4466 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4467 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4468 s = PL_bufend;
79072805
LW
4469 }
4470 else {
3280af22 4471 s = scan_const(PL_bufptr);
79072805 4472 if (*s == '\\')
3280af22 4473 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4474 else
3280af22 4475 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4476 }
4477
3280af22 4478 if (s != PL_bufptr) {
cd81e915 4479 start_force(PL_curforce);
5db06880
NC
4480 if (PL_madskills) {
4481 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4482 }
6154021b 4483 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4484 PL_expect = XTERM;
79072805 4485 force_next(THING);
131b3ad0 4486 if (PL_lex_starts++) {
5db06880
NC
4487#ifdef PERL_MAD
4488 if (PL_madskills) {
cd81e915
NC
4489 if (PL_thistoken)
4490 sv_free(PL_thistoken);
6b29d1f5 4491 PL_thistoken = newSVpvs("");
5db06880
NC
4492 }
4493#endif
131b3ad0
DM
4494 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495 if (!PL_lex_casemods && PL_lex_inpat)
4496 OPERATOR(',');
4497 else
4498 Aop(OP_CONCAT);
4499 }
79072805 4500 else {
3280af22 4501 PL_bufptr = s;
cea2e8a9 4502 return yylex();
79072805
LW
4503 }
4504 }
4505
cea2e8a9 4506 return yylex();
a0d0e21e 4507 case LEX_FORMLINE:
3280af22
NIS
4508 PL_lex_state = LEX_NORMAL;
4509 s = scan_formline(PL_bufptr);
4510 if (!PL_lex_formbrack)
a0d0e21e
LW
4511 goto rightbracket;
4512 OPERATOR(';');
79072805
LW
4513 }
4514
3280af22
NIS
4515 s = PL_bufptr;
4516 PL_oldoldbufptr = PL_oldbufptr;
4517 PL_oldbufptr = s;
463ee0b2
LW
4518
4519 retry:
5db06880 4520#ifdef PERL_MAD
cd81e915
NC
4521 if (PL_thistoken) {
4522 sv_free(PL_thistoken);
4523 PL_thistoken = 0;
5db06880 4524 }
cd81e915 4525 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4526#endif
378cc40b
LW
4527 switch (*s) {
4528 default:
7e2040f0 4529 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4530 goto keylookup;
b1fc3636
CJ
4531 {
4532 unsigned char c = *s;
4533 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4534 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4535 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4536 } else {
4537 d = PL_linestart;
4538 }
4539 *s = '\0';
4540 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4541 }
e929a76b
LW
4542 case 4:
4543 case 26:
4544 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4545 case 0:
5db06880
NC
4546#ifdef PERL_MAD
4547 if (PL_madskills)
cd81e915 4548 PL_faketokens = 0;
5db06880 4549#endif
3280af22
NIS
4550 if (!PL_rsfp) {
4551 PL_last_uni = 0;
4552 PL_last_lop = 0;
c5ee2135 4553 if (PL_lex_brackets) {
10edeb5d
JH
4554 yyerror((const char *)
4555 (PL_lex_formbrack
4556 ? "Format not terminated"
4557 : "Missing right curly or square bracket"));
c5ee2135 4558 }
4e553d73 4559 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4560 "### Tokener got EOF\n");
5f80b19c 4561 } );
79072805 4562 TOKEN(0);
463ee0b2 4563 }
3280af22 4564 if (s++ < PL_bufend)
a687059c 4565 goto retry; /* ignore stray nulls */
3280af22
NIS
4566 PL_last_uni = 0;
4567 PL_last_lop = 0;
4568 if (!PL_in_eval && !PL_preambled) {
4569 PL_preambled = TRUE;
5db06880
NC
4570#ifdef PERL_MAD
4571 if (PL_madskills)
cd81e915 4572 PL_faketokens = 1;
5db06880 4573#endif
5ab7ff98
NC
4574 if (PL_perldb) {
4575 /* Generate a string of Perl code to load the debugger.
4576 * If PERL5DB is set, it will return the contents of that,
4577 * otherwise a compile-time require of perl5db.pl. */
4578
4579 const char * const pdb = PerlEnv_getenv("PERL5DB");
4580
4581 if (pdb) {
4582 sv_setpv(PL_linestr, pdb);
4583 sv_catpvs(PL_linestr,";");
4584 } else {
4585 SETERRNO(0,SS_NORMAL);
4586 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4587 }
4588 } else
4589 sv_setpvs(PL_linestr,"");
c62eb204
NC
4590 if (PL_preambleav) {
4591 SV **svp = AvARRAY(PL_preambleav);
4592 SV **const end = svp + AvFILLp(PL_preambleav);
4593 while(svp <= end) {
4594 sv_catsv(PL_linestr, *svp);
4595 ++svp;
396482e1 4596 sv_catpvs(PL_linestr, ";");
91b7def8 4597 }
daba3364 4598 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4599 PL_preambleav = NULL;
91b7def8 4600 }
9f639728
FR
4601 if (PL_minus_E)
4602 sv_catpvs(PL_linestr,
4603 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4604 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4605 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4606 if (PL_minus_l)
396482e1 4607 sv_catpvs(PL_linestr,"chomp;");
3280af22 4608 if (PL_minus_a) {
3280af22 4609 if (PL_minus_F) {
3792a11b
NC
4610 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4611 || *PL_splitstr == '"')
3280af22 4612 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4613 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4614 else {
c8ef6a4b
NC
4615 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4616 bytes can be used as quoting characters. :-) */
dd374669 4617 const char *splits = PL_splitstr;
91d456ae 4618 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4619 do {
4620 /* Need to \ \s */
dd374669
AL
4621 if (*splits == '\\')
4622 sv_catpvn(PL_linestr, splits, 1);
4623 sv_catpvn(PL_linestr, splits, 1);
4624 } while (*splits++);
48c4c863
NC
4625 /* This loop will embed the trailing NUL of
4626 PL_linestr as the last thing it does before
4627 terminating. */
396482e1 4628 sv_catpvs(PL_linestr, ");");
54310121 4629 }
2304df62
AD
4630 }
4631 else
396482e1 4632 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4633 }
79072805 4634 }
396482e1 4635 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4636 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4637 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4638 PL_last_lop = PL_last_uni = NULL;
65269a95 4639 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4640 update_debugger_info(PL_linestr, NULL, 0);
79072805 4641 goto retry;
a687059c 4642 }
e929a76b 4643 do {
580561a3
Z
4644 fake_eof = 0;
4645 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4646 if (0) {
7e28d3af 4647 fake_eof:
f0e67a1d
Z
4648 fake_eof = LEX_FAKE_EOF;
4649 }
4650 PL_bufptr = PL_bufend;
17cc9359 4651 CopLINE_inc(PL_curcop);
f0e67a1d 4652 if (!lex_next_chunk(fake_eof)) {
17cc9359 4653 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4654 s = PL_bufptr;
4655 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4656 }
17cc9359 4657 CopLINE_dec(PL_curcop);
5db06880 4658#ifdef PERL_MAD
f0e67a1d 4659 if (!PL_rsfp)
cd81e915 4660 PL_realtokenstart = -1;
5db06880 4661#endif
f0e67a1d 4662 s = PL_bufptr;
7aa207d6
JH
4663 /* If it looks like the start of a BOM or raw UTF-16,
4664 * check if it in fact is. */
580561a3 4665 if (bof && PL_rsfp &&
7aa207d6
JH
4666 (*s == 0 ||
4667 *(U8*)s == 0xEF ||
4668 *(U8*)s >= 0xFE ||
4669 s[1] == 0)) {
eb160463 4670 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4671 if (bof) {
3280af22 4672 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4673 s = swallow_bom((U8*)s);
e929a76b 4674 }
378cc40b 4675 }
3280af22 4676 if (PL_doextract) {
a0d0e21e 4677 /* Incest with pod. */
5db06880
NC
4678#ifdef PERL_MAD
4679 if (PL_madskills)
cd81e915 4680 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4681#endif
01a57ef7 4682 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4683 sv_setpvs(PL_linestr, "");
3280af22
NIS
4684 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4685 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4686 PL_last_lop = PL_last_uni = NULL;
3280af22 4687 PL_doextract = FALSE;
a0d0e21e 4688 }
4e553d73 4689 }
85613cab
Z
4690 if (PL_rsfp)
4691 incline(s);
3280af22
NIS
4692 } while (PL_doextract);
4693 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4694 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4695 PL_last_lop = PL_last_uni = NULL;
57843af0 4696 if (CopLINE(PL_curcop) == 1) {
3280af22 4697 while (s < PL_bufend && isSPACE(*s))
79072805 4698 s++;
a0d0e21e 4699 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4700 s++;
5db06880
NC
4701#ifdef PERL_MAD
4702 if (PL_madskills)
cd81e915 4703 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4704#endif
bd61b366 4705 d = NULL;
3280af22 4706 if (!PL_in_eval) {
44a8e56a 4707 if (*s == '#' && *(s+1) == '!')
4708 d = s + 2;
4709#ifdef ALTERNATE_SHEBANG
4710 else {
bfed75c6 4711 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4712 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4713 d = s + (sizeof(as) - 1);
4714 }
4715#endif /* ALTERNATE_SHEBANG */
4716 }
4717 if (d) {
b8378b72 4718 char *ipath;
774d564b 4719 char *ipathend;
b8378b72 4720
774d564b 4721 while (isSPACE(*d))
b8378b72
CS
4722 d++;
4723 ipath = d;
774d564b 4724 while (*d && !isSPACE(*d))
4725 d++;
4726 ipathend = d;
4727
4728#ifdef ARG_ZERO_IS_SCRIPT
4729 if (ipathend > ipath) {
4730 /*
4731 * HP-UX (at least) sets argv[0] to the script name,
4732 * which makes $^X incorrect. And Digital UNIX and Linux,
4733 * at least, set argv[0] to the basename of the Perl
4734 * interpreter. So, having found "#!", we'll set it right.
4735 */
fafc274c
NC
4736 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4737 SVt_PV)); /* $^X */
774d564b 4738 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4739 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4740 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4741 SvSETMAGIC(x);
4742 }
556c1dec
JH
4743 else {
4744 STRLEN blen;
4745 STRLEN llen;
cfd0369c 4746 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4747 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4748 if (llen < blen) {
4749 bstart += blen - llen;
4750 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4751 sv_setpvn(x, ipath, ipathend - ipath);
4752 SvSETMAGIC(x);
4753 }
4754 }
4755 }
774d564b 4756 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4757 }
774d564b 4758#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4759
4760 /*
4761 * Look for options.
4762 */
748a9306 4763 d = instr(s,"perl -");
84e30d1a 4764 if (!d) {
748a9306 4765 d = instr(s,"perl");
84e30d1a
GS
4766#if defined(DOSISH)
4767 /* avoid getting into infinite loops when shebang
4768 * line contains "Perl" rather than "perl" */
4769 if (!d) {
4770 for (d = ipathend-4; d >= ipath; --d) {
4771 if ((*d == 'p' || *d == 'P')
4772 && !ibcmp(d, "perl", 4))
4773 {
4774 break;
4775 }
4776 }
4777 if (d < ipath)
bd61b366 4778 d = NULL;
84e30d1a
GS
4779 }
4780#endif
4781 }
44a8e56a 4782#ifdef ALTERNATE_SHEBANG
4783 /*
4784 * If the ALTERNATE_SHEBANG on this system starts with a
4785 * character that can be part of a Perl expression, then if
4786 * we see it but not "perl", we're probably looking at the
4787 * start of Perl code, not a request to hand off to some
4788 * other interpreter. Similarly, if "perl" is there, but
4789 * not in the first 'word' of the line, we assume the line
4790 * contains the start of the Perl program.
44a8e56a 4791 */
4792 if (d && *s != '#') {
f54cb97a 4793 const char *c = ipath;
44a8e56a 4794 while (*c && !strchr("; \t\r\n\f\v#", *c))
4795 c++;
4796 if (c < d)
bd61b366 4797 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4798 else
4799 *s = '#'; /* Don't try to parse shebang line */
4800 }
774d564b 4801#endif /* ALTERNATE_SHEBANG */
748a9306 4802 if (!d &&
44a8e56a 4803 *s == '#' &&
774d564b 4804 ipathend > ipath &&
3280af22 4805 !PL_minus_c &&
748a9306 4806 !instr(s,"indir") &&
3280af22 4807 instr(PL_origargv[0],"perl"))
748a9306 4808 {
27da23d5 4809 dVAR;
9f68db38 4810 char **newargv;
9f68db38 4811
774d564b 4812 *ipathend = '\0';
4813 s = ipathend + 1;
3280af22 4814 while (s < PL_bufend && isSPACE(*s))
9f68db38 4815 s++;
3280af22 4816 if (s < PL_bufend) {
d85f917e 4817 Newx(newargv,PL_origargc+3,char*);
9f68db38 4818 newargv[1] = s;
3280af22 4819 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4820 s++;
4821 *s = '\0';
3280af22 4822 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4823 }
4824 else
3280af22 4825 newargv = PL_origargv;
774d564b 4826 newargv[0] = ipath;
b35112e7 4827 PERL_FPU_PRE_EXEC
b4748376 4828 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4829 PERL_FPU_POST_EXEC
cea2e8a9 4830 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4831 }
748a9306 4832 if (d) {
c35e046a
AL
4833 while (*d && !isSPACE(*d))
4834 d++;
4835 while (SPACE_OR_TAB(*d))
4836 d++;
748a9306
LW
4837
4838 if (*d++ == '-') {
f54cb97a 4839 const bool switches_done = PL_doswitches;
fb993905
GA
4840 const U32 oldpdb = PL_perldb;
4841 const bool oldn = PL_minus_n;
4842 const bool oldp = PL_minus_p;
c7030b81 4843 const char *d1 = d;
fb993905 4844
8cc95fdb 4845 do {
4ba71d51
FC
4846 bool baduni = FALSE;
4847 if (*d1 == 'C') {
bd0ab00d
NC
4848 const char *d2 = d1 + 1;
4849 if (parse_unicode_opts((const char **)&d2)
4850 != PL_unicode)
4851 baduni = TRUE;
4ba71d51
FC
4852 }
4853 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4854 const char * const m = d1;
4855 while (*d1 && !isSPACE(*d1))
4856 d1++;
cea2e8a9 4857 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4858 (int)(d1 - m), m);
8cc95fdb 4859 }
c7030b81
NC
4860 d1 = moreswitches(d1);
4861 } while (d1);
f0b2cf55
YST
4862 if (PL_doswitches && !switches_done) {
4863 int argc = PL_origargc;
4864 char **argv = PL_origargv;
4865 do {
4866 argc--,argv++;
4867 } while (argc && argv[0][0] == '-' && argv[0][1]);
4868 init_argv_symbols(argc,argv);
4869 }
65269a95 4870 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4871 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4872 /* if we have already added "LINE: while (<>) {",
4873 we must not do it again */
748a9306 4874 {
76f68e9b 4875 sv_setpvs(PL_linestr, "");
3280af22
NIS
4876 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4877 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4878 PL_last_lop = PL_last_uni = NULL;
3280af22 4879 PL_preambled = FALSE;
65269a95 4880 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4881 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4882 goto retry;
4883 }
a0d0e21e 4884 }
79072805 4885 }
9f68db38 4886 }
79072805 4887 }
3280af22
NIS
4888 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4889 PL_bufptr = s;
4890 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4891 return yylex();
ae986130 4892 }
378cc40b 4893 goto retry;
4fdae800 4894 case '\r':
6a27c188 4895#ifdef PERL_STRICT_CR
cea2e8a9 4896 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4897 Perl_croak(aTHX_
cc507455 4898 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4899#endif
4fdae800 4900 case ' ': case '\t': case '\f': case 013:
5db06880 4901#ifdef PERL_MAD
cd81e915 4902 PL_realtokenstart = -1;
ac372eb8
RD
4903 if (!PL_thiswhite)
4904 PL_thiswhite = newSVpvs("");
4905 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4906#endif
ac372eb8 4907 s++;
378cc40b 4908 goto retry;
378cc40b 4909 case '#':
e929a76b 4910 case '\n':
5db06880 4911#ifdef PERL_MAD
cd81e915 4912 PL_realtokenstart = -1;
5db06880 4913 if (PL_madskills)
cd81e915 4914 PL_faketokens = 0;
5db06880 4915#endif
3280af22 4916 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4917 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4918 /* handle eval qq[#line 1 "foo"\n ...] */
4919 CopLINE_dec(PL_curcop);
4920 incline(s);
4921 }
5db06880
NC
4922 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4923 s = SKIPSPACE0(s);
4924 if (!PL_in_eval || PL_rsfp)
4925 incline(s);
4926 }
4927 else {
4928 d = s;
4929 while (d < PL_bufend && *d != '\n')
4930 d++;
4931 if (d < PL_bufend)
4932 d++;
4933 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4934 Perl_croak(aTHX_ "panic: input overflow");
4935#ifdef PERL_MAD
4936 if (PL_madskills)
cd81e915 4937 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4938#endif
4939 s = d;
4940 incline(s);
4941 }
3280af22
NIS
4942 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4943 PL_bufptr = s;
4944 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4945 return yylex();
a687059c 4946 }
378cc40b 4947 }
a687059c 4948 else {
5db06880
NC
4949#ifdef PERL_MAD
4950 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4951 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4952 PL_faketokens = 0;
5db06880
NC
4953 s = SKIPSPACE0(s);
4954 TOKEN(PEG); /* make sure any #! line is accessible */
4955 }
4956 s = SKIPSPACE0(s);
4957 }
4958 else {
4959/* if (PL_madskills && PL_lex_formbrack) { */
4960 d = s;
4961 while (d < PL_bufend && *d != '\n')
4962 d++;
4963 if (d < PL_bufend)
4964 d++;
4965 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4966 Perl_croak(aTHX_ "panic: input overflow");
4967 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4968 if (!PL_thiswhite)
6b29d1f5 4969 PL_thiswhite = newSVpvs("");
5db06880 4970 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4971 sv_setpvs(PL_thiswhite, "");
cd81e915 4972 PL_faketokens = 0;
5db06880 4973 }
cd81e915 4974 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4975 }
4976 s = d;
4977/* }
4978 *s = '\0';
4979 PL_bufend = s; */
4980 }
4981#else
378cc40b 4982 *s = '\0';
3280af22 4983 PL_bufend = s;
5db06880 4984#endif
a687059c 4985 }
378cc40b
LW
4986 goto retry;
4987 case '-':
79072805 4988 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4989 I32 ftst = 0;
90771dc0 4990 char tmp;
e5edeb50 4991
378cc40b 4992 s++;
3280af22 4993 PL_bufptr = s;
748a9306
LW
4994 tmp = *s++;
4995
bf4acbe4 4996 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4997 s++;
4998
4999 if (strnEQ(s,"=>",2)) {
3280af22 5000 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5001 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5002 OPERATOR('-'); /* unary minus */
5003 }
3280af22 5004 PL_last_uni = PL_oldbufptr;
748a9306 5005 switch (tmp) {
e5edeb50
JH
5006 case 'r': ftst = OP_FTEREAD; break;
5007 case 'w': ftst = OP_FTEWRITE; break;
5008 case 'x': ftst = OP_FTEEXEC; break;
5009 case 'o': ftst = OP_FTEOWNED; break;
5010 case 'R': ftst = OP_FTRREAD; break;
5011 case 'W': ftst = OP_FTRWRITE; break;
5012 case 'X': ftst = OP_FTREXEC; break;
5013 case 'O': ftst = OP_FTROWNED; break;
5014 case 'e': ftst = OP_FTIS; break;
5015 case 'z': ftst = OP_FTZERO; break;
5016 case 's': ftst = OP_FTSIZE; break;
5017 case 'f': ftst = OP_FTFILE; break;
5018 case 'd': ftst = OP_FTDIR; break;
5019 case 'l': ftst = OP_FTLINK; break;
5020 case 'p': ftst = OP_FTPIPE; break;
5021 case 'S': ftst = OP_FTSOCK; break;
5022 case 'u': ftst = OP_FTSUID; break;
5023 case 'g': ftst = OP_FTSGID; break;
5024 case 'k': ftst = OP_FTSVTX; break;
5025 case 'b': ftst = OP_FTBLK; break;
5026 case 'c': ftst = OP_FTCHR; break;
5027 case 't': ftst = OP_FTTTY; break;
5028 case 'T': ftst = OP_FTTEXT; break;
5029 case 'B': ftst = OP_FTBINARY; break;
5030 case 'M': case 'A': case 'C':
fafc274c 5031 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5032 switch (tmp) {
5033 case 'M': ftst = OP_FTMTIME; break;
5034 case 'A': ftst = OP_FTATIME; break;
5035 case 'C': ftst = OP_FTCTIME; break;
5036 default: break;
5037 }
5038 break;
378cc40b 5039 default:
378cc40b
LW
5040 break;
5041 }
e5edeb50 5042 if (ftst) {
eb160463 5043 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5044 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5045 "### Saw file test %c\n", (int)tmp);
5f80b19c 5046 } );
e5edeb50
JH
5047 FTST(ftst);
5048 }
5049 else {
5050 /* Assume it was a minus followed by a one-letter named
5051 * subroutine call (or a -bareword), then. */
95c31fe3 5052 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5053 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5054 (int) tmp);
5f80b19c 5055 } );
3cf7b4c4 5056 s = --PL_bufptr;
e5edeb50 5057 }
378cc40b 5058 }
90771dc0
NC
5059 {
5060 const char tmp = *s++;
5061 if (*s == tmp) {
5062 s++;
5063 if (PL_expect == XOPERATOR)
5064 TERM(POSTDEC);
5065 else
5066 OPERATOR(PREDEC);
5067 }
5068 else if (*s == '>') {
5069 s++;
29595ff2 5070 s = SKIPSPACE1(s);
90771dc0
NC
5071 if (isIDFIRST_lazy_if(s,UTF)) {
5072 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5073 TOKEN(ARROW);
5074 }
5075 else if (*s == '$')
5076 OPERATOR(ARROW);
5077 else
5078 TERM(ARROW);
5079 }
3280af22 5080 if (PL_expect == XOPERATOR)
90771dc0
NC
5081 Aop(OP_SUBTRACT);
5082 else {
5083 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5084 check_uni();
5085 OPERATOR('-'); /* unary minus */
79072805 5086 }
2f3197b3 5087 }
79072805 5088
378cc40b 5089 case '+':
90771dc0
NC
5090 {
5091 const char tmp = *s++;
5092 if (*s == tmp) {
5093 s++;
5094 if (PL_expect == XOPERATOR)
5095 TERM(POSTINC);
5096 else
5097 OPERATOR(PREINC);
5098 }
3280af22 5099 if (PL_expect == XOPERATOR)
90771dc0
NC
5100 Aop(OP_ADD);
5101 else {
5102 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5103 check_uni();
5104 OPERATOR('+');
5105 }
2f3197b3 5106 }
a687059c 5107
378cc40b 5108 case '*':
3280af22
NIS
5109 if (PL_expect != XOPERATOR) {
5110 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5111 PL_expect = XOPERATOR;
5112 force_ident(PL_tokenbuf, '*');
5113 if (!*PL_tokenbuf)
a0d0e21e 5114 PREREF('*');
79072805 5115 TERM('*');
a687059c 5116 }
79072805
LW
5117 s++;
5118 if (*s == '*') {
a687059c 5119 s++;
79072805 5120 PWop(OP_POW);
a687059c 5121 }
79072805
LW
5122 Mop(OP_MULTIPLY);
5123
378cc40b 5124 case '%':
3280af22 5125 if (PL_expect == XOPERATOR) {
bbce6d69 5126 ++s;
5127 Mop(OP_MODULO);
a687059c 5128 }
3280af22 5129 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5130 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5131 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5132 if (!PL_tokenbuf[1]) {
bbce6d69 5133 PREREF('%');
a687059c 5134 }
3280af22 5135 PL_pending_ident = '%';
bbce6d69 5136 TERM('%');
a687059c 5137
378cc40b 5138 case '^':
79072805 5139 s++;
a0d0e21e 5140 BOop(OP_BIT_XOR);
79072805 5141 case '[':
3280af22 5142 PL_lex_brackets++;
df3467db
IG
5143 {
5144 const char tmp = *s++;
5145 OPERATOR(tmp);
5146 }
378cc40b 5147 case '~':
0d863452 5148 if (s[1] == '~'
3e7dd34d 5149 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5150 {
5151 s += 2;
5152 Eop(OP_SMARTMATCH);
5153 }
378cc40b 5154 case ',':
90771dc0
NC
5155 {
5156 const char tmp = *s++;
5157 OPERATOR(tmp);
5158 }
a0d0e21e
LW
5159 case ':':
5160 if (s[1] == ':') {
5161 len = 0;
0bfa2a8a 5162 goto just_a_word_zero_gv;
a0d0e21e
LW
5163 }
5164 s++;
09bef843
SB
5165 switch (PL_expect) {
5166 OP *attrs;
5db06880
NC
5167#ifdef PERL_MAD
5168 I32 stuffstart;
5169#endif
09bef843
SB
5170 case XOPERATOR:
5171 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5172 break;
5173 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5174 if (*s == '=') {
5175 deprecate(":= for an empty attribute list");
5176 }
09bef843
SB
5177 goto grabattrs;
5178 case XATTRBLOCK:
5179 PL_expect = XBLOCK;
5180 goto grabattrs;
5181 case XATTRTERM:
5182 PL_expect = XTERMBLOCK;
5183 grabattrs:
5db06880
NC
5184#ifdef PERL_MAD
5185 stuffstart = s - SvPVX(PL_linestr) - 1;
5186#endif
29595ff2 5187 s = PEEKSPACE(s);
5f66b61c 5188 attrs = NULL;
7e2040f0 5189 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5190 I32 tmp;
5cc237b8 5191 SV *sv;
09bef843 5192 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5193 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5194 if (tmp < 0) tmp = -tmp;
5195 switch (tmp) {
5196 case KEY_or:
5197 case KEY_and:
5198 case KEY_for:
11baf631 5199 case KEY_foreach:
f9829d6b
GS
5200 case KEY_unless:
5201 case KEY_if:
5202 case KEY_while:
5203 case KEY_until:
5204 goto got_attrs;
5205 default:
5206 break;
5207 }
5208 }
5cc237b8 5209 sv = newSVpvn(s, len);
09bef843
SB
5210 if (*d == '(') {
5211 d = scan_str(d,TRUE,TRUE);
5212 if (!d) {
09bef843
SB
5213 /* MUST advance bufptr here to avoid bogus
5214 "at end of line" context messages from yyerror().
5215 */
5216 PL_bufptr = s + len;
5217 yyerror("Unterminated attribute parameter in attribute list");
5218 if (attrs)
5219 op_free(attrs);
5cc237b8 5220 sv_free(sv);
bbf60fe6 5221 return REPORT(0); /* EOF indicator */
09bef843
SB
5222 }
5223 }
5224 if (PL_lex_stuff) {
09bef843
SB
5225 sv_catsv(sv, PL_lex_stuff);
5226 attrs = append_elem(OP_LIST, attrs,
5227 newSVOP(OP_CONST, 0, sv));
5228 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5229 PL_lex_stuff = NULL;
09bef843
SB
5230 }
5231 else {
5cc237b8
BS
5232 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5233 sv_free(sv);
1108974d 5234 if (PL_in_my == KEY_our) {
df9a6019 5235 deprecate(":unique");
1108974d 5236 }
bfed75c6 5237 else
371fce9b
DM
5238 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5239 }
5240
d3cea301
SB
5241 /* NOTE: any CV attrs applied here need to be part of
5242 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5243 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5244 sv_free(sv);
78f9721b 5245 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5246 }
5247 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5248 sv_free(sv);
8e5dadda 5249 deprecate(":locked");
5cc237b8
BS
5250 }
5251 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5252 sv_free(sv);
78f9721b 5253 CvMETHOD_on(PL_compcv);
5cc237b8 5254 }
78f9721b
SM
5255 /* After we've set the flags, it could be argued that
5256 we don't need to do the attributes.pm-based setting
5257 process, and shouldn't bother appending recognized
d3cea301
SB
5258 flags. To experiment with that, uncomment the
5259 following "else". (Note that's already been
5260 uncommented. That keeps the above-applied built-in
5261 attributes from being intercepted (and possibly
5262 rejected) by a package's attribute routines, but is
5263 justified by the performance win for the common case
5264 of applying only built-in attributes.) */
0256094b 5265 else
78f9721b
SM
5266 attrs = append_elem(OP_LIST, attrs,
5267 newSVOP(OP_CONST, 0,
5cc237b8 5268 sv));
09bef843 5269 }
29595ff2 5270 s = PEEKSPACE(d);
0120eecf 5271 if (*s == ':' && s[1] != ':')
29595ff2 5272 s = PEEKSPACE(s+1);
0120eecf
GS
5273 else if (s == d)
5274 break; /* require real whitespace or :'s */
29595ff2 5275 /* XXX losing whitespace on sequential attributes here */
09bef843 5276 }
90771dc0
NC
5277 {
5278 const char tmp
5279 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5280 if (*s != ';' && *s != '}' && *s != tmp
5281 && (tmp != '=' || *s != ')')) {
5282 const char q = ((*s == '\'') ? '"' : '\'');
5283 /* If here for an expression, and parsed no attrs, back
5284 off. */
5285 if (tmp == '=' && !attrs) {
5286 s = PL_bufptr;
5287 break;
5288 }
5289 /* MUST advance bufptr here to avoid bogus "at end of line"
5290 context messages from yyerror().
5291 */
5292 PL_bufptr = s;
10edeb5d
JH
5293 yyerror( (const char *)
5294 (*s
5295 ? Perl_form(aTHX_ "Invalid separator character "
5296 "%c%c%c in attribute list", q, *s, q)
5297 : "Unterminated attribute list" ) );
90771dc0
NC
5298 if (attrs)
5299 op_free(attrs);
5300 OPERATOR(':');
09bef843 5301 }
09bef843 5302 }
f9829d6b 5303 got_attrs:
09bef843 5304 if (attrs) {
cd81e915 5305 start_force(PL_curforce);
9ded7720 5306 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5307 CURMAD('_', PL_nextwhite);
89122651 5308 force_next(THING);
5db06880
NC
5309 }
5310#ifdef PERL_MAD
5311 if (PL_madskills) {
cd81e915 5312 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5313 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5314 }
5db06880 5315#endif
09bef843
SB
5316 TOKEN(COLONATTR);
5317 }
a0d0e21e 5318 OPERATOR(':');
8990e307
LW
5319 case '(':
5320 s++;
3280af22
NIS
5321 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5322 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5323 else
3280af22 5324 PL_expect = XTERM;
29595ff2 5325 s = SKIPSPACE1(s);
a0d0e21e 5326 TOKEN('(');
378cc40b 5327 case ';':
f4dd75d9 5328 CLINE;
90771dc0
NC
5329 {
5330 const char tmp = *s++;
5331 OPERATOR(tmp);
5332 }
378cc40b 5333 case ')':
90771dc0
NC
5334 {
5335 const char tmp = *s++;
29595ff2 5336 s = SKIPSPACE1(s);
90771dc0
NC
5337 if (*s == '{')
5338 PREBLOCK(tmp);
5339 TERM(tmp);
5340 }
79072805
LW
5341 case ']':
5342 s++;
3280af22 5343 if (PL_lex_brackets <= 0)
d98d5fff 5344 yyerror("Unmatched right square bracket");
463ee0b2 5345 else
3280af22
NIS
5346 --PL_lex_brackets;
5347 if (PL_lex_state == LEX_INTERPNORMAL) {
5348 if (PL_lex_brackets == 0) {
02255c60
FC
5349 if (*s == '-' && s[1] == '>')
5350 PL_lex_state = LEX_INTERPENDMAYBE;
5351 else if (*s != '[' && *s != '{')
3280af22 5352 PL_lex_state = LEX_INTERPEND;
79072805
LW
5353 }
5354 }
4633a7c4 5355 TERM(']');
79072805
LW
5356 case '{':
5357 leftbracket:
79072805 5358 s++;
3280af22 5359 if (PL_lex_brackets > 100) {
8edd5f42 5360 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5361 }
3280af22 5362 switch (PL_expect) {
a0d0e21e 5363 case XTERM:
3280af22 5364 if (PL_lex_formbrack) {
a0d0e21e
LW
5365 s--;
5366 PRETERMBLOCK(DO);
5367 }
3280af22
NIS
5368 if (PL_oldoldbufptr == PL_last_lop)
5369 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5370 else
3280af22 5371 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5372 OPERATOR(HASHBRACK);
a0d0e21e 5373 case XOPERATOR:
bf4acbe4 5374 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5375 s++;
44a8e56a 5376 d = s;
3280af22
NIS
5377 PL_tokenbuf[0] = '\0';
5378 if (d < PL_bufend && *d == '-') {
5379 PL_tokenbuf[0] = '-';
44a8e56a 5380 d++;
bf4acbe4 5381 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5382 d++;
5383 }
7e2040f0 5384 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5385 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5386 FALSE, &len);
bf4acbe4 5387 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5388 d++;
5389 if (*d == '}') {
f54cb97a 5390 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5391 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5392 if (minus)
5393 force_next('-');
748a9306
LW
5394 }
5395 }
5396 /* FALL THROUGH */
09bef843 5397 case XATTRBLOCK:
748a9306 5398 case XBLOCK:
3280af22
NIS
5399 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5400 PL_expect = XSTATE;
a0d0e21e 5401 break;
09bef843 5402 case XATTRTERM:
a0d0e21e 5403 case XTERMBLOCK:
3280af22
NIS
5404 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5405 PL_expect = XSTATE;
a0d0e21e
LW
5406 break;
5407 default: {
f54cb97a 5408 const char *t;
3280af22
NIS
5409 if (PL_oldoldbufptr == PL_last_lop)
5410 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5411 else
3280af22 5412 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5413 s = SKIPSPACE1(s);
8452ff4b
SB
5414 if (*s == '}') {
5415 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5416 PL_expect = XTERM;
5417 /* This hack is to get the ${} in the message. */
5418 PL_bufptr = s+1;
5419 yyerror("syntax error");
5420 break;
5421 }
a0d0e21e 5422 OPERATOR(HASHBRACK);
8452ff4b 5423 }
b8a4b1be
GS
5424 /* This hack serves to disambiguate a pair of curlies
5425 * as being a block or an anon hash. Normally, expectation
5426 * determines that, but in cases where we're not in a
5427 * position to expect anything in particular (like inside
5428 * eval"") we have to resolve the ambiguity. This code
5429 * covers the case where the first term in the curlies is a
5430 * quoted string. Most other cases need to be explicitly
a0288114 5431 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5432 * curly in order to force resolution as an anon hash.
5433 *
5434 * XXX should probably propagate the outer expectation
5435 * into eval"" to rely less on this hack, but that could
5436 * potentially break current behavior of eval"".
5437 * GSAR 97-07-21
5438 */
5439 t = s;
5440 if (*s == '\'' || *s == '"' || *s == '`') {
5441 /* common case: get past first string, handling escapes */
3280af22 5442 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5443 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5444 t++;
5445 t++;
a0d0e21e 5446 }
b8a4b1be 5447 else if (*s == 'q') {
3280af22 5448 if (++t < PL_bufend
b8a4b1be 5449 && (!isALNUM(*t)
3280af22 5450 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5451 && !isALNUM(*t))))
5452 {
abc667d1 5453 /* skip q//-like construct */
f54cb97a 5454 const char *tmps;
b8a4b1be
GS
5455 char open, close, term;
5456 I32 brackets = 1;
5457
3280af22 5458 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5459 t++;
abc667d1
DM
5460 /* check for q => */
5461 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5462 OPERATOR(HASHBRACK);
5463 }
b8a4b1be
GS
5464 term = *t;
5465 open = term;
5466 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5467 term = tmps[5];
5468 close = term;
5469 if (open == close)
3280af22
NIS
5470 for (t++; t < PL_bufend; t++) {
5471 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5472 t++;
6d07e5e9 5473 else if (*t == open)
b8a4b1be
GS
5474 break;
5475 }
abc667d1 5476 else {
3280af22
NIS
5477 for (t++; t < PL_bufend; t++) {
5478 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5479 t++;
6d07e5e9 5480 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5481 break;
5482 else if (*t == open)
5483 brackets++;
5484 }
abc667d1
DM
5485 }
5486 t++;
b8a4b1be 5487 }
abc667d1
DM
5488 else
5489 /* skip plain q word */
5490 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5491 t += UTF8SKIP(t);
a0d0e21e 5492 }
7e2040f0 5493 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5494 t += UTF8SKIP(t);
7e2040f0 5495 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5496 t += UTF8SKIP(t);
a0d0e21e 5497 }
3280af22 5498 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5499 t++;
b8a4b1be
GS
5500 /* if comma follows first term, call it an anon hash */
5501 /* XXX it could be a comma expression with loop modifiers */
3280af22 5502 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5503 || (*t == '=' && t[1] == '>')))
a0d0e21e 5504 OPERATOR(HASHBRACK);
3280af22 5505 if (PL_expect == XREF)
4e4e412b 5506 PL_expect = XTERM;
a0d0e21e 5507 else {
3280af22
NIS
5508 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5509 PL_expect = XSTATE;
a0d0e21e 5510 }
8990e307 5511 }
a0d0e21e 5512 break;
463ee0b2 5513 }
6154021b 5514 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5515 if (isSPACE(*s) || *s == '#')
3280af22 5516 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5517 TOKEN('{');
378cc40b 5518 case '}':
79072805
LW
5519 rightbracket:
5520 s++;
3280af22 5521 if (PL_lex_brackets <= 0)
d98d5fff 5522 yyerror("Unmatched right curly bracket");
463ee0b2 5523 else
3280af22 5524 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5525 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5526 PL_lex_formbrack = 0;
5527 if (PL_lex_state == LEX_INTERPNORMAL) {
5528 if (PL_lex_brackets == 0) {
9059aa12
LW
5529 if (PL_expect & XFAKEBRACK) {
5530 PL_expect &= XENUMMASK;
3280af22
NIS
5531 PL_lex_state = LEX_INTERPEND;
5532 PL_bufptr = s;
5db06880
NC
5533#if 0
5534 if (PL_madskills) {
cd81e915 5535 if (!PL_thiswhite)
6b29d1f5 5536 PL_thiswhite = newSVpvs("");
76f68e9b 5537 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5538 }
5539#endif
cea2e8a9 5540 return yylex(); /* ignore fake brackets */
79072805 5541 }
fa83b5b6 5542 if (*s == '-' && s[1] == '>')
3280af22 5543 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5544 else if (*s != '[' && *s != '{')
3280af22 5545 PL_lex_state = LEX_INTERPEND;
79072805
LW
5546 }
5547 }
9059aa12
LW
5548 if (PL_expect & XFAKEBRACK) {
5549 PL_expect &= XENUMMASK;
3280af22 5550 PL_bufptr = s;
cea2e8a9 5551 return yylex(); /* ignore fake brackets */
748a9306 5552 }
cd81e915 5553 start_force(PL_curforce);
5db06880
NC
5554 if (PL_madskills) {
5555 curmad('X', newSVpvn(s-1,1));
cd81e915 5556 CURMAD('_', PL_thiswhite);
5db06880 5557 }
79072805 5558 force_next('}');
5db06880 5559#ifdef PERL_MAD
cd81e915 5560 if (!PL_thistoken)
6b29d1f5 5561 PL_thistoken = newSVpvs("");
5db06880 5562#endif
79072805 5563 TOKEN(';');
378cc40b
LW
5564 case '&':
5565 s++;
90771dc0 5566 if (*s++ == '&')
a0d0e21e 5567 AOPERATOR(ANDAND);
378cc40b 5568 s--;
3280af22 5569 if (PL_expect == XOPERATOR) {
041457d9
DM
5570 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5571 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5572 {
57843af0 5573 CopLINE_dec(PL_curcop);
f1f66076 5574 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5575 CopLINE_inc(PL_curcop);
463ee0b2 5576 }
79072805 5577 BAop(OP_BIT_AND);
463ee0b2 5578 }
79072805 5579
3280af22
NIS
5580 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5581 if (*PL_tokenbuf) {
5582 PL_expect = XOPERATOR;
5583 force_ident(PL_tokenbuf, '&');
463ee0b2 5584 }
79072805
LW
5585 else
5586 PREREF('&');
6154021b 5587 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5588 TERM('&');
5589
378cc40b
LW
5590 case '|':
5591 s++;
90771dc0 5592 if (*s++ == '|')
a0d0e21e 5593 AOPERATOR(OROR);
378cc40b 5594 s--;
79072805 5595 BOop(OP_BIT_OR);
378cc40b
LW
5596 case '=':
5597 s++;
748a9306 5598 {
90771dc0
NC
5599 const char tmp = *s++;
5600 if (tmp == '=')
5601 Eop(OP_EQ);
5602 if (tmp == '>')
5603 OPERATOR(',');
5604 if (tmp == '~')
5605 PMop(OP_MATCH);
5606 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5607 && strchr("+-*/%.^&|<",tmp))
5608 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5609 "Reversed %c= operator",(int)tmp);
5610 s--;
5611 if (PL_expect == XSTATE && isALPHA(tmp) &&
5612 (s == PL_linestart+1 || s[-2] == '\n') )
5613 {
5614 if (PL_in_eval && !PL_rsfp) {
5615 d = PL_bufend;
5616 while (s < d) {
5617 if (*s++ == '\n') {
5618 incline(s);
5619 if (strnEQ(s,"=cut",4)) {
5620 s = strchr(s,'\n');
5621 if (s)
5622 s++;
5623 else
5624 s = d;
5625 incline(s);
5626 goto retry;
5627 }
5628 }
a5f75d66 5629 }
90771dc0 5630 goto retry;
a5f75d66 5631 }
5db06880
NC
5632#ifdef PERL_MAD
5633 if (PL_madskills) {
cd81e915 5634 if (!PL_thiswhite)
6b29d1f5 5635 PL_thiswhite = newSVpvs("");
cd81e915 5636 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5637 PL_bufend - PL_linestart);
5638 }
5639#endif
90771dc0
NC
5640 s = PL_bufend;
5641 PL_doextract = TRUE;
5642 goto retry;
a5f75d66 5643 }
a0d0e21e 5644 }
3280af22 5645 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5646 const char *t = s;
51882d45 5647#ifdef PERL_STRICT_CR
c35e046a 5648 while (SPACE_OR_TAB(*t))
51882d45 5649#else
c35e046a 5650 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5651#endif
c35e046a 5652 t++;
a0d0e21e
LW
5653 if (*t == '\n' || *t == '#') {
5654 s--;
3280af22 5655 PL_expect = XBLOCK;
a0d0e21e
LW
5656 goto leftbracket;
5657 }
79072805 5658 }
6154021b 5659 pl_yylval.ival = 0;
a0d0e21e 5660 OPERATOR(ASSIGNOP);
378cc40b
LW
5661 case '!':
5662 s++;
90771dc0
NC
5663 {
5664 const char tmp = *s++;
5665 if (tmp == '=') {
5666 /* was this !=~ where !~ was meant?
5667 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5668
5669 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5670 const char *t = s+1;
5671
5672 while (t < PL_bufend && isSPACE(*t))
5673 ++t;
5674
5675 if (*t == '/' || *t == '?' ||
5676 ((*t == 'm' || *t == 's' || *t == 'y')
5677 && !isALNUM(t[1])) ||
5678 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5679 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5680 "!=~ should be !~");
5681 }
5682 Eop(OP_NE);
5683 }
5684 if (tmp == '~')
5685 PMop(OP_NOT);
5686 }
378cc40b
LW
5687 s--;
5688 OPERATOR('!');
5689 case '<':
3280af22 5690 if (PL_expect != XOPERATOR) {
93a17b20 5691 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5692 check_uni();
79072805
LW
5693 if (s[1] == '<')
5694 s = scan_heredoc(s);
5695 else
5696 s = scan_inputsymbol(s);
5697 TERM(sublex_start());
378cc40b
LW
5698 }
5699 s++;
90771dc0
NC
5700 {
5701 char tmp = *s++;
5702 if (tmp == '<')
5703 SHop(OP_LEFT_SHIFT);
5704 if (tmp == '=') {
5705 tmp = *s++;
5706 if (tmp == '>')
5707 Eop(OP_NCMP);
5708 s--;
5709 Rop(OP_LE);
5710 }
395c3793 5711 }
378cc40b 5712 s--;
79072805 5713 Rop(OP_LT);
378cc40b
LW
5714 case '>':
5715 s++;
90771dc0
NC
5716 {
5717 const char tmp = *s++;
5718 if (tmp == '>')
5719 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5720 else if (tmp == '=')
90771dc0
NC
5721 Rop(OP_GE);
5722 }
378cc40b 5723 s--;
79072805 5724 Rop(OP_GT);
378cc40b
LW
5725
5726 case '$':
bbce6d69 5727 CLINE;
5728
3280af22
NIS
5729 if (PL_expect == XOPERATOR) {
5730 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5731 return deprecate_commaless_var_list();
a0d0e21e 5732 }
8990e307 5733 }
a0d0e21e 5734
c0b977fd 5735 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5736 PL_tokenbuf[0] = '@';
376b8730
SM
5737 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5738 sizeof PL_tokenbuf - 1, FALSE);
5739 if (PL_expect == XOPERATOR)
5740 no_op("Array length", s);
3280af22 5741 if (!PL_tokenbuf[1])
a0d0e21e 5742 PREREF(DOLSHARP);
3280af22
NIS
5743 PL_expect = XOPERATOR;
5744 PL_pending_ident = '#';
463ee0b2 5745 TOKEN(DOLSHARP);
79072805 5746 }
bbce6d69 5747
3280af22 5748 PL_tokenbuf[0] = '$';
376b8730
SM
5749 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5750 sizeof PL_tokenbuf - 1, FALSE);
5751 if (PL_expect == XOPERATOR)
5752 no_op("Scalar", s);
3280af22
NIS
5753 if (!PL_tokenbuf[1]) {
5754 if (s == PL_bufend)
bbce6d69 5755 yyerror("Final $ should be \\$ or $name");
5756 PREREF('$');
8990e307 5757 }
a0d0e21e 5758
bbce6d69 5759 /* This kludge not intended to be bulletproof. */
3280af22 5760 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5761 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5762 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5763 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5764 TERM(THING);
5765 }
5766
ff68c719 5767 d = s;
90771dc0
NC
5768 {
5769 const char tmp = *s;
ae28bb2a 5770 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5771 s = SKIPSPACE1(s);
ff68c719 5772
90771dc0
NC
5773 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5774 && intuit_more(s)) {
5775 if (*s == '[') {
5776 PL_tokenbuf[0] = '@';
5777 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5778 char *t = s+1;
5779
5780 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5781 t++;
90771dc0 5782 if (*t++ == ',') {
29595ff2 5783 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5784 while (t < PL_bufend && *t != ']')
5785 t++;
9014280d 5786 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5787 "Multidimensional syntax %.*s not supported",
36c7798d 5788 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5789 }
748a9306 5790 }
93a17b20 5791 }
90771dc0
NC
5792 else if (*s == '{') {
5793 char *t;
5794 PL_tokenbuf[0] = '%';
5795 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5796 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5797 {
5798 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5799 do {
5800 t++;
5801 } while (isSPACE(*t));
90771dc0 5802 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5803 STRLEN len;
90771dc0 5804 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5805 &len);
c35e046a
AL
5806 while (isSPACE(*t))
5807 t++;
780a5241 5808 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5809 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5810 "You need to quote \"%s\"",
5811 tmpbuf);
5812 }
5813 }
5814 }
93a17b20 5815 }
bbce6d69 5816
90771dc0
NC
5817 PL_expect = XOPERATOR;
5818 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5819 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5820 if (!islop || PL_last_lop_op == OP_GREPSTART)
5821 PL_expect = XOPERATOR;
5822 else if (strchr("$@\"'`q", *s))
5823 PL_expect = XTERM; /* e.g. print $fh "foo" */
5824 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5825 PL_expect = XTERM; /* e.g. print $fh &sub */
5826 else if (isIDFIRST_lazy_if(s,UTF)) {
5827 char tmpbuf[sizeof PL_tokenbuf];
5828 int t2;
5829 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5830 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5831 /* binary operators exclude handle interpretations */
5832 switch (t2) {
5833 case -KEY_x:
5834 case -KEY_eq:
5835 case -KEY_ne:
5836 case -KEY_gt:
5837 case -KEY_lt:
5838 case -KEY_ge:
5839 case -KEY_le:
5840 case -KEY_cmp:
5841 break;
5842 default:
5843 PL_expect = XTERM; /* e.g. print $fh length() */
5844 break;
5845 }
5846 }
5847 else {
5848 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5849 }
5850 }
90771dc0
NC
5851 else if (isDIGIT(*s))
5852 PL_expect = XTERM; /* e.g. print $fh 3 */
5853 else if (*s == '.' && isDIGIT(s[1]))
5854 PL_expect = XTERM; /* e.g. print $fh .3 */
5855 else if ((*s == '?' || *s == '-' || *s == '+')
5856 && !isSPACE(s[1]) && s[1] != '=')
5857 PL_expect = XTERM; /* e.g. print $fh -1 */
5858 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5859 && s[1] != '/')
5860 PL_expect = XTERM; /* e.g. print $fh /.../
5861 XXX except DORDOR operator
5862 */
5863 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5864 && s[2] != '=')
5865 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5866 }
bbce6d69 5867 }
3280af22 5868 PL_pending_ident = '$';
79072805 5869 TOKEN('$');
378cc40b
LW
5870
5871 case '@':
3280af22 5872 if (PL_expect == XOPERATOR)
bbce6d69 5873 no_op("Array", s);
3280af22
NIS
5874 PL_tokenbuf[0] = '@';
5875 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5876 if (!PL_tokenbuf[1]) {
bbce6d69 5877 PREREF('@');
5878 }
3280af22 5879 if (PL_lex_state == LEX_NORMAL)
29595ff2 5880 s = SKIPSPACE1(s);
3280af22 5881 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5882 if (*s == '{')
3280af22 5883 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5884
5885 /* Warn about @ where they meant $. */
041457d9
DM
5886 if (*s == '[' || *s == '{') {
5887 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5888 const char *t = s + 1;
7e2040f0 5889 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5890 t++;
5891 if (*t == '}' || *t == ']') {
5892 t++;
29595ff2 5893 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5894 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5895 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5896 (int)(t-PL_bufptr), PL_bufptr,
5897 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5898 }
93a17b20
LW
5899 }
5900 }
463ee0b2 5901 }
3280af22 5902 PL_pending_ident = '@';
79072805 5903 TERM('@');
378cc40b 5904
c963b151 5905 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5906 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5907 s += 2;
5908 AOPERATOR(DORDOR);
5909 }
c963b151 5910 case '?': /* may either be conditional or pattern */
be25f609 5911 if (PL_expect == XOPERATOR) {
90771dc0 5912 char tmp = *s++;
c963b151 5913 if(tmp == '?') {
be25f609 5914 OPERATOR('?');
c963b151
BD
5915 }
5916 else {
5917 tmp = *s++;
5918 if(tmp == '/') {
5919 /* A // operator. */
5920 AOPERATOR(DORDOR);
5921 }
5922 else {
5923 s--;
5924 Mop(OP_DIVIDE);
5925 }
5926 }
5927 }
5928 else {
5929 /* Disable warning on "study /blah/" */
5930 if (PL_oldoldbufptr == PL_last_uni
5931 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5932 || memNE(PL_last_uni, "study", 5)
5933 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5934 ))
5935 check_uni();
5936 s = scan_pat(s,OP_MATCH);
5937 TERM(sublex_start());
5938 }
378cc40b
LW
5939
5940 case '.':
51882d45
GS
5941 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5942#ifdef PERL_STRICT_CR
5943 && s[1] == '\n'
5944#else
5945 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5946#endif
5947 && (s == PL_linestart || s[-1] == '\n') )
5948 {
3280af22
NIS
5949 PL_lex_formbrack = 0;
5950 PL_expect = XSTATE;
79072805
LW
5951 goto rightbracket;
5952 }
be25f609 5953 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5954 s += 3;
5955 OPERATOR(YADAYADA);
5956 }
3280af22 5957 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5958 char tmp = *s++;
a687059c
LW
5959 if (*s == tmp) {
5960 s++;
2f3197b3
LW
5961 if (*s == tmp) {
5962 s++;
6154021b 5963 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5964 }
5965 else
6154021b 5966 pl_yylval.ival = 0;
378cc40b 5967 OPERATOR(DOTDOT);
a687059c 5968 }
79072805 5969 Aop(OP_CONCAT);
378cc40b
LW
5970 }
5971 /* FALL THROUGH */
5972 case '0': case '1': case '2': case '3': case '4':
5973 case '5': case '6': case '7': case '8': case '9':
6154021b 5974 s = scan_num(s, &pl_yylval);
931e0695 5975 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5976 if (PL_expect == XOPERATOR)
8990e307 5977 no_op("Number",s);
79072805
LW
5978 TERM(THING);
5979
5980 case '\'':
5db06880 5981 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5982 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5983 if (PL_expect == XOPERATOR) {
5984 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5985 return deprecate_commaless_var_list();
a0d0e21e 5986 }
463ee0b2 5987 else
8990e307 5988 no_op("String",s);
463ee0b2 5989 }
79072805 5990 if (!s)
d4c19fe8 5991 missingterm(NULL);
6154021b 5992 pl_yylval.ival = OP_CONST;
79072805
LW
5993 TERM(sublex_start());
5994
5995 case '"':
5db06880 5996 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5997 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5998 if (PL_expect == XOPERATOR) {
5999 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6000 return deprecate_commaless_var_list();
a0d0e21e 6001 }
463ee0b2 6002 else
8990e307 6003 no_op("String",s);
463ee0b2 6004 }
79072805 6005 if (!s)
d4c19fe8 6006 missingterm(NULL);
6154021b 6007 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6008 /* FIXME. I think that this can be const if char *d is replaced by
6009 more localised variables. */
3280af22 6010 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6011 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6012 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6013 break;
6014 }
6015 }
79072805
LW
6016 TERM(sublex_start());
6017
6018 case '`':
5db06880 6019 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6020 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6021 if (PL_expect == XOPERATOR)
8990e307 6022 no_op("Backticks",s);
79072805 6023 if (!s)
d4c19fe8 6024 missingterm(NULL);
9b201d7d 6025 readpipe_override();
79072805
LW
6026 TERM(sublex_start());
6027
6028 case '\\':
6029 s++;
a2a5de95
NC
6030 if (PL_lex_inwhat && isDIGIT(*s))
6031 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6032 *s, *s);
3280af22 6033 if (PL_expect == XOPERATOR)
8990e307 6034 no_op("Backslash",s);
79072805
LW
6035 OPERATOR(REFGEN);
6036
a7cb1f99 6037 case 'v':
e526c9e6 6038 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6039 char *start = s + 2;
dd629d5b 6040 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6041 start++;
6042 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6043 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6044 TERM(THING);
6045 }
e526c9e6 6046 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6047 else if (!isALPHA(*start) && (PL_expect == XTERM
6048 || PL_expect == XREF || PL_expect == XSTATE
6049 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6050 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6051 if (!gv) {
6154021b 6052 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6053 TERM(THING);
6054 }
6055 }
a7cb1f99
GS
6056 }
6057 goto keylookup;
79072805 6058 case 'x':
3280af22 6059 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6060 s++;
6061 Mop(OP_REPEAT);
2f3197b3 6062 }
79072805
LW
6063 goto keylookup;
6064
378cc40b 6065 case '_':
79072805
LW
6066 case 'a': case 'A':
6067 case 'b': case 'B':
6068 case 'c': case 'C':
6069 case 'd': case 'D':
6070 case 'e': case 'E':
6071 case 'f': case 'F':
6072 case 'g': case 'G':
6073 case 'h': case 'H':
6074 case 'i': case 'I':
6075 case 'j': case 'J':
6076 case 'k': case 'K':
6077 case 'l': case 'L':
6078 case 'm': case 'M':
6079 case 'n': case 'N':
6080 case 'o': case 'O':
6081 case 'p': case 'P':
6082 case 'q': case 'Q':
6083 case 'r': case 'R':
6084 case 's': case 'S':
6085 case 't': case 'T':
6086 case 'u': case 'U':
a7cb1f99 6087 case 'V':
79072805
LW
6088 case 'w': case 'W':
6089 case 'X':
6090 case 'y': case 'Y':
6091 case 'z': case 'Z':
6092
49dc05e3 6093 keylookup: {
88e1f1a2 6094 bool anydelim;
90771dc0 6095 I32 tmp;
10edeb5d
JH
6096
6097 orig_keyword = 0;
6098 gv = NULL;
6099 gvp = NULL;
49dc05e3 6100
3280af22
NIS
6101 PL_bufptr = s;
6102 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6103
6104 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6105 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6106 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6107 (PL_tokenbuf[0] == 'q' &&
6108 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6109
6110 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6111 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6112 goto just_a_word;
6113
3643fb5f 6114 d = s;
3280af22 6115 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6116 d++; /* no comments skipped here, or s### is misparsed */
6117
748a9306 6118 /* Is this a word before a => operator? */
1c3923b3 6119 if (*d == '=' && d[1] == '>') {
748a9306 6120 CLINE;
6154021b 6121 pl_yylval.opval
d0a148a6
NC
6122 = (OP*)newSVOP(OP_CONST, 0,
6123 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6124 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6125 TERM(WORD);
6126 }
6127
88e1f1a2
JV
6128 /* Check for plugged-in keyword */
6129 {
6130 OP *o;
6131 int result;
6132 char *saved_bufptr = PL_bufptr;
6133 PL_bufptr = s;
6134 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6135 s = PL_bufptr;
6136 if (result == KEYWORD_PLUGIN_DECLINE) {
6137 /* not a plugged-in keyword */
6138 PL_bufptr = saved_bufptr;
6139 } else if (result == KEYWORD_PLUGIN_STMT) {
6140 pl_yylval.opval = o;
6141 CLINE;
6142 PL_expect = XSTATE;
6143 return REPORT(PLUGSTMT);
6144 } else if (result == KEYWORD_PLUGIN_EXPR) {
6145 pl_yylval.opval = o;
6146 CLINE;
6147 PL_expect = XOPERATOR;
6148 return REPORT(PLUGEXPR);
6149 } else {
6150 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6151 PL_tokenbuf);
6152 }
6153 }
6154
6155 /* Check for built-in keyword */
6156 tmp = keyword(PL_tokenbuf, len, 0);
6157
6158 /* Is this a label? */
6159 if (!anydelim && PL_expect == XSTATE
6160 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6161 s = d + 1;
6162 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6163 CLINE;
6164 TOKEN(LABEL);
6165 }
6166
a0d0e21e 6167 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6168 GV *ogv = NULL; /* override (winner) */
6169 GV *hgv = NULL; /* hidden (loser) */
3280af22 6170 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6171 CV *cv;
90e5519e 6172 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6173 (cv = GvCVu(gv)))
6174 {
6175 if (GvIMPORTED_CV(gv))
6176 ogv = gv;
6177 else if (! CvMETHOD(cv))
6178 hgv = gv;
6179 }
6180 if (!ogv &&
3280af22 6181 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6182 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6183 GvCVu(gv) && GvIMPORTED_CV(gv))
6184 {
6185 ogv = gv;
6186 }
6187 }
6188 if (ogv) {
30fe34ed 6189 orig_keyword = tmp;
56f7f34b 6190 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6191 }
6192 else if (gv && !gvp
6193 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6194 && GvCVu(gv))
6e7b2336
GS
6195 {
6196 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6197 }
56f7f34b
CS
6198 else { /* no override */
6199 tmp = -tmp;
a2a5de95
NC
6200 if (tmp == KEY_dump) {
6201 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6202 "dump() better written as CORE::dump()");
ac206dc8 6203 }
a0714e2c 6204 gv = NULL;
56f7f34b 6205 gvp = 0;
a2a5de95
NC
6206 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6207 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6208 "Ambiguous call resolved as CORE::%s(), "
6209 "qualify as such or use &",
6210 GvENAME(hgv));
49dc05e3 6211 }
a0d0e21e
LW
6212 }
6213
6214 reserved_word:
6215 switch (tmp) {
79072805
LW
6216
6217 default: /* not a keyword */
0bfa2a8a
NC
6218 /* Trade off - by using this evil construction we can pull the
6219 variable gv into the block labelled keylookup. If not, then
6220 we have to give it function scope so that the goto from the
6221 earlier ':' case doesn't bypass the initialisation. */
6222 if (0) {
6223 just_a_word_zero_gv:
6224 gv = NULL;
6225 gvp = NULL;
8bee0991 6226 orig_keyword = 0;
0bfa2a8a 6227 }
93a17b20 6228 just_a_word: {
96e4d5b1 6229 SV *sv;
ce29ac45 6230 int pkgname = 0;
f54cb97a 6231 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6232 OP *rv2cv_op;
5069cc75 6233 CV *cv;
5db06880 6234#ifdef PERL_MAD
cd81e915 6235 SV *nextPL_nextwhite = 0;
5db06880
NC
6236#endif
6237
8990e307
LW
6238
6239 /* Get the rest if it looks like a package qualifier */
6240
155aba94 6241 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6242 STRLEN morelen;
3280af22 6243 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6244 TRUE, &morelen);
6245 if (!morelen)
cea2e8a9 6246 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6247 *s == '\'' ? "'" : "::");
c3e0f903 6248 len += morelen;
ce29ac45 6249 pkgname = 1;
a0d0e21e 6250 }
8990e307 6251
3280af22
NIS
6252 if (PL_expect == XOPERATOR) {
6253 if (PL_bufptr == PL_linestart) {
57843af0 6254 CopLINE_dec(PL_curcop);
f1f66076 6255 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6256 CopLINE_inc(PL_curcop);
463ee0b2
LW
6257 }
6258 else
54310121 6259 no_op("Bareword",s);
463ee0b2 6260 }
8990e307 6261
c3e0f903
GS
6262 /* Look for a subroutine with this name in current package,
6263 unless name is "Foo::", in which case Foo is a bearword
6264 (and a package name). */
6265
5db06880 6266 if (len > 2 && !PL_madskills &&
3280af22 6267 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6268 {
f776e3cd 6269 if (ckWARN(WARN_BAREWORD)
90e5519e 6270 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6271 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6272 "Bareword \"%s\" refers to nonexistent package",
3280af22 6273 PL_tokenbuf);
c3e0f903 6274 len -= 2;
3280af22 6275 PL_tokenbuf[len] = '\0';
a0714e2c 6276 gv = NULL;
c3e0f903
GS
6277 gvp = 0;
6278 }
6279 else {
62d55b22
NC
6280 if (!gv) {
6281 /* Mustn't actually add anything to a symbol table.
6282 But also don't want to "initialise" any placeholder
6283 constants that might already be there into full
6284 blown PVGVs with attached PVCV. */
90e5519e
NC
6285 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6286 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6287 }
b3d904f3 6288 len = 0;
c3e0f903
GS
6289 }
6290
6291 /* if we saw a global override before, get the right name */
8990e307 6292
49dc05e3 6293 if (gvp) {
396482e1 6294 sv = newSVpvs("CORE::GLOBAL::");
3280af22 6295 sv_catpv(sv,PL_tokenbuf);
49dc05e3 6296 }
8a7a129d
NC
6297 else {
6298 /* If len is 0, newSVpv does strlen(), which is correct.
6299 If len is non-zero, then it will be the true length,
6300 and so the scalar will be created correctly. */
6301 sv = newSVpv(PL_tokenbuf,len);
6302 }
5db06880 6303#ifdef PERL_MAD
cd81e915
NC
6304 if (PL_madskills && !PL_thistoken) {
6305 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6306 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6307 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6308 }
6309#endif
8990e307 6310
a0d0e21e
LW
6311 /* Presume this is going to be a bareword of some sort. */
6312
6313 CLINE;
6154021b
RGS
6314 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6315 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
6316 /* UTF-8 package name? */
6317 if (UTF && !IN_BYTES &&
95a20fc0 6318 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 6319 SvUTF8_on(sv);
a0d0e21e 6320
c3e0f903
GS
6321 /* And if "Foo::", then that's what it certainly is. */
6322
6323 if (len)
6324 goto safe_bareword;
6325
f7461760
Z
6326 cv = NULL;
6327 {
6328 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6329 const_op->op_private = OPpCONST_BARE;
6330 rv2cv_op = newCVREF(0, const_op);
6331 }
6332 if (rv2cv_op->op_type == OP_RV2CV &&
6333 (rv2cv_op->op_flags & OPf_KIDS)) {
6334 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6335 switch (rv_op->op_type) {
6336 case OP_CONST: {
6337 SV *sv = cSVOPx_sv(rv_op);
6338 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6339 cv = (CV*)SvRV(sv);
6340 } break;
6341 case OP_GV: {
6342 GV *gv = cGVOPx_gv(rv_op);
6343 CV *maybe_cv = GvCVu(gv);
6344 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6345 cv = maybe_cv;
6346 } break;
6347 }
6348 }
5069cc75 6349
8990e307
LW
6350 /* See if it's the indirect object for a list operator. */
6351
3280af22
NIS
6352 if (PL_oldoldbufptr &&
6353 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6354 (PL_oldoldbufptr == PL_last_lop
6355 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6356 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6357 (PL_expect == XREF ||
6358 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6359 {
748a9306
LW
6360 bool immediate_paren = *s == '(';
6361
a0d0e21e 6362 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6363 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6364#ifdef PERL_MAD
cd81e915 6365 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6366#endif
a0d0e21e
LW
6367
6368 /* Two barewords in a row may indicate method call. */
6369
62d55b22 6370 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6371 (tmp = intuit_method(s, gv, cv))) {
6372 op_free(rv2cv_op);
bbf60fe6 6373 return REPORT(tmp);
f7461760 6374 }
a0d0e21e
LW
6375
6376 /* If not a declared subroutine, it's an indirect object. */
6377 /* (But it's an indir obj regardless for sort.) */
7294df96 6378 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6379
7294df96
RGS
6380 if (
6381 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6382 (!cv &&
a9ef352a 6383 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6384 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6385 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6386 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6387 )
a9ef352a 6388 {
3280af22 6389 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6390 goto bareword;
93a17b20
LW
6391 }
6392 }
8990e307 6393
3280af22 6394 PL_expect = XOPERATOR;
5db06880
NC
6395#ifdef PERL_MAD
6396 if (isSPACE(*s))
cd81e915
NC
6397 s = SKIPSPACE2(s,nextPL_nextwhite);
6398 PL_nextwhite = nextPL_nextwhite;
5db06880 6399#else
8990e307 6400 s = skipspace(s);
5db06880 6401#endif
1c3923b3
GS
6402
6403 /* Is this a word before a => operator? */
ce29ac45 6404 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6405 op_free(rv2cv_op);
1c3923b3 6406 CLINE;
6154021b 6407 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6408 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6409 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6410 TERM(WORD);
6411 }
6412
6413 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6414 if (*s == '(') {
79072805 6415 CLINE;
5069cc75 6416 if (cv) {
c35e046a
AL
6417 d = s + 1;
6418 while (SPACE_OR_TAB(*d))
6419 d++;
f7461760 6420 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6421 s = d + 1;
c631f32b 6422 goto its_constant;
96e4d5b1 6423 }
6424 }
5db06880
NC
6425#ifdef PERL_MAD
6426 if (PL_madskills) {
cd81e915
NC
6427 PL_nextwhite = PL_thiswhite;
6428 PL_thiswhite = 0;
5db06880 6429 }
cd81e915 6430 start_force(PL_curforce);
5db06880 6431#endif
6154021b 6432 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6433 PL_expect = XOPERATOR;
5db06880
NC
6434#ifdef PERL_MAD
6435 if (PL_madskills) {
cd81e915
NC
6436 PL_nextwhite = nextPL_nextwhite;
6437 curmad('X', PL_thistoken);
6b29d1f5 6438 PL_thistoken = newSVpvs("");
5db06880
NC
6439 }
6440#endif
f7461760 6441 op_free(rv2cv_op);
93a17b20 6442 force_next(WORD);
6154021b 6443 pl_yylval.ival = 0;
463ee0b2 6444 TOKEN('&');
79072805 6445 }
93a17b20 6446
a0d0e21e 6447 /* If followed by var or block, call it a method (unless sub) */
8990e307 6448
f7461760
Z
6449 if ((*s == '$' || *s == '{') && !cv) {
6450 op_free(rv2cv_op);
3280af22
NIS
6451 PL_last_lop = PL_oldbufptr;
6452 PL_last_lop_op = OP_METHOD;
93a17b20 6453 PREBLOCK(METHOD);
463ee0b2
LW
6454 }
6455
8990e307
LW
6456 /* If followed by a bareword, see if it looks like indir obj. */
6457
30fe34ed
RGS
6458 if (!orig_keyword
6459 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6460 && (tmp = intuit_method(s, gv, cv))) {
6461 op_free(rv2cv_op);
bbf60fe6 6462 return REPORT(tmp);
f7461760 6463 }
93a17b20 6464
8990e307
LW
6465 /* Not a method, so call it a subroutine (if defined) */
6466
5069cc75 6467 if (cv) {
9b387841
NC
6468 if (lastchar == '-')
6469 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6470 "Ambiguous use of -%s resolved as -&%s()",
6471 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6472 /* Check for a constant sub */
f7461760 6473 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6474 its_constant:
f7461760 6475 op_free(rv2cv_op);
6154021b
RGS
6476 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6477 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6478 pl_yylval.opval->op_private = 0;
96e4d5b1 6479 TOKEN(WORD);
89bfa8cd 6480 }
6481
6154021b 6482 op_free(pl_yylval.opval);
f7461760 6483 pl_yylval.opval = rv2cv_op;
6154021b 6484 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6485 PL_last_lop = PL_oldbufptr;
bf848113 6486 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6487 /* Is there a prototype? */
5db06880
NC
6488 if (
6489#ifdef PERL_MAD
6490 cv &&
6491#endif
d9f2850e
RGS
6492 SvPOK(cv))
6493 {
5f66b61c 6494 STRLEN protolen;
daba3364 6495 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6496 if (!protolen)
4633a7c4 6497 TERM(FUNC0SUB);
0f5d0394
AE
6498 while (*proto == ';')
6499 proto++;
649d02de
FC
6500 if (
6501 (
6502 (
6503 *proto == '$' || *proto == '_'
6504 || *proto == '*'
6505 )
6506 && proto[1] == '\0'
6507 )
6508 || (
6509 *proto == '\\' && proto[1] && proto[2] == '\0'
6510 )
6511 )
6512 OPERATOR(UNIOPSUB);
6513 if (*proto == '\\' && proto[1] == '[') {
6514 const char *p = proto + 2;
6515 while(*p && *p != ']')
6516 ++p;
6517 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6518 }
7a52d87a 6519 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6520 if (PL_curstash)
6521 sv_setpvs(PL_subname, "__ANON__");
6522 else
6523 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6524 PREBLOCK(LSTOPSUB);
6525 }
a9ef352a 6526 }
5db06880
NC
6527#ifdef PERL_MAD
6528 {
6529 if (PL_madskills) {
cd81e915
NC
6530 PL_nextwhite = PL_thiswhite;
6531 PL_thiswhite = 0;
5db06880 6532 }
cd81e915 6533 start_force(PL_curforce);
6154021b 6534 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6535 PL_expect = XTERM;
6536 if (PL_madskills) {
cd81e915
NC
6537 PL_nextwhite = nextPL_nextwhite;
6538 curmad('X', PL_thistoken);
6b29d1f5 6539 PL_thistoken = newSVpvs("");
5db06880
NC
6540 }
6541 force_next(WORD);
6542 TOKEN(NOAMP);
6543 }
6544 }
6545
6546 /* Guess harder when madskills require "best effort". */
6547 if (PL_madskills && (!gv || !GvCVu(gv))) {
6548 int probable_sub = 0;
6549 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6550 probable_sub = 1;
6551 else if (isALPHA(*s)) {
6552 char tmpbuf[1024];
6553 STRLEN tmplen;
6554 d = s;
6555 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6556 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6557 probable_sub = 1;
6558 else {
6559 while (d < PL_bufend && isSPACE(*d))
6560 d++;
6561 if (*d == '=' && d[1] == '>')
6562 probable_sub = 1;
6563 }
6564 }
6565 if (probable_sub) {
7a6d04f4 6566 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6567 op_free(pl_yylval.opval);
f7461760 6568 pl_yylval.opval = rv2cv_op;
6154021b 6569 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6570 PL_last_lop = PL_oldbufptr;
6571 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6572 PL_nextwhite = PL_thiswhite;
6573 PL_thiswhite = 0;
6574 start_force(PL_curforce);
6154021b 6575 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6576 PL_expect = XTERM;
cd81e915
NC
6577 PL_nextwhite = nextPL_nextwhite;
6578 curmad('X', PL_thistoken);
6b29d1f5 6579 PL_thistoken = newSVpvs("");
5db06880
NC
6580 force_next(WORD);
6581 TOKEN(NOAMP);
6582 }
6583#else
6154021b 6584 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6585 PL_expect = XTERM;
8990e307
LW
6586 force_next(WORD);
6587 TOKEN(NOAMP);
5db06880 6588#endif
8990e307 6589 }
748a9306 6590
8990e307
LW
6591 /* Call it a bare word */
6592
5603f27d 6593 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6594 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6595 else {
9a073a1d
RGS
6596 bareword:
6597 /* after "print" and similar functions (corresponding to
6598 * "F? L" in opcode.pl), whatever wasn't already parsed as
6599 * a filehandle should be subject to "strict subs".
6600 * Likewise for the optional indirect-object argument to system
6601 * or exec, which can't be a bareword */
6602 if ((PL_last_lop_op == OP_PRINT
6603 || PL_last_lop_op == OP_PRTF
6604 || PL_last_lop_op == OP_SAY
6605 || PL_last_lop_op == OP_SYSTEM
6606 || PL_last_lop_op == OP_EXEC)
6607 && (PL_hints & HINT_STRICT_SUBS))
6608 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6609 if (lastchar != '-') {
6610 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6611 d = PL_tokenbuf;
6612 while (isLOWER(*d))
6613 d++;
da51bb9b 6614 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6615 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6616 PL_tokenbuf);
6617 }
748a9306
LW
6618 }
6619 }
f7461760 6620 op_free(rv2cv_op);
c3e0f903
GS
6621
6622 safe_bareword:
9b387841
NC
6623 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6624 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625 "Operator or semicolon missing before %c%s",
6626 lastchar, PL_tokenbuf);
6627 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6628 "Ambiguous use of %c resolved as operator %c",
6629 lastchar, lastchar);
748a9306 6630 }
93a17b20 6631 TOKEN(WORD);
79072805 6632 }
79072805 6633
68dc0745 6634 case KEY___FILE__:
6154021b 6635 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6636 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6637 TERM(THING);
6638
79072805 6639 case KEY___LINE__:
6154021b 6640 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6641 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6642 TERM(THING);
68dc0745 6643
6644 case KEY___PACKAGE__:
6154021b 6645 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6646 (PL_curstash
5aaec2b4 6647 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6648 : &PL_sv_undef));
79072805 6649 TERM(THING);
79072805 6650
e50aee73 6651 case KEY___DATA__:
79072805
LW
6652 case KEY___END__: {
6653 GV *gv;
3280af22 6654 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6655 const char *pname = "main";
3280af22 6656 if (PL_tokenbuf[2] == 'D')
bfcb3514 6657 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6658 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6659 SVt_PVIO);
a5f75d66 6660 GvMULTI_on(gv);
79072805 6661 if (!GvIO(gv))
a0d0e21e 6662 GvIOp(gv) = newIO();
3280af22 6663 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6664#if defined(HAS_FCNTL) && defined(F_SETFD)
6665 {
f54cb97a 6666 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6667 fcntl(fd,F_SETFD,fd >= 3);
6668 }
79072805 6669#endif
fd049845 6670 /* Mark this internal pseudo-handle as clean */
6671 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6672 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6673 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6674 else
50952442 6675 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6676#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6677 /* if the script was opened in binmode, we need to revert
53129d29 6678 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6679 * XXX this is a questionable hack at best. */
53129d29
GS
6680 if (PL_bufend-PL_bufptr > 2
6681 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6682 {
6683 Off_t loc = 0;
50952442 6684 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6685 loc = PerlIO_tell(PL_rsfp);
6686 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6687 }
2986a63f
JH
6688#ifdef NETWARE
6689 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6690#else
c39cd008 6691 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6692#endif /* NETWARE */
1143fce0
JH
6693#ifdef PERLIO_IS_STDIO /* really? */
6694# if defined(__BORLANDC__)
cb359b41
JH
6695 /* XXX see note in do_binmode() */
6696 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6697# endif
6698#endif
c39cd008
GS
6699 if (loc > 0)
6700 PerlIO_seek(PL_rsfp, loc, 0);
6701 }
6702 }
6703#endif
7948272d 6704#ifdef PERLIO_LAYERS
52d2e0f4
JH
6705 if (!IN_BYTES) {
6706 if (UTF)
6707 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6708 else if (PL_encoding) {
6709 SV *name;
6710 dSP;
6711 ENTER;
6712 SAVETMPS;
6713 PUSHMARK(sp);
6714 EXTEND(SP, 1);
6715 XPUSHs(PL_encoding);
6716 PUTBACK;
6717 call_method("name", G_SCALAR);
6718 SPAGAIN;
6719 name = POPs;
6720 PUTBACK;
bfed75c6 6721 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6722 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6723 SVfARG(name)));
52d2e0f4
JH
6724 FREETMPS;
6725 LEAVE;
6726 }
6727 }
7948272d 6728#endif
5db06880
NC
6729#ifdef PERL_MAD
6730 if (PL_madskills) {
cd81e915
NC
6731 if (PL_realtokenstart >= 0) {
6732 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6733 if (!PL_endwhite)
6b29d1f5 6734 PL_endwhite = newSVpvs("");
cd81e915
NC
6735 sv_catsv(PL_endwhite, PL_thiswhite);
6736 PL_thiswhite = 0;
6737 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6738 PL_realtokenstart = -1;
5db06880 6739 }
5cc814fd
NC
6740 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6741 != NULL) ;
5db06880
NC
6742 }
6743#endif
4608196e 6744 PL_rsfp = NULL;
79072805
LW
6745 }
6746 goto fake_eof;
e929a76b 6747 }
de3bb511 6748
8990e307 6749 case KEY_AUTOLOAD:
ed6116ce 6750 case KEY_DESTROY:
79072805 6751 case KEY_BEGIN:
3c10abe3 6752 case KEY_UNITCHECK:
7d30b5c4 6753 case KEY_CHECK:
7d07dbc2 6754 case KEY_INIT:
7d30b5c4 6755 case KEY_END:
3280af22
NIS
6756 if (PL_expect == XSTATE) {
6757 s = PL_bufptr;
93a17b20 6758 goto really_sub;
79072805
LW
6759 }
6760 goto just_a_word;
6761
a0d0e21e
LW
6762 case KEY_CORE:
6763 if (*s == ':' && s[1] == ':') {
6764 s += 2;
748a9306 6765 d = s;
3280af22 6766 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6767 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6768 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6769 if (tmp < 0)
6770 tmp = -tmp;
850e8516 6771 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6772 /* that's a way to remember we saw "CORE::" */
850e8516 6773 orig_keyword = tmp;
a0d0e21e
LW
6774 goto reserved_word;
6775 }
6776 goto just_a_word;
6777
463ee0b2
LW
6778 case KEY_abs:
6779 UNI(OP_ABS);
6780
79072805
LW
6781 case KEY_alarm:
6782 UNI(OP_ALARM);
6783
6784 case KEY_accept:
a0d0e21e 6785 LOP(OP_ACCEPT,XTERM);
79072805 6786
463ee0b2
LW
6787 case KEY_and:
6788 OPERATOR(ANDOP);
6789
79072805 6790 case KEY_atan2:
a0d0e21e 6791 LOP(OP_ATAN2,XTERM);
85e6fe83 6792
79072805 6793 case KEY_bind:
a0d0e21e 6794 LOP(OP_BIND,XTERM);
79072805
LW
6795
6796 case KEY_binmode:
1c1fc3ea 6797 LOP(OP_BINMODE,XTERM);
79072805
LW
6798
6799 case KEY_bless:
a0d0e21e 6800 LOP(OP_BLESS,XTERM);
79072805 6801
0d863452
RH
6802 case KEY_break:
6803 FUN0(OP_BREAK);
6804
79072805
LW
6805 case KEY_chop:
6806 UNI(OP_CHOP);
6807
6808 case KEY_continue:
0d863452
RH
6809 /* When 'use switch' is in effect, continue has a dual
6810 life as a control operator. */
6811 {
ef89dcc3 6812 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6813 PREBLOCK(CONTINUE);
6814 else {
6815 /* We have to disambiguate the two senses of
6816 "continue". If the next token is a '{' then
6817 treat it as the start of a continue block;
6818 otherwise treat it as a control operator.
6819 */
6820 s = skipspace(s);
6821 if (*s == '{')
79072805 6822 PREBLOCK(CONTINUE);
0d863452
RH
6823 else
6824 FUN0(OP_CONTINUE);
6825 }
6826 }
79072805
LW
6827
6828 case KEY_chdir:
fafc274c
NC
6829 /* may use HOME */
6830 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6831 UNI(OP_CHDIR);
6832
6833 case KEY_close:
6834 UNI(OP_CLOSE);
6835
6836 case KEY_closedir:
6837 UNI(OP_CLOSEDIR);
6838
6839 case KEY_cmp:
6840 Eop(OP_SCMP);
6841
6842 case KEY_caller:
6843 UNI(OP_CALLER);
6844
6845 case KEY_crypt:
6846#ifdef FCRYPT
f4c556ac
GS
6847 if (!PL_cryptseen) {
6848 PL_cryptseen = TRUE;
de3bb511 6849 init_des();
f4c556ac 6850 }
a687059c 6851#endif
a0d0e21e 6852 LOP(OP_CRYPT,XTERM);
79072805
LW
6853
6854 case KEY_chmod:
a0d0e21e 6855 LOP(OP_CHMOD,XTERM);
79072805
LW
6856
6857 case KEY_chown:
a0d0e21e 6858 LOP(OP_CHOWN,XTERM);
79072805
LW
6859
6860 case KEY_connect:
a0d0e21e 6861 LOP(OP_CONNECT,XTERM);
79072805 6862
463ee0b2
LW
6863 case KEY_chr:
6864 UNI(OP_CHR);
6865
79072805
LW
6866 case KEY_cos:
6867 UNI(OP_COS);
6868
6869 case KEY_chroot:
6870 UNI(OP_CHROOT);
6871
0d863452
RH
6872 case KEY_default:
6873 PREBLOCK(DEFAULT);
6874
79072805 6875 case KEY_do:
29595ff2 6876 s = SKIPSPACE1(s);
79072805 6877 if (*s == '{')
a0d0e21e 6878 PRETERMBLOCK(DO);
79072805 6879 if (*s != '\'')
89c5585f 6880 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6881 if (orig_keyword == KEY_do) {
6882 orig_keyword = 0;
6154021b 6883 pl_yylval.ival = 1;
850e8516
RGS
6884 }
6885 else
6154021b 6886 pl_yylval.ival = 0;
378cc40b 6887 OPERATOR(DO);
79072805
LW
6888
6889 case KEY_die:
3280af22 6890 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6891 LOP(OP_DIE,XTERM);
79072805
LW
6892
6893 case KEY_defined:
6894 UNI(OP_DEFINED);
6895
6896 case KEY_delete:
a0d0e21e 6897 UNI(OP_DELETE);
79072805
LW
6898
6899 case KEY_dbmopen:
5c1737d1 6900 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6901 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6902
6903 case KEY_dbmclose:
6904 UNI(OP_DBMCLOSE);
6905
6906 case KEY_dump:
a0d0e21e 6907 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6908 LOOPX(OP_DUMP);
6909
6910 case KEY_else:
6911 PREBLOCK(ELSE);
6912
6913 case KEY_elsif:
6154021b 6914 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6915 OPERATOR(ELSIF);
6916
6917 case KEY_eq:
6918 Eop(OP_SEQ);
6919
a0d0e21e
LW
6920 case KEY_exists:
6921 UNI(OP_EXISTS);
4e553d73 6922
79072805 6923 case KEY_exit:
5db06880
NC
6924 if (PL_madskills)
6925 UNI(OP_INT);
79072805
LW
6926 UNI(OP_EXIT);
6927
6928 case KEY_eval:
29595ff2 6929 s = SKIPSPACE1(s);
32e2a35d
RGS
6930 if (*s == '{') { /* block eval */
6931 PL_expect = XTERMBLOCK;
6932 UNIBRACK(OP_ENTERTRY);
6933 }
6934 else { /* string eval */
6935 PL_expect = XTERM;
6936 UNIBRACK(OP_ENTEREVAL);
6937 }
79072805
LW
6938
6939 case KEY_eof:
6940 UNI(OP_EOF);
6941
6942 case KEY_exp:
6943 UNI(OP_EXP);
6944
6945 case KEY_each:
6946 UNI(OP_EACH);
6947
6948 case KEY_exec:
a0d0e21e 6949 LOP(OP_EXEC,XREF);
79072805
LW
6950
6951 case KEY_endhostent:
6952 FUN0(OP_EHOSTENT);
6953
6954 case KEY_endnetent:
6955 FUN0(OP_ENETENT);
6956
6957 case KEY_endservent:
6958 FUN0(OP_ESERVENT);
6959
6960 case KEY_endprotoent:
6961 FUN0(OP_EPROTOENT);
6962
6963 case KEY_endpwent:
6964 FUN0(OP_EPWENT);
6965
6966 case KEY_endgrent:
6967 FUN0(OP_EGRENT);
6968
6969 case KEY_for:
6970 case KEY_foreach:
6154021b 6971 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6972 s = SKIPSPACE1(s);
7e2040f0 6973 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6974 char *p = s;
5db06880
NC
6975#ifdef PERL_MAD
6976 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6977#endif
6978
3280af22 6979 if ((PL_bufend - p) >= 3 &&
55497cff 6980 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6981 p += 2;
77ca0c92
LW
6982 else if ((PL_bufend - p) >= 4 &&
6983 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6984 p += 3;
29595ff2 6985 p = PEEKSPACE(p);
7e2040f0 6986 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6987 p = scan_ident(p, PL_bufend,
6988 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6989 p = PEEKSPACE(p);
77ca0c92
LW
6990 }
6991 if (*p != '$')
cea2e8a9 6992 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6993#ifdef PERL_MAD
6994 s = SvPVX(PL_linestr) + soff;
6995#endif
55497cff 6996 }
79072805
LW
6997 OPERATOR(FOR);
6998
6999 case KEY_formline:
a0d0e21e 7000 LOP(OP_FORMLINE,XTERM);
79072805
LW
7001
7002 case KEY_fork:
7003 FUN0(OP_FORK);
7004
7005 case KEY_fcntl:
a0d0e21e 7006 LOP(OP_FCNTL,XTERM);
79072805
LW
7007
7008 case KEY_fileno:
7009 UNI(OP_FILENO);
7010
7011 case KEY_flock:
a0d0e21e 7012 LOP(OP_FLOCK,XTERM);
79072805
LW
7013
7014 case KEY_gt:
7015 Rop(OP_SGT);
7016
7017 case KEY_ge:
7018 Rop(OP_SGE);
7019
7020 case KEY_grep:
2c38e13d 7021 LOP(OP_GREPSTART, XREF);
79072805
LW
7022
7023 case KEY_goto:
a0d0e21e 7024 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7025 LOOPX(OP_GOTO);
7026
7027 case KEY_gmtime:
7028 UNI(OP_GMTIME);
7029
7030 case KEY_getc:
6f33ba73 7031 UNIDOR(OP_GETC);
79072805
LW
7032
7033 case KEY_getppid:
7034 FUN0(OP_GETPPID);
7035
7036 case KEY_getpgrp:
7037 UNI(OP_GETPGRP);
7038
7039 case KEY_getpriority:
a0d0e21e 7040 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7041
7042 case KEY_getprotobyname:
7043 UNI(OP_GPBYNAME);
7044
7045 case KEY_getprotobynumber:
a0d0e21e 7046 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7047
7048 case KEY_getprotoent:
7049 FUN0(OP_GPROTOENT);
7050
7051 case KEY_getpwent:
7052 FUN0(OP_GPWENT);
7053
7054 case KEY_getpwnam:
ff68c719 7055 UNI(OP_GPWNAM);
79072805
LW
7056
7057 case KEY_getpwuid:
ff68c719 7058 UNI(OP_GPWUID);
79072805
LW
7059
7060 case KEY_getpeername:
7061 UNI(OP_GETPEERNAME);
7062
7063 case KEY_gethostbyname:
7064 UNI(OP_GHBYNAME);
7065
7066 case KEY_gethostbyaddr:
a0d0e21e 7067 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7068
7069 case KEY_gethostent:
7070 FUN0(OP_GHOSTENT);
7071
7072 case KEY_getnetbyname:
7073 UNI(OP_GNBYNAME);
7074
7075 case KEY_getnetbyaddr:
a0d0e21e 7076 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7077
7078 case KEY_getnetent:
7079 FUN0(OP_GNETENT);
7080
7081 case KEY_getservbyname:
a0d0e21e 7082 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7083
7084 case KEY_getservbyport:
a0d0e21e 7085 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7086
7087 case KEY_getservent:
7088 FUN0(OP_GSERVENT);
7089
7090 case KEY_getsockname:
7091 UNI(OP_GETSOCKNAME);
7092
7093 case KEY_getsockopt:
a0d0e21e 7094 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7095
7096 case KEY_getgrent:
7097 FUN0(OP_GGRENT);
7098
7099 case KEY_getgrnam:
ff68c719 7100 UNI(OP_GGRNAM);
79072805
LW
7101
7102 case KEY_getgrgid:
ff68c719 7103 UNI(OP_GGRGID);
79072805
LW
7104
7105 case KEY_getlogin:
7106 FUN0(OP_GETLOGIN);
7107
0d863452 7108 case KEY_given:
6154021b 7109 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7110 OPERATOR(GIVEN);
7111
93a17b20 7112 case KEY_glob:
a0d0e21e 7113 LOP(OP_GLOB,XTERM);
93a17b20 7114
79072805
LW
7115 case KEY_hex:
7116 UNI(OP_HEX);
7117
7118 case KEY_if:
6154021b 7119 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7120 OPERATOR(IF);
7121
7122 case KEY_index:
a0d0e21e 7123 LOP(OP_INDEX,XTERM);
79072805
LW
7124
7125 case KEY_int:
7126 UNI(OP_INT);
7127
7128 case KEY_ioctl:
a0d0e21e 7129 LOP(OP_IOCTL,XTERM);
79072805
LW
7130
7131 case KEY_join:
a0d0e21e 7132 LOP(OP_JOIN,XTERM);
79072805
LW
7133
7134 case KEY_keys:
7135 UNI(OP_KEYS);
7136
7137 case KEY_kill:
a0d0e21e 7138 LOP(OP_KILL,XTERM);
79072805
LW
7139
7140 case KEY_last:
a0d0e21e 7141 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7142 LOOPX(OP_LAST);
4e553d73 7143
79072805
LW
7144 case KEY_lc:
7145 UNI(OP_LC);
7146
7147 case KEY_lcfirst:
7148 UNI(OP_LCFIRST);
7149
7150 case KEY_local:
6154021b 7151 pl_yylval.ival = 0;
79072805
LW
7152 OPERATOR(LOCAL);
7153
7154 case KEY_length:
7155 UNI(OP_LENGTH);
7156
7157 case KEY_lt:
7158 Rop(OP_SLT);
7159
7160 case KEY_le:
7161 Rop(OP_SLE);
7162
7163 case KEY_localtime:
7164 UNI(OP_LOCALTIME);
7165
7166 case KEY_log:
7167 UNI(OP_LOG);
7168
7169 case KEY_link:
a0d0e21e 7170 LOP(OP_LINK,XTERM);
79072805
LW
7171
7172 case KEY_listen:
a0d0e21e 7173 LOP(OP_LISTEN,XTERM);
79072805 7174
c0329465
MB
7175 case KEY_lock:
7176 UNI(OP_LOCK);
7177
79072805
LW
7178 case KEY_lstat:
7179 UNI(OP_LSTAT);
7180
7181 case KEY_m:
8782bef2 7182 s = scan_pat(s,OP_MATCH);
79072805
LW
7183 TERM(sublex_start());
7184
a0d0e21e 7185 case KEY_map:
2c38e13d 7186 LOP(OP_MAPSTART, XREF);
4e4e412b 7187
79072805 7188 case KEY_mkdir:
a0d0e21e 7189 LOP(OP_MKDIR,XTERM);
79072805
LW
7190
7191 case KEY_msgctl:
a0d0e21e 7192 LOP(OP_MSGCTL,XTERM);
79072805
LW
7193
7194 case KEY_msgget:
a0d0e21e 7195 LOP(OP_MSGGET,XTERM);
79072805
LW
7196
7197 case KEY_msgrcv:
a0d0e21e 7198 LOP(OP_MSGRCV,XTERM);
79072805
LW
7199
7200 case KEY_msgsnd:
a0d0e21e 7201 LOP(OP_MSGSND,XTERM);
79072805 7202
77ca0c92 7203 case KEY_our:
93a17b20 7204 case KEY_my:
952306ac 7205 case KEY_state:
eac04b2e 7206 PL_in_my = (U16)tmp;
29595ff2 7207 s = SKIPSPACE1(s);
7e2040f0 7208 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7209#ifdef PERL_MAD
7210 char* start = s;
7211#endif
3280af22 7212 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7213 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7214 goto really_sub;
def3634b 7215 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7216 if (!PL_in_my_stash) {
c750a3ec 7217 char tmpbuf[1024];
3280af22 7218 PL_bufptr = s;
d9fad198 7219 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7220 yyerror(tmpbuf);
7221 }
5db06880
NC
7222#ifdef PERL_MAD
7223 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7224 sv_catsv(PL_thistoken, PL_nextwhite);
7225 PL_nextwhite = 0;
7226 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7227 }
7228#endif
c750a3ec 7229 }
6154021b 7230 pl_yylval.ival = 1;
55497cff 7231 OPERATOR(MY);
93a17b20 7232
79072805 7233 case KEY_next:
a0d0e21e 7234 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7235 LOOPX(OP_NEXT);
7236
7237 case KEY_ne:
7238 Eop(OP_SNE);
7239
a0d0e21e 7240 case KEY_no:
468aa647 7241 s = tokenize_use(0, s);
a0d0e21e
LW
7242 OPERATOR(USE);
7243
7244 case KEY_not:
29595ff2 7245 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7246 FUN1(OP_NOT);
7247 else
7248 OPERATOR(NOTOP);
a0d0e21e 7249
79072805 7250 case KEY_open:
29595ff2 7251 s = SKIPSPACE1(s);
7e2040f0 7252 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7253 const char *t;
c35e046a
AL
7254 for (d = s; isALNUM_lazy_if(d,UTF);)
7255 d++;
7256 for (t=d; isSPACE(*t);)
7257 t++;
e2ab214b 7258 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7259 /* [perl #16184] */
7260 && !(t[0] == '=' && t[1] == '>')
7261 ) {
5f66b61c 7262 int parms_len = (int)(d-s);
9014280d 7263 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7264 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7265 parms_len, s, parms_len, s);
66fbe8fb 7266 }
93a17b20 7267 }
a0d0e21e 7268 LOP(OP_OPEN,XTERM);
79072805 7269
463ee0b2 7270 case KEY_or:
6154021b 7271 pl_yylval.ival = OP_OR;
463ee0b2
LW
7272 OPERATOR(OROP);
7273
79072805
LW
7274 case KEY_ord:
7275 UNI(OP_ORD);
7276
7277 case KEY_oct:
7278 UNI(OP_OCT);
7279
7280 case KEY_opendir:
a0d0e21e 7281 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7282
7283 case KEY_print:
3280af22 7284 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7285 LOP(OP_PRINT,XREF);
79072805
LW
7286
7287 case KEY_printf:
3280af22 7288 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7289 LOP(OP_PRTF,XREF);
79072805 7290
c07a80fd 7291 case KEY_prototype:
7292 UNI(OP_PROTOTYPE);
7293
79072805 7294 case KEY_push:
a0d0e21e 7295 LOP(OP_PUSH,XTERM);
79072805
LW
7296
7297 case KEY_pop:
6f33ba73 7298 UNIDOR(OP_POP);
79072805 7299
a0d0e21e 7300 case KEY_pos:
6f33ba73 7301 UNIDOR(OP_POS);
4e553d73 7302
79072805 7303 case KEY_pack:
a0d0e21e 7304 LOP(OP_PACK,XTERM);
79072805
LW
7305
7306 case KEY_package:
a0d0e21e 7307 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7308 s = SKIPSPACE1(s);
91152fc1 7309 s = force_strict_version(s);
4e4da3ac 7310 PL_lex_expect = XBLOCK;
79072805
LW
7311 OPERATOR(PACKAGE);
7312
7313 case KEY_pipe:
a0d0e21e 7314 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7315
7316 case KEY_q:
5db06880 7317 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7318 if (!s)
d4c19fe8 7319 missingterm(NULL);
6154021b 7320 pl_yylval.ival = OP_CONST;
79072805
LW
7321 TERM(sublex_start());
7322
a0d0e21e
LW
7323 case KEY_quotemeta:
7324 UNI(OP_QUOTEMETA);
7325
8990e307 7326 case KEY_qw:
5db06880 7327 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7328 if (!s)
d4c19fe8 7329 missingterm(NULL);
3480a8d2 7330 PL_expect = XOPERATOR;
8127e0e3
GS
7331 force_next(')');
7332 if (SvCUR(PL_lex_stuff)) {
5f66b61c 7333 OP *words = NULL;
8127e0e3 7334 int warned = 0;
3280af22 7335 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7336 while (len) {
d4c19fe8
AL
7337 for (; isSPACE(*d) && len; --len, ++d)
7338 /**/;
8127e0e3 7339 if (len) {
d4c19fe8 7340 SV *sv;
f54cb97a 7341 const char *b = d;
e476b1b5 7342 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7343 for (; !isSPACE(*d) && len; --len, ++d) {
7344 if (*d == ',') {
9014280d 7345 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7346 "Possible attempt to separate words with commas");
7347 ++warned;
7348 }
7349 else if (*d == '#') {
9014280d 7350 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7351 "Possible attempt to put comments in qw() list");
7352 ++warned;
7353 }
7354 }
7355 }
7356 else {
d4c19fe8
AL
7357 for (; !isSPACE(*d) && len; --len, ++d)
7358 /**/;
8127e0e3 7359 }
740cce10 7360 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7361 words = append_elem(OP_LIST, words,
7948272d 7362 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7363 }
7364 }
8127e0e3 7365 if (words) {
cd81e915 7366 start_force(PL_curforce);
9ded7720 7367 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7368 force_next(THING);
7369 }
55497cff 7370 }
37fd879b 7371 if (PL_lex_stuff) {
8127e0e3 7372 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7373 PL_lex_stuff = NULL;
37fd879b 7374 }
3280af22 7375 PL_expect = XTERM;
8127e0e3 7376 TOKEN('(');
8990e307 7377
79072805 7378 case KEY_qq:
5db06880 7379 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7380 if (!s)
d4c19fe8 7381 missingterm(NULL);
6154021b 7382 pl_yylval.ival = OP_STRINGIFY;
3280af22 7383 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7384 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7385 TERM(sublex_start());
7386
8782bef2
GB
7387 case KEY_qr:
7388 s = scan_pat(s,OP_QR);
7389 TERM(sublex_start());
7390
79072805 7391 case KEY_qx:
5db06880 7392 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7393 if (!s)
d4c19fe8 7394 missingterm(NULL);
9b201d7d 7395 readpipe_override();
79072805
LW
7396 TERM(sublex_start());
7397
7398 case KEY_return:
7399 OLDLOP(OP_RETURN);
7400
7401 case KEY_require:
29595ff2 7402 s = SKIPSPACE1(s);
e759cc13
RGS
7403 if (isDIGIT(*s)) {
7404 s = force_version(s, FALSE);
a7cb1f99 7405 }
e759cc13
RGS
7406 else if (*s != 'v' || !isDIGIT(s[1])
7407 || (s = force_version(s, TRUE), *s == 'v'))
7408 {
a7cb1f99
GS
7409 *PL_tokenbuf = '\0';
7410 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7411 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7412 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7413 else if (*s == '<')
7414 yyerror("<> should be quotes");
7415 }
a72a1c8b
RGS
7416 if (orig_keyword == KEY_require) {
7417 orig_keyword = 0;
6154021b 7418 pl_yylval.ival = 1;
a72a1c8b
RGS
7419 }
7420 else
6154021b 7421 pl_yylval.ival = 0;
a72a1c8b
RGS
7422 PL_expect = XTERM;
7423 PL_bufptr = s;
7424 PL_last_uni = PL_oldbufptr;
7425 PL_last_lop_op = OP_REQUIRE;
7426 s = skipspace(s);
7427 return REPORT( (int)REQUIRE );
79072805
LW
7428
7429 case KEY_reset:
7430 UNI(OP_RESET);
7431
7432 case KEY_redo:
a0d0e21e 7433 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7434 LOOPX(OP_REDO);
7435
7436 case KEY_rename:
a0d0e21e 7437 LOP(OP_RENAME,XTERM);
79072805
LW
7438
7439 case KEY_rand:
7440 UNI(OP_RAND);
7441
7442 case KEY_rmdir:
7443 UNI(OP_RMDIR);
7444
7445 case KEY_rindex:
a0d0e21e 7446 LOP(OP_RINDEX,XTERM);
79072805
LW
7447
7448 case KEY_read:
a0d0e21e 7449 LOP(OP_READ,XTERM);
79072805
LW
7450
7451 case KEY_readdir:
7452 UNI(OP_READDIR);
7453
93a17b20 7454 case KEY_readline:
6f33ba73 7455 UNIDOR(OP_READLINE);
93a17b20
LW
7456
7457 case KEY_readpipe:
0858480c 7458 UNIDOR(OP_BACKTICK);
93a17b20 7459
79072805
LW
7460 case KEY_rewinddir:
7461 UNI(OP_REWINDDIR);
7462
7463 case KEY_recv:
a0d0e21e 7464 LOP(OP_RECV,XTERM);
79072805
LW
7465
7466 case KEY_reverse:
a0d0e21e 7467 LOP(OP_REVERSE,XTERM);
79072805
LW
7468
7469 case KEY_readlink:
6f33ba73 7470 UNIDOR(OP_READLINK);
79072805
LW
7471
7472 case KEY_ref:
7473 UNI(OP_REF);
7474
7475 case KEY_s:
7476 s = scan_subst(s);
6154021b 7477 if (pl_yylval.opval)
79072805
LW
7478 TERM(sublex_start());
7479 else
7480 TOKEN(1); /* force error */
7481
0d863452
RH
7482 case KEY_say:
7483 checkcomma(s,PL_tokenbuf,"filehandle");
7484 LOP(OP_SAY,XREF);
7485
a0d0e21e
LW
7486 case KEY_chomp:
7487 UNI(OP_CHOMP);
4e553d73 7488
79072805
LW
7489 case KEY_scalar:
7490 UNI(OP_SCALAR);
7491
7492 case KEY_select:
a0d0e21e 7493 LOP(OP_SELECT,XTERM);
79072805
LW
7494
7495 case KEY_seek:
a0d0e21e 7496 LOP(OP_SEEK,XTERM);
79072805
LW
7497
7498 case KEY_semctl:
a0d0e21e 7499 LOP(OP_SEMCTL,XTERM);
79072805
LW
7500
7501 case KEY_semget:
a0d0e21e 7502 LOP(OP_SEMGET,XTERM);
79072805
LW
7503
7504 case KEY_semop:
a0d0e21e 7505 LOP(OP_SEMOP,XTERM);
79072805
LW
7506
7507 case KEY_send:
a0d0e21e 7508 LOP(OP_SEND,XTERM);
79072805
LW
7509
7510 case KEY_setpgrp:
a0d0e21e 7511 LOP(OP_SETPGRP,XTERM);
79072805
LW
7512
7513 case KEY_setpriority:
a0d0e21e 7514 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7515
7516 case KEY_sethostent:
ff68c719 7517 UNI(OP_SHOSTENT);
79072805
LW
7518
7519 case KEY_setnetent:
ff68c719 7520 UNI(OP_SNETENT);
79072805
LW
7521
7522 case KEY_setservent:
ff68c719 7523 UNI(OP_SSERVENT);
79072805
LW
7524
7525 case KEY_setprotoent:
ff68c719 7526 UNI(OP_SPROTOENT);
79072805
LW
7527
7528 case KEY_setpwent:
7529 FUN0(OP_SPWENT);
7530
7531 case KEY_setgrent:
7532 FUN0(OP_SGRENT);
7533
7534 case KEY_seekdir:
a0d0e21e 7535 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7536
7537 case KEY_setsockopt:
a0d0e21e 7538 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7539
7540 case KEY_shift:
6f33ba73 7541 UNIDOR(OP_SHIFT);
79072805
LW
7542
7543 case KEY_shmctl:
a0d0e21e 7544 LOP(OP_SHMCTL,XTERM);
79072805
LW
7545
7546 case KEY_shmget:
a0d0e21e 7547 LOP(OP_SHMGET,XTERM);
79072805
LW
7548
7549 case KEY_shmread:
a0d0e21e 7550 LOP(OP_SHMREAD,XTERM);
79072805
LW
7551
7552 case KEY_shmwrite:
a0d0e21e 7553 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7554
7555 case KEY_shutdown:
a0d0e21e 7556 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7557
7558 case KEY_sin:
7559 UNI(OP_SIN);
7560
7561 case KEY_sleep:
7562 UNI(OP_SLEEP);
7563
7564 case KEY_socket:
a0d0e21e 7565 LOP(OP_SOCKET,XTERM);
79072805
LW
7566
7567 case KEY_socketpair:
a0d0e21e 7568 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7569
7570 case KEY_sort:
3280af22 7571 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7572 s = SKIPSPACE1(s);
79072805 7573 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7574 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7575 PL_expect = XTERM;
15f0808c 7576 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7577 LOP(OP_SORT,XREF);
79072805
LW
7578
7579 case KEY_split:
a0d0e21e 7580 LOP(OP_SPLIT,XTERM);
79072805
LW
7581
7582 case KEY_sprintf:
a0d0e21e 7583 LOP(OP_SPRINTF,XTERM);
79072805
LW
7584
7585 case KEY_splice:
a0d0e21e 7586 LOP(OP_SPLICE,XTERM);
79072805
LW
7587
7588 case KEY_sqrt:
7589 UNI(OP_SQRT);
7590
7591 case KEY_srand:
7592 UNI(OP_SRAND);
7593
7594 case KEY_stat:
7595 UNI(OP_STAT);
7596
7597 case KEY_study:
79072805
LW
7598 UNI(OP_STUDY);
7599
7600 case KEY_substr:
a0d0e21e 7601 LOP(OP_SUBSTR,XTERM);
79072805
LW
7602
7603 case KEY_format:
7604 case KEY_sub:
93a17b20 7605 really_sub:
09bef843 7606 {
3280af22 7607 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7608 SSize_t tboffset = 0;
09bef843 7609 expectation attrful;
28cc6278 7610 bool have_name, have_proto;
f54cb97a 7611 const int key = tmp;
09bef843 7612
5db06880
NC
7613#ifdef PERL_MAD
7614 SV *tmpwhite = 0;
7615
cd81e915 7616 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7617 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7618 PL_thistoken = 0;
5db06880
NC
7619
7620 d = s;
7621 s = SKIPSPACE2(s,tmpwhite);
7622#else
09bef843 7623 s = skipspace(s);
5db06880 7624#endif
09bef843 7625
7e2040f0 7626 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7627 (*s == ':' && s[1] == ':'))
7628 {
5db06880 7629#ifdef PERL_MAD
4f61fd4b 7630 SV *nametoke = NULL;
5db06880
NC
7631#endif
7632
09bef843
SB
7633 PL_expect = XBLOCK;
7634 attrful = XATTRBLOCK;
b1b65b59
JH
7635 /* remember buffer pos'n for later force_word */
7636 tboffset = s - PL_oldbufptr;
09bef843 7637 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7638#ifdef PERL_MAD
7639 if (PL_madskills)
7640 nametoke = newSVpvn(s, d - s);
7641#endif
6502358f
NC
7642 if (memchr(tmpbuf, ':', len))
7643 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7644 else {
7645 sv_setsv(PL_subname,PL_curstname);
396482e1 7646 sv_catpvs(PL_subname,"::");
09bef843
SB
7647 sv_catpvn(PL_subname,tmpbuf,len);
7648 }
09bef843 7649 have_name = TRUE;
5db06880
NC
7650
7651#ifdef PERL_MAD
7652
7653 start_force(0);
7654 CURMAD('X', nametoke);
7655 CURMAD('_', tmpwhite);
7656 (void) force_word(PL_oldbufptr + tboffset, WORD,
7657 FALSE, TRUE, TRUE);
7658
7659 s = SKIPSPACE2(d,tmpwhite);
7660#else
7661 s = skipspace(d);
7662#endif
09bef843 7663 }
463ee0b2 7664 else {
09bef843
SB
7665 if (key == KEY_my)
7666 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7667 PL_expect = XTERMBLOCK;
7668 attrful = XATTRTERM;
76f68e9b 7669 sv_setpvs(PL_subname,"?");
09bef843 7670 have_name = FALSE;
463ee0b2 7671 }
4633a7c4 7672
09bef843
SB
7673 if (key == KEY_format) {
7674 if (*s == '=')
7675 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7676#ifdef PERL_MAD
cd81e915 7677 PL_thistoken = subtoken;
5db06880
NC
7678 s = d;
7679#else
09bef843 7680 if (have_name)
b1b65b59
JH
7681 (void) force_word(PL_oldbufptr + tboffset, WORD,
7682 FALSE, TRUE, TRUE);
5db06880 7683#endif
09bef843
SB
7684 OPERATOR(FORMAT);
7685 }
79072805 7686
09bef843
SB
7687 /* Look for a prototype */
7688 if (*s == '(') {
d9f2850e
RGS
7689 char *p;
7690 bool bad_proto = FALSE;
9e8d7757
RB
7691 bool in_brackets = FALSE;
7692 char greedy_proto = ' ';
7693 bool proto_after_greedy_proto = FALSE;
7694 bool must_be_last = FALSE;
7695 bool underscore = FALSE;
aef2a98a 7696 bool seen_underscore = FALSE;
197afce1 7697 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7698
5db06880 7699 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7700 if (!s)
09bef843 7701 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7702 /* strip spaces and check for bad characters */
09bef843
SB
7703 d = SvPVX(PL_lex_stuff);
7704 tmp = 0;
d9f2850e
RGS
7705 for (p = d; *p; ++p) {
7706 if (!isSPACE(*p)) {
7707 d[tmp++] = *p;
9e8d7757 7708
197afce1 7709 if (warnillegalproto) {
9e8d7757
RB
7710 if (must_be_last)
7711 proto_after_greedy_proto = TRUE;
7712 if (!strchr("$@%*;[]&\\_", *p)) {
7713 bad_proto = TRUE;
7714 }
7715 else {
7716 if ( underscore ) {
7717 if ( *p != ';' )
7718 bad_proto = TRUE;
7719 underscore = FALSE;
7720 }
7721 if ( *p == '[' ) {
7722 in_brackets = TRUE;
7723 }
7724 else if ( *p == ']' ) {
7725 in_brackets = FALSE;
7726 }
7727 else if ( (*p == '@' || *p == '%') &&
7728 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7729 !in_brackets ) {
7730 must_be_last = TRUE;
7731 greedy_proto = *p;
7732 }
7733 else if ( *p == '_' ) {
aef2a98a 7734 underscore = seen_underscore = TRUE;
9e8d7757
RB
7735 }
7736 }
7737 }
d37a9538 7738 }
09bef843 7739 }
d9f2850e 7740 d[tmp] = '\0';
9e8d7757 7741 if (proto_after_greedy_proto)
197afce1 7742 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7743 "Prototype after '%c' for %"SVf" : %s",
7744 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7745 if (bad_proto)
197afce1 7746 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7747 "Illegal character %sin prototype for %"SVf" : %s",
7748 seen_underscore ? "after '_' " : "",
be2597df 7749 SVfARG(PL_subname), d);
b162af07 7750 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7751 have_proto = TRUE;
68dc0745 7752
5db06880
NC
7753#ifdef PERL_MAD
7754 start_force(0);
cd81e915 7755 CURMAD('q', PL_thisopen);
5db06880 7756 CURMAD('_', tmpwhite);
cd81e915
NC
7757 CURMAD('=', PL_thisstuff);
7758 CURMAD('Q', PL_thisclose);
5db06880
NC
7759 NEXTVAL_NEXTTOKE.opval =
7760 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7761 PL_lex_stuff = NULL;
5db06880
NC
7762 force_next(THING);
7763
7764 s = SKIPSPACE2(s,tmpwhite);
7765#else
09bef843 7766 s = skipspace(s);
5db06880 7767#endif
4633a7c4 7768 }
09bef843
SB
7769 else
7770 have_proto = FALSE;
7771
7772 if (*s == ':' && s[1] != ':')
7773 PL_expect = attrful;
8e742a20
MHM
7774 else if (*s != '{' && key == KEY_sub) {
7775 if (!have_name)
7776 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7777 else if (*s != ';' && *s != '}')
be2597df 7778 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7779 }
09bef843 7780
5db06880
NC
7781#ifdef PERL_MAD
7782 start_force(0);
7783 if (tmpwhite) {
7784 if (PL_madskills)
6b29d1f5 7785 curmad('^', newSVpvs(""));
5db06880
NC
7786 CURMAD('_', tmpwhite);
7787 }
7788 force_next(0);
7789
cd81e915 7790 PL_thistoken = subtoken;
5db06880 7791#else
09bef843 7792 if (have_proto) {
9ded7720 7793 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7794 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7795 PL_lex_stuff = NULL;
09bef843 7796 force_next(THING);
68dc0745 7797 }
5db06880 7798#endif
09bef843 7799 if (!have_name) {
49a54bbe
NC
7800 if (PL_curstash)
7801 sv_setpvs(PL_subname, "__ANON__");
7802 else
7803 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7804 TOKEN(ANONSUB);
4633a7c4 7805 }
5db06880 7806#ifndef PERL_MAD
b1b65b59
JH
7807 (void) force_word(PL_oldbufptr + tboffset, WORD,
7808 FALSE, TRUE, TRUE);
5db06880 7809#endif
09bef843
SB
7810 if (key == KEY_my)
7811 TOKEN(MYSUB);
7812 TOKEN(SUB);
4633a7c4 7813 }
79072805
LW
7814
7815 case KEY_system:
a0d0e21e 7816 LOP(OP_SYSTEM,XREF);
79072805
LW
7817
7818 case KEY_symlink:
a0d0e21e 7819 LOP(OP_SYMLINK,XTERM);
79072805
LW
7820
7821 case KEY_syscall:
a0d0e21e 7822 LOP(OP_SYSCALL,XTERM);
79072805 7823
c07a80fd 7824 case KEY_sysopen:
7825 LOP(OP_SYSOPEN,XTERM);
7826
137443ea 7827 case KEY_sysseek:
7828 LOP(OP_SYSSEEK,XTERM);
7829
79072805 7830 case KEY_sysread:
a0d0e21e 7831 LOP(OP_SYSREAD,XTERM);
79072805
LW
7832
7833 case KEY_syswrite:
a0d0e21e 7834 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7835
7836 case KEY_tr:
7837 s = scan_trans(s);
7838 TERM(sublex_start());
7839
7840 case KEY_tell:
7841 UNI(OP_TELL);
7842
7843 case KEY_telldir:
7844 UNI(OP_TELLDIR);
7845
463ee0b2 7846 case KEY_tie:
a0d0e21e 7847 LOP(OP_TIE,XTERM);
463ee0b2 7848
c07a80fd 7849 case KEY_tied:
7850 UNI(OP_TIED);
7851
79072805
LW
7852 case KEY_time:
7853 FUN0(OP_TIME);
7854
7855 case KEY_times:
7856 FUN0(OP_TMS);
7857
7858 case KEY_truncate:
a0d0e21e 7859 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7860
7861 case KEY_uc:
7862 UNI(OP_UC);
7863
7864 case KEY_ucfirst:
7865 UNI(OP_UCFIRST);
7866
463ee0b2
LW
7867 case KEY_untie:
7868 UNI(OP_UNTIE);
7869
79072805 7870 case KEY_until:
6154021b 7871 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7872 OPERATOR(UNTIL);
7873
7874 case KEY_unless:
6154021b 7875 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7876 OPERATOR(UNLESS);
7877
7878 case KEY_unlink:
a0d0e21e 7879 LOP(OP_UNLINK,XTERM);
79072805
LW
7880
7881 case KEY_undef:
6f33ba73 7882 UNIDOR(OP_UNDEF);
79072805
LW
7883
7884 case KEY_unpack:
a0d0e21e 7885 LOP(OP_UNPACK,XTERM);
79072805
LW
7886
7887 case KEY_utime:
a0d0e21e 7888 LOP(OP_UTIME,XTERM);
79072805
LW
7889
7890 case KEY_umask:
6f33ba73 7891 UNIDOR(OP_UMASK);
79072805
LW
7892
7893 case KEY_unshift:
a0d0e21e
LW
7894 LOP(OP_UNSHIFT,XTERM);
7895
7896 case KEY_use:
468aa647 7897 s = tokenize_use(1, s);
a0d0e21e 7898 OPERATOR(USE);
79072805
LW
7899
7900 case KEY_values:
7901 UNI(OP_VALUES);
7902
7903 case KEY_vec:
a0d0e21e 7904 LOP(OP_VEC,XTERM);
79072805 7905
0d863452 7906 case KEY_when:
6154021b 7907 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7908 OPERATOR(WHEN);
7909
79072805 7910 case KEY_while:
6154021b 7911 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7912 OPERATOR(WHILE);
7913
7914 case KEY_warn:
3280af22 7915 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7916 LOP(OP_WARN,XTERM);
79072805
LW
7917
7918 case KEY_wait:
7919 FUN0(OP_WAIT);
7920
7921 case KEY_waitpid:
a0d0e21e 7922 LOP(OP_WAITPID,XTERM);
79072805
LW
7923
7924 case KEY_wantarray:
7925 FUN0(OP_WANTARRAY);
7926
7927 case KEY_write:
9d116dd7
JH
7928#ifdef EBCDIC
7929 {
df3728a2
JH
7930 char ctl_l[2];
7931 ctl_l[0] = toCTRL('L');
7932 ctl_l[1] = '\0';
fafc274c 7933 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7934 }
7935#else
fafc274c
NC
7936 /* Make sure $^L is defined */
7937 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7938#endif
79072805
LW
7939 UNI(OP_ENTERWRITE);
7940
7941 case KEY_x:
3280af22 7942 if (PL_expect == XOPERATOR)
79072805
LW
7943 Mop(OP_REPEAT);
7944 check_uni();
7945 goto just_a_word;
7946
a0d0e21e 7947 case KEY_xor:
6154021b 7948 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7949 OPERATOR(OROP);
7950
79072805
LW
7951 case KEY_y:
7952 s = scan_trans(s);
7953 TERM(sublex_start());
7954 }
49dc05e3 7955 }}
79072805 7956}
bf4acbe4
GS
7957#ifdef __SC__
7958#pragma segment Main
7959#endif
79072805 7960
e930465f
JH
7961static int
7962S_pending_ident(pTHX)
8eceec63 7963{
97aff369 7964 dVAR;
8eceec63 7965 register char *d;
bbd11bfc 7966 PADOFFSET tmp = 0;
8eceec63
SC
7967 /* pit holds the identifier we read and pending_ident is reset */
7968 char pit = PL_pending_ident;
9bde8eb0
NC
7969 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7970 /* All routes through this function want to know if there is a colon. */
c099d646 7971 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7972 PL_pending_ident = 0;
7973
cd81e915 7974 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7975 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7976 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7977
7978 /* if we're in a my(), we can't allow dynamics here.
7979 $foo'bar has already been turned into $foo::bar, so
7980 just check for colons.
7981
7982 if it's a legal name, the OP is a PADANY.
7983 */
7984 if (PL_in_my) {
7985 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7986 if (has_colon)
8eceec63
SC
7987 yyerror(Perl_form(aTHX_ "No package name allowed for "
7988 "variable %s in \"our\"",
7989 PL_tokenbuf));
d6447115 7990 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7991 }
7992 else {
9bde8eb0 7993 if (has_colon)
952306ac
RGS
7994 yyerror(Perl_form(aTHX_ PL_no_myglob,
7995 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7996
6154021b 7997 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7998 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7999 return PRIVATEREF;
8000 }
8001 }
8002
8003 /*
8004 build the ops for accesses to a my() variable.
8005
8006 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8007 then used in a comparison. This catches most, but not
8008 all cases. For instance, it catches
8009 sort { my($a); $a <=> $b }
8010 but not
8011 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8012 (although why you'd do that is anyone's guess).
8013 */
8014
9bde8eb0 8015 if (!has_colon) {
8716503d 8016 if (!PL_in_my)
f8f98e0a 8017 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8018 if (tmp != NOT_IN_PAD) {
8eceec63 8019 /* might be an "our" variable" */
00b1698f 8020 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8021 /* build ops for a bareword */
b64e5050
AL
8022 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8023 HEK * const stashname = HvNAME_HEK(stash);
8024 SV * const sym = newSVhek(stashname);
396482e1 8025 sv_catpvs(sym, "::");
9bde8eb0 8026 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8027 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8028 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8029 gv_fetchsv(sym,
8eceec63
SC
8030 (PL_in_eval
8031 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8032 : GV_ADDMULTI
8eceec63
SC
8033 ),
8034 ((PL_tokenbuf[0] == '$') ? SVt_PV
8035 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8036 : SVt_PVHV));
8037 return WORD;
8038 }
8039
8040 /* if it's a sort block and they're naming $a or $b */
8041 if (PL_last_lop_op == OP_SORT &&
8042 PL_tokenbuf[0] == '$' &&
8043 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8044 && !PL_tokenbuf[2])
8045 {
8046 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8047 d < PL_bufend && *d != '\n';
8048 d++)
8049 {
8050 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8051 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8052 PL_tokenbuf);
8053 }
8054 }
8055 }
8056
6154021b
RGS
8057 pl_yylval.opval = newOP(OP_PADANY, 0);
8058 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8059 return PRIVATEREF;
8060 }
8061 }
8062
8063 /*
8064 Whine if they've said @foo in a doublequoted string,
8065 and @foo isn't a variable we can find in the symbol
8066 table.
8067 */
d824713b
NC
8068 if (ckWARN(WARN_AMBIGUOUS) &&
8069 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8070 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8071 SVt_PVAV);
8eceec63 8072 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8073 /* DO NOT warn for @- and @+ */
8074 && !( PL_tokenbuf[2] == '\0' &&
8075 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8076 )
8eceec63
SC
8077 {
8078 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8079 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8080 "Possible unintended interpolation of %s in string",
8081 PL_tokenbuf);
8eceec63
SC
8082 }
8083 }
8084
8085 /* build ops for a bareword */
6154021b 8086 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8087 tokenbuf_len - 1));
6154021b 8088 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8089 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8090 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8091 ((PL_tokenbuf[0] == '$') ? SVt_PV
8092 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8093 : SVt_PVHV));
8eceec63
SC
8094 return WORD;
8095}
8096
4c3bbe0f
MHM
8097/*
8098 * The following code was generated by perl_keyword.pl.
8099 */
e2e1dd5a 8100
79072805 8101I32
5458a98a 8102Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8103{
952306ac 8104 dVAR;
7918f24d
NC
8105
8106 PERL_ARGS_ASSERT_KEYWORD;
8107
4c3bbe0f
MHM
8108 switch (len)
8109 {
8110 case 1: /* 5 tokens of length 1 */
8111 switch (name[0])
e2e1dd5a 8112 {
4c3bbe0f
MHM
8113 case 'm':
8114 { /* m */
8115 return KEY_m;
8116 }
8117
4c3bbe0f
MHM
8118 case 'q':
8119 { /* q */
8120 return KEY_q;
8121 }
8122
4c3bbe0f
MHM
8123 case 's':
8124 { /* s */
8125 return KEY_s;
8126 }
8127
4c3bbe0f
MHM
8128 case 'x':
8129 { /* x */
8130 return -KEY_x;
8131 }
8132
4c3bbe0f
MHM
8133 case 'y':
8134 { /* y */
8135 return KEY_y;
8136 }
8137
4c3bbe0f
MHM
8138 default:
8139 goto unknown;
e2e1dd5a 8140 }
4c3bbe0f
MHM
8141
8142 case 2: /* 18 tokens of length 2 */
8143 switch (name[0])
e2e1dd5a 8144 {
4c3bbe0f
MHM
8145 case 'd':
8146 if (name[1] == 'o')
8147 { /* do */
8148 return KEY_do;
8149 }
8150
8151 goto unknown;
8152
8153 case 'e':
8154 if (name[1] == 'q')
8155 { /* eq */
8156 return -KEY_eq;
8157 }
8158
8159 goto unknown;
8160
8161 case 'g':
8162 switch (name[1])
8163 {
8164 case 'e':
8165 { /* ge */
8166 return -KEY_ge;
8167 }
8168
4c3bbe0f
MHM
8169 case 't':
8170 { /* gt */
8171 return -KEY_gt;
8172 }
8173
4c3bbe0f
MHM
8174 default:
8175 goto unknown;
8176 }
8177
8178 case 'i':
8179 if (name[1] == 'f')
8180 { /* if */
8181 return KEY_if;
8182 }
8183
8184 goto unknown;
8185
8186 case 'l':
8187 switch (name[1])
8188 {
8189 case 'c':
8190 { /* lc */
8191 return -KEY_lc;
8192 }
8193
4c3bbe0f
MHM
8194 case 'e':
8195 { /* le */
8196 return -KEY_le;
8197 }
8198
4c3bbe0f
MHM
8199 case 't':
8200 { /* lt */
8201 return -KEY_lt;
8202 }
8203
4c3bbe0f
MHM
8204 default:
8205 goto unknown;
8206 }
8207
8208 case 'm':
8209 if (name[1] == 'y')
8210 { /* my */
8211 return KEY_my;
8212 }
8213
8214 goto unknown;
8215
8216 case 'n':
8217 switch (name[1])
8218 {
8219 case 'e':
8220 { /* ne */
8221 return -KEY_ne;
8222 }
8223
4c3bbe0f
MHM
8224 case 'o':
8225 { /* no */
8226 return KEY_no;
8227 }
8228
4c3bbe0f
MHM
8229 default:
8230 goto unknown;
8231 }
8232
8233 case 'o':
8234 if (name[1] == 'r')
8235 { /* or */
8236 return -KEY_or;
8237 }
8238
8239 goto unknown;
8240
8241 case 'q':
8242 switch (name[1])
8243 {
8244 case 'q':
8245 { /* qq */
8246 return KEY_qq;
8247 }
8248
4c3bbe0f
MHM
8249 case 'r':
8250 { /* qr */
8251 return KEY_qr;
8252 }
8253
4c3bbe0f
MHM
8254 case 'w':
8255 { /* qw */
8256 return KEY_qw;
8257 }
8258
4c3bbe0f
MHM
8259 case 'x':
8260 { /* qx */
8261 return KEY_qx;
8262 }
8263
4c3bbe0f
MHM
8264 default:
8265 goto unknown;
8266 }
8267
8268 case 't':
8269 if (name[1] == 'r')
8270 { /* tr */
8271 return KEY_tr;
8272 }
8273
8274 goto unknown;
8275
8276 case 'u':
8277 if (name[1] == 'c')
8278 { /* uc */
8279 return -KEY_uc;
8280 }
8281
8282 goto unknown;
8283
8284 default:
8285 goto unknown;
e2e1dd5a 8286 }
4c3bbe0f 8287
0d863452 8288 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8289 switch (name[0])
e2e1dd5a 8290 {
4c3bbe0f
MHM
8291 case 'E':
8292 if (name[1] == 'N' &&
8293 name[2] == 'D')
8294 { /* END */
8295 return KEY_END;
8296 }
8297
8298 goto unknown;
8299
8300 case 'a':
8301 switch (name[1])
8302 {
8303 case 'b':
8304 if (name[2] == 's')
8305 { /* abs */
8306 return -KEY_abs;
8307 }
8308
8309 goto unknown;
8310
8311 case 'n':
8312 if (name[2] == 'd')
8313 { /* and */
8314 return -KEY_and;
8315 }
8316
8317 goto unknown;
8318
8319 default:
8320 goto unknown;
8321 }
8322
8323 case 'c':
8324 switch (name[1])
8325 {
8326 case 'h':
8327 if (name[2] == 'r')
8328 { /* chr */
8329 return -KEY_chr;
8330 }
8331
8332 goto unknown;
8333
8334 case 'm':
8335 if (name[2] == 'p')
8336 { /* cmp */
8337 return -KEY_cmp;
8338 }
8339
8340 goto unknown;
8341
8342 case 'o':
8343 if (name[2] == 's')
8344 { /* cos */
8345 return -KEY_cos;
8346 }
8347
8348 goto unknown;
8349
8350 default:
8351 goto unknown;
8352 }
8353
8354 case 'd':
8355 if (name[1] == 'i' &&
8356 name[2] == 'e')
8357 { /* die */
8358 return -KEY_die;
8359 }
8360
8361 goto unknown;
8362
8363 case 'e':
8364 switch (name[1])
8365 {
8366 case 'o':
8367 if (name[2] == 'f')
8368 { /* eof */
8369 return -KEY_eof;
8370 }
8371
8372 goto unknown;
8373
4c3bbe0f
MHM
8374 case 'x':
8375 if (name[2] == 'p')
8376 { /* exp */
8377 return -KEY_exp;
8378 }
8379
8380 goto unknown;
8381
8382 default:
8383 goto unknown;
8384 }
8385
8386 case 'f':
8387 if (name[1] == 'o' &&
8388 name[2] == 'r')
8389 { /* for */
8390 return KEY_for;
8391 }
8392
8393 goto unknown;
8394
8395 case 'h':
8396 if (name[1] == 'e' &&
8397 name[2] == 'x')
8398 { /* hex */
8399 return -KEY_hex;
8400 }
8401
8402 goto unknown;
8403
8404 case 'i':
8405 if (name[1] == 'n' &&
8406 name[2] == 't')
8407 { /* int */
8408 return -KEY_int;
8409 }
8410
8411 goto unknown;
8412
8413 case 'l':
8414 if (name[1] == 'o' &&
8415 name[2] == 'g')
8416 { /* log */
8417 return -KEY_log;
8418 }
8419
8420 goto unknown;
8421
8422 case 'm':
8423 if (name[1] == 'a' &&
8424 name[2] == 'p')
8425 { /* map */
8426 return KEY_map;
8427 }
8428
8429 goto unknown;
8430
8431 case 'n':
8432 if (name[1] == 'o' &&
8433 name[2] == 't')
8434 { /* not */
8435 return -KEY_not;
8436 }
8437
8438 goto unknown;
8439
8440 case 'o':
8441 switch (name[1])
8442 {
8443 case 'c':
8444 if (name[2] == 't')
8445 { /* oct */
8446 return -KEY_oct;
8447 }
8448
8449 goto unknown;
8450
8451 case 'r':
8452 if (name[2] == 'd')
8453 { /* ord */
8454 return -KEY_ord;
8455 }
8456
8457 goto unknown;
8458
8459 case 'u':
8460 if (name[2] == 'r')
8461 { /* our */
8462 return KEY_our;
8463 }
8464
8465 goto unknown;
8466
8467 default:
8468 goto unknown;
8469 }
8470
8471 case 'p':
8472 if (name[1] == 'o')
8473 {
8474 switch (name[2])
8475 {
8476 case 'p':
8477 { /* pop */
8478 return -KEY_pop;
8479 }
8480
4c3bbe0f
MHM
8481 case 's':
8482 { /* pos */
8483 return KEY_pos;
8484 }
8485
4c3bbe0f
MHM
8486 default:
8487 goto unknown;
8488 }
8489 }
8490
8491 goto unknown;
8492
8493 case 'r':
8494 if (name[1] == 'e' &&
8495 name[2] == 'f')
8496 { /* ref */
8497 return -KEY_ref;
8498 }
8499
8500 goto unknown;
8501
8502 case 's':
8503 switch (name[1])
8504 {
0d863452
RH
8505 case 'a':
8506 if (name[2] == 'y')
8507 { /* say */
e3e804c9 8508 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8509 }
8510
8511 goto unknown;
8512
4c3bbe0f
MHM
8513 case 'i':
8514 if (name[2] == 'n')
8515 { /* sin */
8516 return -KEY_sin;
8517 }
8518
8519 goto unknown;
8520
8521 case 'u':
8522 if (name[2] == 'b')
8523 { /* sub */
8524 return KEY_sub;
8525 }
8526
8527 goto unknown;
8528
8529 default:
8530 goto unknown;
8531 }
8532
8533 case 't':
8534 if (name[1] == 'i' &&
8535 name[2] == 'e')
8536 { /* tie */
1db4d195 8537 return -KEY_tie;
4c3bbe0f
MHM
8538 }
8539
8540 goto unknown;
8541
8542 case 'u':
8543 if (name[1] == 's' &&
8544 name[2] == 'e')
8545 { /* use */
8546 return KEY_use;
8547 }
8548
8549 goto unknown;
8550
8551 case 'v':
8552 if (name[1] == 'e' &&
8553 name[2] == 'c')
8554 { /* vec */
8555 return -KEY_vec;
8556 }
8557
8558 goto unknown;
8559
8560 case 'x':
8561 if (name[1] == 'o' &&
8562 name[2] == 'r')
8563 { /* xor */
8564 return -KEY_xor;
8565 }
8566
8567 goto unknown;
8568
8569 default:
8570 goto unknown;
e2e1dd5a 8571 }
4c3bbe0f 8572
0d863452 8573 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8574 switch (name[0])
e2e1dd5a 8575 {
4c3bbe0f
MHM
8576 case 'C':
8577 if (name[1] == 'O' &&
8578 name[2] == 'R' &&
8579 name[3] == 'E')
8580 { /* CORE */
8581 return -KEY_CORE;
8582 }
8583
8584 goto unknown;
8585
8586 case 'I':
8587 if (name[1] == 'N' &&
8588 name[2] == 'I' &&
8589 name[3] == 'T')
8590 { /* INIT */
8591 return KEY_INIT;
8592 }
8593
8594 goto unknown;
8595
8596 case 'b':
8597 if (name[1] == 'i' &&
8598 name[2] == 'n' &&
8599 name[3] == 'd')
8600 { /* bind */
8601 return -KEY_bind;
8602 }
8603
8604 goto unknown;
8605
8606 case 'c':
8607 if (name[1] == 'h' &&
8608 name[2] == 'o' &&
8609 name[3] == 'p')
8610 { /* chop */
8611 return -KEY_chop;
8612 }
8613
8614 goto unknown;
8615
8616 case 'd':
8617 if (name[1] == 'u' &&
8618 name[2] == 'm' &&
8619 name[3] == 'p')
8620 { /* dump */
8621 return -KEY_dump;
8622 }
8623
8624 goto unknown;
8625
8626 case 'e':
8627 switch (name[1])
8628 {
8629 case 'a':
8630 if (name[2] == 'c' &&
8631 name[3] == 'h')
8632 { /* each */
8633 return -KEY_each;
8634 }
8635
8636 goto unknown;
8637
8638 case 'l':
8639 if (name[2] == 's' &&
8640 name[3] == 'e')
8641 { /* else */
8642 return KEY_else;
8643 }
8644
8645 goto unknown;
8646
8647 case 'v':
8648 if (name[2] == 'a' &&
8649 name[3] == 'l')
8650 { /* eval */
8651 return KEY_eval;
8652 }
8653
8654 goto unknown;
8655
8656 case 'x':
8657 switch (name[2])
8658 {
8659 case 'e':
8660 if (name[3] == 'c')
8661 { /* exec */
8662 return -KEY_exec;
8663 }
8664
8665 goto unknown;
8666
8667 case 'i':
8668 if (name[3] == 't')
8669 { /* exit */
8670 return -KEY_exit;
8671 }
8672
8673 goto unknown;
8674
8675 default:
8676 goto unknown;
8677 }
8678
8679 default:
8680 goto unknown;
8681 }
8682
8683 case 'f':
8684 if (name[1] == 'o' &&
8685 name[2] == 'r' &&
8686 name[3] == 'k')
8687 { /* fork */
8688 return -KEY_fork;
8689 }
8690
8691 goto unknown;
8692
8693 case 'g':
8694 switch (name[1])
8695 {
8696 case 'e':
8697 if (name[2] == 't' &&
8698 name[3] == 'c')
8699 { /* getc */
8700 return -KEY_getc;
8701 }
8702
8703 goto unknown;
8704
8705 case 'l':
8706 if (name[2] == 'o' &&
8707 name[3] == 'b')
8708 { /* glob */
8709 return KEY_glob;
8710 }
8711
8712 goto unknown;
8713
8714 case 'o':
8715 if (name[2] == 't' &&
8716 name[3] == 'o')
8717 { /* goto */
8718 return KEY_goto;
8719 }
8720
8721 goto unknown;
8722
8723 case 'r':
8724 if (name[2] == 'e' &&
8725 name[3] == 'p')
8726 { /* grep */
8727 return KEY_grep;
8728 }
8729
8730 goto unknown;
8731
8732 default:
8733 goto unknown;
8734 }
8735
8736 case 'j':
8737 if (name[1] == 'o' &&
8738 name[2] == 'i' &&
8739 name[3] == 'n')
8740 { /* join */
8741 return -KEY_join;
8742 }
8743
8744 goto unknown;
8745
8746 case 'k':
8747 switch (name[1])
8748 {
8749 case 'e':
8750 if (name[2] == 'y' &&
8751 name[3] == 's')
8752 { /* keys */
8753 return -KEY_keys;
8754 }
8755
8756 goto unknown;
8757
8758 case 'i':
8759 if (name[2] == 'l' &&
8760 name[3] == 'l')
8761 { /* kill */
8762 return -KEY_kill;
8763 }
8764
8765 goto unknown;
8766
8767 default:
8768 goto unknown;
8769 }
8770
8771 case 'l':
8772 switch (name[1])
8773 {
8774 case 'a':
8775 if (name[2] == 's' &&
8776 name[3] == 't')
8777 { /* last */
8778 return KEY_last;
8779 }
8780
8781 goto unknown;
8782
8783 case 'i':
8784 if (name[2] == 'n' &&
8785 name[3] == 'k')
8786 { /* link */
8787 return -KEY_link;
8788 }
8789
8790 goto unknown;
8791
8792 case 'o':
8793 if (name[2] == 'c' &&
8794 name[3] == 'k')
8795 { /* lock */
8796 return -KEY_lock;
8797 }
8798
8799 goto unknown;
8800
8801 default:
8802 goto unknown;
8803 }
8804
8805 case 'n':
8806 if (name[1] == 'e' &&
8807 name[2] == 'x' &&
8808 name[3] == 't')
8809 { /* next */
8810 return KEY_next;
8811 }
8812
8813 goto unknown;
8814
8815 case 'o':
8816 if (name[1] == 'p' &&
8817 name[2] == 'e' &&
8818 name[3] == 'n')
8819 { /* open */
8820 return -KEY_open;
8821 }
8822
8823 goto unknown;
8824
8825 case 'p':
8826 switch (name[1])
8827 {
8828 case 'a':
8829 if (name[2] == 'c' &&
8830 name[3] == 'k')
8831 { /* pack */
8832 return -KEY_pack;
8833 }
8834
8835 goto unknown;
8836
8837 case 'i':
8838 if (name[2] == 'p' &&
8839 name[3] == 'e')
8840 { /* pipe */
8841 return -KEY_pipe;
8842 }
8843
8844 goto unknown;
8845
8846 case 'u':
8847 if (name[2] == 's' &&
8848 name[3] == 'h')
8849 { /* push */
8850 return -KEY_push;
8851 }
8852
8853 goto unknown;
8854
8855 default:
8856 goto unknown;
8857 }
8858
8859 case 'r':
8860 switch (name[1])
8861 {
8862 case 'a':
8863 if (name[2] == 'n' &&
8864 name[3] == 'd')
8865 { /* rand */
8866 return -KEY_rand;
8867 }
8868
8869 goto unknown;
8870
8871 case 'e':
8872 switch (name[2])
8873 {
8874 case 'a':
8875 if (name[3] == 'd')
8876 { /* read */
8877 return -KEY_read;
8878 }
8879
8880 goto unknown;
8881
8882 case 'c':
8883 if (name[3] == 'v')
8884 { /* recv */
8885 return -KEY_recv;
8886 }
8887
8888 goto unknown;
8889
8890 case 'd':
8891 if (name[3] == 'o')
8892 { /* redo */
8893 return KEY_redo;
8894 }
8895
8896 goto unknown;
8897
8898 default:
8899 goto unknown;
8900 }
8901
8902 default:
8903 goto unknown;
8904 }
8905
8906 case 's':
8907 switch (name[1])
8908 {
8909 case 'e':
8910 switch (name[2])
8911 {
8912 case 'e':
8913 if (name[3] == 'k')
8914 { /* seek */
8915 return -KEY_seek;
8916 }
8917
8918 goto unknown;
8919
8920 case 'n':
8921 if (name[3] == 'd')
8922 { /* send */
8923 return -KEY_send;
8924 }
8925
8926 goto unknown;
8927
8928 default:
8929 goto unknown;
8930 }
8931
8932 case 'o':
8933 if (name[2] == 'r' &&
8934 name[3] == 't')
8935 { /* sort */
8936 return KEY_sort;
8937 }
8938
8939 goto unknown;
8940
8941 case 'q':
8942 if (name[2] == 'r' &&
8943 name[3] == 't')
8944 { /* sqrt */
8945 return -KEY_sqrt;
8946 }
8947
8948 goto unknown;
8949
8950 case 't':
8951 if (name[2] == 'a' &&
8952 name[3] == 't')
8953 { /* stat */
8954 return -KEY_stat;
8955 }
8956
8957 goto unknown;
8958
8959 default:
8960 goto unknown;
8961 }
8962
8963 case 't':
8964 switch (name[1])
8965 {
8966 case 'e':
8967 if (name[2] == 'l' &&
8968 name[3] == 'l')
8969 { /* tell */
8970 return -KEY_tell;
8971 }
8972
8973 goto unknown;
8974
8975 case 'i':
8976 switch (name[2])
8977 {
8978 case 'e':
8979 if (name[3] == 'd')
8980 { /* tied */
1db4d195 8981 return -KEY_tied;
4c3bbe0f
MHM
8982 }
8983
8984 goto unknown;
8985
8986 case 'm':
8987 if (name[3] == 'e')
8988 { /* time */
8989 return -KEY_time;
8990 }
8991
8992 goto unknown;
8993
8994 default:
8995 goto unknown;
8996 }
8997
8998 default:
8999 goto unknown;
9000 }
9001
9002 case 'w':
0d863452 9003 switch (name[1])
4c3bbe0f 9004 {
0d863452 9005 case 'a':
952306ac
RGS
9006 switch (name[2])
9007 {
9008 case 'i':
9009 if (name[3] == 't')
9010 { /* wait */
9011 return -KEY_wait;
9012 }
4c3bbe0f 9013
952306ac 9014 goto unknown;
4c3bbe0f 9015
952306ac
RGS
9016 case 'r':
9017 if (name[3] == 'n')
9018 { /* warn */
9019 return -KEY_warn;
9020 }
4c3bbe0f 9021
952306ac 9022 goto unknown;
4c3bbe0f 9023
952306ac
RGS
9024 default:
9025 goto unknown;
9026 }
0d863452
RH
9027
9028 case 'h':
9029 if (name[2] == 'e' &&
9030 name[3] == 'n')
9031 { /* when */
5458a98a 9032 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9033 }
4c3bbe0f 9034
952306ac 9035 goto unknown;
4c3bbe0f 9036
952306ac
RGS
9037 default:
9038 goto unknown;
9039 }
4c3bbe0f 9040
0d863452
RH
9041 default:
9042 goto unknown;
9043 }
9044
952306ac 9045 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9046 switch (name[0])
e2e1dd5a 9047 {
4c3bbe0f
MHM
9048 case 'B':
9049 if (name[1] == 'E' &&
9050 name[2] == 'G' &&
9051 name[3] == 'I' &&
9052 name[4] == 'N')
9053 { /* BEGIN */
9054 return KEY_BEGIN;
9055 }
9056
9057 goto unknown;
9058
9059 case 'C':
9060 if (name[1] == 'H' &&
9061 name[2] == 'E' &&
9062 name[3] == 'C' &&
9063 name[4] == 'K')
9064 { /* CHECK */
9065 return KEY_CHECK;
9066 }
9067
9068 goto unknown;
9069
9070 case 'a':
9071 switch (name[1])
9072 {
9073 case 'l':
9074 if (name[2] == 'a' &&
9075 name[3] == 'r' &&
9076 name[4] == 'm')
9077 { /* alarm */
9078 return -KEY_alarm;
9079 }
9080
9081 goto unknown;
9082
9083 case 't':
9084 if (name[2] == 'a' &&
9085 name[3] == 'n' &&
9086 name[4] == '2')
9087 { /* atan2 */
9088 return -KEY_atan2;
9089 }
9090
9091 goto unknown;
9092
9093 default:
9094 goto unknown;
9095 }
9096
9097 case 'b':
0d863452
RH
9098 switch (name[1])
9099 {
9100 case 'l':
9101 if (name[2] == 'e' &&
952306ac
RGS
9102 name[3] == 's' &&
9103 name[4] == 's')
9104 { /* bless */
9105 return -KEY_bless;
9106 }
4c3bbe0f 9107
952306ac 9108 goto unknown;
4c3bbe0f 9109
0d863452
RH
9110 case 'r':
9111 if (name[2] == 'e' &&
9112 name[3] == 'a' &&
9113 name[4] == 'k')
9114 { /* break */
5458a98a 9115 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9116 }
9117
9118 goto unknown;
9119
9120 default:
9121 goto unknown;
9122 }
9123
4c3bbe0f
MHM
9124 case 'c':
9125 switch (name[1])
9126 {
9127 case 'h':
9128 switch (name[2])
9129 {
9130 case 'd':
9131 if (name[3] == 'i' &&
9132 name[4] == 'r')
9133 { /* chdir */
9134 return -KEY_chdir;
9135 }
9136
9137 goto unknown;
9138
9139 case 'm':
9140 if (name[3] == 'o' &&
9141 name[4] == 'd')
9142 { /* chmod */
9143 return -KEY_chmod;
9144 }
9145
9146 goto unknown;
9147
9148 case 'o':
9149 switch (name[3])
9150 {
9151 case 'm':
9152 if (name[4] == 'p')
9153 { /* chomp */
9154 return -KEY_chomp;
9155 }
9156
9157 goto unknown;
9158
9159 case 'w':
9160 if (name[4] == 'n')
9161 { /* chown */
9162 return -KEY_chown;
9163 }
9164
9165 goto unknown;
9166
9167 default:
9168 goto unknown;
9169 }
9170
9171 default:
9172 goto unknown;
9173 }
9174
9175 case 'l':
9176 if (name[2] == 'o' &&
9177 name[3] == 's' &&
9178 name[4] == 'e')
9179 { /* close */
9180 return -KEY_close;
9181 }
9182
9183 goto unknown;
9184
9185 case 'r':
9186 if (name[2] == 'y' &&
9187 name[3] == 'p' &&
9188 name[4] == 't')
9189 { /* crypt */
9190 return -KEY_crypt;
9191 }
9192
9193 goto unknown;
9194
9195 default:
9196 goto unknown;
9197 }
9198
9199 case 'e':
9200 if (name[1] == 'l' &&
9201 name[2] == 's' &&
9202 name[3] == 'i' &&
9203 name[4] == 'f')
9204 { /* elsif */
9205 return KEY_elsif;
9206 }
9207
9208 goto unknown;
9209
9210 case 'f':
9211 switch (name[1])
9212 {
9213 case 'c':
9214 if (name[2] == 'n' &&
9215 name[3] == 't' &&
9216 name[4] == 'l')
9217 { /* fcntl */
9218 return -KEY_fcntl;
9219 }
9220
9221 goto unknown;
9222
9223 case 'l':
9224 if (name[2] == 'o' &&
9225 name[3] == 'c' &&
9226 name[4] == 'k')
9227 { /* flock */
9228 return -KEY_flock;
9229 }
9230
9231 goto unknown;
9232
9233 default:
9234 goto unknown;
9235 }
9236
0d863452
RH
9237 case 'g':
9238 if (name[1] == 'i' &&
9239 name[2] == 'v' &&
9240 name[3] == 'e' &&
9241 name[4] == 'n')
9242 { /* given */
5458a98a 9243 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9244 }
9245
9246 goto unknown;
9247
4c3bbe0f
MHM
9248 case 'i':
9249 switch (name[1])
9250 {
9251 case 'n':
9252 if (name[2] == 'd' &&
9253 name[3] == 'e' &&
9254 name[4] == 'x')
9255 { /* index */
9256 return -KEY_index;
9257 }
9258
9259 goto unknown;
9260
9261 case 'o':
9262 if (name[2] == 'c' &&
9263 name[3] == 't' &&
9264 name[4] == 'l')
9265 { /* ioctl */
9266 return -KEY_ioctl;
9267 }
9268
9269 goto unknown;
9270
9271 default:
9272 goto unknown;
9273 }
9274
9275 case 'l':
9276 switch (name[1])
9277 {
9278 case 'o':
9279 if (name[2] == 'c' &&
9280 name[3] == 'a' &&
9281 name[4] == 'l')
9282 { /* local */
9283 return KEY_local;
9284 }
9285
9286 goto unknown;
9287
9288 case 's':
9289 if (name[2] == 't' &&
9290 name[3] == 'a' &&
9291 name[4] == 't')
9292 { /* lstat */
9293 return -KEY_lstat;
9294 }
9295
9296 goto unknown;
9297
9298 default:
9299 goto unknown;
9300 }
9301
9302 case 'm':
9303 if (name[1] == 'k' &&
9304 name[2] == 'd' &&
9305 name[3] == 'i' &&
9306 name[4] == 'r')
9307 { /* mkdir */
9308 return -KEY_mkdir;
9309 }
9310
9311 goto unknown;
9312
9313 case 'p':
9314 if (name[1] == 'r' &&
9315 name[2] == 'i' &&
9316 name[3] == 'n' &&
9317 name[4] == 't')
9318 { /* print */
9319 return KEY_print;
9320 }
9321
9322 goto unknown;
9323
9324 case 'r':
9325 switch (name[1])
9326 {
9327 case 'e':
9328 if (name[2] == 's' &&
9329 name[3] == 'e' &&
9330 name[4] == 't')
9331 { /* reset */
9332 return -KEY_reset;
9333 }
9334
9335 goto unknown;
9336
9337 case 'm':
9338 if (name[2] == 'd' &&
9339 name[3] == 'i' &&
9340 name[4] == 'r')
9341 { /* rmdir */
9342 return -KEY_rmdir;
9343 }
9344
9345 goto unknown;
9346
9347 default:
9348 goto unknown;
9349 }
9350
9351 case 's':
9352 switch (name[1])
9353 {
9354 case 'e':
9355 if (name[2] == 'm' &&
9356 name[3] == 'o' &&
9357 name[4] == 'p')
9358 { /* semop */
9359 return -KEY_semop;
9360 }
9361
9362 goto unknown;
9363
9364 case 'h':
9365 if (name[2] == 'i' &&
9366 name[3] == 'f' &&
9367 name[4] == 't')
9368 { /* shift */
9369 return -KEY_shift;
9370 }
9371
9372 goto unknown;
9373
9374 case 'l':
9375 if (name[2] == 'e' &&
9376 name[3] == 'e' &&
9377 name[4] == 'p')
9378 { /* sleep */
9379 return -KEY_sleep;
9380 }
9381
9382 goto unknown;
9383
9384 case 'p':
9385 if (name[2] == 'l' &&
9386 name[3] == 'i' &&
9387 name[4] == 't')
9388 { /* split */
9389 return KEY_split;
9390 }
9391
9392 goto unknown;
9393
9394 case 'r':
9395 if (name[2] == 'a' &&
9396 name[3] == 'n' &&
9397 name[4] == 'd')
9398 { /* srand */
9399 return -KEY_srand;
9400 }
9401
9402 goto unknown;
9403
9404 case 't':
952306ac
RGS
9405 switch (name[2])
9406 {
9407 case 'a':
9408 if (name[3] == 't' &&
9409 name[4] == 'e')
9410 { /* state */
5458a98a 9411 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9412 }
4c3bbe0f 9413
952306ac
RGS
9414 goto unknown;
9415
9416 case 'u':
9417 if (name[3] == 'd' &&
9418 name[4] == 'y')
9419 { /* study */
9420 return KEY_study;
9421 }
9422
9423 goto unknown;
9424
9425 default:
9426 goto unknown;
9427 }
4c3bbe0f
MHM
9428
9429 default:
9430 goto unknown;
9431 }
9432
9433 case 't':
9434 if (name[1] == 'i' &&
9435 name[2] == 'm' &&
9436 name[3] == 'e' &&
9437 name[4] == 's')
9438 { /* times */
9439 return -KEY_times;
9440 }
9441
9442 goto unknown;
9443
9444 case 'u':
9445 switch (name[1])
9446 {
9447 case 'm':
9448 if (name[2] == 'a' &&
9449 name[3] == 's' &&
9450 name[4] == 'k')
9451 { /* umask */
9452 return -KEY_umask;
9453 }
9454
9455 goto unknown;
9456
9457 case 'n':
9458 switch (name[2])
9459 {
9460 case 'd':
9461 if (name[3] == 'e' &&
9462 name[4] == 'f')
9463 { /* undef */
9464 return KEY_undef;
9465 }
9466
9467 goto unknown;
9468
9469 case 't':
9470 if (name[3] == 'i')
9471 {
9472 switch (name[4])
9473 {
9474 case 'e':
9475 { /* untie */
1db4d195 9476 return -KEY_untie;
4c3bbe0f
MHM
9477 }
9478
4c3bbe0f
MHM
9479 case 'l':
9480 { /* until */
9481 return KEY_until;
9482 }
9483
4c3bbe0f
MHM
9484 default:
9485 goto unknown;
9486 }
9487 }
9488
9489 goto unknown;
9490
9491 default:
9492 goto unknown;
9493 }
9494
9495 case 't':
9496 if (name[2] == 'i' &&
9497 name[3] == 'm' &&
9498 name[4] == 'e')
9499 { /* utime */
9500 return -KEY_utime;
9501 }
9502
9503 goto unknown;
9504
9505 default:
9506 goto unknown;
9507 }
9508
9509 case 'w':
9510 switch (name[1])
9511 {
9512 case 'h':
9513 if (name[2] == 'i' &&
9514 name[3] == 'l' &&
9515 name[4] == 'e')
9516 { /* while */
9517 return KEY_while;
9518 }
9519
9520 goto unknown;
9521
9522 case 'r':
9523 if (name[2] == 'i' &&
9524 name[3] == 't' &&
9525 name[4] == 'e')
9526 { /* write */
9527 return -KEY_write;
9528 }
9529
9530 goto unknown;
9531
9532 default:
9533 goto unknown;
9534 }
9535
9536 default:
9537 goto unknown;
e2e1dd5a 9538 }
4c3bbe0f
MHM
9539
9540 case 6: /* 33 tokens of length 6 */
9541 switch (name[0])
9542 {
9543 case 'a':
9544 if (name[1] == 'c' &&
9545 name[2] == 'c' &&
9546 name[3] == 'e' &&
9547 name[4] == 'p' &&
9548 name[5] == 't')
9549 { /* accept */
9550 return -KEY_accept;
9551 }
9552
9553 goto unknown;
9554
9555 case 'c':
9556 switch (name[1])
9557 {
9558 case 'a':
9559 if (name[2] == 'l' &&
9560 name[3] == 'l' &&
9561 name[4] == 'e' &&
9562 name[5] == 'r')
9563 { /* caller */
9564 return -KEY_caller;
9565 }
9566
9567 goto unknown;
9568
9569 case 'h':
9570 if (name[2] == 'r' &&
9571 name[3] == 'o' &&
9572 name[4] == 'o' &&
9573 name[5] == 't')
9574 { /* chroot */
9575 return -KEY_chroot;
9576 }
9577
9578 goto unknown;
9579
9580 default:
9581 goto unknown;
9582 }
9583
9584 case 'd':
9585 if (name[1] == 'e' &&
9586 name[2] == 'l' &&
9587 name[3] == 'e' &&
9588 name[4] == 't' &&
9589 name[5] == 'e')
9590 { /* delete */
9591 return KEY_delete;
9592 }
9593
9594 goto unknown;
9595
9596 case 'e':
9597 switch (name[1])
9598 {
9599 case 'l':
9600 if (name[2] == 's' &&
9601 name[3] == 'e' &&
9602 name[4] == 'i' &&
9603 name[5] == 'f')
9604 { /* elseif */
9b387841 9605 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9606 }
9607
9608 goto unknown;
9609
9610 case 'x':
9611 if (name[2] == 'i' &&
9612 name[3] == 's' &&
9613 name[4] == 't' &&
9614 name[5] == 's')
9615 { /* exists */
9616 return KEY_exists;
9617 }
9618
9619 goto unknown;
9620
9621 default:
9622 goto unknown;
9623 }
9624
9625 case 'f':
9626 switch (name[1])
9627 {
9628 case 'i':
9629 if (name[2] == 'l' &&
9630 name[3] == 'e' &&
9631 name[4] == 'n' &&
9632 name[5] == 'o')
9633 { /* fileno */
9634 return -KEY_fileno;
9635 }
9636
9637 goto unknown;
9638
9639 case 'o':
9640 if (name[2] == 'r' &&
9641 name[3] == 'm' &&
9642 name[4] == 'a' &&
9643 name[5] == 't')
9644 { /* format */
9645 return KEY_format;
9646 }
9647
9648 goto unknown;
9649
9650 default:
9651 goto unknown;
9652 }
9653
9654 case 'g':
9655 if (name[1] == 'm' &&
9656 name[2] == 't' &&
9657 name[3] == 'i' &&
9658 name[4] == 'm' &&
9659 name[5] == 'e')
9660 { /* gmtime */
9661 return -KEY_gmtime;
9662 }
9663
9664 goto unknown;
9665
9666 case 'l':
9667 switch (name[1])
9668 {
9669 case 'e':
9670 if (name[2] == 'n' &&
9671 name[3] == 'g' &&
9672 name[4] == 't' &&
9673 name[5] == 'h')
9674 { /* length */
9675 return -KEY_length;
9676 }
9677
9678 goto unknown;
9679
9680 case 'i':
9681 if (name[2] == 's' &&
9682 name[3] == 't' &&
9683 name[4] == 'e' &&
9684 name[5] == 'n')
9685 { /* listen */
9686 return -KEY_listen;
9687 }
9688
9689 goto unknown;
9690
9691 default:
9692 goto unknown;
9693 }
9694
9695 case 'm':
9696 if (name[1] == 's' &&
9697 name[2] == 'g')
9698 {
9699 switch (name[3])
9700 {
9701 case 'c':
9702 if (name[4] == 't' &&
9703 name[5] == 'l')
9704 { /* msgctl */
9705 return -KEY_msgctl;
9706 }
9707
9708 goto unknown;
9709
9710 case 'g':
9711 if (name[4] == 'e' &&
9712 name[5] == 't')
9713 { /* msgget */
9714 return -KEY_msgget;
9715 }
9716
9717 goto unknown;
9718
9719 case 'r':
9720 if (name[4] == 'c' &&
9721 name[5] == 'v')
9722 { /* msgrcv */
9723 return -KEY_msgrcv;
9724 }
9725
9726 goto unknown;
9727
9728 case 's':
9729 if (name[4] == 'n' &&
9730 name[5] == 'd')
9731 { /* msgsnd */
9732 return -KEY_msgsnd;
9733 }
9734
9735 goto unknown;
9736
9737 default:
9738 goto unknown;
9739 }
9740 }
9741
9742 goto unknown;
9743
9744 case 'p':
9745 if (name[1] == 'r' &&
9746 name[2] == 'i' &&
9747 name[3] == 'n' &&
9748 name[4] == 't' &&
9749 name[5] == 'f')
9750 { /* printf */
9751 return KEY_printf;
9752 }
9753
9754 goto unknown;
9755
9756 case 'r':
9757 switch (name[1])
9758 {
9759 case 'e':
9760 switch (name[2])
9761 {
9762 case 'n':
9763 if (name[3] == 'a' &&
9764 name[4] == 'm' &&
9765 name[5] == 'e')
9766 { /* rename */
9767 return -KEY_rename;
9768 }
9769
9770 goto unknown;
9771
9772 case 't':
9773 if (name[3] == 'u' &&
9774 name[4] == 'r' &&
9775 name[5] == 'n')
9776 { /* return */
9777 return KEY_return;
9778 }
9779
9780 goto unknown;
9781
9782 default:
9783 goto unknown;
9784 }
9785
9786 case 'i':
9787 if (name[2] == 'n' &&
9788 name[3] == 'd' &&
9789 name[4] == 'e' &&
9790 name[5] == 'x')
9791 { /* rindex */
9792 return -KEY_rindex;
9793 }
9794
9795 goto unknown;
9796
9797 default:
9798 goto unknown;
9799 }
9800
9801 case 's':
9802 switch (name[1])
9803 {
9804 case 'c':
9805 if (name[2] == 'a' &&
9806 name[3] == 'l' &&
9807 name[4] == 'a' &&
9808 name[5] == 'r')
9809 { /* scalar */
9810 return KEY_scalar;
9811 }
9812
9813 goto unknown;
9814
9815 case 'e':
9816 switch (name[2])
9817 {
9818 case 'l':
9819 if (name[3] == 'e' &&
9820 name[4] == 'c' &&
9821 name[5] == 't')
9822 { /* select */
9823 return -KEY_select;
9824 }
9825
9826 goto unknown;
9827
9828 case 'm':
9829 switch (name[3])
9830 {
9831 case 'c':
9832 if (name[4] == 't' &&
9833 name[5] == 'l')
9834 { /* semctl */
9835 return -KEY_semctl;
9836 }
9837
9838 goto unknown;
9839
9840 case 'g':
9841 if (name[4] == 'e' &&
9842 name[5] == 't')
9843 { /* semget */
9844 return -KEY_semget;
9845 }
9846
9847 goto unknown;
9848
9849 default:
9850 goto unknown;
9851 }
9852
9853 default:
9854 goto unknown;
9855 }
9856
9857 case 'h':
9858 if (name[2] == 'm')
9859 {
9860 switch (name[3])
9861 {
9862 case 'c':
9863 if (name[4] == 't' &&
9864 name[5] == 'l')
9865 { /* shmctl */
9866 return -KEY_shmctl;
9867 }
9868
9869 goto unknown;
9870
9871 case 'g':
9872 if (name[4] == 'e' &&
9873 name[5] == 't')
9874 { /* shmget */
9875 return -KEY_shmget;
9876 }
9877
9878 goto unknown;
9879
9880 default:
9881 goto unknown;
9882 }
9883 }
9884
9885 goto unknown;
9886
9887 case 'o':
9888 if (name[2] == 'c' &&
9889 name[3] == 'k' &&
9890 name[4] == 'e' &&
9891 name[5] == 't')
9892 { /* socket */
9893 return -KEY_socket;
9894 }
9895
9896 goto unknown;
9897
9898 case 'p':
9899 if (name[2] == 'l' &&
9900 name[3] == 'i' &&
9901 name[4] == 'c' &&
9902 name[5] == 'e')
9903 { /* splice */
9904 return -KEY_splice;
9905 }
9906
9907 goto unknown;
9908
9909 case 'u':
9910 if (name[2] == 'b' &&
9911 name[3] == 's' &&
9912 name[4] == 't' &&
9913 name[5] == 'r')
9914 { /* substr */
9915 return -KEY_substr;
9916 }
9917
9918 goto unknown;
9919
9920 case 'y':
9921 if (name[2] == 's' &&
9922 name[3] == 't' &&
9923 name[4] == 'e' &&
9924 name[5] == 'm')
9925 { /* system */
9926 return -KEY_system;
9927 }
9928
9929 goto unknown;
9930
9931 default:
9932 goto unknown;
9933 }
9934
9935 case 'u':
9936 if (name[1] == 'n')
9937 {
9938 switch (name[2])
9939 {
9940 case 'l':
9941 switch (name[3])
9942 {
9943 case 'e':
9944 if (name[4] == 's' &&
9945 name[5] == 's')
9946 { /* unless */
9947 return KEY_unless;
9948 }
9949
9950 goto unknown;
9951
9952 case 'i':
9953 if (name[4] == 'n' &&
9954 name[5] == 'k')
9955 { /* unlink */
9956 return -KEY_unlink;
9957 }
9958
9959 goto unknown;
9960
9961 default:
9962 goto unknown;
9963 }
9964
9965 case 'p':
9966 if (name[3] == 'a' &&
9967 name[4] == 'c' &&
9968 name[5] == 'k')
9969 { /* unpack */
9970 return -KEY_unpack;
9971 }
9972
9973 goto unknown;
9974
9975 default:
9976 goto unknown;
9977 }
9978 }
9979
9980 goto unknown;
9981
9982 case 'v':
9983 if (name[1] == 'a' &&
9984 name[2] == 'l' &&
9985 name[3] == 'u' &&
9986 name[4] == 'e' &&
9987 name[5] == 's')
9988 { /* values */
9989 return -KEY_values;
9990 }
9991
9992 goto unknown;
9993
9994 default:
9995 goto unknown;
e2e1dd5a 9996 }
4c3bbe0f 9997
0d863452 9998 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9999 switch (name[0])
10000 {
10001 case 'D':
10002 if (name[1] == 'E' &&
10003 name[2] == 'S' &&
10004 name[3] == 'T' &&
10005 name[4] == 'R' &&
10006 name[5] == 'O' &&
10007 name[6] == 'Y')
10008 { /* DESTROY */
10009 return KEY_DESTROY;
10010 }
10011
10012 goto unknown;
10013
10014 case '_':
10015 if (name[1] == '_' &&
10016 name[2] == 'E' &&
10017 name[3] == 'N' &&
10018 name[4] == 'D' &&
10019 name[5] == '_' &&
10020 name[6] == '_')
10021 { /* __END__ */
10022 return KEY___END__;
10023 }
10024
10025 goto unknown;
10026
10027 case 'b':
10028 if (name[1] == 'i' &&
10029 name[2] == 'n' &&
10030 name[3] == 'm' &&
10031 name[4] == 'o' &&
10032 name[5] == 'd' &&
10033 name[6] == 'e')
10034 { /* binmode */
10035 return -KEY_binmode;
10036 }
10037
10038 goto unknown;
10039
10040 case 'c':
10041 if (name[1] == 'o' &&
10042 name[2] == 'n' &&
10043 name[3] == 'n' &&
10044 name[4] == 'e' &&
10045 name[5] == 'c' &&
10046 name[6] == 't')
10047 { /* connect */
10048 return -KEY_connect;
10049 }
10050
10051 goto unknown;
10052
10053 case 'd':
10054 switch (name[1])
10055 {
10056 case 'b':
10057 if (name[2] == 'm' &&
10058 name[3] == 'o' &&
10059 name[4] == 'p' &&
10060 name[5] == 'e' &&
10061 name[6] == 'n')
10062 { /* dbmopen */
10063 return -KEY_dbmopen;
10064 }
10065
10066 goto unknown;
10067
10068 case 'e':
0d863452
RH
10069 if (name[2] == 'f')
10070 {
10071 switch (name[3])
10072 {
10073 case 'a':
10074 if (name[4] == 'u' &&
10075 name[5] == 'l' &&
10076 name[6] == 't')
10077 { /* default */
5458a98a 10078 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10079 }
10080
10081 goto unknown;
10082
10083 case 'i':
10084 if (name[4] == 'n' &&
952306ac
RGS
10085 name[5] == 'e' &&
10086 name[6] == 'd')
10087 { /* defined */
10088 return KEY_defined;
10089 }
4c3bbe0f 10090
952306ac 10091 goto unknown;
4c3bbe0f 10092
952306ac
RGS
10093 default:
10094 goto unknown;
10095 }
0d863452
RH
10096 }
10097
10098 goto unknown;
10099
10100 default:
10101 goto unknown;
10102 }
4c3bbe0f
MHM
10103
10104 case 'f':
10105 if (name[1] == 'o' &&
10106 name[2] == 'r' &&
10107 name[3] == 'e' &&
10108 name[4] == 'a' &&
10109 name[5] == 'c' &&
10110 name[6] == 'h')
10111 { /* foreach */
10112 return KEY_foreach;
10113 }
10114
10115 goto unknown;
10116
10117 case 'g':
10118 if (name[1] == 'e' &&
10119 name[2] == 't' &&
10120 name[3] == 'p')
10121 {
10122 switch (name[4])
10123 {
10124 case 'g':
10125 if (name[5] == 'r' &&
10126 name[6] == 'p')
10127 { /* getpgrp */
10128 return -KEY_getpgrp;
10129 }
10130
10131 goto unknown;
10132
10133 case 'p':
10134 if (name[5] == 'i' &&
10135 name[6] == 'd')
10136 { /* getppid */
10137 return -KEY_getppid;
10138 }
10139
10140 goto unknown;
10141
10142 default:
10143 goto unknown;
10144 }
10145 }
10146
10147 goto unknown;
10148
10149 case 'l':
10150 if (name[1] == 'c' &&
10151 name[2] == 'f' &&
10152 name[3] == 'i' &&
10153 name[4] == 'r' &&
10154 name[5] == 's' &&
10155 name[6] == 't')
10156 { /* lcfirst */
10157 return -KEY_lcfirst;
10158 }
10159
10160 goto unknown;
10161
10162 case 'o':
10163 if (name[1] == 'p' &&
10164 name[2] == 'e' &&
10165 name[3] == 'n' &&
10166 name[4] == 'd' &&
10167 name[5] == 'i' &&
10168 name[6] == 'r')
10169 { /* opendir */
10170 return -KEY_opendir;
10171 }
10172
10173 goto unknown;
10174
10175 case 'p':
10176 if (name[1] == 'a' &&
10177 name[2] == 'c' &&
10178 name[3] == 'k' &&
10179 name[4] == 'a' &&
10180 name[5] == 'g' &&
10181 name[6] == 'e')
10182 { /* package */
10183 return KEY_package;
10184 }
10185
10186 goto unknown;
10187
10188 case 'r':
10189 if (name[1] == 'e')
10190 {
10191 switch (name[2])
10192 {
10193 case 'a':
10194 if (name[3] == 'd' &&
10195 name[4] == 'd' &&
10196 name[5] == 'i' &&
10197 name[6] == 'r')
10198 { /* readdir */
10199 return -KEY_readdir;
10200 }
10201
10202 goto unknown;
10203
10204 case 'q':
10205 if (name[3] == 'u' &&
10206 name[4] == 'i' &&
10207 name[5] == 'r' &&
10208 name[6] == 'e')
10209 { /* require */
10210 return KEY_require;
10211 }
10212
10213 goto unknown;
10214
10215 case 'v':
10216 if (name[3] == 'e' &&
10217 name[4] == 'r' &&
10218 name[5] == 's' &&
10219 name[6] == 'e')
10220 { /* reverse */
10221 return -KEY_reverse;
10222 }
10223
10224 goto unknown;
10225
10226 default:
10227 goto unknown;
10228 }
10229 }
10230
10231 goto unknown;
10232
10233 case 's':
10234 switch (name[1])
10235 {
10236 case 'e':
10237 switch (name[2])
10238 {
10239 case 'e':
10240 if (name[3] == 'k' &&
10241 name[4] == 'd' &&
10242 name[5] == 'i' &&
10243 name[6] == 'r')
10244 { /* seekdir */
10245 return -KEY_seekdir;
10246 }
10247
10248 goto unknown;
10249
10250 case 't':
10251 if (name[3] == 'p' &&
10252 name[4] == 'g' &&
10253 name[5] == 'r' &&
10254 name[6] == 'p')
10255 { /* setpgrp */
10256 return -KEY_setpgrp;
10257 }
10258
10259 goto unknown;
10260
10261 default:
10262 goto unknown;
10263 }
10264
10265 case 'h':
10266 if (name[2] == 'm' &&
10267 name[3] == 'r' &&
10268 name[4] == 'e' &&
10269 name[5] == 'a' &&
10270 name[6] == 'd')
10271 { /* shmread */
10272 return -KEY_shmread;
10273 }
10274
10275 goto unknown;
10276
10277 case 'p':
10278 if (name[2] == 'r' &&
10279 name[3] == 'i' &&
10280 name[4] == 'n' &&
10281 name[5] == 't' &&
10282 name[6] == 'f')
10283 { /* sprintf */
10284 return -KEY_sprintf;
10285 }
10286
10287 goto unknown;
10288
10289 case 'y':
10290 switch (name[2])
10291 {
10292 case 'm':
10293 if (name[3] == 'l' &&
10294 name[4] == 'i' &&
10295 name[5] == 'n' &&
10296 name[6] == 'k')
10297 { /* symlink */
10298 return -KEY_symlink;
10299 }
10300
10301 goto unknown;
10302
10303 case 's':
10304 switch (name[3])
10305 {
10306 case 'c':
10307 if (name[4] == 'a' &&
10308 name[5] == 'l' &&
10309 name[6] == 'l')
10310 { /* syscall */
10311 return -KEY_syscall;
10312 }
10313
10314 goto unknown;
10315
10316 case 'o':
10317 if (name[4] == 'p' &&
10318 name[5] == 'e' &&
10319 name[6] == 'n')
10320 { /* sysopen */
10321 return -KEY_sysopen;
10322 }
10323
10324 goto unknown;
10325
10326 case 'r':
10327 if (name[4] == 'e' &&
10328 name[5] == 'a' &&
10329 name[6] == 'd')
10330 { /* sysread */
10331 return -KEY_sysread;
10332 }
10333
10334 goto unknown;
10335
10336 case 's':
10337 if (name[4] == 'e' &&
10338 name[5] == 'e' &&
10339 name[6] == 'k')
10340 { /* sysseek */
10341 return -KEY_sysseek;
10342 }
10343
10344 goto unknown;
10345
10346 default:
10347 goto unknown;
10348 }
10349
10350 default:
10351 goto unknown;
10352 }
10353
10354 default:
10355 goto unknown;
10356 }
10357
10358 case 't':
10359 if (name[1] == 'e' &&
10360 name[2] == 'l' &&
10361 name[3] == 'l' &&
10362 name[4] == 'd' &&
10363 name[5] == 'i' &&
10364 name[6] == 'r')
10365 { /* telldir */
10366 return -KEY_telldir;
10367 }
10368
10369 goto unknown;
10370
10371 case 'u':
10372 switch (name[1])
10373 {
10374 case 'c':
10375 if (name[2] == 'f' &&
10376 name[3] == 'i' &&
10377 name[4] == 'r' &&
10378 name[5] == 's' &&
10379 name[6] == 't')
10380 { /* ucfirst */
10381 return -KEY_ucfirst;
10382 }
10383
10384 goto unknown;
10385
10386 case 'n':
10387 if (name[2] == 's' &&
10388 name[3] == 'h' &&
10389 name[4] == 'i' &&
10390 name[5] == 'f' &&
10391 name[6] == 't')
10392 { /* unshift */
10393 return -KEY_unshift;
10394 }
10395
10396 goto unknown;
10397
10398 default:
10399 goto unknown;
10400 }
10401
10402 case 'w':
10403 if (name[1] == 'a' &&
10404 name[2] == 'i' &&
10405 name[3] == 't' &&
10406 name[4] == 'p' &&
10407 name[5] == 'i' &&
10408 name[6] == 'd')
10409 { /* waitpid */
10410 return -KEY_waitpid;
10411 }
10412
10413 goto unknown;
10414
10415 default:
10416 goto unknown;
10417 }
10418
10419 case 8: /* 26 tokens of length 8 */
10420 switch (name[0])
10421 {
10422 case 'A':
10423 if (name[1] == 'U' &&
10424 name[2] == 'T' &&
10425 name[3] == 'O' &&
10426 name[4] == 'L' &&
10427 name[5] == 'O' &&
10428 name[6] == 'A' &&
10429 name[7] == 'D')
10430 { /* AUTOLOAD */
10431 return KEY_AUTOLOAD;
10432 }
10433
10434 goto unknown;
10435
10436 case '_':
10437 if (name[1] == '_')
10438 {
10439 switch (name[2])
10440 {
10441 case 'D':
10442 if (name[3] == 'A' &&
10443 name[4] == 'T' &&
10444 name[5] == 'A' &&
10445 name[6] == '_' &&
10446 name[7] == '_')
10447 { /* __DATA__ */
10448 return KEY___DATA__;
10449 }
10450
10451 goto unknown;
10452
10453 case 'F':
10454 if (name[3] == 'I' &&
10455 name[4] == 'L' &&
10456 name[5] == 'E' &&
10457 name[6] == '_' &&
10458 name[7] == '_')
10459 { /* __FILE__ */
10460 return -KEY___FILE__;
10461 }
10462
10463 goto unknown;
10464
10465 case 'L':
10466 if (name[3] == 'I' &&
10467 name[4] == 'N' &&
10468 name[5] == 'E' &&
10469 name[6] == '_' &&
10470 name[7] == '_')
10471 { /* __LINE__ */
10472 return -KEY___LINE__;
10473 }
10474
10475 goto unknown;
10476
10477 default:
10478 goto unknown;
10479 }
10480 }
10481
10482 goto unknown;
10483
10484 case 'c':
10485 switch (name[1])
10486 {
10487 case 'l':
10488 if (name[2] == 'o' &&
10489 name[3] == 's' &&
10490 name[4] == 'e' &&
10491 name[5] == 'd' &&
10492 name[6] == 'i' &&
10493 name[7] == 'r')
10494 { /* closedir */
10495 return -KEY_closedir;
10496 }
10497
10498 goto unknown;
10499
10500 case 'o':
10501 if (name[2] == 'n' &&
10502 name[3] == 't' &&
10503 name[4] == 'i' &&
10504 name[5] == 'n' &&
10505 name[6] == 'u' &&
10506 name[7] == 'e')
10507 { /* continue */
10508 return -KEY_continue;
10509 }
10510
10511 goto unknown;
10512
10513 default:
10514 goto unknown;
10515 }
10516
10517 case 'd':
10518 if (name[1] == 'b' &&
10519 name[2] == 'm' &&
10520 name[3] == 'c' &&
10521 name[4] == 'l' &&
10522 name[5] == 'o' &&
10523 name[6] == 's' &&
10524 name[7] == 'e')
10525 { /* dbmclose */
10526 return -KEY_dbmclose;
10527 }
10528
10529 goto unknown;
10530
10531 case 'e':
10532 if (name[1] == 'n' &&
10533 name[2] == 'd')
10534 {
10535 switch (name[3])
10536 {
10537 case 'g':
10538 if (name[4] == 'r' &&
10539 name[5] == 'e' &&
10540 name[6] == 'n' &&
10541 name[7] == 't')
10542 { /* endgrent */
10543 return -KEY_endgrent;
10544 }
10545
10546 goto unknown;
10547
10548 case 'p':
10549 if (name[4] == 'w' &&
10550 name[5] == 'e' &&
10551 name[6] == 'n' &&
10552 name[7] == 't')
10553 { /* endpwent */
10554 return -KEY_endpwent;
10555 }
10556
10557 goto unknown;
10558
10559 default:
10560 goto unknown;
10561 }
10562 }
10563
10564 goto unknown;
10565
10566 case 'f':
10567 if (name[1] == 'o' &&
10568 name[2] == 'r' &&
10569 name[3] == 'm' &&
10570 name[4] == 'l' &&
10571 name[5] == 'i' &&
10572 name[6] == 'n' &&
10573 name[7] == 'e')
10574 { /* formline */
10575 return -KEY_formline;
10576 }
10577
10578 goto unknown;
10579
10580 case 'g':
10581 if (name[1] == 'e' &&
10582 name[2] == 't')
10583 {
10584 switch (name[3])
10585 {
10586 case 'g':
10587 if (name[4] == 'r')
10588 {
10589 switch (name[5])
10590 {
10591 case 'e':
10592 if (name[6] == 'n' &&
10593 name[7] == 't')
10594 { /* getgrent */
10595 return -KEY_getgrent;
10596 }
10597
10598 goto unknown;
10599
10600 case 'g':
10601 if (name[6] == 'i' &&
10602 name[7] == 'd')
10603 { /* getgrgid */
10604 return -KEY_getgrgid;
10605 }
10606
10607 goto unknown;
10608
10609 case 'n':
10610 if (name[6] == 'a' &&
10611 name[7] == 'm')
10612 { /* getgrnam */
10613 return -KEY_getgrnam;
10614 }
10615
10616 goto unknown;
10617
10618 default:
10619 goto unknown;
10620 }
10621 }
10622
10623 goto unknown;
10624
10625 case 'l':
10626 if (name[4] == 'o' &&
10627 name[5] == 'g' &&
10628 name[6] == 'i' &&
10629 name[7] == 'n')
10630 { /* getlogin */
10631 return -KEY_getlogin;
10632 }
10633
10634 goto unknown;
10635
10636 case 'p':
10637 if (name[4] == 'w')
10638 {
10639 switch (name[5])
10640 {
10641 case 'e':
10642 if (name[6] == 'n' &&
10643 name[7] == 't')
10644 { /* getpwent */
10645 return -KEY_getpwent;
10646 }
10647
10648 goto unknown;
10649
10650 case 'n':
10651 if (name[6] == 'a' &&
10652 name[7] == 'm')
10653 { /* getpwnam */
10654 return -KEY_getpwnam;
10655 }
10656
10657 goto unknown;
10658
10659 case 'u':
10660 if (name[6] == 'i' &&
10661 name[7] == 'd')
10662 { /* getpwuid */
10663 return -KEY_getpwuid;
10664 }
10665
10666 goto unknown;
10667
10668 default:
10669 goto unknown;
10670 }
10671 }
10672
10673 goto unknown;
10674
10675 default:
10676 goto unknown;
10677 }
10678 }
10679
10680 goto unknown;
10681
10682 case 'r':
10683 if (name[1] == 'e' &&
10684 name[2] == 'a' &&
10685 name[3] == 'd')
10686 {
10687 switch (name[4])
10688 {
10689 case 'l':
10690 if (name[5] == 'i' &&
10691 name[6] == 'n')
10692 {
10693 switch (name[7])
10694 {
10695 case 'e':
10696 { /* readline */
10697 return -KEY_readline;
10698 }
10699
4c3bbe0f
MHM
10700 case 'k':
10701 { /* readlink */
10702 return -KEY_readlink;
10703 }
10704
4c3bbe0f
MHM
10705 default:
10706 goto unknown;
10707 }
10708 }
10709
10710 goto unknown;
10711
10712 case 'p':
10713 if (name[5] == 'i' &&
10714 name[6] == 'p' &&
10715 name[7] == 'e')
10716 { /* readpipe */
10717 return -KEY_readpipe;
10718 }
10719
10720 goto unknown;
10721
10722 default:
10723 goto unknown;
10724 }
10725 }
10726
10727 goto unknown;
10728
10729 case 's':
10730 switch (name[1])
10731 {
10732 case 'e':
10733 if (name[2] == 't')
10734 {
10735 switch (name[3])
10736 {
10737 case 'g':
10738 if (name[4] == 'r' &&
10739 name[5] == 'e' &&
10740 name[6] == 'n' &&
10741 name[7] == 't')
10742 { /* setgrent */
10743 return -KEY_setgrent;
10744 }
10745
10746 goto unknown;
10747
10748 case 'p':
10749 if (name[4] == 'w' &&
10750 name[5] == 'e' &&
10751 name[6] == 'n' &&
10752 name[7] == 't')
10753 { /* setpwent */
10754 return -KEY_setpwent;
10755 }
10756
10757 goto unknown;
10758
10759 default:
10760 goto unknown;
10761 }
10762 }
10763
10764 goto unknown;
10765
10766 case 'h':
10767 switch (name[2])
10768 {
10769 case 'm':
10770 if (name[3] == 'w' &&
10771 name[4] == 'r' &&
10772 name[5] == 'i' &&
10773 name[6] == 't' &&
10774 name[7] == 'e')
10775 { /* shmwrite */
10776 return -KEY_shmwrite;
10777 }
10778
10779 goto unknown;
10780
10781 case 'u':
10782 if (name[3] == 't' &&
10783 name[4] == 'd' &&
10784 name[5] == 'o' &&
10785 name[6] == 'w' &&
10786 name[7] == 'n')
10787 { /* shutdown */
10788 return -KEY_shutdown;
10789 }
10790
10791 goto unknown;
10792
10793 default:
10794 goto unknown;
10795 }
10796
10797 case 'y':
10798 if (name[2] == 's' &&
10799 name[3] == 'w' &&
10800 name[4] == 'r' &&
10801 name[5] == 'i' &&
10802 name[6] == 't' &&
10803 name[7] == 'e')
10804 { /* syswrite */
10805 return -KEY_syswrite;
10806 }
10807
10808 goto unknown;
10809
10810 default:
10811 goto unknown;
10812 }
10813
10814 case 't':
10815 if (name[1] == 'r' &&
10816 name[2] == 'u' &&
10817 name[3] == 'n' &&
10818 name[4] == 'c' &&
10819 name[5] == 'a' &&
10820 name[6] == 't' &&
10821 name[7] == 'e')
10822 { /* truncate */
10823 return -KEY_truncate;
10824 }
10825
10826 goto unknown;
10827
10828 default:
10829 goto unknown;
10830 }
10831
3c10abe3 10832 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10833 switch (name[0])
10834 {
3c10abe3
AG
10835 case 'U':
10836 if (name[1] == 'N' &&
10837 name[2] == 'I' &&
10838 name[3] == 'T' &&
10839 name[4] == 'C' &&
10840 name[5] == 'H' &&
10841 name[6] == 'E' &&
10842 name[7] == 'C' &&
10843 name[8] == 'K')
10844 { /* UNITCHECK */
10845 return KEY_UNITCHECK;
10846 }
10847
10848 goto unknown;
10849
4c3bbe0f
MHM
10850 case 'e':
10851 if (name[1] == 'n' &&
10852 name[2] == 'd' &&
10853 name[3] == 'n' &&
10854 name[4] == 'e' &&
10855 name[5] == 't' &&
10856 name[6] == 'e' &&
10857 name[7] == 'n' &&
10858 name[8] == 't')
10859 { /* endnetent */
10860 return -KEY_endnetent;
10861 }
10862
10863 goto unknown;
10864
10865 case 'g':
10866 if (name[1] == 'e' &&
10867 name[2] == 't' &&
10868 name[3] == 'n' &&
10869 name[4] == 'e' &&
10870 name[5] == 't' &&
10871 name[6] == 'e' &&
10872 name[7] == 'n' &&
10873 name[8] == 't')
10874 { /* getnetent */
10875 return -KEY_getnetent;
10876 }
10877
10878 goto unknown;
10879
10880 case 'l':
10881 if (name[1] == 'o' &&
10882 name[2] == 'c' &&
10883 name[3] == 'a' &&
10884 name[4] == 'l' &&
10885 name[5] == 't' &&
10886 name[6] == 'i' &&
10887 name[7] == 'm' &&
10888 name[8] == 'e')
10889 { /* localtime */
10890 return -KEY_localtime;
10891 }
10892
10893 goto unknown;
10894
10895 case 'p':
10896 if (name[1] == 'r' &&
10897 name[2] == 'o' &&
10898 name[3] == 't' &&
10899 name[4] == 'o' &&
10900 name[5] == 't' &&
10901 name[6] == 'y' &&
10902 name[7] == 'p' &&
10903 name[8] == 'e')
10904 { /* prototype */
10905 return KEY_prototype;
10906 }
10907
10908 goto unknown;
10909
10910 case 'q':
10911 if (name[1] == 'u' &&
10912 name[2] == 'o' &&
10913 name[3] == 't' &&
10914 name[4] == 'e' &&
10915 name[5] == 'm' &&
10916 name[6] == 'e' &&
10917 name[7] == 't' &&
10918 name[8] == 'a')
10919 { /* quotemeta */
10920 return -KEY_quotemeta;
10921 }
10922
10923 goto unknown;
10924
10925 case 'r':
10926 if (name[1] == 'e' &&
10927 name[2] == 'w' &&
10928 name[3] == 'i' &&
10929 name[4] == 'n' &&
10930 name[5] == 'd' &&
10931 name[6] == 'd' &&
10932 name[7] == 'i' &&
10933 name[8] == 'r')
10934 { /* rewinddir */
10935 return -KEY_rewinddir;
10936 }
10937
10938 goto unknown;
10939
10940 case 's':
10941 if (name[1] == 'e' &&
10942 name[2] == 't' &&
10943 name[3] == 'n' &&
10944 name[4] == 'e' &&
10945 name[5] == 't' &&
10946 name[6] == 'e' &&
10947 name[7] == 'n' &&
10948 name[8] == 't')
10949 { /* setnetent */
10950 return -KEY_setnetent;
10951 }
10952
10953 goto unknown;
10954
10955 case 'w':
10956 if (name[1] == 'a' &&
10957 name[2] == 'n' &&
10958 name[3] == 't' &&
10959 name[4] == 'a' &&
10960 name[5] == 'r' &&
10961 name[6] == 'r' &&
10962 name[7] == 'a' &&
10963 name[8] == 'y')
10964 { /* wantarray */
10965 return -KEY_wantarray;
10966 }
10967
10968 goto unknown;
10969
10970 default:
10971 goto unknown;
10972 }
10973
10974 case 10: /* 9 tokens of length 10 */
10975 switch (name[0])
10976 {
10977 case 'e':
10978 if (name[1] == 'n' &&
10979 name[2] == 'd')
10980 {
10981 switch (name[3])
10982 {
10983 case 'h':
10984 if (name[4] == 'o' &&
10985 name[5] == 's' &&
10986 name[6] == 't' &&
10987 name[7] == 'e' &&
10988 name[8] == 'n' &&
10989 name[9] == 't')
10990 { /* endhostent */
10991 return -KEY_endhostent;
10992 }
10993
10994 goto unknown;
10995
10996 case 's':
10997 if (name[4] == 'e' &&
10998 name[5] == 'r' &&
10999 name[6] == 'v' &&
11000 name[7] == 'e' &&
11001 name[8] == 'n' &&
11002 name[9] == 't')
11003 { /* endservent */
11004 return -KEY_endservent;
11005 }
11006
11007 goto unknown;
11008
11009 default:
11010 goto unknown;
11011 }
11012 }
11013
11014 goto unknown;
11015
11016 case 'g':
11017 if (name[1] == 'e' &&
11018 name[2] == 't')
11019 {
11020 switch (name[3])
11021 {
11022 case 'h':
11023 if (name[4] == 'o' &&
11024 name[5] == 's' &&
11025 name[6] == 't' &&
11026 name[7] == 'e' &&
11027 name[8] == 'n' &&
11028 name[9] == 't')
11029 { /* gethostent */
11030 return -KEY_gethostent;
11031 }
11032
11033 goto unknown;
11034
11035 case 's':
11036 switch (name[4])
11037 {
11038 case 'e':
11039 if (name[5] == 'r' &&
11040 name[6] == 'v' &&
11041 name[7] == 'e' &&
11042 name[8] == 'n' &&
11043 name[9] == 't')
11044 { /* getservent */
11045 return -KEY_getservent;
11046 }
11047
11048 goto unknown;
11049
11050 case 'o':
11051 if (name[5] == 'c' &&
11052 name[6] == 'k' &&
11053 name[7] == 'o' &&
11054 name[8] == 'p' &&
11055 name[9] == 't')
11056 { /* getsockopt */
11057 return -KEY_getsockopt;
11058 }
11059
11060 goto unknown;
11061
11062 default:
11063 goto unknown;
11064 }
11065
11066 default:
11067 goto unknown;
11068 }
11069 }
11070
11071 goto unknown;
11072
11073 case 's':
11074 switch (name[1])
11075 {
11076 case 'e':
11077 if (name[2] == 't')
11078 {
11079 switch (name[3])
11080 {
11081 case 'h':
11082 if (name[4] == 'o' &&
11083 name[5] == 's' &&
11084 name[6] == 't' &&
11085 name[7] == 'e' &&
11086 name[8] == 'n' &&
11087 name[9] == 't')
11088 { /* sethostent */
11089 return -KEY_sethostent;
11090 }
11091
11092 goto unknown;
11093
11094 case 's':
11095 switch (name[4])
11096 {
11097 case 'e':
11098 if (name[5] == 'r' &&
11099 name[6] == 'v' &&
11100 name[7] == 'e' &&
11101 name[8] == 'n' &&
11102 name[9] == 't')
11103 { /* setservent */
11104 return -KEY_setservent;
11105 }
11106
11107 goto unknown;
11108
11109 case 'o':
11110 if (name[5] == 'c' &&
11111 name[6] == 'k' &&
11112 name[7] == 'o' &&
11113 name[8] == 'p' &&
11114 name[9] == 't')
11115 { /* setsockopt */
11116 return -KEY_setsockopt;
11117 }
11118
11119 goto unknown;
11120
11121 default:
11122 goto unknown;
11123 }
11124
11125 default:
11126 goto unknown;
11127 }
11128 }
11129
11130 goto unknown;
11131
11132 case 'o':
11133 if (name[2] == 'c' &&
11134 name[3] == 'k' &&
11135 name[4] == 'e' &&
11136 name[5] == 't' &&
11137 name[6] == 'p' &&
11138 name[7] == 'a' &&
11139 name[8] == 'i' &&
11140 name[9] == 'r')
11141 { /* socketpair */
11142 return -KEY_socketpair;
11143 }
11144
11145 goto unknown;
11146
11147 default:
11148 goto unknown;
11149 }
11150
11151 default:
11152 goto unknown;
e2e1dd5a 11153 }
4c3bbe0f
MHM
11154
11155 case 11: /* 8 tokens of length 11 */
11156 switch (name[0])
11157 {
11158 case '_':
11159 if (name[1] == '_' &&
11160 name[2] == 'P' &&
11161 name[3] == 'A' &&
11162 name[4] == 'C' &&
11163 name[5] == 'K' &&
11164 name[6] == 'A' &&
11165 name[7] == 'G' &&
11166 name[8] == 'E' &&
11167 name[9] == '_' &&
11168 name[10] == '_')
11169 { /* __PACKAGE__ */
11170 return -KEY___PACKAGE__;
11171 }
11172
11173 goto unknown;
11174
11175 case 'e':
11176 if (name[1] == 'n' &&
11177 name[2] == 'd' &&
11178 name[3] == 'p' &&
11179 name[4] == 'r' &&
11180 name[5] == 'o' &&
11181 name[6] == 't' &&
11182 name[7] == 'o' &&
11183 name[8] == 'e' &&
11184 name[9] == 'n' &&
11185 name[10] == 't')
11186 { /* endprotoent */
11187 return -KEY_endprotoent;
11188 }
11189
11190 goto unknown;
11191
11192 case 'g':
11193 if (name[1] == 'e' &&
11194 name[2] == 't')
11195 {
11196 switch (name[3])
11197 {
11198 case 'p':
11199 switch (name[4])
11200 {
11201 case 'e':
11202 if (name[5] == 'e' &&
11203 name[6] == 'r' &&
11204 name[7] == 'n' &&
11205 name[8] == 'a' &&
11206 name[9] == 'm' &&
11207 name[10] == 'e')
11208 { /* getpeername */
11209 return -KEY_getpeername;
11210 }
11211
11212 goto unknown;
11213
11214 case 'r':
11215 switch (name[5])
11216 {
11217 case 'i':
11218 if (name[6] == 'o' &&
11219 name[7] == 'r' &&
11220 name[8] == 'i' &&
11221 name[9] == 't' &&
11222 name[10] == 'y')
11223 { /* getpriority */
11224 return -KEY_getpriority;
11225 }
11226
11227 goto unknown;
11228
11229 case 'o':
11230 if (name[6] == 't' &&
11231 name[7] == 'o' &&
11232 name[8] == 'e' &&
11233 name[9] == 'n' &&
11234 name[10] == 't')
11235 { /* getprotoent */
11236 return -KEY_getprotoent;
11237 }
11238
11239 goto unknown;
11240
11241 default:
11242 goto unknown;
11243 }
11244
11245 default:
11246 goto unknown;
11247 }
11248
11249 case 's':
11250 if (name[4] == 'o' &&
11251 name[5] == 'c' &&
11252 name[6] == 'k' &&
11253 name[7] == 'n' &&
11254 name[8] == 'a' &&
11255 name[9] == 'm' &&
11256 name[10] == 'e')
11257 { /* getsockname */
11258 return -KEY_getsockname;
11259 }
11260
11261 goto unknown;
11262
11263 default:
11264 goto unknown;
11265 }
11266 }
11267
11268 goto unknown;
11269
11270 case 's':
11271 if (name[1] == 'e' &&
11272 name[2] == 't' &&
11273 name[3] == 'p' &&
11274 name[4] == 'r')
11275 {
11276 switch (name[5])
11277 {
11278 case 'i':
11279 if (name[6] == 'o' &&
11280 name[7] == 'r' &&
11281 name[8] == 'i' &&
11282 name[9] == 't' &&
11283 name[10] == 'y')
11284 { /* setpriority */
11285 return -KEY_setpriority;
11286 }
11287
11288 goto unknown;
11289
11290 case 'o':
11291 if (name[6] == 't' &&
11292 name[7] == 'o' &&
11293 name[8] == 'e' &&
11294 name[9] == 'n' &&
11295 name[10] == 't')
11296 { /* setprotoent */
11297 return -KEY_setprotoent;
11298 }
11299
11300 goto unknown;
11301
11302 default:
11303 goto unknown;
11304 }
11305 }
11306
11307 goto unknown;
11308
11309 default:
11310 goto unknown;
e2e1dd5a 11311 }
4c3bbe0f
MHM
11312
11313 case 12: /* 2 tokens of length 12 */
11314 if (name[0] == 'g' &&
11315 name[1] == 'e' &&
11316 name[2] == 't' &&
11317 name[3] == 'n' &&
11318 name[4] == 'e' &&
11319 name[5] == 't' &&
11320 name[6] == 'b' &&
11321 name[7] == 'y')
11322 {
11323 switch (name[8])
11324 {
11325 case 'a':
11326 if (name[9] == 'd' &&
11327 name[10] == 'd' &&
11328 name[11] == 'r')
11329 { /* getnetbyaddr */
11330 return -KEY_getnetbyaddr;
11331 }
11332
11333 goto unknown;
11334
11335 case 'n':
11336 if (name[9] == 'a' &&
11337 name[10] == 'm' &&
11338 name[11] == 'e')
11339 { /* getnetbyname */
11340 return -KEY_getnetbyname;
11341 }
11342
11343 goto unknown;
11344
11345 default:
11346 goto unknown;
11347 }
e2e1dd5a 11348 }
4c3bbe0f
MHM
11349
11350 goto unknown;
11351
11352 case 13: /* 4 tokens of length 13 */
11353 if (name[0] == 'g' &&
11354 name[1] == 'e' &&
11355 name[2] == 't')
11356 {
11357 switch (name[3])
11358 {
11359 case 'h':
11360 if (name[4] == 'o' &&
11361 name[5] == 's' &&
11362 name[6] == 't' &&
11363 name[7] == 'b' &&
11364 name[8] == 'y')
11365 {
11366 switch (name[9])
11367 {
11368 case 'a':
11369 if (name[10] == 'd' &&
11370 name[11] == 'd' &&
11371 name[12] == 'r')
11372 { /* gethostbyaddr */
11373 return -KEY_gethostbyaddr;
11374 }
11375
11376 goto unknown;
11377
11378 case 'n':
11379 if (name[10] == 'a' &&
11380 name[11] == 'm' &&
11381 name[12] == 'e')
11382 { /* gethostbyname */
11383 return -KEY_gethostbyname;
11384 }
11385
11386 goto unknown;
11387
11388 default:
11389 goto unknown;
11390 }
11391 }
11392
11393 goto unknown;
11394
11395 case 's':
11396 if (name[4] == 'e' &&
11397 name[5] == 'r' &&
11398 name[6] == 'v' &&
11399 name[7] == 'b' &&
11400 name[8] == 'y')
11401 {
11402 switch (name[9])
11403 {
11404 case 'n':
11405 if (name[10] == 'a' &&
11406 name[11] == 'm' &&
11407 name[12] == 'e')
11408 { /* getservbyname */
11409 return -KEY_getservbyname;
11410 }
11411
11412 goto unknown;
11413
11414 case 'p':
11415 if (name[10] == 'o' &&
11416 name[11] == 'r' &&
11417 name[12] == 't')
11418 { /* getservbyport */
11419 return -KEY_getservbyport;
11420 }
11421
11422 goto unknown;
11423
11424 default:
11425 goto unknown;
11426 }
11427 }
11428
11429 goto unknown;
11430
11431 default:
11432 goto unknown;
11433 }
e2e1dd5a 11434 }
4c3bbe0f
MHM
11435
11436 goto unknown;
11437
11438 case 14: /* 1 tokens of length 14 */
11439 if (name[0] == 'g' &&
11440 name[1] == 'e' &&
11441 name[2] == 't' &&
11442 name[3] == 'p' &&
11443 name[4] == 'r' &&
11444 name[5] == 'o' &&
11445 name[6] == 't' &&
11446 name[7] == 'o' &&
11447 name[8] == 'b' &&
11448 name[9] == 'y' &&
11449 name[10] == 'n' &&
11450 name[11] == 'a' &&
11451 name[12] == 'm' &&
11452 name[13] == 'e')
11453 { /* getprotobyname */
11454 return -KEY_getprotobyname;
11455 }
11456
11457 goto unknown;
11458
11459 case 16: /* 1 tokens of length 16 */
11460 if (name[0] == 'g' &&
11461 name[1] == 'e' &&
11462 name[2] == 't' &&
11463 name[3] == 'p' &&
11464 name[4] == 'r' &&
11465 name[5] == 'o' &&
11466 name[6] == 't' &&
11467 name[7] == 'o' &&
11468 name[8] == 'b' &&
11469 name[9] == 'y' &&
11470 name[10] == 'n' &&
11471 name[11] == 'u' &&
11472 name[12] == 'm' &&
11473 name[13] == 'b' &&
11474 name[14] == 'e' &&
11475 name[15] == 'r')
11476 { /* getprotobynumber */
11477 return -KEY_getprotobynumber;
11478 }
11479
11480 goto unknown;
11481
11482 default:
11483 goto unknown;
e2e1dd5a 11484 }
4c3bbe0f
MHM
11485
11486unknown:
e2e1dd5a 11487 return 0;
a687059c
LW
11488}
11489
76e3520e 11490STATIC void
c94115d8 11491S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11492{
97aff369 11493 dVAR;
2f3197b3 11494
7918f24d
NC
11495 PERL_ARGS_ASSERT_CHECKCOMMA;
11496
d008e5eb 11497 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11498 if (ckWARN(WARN_SYNTAX)) {
11499 int level = 1;
26ff0806 11500 const char *w;
d008e5eb
GS
11501 for (w = s+2; *w && level; w++) {
11502 if (*w == '(')
11503 ++level;
11504 else if (*w == ')')
11505 --level;
11506 }
888fea98
NC
11507 while (isSPACE(*w))
11508 ++w;
b1439985
RGS
11509 /* the list of chars below is for end of statements or
11510 * block / parens, boolean operators (&&, ||, //) and branch
11511 * constructs (or, and, if, until, unless, while, err, for).
11512 * Not a very solid hack... */
11513 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11514 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11515 "%s (...) interpreted as function",name);
d008e5eb 11516 }
2f3197b3 11517 }
3280af22 11518 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11519 s++;
a687059c
LW
11520 if (*s == '(')
11521 s++;
3280af22 11522 while (s < PL_bufend && isSPACE(*s))
a687059c 11523 s++;
7e2040f0 11524 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11525 const char * const w = s++;
7e2040f0 11526 while (isALNUM_lazy_if(s,UTF))
a687059c 11527 s++;
3280af22 11528 while (s < PL_bufend && isSPACE(*s))
a687059c 11529 s++;
e929a76b 11530 if (*s == ',') {
c94115d8 11531 GV* gv;
5458a98a 11532 if (keyword(w, s - w, 0))
e929a76b 11533 return;
c94115d8
NC
11534
11535 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11536 if (gv && GvCVu(gv))
abbb3198 11537 return;
cea2e8a9 11538 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11539 }
11540 }
11541}
11542
423cee85
JH
11543/* Either returns sv, or mortalizes sv and returns a new SV*.
11544 Best used as sv=new_constant(..., sv, ...).
11545 If s, pv are NULL, calls subroutine with one argument,
11546 and type is used with error messages only. */
11547
b3ac6de7 11548STATIC SV *
eb0d8d16
NC
11549S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11550 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11551{
27da23d5 11552 dVAR; dSP;
890ce7af 11553 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11554 SV *res;
b3ac6de7
IZ
11555 SV **cvp;
11556 SV *cv, *typesv;
89e33a05 11557 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11558
7918f24d
NC
11559 PERL_ARGS_ASSERT_NEW_CONSTANT;
11560
f0af216f 11561 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11562 SV *msg;
11563
10edeb5d
JH
11564 why2 = (const char *)
11565 (strEQ(key,"charnames")
11566 ? "(possibly a missing \"use charnames ...\")"
11567 : "");
4e553d73 11568 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11569 (type ? type: "undef"), why2);
11570
11571 /* This is convoluted and evil ("goto considered harmful")
11572 * but I do not understand the intricacies of all the different
11573 * failure modes of %^H in here. The goal here is to make
11574 * the most probable error message user-friendly. --jhi */
11575
11576 goto msgdone;
11577
423cee85 11578 report:
4e553d73 11579 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11580 (type ? type: "undef"), why1, why2, why3);
41ab332f 11581 msgdone:
95a20fc0 11582 yyerror(SvPVX_const(msg));
423cee85
JH
11583 SvREFCNT_dec(msg);
11584 return sv;
11585 }
ff3f963a
KW
11586
11587 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11588 if (PL_error_count > 0 && strEQ(key,"charnames"))
11589 return &PL_sv_undef;
ff3f963a 11590
eb0d8d16 11591 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11592 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11593 why1 = "$^H{";
11594 why2 = key;
f0af216f 11595 why3 = "} is not defined";
423cee85 11596 goto report;
b3ac6de7
IZ
11597 }
11598 sv_2mortal(sv); /* Parent created it permanently */
11599 cv = *cvp;
423cee85 11600 if (!pv && s)
59cd0e26 11601 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11602 if (type && pv)
59cd0e26 11603 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11604 else
423cee85 11605 typesv = &PL_sv_undef;
4e553d73 11606
e788e7d3 11607 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11608 ENTER ;
11609 SAVETMPS;
4e553d73 11610
423cee85 11611 PUSHMARK(SP) ;
a5845cb7 11612 EXTEND(sp, 3);
423cee85
JH
11613 if (pv)
11614 PUSHs(pv);
b3ac6de7 11615 PUSHs(sv);
423cee85
JH
11616 if (pv)
11617 PUSHs(typesv);
b3ac6de7 11618 PUTBACK;
423cee85 11619 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11620
423cee85 11621 SPAGAIN ;
4e553d73 11622
423cee85 11623 /* Check the eval first */
9b0e499b 11624 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11625 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11626 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11627 (void)POPs;
b37c2d43 11628 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11629 }
11630 else {
11631 res = POPs;
b37c2d43 11632 SvREFCNT_inc_simple_void(res);
423cee85 11633 }
4e553d73 11634
423cee85
JH
11635 PUTBACK ;
11636 FREETMPS ;
11637 LEAVE ;
b3ac6de7 11638 POPSTACK;
4e553d73 11639
b3ac6de7 11640 if (!SvOK(res)) {
423cee85
JH
11641 why1 = "Call to &{$^H{";
11642 why2 = key;
f0af216f 11643 why3 = "}} did not return a defined value";
423cee85
JH
11644 sv = res;
11645 goto report;
9b0e499b 11646 }
423cee85 11647
9b0e499b 11648 return res;
b3ac6de7 11649}
4e553d73 11650
d0a148a6
NC
11651/* Returns a NUL terminated string, with the length of the string written to
11652 *slp
11653 */
76e3520e 11654STATIC char *
cea2e8a9 11655S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11656{
97aff369 11657 dVAR;
463ee0b2 11658 register char *d = dest;
890ce7af 11659 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11660
11661 PERL_ARGS_ASSERT_SCAN_WORD;
11662
463ee0b2 11663 for (;;) {
8903cb82 11664 if (d >= e)
cea2e8a9 11665 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11666 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11667 *d++ = *s++;
c35e046a 11668 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11669 *d++ = ':';
11670 *d++ = ':';
11671 s++;
11672 }
c35e046a 11673 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11674 *d++ = *s++;
11675 *d++ = *s++;
11676 }
fd400ab9 11677 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11678 char *t = s + UTF8SKIP(s);
c35e046a 11679 size_t len;
fd400ab9 11680 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11681 t += UTF8SKIP(t);
c35e046a
AL
11682 len = t - s;
11683 if (d + len > e)
cea2e8a9 11684 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11685 Copy(s, d, len, char);
11686 d += len;
a0ed51b3
LW
11687 s = t;
11688 }
463ee0b2
LW
11689 else {
11690 *d = '\0';
11691 *slp = d - dest;
11692 return s;
e929a76b 11693 }
378cc40b
LW
11694 }
11695}
11696
76e3520e 11697STATIC char *
f54cb97a 11698S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11699{
97aff369 11700 dVAR;
6136c704 11701 char *bracket = NULL;
748a9306 11702 char funny = *s++;
6136c704 11703 register char *d = dest;
0b3da58d 11704 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11705
7918f24d
NC
11706 PERL_ARGS_ASSERT_SCAN_IDENT;
11707
a0d0e21e 11708 if (isSPACE(*s))
29595ff2 11709 s = PEEKSPACE(s);
de3bb511 11710 if (isDIGIT(*s)) {
8903cb82 11711 while (isDIGIT(*s)) {
11712 if (d >= e)
cea2e8a9 11713 Perl_croak(aTHX_ ident_too_long);
378cc40b 11714 *d++ = *s++;
8903cb82 11715 }
378cc40b
LW
11716 }
11717 else {
463ee0b2 11718 for (;;) {
8903cb82 11719 if (d >= e)
cea2e8a9 11720 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11721 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11722 *d++ = *s++;
7e2040f0 11723 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11724 *d++ = ':';
11725 *d++ = ':';
11726 s++;
11727 }
a0d0e21e 11728 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11729 *d++ = *s++;
11730 *d++ = *s++;
11731 }
fd400ab9 11732 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11733 char *t = s + UTF8SKIP(s);
fd400ab9 11734 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11735 t += UTF8SKIP(t);
11736 if (d + (t - s) > e)
cea2e8a9 11737 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11738 Copy(s, d, t - s, char);
11739 d += t - s;
11740 s = t;
11741 }
463ee0b2
LW
11742 else
11743 break;
11744 }
378cc40b
LW
11745 }
11746 *d = '\0';
11747 d = dest;
79072805 11748 if (*d) {
3280af22
NIS
11749 if (PL_lex_state != LEX_NORMAL)
11750 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11751 return s;
378cc40b 11752 }
748a9306 11753 if (*s == '$' && s[1] &&
3792a11b 11754 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11755 {
4810e5ec 11756 return s;
5cd24f17 11757 }
79072805
LW
11758 if (*s == '{') {
11759 bracket = s;
11760 s++;
11761 }
11762 else if (ck_uni)
11763 check_uni();
93a17b20 11764 if (s < send)
79072805
LW
11765 *d = *s++;
11766 d[1] = '\0';
2b92dfce 11767 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11768 *d = toCTRL(*s);
11769 s++;
de3bb511 11770 }
79072805 11771 if (bracket) {
748a9306 11772 if (isSPACE(s[-1])) {
fa83b5b6 11773 while (s < send) {
f54cb97a 11774 const char ch = *s++;
bf4acbe4 11775 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11776 *d = ch;
11777 break;
11778 }
11779 }
748a9306 11780 }
7e2040f0 11781 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11782 d++;
a0ed51b3 11783 if (UTF) {
6136c704
AL
11784 char *end = s;
11785 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11786 end += UTF8SKIP(end);
11787 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11788 end += UTF8SKIP(end);
a0ed51b3 11789 }
6136c704
AL
11790 Copy(s, d, end - s, char);
11791 d += end - s;
11792 s = end;
a0ed51b3
LW
11793 }
11794 else {
2b92dfce 11795 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11796 *d++ = *s++;
2b92dfce 11797 if (d >= e)
cea2e8a9 11798 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11799 }
79072805 11800 *d = '\0';
c35e046a
AL
11801 while (s < send && SPACE_OR_TAB(*s))
11802 s++;
ff68c719 11803 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11804 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11805 const char * const brack =
11806 (const char *)
11807 ((*s == '[') ? "[...]" : "{...}");
9014280d 11808 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11809 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11810 funny, dest, brack, funny, dest, brack);
11811 }
79072805 11812 bracket++;
a0be28da 11813 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11814 return s;
11815 }
4e553d73
NIS
11816 }
11817 /* Handle extended ${^Foo} variables
2b92dfce
GS
11818 * 1999-02-27 mjd-perl-patch@plover.com */
11819 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11820 && isALNUM(*s))
11821 {
11822 d++;
11823 while (isALNUM(*s) && d < e) {
11824 *d++ = *s++;
11825 }
11826 if (d >= e)
cea2e8a9 11827 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11828 *d = '\0';
79072805
LW
11829 }
11830 if (*s == '}') {
11831 s++;
7df0d042 11832 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11833 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11834 PL_expect = XREF;
11835 }
d008e5eb 11836 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11837 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11838 (keyword(dest, d - dest, 0)
11839 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11840 {
c35e046a
AL
11841 if (funny == '#')
11842 funny = '@';
9014280d 11843 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11844 "Ambiguous use of %c{%s} resolved to %c%s",
11845 funny, dest, funny, dest);
11846 }
11847 }
79072805
LW
11848 }
11849 else {
11850 s = bracket; /* let the parser handle it */
93a17b20 11851 *dest = '\0';
79072805
LW
11852 }
11853 }
3280af22
NIS
11854 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11855 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11856 return s;
11857}
11858
879d0c72
NC
11859static U32
11860S_pmflag(U32 pmfl, const char ch) {
11861 switch (ch) {
11862 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11863 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11864 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11865 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11866 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11867 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11868 }
11869 return pmfl;
11870}
11871
76e3520e 11872STATIC char *
cea2e8a9 11873S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11874{
97aff369 11875 dVAR;
79072805 11876 PMOP *pm;
5db06880 11877 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11878 const char * const valid_flags =
a20207d7 11879 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11880#ifdef PERL_MAD
11881 char *modstart;
11882#endif
11883
7918f24d 11884 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11885
25c09cbf 11886 if (!s) {
6136c704 11887 const char * const delimiter = skipspace(start);
10edeb5d
JH
11888 Perl_croak(aTHX_
11889 (const char *)
11890 (*delimiter == '?'
11891 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11892 : "Search pattern not terminated" ));
25c09cbf 11893 }
bbce6d69 11894
8782bef2 11895 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11896 if (PL_multi_open == '?') {
11897 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11898 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11899
11900 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11901 allows us to restrict the list needed by reset to just the ??
11902 matches. */
11903 assert(type != OP_TRANS);
11904 if (PL_curstash) {
daba3364 11905 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11906 U32 elements;
11907 if (!mg) {
daba3364 11908 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11909 0);
11910 }
11911 elements = mg->mg_len / sizeof(PMOP**);
11912 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11913 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11914 mg->mg_len = elements * sizeof(PMOP**);
11915 PmopSTASH_set(pm,PL_curstash);
11916 }
11917 }
5db06880
NC
11918#ifdef PERL_MAD
11919 modstart = s;
11920#endif
6136c704 11921 while (*s && strchr(valid_flags, *s))
879d0c72 11922 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11923
11924 if (isALNUM(*s)) {
11925 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11926 "Having no space between pattern and following word is deprecated");
11927
11928 }
5db06880
NC
11929#ifdef PERL_MAD
11930 if (PL_madskills && modstart != s) {
11931 SV* tmptoken = newSVpvn(modstart, s - modstart);
11932 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11933 }
11934#endif
4ac733c9 11935 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11936 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11937 {
a2a5de95
NC
11938 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11939 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11940 }
11941
3280af22 11942 PL_lex_op = (OP*)pm;
6154021b 11943 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11944 return s;
11945}
11946
76e3520e 11947STATIC char *
cea2e8a9 11948S_scan_subst(pTHX_ char *start)
79072805 11949{
27da23d5 11950 dVAR;
a0d0e21e 11951 register char *s;
79072805 11952 register PMOP *pm;
4fdae800 11953 I32 first_start;
79072805 11954 I32 es = 0;
5db06880
NC
11955#ifdef PERL_MAD
11956 char *modstart;
11957#endif
79072805 11958
7918f24d
NC
11959 PERL_ARGS_ASSERT_SCAN_SUBST;
11960
6154021b 11961 pl_yylval.ival = OP_NULL;
79072805 11962
5db06880 11963 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11964
37fd879b 11965 if (!s)
cea2e8a9 11966 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11967
3280af22 11968 if (s[-1] == PL_multi_open)
79072805 11969 s--;
5db06880
NC
11970#ifdef PERL_MAD
11971 if (PL_madskills) {
cd81e915
NC
11972 CURMAD('q', PL_thisopen);
11973 CURMAD('_', PL_thiswhite);
11974 CURMAD('E', PL_thisstuff);
11975 CURMAD('Q', PL_thisclose);
11976 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11977 }
11978#endif
79072805 11979
3280af22 11980 first_start = PL_multi_start;
5db06880 11981 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11982 if (!s) {
37fd879b 11983 if (PL_lex_stuff) {
3280af22 11984 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11985 PL_lex_stuff = NULL;
37fd879b 11986 }
cea2e8a9 11987 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11988 }
3280af22 11989 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11990
79072805 11991 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11992
11993#ifdef PERL_MAD
11994 if (PL_madskills) {
cd81e915
NC
11995 CURMAD('z', PL_thisopen);
11996 CURMAD('R', PL_thisstuff);
11997 CURMAD('Z', PL_thisclose);
5db06880
NC
11998 }
11999 modstart = s;
12000#endif
12001
48c036b1 12002 while (*s) {
a20207d7 12003 if (*s == EXEC_PAT_MOD) {
a687059c 12004 s++;
2f3197b3 12005 es++;
a687059c 12006 }
a20207d7 12007 else if (strchr(S_PAT_MODS, *s))
879d0c72 12008 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
12009 else {
12010 if (isALNUM(*s)) {
12011 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12012 "Having no space between pattern and following word is deprecated");
12013
12014 }
48c036b1 12015 break;
aa78b661 12016 }
378cc40b 12017 }
79072805 12018
5db06880
NC
12019#ifdef PERL_MAD
12020 if (PL_madskills) {
12021 if (modstart != s)
12022 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12023 append_madprops(PL_thismad, (OP*)pm, 0);
12024 PL_thismad = 0;
5db06880
NC
12025 }
12026#endif
a2a5de95
NC
12027 if ((pm->op_pmflags & PMf_CONTINUE)) {
12028 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12029 }
12030
79072805 12031 if (es) {
6136c704
AL
12032 SV * const repl = newSVpvs("");
12033
0244c3a4
GS
12034 PL_sublex_info.super_bufptr = s;
12035 PL_sublex_info.super_bufend = PL_bufend;
12036 PL_multi_end = 0;
79072805 12037 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12038 while (es-- > 0) {
12039 if (es)
12040 sv_catpvs(repl, "eval ");
12041 else
12042 sv_catpvs(repl, "do ");
12043 }
6f43d98f 12044 sv_catpvs(repl, "{");
3280af22 12045 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12046 if (strchr(SvPVX(PL_lex_repl), '#'))
12047 sv_catpvs(repl, "\n");
12048 sv_catpvs(repl, "}");
25da4f38 12049 SvEVALED_on(repl);
3280af22
NIS
12050 SvREFCNT_dec(PL_lex_repl);
12051 PL_lex_repl = repl;
378cc40b 12052 }
79072805 12053
3280af22 12054 PL_lex_op = (OP*)pm;
6154021b 12055 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12056 return s;
12057}
12058
76e3520e 12059STATIC char *
cea2e8a9 12060S_scan_trans(pTHX_ char *start)
378cc40b 12061{
97aff369 12062 dVAR;
a0d0e21e 12063 register char* s;
11343788 12064 OP *o;
79072805 12065 short *tbl;
b84c11c8
NC
12066 U8 squash;
12067 U8 del;
12068 U8 complement;
5db06880
NC
12069#ifdef PERL_MAD
12070 char *modstart;
12071#endif
79072805 12072
7918f24d
NC
12073 PERL_ARGS_ASSERT_SCAN_TRANS;
12074
6154021b 12075 pl_yylval.ival = OP_NULL;
79072805 12076
5db06880 12077 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12078 if (!s)
cea2e8a9 12079 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12080
3280af22 12081 if (s[-1] == PL_multi_open)
2f3197b3 12082 s--;
5db06880
NC
12083#ifdef PERL_MAD
12084 if (PL_madskills) {
cd81e915
NC
12085 CURMAD('q', PL_thisopen);
12086 CURMAD('_', PL_thiswhite);
12087 CURMAD('E', PL_thisstuff);
12088 CURMAD('Q', PL_thisclose);
12089 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12090 }
12091#endif
2f3197b3 12092
5db06880 12093 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12094 if (!s) {
37fd879b 12095 if (PL_lex_stuff) {
3280af22 12096 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12097 PL_lex_stuff = NULL;
37fd879b 12098 }
cea2e8a9 12099 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12100 }
5db06880 12101 if (PL_madskills) {
cd81e915
NC
12102 CURMAD('z', PL_thisopen);
12103 CURMAD('R', PL_thisstuff);
12104 CURMAD('Z', PL_thisclose);
5db06880 12105 }
79072805 12106
a0ed51b3 12107 complement = del = squash = 0;
5db06880
NC
12108#ifdef PERL_MAD
12109 modstart = s;
12110#endif
7a1e2023
NC
12111 while (1) {
12112 switch (*s) {
12113 case 'c':
79072805 12114 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12115 break;
12116 case 'd':
a0ed51b3 12117 del = OPpTRANS_DELETE;
7a1e2023
NC
12118 break;
12119 case 's':
79072805 12120 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12121 break;
12122 default:
12123 goto no_more;
12124 }
395c3793
LW
12125 s++;
12126 }
7a1e2023 12127 no_more:
8973db79 12128
aa1f7c5b 12129 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12130 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12131 o->op_private &= ~OPpTRANS_ALL;
12132 o->op_private |= del|squash|complement|
7948272d
NIS
12133 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12134 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12135
3280af22 12136 PL_lex_op = o;
6154021b 12137 pl_yylval.ival = OP_TRANS;
5db06880
NC
12138
12139#ifdef PERL_MAD
12140 if (PL_madskills) {
12141 if (modstart != s)
12142 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12143 append_madprops(PL_thismad, o, 0);
12144 PL_thismad = 0;
5db06880
NC
12145 }
12146#endif
12147
79072805
LW
12148 return s;
12149}
12150
76e3520e 12151STATIC char *
cea2e8a9 12152S_scan_heredoc(pTHX_ register char *s)
79072805 12153{
97aff369 12154 dVAR;
79072805
LW
12155 SV *herewas;
12156 I32 op_type = OP_SCALAR;
12157 I32 len;
12158 SV *tmpstr;
12159 char term;
73d840c0 12160 const char *found_newline;
79072805 12161 register char *d;
fc36a67e 12162 register char *e;
4633a7c4 12163 char *peek;
f54cb97a 12164 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12165#ifdef PERL_MAD
12166 I32 stuffstart = s - SvPVX(PL_linestr);
12167 char *tstart;
12168
cd81e915 12169 PL_realtokenstart = -1;
5db06880 12170#endif
79072805 12171
7918f24d
NC
12172 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12173
79072805 12174 s += 2;
3280af22
NIS
12175 d = PL_tokenbuf;
12176 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12177 if (!outer)
79072805 12178 *d++ = '\n';
c35e046a
AL
12179 peek = s;
12180 while (SPACE_OR_TAB(*peek))
12181 peek++;
3792a11b 12182 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12183 s = peek;
79072805 12184 term = *s++;
3280af22 12185 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12186 d += len;
3280af22 12187 if (s < PL_bufend)
79072805 12188 s++;
79072805
LW
12189 }
12190 else {
12191 if (*s == '\\')
12192 s++, term = '\'';
12193 else
12194 term = '"';
7e2040f0 12195 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12196 deprecate("bare << to mean <<\"\"");
7e2040f0 12197 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12198 if (d < e)
12199 *d++ = *s;
12200 }
12201 }
3280af22 12202 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12203 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12204 *d++ = '\n';
12205 *d = '\0';
3280af22 12206 len = d - PL_tokenbuf;
5db06880
NC
12207
12208#ifdef PERL_MAD
12209 if (PL_madskills) {
12210 tstart = PL_tokenbuf + !outer;
cd81e915 12211 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12212 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12213 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12214 stuffstart = s - SvPVX(PL_linestr);
12215 }
12216#endif
6a27c188 12217#ifndef PERL_STRICT_CR
f63a84b2
LW
12218 d = strchr(s, '\r');
12219 if (d) {
b464bac0 12220 char * const olds = s;
f63a84b2 12221 s = d;
3280af22 12222 while (s < PL_bufend) {
f63a84b2
LW
12223 if (*s == '\r') {
12224 *d++ = '\n';
12225 if (*++s == '\n')
12226 s++;
12227 }
12228 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12229 *d++ = *s++;
12230 s++;
12231 }
12232 else
12233 *d++ = *s++;
12234 }
12235 *d = '\0';
3280af22 12236 PL_bufend = d;
95a20fc0 12237 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12238 s = olds;
12239 }
12240#endif
5db06880
NC
12241#ifdef PERL_MAD
12242 found_newline = 0;
12243#endif
10edeb5d 12244 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12245 herewas = newSVpvn(s,PL_bufend-s);
12246 }
12247 else {
5db06880
NC
12248#ifdef PERL_MAD
12249 herewas = newSVpvn(s-1,found_newline-s+1);
12250#else
73d840c0
AL
12251 s--;
12252 herewas = newSVpvn(s,found_newline-s);
5db06880 12253#endif
73d840c0 12254 }
5db06880
NC
12255#ifdef PERL_MAD
12256 if (PL_madskills) {
12257 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12258 if (PL_thisstuff)
12259 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12260 else
cd81e915 12261 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12262 }
12263#endif
79072805 12264 s += SvCUR(herewas);
748a9306 12265
5db06880
NC
12266#ifdef PERL_MAD
12267 stuffstart = s - SvPVX(PL_linestr);
12268
12269 if (found_newline)
12270 s--;
12271#endif
12272
7d0a29fe
NC
12273 tmpstr = newSV_type(SVt_PVIV);
12274 SvGROW(tmpstr, 80);
748a9306 12275 if (term == '\'') {
79072805 12276 op_type = OP_CONST;
45977657 12277 SvIV_set(tmpstr, -1);
748a9306
LW
12278 }
12279 else if (term == '`') {
79072805 12280 op_type = OP_BACKTICK;
45977657 12281 SvIV_set(tmpstr, '\\');
748a9306 12282 }
79072805
LW
12283
12284 CLINE;
57843af0 12285 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12286 PL_multi_open = PL_multi_close = '<';
12287 term = *PL_tokenbuf;
0244c3a4 12288 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12289 char * const bufptr = PL_sublex_info.super_bufptr;
12290 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12291 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12292 s = strchr(bufptr, '\n');
12293 if (!s)
12294 s = bufend;
12295 d = s;
12296 while (s < bufend &&
12297 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12298 if (*s++ == '\n')
57843af0 12299 CopLINE_inc(PL_curcop);
0244c3a4
GS
12300 }
12301 if (s >= bufend) {
eb160463 12302 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12303 missingterm(PL_tokenbuf);
12304 }
12305 sv_setpvn(herewas,bufptr,d-bufptr+1);
12306 sv_setpvn(tmpstr,d+1,s-d);
12307 s += len - 1;
12308 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12309 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12310
12311 s = olds;
12312 goto retval;
12313 }
12314 else if (!outer) {
79072805 12315 d = s;
3280af22
NIS
12316 while (s < PL_bufend &&
12317 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12318 if (*s++ == '\n')
57843af0 12319 CopLINE_inc(PL_curcop);
79072805 12320 }
3280af22 12321 if (s >= PL_bufend) {
eb160463 12322 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12323 missingterm(PL_tokenbuf);
79072805
LW
12324 }
12325 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12326#ifdef PERL_MAD
12327 if (PL_madskills) {
cd81e915
NC
12328 if (PL_thisstuff)
12329 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12330 else
cd81e915 12331 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12332 stuffstart = s - SvPVX(PL_linestr);
12333 }
12334#endif
79072805 12335 s += len - 1;
57843af0 12336 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12337
3280af22
NIS
12338 sv_catpvn(herewas,s,PL_bufend-s);
12339 sv_setsv(PL_linestr,herewas);
12340 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12341 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12342 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12343 }
12344 else
76f68e9b 12345 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12346 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12347#ifdef PERL_MAD
12348 if (PL_madskills) {
12349 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12350 if (PL_thisstuff)
12351 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12352 else
cd81e915 12353 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12354 }
12355#endif
f0e67a1d 12356 PL_bufptr = s;
17cc9359 12357 CopLINE_inc(PL_curcop);
f0e67a1d 12358 if (!outer || !lex_next_chunk(0)) {
eb160463 12359 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12360 missingterm(PL_tokenbuf);
79072805 12361 }
17cc9359 12362 CopLINE_dec(PL_curcop);
f0e67a1d 12363 s = PL_bufptr;
5db06880
NC
12364#ifdef PERL_MAD
12365 stuffstart = s - SvPVX(PL_linestr);
12366#endif
57843af0 12367 CopLINE_inc(PL_curcop);
3280af22 12368 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12369 PL_last_lop = PL_last_uni = NULL;
6a27c188 12370#ifndef PERL_STRICT_CR
3280af22 12371 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12372 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12373 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12374 {
3280af22
NIS
12375 PL_bufend[-2] = '\n';
12376 PL_bufend--;
95a20fc0 12377 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12378 }
3280af22
NIS
12379 else if (PL_bufend[-1] == '\r')
12380 PL_bufend[-1] = '\n';
f63a84b2 12381 }
3280af22
NIS
12382 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12383 PL_bufend[-1] = '\n';
f63a84b2 12384#endif
3280af22 12385 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12386 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12387 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12388 sv_catsv(PL_linestr,herewas);
12389 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12390 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12391 }
12392 else {
3280af22
NIS
12393 s = PL_bufend;
12394 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12395 }
12396 }
79072805 12397 s++;
0244c3a4 12398retval:
57843af0 12399 PL_multi_end = CopLINE(PL_curcop);
79072805 12400 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12401 SvPV_shrink_to_cur(tmpstr);
79072805 12402 }
8990e307 12403 SvREFCNT_dec(herewas);
2f31ce75 12404 if (!IN_BYTES) {
95a20fc0 12405 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12406 SvUTF8_on(tmpstr);
12407 else if (PL_encoding)
12408 sv_recode_to_utf8(tmpstr, PL_encoding);
12409 }
3280af22 12410 PL_lex_stuff = tmpstr;
6154021b 12411 pl_yylval.ival = op_type;
79072805
LW
12412 return s;
12413}
12414
02aa26ce
NT
12415/* scan_inputsymbol
12416 takes: current position in input buffer
12417 returns: new position in input buffer
6154021b 12418 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12419
12420 This code handles:
12421
12422 <> read from ARGV
12423 <FH> read from filehandle
12424 <pkg::FH> read from package qualified filehandle
12425 <pkg'FH> read from package qualified filehandle
12426 <$fh> read from filehandle in $fh
12427 <*.h> filename glob
12428
12429*/
12430
76e3520e 12431STATIC char *
cea2e8a9 12432S_scan_inputsymbol(pTHX_ char *start)
79072805 12433{
97aff369 12434 dVAR;
02aa26ce 12435 register char *s = start; /* current position in buffer */
1b420867 12436 char *end;
79072805 12437 I32 len;
6136c704
AL
12438 char *d = PL_tokenbuf; /* start of temp holding space */
12439 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12440
7918f24d
NC
12441 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12442
1b420867
GS
12443 end = strchr(s, '\n');
12444 if (!end)
12445 end = PL_bufend;
12446 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12447
12448 /* die if we didn't have space for the contents of the <>,
1b420867 12449 or if it didn't end, or if we see a newline
02aa26ce
NT
12450 */
12451
bb7a0f54 12452 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12453 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12454 if (s >= end)
cea2e8a9 12455 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12456
fc36a67e 12457 s++;
02aa26ce
NT
12458
12459 /* check for <$fh>
12460 Remember, only scalar variables are interpreted as filehandles by
12461 this code. Anything more complex (e.g., <$fh{$num}>) will be
12462 treated as a glob() call.
12463 This code makes use of the fact that except for the $ at the front,
12464 a scalar variable and a filehandle look the same.
12465 */
4633a7c4 12466 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12467
12468 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12469 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12470 d++;
02aa26ce
NT
12471
12472 /* If we've tried to read what we allow filehandles to look like, and
12473 there's still text left, then it must be a glob() and not a getline.
12474 Use scan_str to pull out the stuff between the <> and treat it
12475 as nothing more than a string.
12476 */
12477
3280af22 12478 if (d - PL_tokenbuf != len) {
6154021b 12479 pl_yylval.ival = OP_GLOB;
5db06880 12480 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12481 if (!s)
cea2e8a9 12482 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12483 return s;
12484 }
395c3793 12485 else {
9b3023bc 12486 bool readline_overriden = FALSE;
6136c704 12487 GV *gv_readline;
9b3023bc 12488 GV **gvp;
02aa26ce 12489 /* we're in a filehandle read situation */
3280af22 12490 d = PL_tokenbuf;
02aa26ce
NT
12491
12492 /* turn <> into <ARGV> */
79072805 12493 if (!len)
689badd5 12494 Copy("ARGV",d,5,char);
02aa26ce 12495
9b3023bc 12496 /* Check whether readline() is overriden */
fafc274c 12497 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12498 if ((gv_readline
ba979b31 12499 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12500 ||
017a3ce5 12501 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12502 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12503 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12504 readline_overriden = TRUE;
12505
02aa26ce
NT
12506 /* if <$fh>, create the ops to turn the variable into a
12507 filehandle
12508 */
79072805 12509 if (*d == '$') {
02aa26ce
NT
12510 /* try to find it in the pad for this block, otherwise find
12511 add symbol table ops
12512 */
f8f98e0a 12513 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12514 if (tmp != NOT_IN_PAD) {
00b1698f 12515 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12516 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12517 HEK * const stashname = HvNAME_HEK(stash);
12518 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12519 sv_catpvs(sym, "::");
f558d5af
JH
12520 sv_catpv(sym, d+1);
12521 d = SvPVX(sym);
12522 goto intro_sym;
12523 }
12524 else {
6136c704 12525 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12526 o->op_targ = tmp;
9b3023bc
RGS
12527 PL_lex_op = readline_overriden
12528 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12529 append_elem(OP_LIST, o,
12530 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12531 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12532 }
a0d0e21e
LW
12533 }
12534 else {
f558d5af
JH
12535 GV *gv;
12536 ++d;
12537intro_sym:
12538 gv = gv_fetchpv(d,
12539 (PL_in_eval
12540 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12541 : GV_ADDMULTI),
f558d5af 12542 SVt_PV);
9b3023bc
RGS
12543 PL_lex_op = readline_overriden
12544 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12545 append_elem(OP_LIST,
12546 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12547 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12548 : (OP*)newUNOP(OP_READLINE, 0,
12549 newUNOP(OP_RV2SV, 0,
12550 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12551 }
7c6fadd6
RGS
12552 if (!readline_overriden)
12553 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12554 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12555 pl_yylval.ival = OP_NULL;
79072805 12556 }
02aa26ce
NT
12557
12558 /* If it's none of the above, it must be a literal filehandle
12559 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12560 else {
6136c704 12561 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12562 PL_lex_op = readline_overriden
12563 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12564 append_elem(OP_LIST,
12565 newGVOP(OP_GV, 0, gv),
12566 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12567 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12568 pl_yylval.ival = OP_NULL;
79072805
LW
12569 }
12570 }
02aa26ce 12571
79072805
LW
12572 return s;
12573}
12574
02aa26ce
NT
12575
12576/* scan_str
12577 takes: start position in buffer
09bef843
SB
12578 keep_quoted preserve \ on the embedded delimiter(s)
12579 keep_delims preserve the delimiters around the string
02aa26ce
NT
12580 returns: position to continue reading from buffer
12581 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12582 updates the read buffer.
12583
12584 This subroutine pulls a string out of the input. It is called for:
12585 q single quotes q(literal text)
12586 ' single quotes 'literal text'
12587 qq double quotes qq(interpolate $here please)
12588 " double quotes "interpolate $here please"
12589 qx backticks qx(/bin/ls -l)
12590 ` backticks `/bin/ls -l`
12591 qw quote words @EXPORT_OK = qw( func() $spam )
12592 m// regexp match m/this/
12593 s/// regexp substitute s/this/that/
12594 tr/// string transliterate tr/this/that/
12595 y/// string transliterate y/this/that/
12596 ($*@) sub prototypes sub foo ($)
09bef843 12597 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12598 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12599
12600 In most of these cases (all but <>, patterns and transliterate)
12601 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12602 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12603 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12604 calls scan_str().
4e553d73 12605
02aa26ce
NT
12606 It skips whitespace before the string starts, and treats the first
12607 character as the delimiter. If the delimiter is one of ([{< then
12608 the corresponding "close" character )]}> is used as the closing
12609 delimiter. It allows quoting of delimiters, and if the string has
12610 balanced delimiters ([{<>}]) it allows nesting.
12611
37fd879b
HS
12612 On success, the SV with the resulting string is put into lex_stuff or,
12613 if that is already non-NULL, into lex_repl. The second case occurs only
12614 when parsing the RHS of the special constructs s/// and tr/// (y///).
12615 For convenience, the terminating delimiter character is stuffed into
12616 SvIVX of the SV.
02aa26ce
NT
12617*/
12618
76e3520e 12619STATIC char *
09bef843 12620S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12621{
97aff369 12622 dVAR;
02aa26ce 12623 SV *sv; /* scalar value: string */
d3fcec1f 12624 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12625 register char *s = start; /* current position in the buffer */
12626 register char term; /* terminating character */
12627 register char *to; /* current position in the sv's data */
12628 I32 brackets = 1; /* bracket nesting level */
89491803 12629 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12630 I32 termcode; /* terminating char. code */
89ebb4a3 12631 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12632 STRLEN termlen; /* length of terminating string */
0331ef07 12633 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12634#ifdef PERL_MAD
12635 int stuffstart;
12636 char *tstart;
12637#endif
02aa26ce 12638
7918f24d
NC
12639 PERL_ARGS_ASSERT_SCAN_STR;
12640
02aa26ce 12641 /* skip space before the delimiter */
29595ff2
NC
12642 if (isSPACE(*s)) {
12643 s = PEEKSPACE(s);
12644 }
02aa26ce 12645
5db06880 12646#ifdef PERL_MAD
cd81e915
NC
12647 if (PL_realtokenstart >= 0) {
12648 stuffstart = PL_realtokenstart;
12649 PL_realtokenstart = -1;
5db06880
NC
12650 }
12651 else
12652 stuffstart = start - SvPVX(PL_linestr);
12653#endif
02aa26ce 12654 /* mark where we are, in case we need to report errors */
79072805 12655 CLINE;
02aa26ce
NT
12656
12657 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12658 term = *s;
220e2d4e
IH
12659 if (!UTF) {
12660 termcode = termstr[0] = term;
12661 termlen = 1;
12662 }
12663 else {
f3b9ce0f 12664 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12665 Copy(s, termstr, termlen, U8);
12666 if (!UTF8_IS_INVARIANT(term))
12667 has_utf8 = TRUE;
12668 }
b1c7b182 12669
02aa26ce 12670 /* mark where we are */
57843af0 12671 PL_multi_start = CopLINE(PL_curcop);
3280af22 12672 PL_multi_open = term;
02aa26ce
NT
12673
12674 /* find corresponding closing delimiter */
93a17b20 12675 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12676 termcode = termstr[0] = term = tmps[5];
12677
3280af22 12678 PL_multi_close = term;
79072805 12679
561b68a9
SH
12680 /* create a new SV to hold the contents. 79 is the SV's initial length.
12681 What a random number. */
7d0a29fe
NC
12682 sv = newSV_type(SVt_PVIV);
12683 SvGROW(sv, 80);
45977657 12684 SvIV_set(sv, termcode);
a0d0e21e 12685 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12686
12687 /* move past delimiter and try to read a complete string */
09bef843 12688 if (keep_delims)
220e2d4e
IH
12689 sv_catpvn(sv, s, termlen);
12690 s += termlen;
5db06880
NC
12691#ifdef PERL_MAD
12692 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12693 if (!PL_thisopen && !keep_delims) {
12694 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12695 stuffstart = s - SvPVX(PL_linestr);
12696 }
12697#endif
93a17b20 12698 for (;;) {
220e2d4e
IH
12699 if (PL_encoding && !UTF) {
12700 bool cont = TRUE;
12701
12702 while (cont) {
95a20fc0 12703 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12704 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12705 &offset, (char*)termstr, termlen);
6136c704
AL
12706 const char * const ns = SvPVX_const(PL_linestr) + offset;
12707 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12708
12709 for (; s < ns; s++) {
12710 if (*s == '\n' && !PL_rsfp)
12711 CopLINE_inc(PL_curcop);
12712 }
12713 if (!found)
12714 goto read_more_line;
12715 else {
12716 /* handle quoted delimiters */
52327caf 12717 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12718 const char *t;
95a20fc0 12719 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12720 t--;
12721 if ((svlast-1 - t) % 2) {
12722 if (!keep_quoted) {
12723 *(svlast-1) = term;
12724 *svlast = '\0';
12725 SvCUR_set(sv, SvCUR(sv) - 1);
12726 }
12727 continue;
12728 }
12729 }
12730 if (PL_multi_open == PL_multi_close) {
12731 cont = FALSE;
12732 }
12733 else {
f54cb97a
AL
12734 const char *t;
12735 char *w;
0331ef07 12736 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12737 /* At here, all closes are "was quoted" one,
12738 so we don't check PL_multi_close. */
12739 if (*t == '\\') {
12740 if (!keep_quoted && *(t+1) == PL_multi_open)
12741 t++;
12742 else
12743 *w++ = *t++;
12744 }
12745 else if (*t == PL_multi_open)
12746 brackets++;
12747
12748 *w = *t;
12749 }
12750 if (w < t) {
12751 *w++ = term;
12752 *w = '\0';
95a20fc0 12753 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12754 }
0331ef07 12755 last_off = w - SvPVX(sv);
220e2d4e
IH
12756 if (--brackets <= 0)
12757 cont = FALSE;
12758 }
12759 }
12760 }
12761 if (!keep_delims) {
12762 SvCUR_set(sv, SvCUR(sv) - 1);
12763 *SvEND(sv) = '\0';
12764 }
12765 break;
12766 }
12767
02aa26ce 12768 /* extend sv if need be */
3280af22 12769 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12770 /* set 'to' to the next character in the sv's string */
463ee0b2 12771 to = SvPVX(sv)+SvCUR(sv);
09bef843 12772
02aa26ce 12773 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12774 if (PL_multi_open == PL_multi_close) {
12775 for (; s < PL_bufend; s++,to++) {
02aa26ce 12776 /* embedded newlines increment the current line number */
3280af22 12777 if (*s == '\n' && !PL_rsfp)
57843af0 12778 CopLINE_inc(PL_curcop);
02aa26ce 12779 /* handle quoted delimiters */
3280af22 12780 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12781 if (!keep_quoted && s[1] == term)
a0d0e21e 12782 s++;
02aa26ce 12783 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12784 else
12785 *to++ = *s++;
12786 }
02aa26ce
NT
12787 /* terminate when run out of buffer (the for() condition), or
12788 have found the terminator */
220e2d4e
IH
12789 else if (*s == term) {
12790 if (termlen == 1)
12791 break;
f3b9ce0f 12792 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12793 break;
12794 }
63cd0674 12795 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12796 has_utf8 = TRUE;
93a17b20
LW
12797 *to = *s;
12798 }
12799 }
02aa26ce
NT
12800
12801 /* if the terminator isn't the same as the start character (e.g.,
12802 matched brackets), we have to allow more in the quoting, and
12803 be prepared for nested brackets.
12804 */
93a17b20 12805 else {
02aa26ce 12806 /* read until we run out of string, or we find the terminator */
3280af22 12807 for (; s < PL_bufend; s++,to++) {
02aa26ce 12808 /* embedded newlines increment the line count */
3280af22 12809 if (*s == '\n' && !PL_rsfp)
57843af0 12810 CopLINE_inc(PL_curcop);
02aa26ce 12811 /* backslashes can escape the open or closing characters */
3280af22 12812 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12813 if (!keep_quoted &&
12814 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12815 s++;
12816 else
12817 *to++ = *s++;
12818 }
02aa26ce 12819 /* allow nested opens and closes */
3280af22 12820 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12821 break;
3280af22 12822 else if (*s == PL_multi_open)
93a17b20 12823 brackets++;
63cd0674 12824 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12825 has_utf8 = TRUE;
93a17b20
LW
12826 *to = *s;
12827 }
12828 }
02aa26ce 12829 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12830 *to = '\0';
95a20fc0 12831 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12832
02aa26ce
NT
12833 /*
12834 * this next chunk reads more into the buffer if we're not done yet
12835 */
12836
b1c7b182
GS
12837 if (s < PL_bufend)
12838 break; /* handle case where we are done yet :-) */
79072805 12839
6a27c188 12840#ifndef PERL_STRICT_CR
95a20fc0 12841 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12842 if ((to[-2] == '\r' && to[-1] == '\n') ||
12843 (to[-2] == '\n' && to[-1] == '\r'))
12844 {
f63a84b2
LW
12845 to[-2] = '\n';
12846 to--;
95a20fc0 12847 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12848 }
12849 else if (to[-1] == '\r')
12850 to[-1] = '\n';
12851 }
95a20fc0 12852 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12853 to[-1] = '\n';
12854#endif
12855
220e2d4e 12856 read_more_line:
02aa26ce
NT
12857 /* if we're out of file, or a read fails, bail and reset the current
12858 line marker so we can report where the unterminated string began
12859 */
5db06880
NC
12860#ifdef PERL_MAD
12861 if (PL_madskills) {
c35e046a 12862 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12863 if (PL_thisstuff)
12864 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12865 else
cd81e915 12866 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12867 }
12868#endif
f0e67a1d
Z
12869 CopLINE_inc(PL_curcop);
12870 PL_bufptr = PL_bufend;
12871 if (!lex_next_chunk(0)) {
c07a80fd 12872 sv_free(sv);
eb160463 12873 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12874 return NULL;
79072805 12875 }
f0e67a1d 12876 s = PL_bufptr;
5db06880
NC
12877#ifdef PERL_MAD
12878 stuffstart = 0;
12879#endif
378cc40b 12880 }
4e553d73 12881
02aa26ce
NT
12882 /* at this point, we have successfully read the delimited string */
12883
220e2d4e 12884 if (!PL_encoding || UTF) {
5db06880
NC
12885#ifdef PERL_MAD
12886 if (PL_madskills) {
c35e046a 12887 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12888 const int len = s - tstart;
cd81e915 12889 if (PL_thisstuff)
c35e046a 12890 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12891 else
c35e046a 12892 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12893 if (!PL_thisclose && !keep_delims)
12894 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12895 }
12896#endif
12897
220e2d4e
IH
12898 if (keep_delims)
12899 sv_catpvn(sv, s, termlen);
12900 s += termlen;
12901 }
5db06880
NC
12902#ifdef PERL_MAD
12903 else {
12904 if (PL_madskills) {
c35e046a
AL
12905 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12906 const int len = s - tstart - termlen;
cd81e915 12907 if (PL_thisstuff)
c35e046a 12908 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12909 else
c35e046a 12910 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12911 if (!PL_thisclose && !keep_delims)
12912 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12913 }
12914 }
12915#endif
220e2d4e 12916 if (has_utf8 || PL_encoding)
b1c7b182 12917 SvUTF8_on(sv);
d0063567 12918
57843af0 12919 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12920
12921 /* if we allocated too much space, give some back */
93a17b20
LW
12922 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12923 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12924 SvPV_renew(sv, SvLEN(sv));
79072805 12925 }
02aa26ce
NT
12926
12927 /* decide whether this is the first or second quoted string we've read
12928 for this op
12929 */
4e553d73 12930
3280af22
NIS
12931 if (PL_lex_stuff)
12932 PL_lex_repl = sv;
79072805 12933 else
3280af22 12934 PL_lex_stuff = sv;
378cc40b
LW
12935 return s;
12936}
12937
02aa26ce
NT
12938/*
12939 scan_num
12940 takes: pointer to position in buffer
12941 returns: pointer to new position in buffer
6154021b 12942 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12943
12944 Read a number in any of the formats that Perl accepts:
12945
7fd134d9
JH
12946 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12947 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12948 0b[01](_?[01])*
12949 0[0-7](_?[0-7])*
12950 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12951
3280af22 12952 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12953 thing it reads.
12954
12955 If it reads a number without a decimal point or an exponent, it will
12956 try converting the number to an integer and see if it can do so
12957 without loss of precision.
12958*/
4e553d73 12959
378cc40b 12960char *
bfed75c6 12961Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12962{
97aff369 12963 dVAR;
bfed75c6 12964 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12965 register char *d; /* destination in temp buffer */
12966 register char *e; /* end of temp buffer */
86554af2 12967 NV nv; /* number read, as a double */
a0714e2c 12968 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12969 bool floatit; /* boolean: int or float? */
cbbf8932 12970 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12971 static char const number_too_long[] = "Number too long";
378cc40b 12972
7918f24d
NC
12973 PERL_ARGS_ASSERT_SCAN_NUM;
12974
02aa26ce
NT
12975 /* We use the first character to decide what type of number this is */
12976
378cc40b 12977 switch (*s) {
79072805 12978 default:
cea2e8a9 12979 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12980
02aa26ce 12981 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12982 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12983 case '0':
12984 {
02aa26ce
NT
12985 /* variables:
12986 u holds the "number so far"
4f19785b
WSI
12987 shift the power of 2 of the base
12988 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12989 overflowed was the number more than we can hold?
12990
12991 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12992 we in octal/hex/binary?" indicator to disallow hex characters
12993 when in octal mode.
02aa26ce 12994 */
9e24b6e2
JH
12995 NV n = 0.0;
12996 UV u = 0;
79072805 12997 I32 shift;
9e24b6e2 12998 bool overflowed = FALSE;
61f33854 12999 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
13000 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13001 static const char* const bases[5] =
13002 { "", "binary", "", "octal", "hexadecimal" };
13003 static const char* const Bases[5] =
13004 { "", "Binary", "", "Octal", "Hexadecimal" };
13005 static const char* const maxima[5] =
13006 { "",
13007 "0b11111111111111111111111111111111",
13008 "",
13009 "037777777777",
13010 "0xffffffff" };
bfed75c6 13011 const char *base, *Base, *max;
378cc40b 13012
02aa26ce 13013 /* check for hex */
a674e8db 13014 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
13015 shift = 4;
13016 s += 2;
61f33854 13017 just_zero = FALSE;
a674e8db 13018 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13019 shift = 1;
13020 s += 2;
61f33854 13021 just_zero = FALSE;
378cc40b 13022 }
02aa26ce 13023 /* check for a decimal in disguise */
b78218b7 13024 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13025 goto decimal;
02aa26ce 13026 /* so it must be octal */
928753ea 13027 else {
378cc40b 13028 shift = 3;
928753ea
JH
13029 s++;
13030 }
13031
13032 if (*s == '_') {
a2a5de95 13033 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13034 "Misplaced _ in number");
13035 lastub = s++;
13036 }
9e24b6e2
JH
13037
13038 base = bases[shift];
13039 Base = Bases[shift];
13040 max = maxima[shift];
02aa26ce 13041
4f19785b 13042 /* read the rest of the number */
378cc40b 13043 for (;;) {
9e24b6e2 13044 /* x is used in the overflow test,
893fe2c2 13045 b is the digit we're adding on. */
9e24b6e2 13046 UV x, b;
55497cff 13047
378cc40b 13048 switch (*s) {
02aa26ce
NT
13049
13050 /* if we don't mention it, we're done */
378cc40b
LW
13051 default:
13052 goto out;
02aa26ce 13053
928753ea 13054 /* _ are ignored -- but warned about if consecutive */
de3bb511 13055 case '_':
a2a5de95
NC
13056 if (lastub && s == lastub + 1)
13057 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13058 "Misplaced _ in number");
928753ea 13059 lastub = s++;
de3bb511 13060 break;
02aa26ce
NT
13061
13062 /* 8 and 9 are not octal */
378cc40b 13063 case '8': case '9':
4f19785b 13064 if (shift == 3)
cea2e8a9 13065 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13066 /* FALL THROUGH */
02aa26ce
NT
13067
13068 /* octal digits */
4f19785b 13069 case '2': case '3': case '4':
378cc40b 13070 case '5': case '6': case '7':
4f19785b 13071 if (shift == 1)
cea2e8a9 13072 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13073 /* FALL THROUGH */
13074
13075 case '0': case '1':
02aa26ce 13076 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13077 goto digit;
02aa26ce
NT
13078
13079 /* hex digits */
378cc40b
LW
13080 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13081 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13082 /* make sure they said 0x */
378cc40b
LW
13083 if (shift != 4)
13084 goto out;
55497cff 13085 b = (*s++ & 7) + 9;
02aa26ce
NT
13086
13087 /* Prepare to put the digit we have onto the end
13088 of the number so far. We check for overflows.
13089 */
13090
55497cff 13091 digit:
61f33854 13092 just_zero = FALSE;
9e24b6e2
JH
13093 if (!overflowed) {
13094 x = u << shift; /* make room for the digit */
13095
13096 if ((x >> shift) != u
13097 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13098 overflowed = TRUE;
13099 n = (NV) u;
9b387841
NC
13100 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13101 "Integer overflow in %s number",
13102 base);
9e24b6e2
JH
13103 } else
13104 u = x | b; /* add the digit to the end */
13105 }
13106 if (overflowed) {
13107 n *= nvshift[shift];
13108 /* If an NV has not enough bits in its
13109 * mantissa to represent an UV this summing of
13110 * small low-order numbers is a waste of time
13111 * (because the NV cannot preserve the
13112 * low-order bits anyway): we could just
13113 * remember when did we overflow and in the
13114 * end just multiply n by the right
13115 * amount. */
13116 n += (NV) b;
55497cff 13117 }
378cc40b
LW
13118 break;
13119 }
13120 }
02aa26ce
NT
13121
13122 /* if we get here, we had success: make a scalar value from
13123 the number.
13124 */
378cc40b 13125 out:
928753ea
JH
13126
13127 /* final misplaced underbar check */
13128 if (s[-1] == '_') {
a2a5de95 13129 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13130 }
13131
9e24b6e2 13132 if (overflowed) {
a2a5de95
NC
13133 if (n > 4294967295.0)
13134 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13135 "%s number > %s non-portable",
13136 Base, max);
b081dd7e 13137 sv = newSVnv(n);
9e24b6e2
JH
13138 }
13139 else {
15041a67 13140#if UVSIZE > 4
a2a5de95
NC
13141 if (u > 0xffffffff)
13142 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13143 "%s number > %s non-portable",
13144 Base, max);
2cc4c2dc 13145#endif
b081dd7e 13146 sv = newSVuv(u);
9e24b6e2 13147 }
61f33854 13148 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13149 sv = new_constant(start, s - start, "integer",
eb0d8d16 13150 sv, NULL, NULL, 0);
61f33854 13151 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13152 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13153 }
13154 break;
02aa26ce
NT
13155
13156 /*
13157 handle decimal numbers.
13158 we're also sent here when we read a 0 as the first digit
13159 */
378cc40b
LW
13160 case '1': case '2': case '3': case '4': case '5':
13161 case '6': case '7': case '8': case '9': case '.':
13162 decimal:
3280af22
NIS
13163 d = PL_tokenbuf;
13164 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13165 floatit = FALSE;
02aa26ce
NT
13166
13167 /* read next group of digits and _ and copy into d */
de3bb511 13168 while (isDIGIT(*s) || *s == '_') {
4e553d73 13169 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13170 if -w is on
13171 */
93a17b20 13172 if (*s == '_') {
a2a5de95
NC
13173 if (lastub && s == lastub + 1)
13174 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13175 "Misplaced _ in number");
928753ea 13176 lastub = s++;
93a17b20 13177 }
fc36a67e 13178 else {
02aa26ce 13179 /* check for end of fixed-length buffer */
fc36a67e 13180 if (d >= e)
cea2e8a9 13181 Perl_croak(aTHX_ number_too_long);
02aa26ce 13182 /* if we're ok, copy the character */
378cc40b 13183 *d++ = *s++;
fc36a67e 13184 }
378cc40b 13185 }
02aa26ce
NT
13186
13187 /* final misplaced underbar check */
928753ea 13188 if (lastub && s == lastub + 1) {
a2a5de95 13189 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13190 }
02aa26ce
NT
13191
13192 /* read a decimal portion if there is one. avoid
13193 3..5 being interpreted as the number 3. followed
13194 by .5
13195 */
2f3197b3 13196 if (*s == '.' && s[1] != '.') {
79072805 13197 floatit = TRUE;
378cc40b 13198 *d++ = *s++;
02aa26ce 13199
928753ea 13200 if (*s == '_') {
a2a5de95
NC
13201 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13202 "Misplaced _ in number");
928753ea
JH
13203 lastub = s;
13204 }
13205
13206 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13207 */
fc36a67e 13208 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13209 /* fixed length buffer check */
fc36a67e 13210 if (d >= e)
cea2e8a9 13211 Perl_croak(aTHX_ number_too_long);
928753ea 13212 if (*s == '_') {
a2a5de95
NC
13213 if (lastub && s == lastub + 1)
13214 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13215 "Misplaced _ in number");
928753ea
JH
13216 lastub = s;
13217 }
13218 else
fc36a67e 13219 *d++ = *s;
378cc40b 13220 }
928753ea
JH
13221 /* fractional part ending in underbar? */
13222 if (s[-1] == '_') {
a2a5de95
NC
13223 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13224 "Misplaced _ in number");
928753ea 13225 }
dd629d5b
GS
13226 if (*s == '.' && isDIGIT(s[1])) {
13227 /* oops, it's really a v-string, but without the "v" */
f4758303 13228 s = start;
dd629d5b
GS
13229 goto vstring;
13230 }
378cc40b 13231 }
02aa26ce
NT
13232
13233 /* read exponent part, if present */
3792a11b 13234 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13235 floatit = TRUE;
13236 s++;
02aa26ce
NT
13237
13238 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13239 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13240
7fd134d9
JH
13241 /* stray preinitial _ */
13242 if (*s == '_') {
a2a5de95
NC
13243 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13244 "Misplaced _ in number");
7fd134d9
JH
13245 lastub = s++;
13246 }
13247
02aa26ce 13248 /* allow positive or negative exponent */
378cc40b
LW
13249 if (*s == '+' || *s == '-')
13250 *d++ = *s++;
02aa26ce 13251
7fd134d9
JH
13252 /* stray initial _ */
13253 if (*s == '_') {
a2a5de95
NC
13254 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13255 "Misplaced _ in number");
7fd134d9
JH
13256 lastub = s++;
13257 }
13258
7fd134d9
JH
13259 /* read digits of exponent */
13260 while (isDIGIT(*s) || *s == '_') {
13261 if (isDIGIT(*s)) {
13262 if (d >= e)
13263 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13264 *d++ = *s++;
7fd134d9
JH
13265 }
13266 else {
041457d9 13267 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13268 (!isDIGIT(s[1]) && s[1] != '_')))
13269 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13270 "Misplaced _ in number");
b3b48e3e 13271 lastub = s++;
7fd134d9 13272 }
7fd134d9 13273 }
378cc40b 13274 }
02aa26ce 13275
02aa26ce 13276
0b7fceb9 13277 /*
58bb9ec3
NC
13278 We try to do an integer conversion first if no characters
13279 indicating "float" have been found.
0b7fceb9
MU
13280 */
13281
13282 if (!floatit) {
58bb9ec3 13283 UV uv;
6136c704 13284 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13285
13286 if (flags == IS_NUMBER_IN_UV) {
13287 if (uv <= IV_MAX)
b081dd7e 13288 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13289 else
b081dd7e 13290 sv = newSVuv(uv);
58bb9ec3
NC
13291 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13292 if (uv <= (UV) IV_MIN)
b081dd7e 13293 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13294 else
13295 floatit = TRUE;
13296 } else
13297 floatit = TRUE;
13298 }
0b7fceb9 13299 if (floatit) {
58bb9ec3
NC
13300 /* terminate the string */
13301 *d = '\0';
86554af2 13302 nv = Atof(PL_tokenbuf);
b081dd7e 13303 sv = newSVnv(nv);
86554af2 13304 }
86554af2 13305
eb0d8d16
NC
13306 if ( floatit
13307 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13308 const char *const key = floatit ? "float" : "integer";
13309 const STRLEN keylen = floatit ? 5 : 7;
13310 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13311 key, keylen, sv, NULL, NULL, 0);
13312 }
378cc40b 13313 break;
0b7fceb9 13314
e312add1 13315 /* if it starts with a v, it could be a v-string */
a7cb1f99 13316 case 'v':
dd629d5b 13317vstring:
561b68a9 13318 sv = newSV(5); /* preallocate storage space */
65b06e02 13319 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13320 break;
79072805 13321 }
a687059c 13322
02aa26ce
NT
13323 /* make the op for the constant and return */
13324
a86a20aa 13325 if (sv)
b73d6f50 13326 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13327 else
5f66b61c 13328 lvalp->opval = NULL;
a687059c 13329
73d840c0 13330 return (char *)s;
378cc40b
LW
13331}
13332
76e3520e 13333STATIC char *
cea2e8a9 13334S_scan_formline(pTHX_ register char *s)
378cc40b 13335{
97aff369 13336 dVAR;
79072805 13337 register char *eol;
378cc40b 13338 register char *t;
6136c704 13339 SV * const stuff = newSVpvs("");
79072805 13340 bool needargs = FALSE;
c5ee2135 13341 bool eofmt = FALSE;
5db06880
NC
13342#ifdef PERL_MAD
13343 char *tokenstart = s;
4f61fd4b
JC
13344 SV* savewhite = NULL;
13345
5db06880 13346 if (PL_madskills) {
cd81e915
NC
13347 savewhite = PL_thiswhite;
13348 PL_thiswhite = 0;
5db06880
NC
13349 }
13350#endif
378cc40b 13351
7918f24d
NC
13352 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13353
79072805 13354 while (!needargs) {
a1b95068 13355 if (*s == '.') {
c35e046a 13356 t = s+1;
51882d45 13357#ifdef PERL_STRICT_CR
c35e046a
AL
13358 while (SPACE_OR_TAB(*t))
13359 t++;
51882d45 13360#else
c35e046a
AL
13361 while (SPACE_OR_TAB(*t) || *t == '\r')
13362 t++;
51882d45 13363#endif
c5ee2135
WL
13364 if (*t == '\n' || t == PL_bufend) {
13365 eofmt = TRUE;
79072805 13366 break;
c5ee2135 13367 }
79072805 13368 }
3280af22 13369 if (PL_in_eval && !PL_rsfp) {
07409e01 13370 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13371 if (!eol++)
3280af22 13372 eol = PL_bufend;
0f85fab0
LW
13373 }
13374 else
3280af22 13375 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13376 if (*s != '#') {
a0d0e21e
LW
13377 for (t = s; t < eol; t++) {
13378 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13379 needargs = FALSE;
13380 goto enough; /* ~~ must be first line in formline */
378cc40b 13381 }
a0d0e21e
LW
13382 if (*t == '@' || *t == '^')
13383 needargs = TRUE;
378cc40b 13384 }
7121b347
MG
13385 if (eol > s) {
13386 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13387#ifndef PERL_STRICT_CR
7121b347
MG
13388 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13389 char *end = SvPVX(stuff) + SvCUR(stuff);
13390 end[-2] = '\n';
13391 end[-1] = '\0';
b162af07 13392 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13393 }
2dc4c65b 13394#endif
7121b347
MG
13395 }
13396 else
13397 break;
79072805 13398 }
95a20fc0 13399 s = (char*)eol;
3280af22 13400 if (PL_rsfp) {
f0e67a1d 13401 bool got_some;
5db06880
NC
13402#ifdef PERL_MAD
13403 if (PL_madskills) {
cd81e915
NC
13404 if (PL_thistoken)
13405 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13406 else
cd81e915 13407 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13408 }
13409#endif
f0e67a1d
Z
13410 PL_bufptr = PL_bufend;
13411 CopLINE_inc(PL_curcop);
13412 got_some = lex_next_chunk(0);
13413 CopLINE_dec(PL_curcop);
13414 s = PL_bufptr;
5db06880 13415#ifdef PERL_MAD
f0e67a1d 13416 tokenstart = PL_bufptr;
5db06880 13417#endif
f0e67a1d 13418 if (!got_some)
378cc40b 13419 break;
378cc40b 13420 }
463ee0b2 13421 incline(s);
79072805 13422 }
a0d0e21e
LW
13423 enough:
13424 if (SvCUR(stuff)) {
3280af22 13425 PL_expect = XTERM;
79072805 13426 if (needargs) {
3280af22 13427 PL_lex_state = LEX_NORMAL;
cd81e915 13428 start_force(PL_curforce);
9ded7720 13429 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13430 force_next(',');
13431 }
a0d0e21e 13432 else
3280af22 13433 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13434 if (!IN_BYTES) {
95a20fc0 13435 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13436 SvUTF8_on(stuff);
13437 else if (PL_encoding)
13438 sv_recode_to_utf8(stuff, PL_encoding);
13439 }
cd81e915 13440 start_force(PL_curforce);
9ded7720 13441 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13442 force_next(THING);
cd81e915 13443 start_force(PL_curforce);
9ded7720 13444 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13445 force_next(LSTOP);
378cc40b 13446 }
79072805 13447 else {
8990e307 13448 SvREFCNT_dec(stuff);
c5ee2135
WL
13449 if (eofmt)
13450 PL_lex_formbrack = 0;
3280af22 13451 PL_bufptr = s;
79072805 13452 }
5db06880
NC
13453#ifdef PERL_MAD
13454 if (PL_madskills) {
cd81e915
NC
13455 if (PL_thistoken)
13456 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13457 else
cd81e915
NC
13458 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13459 PL_thiswhite = savewhite;
5db06880
NC
13460 }
13461#endif
79072805 13462 return s;
378cc40b 13463}
a687059c 13464
ba6d6ac9 13465I32
864dbfa3 13466Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13467{
97aff369 13468 dVAR;
a3b680e6 13469 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13470 CV* const outsidecv = PL_compcv;
8990e307 13471
3280af22
NIS
13472 if (PL_compcv) {
13473 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13474 }
7766f137 13475 SAVEI32(PL_subline);
3280af22 13476 save_item(PL_subname);
3280af22 13477 SAVESPTR(PL_compcv);
3280af22 13478
ea726b52 13479 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13480 CvFLAGS(PL_compcv) |= flags;
13481
57843af0 13482 PL_subline = CopLINE(PL_curcop);
dd2155a4 13483 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13484 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13485 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13486
8990e307
LW
13487 return oldsavestack_ix;
13488}
13489
084592ab
CN
13490#ifdef __SC__
13491#pragma segment Perl_yylex
13492#endif
af41e527
NC
13493static int
13494S_yywarn(pTHX_ const char *const s)
8990e307 13495{
97aff369 13496 dVAR;
7918f24d
NC
13497
13498 PERL_ARGS_ASSERT_YYWARN;
13499
faef0170 13500 PL_in_eval |= EVAL_WARNONLY;
748a9306 13501 yyerror(s);
faef0170 13502 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13503 return 0;
8990e307
LW
13504}
13505
13506int
15f169a1 13507Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13508{
97aff369 13509 dVAR;
bfed75c6
AL
13510 const char *where = NULL;
13511 const char *context = NULL;
68dc0745 13512 int contlen = -1;
46fc3d4c 13513 SV *msg;
5912531f 13514 int yychar = PL_parser->yychar;
463ee0b2 13515
7918f24d
NC
13516 PERL_ARGS_ASSERT_YYERROR;
13517
3280af22 13518 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13519 where = "at EOF";
8bcfe651
TM
13520 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13521 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13522 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13523 /*
13524 Only for NetWare:
13525 The code below is removed for NetWare because it abends/crashes on NetWare
13526 when the script has error such as not having the closing quotes like:
13527 if ($var eq "value)
13528 Checking of white spaces is anyway done in NetWare code.
13529 */
13530#ifndef NETWARE
3280af22
NIS
13531 while (isSPACE(*PL_oldoldbufptr))
13532 PL_oldoldbufptr++;
f355267c 13533#endif
3280af22
NIS
13534 context = PL_oldoldbufptr;
13535 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13536 }
8bcfe651
TM
13537 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13538 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13539 /*
13540 Only for NetWare:
13541 The code below is removed for NetWare because it abends/crashes on NetWare
13542 when the script has error such as not having the closing quotes like:
13543 if ($var eq "value)
13544 Checking of white spaces is anyway done in NetWare code.
13545 */
13546#ifndef NETWARE
3280af22
NIS
13547 while (isSPACE(*PL_oldbufptr))
13548 PL_oldbufptr++;
f355267c 13549#endif
3280af22
NIS
13550 context = PL_oldbufptr;
13551 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13552 }
13553 else if (yychar > 255)
68dc0745 13554 where = "next token ???";
12fbd33b 13555 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13556 if (PL_lex_state == LEX_NORMAL ||
13557 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13558 where = "at end of line";
3280af22 13559 else if (PL_lex_inpat)
68dc0745 13560 where = "within pattern";
463ee0b2 13561 else
68dc0745 13562 where = "within string";
463ee0b2 13563 }
46fc3d4c 13564 else {
84bafc02 13565 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13566 if (yychar < 32)
cea2e8a9 13567 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13568 else if (isPRINT_LC(yychar)) {
88c9ea1e 13569 const char string = yychar;
5e7aa789
NC
13570 sv_catpvn(where_sv, &string, 1);
13571 }
463ee0b2 13572 else
cea2e8a9 13573 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13574 where = SvPVX_const(where_sv);
463ee0b2 13575 }
46fc3d4c 13576 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13577 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13578 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13579 if (context)
cea2e8a9 13580 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13581 else
cea2e8a9 13582 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13583 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13584 Perl_sv_catpvf(aTHX_ msg,
57def98f 13585 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13586 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13587 PL_multi_end = 0;
a0d0e21e 13588 }
500960a6 13589 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13590 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13591 }
463ee0b2 13592 else
5a844595 13593 qerror(msg);
c7d6bfb2
GS
13594 if (PL_error_count >= 10) {
13595 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13596 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13597 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13598 else
13599 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13600 OutCopFILE(PL_curcop));
c7d6bfb2 13601 }
3280af22 13602 PL_in_my = 0;
5c284bb0 13603 PL_in_my_stash = NULL;
463ee0b2
LW
13604 return 0;
13605}
084592ab
CN
13606#ifdef __SC__
13607#pragma segment Main
13608#endif
4e35701f 13609
b250498f 13610STATIC char*
3ae08724 13611S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13612{
97aff369 13613 dVAR;
f54cb97a 13614 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13615
13616 PERL_ARGS_ASSERT_SWALLOW_BOM;
13617
7aa207d6 13618 switch (s[0]) {
4e553d73
NIS
13619 case 0xFF:
13620 if (s[1] == 0xFE) {
ee6ba15d 13621 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13622 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13623 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13624#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13625 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13626 s += 2;
dea0fc0b 13627 if (PL_bufend > (char*)s) {
81a923f4 13628 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13629 }
b250498f 13630#else
ee6ba15d 13631 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13632#endif
01ec43d0
GS
13633 }
13634 break;
78ae23f5 13635 case 0xFE:
7aa207d6 13636 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13637#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13638 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13639 s += 2;
13640 if (PL_bufend > (char *)s) {
81a923f4 13641 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13642 }
b250498f 13643#else
ee6ba15d 13644 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13645#endif
01ec43d0
GS
13646 }
13647 break;
3ae08724
GS
13648 case 0xEF:
13649 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13650 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13651 s += 3; /* UTF-8 */
13652 }
13653 break;
13654 case 0:
7aa207d6
JH
13655 if (slen > 3) {
13656 if (s[1] == 0) {
13657 if (s[2] == 0xFE && s[3] == 0xFF) {
13658 /* UTF-32 big-endian */
ee6ba15d 13659 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13660 }
13661 }
13662 else if (s[2] == 0 && s[3] != 0) {
13663 /* Leading bytes
13664 * 00 xx 00 xx
13665 * are a good indicator of UTF-16BE. */
ee6ba15d 13666#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13667 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13668 s = add_utf16_textfilter(s, FALSE);
13669#else
13670 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13671#endif
7aa207d6 13672 }
01ec43d0 13673 }
e294cc5d
JH
13674#ifdef EBCDIC
13675 case 0xDD:
13676 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13677 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13678 s += 4; /* UTF-8 */
13679 }
13680 break;
13681#endif
13682
7aa207d6
JH
13683 default:
13684 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13685 /* Leading bytes
13686 * xx 00 xx 00
13687 * are a good indicator of UTF-16LE. */
ee6ba15d 13688#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13689 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13690 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13691#else
13692 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13693#endif
7aa207d6 13694 }
01ec43d0 13695 }
b8f84bb2 13696 return (char*)s;
b250498f 13697}
4755096e 13698
6e3aabd6
GS
13699
13700#ifndef PERL_NO_UTF16_FILTER
13701static I32
a28af015 13702S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13703{
97aff369 13704 dVAR;
f3040f2c 13705 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13706 /* We re-use this each time round, throwing the contents away before we
13707 return. */
2a773401 13708 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13709 SV *const utf8_buffer = filter;
c28d6105 13710 IV status = IoPAGE(filter);
f2338a2e 13711 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13712 I32 retval;
c8b0cbae 13713
c85ae797
NC
13714 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13715
c8b0cbae
NC
13716 /* As we're automatically added, at the lowest level, and hence only called
13717 from this file, we can be sure that we're not called in block mode. Hence
13718 don't bother writing code to deal with block mode. */
13719 if (maxlen) {
13720 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13721 }
c28d6105
NC
13722 if (status < 0) {
13723 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13724 }
1de9afcd 13725 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13726 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13727 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13728 reverse ? 'l' : 'b', idx, maxlen, status,
13729 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13730
13731 while (1) {
13732 STRLEN chars;
13733 STRLEN have;
dea0fc0b 13734 I32 newlen;
2a773401 13735 U8 *end;
c28d6105
NC
13736 /* First, look in our buffer of existing UTF-8 data: */
13737 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13738
13739 if (nl) {
13740 ++nl;
13741 } else if (status == 0) {
13742 /* EOF */
13743 IoPAGE(filter) = 0;
13744 nl = SvEND(utf8_buffer);
13745 }
13746 if (nl) {
d2d1d4de
NC
13747 STRLEN got = nl - SvPVX(utf8_buffer);
13748 /* Did we have anything to append? */
13749 retval = got != 0;
13750 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13751 /* Everything else in this code works just fine if SVp_POK isn't
13752 set. This, however, needs it, and we need it to work, else
13753 we loop infinitely because the buffer is never consumed. */
13754 sv_chop(utf8_buffer, nl);
13755 break;
13756 }
ba77e4cc 13757
c28d6105
NC
13758 /* OK, not a complete line there, so need to read some more UTF-16.
13759 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13760 while (1) {
13761 if (status <= 0)
13762 break;
13763 if (SvCUR(utf16_buffer) >= 2) {
13764 /* Location of the high octet of the last complete code point.
13765 Gosh, UTF-16 is a pain. All the benefits of variable length,
13766 *coupled* with all the benefits of partial reads and
13767 endianness. */
13768 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13769 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13770
13771 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13772 break;
13773 }
13774
13775 /* We have the first half of a surrogate. Read more. */
13776 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13777 }
c28d6105 13778
c28d6105
NC
13779 status = FILTER_READ(idx + 1, utf16_buffer,
13780 160 + (SvCUR(utf16_buffer) & 1));
13781 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13782 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13783 if (status < 0) {
13784 /* Error */
13785 IoPAGE(filter) = status;
13786 return status;
13787 }
13788 }
13789
13790 chars = SvCUR(utf16_buffer) >> 1;
13791 have = SvCUR(utf8_buffer);
13792 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13793
aa6dbd60 13794 if (reverse) {
c28d6105
NC
13795 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13796 (U8*)SvPVX_const(utf8_buffer) + have,
13797 chars * 2, &newlen);
aa6dbd60 13798 } else {
2a773401 13799 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13800 (U8*)SvPVX_const(utf8_buffer) + have,
13801 chars * 2, &newlen);
2a773401 13802 }
c28d6105 13803 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13804 *end = '\0';
c28d6105 13805
e07286ed
NC
13806 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13807 it's private to us, and utf16_to_utf8{,reversed} take a
13808 (pointer,length) pair, rather than a NUL-terminated string. */
13809 if(SvCUR(utf16_buffer) & 1) {
13810 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13811 SvCUR_set(utf16_buffer, 1);
13812 } else {
13813 SvCUR_set(utf16_buffer, 0);
13814 }
2a773401 13815 }
c28d6105
NC
13816 DEBUG_P(PerlIO_printf(Perl_debug_log,
13817 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13818 status,
13819 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13820 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13821 return retval;
6e3aabd6 13822}
81a923f4
NC
13823
13824static U8 *
13825S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13826{
2a773401 13827 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13828
c85ae797
NC
13829 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13830
c28d6105 13831 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13832 sv_setpvs(filter, "");
2a773401 13833 IoLINES(filter) = reversed;
c28d6105
NC
13834 IoPAGE(filter) = 1; /* Not EOF */
13835
13836 /* Sadly, we have to return a valid pointer, come what may, so we have to
13837 ignore any error return from this. */
13838 SvCUR_set(PL_linestr, 0);
13839 if (FILTER_READ(0, PL_linestr, 0)) {
13840 SvUTF8_on(PL_linestr);
81a923f4 13841 } else {
c28d6105 13842 SvUTF8_on(PL_linestr);
81a923f4 13843 }
c28d6105 13844 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13845 return (U8*)SvPVX(PL_linestr);
13846}
6e3aabd6 13847#endif
9f4817db 13848
f333445c
JP
13849/*
13850Returns a pointer to the next character after the parsed
13851vstring, as well as updating the passed in sv.
13852
13853Function must be called like
13854
561b68a9 13855 sv = newSV(5);
65b06e02 13856 s = scan_vstring(s,e,sv);
f333445c 13857
65b06e02 13858where s and e are the start and end of the string.
f333445c
JP
13859The sv should already be large enough to store the vstring
13860passed in, for performance reasons.
13861
13862*/
13863
13864char *
15f169a1 13865Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13866{
97aff369 13867 dVAR;
bfed75c6
AL
13868 const char *pos = s;
13869 const char *start = s;
7918f24d
NC
13870
13871 PERL_ARGS_ASSERT_SCAN_VSTRING;
13872
f333445c 13873 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13874 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13875 pos++;
f333445c
JP
13876 if ( *pos != '.') {
13877 /* this may not be a v-string if followed by => */
bfed75c6 13878 const char *next = pos;
65b06e02 13879 while (next < e && isSPACE(*next))
8fc7bb1c 13880 ++next;
65b06e02 13881 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13882 /* return string not v-string */
13883 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13884 return (char *)pos;
f333445c
JP
13885 }
13886 }
13887
13888 if (!isALPHA(*pos)) {
89ebb4a3 13889 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13890
d4c19fe8
AL
13891 if (*s == 'v')
13892 s++; /* get past 'v' */
f333445c 13893
76f68e9b 13894 sv_setpvs(sv, "");
f333445c
JP
13895
13896 for (;;) {
d4c19fe8 13897 /* this is atoi() that tolerates underscores */
0bd48802
AL
13898 U8 *tmpend;
13899 UV rev = 0;
d4c19fe8
AL
13900 const char *end = pos;
13901 UV mult = 1;
13902 while (--end >= s) {
13903 if (*end != '_') {
13904 const UV orev = rev;
f333445c
JP
13905 rev += (*end - '0') * mult;
13906 mult *= 10;
9b387841
NC
13907 if (orev > rev)
13908 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13909 "Integer overflow in decimal number");
f333445c
JP
13910 }
13911 }
13912#ifdef EBCDIC
13913 if (rev > 0x7FFFFFFF)
13914 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13915#endif
13916 /* Append native character for the rev point */
13917 tmpend = uvchr_to_utf8(tmpbuf, rev);
13918 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13919 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13920 SvUTF8_on(sv);
65b06e02 13921 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13922 s = ++pos;
13923 else {
13924 s = pos;
13925 break;
13926 }
65b06e02 13927 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13928 pos++;
13929 }
13930 SvPOK_on(sv);
13931 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13932 SvRMAGICAL_on(sv);
13933 }
73d840c0 13934 return (char *)s;
f333445c
JP
13935}
13936
88e1f1a2
JV
13937int
13938Perl_keyword_plugin_standard(pTHX_
13939 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13940{
13941 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13942 PERL_UNUSED_CONTEXT;
13943 PERL_UNUSED_ARG(keyword_ptr);
13944 PERL_UNUSED_ARG(keyword_len);
13945 PERL_UNUSED_ARG(op_ptr);
13946 return KEYWORD_PLUGIN_DECLINE;
13947}
13948
1da4ca5f
NC
13949/*
13950 * Local variables:
13951 * c-indentation-style: bsd
13952 * c-basic-offset: 4
13953 * indent-tabs-mode: t
13954 * End:
13955 *
37442d52
RGS
13956 * ex: set ts=8 sts=4 sw=4 noet:
13957 */