This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
revert epigraphs.pod note in release manager's guide
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
3e5c0189 717 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
719a9bb0
NC
718 /* avoid tie/overload weirdness */
719 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 720 if (s[len-1] != ';')
bdc0bf6f 721 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
722 } else {
723 SvTEMP_off(line);
724 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 725 parser->linestr = line;
8990e307 726 }
f06b5848
DM
727 parser->oldoldbufptr =
728 parser->oldbufptr =
729 parser->bufptr =
730 parser->linestart = SvPVX(parser->linestr);
731 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
732 parser->last_lop = parser->last_uni = NULL;
79072805 733}
a687059c 734
e3abe207
DM
735
736/* delete a parser object */
737
738void
739Perl_parser_free(pTHX_ const yy_parser *parser)
740{
7918f24d
NC
741 PERL_ARGS_ASSERT_PARSER_FREE;
742
7c4baf47 743 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
744 SvREFCNT_dec(parser->linestr);
745
2f9285f8
DM
746 if (parser->rsfp == PerlIO_stdin())
747 PerlIO_clearerr(parser->rsfp);
799361c3
SH
748 else if (parser->rsfp && (!parser->old_parser ||
749 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 750 PerlIO_close(parser->rsfp);
5486870f 751 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 752
e3abe207
DM
753 Safefree(parser->stack);
754 Safefree(parser->lex_brackstack);
755 Safefree(parser->lex_casestack);
756 PL_parser = parser->old_parser;
757 Safefree(parser);
758}
759
760
ffb4593c
NT
761/*
762 * Perl_lex_end
9cbb5ea2
GS
763 * Finalizer for lexing operations. Must be called when the parser is
764 * done with the lexer.
ffb4593c
NT
765 */
766
463ee0b2 767void
864dbfa3 768Perl_lex_end(pTHX)
463ee0b2 769{
97aff369 770 dVAR;
3280af22 771 PL_doextract = FALSE;
463ee0b2
LW
772}
773
ffb4593c 774/*
f0e67a1d
Z
775=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
776
777Buffer scalar containing the chunk currently under consideration of the
778text currently being lexed. This is always a plain string scalar (for
779which C<SvPOK> is true). It is not intended to be used as a scalar by
780normal scalar means; instead refer to the buffer directly by the pointer
781variables described below.
782
783The lexer maintains various C<char*> pointers to things in the
784C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
785reallocated, all of these pointers must be updated. Don't attempt to
786do this manually, but rather use L</lex_grow_linestr> if you need to
787reallocate the buffer.
788
789The content of the text chunk in the buffer is commonly exactly one
790complete line of input, up to and including a newline terminator,
791but there are situations where it is otherwise. The octets of the
792buffer may be intended to be interpreted as either UTF-8 or Latin-1.
793The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
794flag on this scalar, which may disagree with it.
795
796For direct examination of the buffer, the variable
797L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
798lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
799of these pointers is usually preferable to examination of the scalar
800through normal scalar means.
801
802=for apidoc AmxU|char *|PL_parser-E<gt>bufend
803
804Direct pointer to the end of the chunk of text currently being lexed, the
805end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
806+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
807always located at the end of the buffer, and does not count as part of
808the buffer's contents.
809
810=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
811
812Points to the current position of lexing inside the lexer buffer.
813Characters around this point may be freely examined, within
814the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
815L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
816interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
817
818Lexing code (whether in the Perl core or not) moves this pointer past
819the characters that it consumes. It is also expected to perform some
820bookkeeping whenever a newline character is consumed. This movement
821can be more conveniently performed by the function L</lex_read_to>,
822which handles newlines appropriately.
823
824Interpretation of the buffer's octets can be abstracted out by
825using the slightly higher-level functions L</lex_peek_unichar> and
826L</lex_read_unichar>.
827
828=for apidoc AmxU|char *|PL_parser-E<gt>linestart
829
830Points to the start of the current line inside the lexer buffer.
831This is useful for indicating at which column an error occurred, and
832not much else. This must be updated by any lexing code that consumes
833a newline; the function L</lex_read_to> handles this detail.
834
835=cut
836*/
837
838/*
839=for apidoc Amx|bool|lex_bufutf8
840
841Indicates whether the octets in the lexer buffer
842(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
843of Unicode characters. If not, they should be interpreted as Latin-1
844characters. This is analogous to the C<SvUTF8> flag for scalars.
845
846In UTF-8 mode, it is not guaranteed that the lexer buffer actually
847contains valid UTF-8. Lexing code must be robust in the face of invalid
848encoding.
849
850The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
851is significant, but not the whole story regarding the input character
852encoding. Normally, when a file is being read, the scalar contains octets
853and its C<SvUTF8> flag is off, but the octets should be interpreted as
854UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
855however, the scalar may have the C<SvUTF8> flag on, and in this case its
856octets should be interpreted as UTF-8 unless the C<use bytes> pragma
857is in effect. This logic may change in the future; use this function
858instead of implementing the logic yourself.
859
860=cut
861*/
862
863bool
864Perl_lex_bufutf8(pTHX)
865{
866 return UTF;
867}
868
869/*
870=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
871
872Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
873at least I<len> octets (including terminating NUL). Returns a
874pointer to the reallocated buffer. This is necessary before making
875any direct modification of the buffer that would increase its length.
876L</lex_stuff_pvn> provides a more convenient way to insert text into
877the buffer.
878
879Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
880this function updates all of the lexer's variables that point directly
881into the buffer.
882
883=cut
884*/
885
886char *
887Perl_lex_grow_linestr(pTHX_ STRLEN len)
888{
889 SV *linestr;
890 char *buf;
891 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
892 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
893 linestr = PL_parser->linestr;
894 buf = SvPVX(linestr);
895 if (len <= SvLEN(linestr))
896 return buf;
897 bufend_pos = PL_parser->bufend - buf;
898 bufptr_pos = PL_parser->bufptr - buf;
899 oldbufptr_pos = PL_parser->oldbufptr - buf;
900 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
901 linestart_pos = PL_parser->linestart - buf;
902 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
903 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
904 buf = sv_grow(linestr, len);
905 PL_parser->bufend = buf + bufend_pos;
906 PL_parser->bufptr = buf + bufptr_pos;
907 PL_parser->oldbufptr = buf + oldbufptr_pos;
908 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
909 PL_parser->linestart = buf + linestart_pos;
910 if (PL_parser->last_uni)
911 PL_parser->last_uni = buf + last_uni_pos;
912 if (PL_parser->last_lop)
913 PL_parser->last_lop = buf + last_lop_pos;
914 return buf;
915}
916
917/*
83aa740e 918=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
919
920Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
921immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
922reallocating the buffer if necessary. This means that lexing code that
923runs later will see the characters as if they had appeared in the input.
924It is not recommended to do this as part of normal parsing, and most
925uses of this facility run the risk of the inserted characters being
926interpreted in an unintended manner.
927
928The string to be inserted is represented by I<len> octets starting
929at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
930according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
931The characters are recoded for the lexer buffer, according to how the
932buffer is currently being interpreted (L</lex_bufutf8>). If a string
933to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
934function is more convenient.
935
936=cut
937*/
938
939void
83aa740e 940Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 941{
749123ff 942 dVAR;
f0e67a1d
Z
943 char *bufptr;
944 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
945 if (flags & ~(LEX_STUFF_UTF8))
946 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
947 if (UTF) {
948 if (flags & LEX_STUFF_UTF8) {
949 goto plain_copy;
950 } else {
951 STRLEN highhalf = 0;
83aa740e 952 const char *p, *e = pv+len;
f0e67a1d
Z
953 for (p = pv; p != e; p++)
954 highhalf += !!(((U8)*p) & 0x80);
955 if (!highhalf)
956 goto plain_copy;
957 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
958 bufptr = PL_parser->bufptr;
959 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
960 SvCUR_set(PL_parser->linestr,
961 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
962 PL_parser->bufend += len+highhalf;
963 for (p = pv; p != e; p++) {
964 U8 c = (U8)*p;
965 if (c & 0x80) {
966 *bufptr++ = (char)(0xc0 | (c >> 6));
967 *bufptr++ = (char)(0x80 | (c & 0x3f));
968 } else {
969 *bufptr++ = (char)c;
970 }
971 }
972 }
973 } else {
974 if (flags & LEX_STUFF_UTF8) {
975 STRLEN highhalf = 0;
83aa740e 976 const char *p, *e = pv+len;
f0e67a1d
Z
977 for (p = pv; p != e; p++) {
978 U8 c = (U8)*p;
979 if (c >= 0xc4) {
980 Perl_croak(aTHX_ "Lexing code attempted to stuff "
981 "non-Latin-1 character into Latin-1 input");
982 } else if (c >= 0xc2 && p+1 != e &&
983 (((U8)p[1]) & 0xc0) == 0x80) {
984 p++;
985 highhalf++;
986 } else if (c >= 0x80) {
987 /* malformed UTF-8 */
988 ENTER;
989 SAVESPTR(PL_warnhook);
990 PL_warnhook = PERL_WARNHOOK_FATAL;
991 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
992 LEAVE;
993 }
994 }
995 if (!highhalf)
996 goto plain_copy;
997 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
998 bufptr = PL_parser->bufptr;
999 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1000 SvCUR_set(PL_parser->linestr,
1001 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1002 PL_parser->bufend += len-highhalf;
1003 for (p = pv; p != e; p++) {
1004 U8 c = (U8)*p;
1005 if (c & 0x80) {
1006 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1007 p++;
1008 } else {
1009 *bufptr++ = (char)c;
1010 }
1011 }
1012 } else {
1013 plain_copy:
1014 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1015 bufptr = PL_parser->bufptr;
1016 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1017 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1018 PL_parser->bufend += len;
1019 Copy(pv, bufptr, len, char);
1020 }
1021 }
1022}
1023
1024/*
1025=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1026
1027Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1028immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1029reallocating the buffer if necessary. This means that lexing code that
1030runs later will see the characters as if they had appeared in the input.
1031It is not recommended to do this as part of normal parsing, and most
1032uses of this facility run the risk of the inserted characters being
1033interpreted in an unintended manner.
1034
1035The string to be inserted is the string value of I<sv>. The characters
1036are recoded for the lexer buffer, according to how the buffer is currently
1037being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1038not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1039need to construct a scalar.
1040
1041=cut
1042*/
1043
1044void
1045Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1046{
1047 char *pv;
1048 STRLEN len;
1049 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1050 if (flags)
1051 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1052 pv = SvPV(sv, len);
1053 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1054}
1055
1056/*
1057=for apidoc Amx|void|lex_unstuff|char *ptr
1058
1059Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1060I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1061This hides the discarded text from any lexing code that runs later,
1062as if the text had never appeared.
1063
1064This is not the normal way to consume lexed text. For that, use
1065L</lex_read_to>.
1066
1067=cut
1068*/
1069
1070void
1071Perl_lex_unstuff(pTHX_ char *ptr)
1072{
1073 char *buf, *bufend;
1074 STRLEN unstuff_len;
1075 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1076 buf = PL_parser->bufptr;
1077 if (ptr < buf)
1078 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1079 if (ptr == buf)
1080 return;
1081 bufend = PL_parser->bufend;
1082 if (ptr > bufend)
1083 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1084 unstuff_len = ptr - buf;
1085 Move(ptr, buf, bufend+1-ptr, char);
1086 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1087 PL_parser->bufend = bufend - unstuff_len;
1088}
1089
1090/*
1091=for apidoc Amx|void|lex_read_to|char *ptr
1092
1093Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1094to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1095performing the correct bookkeeping whenever a newline character is passed.
1096This is the normal way to consume lexed text.
1097
1098Interpretation of the buffer's octets can be abstracted out by
1099using the slightly higher-level functions L</lex_peek_unichar> and
1100L</lex_read_unichar>.
1101
1102=cut
1103*/
1104
1105void
1106Perl_lex_read_to(pTHX_ char *ptr)
1107{
1108 char *s;
1109 PERL_ARGS_ASSERT_LEX_READ_TO;
1110 s = PL_parser->bufptr;
1111 if (ptr < s || ptr > PL_parser->bufend)
1112 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1113 for (; s != ptr; s++)
1114 if (*s == '\n') {
1115 CopLINE_inc(PL_curcop);
1116 PL_parser->linestart = s+1;
1117 }
1118 PL_parser->bufptr = ptr;
1119}
1120
1121/*
1122=for apidoc Amx|void|lex_discard_to|char *ptr
1123
1124Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1125up to I<ptr>. The remaining content of the buffer will be moved, and
1126all pointers into the buffer updated appropriately. I<ptr> must not
1127be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1128it is not permitted to discard text that has yet to be lexed.
1129
1130Normally it is not necessarily to do this directly, because it suffices to
1131use the implicit discarding behaviour of L</lex_next_chunk> and things
1132based on it. However, if a token stretches across multiple lines,
1f317c95 1133and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1134that purpose, then after completion of the token it would be wise to
1135explicitly discard the now-unneeded earlier lines, to avoid future
1136multi-line tokens growing the buffer without bound.
1137
1138=cut
1139*/
1140
1141void
1142Perl_lex_discard_to(pTHX_ char *ptr)
1143{
1144 char *buf;
1145 STRLEN discard_len;
1146 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1147 buf = SvPVX(PL_parser->linestr);
1148 if (ptr < buf)
1149 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1150 if (ptr == buf)
1151 return;
1152 if (ptr > PL_parser->bufptr)
1153 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1154 discard_len = ptr - buf;
1155 if (PL_parser->oldbufptr < ptr)
1156 PL_parser->oldbufptr = ptr;
1157 if (PL_parser->oldoldbufptr < ptr)
1158 PL_parser->oldoldbufptr = ptr;
1159 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1160 PL_parser->last_uni = NULL;
1161 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1162 PL_parser->last_lop = NULL;
1163 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1164 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1165 PL_parser->bufend -= discard_len;
1166 PL_parser->bufptr -= discard_len;
1167 PL_parser->oldbufptr -= discard_len;
1168 PL_parser->oldoldbufptr -= discard_len;
1169 if (PL_parser->last_uni)
1170 PL_parser->last_uni -= discard_len;
1171 if (PL_parser->last_lop)
1172 PL_parser->last_lop -= discard_len;
1173}
1174
1175/*
1176=for apidoc Amx|bool|lex_next_chunk|U32 flags
1177
1178Reads in the next chunk of text to be lexed, appending it to
1179L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1180looked to the end of the current chunk and wants to know more. It is
1181usual, but not necessary, for lexing to have consumed the entirety of
1182the current chunk at this time.
1183
1184If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1185chunk (i.e., the current chunk has been entirely consumed), normally the
1186current chunk will be discarded at the same time that the new chunk is
1187read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1188will not be discarded. If the current chunk has not been entirely
1189consumed, then it will not be discarded regardless of the flag.
1190
1191Returns true if some new text was added to the buffer, or false if the
1192buffer has reached the end of the input text.
1193
1194=cut
1195*/
1196
1197#define LEX_FAKE_EOF 0x80000000
1198
1199bool
1200Perl_lex_next_chunk(pTHX_ U32 flags)
1201{
1202 SV *linestr;
1203 char *buf;
1204 STRLEN old_bufend_pos, new_bufend_pos;
1205 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1206 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1207 bool got_some_for_debugger = 0;
f0e67a1d
Z
1208 bool got_some;
1209 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1211 linestr = PL_parser->linestr;
1212 buf = SvPVX(linestr);
1213 if (!(flags & LEX_KEEP_PREVIOUS) &&
1214 PL_parser->bufptr == PL_parser->bufend) {
1215 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1216 linestart_pos = 0;
1217 if (PL_parser->last_uni != PL_parser->bufend)
1218 PL_parser->last_uni = NULL;
1219 if (PL_parser->last_lop != PL_parser->bufend)
1220 PL_parser->last_lop = NULL;
1221 last_uni_pos = last_lop_pos = 0;
1222 *buf = 0;
1223 SvCUR(linestr) = 0;
1224 } else {
1225 old_bufend_pos = PL_parser->bufend - buf;
1226 bufptr_pos = PL_parser->bufptr - buf;
1227 oldbufptr_pos = PL_parser->oldbufptr - buf;
1228 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1229 linestart_pos = PL_parser->linestart - buf;
1230 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1231 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1232 }
1233 if (flags & LEX_FAKE_EOF) {
1234 goto eof;
1235 } else if (!PL_parser->rsfp) {
1236 got_some = 0;
1237 } else if (filter_gets(linestr, old_bufend_pos)) {
1238 got_some = 1;
17cc9359 1239 got_some_for_debugger = 1;
f0e67a1d 1240 } else {
580561a3
Z
1241 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1242 sv_setpvs(linestr, "");
f0e67a1d
Z
1243 eof:
1244 /* End of real input. Close filehandle (unless it was STDIN),
1245 * then add implicit termination.
1246 */
1247 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1248 PerlIO_clearerr(PL_parser->rsfp);
1249 else if (PL_parser->rsfp)
1250 (void)PerlIO_close(PL_parser->rsfp);
1251 PL_parser->rsfp = NULL;
1252 PL_doextract = FALSE;
1253#ifdef PERL_MAD
1254 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1255 PL_faketokens = 1;
1256#endif
1257 if (!PL_in_eval && PL_minus_p) {
1258 sv_catpvs(linestr,
1259 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1260 PL_minus_n = PL_minus_p = 0;
1261 } else if (!PL_in_eval && PL_minus_n) {
1262 sv_catpvs(linestr, /*{*/";}");
1263 PL_minus_n = 0;
1264 } else
1265 sv_catpvs(linestr, ";");
1266 got_some = 1;
1267 }
1268 buf = SvPVX(linestr);
1269 new_bufend_pos = SvCUR(linestr);
1270 PL_parser->bufend = buf + new_bufend_pos;
1271 PL_parser->bufptr = buf + bufptr_pos;
1272 PL_parser->oldbufptr = buf + oldbufptr_pos;
1273 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1274 PL_parser->linestart = buf + linestart_pos;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni = buf + last_uni_pos;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1279 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1280 PL_curstash != PL_debstash) {
1281 /* debugger active and we're not compiling the debugger code,
1282 * so store the line into the debugger's array of lines
1283 */
1284 update_debugger_info(NULL, buf+old_bufend_pos,
1285 new_bufend_pos-old_bufend_pos);
1286 }
1287 return got_some;
1288}
1289
1290/*
1291=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1292
1293Looks ahead one (Unicode) character in the text currently being lexed.
1294Returns the codepoint (unsigned integer value) of the next character,
1295or -1 if lexing has reached the end of the input text. To consume the
1296peeked character, use L</lex_read_unichar>.
1297
1298If the next character is in (or extends into) the next chunk of input
1299text, the next chunk will be read in. Normally the current chunk will be
1300discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1301then the current chunk will not be discarded.
1302
1303If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1304is encountered, an exception is generated.
1305
1306=cut
1307*/
1308
1309I32
1310Perl_lex_peek_unichar(pTHX_ U32 flags)
1311{
749123ff 1312 dVAR;
f0e67a1d
Z
1313 char *s, *bufend;
1314 if (flags & ~(LEX_KEEP_PREVIOUS))
1315 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1316 s = PL_parser->bufptr;
1317 bufend = PL_parser->bufend;
1318 if (UTF) {
1319 U8 head;
1320 I32 unichar;
1321 STRLEN len, retlen;
1322 if (s == bufend) {
1323 if (!lex_next_chunk(flags))
1324 return -1;
1325 s = PL_parser->bufptr;
1326 bufend = PL_parser->bufend;
1327 }
1328 head = (U8)*s;
1329 if (!(head & 0x80))
1330 return head;
1331 if (head & 0x40) {
1332 len = PL_utf8skip[head];
1333 while ((STRLEN)(bufend-s) < len) {
1334 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1335 break;
1336 s = PL_parser->bufptr;
1337 bufend = PL_parser->bufend;
1338 }
1339 }
1340 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1341 if (retlen == (STRLEN)-1) {
1342 /* malformed UTF-8 */
1343 ENTER;
1344 SAVESPTR(PL_warnhook);
1345 PL_warnhook = PERL_WARNHOOK_FATAL;
1346 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1347 LEAVE;
1348 }
1349 return unichar;
1350 } else {
1351 if (s == bufend) {
1352 if (!lex_next_chunk(flags))
1353 return -1;
1354 s = PL_parser->bufptr;
1355 }
1356 return (U8)*s;
1357 }
1358}
1359
1360/*
1361=for apidoc Amx|I32|lex_read_unichar|U32 flags
1362
1363Reads the next (Unicode) character in the text currently being lexed.
1364Returns the codepoint (unsigned integer value) of the character read,
1365and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1366if lexing has reached the end of the input text. To non-destructively
1367examine the next character, use L</lex_peek_unichar> instead.
1368
1369If the next character is in (or extends into) the next chunk of input
1370text, the next chunk will be read in. Normally the current chunk will be
1371discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1372then the current chunk will not be discarded.
1373
1374If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1375is encountered, an exception is generated.
1376
1377=cut
1378*/
1379
1380I32
1381Perl_lex_read_unichar(pTHX_ U32 flags)
1382{
1383 I32 c;
1384 if (flags & ~(LEX_KEEP_PREVIOUS))
1385 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1386 c = lex_peek_unichar(flags);
1387 if (c != -1) {
1388 if (c == '\n')
1389 CopLINE_inc(PL_curcop);
1390 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1391 }
1392 return c;
1393}
1394
1395/*
1396=for apidoc Amx|void|lex_read_space|U32 flags
1397
1398Reads optional spaces, in Perl style, in the text currently being
1399lexed. The spaces may include ordinary whitespace characters and
1400Perl-style comments. C<#line> directives are processed if encountered.
1401L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1402at a non-space character (or the end of the input text).
1403
1404If spaces extend into the next chunk of input text, the next chunk will
1405be read in. Normally the current chunk will be discarded at the same
1406time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1407chunk will not be discarded.
1408
1409=cut
1410*/
1411
f0998909
Z
1412#define LEX_NO_NEXT_CHUNK 0x80000000
1413
f0e67a1d
Z
1414void
1415Perl_lex_read_space(pTHX_ U32 flags)
1416{
1417 char *s, *bufend;
1418 bool need_incline = 0;
f0998909 1419 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1420 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1421#ifdef PERL_MAD
1422 if (PL_skipwhite) {
1423 sv_free(PL_skipwhite);
1424 PL_skipwhite = NULL;
1425 }
1426 if (PL_madskills)
1427 PL_skipwhite = newSVpvs("");
1428#endif /* PERL_MAD */
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1431 while (1) {
1432 char c = *s;
1433 if (c == '#') {
1434 do {
1435 c = *++s;
1436 } while (!(c == '\n' || (c == 0 && s == bufend)));
1437 } else if (c == '\n') {
1438 s++;
1439 PL_parser->linestart = s;
1440 if (s == bufend)
1441 need_incline = 1;
1442 else
1443 incline(s);
1444 } else if (isSPACE(c)) {
1445 s++;
1446 } else if (c == 0 && s == bufend) {
1447 bool got_more;
1448#ifdef PERL_MAD
1449 if (PL_madskills)
1450 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1451#endif /* PERL_MAD */
f0998909
Z
1452 if (flags & LEX_NO_NEXT_CHUNK)
1453 break;
f0e67a1d
Z
1454 PL_parser->bufptr = s;
1455 CopLINE_inc(PL_curcop);
1456 got_more = lex_next_chunk(flags);
1457 CopLINE_dec(PL_curcop);
1458 s = PL_parser->bufptr;
1459 bufend = PL_parser->bufend;
1460 if (!got_more)
1461 break;
1462 if (need_incline && PL_parser->rsfp) {
1463 incline(s);
1464 need_incline = 0;
1465 }
1466 } else {
1467 break;
1468 }
1469 }
1470#ifdef PERL_MAD
1471 if (PL_madskills)
1472 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473#endif /* PERL_MAD */
1474 PL_parser->bufptr = s;
1475}
1476
1477/*
ffb4593c
NT
1478 * S_incline
1479 * This subroutine has nothing to do with tilting, whether at windmills
1480 * or pinball tables. Its name is short for "increment line". It
57843af0 1481 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1482 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1483 * # line 500 "foo.pm"
1484 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1485 */
1486
76e3520e 1487STATIC void
d9095cec 1488S_incline(pTHX_ const char *s)
463ee0b2 1489{
97aff369 1490 dVAR;
d9095cec
NC
1491 const char *t;
1492 const char *n;
1493 const char *e;
463ee0b2 1494
7918f24d
NC
1495 PERL_ARGS_ASSERT_INCLINE;
1496
57843af0 1497 CopLINE_inc(PL_curcop);
463ee0b2
LW
1498 if (*s++ != '#')
1499 return;
d4c19fe8
AL
1500 while (SPACE_OR_TAB(*s))
1501 s++;
73659bf1
GS
1502 if (strnEQ(s, "line", 4))
1503 s += 4;
1504 else
1505 return;
084592ab 1506 if (SPACE_OR_TAB(*s))
73659bf1 1507 s++;
4e553d73 1508 else
73659bf1 1509 return;
d4c19fe8
AL
1510 while (SPACE_OR_TAB(*s))
1511 s++;
463ee0b2
LW
1512 if (!isDIGIT(*s))
1513 return;
d4c19fe8 1514
463ee0b2
LW
1515 n = s;
1516 while (isDIGIT(*s))
1517 s++;
07714eb4 1518 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1519 return;
bf4acbe4 1520 while (SPACE_OR_TAB(*s))
463ee0b2 1521 s++;
73659bf1 1522 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1523 s++;
73659bf1
GS
1524 e = t + 1;
1525 }
463ee0b2 1526 else {
c35e046a
AL
1527 t = s;
1528 while (!isSPACE(*t))
1529 t++;
73659bf1 1530 e = t;
463ee0b2 1531 }
bf4acbe4 1532 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1533 e++;
1534 if (*e != '\n' && *e != '\0')
1535 return; /* false alarm */
1536
f4dd75d9 1537 if (t - s > 0) {
d9095cec 1538 const STRLEN len = t - s;
8a5ee598 1539#ifndef USE_ITHREADS
19bad673
NC
1540 SV *const temp_sv = CopFILESV(PL_curcop);
1541 const char *cf;
1542 STRLEN tmplen;
1543
1544 if (temp_sv) {
1545 cf = SvPVX(temp_sv);
1546 tmplen = SvCUR(temp_sv);
1547 } else {
1548 cf = NULL;
1549 tmplen = 0;
1550 }
1551
42d9b98d 1552 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1553 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1554 * to *{"::_<newfilename"} */
44867030
NC
1555 /* However, the long form of evals is only turned on by the
1556 debugger - usually they're "(eval %lu)" */
1557 char smallbuf[128];
1558 char *tmpbuf;
1559 GV **gvp;
d9095cec 1560 STRLEN tmplen2 = len;
798b63bc 1561 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1562 tmpbuf = smallbuf;
1563 else
2ae0db35 1564 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1565 tmpbuf[0] = '_';
1566 tmpbuf[1] = '<';
2ae0db35 1567 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1568 tmplen += 2;
8a5ee598
RGS
1569 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1570 if (gvp) {
44867030
NC
1571 char *tmpbuf2;
1572 GV *gv2;
1573
1574 if (tmplen2 + 2 <= sizeof smallbuf)
1575 tmpbuf2 = smallbuf;
1576 else
1577 Newx(tmpbuf2, tmplen2 + 2, char);
1578
1579 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1580 /* Either they malloc'd it, or we malloc'd it,
1581 so no prefix is present in ours. */
1582 tmpbuf2[0] = '_';
1583 tmpbuf2[1] = '<';
1584 }
1585
1586 memcpy(tmpbuf2 + 2, s, tmplen2);
1587 tmplen2 += 2;
1588
8a5ee598 1589 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1590 if (!isGV(gv2)) {
8a5ee598 1591 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1592 /* adjust ${"::_<newfilename"} to store the new file name */
1593 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1594 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1595 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1596 }
44867030
NC
1597
1598 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1599 }
e66cf94c 1600 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1601 }
8a5ee598 1602#endif
05ec9bb3 1603 CopFILE_free(PL_curcop);
d9095cec 1604 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1605 }
57843af0 1606 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1607}
1608
29595ff2 1609#ifdef PERL_MAD
cd81e915 1610/* skip space before PL_thistoken */
29595ff2
NC
1611
1612STATIC char *
1613S_skipspace0(pTHX_ register char *s)
1614{
7918f24d
NC
1615 PERL_ARGS_ASSERT_SKIPSPACE0;
1616
29595ff2
NC
1617 s = skipspace(s);
1618 if (!PL_madskills)
1619 return s;
cd81e915
NC
1620 if (PL_skipwhite) {
1621 if (!PL_thiswhite)
6b29d1f5 1622 PL_thiswhite = newSVpvs("");
cd81e915
NC
1623 sv_catsv(PL_thiswhite, PL_skipwhite);
1624 sv_free(PL_skipwhite);
1625 PL_skipwhite = 0;
1626 }
1627 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1628 return s;
1629}
1630
cd81e915 1631/* skip space after PL_thistoken */
29595ff2
NC
1632
1633STATIC char *
1634S_skipspace1(pTHX_ register char *s)
1635{
d4c19fe8 1636 const char *start = s;
29595ff2
NC
1637 I32 startoff = start - SvPVX(PL_linestr);
1638
7918f24d
NC
1639 PERL_ARGS_ASSERT_SKIPSPACE1;
1640
29595ff2
NC
1641 s = skipspace(s);
1642 if (!PL_madskills)
1643 return s;
1644 start = SvPVX(PL_linestr) + startoff;
cd81e915 1645 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1646 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1647 PL_thistoken = newSVpvn(tstart, start - tstart);
1648 }
1649 PL_realtokenstart = -1;
1650 if (PL_skipwhite) {
1651 if (!PL_nextwhite)
6b29d1f5 1652 PL_nextwhite = newSVpvs("");
cd81e915
NC
1653 sv_catsv(PL_nextwhite, PL_skipwhite);
1654 sv_free(PL_skipwhite);
1655 PL_skipwhite = 0;
29595ff2
NC
1656 }
1657 return s;
1658}
1659
1660STATIC char *
1661S_skipspace2(pTHX_ register char *s, SV **svp)
1662{
c35e046a
AL
1663 char *start;
1664 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1665 const I32 startoff = s - SvPVX(PL_linestr);
1666
7918f24d
NC
1667 PERL_ARGS_ASSERT_SKIPSPACE2;
1668
29595ff2
NC
1669 s = skipspace(s);
1670 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1671 if (!PL_madskills || !svp)
1672 return s;
1673 start = SvPVX(PL_linestr) + startoff;
cd81e915 1674 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1675 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1676 PL_thistoken = newSVpvn(tstart, start - tstart);
1677 PL_realtokenstart = -1;
29595ff2 1678 }
cd81e915 1679 if (PL_skipwhite) {
29595ff2 1680 if (!*svp)
6b29d1f5 1681 *svp = newSVpvs("");
cd81e915
NC
1682 sv_setsv(*svp, PL_skipwhite);
1683 sv_free(PL_skipwhite);
1684 PL_skipwhite = 0;
29595ff2
NC
1685 }
1686
1687 return s;
1688}
1689#endif
1690
80a702cd 1691STATIC void
15f169a1 1692S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1693{
1694 AV *av = CopFILEAVx(PL_curcop);
1695 if (av) {
b9f83d2f 1696 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1697 if (orig_sv)
1698 sv_setsv(sv, orig_sv);
1699 else
1700 sv_setpvn(sv, buf, len);
80a702cd
RGS
1701 (void)SvIOK_on(sv);
1702 SvIV_set(sv, 0);
1703 av_store(av, (I32)CopLINE(PL_curcop), sv);
1704 }
1705}
1706
ffb4593c
NT
1707/*
1708 * S_skipspace
1709 * Called to gobble the appropriate amount and type of whitespace.
1710 * Skips comments as well.
1711 */
1712
76e3520e 1713STATIC char *
cea2e8a9 1714S_skipspace(pTHX_ register char *s)
a687059c 1715{
5db06880 1716#ifdef PERL_MAD
f0e67a1d
Z
1717 char *start = s;
1718#endif /* PERL_MAD */
7918f24d 1719 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1720#ifdef PERL_MAD
cd81e915
NC
1721 if (PL_skipwhite) {
1722 sv_free(PL_skipwhite);
f0e67a1d 1723 PL_skipwhite = NULL;
5db06880 1724 }
f0e67a1d 1725#endif /* PERL_MAD */
3280af22 1726 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1727 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1728 s++;
f0e67a1d
Z
1729 } else {
1730 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1731 PL_bufptr = s;
f0998909
Z
1732 lex_read_space(LEX_KEEP_PREVIOUS |
1733 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1734 LEX_NO_NEXT_CHUNK : 0));
3280af22 1735 s = PL_bufptr;
f0e67a1d
Z
1736 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1737 if (PL_linestart > PL_bufptr)
1738 PL_bufptr = PL_linestart;
1739 return s;
463ee0b2 1740 }
5db06880 1741#ifdef PERL_MAD
f0e67a1d
Z
1742 if (PL_madskills)
1743 PL_skipwhite = newSVpvn(start, s-start);
1744#endif /* PERL_MAD */
5db06880 1745 return s;
a687059c 1746}
378cc40b 1747
ffb4593c
NT
1748/*
1749 * S_check_uni
1750 * Check the unary operators to ensure there's no ambiguity in how they're
1751 * used. An ambiguous piece of code would be:
1752 * rand + 5
1753 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1754 * the +5 is its argument.
1755 */
1756
76e3520e 1757STATIC void
cea2e8a9 1758S_check_uni(pTHX)
ba106d47 1759{
97aff369 1760 dVAR;
d4c19fe8
AL
1761 const char *s;
1762 const char *t;
2f3197b3 1763
3280af22 1764 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1765 return;
3280af22
NIS
1766 while (isSPACE(*PL_last_uni))
1767 PL_last_uni++;
c35e046a
AL
1768 s = PL_last_uni;
1769 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1770 s++;
3280af22 1771 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1772 return;
6136c704 1773
9b387841
NC
1774 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1775 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1776 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1777}
1778
ffb4593c
NT
1779/*
1780 * LOP : macro to build a list operator. Its behaviour has been replaced
1781 * with a subroutine, S_lop() for which LOP is just another name.
1782 */
1783
a0d0e21e
LW
1784#define LOP(f,x) return lop(f,x,s)
1785
ffb4593c
NT
1786/*
1787 * S_lop
1788 * Build a list operator (or something that might be one). The rules:
1789 * - if we have a next token, then it's a list operator [why?]
1790 * - if the next thing is an opening paren, then it's a function
1791 * - else it's a list operator
1792 */
1793
76e3520e 1794STATIC I32
a0be28da 1795S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1796{
97aff369 1797 dVAR;
7918f24d
NC
1798
1799 PERL_ARGS_ASSERT_LOP;
1800
6154021b 1801 pl_yylval.ival = f;
35c8bce7 1802 CLINE;
3280af22
NIS
1803 PL_expect = x;
1804 PL_bufptr = s;
1805 PL_last_lop = PL_oldbufptr;
eb160463 1806 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1807#ifdef PERL_MAD
1808 if (PL_lasttoke)
1809 return REPORT(LSTOP);
1810#else
3280af22 1811 if (PL_nexttoke)
bbf60fe6 1812 return REPORT(LSTOP);
5db06880 1813#endif
79072805 1814 if (*s == '(')
bbf60fe6 1815 return REPORT(FUNC);
29595ff2 1816 s = PEEKSPACE(s);
79072805 1817 if (*s == '(')
bbf60fe6 1818 return REPORT(FUNC);
79072805 1819 else
bbf60fe6 1820 return REPORT(LSTOP);
79072805
LW
1821}
1822
5db06880
NC
1823#ifdef PERL_MAD
1824 /*
1825 * S_start_force
1826 * Sets up for an eventual force_next(). start_force(0) basically does
1827 * an unshift, while start_force(-1) does a push. yylex removes items
1828 * on the "pop" end.
1829 */
1830
1831STATIC void
1832S_start_force(pTHX_ int where)
1833{
1834 int i;
1835
cd81e915 1836 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1837 where = PL_lasttoke;
cd81e915
NC
1838 assert(PL_curforce < 0 || PL_curforce == where);
1839 if (PL_curforce != where) {
5db06880
NC
1840 for (i = PL_lasttoke; i > where; --i) {
1841 PL_nexttoke[i] = PL_nexttoke[i-1];
1842 }
1843 PL_lasttoke++;
1844 }
cd81e915 1845 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1846 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1847 PL_curforce = where;
1848 if (PL_nextwhite) {
5db06880 1849 if (PL_madskills)
6b29d1f5 1850 curmad('^', newSVpvs(""));
cd81e915 1851 CURMAD('_', PL_nextwhite);
5db06880
NC
1852 }
1853}
1854
1855STATIC void
1856S_curmad(pTHX_ char slot, SV *sv)
1857{
1858 MADPROP **where;
1859
1860 if (!sv)
1861 return;
cd81e915
NC
1862 if (PL_curforce < 0)
1863 where = &PL_thismad;
5db06880 1864 else
cd81e915 1865 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1866
cd81e915 1867 if (PL_faketokens)
76f68e9b 1868 sv_setpvs(sv, "");
5db06880
NC
1869 else {
1870 if (!IN_BYTES) {
1871 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1872 SvUTF8_on(sv);
1873 else if (PL_encoding) {
1874 sv_recode_to_utf8(sv, PL_encoding);
1875 }
1876 }
1877 }
1878
1879 /* keep a slot open for the head of the list? */
1880 if (slot != '_' && *where && (*where)->mad_key == '^') {
1881 (*where)->mad_key = slot;
daba3364 1882 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1883 (*where)->mad_val = (void*)sv;
1884 }
1885 else
1886 addmad(newMADsv(slot, sv), where, 0);
1887}
1888#else
b3f24c00
MHM
1889# define start_force(where) NOOP
1890# define curmad(slot, sv) NOOP
5db06880
NC
1891#endif
1892
ffb4593c
NT
1893/*
1894 * S_force_next
9cbb5ea2 1895 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1896 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1897 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1898 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1899 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1900 */
1901
4e553d73 1902STATIC void
cea2e8a9 1903S_force_next(pTHX_ I32 type)
79072805 1904{
97aff369 1905 dVAR;
704d4215
GG
1906#ifdef DEBUGGING
1907 if (DEBUG_T_TEST) {
1908 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1909 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1910 }
1911#endif
5db06880 1912#ifdef PERL_MAD
cd81e915 1913 if (PL_curforce < 0)
5db06880 1914 start_force(PL_lasttoke);
cd81e915 1915 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1916 if (PL_lex_state != LEX_KNOWNEXT)
1917 PL_lex_defer = PL_lex_state;
1918 PL_lex_state = LEX_KNOWNEXT;
1919 PL_lex_expect = PL_expect;
cd81e915 1920 PL_curforce = -1;
5db06880 1921#else
3280af22
NIS
1922 PL_nexttype[PL_nexttoke] = type;
1923 PL_nexttoke++;
1924 if (PL_lex_state != LEX_KNOWNEXT) {
1925 PL_lex_defer = PL_lex_state;
1926 PL_lex_expect = PL_expect;
1927 PL_lex_state = LEX_KNOWNEXT;
79072805 1928 }
5db06880 1929#endif
79072805
LW
1930}
1931
d0a148a6 1932STATIC SV *
15f169a1 1933S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1934{
97aff369 1935 dVAR;
740cce10 1936 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1937 !IN_BYTES
1938 && UTF
1939 && !is_ascii_string((const U8*)start, len)
740cce10 1940 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1941 return sv;
1942}
1943
ffb4593c
NT
1944/*
1945 * S_force_word
1946 * When the lexer knows the next thing is a word (for instance, it has
1947 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1948 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1949 * lookahead.
ffb4593c
NT
1950 *
1951 * Arguments:
b1b65b59 1952 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1953 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1954 * int check_keyword : if true, Perl checks to make sure the word isn't
1955 * a keyword (do this if the word is a label, e.g. goto FOO)
1956 * int allow_pack : if true, : characters will also be allowed (require,
1957 * use, etc. do this)
9cbb5ea2 1958 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1959 */
1960
76e3520e 1961STATIC char *
cea2e8a9 1962S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1963{
97aff369 1964 dVAR;
463ee0b2
LW
1965 register char *s;
1966 STRLEN len;
4e553d73 1967
7918f24d
NC
1968 PERL_ARGS_ASSERT_FORCE_WORD;
1969
29595ff2 1970 start = SKIPSPACE1(start);
463ee0b2 1971 s = start;
7e2040f0 1972 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1973 (allow_pack && *s == ':') ||
15f0808c 1974 (allow_initial_tick && *s == '\'') )
a0d0e21e 1975 {
3280af22 1976 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1977 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1978 return start;
cd81e915 1979 start_force(PL_curforce);
5db06880
NC
1980 if (PL_madskills)
1981 curmad('X', newSVpvn(start,s-start));
463ee0b2 1982 if (token == METHOD) {
29595ff2 1983 s = SKIPSPACE1(s);
463ee0b2 1984 if (*s == '(')
3280af22 1985 PL_expect = XTERM;
463ee0b2 1986 else {
3280af22 1987 PL_expect = XOPERATOR;
463ee0b2 1988 }
79072805 1989 }
e74e6b3d 1990 if (PL_madskills)
63575281 1991 curmad('g', newSVpvs( "forced" ));
9ded7720 1992 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1993 = (OP*)newSVOP(OP_CONST,0,
1994 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1995 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1996 force_next(token);
1997 }
1998 return s;
1999}
2000
ffb4593c
NT
2001/*
2002 * S_force_ident
9cbb5ea2 2003 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2004 * text only contains the "foo" portion. The first argument is a pointer
2005 * to the "foo", and the second argument is the type symbol to prefix.
2006 * Forces the next token to be a "WORD".
9cbb5ea2 2007 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2008 */
2009
76e3520e 2010STATIC void
bfed75c6 2011S_force_ident(pTHX_ register const char *s, int kind)
79072805 2012{
97aff369 2013 dVAR;
7918f24d
NC
2014
2015 PERL_ARGS_ASSERT_FORCE_IDENT;
2016
c35e046a 2017 if (*s) {
90e5519e
NC
2018 const STRLEN len = strlen(s);
2019 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2020 start_force(PL_curforce);
9ded7720 2021 NEXTVAL_NEXTTOKE.opval = o;
79072805 2022 force_next(WORD);
748a9306 2023 if (kind) {
11343788 2024 o->op_private = OPpCONST_ENTERED;
55497cff 2025 /* XXX see note in pp_entereval() for why we forgo typo
2026 warnings if the symbol must be introduced in an eval.
2027 GSAR 96-10-12 */
90e5519e
NC
2028 gv_fetchpvn_flags(s, len,
2029 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2030 : GV_ADD,
2031 kind == '$' ? SVt_PV :
2032 kind == '@' ? SVt_PVAV :
2033 kind == '%' ? SVt_PVHV :
a0d0e21e 2034 SVt_PVGV
90e5519e 2035 );
748a9306 2036 }
79072805
LW
2037 }
2038}
2039
1571675a
GS
2040NV
2041Perl_str_to_version(pTHX_ SV *sv)
2042{
2043 NV retval = 0.0;
2044 NV nshift = 1.0;
2045 STRLEN len;
cfd0369c 2046 const char *start = SvPV_const(sv,len);
9d4ba2ae 2047 const char * const end = start + len;
504618e9 2048 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2049
2050 PERL_ARGS_ASSERT_STR_TO_VERSION;
2051
1571675a 2052 while (start < end) {
ba210ebe 2053 STRLEN skip;
1571675a
GS
2054 UV n;
2055 if (utf)
9041c2e3 2056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2057 else {
2058 n = *(U8*)start;
2059 skip = 1;
2060 }
2061 retval += ((NV)n)/nshift;
2062 start += skip;
2063 nshift *= 1000;
2064 }
2065 return retval;
2066}
2067
4e553d73 2068/*
ffb4593c
NT
2069 * S_force_version
2070 * Forces the next token to be a version number.
e759cc13
RGS
2071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2072 * and if "guessing" is TRUE, then no new token is created (and the caller
2073 * must use an alternative parsing method).
ffb4593c
NT
2074 */
2075
76e3520e 2076STATIC char *
e759cc13 2077S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2078{
97aff369 2079 dVAR;
5f66b61c 2080 OP *version = NULL;
44dcb63b 2081 char *d;
5db06880
NC
2082#ifdef PERL_MAD
2083 I32 startoff = s - SvPVX(PL_linestr);
2084#endif
89bfa8cd 2085
7918f24d
NC
2086 PERL_ARGS_ASSERT_FORCE_VERSION;
2087
29595ff2 2088 s = SKIPSPACE1(s);
89bfa8cd 2089
44dcb63b 2090 d = s;
dd629d5b 2091 if (*d == 'v')
44dcb63b 2092 d++;
44dcb63b 2093 if (isDIGIT(*d)) {
e759cc13
RGS
2094 while (isDIGIT(*d) || *d == '_' || *d == '.')
2095 d++;
5db06880
NC
2096#ifdef PERL_MAD
2097 if (PL_madskills) {
cd81e915 2098 start_force(PL_curforce);
5db06880
NC
2099 curmad('X', newSVpvn(s,d-s));
2100 }
2101#endif
4e4da3ac 2102 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2103 SV *ver;
8d08d9ba
DG
2104#ifdef USE_LOCALE_NUMERIC
2105 char *loc = setlocale(LC_NUMERIC, "C");
2106#endif
6154021b 2107 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2108#ifdef USE_LOCALE_NUMERIC
2109 setlocale(LC_NUMERIC, loc);
2110#endif
6154021b 2111 version = pl_yylval.opval;
dd629d5b
GS
2112 ver = cSVOPx(version)->op_sv;
2113 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2114 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2115 SvNV_set(ver, str_to_version(ver));
1571675a 2116 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2117 }
89bfa8cd 2118 }
5db06880
NC
2119 else if (guessing) {
2120#ifdef PERL_MAD
2121 if (PL_madskills) {
cd81e915
NC
2122 sv_free(PL_nextwhite); /* let next token collect whitespace */
2123 PL_nextwhite = 0;
5db06880
NC
2124 s = SvPVX(PL_linestr) + startoff;
2125 }
2126#endif
e759cc13 2127 return s;
5db06880 2128 }
89bfa8cd 2129 }
2130
5db06880
NC
2131#ifdef PERL_MAD
2132 if (PL_madskills && !version) {
cd81e915
NC
2133 sv_free(PL_nextwhite); /* let next token collect whitespace */
2134 PL_nextwhite = 0;
5db06880
NC
2135 s = SvPVX(PL_linestr) + startoff;
2136 }
2137#endif
89bfa8cd 2138 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2139 start_force(PL_curforce);
9ded7720 2140 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2141 force_next(WORD);
89bfa8cd 2142
e759cc13 2143 return s;
89bfa8cd 2144}
2145
ffb4593c 2146/*
91152fc1
DG
2147 * S_force_strict_version
2148 * Forces the next token to be a version number using strict syntax rules.
2149 */
2150
2151STATIC char *
2152S_force_strict_version(pTHX_ char *s)
2153{
2154 dVAR;
2155 OP *version = NULL;
2156#ifdef PERL_MAD
2157 I32 startoff = s - SvPVX(PL_linestr);
2158#endif
2159 const char *errstr = NULL;
2160
2161 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2162
2163 while (isSPACE(*s)) /* leading whitespace */
2164 s++;
2165
2166 if (is_STRICT_VERSION(s,&errstr)) {
2167 SV *ver = newSV(0);
2168 s = (char *)scan_version(s, ver, 0);
2169 version = newSVOP(OP_CONST, 0, ver);
2170 }
4e4da3ac
Z
2171 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2172 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2173 {
91152fc1
DG
2174 PL_bufptr = s;
2175 if (errstr)
2176 yyerror(errstr); /* version required */
2177 return s;
2178 }
2179
2180#ifdef PERL_MAD
2181 if (PL_madskills && !version) {
2182 sv_free(PL_nextwhite); /* let next token collect whitespace */
2183 PL_nextwhite = 0;
2184 s = SvPVX(PL_linestr) + startoff;
2185 }
2186#endif
2187 /* NOTE: The parser sees the package name and the VERSION swapped */
2188 start_force(PL_curforce);
2189 NEXTVAL_NEXTTOKE.opval = version;
2190 force_next(WORD);
2191
2192 return s;
2193}
2194
2195/*
ffb4593c
NT
2196 * S_tokeq
2197 * Tokenize a quoted string passed in as an SV. It finds the next
2198 * chunk, up to end of string or a backslash. It may make a new
2199 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2200 * turns \\ into \.
2201 */
2202
76e3520e 2203STATIC SV *
cea2e8a9 2204S_tokeq(pTHX_ SV *sv)
79072805 2205{
97aff369 2206 dVAR;
79072805
LW
2207 register char *s;
2208 register char *send;
2209 register char *d;
b3ac6de7
IZ
2210 STRLEN len = 0;
2211 SV *pv = sv;
79072805 2212
7918f24d
NC
2213 PERL_ARGS_ASSERT_TOKEQ;
2214
79072805 2215 if (!SvLEN(sv))
b3ac6de7 2216 goto finish;
79072805 2217
a0d0e21e 2218 s = SvPV_force(sv, len);
21a311ee 2219 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2220 goto finish;
463ee0b2 2221 send = s + len;
79072805
LW
2222 while (s < send && *s != '\\')
2223 s++;
2224 if (s == send)
b3ac6de7 2225 goto finish;
79072805 2226 d = s;
be4731d2 2227 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2228 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2229 }
79072805
LW
2230 while (s < send) {
2231 if (*s == '\\') {
a0d0e21e 2232 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2233 s++; /* all that, just for this */
2234 }
2235 *d++ = *s++;
2236 }
2237 *d = '\0';
95a20fc0 2238 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2239 finish:
3280af22 2240 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2241 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2242 return sv;
2243}
2244
ffb4593c
NT
2245/*
2246 * Now come three functions related to double-quote context,
2247 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2248 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2249 * interact with PL_lex_state, and create fake ( ... ) argument lists
2250 * to handle functions and concatenation.
2251 * They assume that whoever calls them will be setting up a fake
2252 * join call, because each subthing puts a ',' after it. This lets
2253 * "lower \luPpEr"
2254 * become
2255 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2256 *
2257 * (I'm not sure whether the spurious commas at the end of lcfirst's
2258 * arguments and join's arguments are created or not).
2259 */
2260
2261/*
2262 * S_sublex_start
6154021b 2263 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2264 *
2265 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2266 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2267 *
2268 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2269 *
2270 * Everything else becomes a FUNC.
2271 *
2272 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2273 * had an OP_CONST or OP_READLINE). This just sets us up for a
2274 * call to S_sublex_push().
2275 */
2276
76e3520e 2277STATIC I32
cea2e8a9 2278S_sublex_start(pTHX)
79072805 2279{
97aff369 2280 dVAR;
6154021b 2281 register const I32 op_type = pl_yylval.ival;
79072805
LW
2282
2283 if (op_type == OP_NULL) {
6154021b 2284 pl_yylval.opval = PL_lex_op;
5f66b61c 2285 PL_lex_op = NULL;
79072805
LW
2286 return THING;
2287 }
2288 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2289 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2290
2291 if (SvTYPE(sv) == SVt_PVIV) {
2292 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2293 STRLEN len;
96a5add6 2294 const char * const p = SvPV_const(sv, len);
740cce10 2295 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2296 SvREFCNT_dec(sv);
2297 sv = nsv;
4e553d73 2298 }
6154021b 2299 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2300 PL_lex_stuff = NULL;
6f33ba73
RGS
2301 /* Allow <FH> // "foo" */
2302 if (op_type == OP_READLINE)
2303 PL_expect = XTERMORDORDOR;
79072805
LW
2304 return THING;
2305 }
e3f73d4e
RGS
2306 else if (op_type == OP_BACKTICK && PL_lex_op) {
2307 /* readpipe() vas overriden */
2308 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2309 pl_yylval.opval = PL_lex_op;
9b201d7d 2310 PL_lex_op = NULL;
e3f73d4e
RGS
2311 PL_lex_stuff = NULL;
2312 return THING;
2313 }
79072805 2314
3280af22 2315 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2316 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2317 PL_sublex_info.sub_op = PL_lex_op;
2318 PL_lex_state = LEX_INTERPPUSH;
55497cff 2319
3280af22
NIS
2320 PL_expect = XTERM;
2321 if (PL_lex_op) {
6154021b 2322 pl_yylval.opval = PL_lex_op;
5f66b61c 2323 PL_lex_op = NULL;
55497cff 2324 return PMFUNC;
2325 }
2326 else
2327 return FUNC;
2328}
2329
ffb4593c
NT
2330/*
2331 * S_sublex_push
2332 * Create a new scope to save the lexing state. The scope will be
2333 * ended in S_sublex_done. Returns a '(', starting the function arguments
2334 * to the uc, lc, etc. found before.
2335 * Sets PL_lex_state to LEX_INTERPCONCAT.
2336 */
2337
76e3520e 2338STATIC I32
cea2e8a9 2339S_sublex_push(pTHX)
55497cff 2340{
27da23d5 2341 dVAR;
f46d017c 2342 ENTER;
55497cff 2343
3280af22 2344 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2345 SAVEBOOL(PL_lex_dojoin);
3280af22 2346 SAVEI32(PL_lex_brackets);
3280af22
NIS
2347 SAVEI32(PL_lex_casemods);
2348 SAVEI32(PL_lex_starts);
651b5b28 2349 SAVEI8(PL_lex_state);
7766f137 2350 SAVEVPTR(PL_lex_inpat);
98246f1e 2351 SAVEI16(PL_lex_inwhat);
57843af0 2352 SAVECOPLINE(PL_curcop);
3280af22 2353 SAVEPPTR(PL_bufptr);
8452ff4b 2354 SAVEPPTR(PL_bufend);
3280af22
NIS
2355 SAVEPPTR(PL_oldbufptr);
2356 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2357 SAVEPPTR(PL_last_lop);
2358 SAVEPPTR(PL_last_uni);
3280af22
NIS
2359 SAVEPPTR(PL_linestart);
2360 SAVESPTR(PL_linestr);
8edd5f42
RGS
2361 SAVEGENERICPV(PL_lex_brackstack);
2362 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2363
2364 PL_linestr = PL_lex_stuff;
a0714e2c 2365 PL_lex_stuff = NULL;
3280af22 2366
9cbb5ea2
GS
2367 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2368 = SvPVX(PL_linestr);
3280af22 2369 PL_bufend += SvCUR(PL_linestr);
bd61b366 2370 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2371 SAVEFREESV(PL_linestr);
2372
2373 PL_lex_dojoin = FALSE;
2374 PL_lex_brackets = 0;
a02a5408
JC
2375 Newx(PL_lex_brackstack, 120, char);
2376 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2377 PL_lex_casemods = 0;
2378 *PL_lex_casestack = '\0';
2379 PL_lex_starts = 0;
2380 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2381 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2382
2383 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2384 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2385 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2386 else
5f66b61c 2387 PL_lex_inpat = NULL;
79072805 2388
55497cff 2389 return '(';
79072805
LW
2390}
2391
ffb4593c
NT
2392/*
2393 * S_sublex_done
2394 * Restores lexer state after a S_sublex_push.
2395 */
2396
76e3520e 2397STATIC I32
cea2e8a9 2398S_sublex_done(pTHX)
79072805 2399{
27da23d5 2400 dVAR;
3280af22 2401 if (!PL_lex_starts++) {
396482e1 2402 SV * const sv = newSVpvs("");
9aa983d2
JH
2403 if (SvUTF8(PL_linestr))
2404 SvUTF8_on(sv);
3280af22 2405 PL_expect = XOPERATOR;
6154021b 2406 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2407 return THING;
2408 }
2409
3280af22
NIS
2410 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2411 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2412 return yylex();
79072805
LW
2413 }
2414
ffb4593c 2415 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2416 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2417 PL_linestr = PL_lex_repl;
2418 PL_lex_inpat = 0;
2419 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2420 PL_bufend += SvCUR(PL_linestr);
bd61b366 2421 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2422 SAVEFREESV(PL_linestr);
2423 PL_lex_dojoin = FALSE;
2424 PL_lex_brackets = 0;
3280af22
NIS
2425 PL_lex_casemods = 0;
2426 *PL_lex_casestack = '\0';
2427 PL_lex_starts = 0;
25da4f38 2428 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2429 PL_lex_state = LEX_INTERPNORMAL;
2430 PL_lex_starts++;
e9fa98b2
HS
2431 /* we don't clear PL_lex_repl here, so that we can check later
2432 whether this is an evalled subst; that means we rely on the
2433 logic to ensure sublex_done() is called again only via the
2434 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2435 }
e9fa98b2 2436 else {
3280af22 2437 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2438 PL_lex_repl = NULL;
e9fa98b2 2439 }
79072805 2440 return ',';
ffed7fef
LW
2441 }
2442 else {
5db06880
NC
2443#ifdef PERL_MAD
2444 if (PL_madskills) {
cd81e915
NC
2445 if (PL_thiswhite) {
2446 if (!PL_endwhite)
6b29d1f5 2447 PL_endwhite = newSVpvs("");
cd81e915
NC
2448 sv_catsv(PL_endwhite, PL_thiswhite);
2449 PL_thiswhite = 0;
2450 }
2451 if (PL_thistoken)
76f68e9b 2452 sv_setpvs(PL_thistoken,"");
5db06880 2453 else
cd81e915 2454 PL_realtokenstart = -1;
5db06880
NC
2455 }
2456#endif
f46d017c 2457 LEAVE;
3280af22
NIS
2458 PL_bufend = SvPVX(PL_linestr);
2459 PL_bufend += SvCUR(PL_linestr);
2460 PL_expect = XOPERATOR;
09bef843 2461 PL_sublex_info.sub_inwhat = 0;
79072805 2462 return ')';
ffed7fef
LW
2463 }
2464}
2465
02aa26ce
NT
2466/*
2467 scan_const
2468
2469 Extracts a pattern, double-quoted string, or transliteration. This
2470 is terrifying code.
2471
94def140 2472 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2473 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2474 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2475
94def140
TS
2476 Returns a pointer to the character scanned up to. If this is
2477 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2478 successfully parsed), will leave an OP for the substring scanned
6154021b 2479 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2480 by looking at the next characters herself.
2481
02aa26ce
NT
2482 In patterns:
2483 backslashes:
ff3f963a 2484 constants: \N{NAME} only
02aa26ce
NT
2485 case and quoting: \U \Q \E
2486 stops on @ and $, but not for $ as tail anchor
2487
2488 In transliterations:
2489 characters are VERY literal, except for - not at the start or end
94def140
TS
2490 of the string, which indicates a range. If the range is in bytes,
2491 scan_const expands the range to the full set of intermediate
2492 characters. If the range is in utf8, the hyphen is replaced with
2493 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2494
2495 In double-quoted strings:
2496 backslashes:
2497 double-quoted style: \r and \n
ff3f963a 2498 constants: \x31, etc.
94def140 2499 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2500 case and quoting: \U \Q \E
2501 stops on @ and $
2502
2503 scan_const does *not* construct ops to handle interpolated strings.
2504 It stops processing as soon as it finds an embedded $ or @ variable
2505 and leaves it to the caller to work out what's going on.
2506
94def140
TS
2507 embedded arrays (whether in pattern or not) could be:
2508 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2509
2510 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2511
2512 $ in pattern could be $foo or could be tail anchor. Assumption:
2513 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2514 followed by one of "()| \r\n\t"
02aa26ce
NT
2515
2516 \1 (backreferences) are turned into $1
2517
2518 The structure of the code is
2519 while (there's a character to process) {
94def140
TS
2520 handle transliteration ranges
2521 skip regexp comments /(?#comment)/ and codes /(?{code})/
2522 skip #-initiated comments in //x patterns
2523 check for embedded arrays
02aa26ce
NT
2524 check for embedded scalars
2525 if (backslash) {
94def140 2526 deprecate \1 in substitution replacements
02aa26ce
NT
2527 handle string-changing backslashes \l \U \Q \E, etc.
2528 switch (what was escaped) {
94def140 2529 handle \- in a transliteration (becomes a literal -)
ff3f963a 2530 if a pattern and not \N{, go treat as regular character
94def140
TS
2531 handle \132 (octal characters)
2532 handle \x15 and \x{1234} (hex characters)
ff3f963a 2533 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2534 handle \cV (control characters)
2535 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2536 } (end switch)
77a135fe 2537 continue
02aa26ce 2538 } (end if backslash)
77a135fe 2539 handle regular character
02aa26ce 2540 } (end while character to read)
4e553d73 2541
02aa26ce
NT
2542*/
2543
76e3520e 2544STATIC char *
cea2e8a9 2545S_scan_const(pTHX_ char *start)
79072805 2546{
97aff369 2547 dVAR;
3280af22 2548 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2549 SV *sv = newSV(send - start); /* sv for the constant. See
2550 note below on sizing. */
02aa26ce
NT
2551 register char *s = start; /* start of the constant */
2552 register char *d = SvPVX(sv); /* destination for copies */
2553 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2554 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2555 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2556 I32 this_utf8 = UTF; /* Is the source string assumed
2557 to be UTF8? But, this can
2558 show as true when the source
2559 isn't utf8, as for example
2560 when it is entirely composed
2561 of hex constants */
2562
2563 /* Note on sizing: The scanned constant is placed into sv, which is
2564 * initialized by newSV() assuming one byte of output for every byte of
2565 * input. This routine expects newSV() to allocate an extra byte for a
2566 * trailing NUL, which this routine will append if it gets to the end of
2567 * the input. There may be more bytes of input than output (eg., \N{LATIN
2568 * CAPITAL LETTER A}), or more output than input if the constant ends up
2569 * recoded to utf8, but each time a construct is found that might increase
2570 * the needed size, SvGROW() is called. Its size parameter each time is
2571 * based on the best guess estimate at the time, namely the length used so
2572 * far, plus the length the current construct will occupy, plus room for
2573 * the trailing NUL, plus one byte for every input byte still unscanned */
2574
012bcf8d 2575 UV uv;
4c3a8340
TS
2576#ifdef EBCDIC
2577 UV literal_endpoint = 0;
e294cc5d 2578 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2579#endif
012bcf8d 2580
7918f24d
NC
2581 PERL_ARGS_ASSERT_SCAN_CONST;
2582
2b9d42f0
NIS
2583 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2584 /* If we are doing a trans and we know we want UTF8 set expectation */
2585 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2586 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2587 }
2588
2589
79072805 2590 while (s < send || dorange) {
ff3f963a 2591
02aa26ce 2592 /* get transliterations out of the way (they're most literal) */
3280af22 2593 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2594 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2595 if (dorange) {
1ba5c669
JH
2596 I32 i; /* current expanded character */
2597 I32 min; /* first character in range */
2598 I32 max; /* last character in range */
02aa26ce 2599
e294cc5d
JH
2600#ifdef EBCDIC
2601 UV uvmax = 0;
2602#endif
2603
2604 if (has_utf8
2605#ifdef EBCDIC
2606 && !native_range
2607#endif
2608 ) {
9d4ba2ae 2609 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2610 char *e = d++;
2611 while (e-- > c)
2612 *(e + 1) = *e;
25716404 2613 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2614 /* mark the range as done, and continue */
2615 dorange = FALSE;
2616 didrange = TRUE;
2617 continue;
2618 }
2b9d42f0 2619
95a20fc0 2620 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2621#ifdef EBCDIC
2622 SvGROW(sv,
2623 SvLEN(sv) + (has_utf8 ?
2624 (512 - UTF_CONTINUATION_MARK +
2625 UNISKIP(0x100))
2626 : 256));
2627 /* How many two-byte within 0..255: 128 in UTF-8,
2628 * 96 in UTF-8-mod. */
2629#else
9cbb5ea2 2630 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2631#endif
9cbb5ea2 2632 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2633#ifdef EBCDIC
2634 if (has_utf8) {
2635 int j;
2636 for (j = 0; j <= 1; j++) {
2637 char * const c = (char*)utf8_hop((U8*)d, -1);
2638 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2639 if (j)
2640 min = (U8)uv;
2641 else if (uv < 256)
2642 max = (U8)uv;
2643 else {
2644 max = (U8)0xff; /* only to \xff */
2645 uvmax = uv; /* \x{100} to uvmax */
2646 }
2647 d = c; /* eat endpoint chars */
2648 }
2649 }
2650 else {
2651#endif
2652 d -= 2; /* eat the first char and the - */
2653 min = (U8)*d; /* first char in range */
2654 max = (U8)d[1]; /* last char in range */
2655#ifdef EBCDIC
2656 }
2657#endif
8ada0baa 2658
c2e66d9e 2659 if (min > max) {
01ec43d0 2660 Perl_croak(aTHX_
d1573ac7 2661 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2662 (char)min, (char)max);
c2e66d9e
GS
2663 }
2664
c7f1f016 2665#ifdef EBCDIC
4c3a8340
TS
2666 if (literal_endpoint == 2 &&
2667 ((isLOWER(min) && isLOWER(max)) ||
2668 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2669 if (isLOWER(min)) {
2670 for (i = min; i <= max; i++)
2671 if (isLOWER(i))
db42d148 2672 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2673 } else {
2674 for (i = min; i <= max; i++)
2675 if (isUPPER(i))
db42d148 2676 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2677 }
2678 }
2679 else
2680#endif
2681 for (i = min; i <= max; i++)
e294cc5d
JH
2682#ifdef EBCDIC
2683 if (has_utf8) {
2684 const U8 ch = (U8)NATIVE_TO_UTF(i);
2685 if (UNI_IS_INVARIANT(ch))
2686 *d++ = (U8)i;
2687 else {
2688 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2689 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2690 }
2691 }
2692 else
2693#endif
2694 *d++ = (char)i;
2695
2696#ifdef EBCDIC
2697 if (uvmax) {
2698 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2699 if (uvmax > 0x101)
2700 *d++ = (char)UTF_TO_NATIVE(0xff);
2701 if (uvmax > 0x100)
2702 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2703 }
2704#endif
02aa26ce
NT
2705
2706 /* mark the range as done, and continue */
79072805 2707 dorange = FALSE;
01ec43d0 2708 didrange = TRUE;
4c3a8340
TS
2709#ifdef EBCDIC
2710 literal_endpoint = 0;
2711#endif
79072805 2712 continue;
4e553d73 2713 }
02aa26ce
NT
2714
2715 /* range begins (ignore - as first or last char) */
79072805 2716 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2717 if (didrange) {
1fafa243 2718 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2719 }
e294cc5d
JH
2720 if (has_utf8
2721#ifdef EBCDIC
2722 && !native_range
2723#endif
2724 ) {
25716404 2725 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2726 s++;
2727 continue;
2728 }
79072805
LW
2729 dorange = TRUE;
2730 s++;
01ec43d0
GS
2731 }
2732 else {
2733 didrange = FALSE;
4c3a8340
TS
2734#ifdef EBCDIC
2735 literal_endpoint = 0;
e294cc5d 2736 native_range = TRUE;
4c3a8340 2737#endif
01ec43d0 2738 }
79072805 2739 }
02aa26ce
NT
2740
2741 /* if we get here, we're not doing a transliteration */
2742
0f5d15d6
IZ
2743 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2744 except for the last char, which will be done separately. */
3280af22 2745 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2746 if (s[2] == '#') {
e994fd66 2747 while (s+1 < send && *s != ')')
db42d148 2748 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2749 }
2750 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2751 || (s[2] == '?' && s[3] == '{'))
155aba94 2752 {
cc6b7395 2753 I32 count = 1;
0f5d15d6 2754 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2755 char c;
2756
d9f97599
GS
2757 while (count && (c = *regparse)) {
2758 if (c == '\\' && regparse[1])
2759 regparse++;
4e553d73 2760 else if (c == '{')
cc6b7395 2761 count++;
4e553d73 2762 else if (c == '}')
cc6b7395 2763 count--;
d9f97599 2764 regparse++;
cc6b7395 2765 }
e994fd66 2766 if (*regparse != ')')
5bdf89e7 2767 regparse--; /* Leave one char for continuation. */
0f5d15d6 2768 while (s < regparse)
db42d148 2769 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2770 }
748a9306 2771 }
02aa26ce
NT
2772
2773 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2774 else if (*s == '#' && PL_lex_inpat &&
2775 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2776 while (s+1 < send && *s != '\n')
db42d148 2777 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2778 }
02aa26ce 2779
5d1d4326 2780 /* check for embedded arrays
da6eedaa 2781 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2782 */
1749ea0d
TS
2783 else if (*s == '@' && s[1]) {
2784 if (isALNUM_lazy_if(s+1,UTF))
2785 break;
2786 if (strchr(":'{$", s[1]))
2787 break;
2788 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2789 break; /* in regexp, neither @+ nor @- are interpolated */
2790 }
02aa26ce
NT
2791
2792 /* check for embedded scalars. only stop if we're sure it's a
2793 variable.
2794 */
79072805 2795 else if (*s == '$') {
3280af22 2796 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2797 break;
77772344 2798 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2799 if (s[1] == '\\') {
2800 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2801 "Possible unintended interpolation of $\\ in regex");
77772344 2802 }
79072805 2803 break; /* in regexp, $ might be tail anchor */
77772344 2804 }
79072805 2805 }
02aa26ce 2806
2b9d42f0
NIS
2807 /* End of else if chain - OP_TRANS rejoin rest */
2808
02aa26ce 2809 /* backslashes */
79072805 2810 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2811 char* e; /* Can be used for ending '}', etc. */
2812
79072805 2813 s++;
02aa26ce 2814
7d0fc23c
KW
2815 /* warn on \1 - \9 in substitution replacements, but note that \11
2816 * is an octal; and \19 is \1 followed by '9' */
3280af22 2817 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2818 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2819 {
a2a5de95 2820 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2821 *--s = '$';
2822 break;
2823 }
02aa26ce
NT
2824
2825 /* string-change backslash escapes */
3280af22 2826 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2827 --s;
2828 break;
2829 }
ff3f963a
KW
2830 /* In a pattern, process \N, but skip any other backslash escapes.
2831 * This is because we don't want to translate an escape sequence
2832 * into a meta symbol and have the regex compiler use the meta
2833 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2834 * in spite of this, we do have to process \N here while the proper
2835 * charnames handler is in scope. See bugs #56444 and #62056.
2836 * There is a complication because \N in a pattern may also stand
2837 * for 'match a non-nl', and not mean a charname, in which case its
2838 * processing should be deferred to the regex compiler. To be a
2839 * charname it must be followed immediately by a '{', and not look
2840 * like \N followed by a curly quantifier, i.e., not something like
2841 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2842 * quantifier */
2843 else if (PL_lex_inpat
2844 && (*s != 'N'
2845 || s[1] != '{'
2846 || regcurly(s + 1)))
2847 {
cc74c5bd
TS
2848 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2849 goto default_action;
2850 }
02aa26ce 2851
79072805 2852 switch (*s) {
02aa26ce
NT
2853
2854 /* quoted - in transliterations */
79072805 2855 case '-':
3280af22 2856 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2857 *d++ = *s++;
2858 continue;
2859 }
2860 /* FALL THROUGH */
2861 default:
11b8faa4 2862 {
a2a5de95
NC
2863 if ((isALPHA(*s) || isDIGIT(*s)))
2864 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2865 "Unrecognized escape \\%c passed through",
2866 *s);
11b8faa4 2867 /* default action is to copy the quoted character */
f9a63242 2868 goto default_action;
11b8faa4 2869 }
02aa26ce 2870
632403cc 2871 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2872 case '0': case '1': case '2': case '3':
2873 case '4': case '5': case '6': case '7':
ba210ebe 2874 {
53305cf1
NC
2875 I32 flags = 0;
2876 STRLEN len = 3;
77a135fe 2877 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2878 s += len;
2879 }
012bcf8d 2880 goto NUM_ESCAPE_INSERT;
02aa26ce 2881
f0a2b745
KW
2882 /* eg. \o{24} indicates the octal constant \024 */
2883 case 'o':
2884 {
2885 STRLEN len;
454155d9 2886 const char* error;
f0a2b745 2887
454155d9 2888 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2889 s += len;
454155d9 2890 if (! valid) {
f0a2b745
KW
2891 yyerror(error);
2892 continue;
2893 }
2894 goto NUM_ESCAPE_INSERT;
2895 }
2896
77a135fe 2897 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2898 case 'x':
a0ed51b3
LW
2899 ++s;
2900 if (*s == '{') {
9d4ba2ae 2901 char* const e = strchr(s, '}');
a4c04bdc
NC
2902 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2903 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2904 STRLEN len;
355860ce 2905
53305cf1 2906 ++s;
adaeee49 2907 if (!e) {
a0ed51b3 2908 yyerror("Missing right brace on \\x{}");
355860ce 2909 continue;
ba210ebe 2910 }
53305cf1 2911 len = e - s;
77a135fe 2912 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2913 s = e + 1;
a0ed51b3
LW
2914 }
2915 else {
ba210ebe 2916 {
53305cf1 2917 STRLEN len = 2;
a4c04bdc 2918 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2919 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2920 s += len;
2921 }
012bcf8d
GS
2922 }
2923
2924 NUM_ESCAPE_INSERT:
ff3f963a
KW
2925 /* Insert oct or hex escaped character. There will always be
2926 * enough room in sv since such escapes will be longer than any
2927 * UTF-8 sequence they can end up as, except if they force us
2928 * to recode the rest of the string into utf8 */
ba7cea30 2929
77a135fe 2930 /* Here uv is the ordinal of the next character being added in
ff3f963a 2931 * unicode (converted from native). */
77a135fe 2932 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2933 if (!has_utf8 && uv > 255) {
77a135fe
KW
2934 /* Might need to recode whatever we have accumulated so
2935 * far if it contains any chars variant in utf8 or
2936 * utf-ebcdic. */
2937
2938 SvCUR_set(sv, d - SvPVX_const(sv));
2939 SvPOK_on(sv);
2940 *d = '\0';
77a135fe 2941 /* See Note on sizing above. */
7bf79863
KW
2942 sv_utf8_upgrade_flags_grow(sv,
2943 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2944 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2945 d = SvPVX(sv) + SvCUR(sv);
2946 has_utf8 = TRUE;
012bcf8d
GS
2947 }
2948
77a135fe
KW
2949 if (has_utf8) {
2950 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2951 if (PL_lex_inwhat == OP_TRANS &&
2952 PL_sublex_info.sub_op) {
2953 PL_sublex_info.sub_op->op_private |=
2954 (PL_lex_repl ? OPpTRANS_FROM_UTF
2955 : OPpTRANS_TO_UTF);
f9a63242 2956 }
e294cc5d
JH
2957#ifdef EBCDIC
2958 if (uv > 255 && !dorange)
2959 native_range = FALSE;
2960#endif
012bcf8d 2961 }
a0ed51b3 2962 else {
012bcf8d 2963 *d++ = (char)uv;
a0ed51b3 2964 }
012bcf8d
GS
2965 }
2966 else {
c4d5f83a 2967 *d++ = (char) uv;
a0ed51b3 2968 }
79072805 2969 continue;
02aa26ce 2970
4a2d328f 2971 case 'N':
ff3f963a
KW
2972 /* In a non-pattern \N must be a named character, like \N{LATIN
2973 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2974 * mean to match a non-newline. For non-patterns, named
2975 * characters are converted to their string equivalents. In
2976 * patterns, named characters are not converted to their
2977 * ultimate forms for the same reasons that other escapes
2978 * aren't. Instead, they are converted to the \N{U+...} form
2979 * to get the value from the charnames that is in effect right
2980 * now, while preserving the fact that it was a named character
2981 * so that the regex compiler knows this */
2982
2983 /* This section of code doesn't generally use the
2984 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2985 * a close examination of this macro and determined it is a
2986 * no-op except on utfebcdic variant characters. Every
2987 * character generated by this that would normally need to be
2988 * enclosed by this macro is invariant, so the macro is not
2989 * needed, and would complicate use of copy(). There are other
2990 * parts of this file where the macro is used inconsistently,
2991 * but are saved by it being a no-op */
2992
2993 /* The structure of this section of code (besides checking for
2994 * errors and upgrading to utf8) is:
2995 * Further disambiguate between the two meanings of \N, and if
2996 * not a charname, go process it elsewhere
0a96133f
KW
2997 * If of form \N{U+...}, pass it through if a pattern;
2998 * otherwise convert to utf8
2999 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3000 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3001
3002 /* Here, s points to the 'N'; the test below is guaranteed to
3003 * succeed if we are being called on a pattern as we already
3004 * know from a test above that the next character is a '{'.
3005 * On a non-pattern \N must mean 'named sequence, which
3006 * requires braces */
3007 s++;
3008 if (*s != '{') {
3009 yyerror("Missing braces on \\N{}");
3010 continue;
3011 }
3012 s++;
3013
0a96133f 3014 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3015 if (! (e = strchr(s, '}'))) {
3016 if (! PL_lex_inpat) {
5777a3f7 3017 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3018 } else {
3019 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3020 }
0a96133f 3021 continue;
ff3f963a 3022 }
cddc7ef4 3023
ff3f963a 3024 /* Here it looks like a named character */
cddc7ef4 3025
ff3f963a
KW
3026 if (PL_lex_inpat) {
3027
3028 /* XXX This block is temporary code. \N{} implies that the
3029 * pattern is to have Unicode semantics, and therefore
3030 * currently has to be encoded in utf8. By putting it in
3031 * utf8 now, we save a whole pass in the regular expression
3032 * compiler. Once that code is changed so Unicode
3033 * semantics doesn't necessarily have to be in utf8, this
3034 * block should be removed */
3035 if (!has_utf8) {
77a135fe 3036 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3037 SvPOK_on(sv);
e4f3eed8 3038 *d = '\0';
77a135fe 3039 /* See Note on sizing above. */
7bf79863 3040 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3041 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042 /* 5 = '\N{' + cur char + NUL */
3043 (STRLEN)(send - s) + 5);
f08d6ad9 3044 d = SvPVX(sv) + SvCUR(sv);
89491803 3045 has_utf8 = TRUE;
ff3f963a
KW
3046 }
3047 }
423cee85 3048
ff3f963a
KW
3049 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3050 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3051 | PERL_SCAN_DISALLOW_PREFIX;
3052 STRLEN len;
3053
3054 /* For \N{U+...}, the '...' is a unicode value even on
3055 * EBCDIC machines */
3056 s += 2; /* Skip to next char after the 'U+' */
3057 len = e - s;
3058 uv = grok_hex(s, &len, &flags, NULL);
3059 if (len == 0 || len != (STRLEN)(e - s)) {
3060 yyerror("Invalid hexadecimal number in \\N{U+...}");
3061 s = e + 1;
3062 continue;
3063 }
3064
3065 if (PL_lex_inpat) {
3066
3067 /* Pass through to the regex compiler unchanged. The
3068 * reason we evaluated the number above is to make sure
0a96133f 3069 * there wasn't a syntax error. */
ff3f963a
KW
3070 s -= 5; /* Include the '\N{U+' */
3071 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3072 d += e - s + 1;
3073 }
3074 else { /* Not a pattern: convert the hex to string */
3075
3076 /* If destination is not in utf8, unconditionally
3077 * recode it to be so. This is because \N{} implies
3078 * Unicode semantics, and scalars have to be in utf8
3079 * to guarantee those semantics */
3080 if (! has_utf8) {
3081 SvCUR_set(sv, d - SvPVX_const(sv));
3082 SvPOK_on(sv);
3083 *d = '\0';
3084 /* See Note on sizing above. */
3085 sv_utf8_upgrade_flags_grow(
3086 sv,
3087 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3088 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3089 d = SvPVX(sv) + SvCUR(sv);
3090 has_utf8 = TRUE;
3091 }
3092
3093 /* Add the string to the output */
3094 if (UNI_IS_INVARIANT(uv)) {
3095 *d++ = (char) uv;
3096 }
3097 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3098 }
3099 }
3100 else { /* Here is \N{NAME} but not \N{U+...}. */
3101
3102 SV *res; /* result from charnames */
3103 const char *str; /* the string in 'res' */
3104 STRLEN len; /* its length */
3105
3106 /* Get the value for NAME */
3107 res = newSVpvn(s, e - s);
3108 res = new_constant( NULL, 0, "charnames",
3109 /* includes all of: \N{...} */
3110 res, NULL, s - 3, e - s + 4 );
3111
3112 /* Most likely res will be in utf8 already since the
3113 * standard charnames uses pack U, but a custom translator
3114 * can leave it otherwise, so make sure. XXX This can be
3115 * revisited to not have charnames use utf8 for characters
3116 * that don't need it when regexes don't have to be in utf8
3117 * for Unicode semantics. If doing so, remember EBCDIC */
3118 sv_utf8_upgrade(res);
3119 str = SvPV_const(res, len);
3120
3121 /* Don't accept malformed input */
3122 if (! is_utf8_string((U8 *) str, len)) {
3123 yyerror("Malformed UTF-8 returned by \\N");
3124 }
3125 else if (PL_lex_inpat) {
3126
3127 if (! len) { /* The name resolved to an empty string */
3128 Copy("\\N{}", d, 4, char);
3129 d += 4;
3130 }
3131 else {
3132 /* In order to not lose information for the regex
3133 * compiler, pass the result in the specially made
3134 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3135 * the code points in hex of each character
3136 * returned by charnames */
3137
3138 const char *str_end = str + len;
3139 STRLEN char_length; /* cur char's byte length */
3140 STRLEN output_length; /* and the number of bytes
3141 after this is translated
3142 into hex digits */
3143 const STRLEN off = d - SvPVX_const(sv);
3144
3145 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3146 * max('U+', '.'); and 1 for NUL */
3147 char hex_string[2 * UTF8_MAXBYTES + 5];
3148
3149 /* Get the first character of the result. */
3150 U32 uv = utf8n_to_uvuni((U8 *) str,
3151 len,
3152 &char_length,
3153 UTF8_ALLOW_ANYUV);
3154
3155 /* The call to is_utf8_string() above hopefully
3156 * guarantees that there won't be an error. But
3157 * it's easy here to make sure. The function just
3158 * above warns and returns 0 if invalid utf8, but
3159 * it can also return 0 if the input is validly a
3160 * NUL. Disambiguate */
3161 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3162 uv = UNICODE_REPLACEMENT;
3163 }
3164
3165 /* Convert first code point to hex, including the
3166 * boiler plate before it */
3167 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3168 output_length = strlen(hex_string);
3169
3170 /* Make sure there is enough space to hold it */
3171 d = off + SvGROW(sv, off
3172 + output_length
3173 + (STRLEN)(send - e)
3174 + 2); /* '}' + NUL */
3175 /* And output it */
3176 Copy(hex_string, d, output_length, char);
3177 d += output_length;
3178
3179 /* For each subsequent character, append dot and
3180 * its ordinal in hex */
3181 while ((str += char_length) < str_end) {
3182 const STRLEN off = d - SvPVX_const(sv);
3183 U32 uv = utf8n_to_uvuni((U8 *) str,
3184 str_end - str,
3185 &char_length,
3186 UTF8_ALLOW_ANYUV);
3187 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3188 uv = UNICODE_REPLACEMENT;
3189 }
3190
3191 sprintf(hex_string, ".%X", (unsigned int) uv);
3192 output_length = strlen(hex_string);
3193
3194 d = off + SvGROW(sv, off
3195 + output_length
3196 + (STRLEN)(send - e)
3197 + 2); /* '}' + NUL */
3198 Copy(hex_string, d, output_length, char);
3199 d += output_length;
3200 }
3201
3202 *d++ = '}'; /* Done. Add the trailing brace */
3203 }
3204 }
3205 else { /* Here, not in a pattern. Convert the name to a
3206 * string. */
3207
3208 /* If destination is not in utf8, unconditionally
3209 * recode it to be so. This is because \N{} implies
3210 * Unicode semantics, and scalars have to be in utf8
3211 * to guarantee those semantics */
3212 if (! has_utf8) {
3213 SvCUR_set(sv, d - SvPVX_const(sv));
3214 SvPOK_on(sv);
3215 *d = '\0';
3216 /* See Note on sizing above. */
3217 sv_utf8_upgrade_flags_grow(sv,
3218 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3219 len + (STRLEN)(send - s) + 1);
3220 d = SvPVX(sv) + SvCUR(sv);
3221 has_utf8 = TRUE;
3222 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3223
3224 /* See Note on sizing above. (NOTE: SvCUR() is not
3225 * set correctly here). */
3226 const STRLEN off = d - SvPVX_const(sv);
3227 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3228 }
3229 Copy(str, d, len, char);
3230 d += len;
423cee85 3231 }
423cee85 3232 SvREFCNT_dec(res);
cb233ae3
KW
3233
3234 /* Deprecate non-approved name syntax */
3235 if (ckWARN_d(WARN_DEPRECATED)) {
3236 bool problematic = FALSE;
3237 char* i = s;
3238
3239 /* For non-ut8 input, look to see that the first
3240 * character is an alpha, then loop through the rest
3241 * checking that each is a continuation */
3242 if (! this_utf8) {
3243 if (! isALPHAU(*i)) problematic = TRUE;
3244 else for (i = s + 1; i < e; i++) {
3245 if (isCHARNAME_CONT(*i)) continue;
3246 problematic = TRUE;
3247 break;
3248 }
3249 }
3250 else {
3251 /* Similarly for utf8. For invariants can check
3252 * directly. We accept anything above the latin1
3253 * range because it is immaterial to Perl if it is
3254 * correct or not, and is expensive to check. But
3255 * it is fairly easy in the latin1 range to convert
3256 * the variants into a single character and check
3257 * those */
3258 if (UTF8_IS_INVARIANT(*i)) {
3259 if (! isALPHAU(*i)) problematic = TRUE;
3260 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3261 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3262 *(i+1)))))
3263 {
3264 problematic = TRUE;
3265 }
3266 }
3267 if (! problematic) for (i = s + UTF8SKIP(s);
3268 i < e;
3269 i+= UTF8SKIP(i))
3270 {
3271 if (UTF8_IS_INVARIANT(*i)) {
3272 if (isCHARNAME_CONT(*i)) continue;
3273 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3274 continue;
3275 } else if (isCHARNAME_CONT(
3276 UNI_TO_NATIVE(
3277 UTF8_ACCUMULATE(*i, *(i+1)))))
3278 {
3279 continue;
3280 }
3281 problematic = TRUE;
3282 break;
3283 }
3284 }
3285 if (problematic) {
6e1bad6c
KW
3286 /* The e-i passed to the final %.*s makes sure that
3287 * should the trailing NUL be missing that this
3288 * print won't run off the end of the string */
cb233ae3 3289 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3290 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3291 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3292 }
3293 }
3294 } /* End \N{NAME} */
ff3f963a
KW
3295#ifdef EBCDIC
3296 if (!dorange)
3297 native_range = FALSE; /* \N{} is defined to be Unicode */
3298#endif
3299 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3300 continue;
3301
02aa26ce 3302 /* \c is a control character */
79072805
LW
3303 case 'c':
3304 s++;
961ce445 3305 if (s < send) {
f9d13529 3306 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3307 }
961ce445
RGS
3308 else {
3309 yyerror("Missing control char name in \\c");
3310 }
79072805 3311 continue;
02aa26ce
NT
3312
3313 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3314 case 'b':
db42d148 3315 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3316 break;
3317 case 'n':
db42d148 3318 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3319 break;
3320 case 'r':
db42d148 3321 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3322 break;
3323 case 'f':
db42d148 3324 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3325 break;
3326 case 't':
db42d148 3327 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3328 break;
34a3fe2a 3329 case 'e':
db42d148 3330 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3331 break;
3332 case 'a':
db42d148 3333 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3334 break;
02aa26ce
NT
3335 } /* end switch */
3336
79072805
LW
3337 s++;
3338 continue;
02aa26ce 3339 } /* end if (backslash) */
4c3a8340
TS
3340#ifdef EBCDIC
3341 else
3342 literal_endpoint++;
3343#endif
02aa26ce 3344
f9a63242 3345 default_action:
77a135fe
KW
3346 /* If we started with encoded form, or already know we want it,
3347 then encode the next character */
3348 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3349 STRLEN len = 1;
77a135fe
KW
3350
3351
3352 /* One might think that it is wasted effort in the case of the
3353 * source being utf8 (this_utf8 == TRUE) to take the next character
3354 * in the source, convert it to an unsigned value, and then convert
3355 * it back again. But the source has not been validated here. The
3356 * routine that does the conversion checks for errors like
3357 * malformed utf8 */
3358
5f66b61c
AL
3359 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3360 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3361 if (!has_utf8) {
3362 SvCUR_set(sv, d - SvPVX_const(sv));
3363 SvPOK_on(sv);
3364 *d = '\0';
77a135fe 3365 /* See Note on sizing above. */
7bf79863
KW
3366 sv_utf8_upgrade_flags_grow(sv,
3367 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3368 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3369 d = SvPVX(sv) + SvCUR(sv);
3370 has_utf8 = TRUE;
3371 } else if (need > len) {
3372 /* encoded value larger than old, may need extra space (NOTE:
3373 * SvCUR() is not set correctly here). See Note on sizing
3374 * above. */
9d4ba2ae 3375 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3376 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3377 }
77a135fe
KW
3378 s += len;
3379
5f66b61c 3380 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3381#ifdef EBCDIC
3382 if (uv > 255 && !dorange)
3383 native_range = FALSE;
3384#endif
2b9d42f0
NIS
3385 }
3386 else {
3387 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3388 }
02aa26ce
NT
3389 } /* while loop to process each character */
3390
3391 /* terminate the string and set up the sv */
79072805 3392 *d = '\0';
95a20fc0 3393 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3394 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3395 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3396
79072805 3397 SvPOK_on(sv);
9f4817db 3398 if (PL_encoding && !has_utf8) {
d0063567
DK
3399 sv_recode_to_utf8(sv, PL_encoding);
3400 if (SvUTF8(sv))
3401 has_utf8 = TRUE;
9f4817db 3402 }
2b9d42f0 3403 if (has_utf8) {
7e2040f0 3404 SvUTF8_on(sv);
2b9d42f0 3405 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3406 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3407 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3408 }
3409 }
79072805 3410
02aa26ce 3411 /* shrink the sv if we allocated more than we used */
79072805 3412 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3413 SvPV_shrink_to_cur(sv);
79072805 3414 }
02aa26ce 3415
6154021b 3416 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3417 if (s > PL_bufptr) {
eb0d8d16
NC
3418 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3419 const char *const key = PL_lex_inpat ? "qr" : "q";
3420 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3421 const char *type;
3422 STRLEN typelen;
3423
3424 if (PL_lex_inwhat == OP_TRANS) {
3425 type = "tr";
3426 typelen = 2;
3427 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3428 type = "s";
3429 typelen = 1;
3430 } else {
3431 type = "qq";
3432 typelen = 2;
3433 }
3434
3435 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3436 type, typelen);
3437 }
6154021b 3438 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3439 } else
8990e307 3440 SvREFCNT_dec(sv);
79072805
LW
3441 return s;
3442}
3443
ffb4593c
NT
3444/* S_intuit_more
3445 * Returns TRUE if there's more to the expression (e.g., a subscript),
3446 * FALSE otherwise.
ffb4593c
NT
3447 *
3448 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3449 *
3450 * ->[ and ->{ return TRUE
3451 * { and [ outside a pattern are always subscripts, so return TRUE
3452 * if we're outside a pattern and it's not { or [, then return FALSE
3453 * if we're in a pattern and the first char is a {
3454 * {4,5} (any digits around the comma) returns FALSE
3455 * if we're in a pattern and the first char is a [
3456 * [] returns FALSE
3457 * [SOMETHING] has a funky algorithm to decide whether it's a
3458 * character class or not. It has to deal with things like
3459 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3460 * anything else returns TRUE
3461 */
3462
9cbb5ea2
GS
3463/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3464
76e3520e 3465STATIC int
cea2e8a9 3466S_intuit_more(pTHX_ register char *s)
79072805 3467{
97aff369 3468 dVAR;
7918f24d
NC
3469
3470 PERL_ARGS_ASSERT_INTUIT_MORE;
3471
3280af22 3472 if (PL_lex_brackets)
79072805
LW
3473 return TRUE;
3474 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3475 return TRUE;
3476 if (*s != '{' && *s != '[')
3477 return FALSE;
3280af22 3478 if (!PL_lex_inpat)
79072805
LW
3479 return TRUE;
3480
3481 /* In a pattern, so maybe we have {n,m}. */
3482 if (*s == '{') {
3483 s++;
3484 if (!isDIGIT(*s))
3485 return TRUE;
3486 while (isDIGIT(*s))
3487 s++;
3488 if (*s == ',')
3489 s++;
3490 while (isDIGIT(*s))
3491 s++;
3492 if (*s == '}')
3493 return FALSE;
3494 return TRUE;
3495
3496 }
3497
3498 /* On the other hand, maybe we have a character class */
3499
3500 s++;
3501 if (*s == ']' || *s == '^')
3502 return FALSE;
3503 else {
ffb4593c 3504 /* this is terrifying, and it works */
79072805
LW
3505 int weight = 2; /* let's weigh the evidence */
3506 char seen[256];
f27ffc4a 3507 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3508 const char * const send = strchr(s,']');
3280af22 3509 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3510
3511 if (!send) /* has to be an expression */
3512 return TRUE;
3513
3514 Zero(seen,256,char);
3515 if (*s == '$')
3516 weight -= 3;
3517 else if (isDIGIT(*s)) {
3518 if (s[1] != ']') {
3519 if (isDIGIT(s[1]) && s[2] == ']')
3520 weight -= 10;
3521 }
3522 else
3523 weight -= 100;
3524 }
3525 for (; s < send; s++) {
3526 last_un_char = un_char;
3527 un_char = (unsigned char)*s;
3528 switch (*s) {
3529 case '@':
3530 case '&':
3531 case '$':
3532 weight -= seen[un_char] * 10;
7e2040f0 3533 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3534 int len;
8903cb82 3535 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3536 len = (int)strlen(tmpbuf);
3537 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3538 weight -= 100;
3539 else
3540 weight -= 10;
3541 }
3542 else if (*s == '$' && s[1] &&
93a17b20
LW
3543 strchr("[#!%*<>()-=",s[1])) {
3544 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3545 weight -= 10;
3546 else
3547 weight -= 1;
3548 }
3549 break;
3550 case '\\':
3551 un_char = 254;
3552 if (s[1]) {
93a17b20 3553 if (strchr("wds]",s[1]))
79072805 3554 weight += 100;
10edeb5d 3555 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3556 weight += 1;
93a17b20 3557 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3558 weight += 40;
3559 else if (isDIGIT(s[1])) {
3560 weight += 40;
3561 while (s[1] && isDIGIT(s[1]))
3562 s++;
3563 }
3564 }
3565 else
3566 weight += 100;
3567 break;
3568 case '-':
3569 if (s[1] == '\\')
3570 weight += 50;
93a17b20 3571 if (strchr("aA01! ",last_un_char))
79072805 3572 weight += 30;
93a17b20 3573 if (strchr("zZ79~",s[1]))
79072805 3574 weight += 30;
f27ffc4a
GS
3575 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3576 weight -= 5; /* cope with negative subscript */
79072805
LW
3577 break;
3578 default:
3792a11b
NC
3579 if (!isALNUM(last_un_char)
3580 && !(last_un_char == '$' || last_un_char == '@'
3581 || last_un_char == '&')
3582 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3583 char *d = tmpbuf;
3584 while (isALPHA(*s))
3585 *d++ = *s++;
3586 *d = '\0';
5458a98a 3587 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3588 weight -= 150;
3589 }
3590 if (un_char == last_un_char + 1)
3591 weight += 5;
3592 weight -= seen[un_char];
3593 break;
3594 }
3595 seen[un_char]++;
3596 }
3597 if (weight >= 0) /* probably a character class */
3598 return FALSE;
3599 }
3600
3601 return TRUE;
3602}
ffed7fef 3603
ffb4593c
NT
3604/*
3605 * S_intuit_method
3606 *
3607 * Does all the checking to disambiguate
3608 * foo bar
3609 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3610 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3611 *
3612 * First argument is the stuff after the first token, e.g. "bar".
3613 *
3614 * Not a method if bar is a filehandle.
3615 * Not a method if foo is a subroutine prototyped to take a filehandle.
3616 * Not a method if it's really "Foo $bar"
3617 * Method if it's "foo $bar"
3618 * Not a method if it's really "print foo $bar"
3619 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3620 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3621 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3622 * =>
3623 */
3624
76e3520e 3625STATIC int
62d55b22 3626S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3627{
97aff369 3628 dVAR;
a0d0e21e 3629 char *s = start + (*start == '$');
3280af22 3630 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3631 STRLEN len;
3632 GV* indirgv;
5db06880
NC
3633#ifdef PERL_MAD
3634 int soff;
3635#endif
a0d0e21e 3636
7918f24d
NC
3637 PERL_ARGS_ASSERT_INTUIT_METHOD;
3638
a0d0e21e 3639 if (gv) {
62d55b22 3640 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3641 return 0;
62d55b22
NC
3642 if (cv) {
3643 if (SvPOK(cv)) {
3644 const char *proto = SvPVX_const(cv);
3645 if (proto) {
3646 if (*proto == ';')
3647 proto++;
3648 if (*proto == '*')
3649 return 0;
3650 }
b6c543e3
IZ
3651 }
3652 } else
c35e046a 3653 gv = NULL;
a0d0e21e 3654 }
8903cb82 3655 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3656 /* start is the beginning of the possible filehandle/object,
3657 * and s is the end of it
3658 * tmpbuf is a copy of it
3659 */
3660
a0d0e21e 3661 if (*start == '$') {
3ef1310e
RGS
3662 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3663 isUPPER(*PL_tokenbuf))
a0d0e21e 3664 return 0;
5db06880
NC
3665#ifdef PERL_MAD
3666 len = start - SvPVX(PL_linestr);
3667#endif
29595ff2 3668 s = PEEKSPACE(s);
f0092767 3669#ifdef PERL_MAD
5db06880
NC
3670 start = SvPVX(PL_linestr) + len;
3671#endif
3280af22
NIS
3672 PL_bufptr = start;
3673 PL_expect = XREF;
a0d0e21e
LW
3674 return *s == '(' ? FUNCMETH : METHOD;
3675 }
5458a98a 3676 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3677 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3678 len -= 2;
3679 tmpbuf[len] = '\0';
5db06880
NC
3680#ifdef PERL_MAD
3681 soff = s - SvPVX(PL_linestr);
3682#endif
c3e0f903
GS
3683 goto bare_package;
3684 }
90e5519e 3685 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3686 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3687 return 0;
3688 /* filehandle or package name makes it a method */
da51bb9b 3689 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3690#ifdef PERL_MAD
3691 soff = s - SvPVX(PL_linestr);
3692#endif
29595ff2 3693 s = PEEKSPACE(s);
3280af22 3694 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3695 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3696 bare_package:
cd81e915 3697 start_force(PL_curforce);
9ded7720 3698 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3699 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3700 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3701 if (PL_madskills)
3702 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3703 PL_expect = XTERM;
a0d0e21e 3704 force_next(WORD);
3280af22 3705 PL_bufptr = s;
5db06880
NC
3706#ifdef PERL_MAD
3707 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3708#endif
a0d0e21e
LW
3709 return *s == '(' ? FUNCMETH : METHOD;
3710 }
3711 }
3712 return 0;
3713}
3714
16d20bd9 3715/* Encoded script support. filter_add() effectively inserts a
4e553d73 3716 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3717 * Note that the filter function only applies to the current source file
3718 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3719 *
3720 * The datasv parameter (which may be NULL) can be used to pass
3721 * private data to this instance of the filter. The filter function
3722 * can recover the SV using the FILTER_DATA macro and use it to
3723 * store private buffers and state information.
3724 *
3725 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3726 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3727 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3728 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3729 * private use must be set using malloc'd pointers.
3730 */
16d20bd9
AD
3731
3732SV *
864dbfa3 3733Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3734{
97aff369 3735 dVAR;
f4c556ac 3736 if (!funcp)
a0714e2c 3737 return NULL;
f4c556ac 3738
5486870f
DM
3739 if (!PL_parser)
3740 return NULL;
3741
3280af22
NIS
3742 if (!PL_rsfp_filters)
3743 PL_rsfp_filters = newAV();
16d20bd9 3744 if (!datasv)
561b68a9 3745 datasv = newSV(0);
862a34c6 3746 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3747 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3748 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3749 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3750 FPTR2DPTR(void *, IoANY(datasv)),
3751 SvPV_nolen(datasv)));
3280af22
NIS
3752 av_unshift(PL_rsfp_filters, 1);
3753 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3754 return(datasv);
3755}
4e553d73 3756
16d20bd9
AD
3757
3758/* Delete most recently added instance of this filter function. */
a0d0e21e 3759void
864dbfa3 3760Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3761{
97aff369 3762 dVAR;
e0c19803 3763 SV *datasv;
24801a4b 3764
7918f24d
NC
3765 PERL_ARGS_ASSERT_FILTER_DEL;
3766
33073adb 3767#ifdef DEBUGGING
55662e27
JH
3768 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3769 FPTR2DPTR(void*, funcp)));
33073adb 3770#endif
5486870f 3771 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3772 return;
3773 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3774 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3775 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3776 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3777
16d20bd9
AD
3778 return;
3779 }
3780 /* we need to search for the correct entry and clear it */
cea2e8a9 3781 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3782}
3783
3784
1de9afcd
RGS
3785/* Invoke the idxth filter function for the current rsfp. */
3786/* maxlen 0 = read one text line */
16d20bd9 3787I32
864dbfa3 3788Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3789{
97aff369 3790 dVAR;
16d20bd9
AD
3791 filter_t funcp;
3792 SV *datasv = NULL;
f482118e
NC
3793 /* This API is bad. It should have been using unsigned int for maxlen.
3794 Not sure if we want to change the API, but if not we should sanity
3795 check the value here. */
39cd7a59
NC
3796 const unsigned int correct_length
3797 = maxlen < 0 ?
3798#ifdef PERL_MICRO
3799 0x7FFFFFFF
3800#else
3801 INT_MAX
3802#endif
3803 : maxlen;
e50aee73 3804
7918f24d
NC
3805 PERL_ARGS_ASSERT_FILTER_READ;
3806
5486870f 3807 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3808 return -1;
1de9afcd 3809 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3810 /* Provide a default input filter to make life easy. */
3811 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3812 DEBUG_P(PerlIO_printf(Perl_debug_log,
3813 "filter_read %d: from rsfp\n", idx));
f482118e 3814 if (correct_length) {
16d20bd9
AD
3815 /* Want a block */
3816 int len ;
f54cb97a 3817 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3818
3819 /* ensure buf_sv is large enough */
881d8f0a 3820 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3821 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3822 correct_length)) <= 0) {
3280af22 3823 if (PerlIO_error(PL_rsfp))
37120919
AD
3824 return -1; /* error */
3825 else
3826 return 0 ; /* end of file */
3827 }
16d20bd9 3828 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3829 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3830 } else {
3831 /* Want a line */
3280af22
NIS
3832 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3833 if (PerlIO_error(PL_rsfp))
37120919
AD
3834 return -1; /* error */
3835 else
3836 return 0 ; /* end of file */
3837 }
16d20bd9
AD
3838 }
3839 return SvCUR(buf_sv);
3840 }
3841 /* Skip this filter slot if filter has been deleted */
1de9afcd 3842 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3843 DEBUG_P(PerlIO_printf(Perl_debug_log,
3844 "filter_read %d: skipped (filter deleted)\n",
3845 idx));
f482118e 3846 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3847 }
3848 /* Get function pointer hidden within datasv */
8141890a 3849 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3850 DEBUG_P(PerlIO_printf(Perl_debug_log,
3851 "filter_read %d: via function %p (%s)\n",
ca0270c4 3852 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3853 /* Call function. The function is expected to */
3854 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3855 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3856 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3857}
3858
76e3520e 3859STATIC char *
5cc814fd 3860S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3861{
97aff369 3862 dVAR;
7918f24d
NC
3863
3864 PERL_ARGS_ASSERT_FILTER_GETS;
3865
c39cd008 3866#ifdef PERL_CR_FILTER
3280af22 3867 if (!PL_rsfp_filters) {
c39cd008 3868 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3869 }
3870#endif
3280af22 3871 if (PL_rsfp_filters) {
55497cff 3872 if (!append)
3873 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3874 if (FILTER_READ(0, sv, 0) > 0)
3875 return ( SvPVX(sv) ) ;
3876 else
bd61b366 3877 return NULL ;
16d20bd9 3878 }
9d116dd7 3879 else
5cc814fd 3880 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3881}
3882
01ec43d0 3883STATIC HV *
9bde8eb0 3884S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3885{
97aff369 3886 dVAR;
def3634b
GS
3887 GV *gv;
3888
7918f24d
NC
3889 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3890
01ec43d0 3891 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3892 return PL_curstash;
3893
3894 if (len > 2 &&
3895 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3896 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3897 {
3898 return GvHV(gv); /* Foo:: */
def3634b
GS
3899 }
3900
3901 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3902 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3903 if (gv && GvCV(gv)) {
3904 SV * const sv = cv_const_sv(GvCV(gv));
3905 if (sv)
9bde8eb0 3906 pkgname = SvPV_const(sv, len);
def3634b
GS
3907 }
3908
9bde8eb0 3909 return gv_stashpvn(pkgname, len, 0);
def3634b 3910}
a0d0e21e 3911
e3f73d4e
RGS
3912/*
3913 * S_readpipe_override
3914 * Check whether readpipe() is overriden, and generates the appropriate
3915 * optree, provided sublex_start() is called afterwards.
3916 */
3917STATIC void
1d51329b 3918S_readpipe_override(pTHX)
e3f73d4e
RGS
3919{
3920 GV **gvp;
3921 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3922 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3923 if ((gv_readpipe
3924 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3925 ||
3926 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3927 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3928 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3929 {
3930 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3931 append_elem(OP_LIST,
3932 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3933 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3934 }
e3f73d4e
RGS
3935}
3936
5db06880
NC
3937#ifdef PERL_MAD
3938 /*
3939 * Perl_madlex
3940 * The intent of this yylex wrapper is to minimize the changes to the
3941 * tokener when we aren't interested in collecting madprops. It remains
3942 * to be seen how successful this strategy will be...
3943 */
3944
3945int
3946Perl_madlex(pTHX)
3947{
3948 int optype;
3949 char *s = PL_bufptr;
3950
cd81e915
NC
3951 /* make sure PL_thiswhite is initialized */
3952 PL_thiswhite = 0;
3953 PL_thismad = 0;
5db06880 3954
cd81e915 3955 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3956 if (PL_pending_ident)
3957 return S_pending_ident(aTHX);
3958
3959 /* previous token ate up our whitespace? */
cd81e915
NC
3960 if (!PL_lasttoke && PL_nextwhite) {
3961 PL_thiswhite = PL_nextwhite;
3962 PL_nextwhite = 0;
5db06880
NC
3963 }
3964
3965 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3966 PL_realtokenstart = -1;
3967 PL_thistoken = 0;
5db06880
NC
3968 optype = yylex();
3969 s = PL_bufptr;
cd81e915 3970 assert(PL_curforce < 0);
5db06880 3971
cd81e915
NC
3972 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3973 if (!PL_thistoken) {
3974 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3975 PL_thistoken = newSVpvs("");
5db06880 3976 else {
c35e046a 3977 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3978 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3979 }
3980 }
cd81e915
NC
3981 if (PL_thismad) /* install head */
3982 CURMAD('X', PL_thistoken);
5db06880
NC
3983 }
3984
3985 /* last whitespace of a sublex? */
cd81e915
NC
3986 if (optype == ')' && PL_endwhite) {
3987 CURMAD('X', PL_endwhite);
5db06880
NC
3988 }
3989
cd81e915 3990 if (!PL_thismad) {
5db06880
NC
3991
3992 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3993 if (!PL_thiswhite && !PL_endwhite && !optype) {
3994 sv_free(PL_thistoken);
3995 PL_thistoken = 0;
5db06880
NC
3996 return 0;
3997 }
3998
3999 /* put off final whitespace till peg */
4000 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4001 PL_nextwhite = PL_thiswhite;
4002 PL_thiswhite = 0;
5db06880 4003 }
cd81e915
NC
4004 else if (PL_thisopen) {
4005 CURMAD('q', PL_thisopen);
4006 if (PL_thistoken)
4007 sv_free(PL_thistoken);
4008 PL_thistoken = 0;
5db06880
NC
4009 }
4010 else {
4011 /* Store actual token text as madprop X */
cd81e915 4012 CURMAD('X', PL_thistoken);
5db06880
NC
4013 }
4014
cd81e915 4015 if (PL_thiswhite) {
5db06880 4016 /* add preceding whitespace as madprop _ */
cd81e915 4017 CURMAD('_', PL_thiswhite);
5db06880
NC
4018 }
4019
cd81e915 4020 if (PL_thisstuff) {
5db06880 4021 /* add quoted material as madprop = */
cd81e915 4022 CURMAD('=', PL_thisstuff);
5db06880
NC
4023 }
4024
cd81e915 4025 if (PL_thisclose) {
5db06880 4026 /* add terminating quote as madprop Q */
cd81e915 4027 CURMAD('Q', PL_thisclose);
5db06880
NC
4028 }
4029 }
4030
4031 /* special processing based on optype */
4032
4033 switch (optype) {
4034
4035 /* opval doesn't need a TOKEN since it can already store mp */
4036 case WORD:
4037 case METHOD:
4038 case FUNCMETH:
4039 case THING:
4040 case PMFUNC:
4041 case PRIVATEREF:
4042 case FUNC0SUB:
4043 case UNIOPSUB:
4044 case LSTOPSUB:
6154021b
RGS
4045 if (pl_yylval.opval)
4046 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4047 PL_thismad = 0;
5db06880
NC
4048 return optype;
4049
4050 /* fake EOF */
4051 case 0:
4052 optype = PEG;
cd81e915
NC
4053 if (PL_endwhite) {
4054 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4055 PL_endwhite = 0;
5db06880
NC
4056 }
4057 break;
4058
4059 case ']':
4060 case '}':
cd81e915 4061 if (PL_faketokens)
5db06880
NC
4062 break;
4063 /* remember any fake bracket that lexer is about to discard */
4064 if (PL_lex_brackets == 1 &&
4065 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4066 {
4067 s = PL_bufptr;
4068 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4069 s++;
4070 if (*s == '}') {
cd81e915
NC
4071 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4072 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4073 PL_thiswhite = 0;
5db06880
NC
4074 PL_bufptr = s - 1;
4075 break; /* don't bother looking for trailing comment */
4076 }
4077 else
4078 s = PL_bufptr;
4079 }
4080 if (optype == ']')
4081 break;
4082 /* FALLTHROUGH */
4083
4084 /* attach a trailing comment to its statement instead of next token */
4085 case ';':
cd81e915 4086 if (PL_faketokens)
5db06880
NC
4087 break;
4088 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4089 s = PL_bufptr;
4090 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4091 s++;
4092 if (*s == '\n' || *s == '#') {
4093 while (s < PL_bufend && *s != '\n')
4094 s++;
4095 if (s < PL_bufend)
4096 s++;
cd81e915
NC
4097 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4098 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4099 PL_thiswhite = 0;
5db06880
NC
4100 PL_bufptr = s;
4101 }
4102 }
4103 break;
4104
4105 /* pval */
4106 case LABEL:
4107 break;
4108
4109 /* ival */
4110 default:
4111 break;
4112
4113 }
4114
4115 /* Create new token struct. Note: opvals return early above. */
6154021b 4116 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4117 PL_thismad = 0;
5db06880
NC
4118 return optype;
4119}
4120#endif
4121
468aa647 4122STATIC char *
cc6ed77d 4123S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4124 dVAR;
7918f24d
NC
4125
4126 PERL_ARGS_ASSERT_TOKENIZE_USE;
4127
468aa647
RGS
4128 if (PL_expect != XSTATE)
4129 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4130 is_use ? "use" : "no"));
29595ff2 4131 s = SKIPSPACE1(s);
468aa647
RGS
4132 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4133 s = force_version(s, TRUE);
17c59fdf
VP
4134 if (*s == ';' || *s == '}'
4135 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4136 start_force(PL_curforce);
9ded7720 4137 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4138 force_next(WORD);
4139 }
4140 else if (*s == 'v') {
4141 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4142 s = force_version(s, FALSE);
4143 }
4144 }
4145 else {
4146 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4147 s = force_version(s, FALSE);
4148 }
6154021b 4149 pl_yylval.ival = is_use;
468aa647
RGS
4150 return s;
4151}
748a9306 4152#ifdef DEBUGGING
27da23d5 4153 static const char* const exp_name[] =
09bef843 4154 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4155 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4156 };
748a9306 4157#endif
463ee0b2 4158
02aa26ce
NT
4159/*
4160 yylex
4161
4162 Works out what to call the token just pulled out of the input
4163 stream. The yacc parser takes care of taking the ops we return and
4164 stitching them into a tree.
4165
4166 Returns:
4167 PRIVATEREF
4168
4169 Structure:
4170 if read an identifier
4171 if we're in a my declaration
4172 croak if they tried to say my($foo::bar)
4173 build the ops for a my() declaration
4174 if it's an access to a my() variable
4175 are we in a sort block?
4176 croak if my($a); $a <=> $b
4177 build ops for access to a my() variable
4178 if in a dq string, and they've said @foo and we can't find @foo
4179 croak
4180 build ops for a bareword
4181 if we already built the token before, use it.
4182*/
4183
20141f0e 4184
dba4d153
JH
4185#ifdef __SC__
4186#pragma segment Perl_yylex
4187#endif
dba4d153 4188int
dba4d153 4189Perl_yylex(pTHX)
20141f0e 4190{
97aff369 4191 dVAR;
3afc138a 4192 register char *s = PL_bufptr;
378cc40b 4193 register char *d;
463ee0b2 4194 STRLEN len;
aa7440fb 4195 bool bof = FALSE;
580561a3 4196 U32 fake_eof = 0;
a687059c 4197
10edeb5d
JH
4198 /* orig_keyword, gvp, and gv are initialized here because
4199 * jump to the label just_a_word_zero can bypass their
4200 * initialization later. */
4201 I32 orig_keyword = 0;
4202 GV *gv = NULL;
4203 GV **gvp = NULL;
4204
bbf60fe6 4205 DEBUG_T( {
396482e1 4206 SV* tmp = newSVpvs("");
b6007c36
DM
4207 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4208 (IV)CopLINE(PL_curcop),
4209 lex_state_names[PL_lex_state],
4210 exp_name[PL_expect],
4211 pv_display(tmp, s, strlen(s), 0, 60));
4212 SvREFCNT_dec(tmp);
bbf60fe6 4213 } );
02aa26ce 4214 /* check if there's an identifier for us to look at */
ba979b31 4215 if (PL_pending_ident)
bbf60fe6 4216 return REPORT(S_pending_ident(aTHX));
bbce6d69 4217
02aa26ce
NT
4218 /* no identifier pending identification */
4219
3280af22 4220 switch (PL_lex_state) {
79072805
LW
4221#ifdef COMMENTARY
4222 case LEX_NORMAL: /* Some compilers will produce faster */
4223 case LEX_INTERPNORMAL: /* code if we comment these out. */
4224 break;
4225#endif
4226
09bef843 4227 /* when we've already built the next token, just pull it out of the queue */
79072805 4228 case LEX_KNOWNEXT:
5db06880
NC
4229#ifdef PERL_MAD
4230 PL_lasttoke--;
6154021b 4231 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4232 if (PL_madskills) {
cd81e915 4233 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4234 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4235 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4236 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4237 PL_thismad->mad_val = 0;
4238 mad_free(PL_thismad);
4239 PL_thismad = 0;
5db06880
NC
4240 }
4241 }
4242 if (!PL_lasttoke) {
4243 PL_lex_state = PL_lex_defer;
4244 PL_expect = PL_lex_expect;
4245 PL_lex_defer = LEX_NORMAL;
4246 if (!PL_nexttoke[PL_lasttoke].next_type)
4247 return yylex();
4248 }
4249#else
3280af22 4250 PL_nexttoke--;
6154021b 4251 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4252 if (!PL_nexttoke) {
4253 PL_lex_state = PL_lex_defer;
4254 PL_expect = PL_lex_expect;
4255 PL_lex_defer = LEX_NORMAL;
463ee0b2 4256 }
5db06880
NC
4257#endif
4258#ifdef PERL_MAD
4259 /* FIXME - can these be merged? */
4260 return(PL_nexttoke[PL_lasttoke].next_type);
4261#else
bbf60fe6 4262 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4263#endif
79072805 4264
02aa26ce 4265 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4266 when we get here, PL_bufptr is at the \
02aa26ce 4267 */
79072805
LW
4268 case LEX_INTERPCASEMOD:
4269#ifdef DEBUGGING
3280af22 4270 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4271 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4272#endif
02aa26ce 4273 /* handle \E or end of string */
3280af22 4274 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4275 /* if at a \E */
3280af22 4276 if (PL_lex_casemods) {
f54cb97a 4277 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4278 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4279
3792a11b
NC
4280 if (PL_bufptr != PL_bufend
4281 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4282 PL_bufptr += 2;
4283 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4284#ifdef PERL_MAD
4285 if (PL_madskills)
6b29d1f5 4286 PL_thistoken = newSVpvs("\\E");
5db06880 4287#endif
a0d0e21e 4288 }
bbf60fe6 4289 return REPORT(')');
79072805 4290 }
5db06880
NC
4291#ifdef PERL_MAD
4292 while (PL_bufptr != PL_bufend &&
4293 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4294 if (!PL_thiswhite)
6b29d1f5 4295 PL_thiswhite = newSVpvs("");
cd81e915 4296 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4297 PL_bufptr += 2;
4298 }
4299#else
3280af22
NIS
4300 if (PL_bufptr != PL_bufend)
4301 PL_bufptr += 2;
5db06880 4302#endif
3280af22 4303 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4304 return yylex();
79072805
LW
4305 }
4306 else {
607df283 4307 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4308 "### Saw case modifier\n"); });
3280af22 4309 s = PL_bufptr + 1;
6e909404 4310 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4311#ifdef PERL_MAD
cd81e915 4312 if (!PL_thiswhite)
6b29d1f5 4313 PL_thiswhite = newSVpvs("");
cd81e915 4314 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4315#endif
89122651 4316 PL_bufptr = s + 3;
6e909404
JH
4317 PL_lex_state = LEX_INTERPCONCAT;
4318 return yylex();
a0d0e21e 4319 }
6e909404 4320 else {
90771dc0 4321 I32 tmp;
5db06880
NC
4322 if (!PL_madskills) /* when just compiling don't need correct */
4323 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4324 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4325 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4326 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4327 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4328 return REPORT(')');
6e909404
JH
4329 }
4330 if (PL_lex_casemods > 10)
4331 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4332 PL_lex_casestack[PL_lex_casemods++] = *s;
4333 PL_lex_casestack[PL_lex_casemods] = '\0';
4334 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4335 start_force(PL_curforce);
9ded7720 4336 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4337 force_next('(');
cd81e915 4338 start_force(PL_curforce);
6e909404 4339 if (*s == 'l')
9ded7720 4340 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4341 else if (*s == 'u')
9ded7720 4342 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4343 else if (*s == 'L')
9ded7720 4344 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4345 else if (*s == 'U')
9ded7720 4346 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4347 else if (*s == 'Q')
9ded7720 4348 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4349 else
4350 Perl_croak(aTHX_ "panic: yylex");
5db06880 4351 if (PL_madskills) {
a5849ce5
NC
4352 SV* const tmpsv = newSVpvs("\\ ");
4353 /* replace the space with the character we want to escape
4354 */
4355 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4356 curmad('_', tmpsv);
4357 }
6e909404 4358 PL_bufptr = s + 1;
a0d0e21e 4359 }
79072805 4360 force_next(FUNC);
3280af22
NIS
4361 if (PL_lex_starts) {
4362 s = PL_bufptr;
4363 PL_lex_starts = 0;
5db06880
NC
4364#ifdef PERL_MAD
4365 if (PL_madskills) {
cd81e915
NC
4366 if (PL_thistoken)
4367 sv_free(PL_thistoken);
6b29d1f5 4368 PL_thistoken = newSVpvs("");
5db06880
NC
4369 }
4370#endif
131b3ad0
DM
4371 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4372 if (PL_lex_casemods == 1 && PL_lex_inpat)
4373 OPERATOR(',');
4374 else
4375 Aop(OP_CONCAT);
79072805
LW
4376 }
4377 else
cea2e8a9 4378 return yylex();
79072805
LW
4379 }
4380
55497cff 4381 case LEX_INTERPPUSH:
bbf60fe6 4382 return REPORT(sublex_push());
55497cff 4383
79072805 4384 case LEX_INTERPSTART:
3280af22 4385 if (PL_bufptr == PL_bufend)
bbf60fe6 4386 return REPORT(sublex_done());
607df283 4387 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4388 "### Interpolated variable\n"); });
3280af22
NIS
4389 PL_expect = XTERM;
4390 PL_lex_dojoin = (*PL_bufptr == '@');
4391 PL_lex_state = LEX_INTERPNORMAL;
4392 if (PL_lex_dojoin) {
cd81e915 4393 start_force(PL_curforce);
9ded7720 4394 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4395 force_next(',');
cd81e915 4396 start_force(PL_curforce);
a0d0e21e 4397 force_ident("\"", '$');
cd81e915 4398 start_force(PL_curforce);
9ded7720 4399 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4400 force_next('$');
cd81e915 4401 start_force(PL_curforce);
9ded7720 4402 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4403 force_next('(');
cd81e915 4404 start_force(PL_curforce);
9ded7720 4405 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4406 force_next(FUNC);
4407 }
3280af22
NIS
4408 if (PL_lex_starts++) {
4409 s = PL_bufptr;
5db06880
NC
4410#ifdef PERL_MAD
4411 if (PL_madskills) {
cd81e915
NC
4412 if (PL_thistoken)
4413 sv_free(PL_thistoken);
6b29d1f5 4414 PL_thistoken = newSVpvs("");
5db06880
NC
4415 }
4416#endif
131b3ad0
DM
4417 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4418 if (!PL_lex_casemods && PL_lex_inpat)
4419 OPERATOR(',');
4420 else
4421 Aop(OP_CONCAT);
79072805 4422 }
cea2e8a9 4423 return yylex();
79072805
LW
4424
4425 case LEX_INTERPENDMAYBE:
3280af22
NIS
4426 if (intuit_more(PL_bufptr)) {
4427 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4428 break;
4429 }
4430 /* FALL THROUGH */
4431
4432 case LEX_INTERPEND:
3280af22
NIS
4433 if (PL_lex_dojoin) {
4434 PL_lex_dojoin = FALSE;
4435 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4436#ifdef PERL_MAD
4437 if (PL_madskills) {
cd81e915
NC
4438 if (PL_thistoken)
4439 sv_free(PL_thistoken);
6b29d1f5 4440 PL_thistoken = newSVpvs("");
5db06880
NC
4441 }
4442#endif
bbf60fe6 4443 return REPORT(')');
79072805 4444 }
43a16006 4445 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4446 && SvEVALED(PL_lex_repl))
43a16006 4447 {
e9fa98b2 4448 if (PL_bufptr != PL_bufend)
cea2e8a9 4449 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4450 PL_lex_repl = NULL;
e9fa98b2 4451 }
79072805
LW
4452 /* FALLTHROUGH */
4453 case LEX_INTERPCONCAT:
4454#ifdef DEBUGGING
3280af22 4455 if (PL_lex_brackets)
cea2e8a9 4456 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4457#endif
3280af22 4458 if (PL_bufptr == PL_bufend)
bbf60fe6 4459 return REPORT(sublex_done());
79072805 4460
3280af22
NIS
4461 if (SvIVX(PL_linestr) == '\'') {
4462 SV *sv = newSVsv(PL_linestr);
4463 if (!PL_lex_inpat)
76e3520e 4464 sv = tokeq(sv);
3280af22 4465 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4466 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4467 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4468 s = PL_bufend;
79072805
LW
4469 }
4470 else {
3280af22 4471 s = scan_const(PL_bufptr);
79072805 4472 if (*s == '\\')
3280af22 4473 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4474 else
3280af22 4475 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4476 }
4477
3280af22 4478 if (s != PL_bufptr) {
cd81e915 4479 start_force(PL_curforce);
5db06880
NC
4480 if (PL_madskills) {
4481 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4482 }
6154021b 4483 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4484 PL_expect = XTERM;
79072805 4485 force_next(THING);
131b3ad0 4486 if (PL_lex_starts++) {
5db06880
NC
4487#ifdef PERL_MAD
4488 if (PL_madskills) {
cd81e915
NC
4489 if (PL_thistoken)
4490 sv_free(PL_thistoken);
6b29d1f5 4491 PL_thistoken = newSVpvs("");
5db06880
NC
4492 }
4493#endif
131b3ad0
DM
4494 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495 if (!PL_lex_casemods && PL_lex_inpat)
4496 OPERATOR(',');
4497 else
4498 Aop(OP_CONCAT);
4499 }
79072805 4500 else {
3280af22 4501 PL_bufptr = s;
cea2e8a9 4502 return yylex();
79072805
LW
4503 }
4504 }
4505
cea2e8a9 4506 return yylex();
a0d0e21e 4507 case LEX_FORMLINE:
3280af22
NIS
4508 PL_lex_state = LEX_NORMAL;
4509 s = scan_formline(PL_bufptr);
4510 if (!PL_lex_formbrack)
a0d0e21e
LW
4511 goto rightbracket;
4512 OPERATOR(';');
79072805
LW
4513 }
4514
3280af22
NIS
4515 s = PL_bufptr;
4516 PL_oldoldbufptr = PL_oldbufptr;
4517 PL_oldbufptr = s;
463ee0b2
LW
4518
4519 retry:
5db06880 4520#ifdef PERL_MAD
cd81e915
NC
4521 if (PL_thistoken) {
4522 sv_free(PL_thistoken);
4523 PL_thistoken = 0;
5db06880 4524 }
cd81e915 4525 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4526#endif
378cc40b
LW
4527 switch (*s) {
4528 default:
7e2040f0 4529 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4530 goto keylookup;
b1fc3636
CJ
4531 {
4532 unsigned char c = *s;
4533 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4534 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4535 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4536 } else {
4537 d = PL_linestart;
4538 }
4539 *s = '\0';
4540 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4541 }
e929a76b
LW
4542 case 4:
4543 case 26:
4544 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4545 case 0:
5db06880
NC
4546#ifdef PERL_MAD
4547 if (PL_madskills)
cd81e915 4548 PL_faketokens = 0;
5db06880 4549#endif
3280af22
NIS
4550 if (!PL_rsfp) {
4551 PL_last_uni = 0;
4552 PL_last_lop = 0;
c5ee2135 4553 if (PL_lex_brackets) {
10edeb5d
JH
4554 yyerror((const char *)
4555 (PL_lex_formbrack
4556 ? "Format not terminated"
4557 : "Missing right curly or square bracket"));
c5ee2135 4558 }
4e553d73 4559 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4560 "### Tokener got EOF\n");
5f80b19c 4561 } );
79072805 4562 TOKEN(0);
463ee0b2 4563 }
3280af22 4564 if (s++ < PL_bufend)
a687059c 4565 goto retry; /* ignore stray nulls */
3280af22
NIS
4566 PL_last_uni = 0;
4567 PL_last_lop = 0;
4568 if (!PL_in_eval && !PL_preambled) {
4569 PL_preambled = TRUE;
5db06880
NC
4570#ifdef PERL_MAD
4571 if (PL_madskills)
cd81e915 4572 PL_faketokens = 1;
5db06880 4573#endif
5ab7ff98
NC
4574 if (PL_perldb) {
4575 /* Generate a string of Perl code to load the debugger.
4576 * If PERL5DB is set, it will return the contents of that,
4577 * otherwise a compile-time require of perl5db.pl. */
4578
4579 const char * const pdb = PerlEnv_getenv("PERL5DB");
4580
4581 if (pdb) {
4582 sv_setpv(PL_linestr, pdb);
4583 sv_catpvs(PL_linestr,";");
4584 } else {
4585 SETERRNO(0,SS_NORMAL);
4586 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4587 }
4588 } else
4589 sv_setpvs(PL_linestr,"");
c62eb204
NC
4590 if (PL_preambleav) {
4591 SV **svp = AvARRAY(PL_preambleav);
4592 SV **const end = svp + AvFILLp(PL_preambleav);
4593 while(svp <= end) {
4594 sv_catsv(PL_linestr, *svp);
4595 ++svp;
396482e1 4596 sv_catpvs(PL_linestr, ";");
91b7def8 4597 }
daba3364 4598 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4599 PL_preambleav = NULL;
91b7def8 4600 }
9f639728
FR
4601 if (PL_minus_E)
4602 sv_catpvs(PL_linestr,
4603 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4604 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4605 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4606 if (PL_minus_l)
396482e1 4607 sv_catpvs(PL_linestr,"chomp;");
3280af22 4608 if (PL_minus_a) {
3280af22 4609 if (PL_minus_F) {
3792a11b
NC
4610 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4611 || *PL_splitstr == '"')
3280af22 4612 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4613 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4614 else {
c8ef6a4b
NC
4615 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4616 bytes can be used as quoting characters. :-) */
dd374669 4617 const char *splits = PL_splitstr;
91d456ae 4618 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4619 do {
4620 /* Need to \ \s */
dd374669
AL
4621 if (*splits == '\\')
4622 sv_catpvn(PL_linestr, splits, 1);
4623 sv_catpvn(PL_linestr, splits, 1);
4624 } while (*splits++);
48c4c863
NC
4625 /* This loop will embed the trailing NUL of
4626 PL_linestr as the last thing it does before
4627 terminating. */
396482e1 4628 sv_catpvs(PL_linestr, ");");
54310121 4629 }
2304df62
AD
4630 }
4631 else
396482e1 4632 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4633 }
79072805 4634 }
396482e1 4635 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4636 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4637 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4638 PL_last_lop = PL_last_uni = NULL;
65269a95 4639 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4640 update_debugger_info(PL_linestr, NULL, 0);
79072805 4641 goto retry;
a687059c 4642 }
e929a76b 4643 do {
580561a3
Z
4644 fake_eof = 0;
4645 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4646 if (0) {
7e28d3af 4647 fake_eof:
f0e67a1d
Z
4648 fake_eof = LEX_FAKE_EOF;
4649 }
4650 PL_bufptr = PL_bufend;
17cc9359 4651 CopLINE_inc(PL_curcop);
f0e67a1d 4652 if (!lex_next_chunk(fake_eof)) {
17cc9359 4653 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4654 s = PL_bufptr;
4655 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4656 }
17cc9359 4657 CopLINE_dec(PL_curcop);
5db06880 4658#ifdef PERL_MAD
f0e67a1d 4659 if (!PL_rsfp)
cd81e915 4660 PL_realtokenstart = -1;
5db06880 4661#endif
f0e67a1d 4662 s = PL_bufptr;
7aa207d6
JH
4663 /* If it looks like the start of a BOM or raw UTF-16,
4664 * check if it in fact is. */
580561a3 4665 if (bof && PL_rsfp &&
7aa207d6
JH
4666 (*s == 0 ||
4667 *(U8*)s == 0xEF ||
4668 *(U8*)s >= 0xFE ||
4669 s[1] == 0)) {
eb160463 4670 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4671 if (bof) {
3280af22 4672 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4673 s = swallow_bom((U8*)s);
e929a76b 4674 }
378cc40b 4675 }
3280af22 4676 if (PL_doextract) {
a0d0e21e 4677 /* Incest with pod. */
5db06880
NC
4678#ifdef PERL_MAD
4679 if (PL_madskills)
cd81e915 4680 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4681#endif
01a57ef7 4682 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4683 sv_setpvs(PL_linestr, "");
3280af22
NIS
4684 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4685 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4686 PL_last_lop = PL_last_uni = NULL;
3280af22 4687 PL_doextract = FALSE;
a0d0e21e 4688 }
4e553d73 4689 }
85613cab
Z
4690 if (PL_rsfp)
4691 incline(s);
3280af22
NIS
4692 } while (PL_doextract);
4693 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4694 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4695 PL_last_lop = PL_last_uni = NULL;
57843af0 4696 if (CopLINE(PL_curcop) == 1) {
3280af22 4697 while (s < PL_bufend && isSPACE(*s))
79072805 4698 s++;
a0d0e21e 4699 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4700 s++;
5db06880
NC
4701#ifdef PERL_MAD
4702 if (PL_madskills)
cd81e915 4703 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4704#endif
bd61b366 4705 d = NULL;
3280af22 4706 if (!PL_in_eval) {
44a8e56a 4707 if (*s == '#' && *(s+1) == '!')
4708 d = s + 2;
4709#ifdef ALTERNATE_SHEBANG
4710 else {
bfed75c6 4711 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4712 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4713 d = s + (sizeof(as) - 1);
4714 }
4715#endif /* ALTERNATE_SHEBANG */
4716 }
4717 if (d) {
b8378b72 4718 char *ipath;
774d564b 4719 char *ipathend;
b8378b72 4720
774d564b 4721 while (isSPACE(*d))
b8378b72
CS
4722 d++;
4723 ipath = d;
774d564b 4724 while (*d && !isSPACE(*d))
4725 d++;
4726 ipathend = d;
4727
4728#ifdef ARG_ZERO_IS_SCRIPT
4729 if (ipathend > ipath) {
4730 /*
4731 * HP-UX (at least) sets argv[0] to the script name,
4732 * which makes $^X incorrect. And Digital UNIX and Linux,
4733 * at least, set argv[0] to the basename of the Perl
4734 * interpreter. So, having found "#!", we'll set it right.
4735 */
fafc274c
NC
4736 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4737 SVt_PV)); /* $^X */
774d564b 4738 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4739 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4740 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4741 SvSETMAGIC(x);
4742 }
556c1dec
JH
4743 else {
4744 STRLEN blen;
4745 STRLEN llen;
cfd0369c 4746 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4747 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4748 if (llen < blen) {
4749 bstart += blen - llen;
4750 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4751 sv_setpvn(x, ipath, ipathend - ipath);
4752 SvSETMAGIC(x);
4753 }
4754 }
4755 }
774d564b 4756 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4757 }
774d564b 4758#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4759
4760 /*
4761 * Look for options.
4762 */
748a9306 4763 d = instr(s,"perl -");
84e30d1a 4764 if (!d) {
748a9306 4765 d = instr(s,"perl");
84e30d1a
GS
4766#if defined(DOSISH)
4767 /* avoid getting into infinite loops when shebang
4768 * line contains "Perl" rather than "perl" */
4769 if (!d) {
4770 for (d = ipathend-4; d >= ipath; --d) {
4771 if ((*d == 'p' || *d == 'P')
4772 && !ibcmp(d, "perl", 4))
4773 {
4774 break;
4775 }
4776 }
4777 if (d < ipath)
bd61b366 4778 d = NULL;
84e30d1a
GS
4779 }
4780#endif
4781 }
44a8e56a 4782#ifdef ALTERNATE_SHEBANG
4783 /*
4784 * If the ALTERNATE_SHEBANG on this system starts with a
4785 * character that can be part of a Perl expression, then if
4786 * we see it but not "perl", we're probably looking at the
4787 * start of Perl code, not a request to hand off to some
4788 * other interpreter. Similarly, if "perl" is there, but
4789 * not in the first 'word' of the line, we assume the line
4790 * contains the start of the Perl program.
44a8e56a 4791 */
4792 if (d && *s != '#') {
f54cb97a 4793 const char *c = ipath;
44a8e56a 4794 while (*c && !strchr("; \t\r\n\f\v#", *c))
4795 c++;
4796 if (c < d)
bd61b366 4797 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4798 else
4799 *s = '#'; /* Don't try to parse shebang line */
4800 }
774d564b 4801#endif /* ALTERNATE_SHEBANG */
748a9306 4802 if (!d &&
44a8e56a 4803 *s == '#' &&
774d564b 4804 ipathend > ipath &&
3280af22 4805 !PL_minus_c &&
748a9306 4806 !instr(s,"indir") &&
3280af22 4807 instr(PL_origargv[0],"perl"))
748a9306 4808 {
27da23d5 4809 dVAR;
9f68db38 4810 char **newargv;
9f68db38 4811
774d564b 4812 *ipathend = '\0';
4813 s = ipathend + 1;
3280af22 4814 while (s < PL_bufend && isSPACE(*s))
9f68db38 4815 s++;
3280af22 4816 if (s < PL_bufend) {
d85f917e 4817 Newx(newargv,PL_origargc+3,char*);
9f68db38 4818 newargv[1] = s;
3280af22 4819 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4820 s++;
4821 *s = '\0';
3280af22 4822 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4823 }
4824 else
3280af22 4825 newargv = PL_origargv;
774d564b 4826 newargv[0] = ipath;
b35112e7 4827 PERL_FPU_PRE_EXEC
b4748376 4828 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4829 PERL_FPU_POST_EXEC
cea2e8a9 4830 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4831 }
748a9306 4832 if (d) {
c35e046a
AL
4833 while (*d && !isSPACE(*d))
4834 d++;
4835 while (SPACE_OR_TAB(*d))
4836 d++;
748a9306
LW
4837
4838 if (*d++ == '-') {
f54cb97a 4839 const bool switches_done = PL_doswitches;
fb993905
GA
4840 const U32 oldpdb = PL_perldb;
4841 const bool oldn = PL_minus_n;
4842 const bool oldp = PL_minus_p;
c7030b81 4843 const char *d1 = d;
fb993905 4844
8cc95fdb 4845 do {
4ba71d51
FC
4846 bool baduni = FALSE;
4847 if (*d1 == 'C') {
bd0ab00d
NC
4848 const char *d2 = d1 + 1;
4849 if (parse_unicode_opts((const char **)&d2)
4850 != PL_unicode)
4851 baduni = TRUE;
4ba71d51
FC
4852 }
4853 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4854 const char * const m = d1;
4855 while (*d1 && !isSPACE(*d1))
4856 d1++;
cea2e8a9 4857 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4858 (int)(d1 - m), m);
8cc95fdb 4859 }
c7030b81
NC
4860 d1 = moreswitches(d1);
4861 } while (d1);
f0b2cf55
YST
4862 if (PL_doswitches && !switches_done) {
4863 int argc = PL_origargc;
4864 char **argv = PL_origargv;
4865 do {
4866 argc--,argv++;
4867 } while (argc && argv[0][0] == '-' && argv[0][1]);
4868 init_argv_symbols(argc,argv);
4869 }
65269a95 4870 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4871 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4872 /* if we have already added "LINE: while (<>) {",
4873 we must not do it again */
748a9306 4874 {
76f68e9b 4875 sv_setpvs(PL_linestr, "");
3280af22
NIS
4876 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4877 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4878 PL_last_lop = PL_last_uni = NULL;
3280af22 4879 PL_preambled = FALSE;
65269a95 4880 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4881 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4882 goto retry;
4883 }
a0d0e21e 4884 }
79072805 4885 }
9f68db38 4886 }
79072805 4887 }
3280af22
NIS
4888 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4889 PL_bufptr = s;
4890 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4891 return yylex();
ae986130 4892 }
378cc40b 4893 goto retry;
4fdae800 4894 case '\r':
6a27c188 4895#ifdef PERL_STRICT_CR
cea2e8a9 4896 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4897 Perl_croak(aTHX_
cc507455 4898 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4899#endif
4fdae800 4900 case ' ': case '\t': case '\f': case 013:
5db06880 4901#ifdef PERL_MAD
cd81e915 4902 PL_realtokenstart = -1;
ac372eb8
RD
4903 if (!PL_thiswhite)
4904 PL_thiswhite = newSVpvs("");
4905 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4906#endif
ac372eb8 4907 s++;
378cc40b 4908 goto retry;
378cc40b 4909 case '#':
e929a76b 4910 case '\n':
5db06880 4911#ifdef PERL_MAD
cd81e915 4912 PL_realtokenstart = -1;
5db06880 4913 if (PL_madskills)
cd81e915 4914 PL_faketokens = 0;
5db06880 4915#endif
3280af22 4916 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4917 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4918 /* handle eval qq[#line 1 "foo"\n ...] */
4919 CopLINE_dec(PL_curcop);
4920 incline(s);
4921 }
5db06880
NC
4922 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4923 s = SKIPSPACE0(s);
4924 if (!PL_in_eval || PL_rsfp)
4925 incline(s);
4926 }
4927 else {
4928 d = s;
4929 while (d < PL_bufend && *d != '\n')
4930 d++;
4931 if (d < PL_bufend)
4932 d++;
4933 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4934 Perl_croak(aTHX_ "panic: input overflow");
4935#ifdef PERL_MAD
4936 if (PL_madskills)
cd81e915 4937 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4938#endif
4939 s = d;
4940 incline(s);
4941 }
3280af22
NIS
4942 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4943 PL_bufptr = s;
4944 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4945 return yylex();
a687059c 4946 }
378cc40b 4947 }
a687059c 4948 else {
5db06880
NC
4949#ifdef PERL_MAD
4950 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4951 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4952 PL_faketokens = 0;
5db06880
NC
4953 s = SKIPSPACE0(s);
4954 TOKEN(PEG); /* make sure any #! line is accessible */
4955 }
4956 s = SKIPSPACE0(s);
4957 }
4958 else {
4959/* if (PL_madskills && PL_lex_formbrack) { */
4960 d = s;
4961 while (d < PL_bufend && *d != '\n')
4962 d++;
4963 if (d < PL_bufend)
4964 d++;
4965 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4966 Perl_croak(aTHX_ "panic: input overflow");
4967 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4968 if (!PL_thiswhite)
6b29d1f5 4969 PL_thiswhite = newSVpvs("");
5db06880 4970 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4971 sv_setpvs(PL_thiswhite, "");
cd81e915 4972 PL_faketokens = 0;
5db06880 4973 }
cd81e915 4974 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4975 }
4976 s = d;
4977/* }
4978 *s = '\0';
4979 PL_bufend = s; */
4980 }
4981#else
378cc40b 4982 *s = '\0';
3280af22 4983 PL_bufend = s;
5db06880 4984#endif
a687059c 4985 }
378cc40b
LW
4986 goto retry;
4987 case '-':
79072805 4988 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4989 I32 ftst = 0;
90771dc0 4990 char tmp;
e5edeb50 4991
378cc40b 4992 s++;
3280af22 4993 PL_bufptr = s;
748a9306
LW
4994 tmp = *s++;
4995
bf4acbe4 4996 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4997 s++;
4998
4999 if (strnEQ(s,"=>",2)) {
3280af22 5000 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5001 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5002 OPERATOR('-'); /* unary minus */
5003 }
3280af22 5004 PL_last_uni = PL_oldbufptr;
748a9306 5005 switch (tmp) {
e5edeb50
JH
5006 case 'r': ftst = OP_FTEREAD; break;
5007 case 'w': ftst = OP_FTEWRITE; break;
5008 case 'x': ftst = OP_FTEEXEC; break;
5009 case 'o': ftst = OP_FTEOWNED; break;
5010 case 'R': ftst = OP_FTRREAD; break;
5011 case 'W': ftst = OP_FTRWRITE; break;
5012 case 'X': ftst = OP_FTREXEC; break;
5013 case 'O': ftst = OP_FTROWNED; break;
5014 case 'e': ftst = OP_FTIS; break;
5015 case 'z': ftst = OP_FTZERO; break;
5016 case 's': ftst = OP_FTSIZE; break;
5017 case 'f': ftst = OP_FTFILE; break;
5018 case 'd': ftst = OP_FTDIR; break;
5019 case 'l': ftst = OP_FTLINK; break;
5020 case 'p': ftst = OP_FTPIPE; break;
5021 case 'S': ftst = OP_FTSOCK; break;
5022 case 'u': ftst = OP_FTSUID; break;
5023 case 'g': ftst = OP_FTSGID; break;
5024 case 'k': ftst = OP_FTSVTX; break;
5025 case 'b': ftst = OP_FTBLK; break;
5026 case 'c': ftst = OP_FTCHR; break;
5027 case 't': ftst = OP_FTTTY; break;
5028 case 'T': ftst = OP_FTTEXT; break;
5029 case 'B': ftst = OP_FTBINARY; break;
5030 case 'M': case 'A': case 'C':
fafc274c 5031 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5032 switch (tmp) {
5033 case 'M': ftst = OP_FTMTIME; break;
5034 case 'A': ftst = OP_FTATIME; break;
5035 case 'C': ftst = OP_FTCTIME; break;
5036 default: break;
5037 }
5038 break;
378cc40b 5039 default:
378cc40b
LW
5040 break;
5041 }
e5edeb50 5042 if (ftst) {
eb160463 5043 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5044 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5045 "### Saw file test %c\n", (int)tmp);
5f80b19c 5046 } );
e5edeb50
JH
5047 FTST(ftst);
5048 }
5049 else {
5050 /* Assume it was a minus followed by a one-letter named
5051 * subroutine call (or a -bareword), then. */
95c31fe3 5052 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5053 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5054 (int) tmp);
5f80b19c 5055 } );
3cf7b4c4 5056 s = --PL_bufptr;
e5edeb50 5057 }
378cc40b 5058 }
90771dc0
NC
5059 {
5060 const char tmp = *s++;
5061 if (*s == tmp) {
5062 s++;
5063 if (PL_expect == XOPERATOR)
5064 TERM(POSTDEC);
5065 else
5066 OPERATOR(PREDEC);
5067 }
5068 else if (*s == '>') {
5069 s++;
29595ff2 5070 s = SKIPSPACE1(s);
90771dc0
NC
5071 if (isIDFIRST_lazy_if(s,UTF)) {
5072 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5073 TOKEN(ARROW);
5074 }
5075 else if (*s == '$')
5076 OPERATOR(ARROW);
5077 else
5078 TERM(ARROW);
5079 }
3280af22 5080 if (PL_expect == XOPERATOR)
90771dc0
NC
5081 Aop(OP_SUBTRACT);
5082 else {
5083 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5084 check_uni();
5085 OPERATOR('-'); /* unary minus */
79072805 5086 }
2f3197b3 5087 }
79072805 5088
378cc40b 5089 case '+':
90771dc0
NC
5090 {
5091 const char tmp = *s++;
5092 if (*s == tmp) {
5093 s++;
5094 if (PL_expect == XOPERATOR)
5095 TERM(POSTINC);
5096 else
5097 OPERATOR(PREINC);
5098 }
3280af22 5099 if (PL_expect == XOPERATOR)
90771dc0
NC
5100 Aop(OP_ADD);
5101 else {
5102 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5103 check_uni();
5104 OPERATOR('+');
5105 }
2f3197b3 5106 }
a687059c 5107
378cc40b 5108 case '*':
3280af22
NIS
5109 if (PL_expect != XOPERATOR) {
5110 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5111 PL_expect = XOPERATOR;
5112 force_ident(PL_tokenbuf, '*');
5113 if (!*PL_tokenbuf)
a0d0e21e 5114 PREREF('*');
79072805 5115 TERM('*');
a687059c 5116 }
79072805
LW
5117 s++;
5118 if (*s == '*') {
a687059c 5119 s++;
79072805 5120 PWop(OP_POW);
a687059c 5121 }
79072805
LW
5122 Mop(OP_MULTIPLY);
5123
378cc40b 5124 case '%':
3280af22 5125 if (PL_expect == XOPERATOR) {
bbce6d69 5126 ++s;
5127 Mop(OP_MODULO);
a687059c 5128 }
3280af22 5129 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5130 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5131 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5132 if (!PL_tokenbuf[1]) {
bbce6d69 5133 PREREF('%');
a687059c 5134 }
3280af22 5135 PL_pending_ident = '%';
bbce6d69 5136 TERM('%');
a687059c 5137
378cc40b 5138 case '^':
79072805 5139 s++;
a0d0e21e 5140 BOop(OP_BIT_XOR);
79072805 5141 case '[':
3280af22 5142 PL_lex_brackets++;
df3467db
IG
5143 {
5144 const char tmp = *s++;
5145 OPERATOR(tmp);
5146 }
378cc40b 5147 case '~':
0d863452 5148 if (s[1] == '~'
3e7dd34d 5149 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5150 {
5151 s += 2;
5152 Eop(OP_SMARTMATCH);
5153 }
378cc40b 5154 case ',':
90771dc0
NC
5155 {
5156 const char tmp = *s++;
5157 OPERATOR(tmp);
5158 }
a0d0e21e
LW
5159 case ':':
5160 if (s[1] == ':') {
5161 len = 0;
0bfa2a8a 5162 goto just_a_word_zero_gv;
a0d0e21e
LW
5163 }
5164 s++;
09bef843
SB
5165 switch (PL_expect) {
5166 OP *attrs;
5db06880
NC
5167#ifdef PERL_MAD
5168 I32 stuffstart;
5169#endif
09bef843
SB
5170 case XOPERATOR:
5171 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5172 break;
5173 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5174 if (*s == '=') {
5175 deprecate(":= for an empty attribute list");
5176 }
09bef843
SB
5177 goto grabattrs;
5178 case XATTRBLOCK:
5179 PL_expect = XBLOCK;
5180 goto grabattrs;
5181 case XATTRTERM:
5182 PL_expect = XTERMBLOCK;
5183 grabattrs:
5db06880
NC
5184#ifdef PERL_MAD
5185 stuffstart = s - SvPVX(PL_linestr) - 1;
5186#endif
29595ff2 5187 s = PEEKSPACE(s);
5f66b61c 5188 attrs = NULL;
7e2040f0 5189 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5190 I32 tmp;
5cc237b8 5191 SV *sv;
09bef843 5192 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5193 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5194 if (tmp < 0) tmp = -tmp;
5195 switch (tmp) {
5196 case KEY_or:
5197 case KEY_and:
5198 case KEY_for:
11baf631 5199 case KEY_foreach:
f9829d6b
GS
5200 case KEY_unless:
5201 case KEY_if:
5202 case KEY_while:
5203 case KEY_until:
5204 goto got_attrs;
5205 default:
5206 break;
5207 }
5208 }
5cc237b8 5209 sv = newSVpvn(s, len);
09bef843
SB
5210 if (*d == '(') {
5211 d = scan_str(d,TRUE,TRUE);
5212 if (!d) {
09bef843
SB
5213 /* MUST advance bufptr here to avoid bogus
5214 "at end of line" context messages from yyerror().
5215 */
5216 PL_bufptr = s + len;
5217 yyerror("Unterminated attribute parameter in attribute list");
5218 if (attrs)
5219 op_free(attrs);
5cc237b8 5220 sv_free(sv);
bbf60fe6 5221 return REPORT(0); /* EOF indicator */
09bef843
SB
5222 }
5223 }
5224 if (PL_lex_stuff) {
09bef843
SB
5225 sv_catsv(sv, PL_lex_stuff);
5226 attrs = append_elem(OP_LIST, attrs,
5227 newSVOP(OP_CONST, 0, sv));
5228 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5229 PL_lex_stuff = NULL;
09bef843
SB
5230 }
5231 else {
5cc237b8
BS
5232 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5233 sv_free(sv);
1108974d 5234 if (PL_in_my == KEY_our) {
df9a6019 5235 deprecate(":unique");
1108974d 5236 }
bfed75c6 5237 else
371fce9b
DM
5238 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5239 }
5240
d3cea301
SB
5241 /* NOTE: any CV attrs applied here need to be part of
5242 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5243 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5244 sv_free(sv);
78f9721b 5245 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5246 }
5247 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5248 sv_free(sv);
8e5dadda 5249 deprecate(":locked");
5cc237b8
BS
5250 }
5251 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5252 sv_free(sv);
78f9721b 5253 CvMETHOD_on(PL_compcv);
5cc237b8 5254 }
78f9721b
SM
5255 /* After we've set the flags, it could be argued that
5256 we don't need to do the attributes.pm-based setting
5257 process, and shouldn't bother appending recognized
d3cea301
SB
5258 flags. To experiment with that, uncomment the
5259 following "else". (Note that's already been
5260 uncommented. That keeps the above-applied built-in
5261 attributes from being intercepted (and possibly
5262 rejected) by a package's attribute routines, but is
5263 justified by the performance win for the common case
5264 of applying only built-in attributes.) */
0256094b 5265 else
78f9721b
SM
5266 attrs = append_elem(OP_LIST, attrs,
5267 newSVOP(OP_CONST, 0,
5cc237b8 5268 sv));
09bef843 5269 }
29595ff2 5270 s = PEEKSPACE(d);
0120eecf 5271 if (*s == ':' && s[1] != ':')
29595ff2 5272 s = PEEKSPACE(s+1);
0120eecf
GS
5273 else if (s == d)
5274 break; /* require real whitespace or :'s */
29595ff2 5275 /* XXX losing whitespace on sequential attributes here */
09bef843 5276 }
90771dc0
NC
5277 {
5278 const char tmp
5279 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5280 if (*s != ';' && *s != '}' && *s != tmp
5281 && (tmp != '=' || *s != ')')) {
5282 const char q = ((*s == '\'') ? '"' : '\'');
5283 /* If here for an expression, and parsed no attrs, back
5284 off. */
5285 if (tmp == '=' && !attrs) {
5286 s = PL_bufptr;
5287 break;
5288 }
5289 /* MUST advance bufptr here to avoid bogus "at end of line"
5290 context messages from yyerror().
5291 */
5292 PL_bufptr = s;
10edeb5d
JH
5293 yyerror( (const char *)
5294 (*s
5295 ? Perl_form(aTHX_ "Invalid separator character "
5296 "%c%c%c in attribute list", q, *s, q)
5297 : "Unterminated attribute list" ) );
90771dc0
NC
5298 if (attrs)
5299 op_free(attrs);
5300 OPERATOR(':');
09bef843 5301 }
09bef843 5302 }
f9829d6b 5303 got_attrs:
09bef843 5304 if (attrs) {
cd81e915 5305 start_force(PL_curforce);
9ded7720 5306 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5307 CURMAD('_', PL_nextwhite);
89122651 5308 force_next(THING);
5db06880
NC
5309 }
5310#ifdef PERL_MAD
5311 if (PL_madskills) {
cd81e915 5312 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5313 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5314 }
5db06880 5315#endif
09bef843
SB
5316 TOKEN(COLONATTR);
5317 }
a0d0e21e 5318 OPERATOR(':');
8990e307
LW
5319 case '(':
5320 s++;
3280af22
NIS
5321 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5322 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5323 else
3280af22 5324 PL_expect = XTERM;
29595ff2 5325 s = SKIPSPACE1(s);
a0d0e21e 5326 TOKEN('(');
378cc40b 5327 case ';':
f4dd75d9 5328 CLINE;
90771dc0
NC
5329 {
5330 const char tmp = *s++;
5331 OPERATOR(tmp);
5332 }
378cc40b 5333 case ')':
90771dc0
NC
5334 {
5335 const char tmp = *s++;
29595ff2 5336 s = SKIPSPACE1(s);
90771dc0
NC
5337 if (*s == '{')
5338 PREBLOCK(tmp);
5339 TERM(tmp);
5340 }
79072805
LW
5341 case ']':
5342 s++;
3280af22 5343 if (PL_lex_brackets <= 0)
d98d5fff 5344 yyerror("Unmatched right square bracket");
463ee0b2 5345 else
3280af22
NIS
5346 --PL_lex_brackets;
5347 if (PL_lex_state == LEX_INTERPNORMAL) {
5348 if (PL_lex_brackets == 0) {
02255c60
FC
5349 if (*s == '-' && s[1] == '>')
5350 PL_lex_state = LEX_INTERPENDMAYBE;
5351 else if (*s != '[' && *s != '{')
3280af22 5352 PL_lex_state = LEX_INTERPEND;
79072805
LW
5353 }
5354 }
4633a7c4 5355 TERM(']');
79072805
LW
5356 case '{':
5357 leftbracket:
79072805 5358 s++;
3280af22 5359 if (PL_lex_brackets > 100) {
8edd5f42 5360 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5361 }
3280af22 5362 switch (PL_expect) {
a0d0e21e 5363 case XTERM:
3280af22 5364 if (PL_lex_formbrack) {
a0d0e21e
LW
5365 s--;
5366 PRETERMBLOCK(DO);
5367 }
3280af22
NIS
5368 if (PL_oldoldbufptr == PL_last_lop)
5369 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5370 else
3280af22 5371 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5372 OPERATOR(HASHBRACK);
a0d0e21e 5373 case XOPERATOR:
bf4acbe4 5374 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5375 s++;
44a8e56a 5376 d = s;
3280af22
NIS
5377 PL_tokenbuf[0] = '\0';
5378 if (d < PL_bufend && *d == '-') {
5379 PL_tokenbuf[0] = '-';
44a8e56a 5380 d++;
bf4acbe4 5381 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5382 d++;
5383 }
7e2040f0 5384 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5385 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5386 FALSE, &len);
bf4acbe4 5387 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5388 d++;
5389 if (*d == '}') {
f54cb97a 5390 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5391 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5392 if (minus)
5393 force_next('-');
748a9306
LW
5394 }
5395 }
5396 /* FALL THROUGH */
09bef843 5397 case XATTRBLOCK:
748a9306 5398 case XBLOCK:
3280af22
NIS
5399 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5400 PL_expect = XSTATE;
a0d0e21e 5401 break;
09bef843 5402 case XATTRTERM:
a0d0e21e 5403 case XTERMBLOCK:
3280af22
NIS
5404 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5405 PL_expect = XSTATE;
a0d0e21e
LW
5406 break;
5407 default: {
f54cb97a 5408 const char *t;
3280af22
NIS
5409 if (PL_oldoldbufptr == PL_last_lop)
5410 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5411 else
3280af22 5412 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5413 s = SKIPSPACE1(s);
8452ff4b
SB
5414 if (*s == '}') {
5415 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5416 PL_expect = XTERM;
5417 /* This hack is to get the ${} in the message. */
5418 PL_bufptr = s+1;
5419 yyerror("syntax error");
5420 break;
5421 }
a0d0e21e 5422 OPERATOR(HASHBRACK);
8452ff4b 5423 }
b8a4b1be
GS
5424 /* This hack serves to disambiguate a pair of curlies
5425 * as being a block or an anon hash. Normally, expectation
5426 * determines that, but in cases where we're not in a
5427 * position to expect anything in particular (like inside
5428 * eval"") we have to resolve the ambiguity. This code
5429 * covers the case where the first term in the curlies is a
5430 * quoted string. Most other cases need to be explicitly
a0288114 5431 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5432 * curly in order to force resolution as an anon hash.
5433 *
5434 * XXX should probably propagate the outer expectation
5435 * into eval"" to rely less on this hack, but that could
5436 * potentially break current behavior of eval"".
5437 * GSAR 97-07-21
5438 */
5439 t = s;
5440 if (*s == '\'' || *s == '"' || *s == '`') {
5441 /* common case: get past first string, handling escapes */
3280af22 5442 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5443 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5444 t++;
5445 t++;
a0d0e21e 5446 }
b8a4b1be 5447 else if (*s == 'q') {
3280af22 5448 if (++t < PL_bufend
b8a4b1be 5449 && (!isALNUM(*t)
3280af22 5450 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5451 && !isALNUM(*t))))
5452 {
abc667d1 5453 /* skip q//-like construct */
f54cb97a 5454 const char *tmps;
b8a4b1be
GS
5455 char open, close, term;
5456 I32 brackets = 1;
5457
3280af22 5458 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5459 t++;
abc667d1
DM
5460 /* check for q => */
5461 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5462 OPERATOR(HASHBRACK);
5463 }
b8a4b1be
GS
5464 term = *t;
5465 open = term;
5466 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5467 term = tmps[5];
5468 close = term;
5469 if (open == close)
3280af22
NIS
5470 for (t++; t < PL_bufend; t++) {
5471 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5472 t++;
6d07e5e9 5473 else if (*t == open)
b8a4b1be
GS
5474 break;
5475 }
abc667d1 5476 else {
3280af22
NIS
5477 for (t++; t < PL_bufend; t++) {
5478 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5479 t++;
6d07e5e9 5480 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5481 break;
5482 else if (*t == open)
5483 brackets++;
5484 }
abc667d1
DM
5485 }
5486 t++;
b8a4b1be 5487 }
abc667d1
DM
5488 else
5489 /* skip plain q word */
5490 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5491 t += UTF8SKIP(t);
a0d0e21e 5492 }
7e2040f0 5493 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5494 t += UTF8SKIP(t);
7e2040f0 5495 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5496 t += UTF8SKIP(t);
a0d0e21e 5497 }
3280af22 5498 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5499 t++;
b8a4b1be
GS
5500 /* if comma follows first term, call it an anon hash */
5501 /* XXX it could be a comma expression with loop modifiers */
3280af22 5502 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5503 || (*t == '=' && t[1] == '>')))
a0d0e21e 5504 OPERATOR(HASHBRACK);
3280af22 5505 if (PL_expect == XREF)
4e4e412b 5506 PL_expect = XTERM;
a0d0e21e 5507 else {
3280af22
NIS
5508 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5509 PL_expect = XSTATE;
a0d0e21e 5510 }
8990e307 5511 }
a0d0e21e 5512 break;
463ee0b2 5513 }
6154021b 5514 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5515 if (isSPACE(*s) || *s == '#')
3280af22 5516 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5517 TOKEN('{');
378cc40b 5518 case '}':
79072805
LW
5519 rightbracket:
5520 s++;
3280af22 5521 if (PL_lex_brackets <= 0)
d98d5fff 5522 yyerror("Unmatched right curly bracket");
463ee0b2 5523 else
3280af22 5524 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5525 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5526 PL_lex_formbrack = 0;
5527 if (PL_lex_state == LEX_INTERPNORMAL) {
5528 if (PL_lex_brackets == 0) {
9059aa12
LW
5529 if (PL_expect & XFAKEBRACK) {
5530 PL_expect &= XENUMMASK;
3280af22
NIS
5531 PL_lex_state = LEX_INTERPEND;
5532 PL_bufptr = s;
5db06880
NC
5533#if 0
5534 if (PL_madskills) {
cd81e915 5535 if (!PL_thiswhite)
6b29d1f5 5536 PL_thiswhite = newSVpvs("");
76f68e9b 5537 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5538 }
5539#endif
cea2e8a9 5540 return yylex(); /* ignore fake brackets */
79072805 5541 }
fa83b5b6 5542 if (*s == '-' && s[1] == '>')
3280af22 5543 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5544 else if (*s != '[' && *s != '{')
3280af22 5545 PL_lex_state = LEX_INTERPEND;
79072805
LW
5546 }
5547 }
9059aa12
LW
5548 if (PL_expect & XFAKEBRACK) {
5549 PL_expect &= XENUMMASK;
3280af22 5550 PL_bufptr = s;
cea2e8a9 5551 return yylex(); /* ignore fake brackets */
748a9306 5552 }
cd81e915 5553 start_force(PL_curforce);
5db06880
NC
5554 if (PL_madskills) {
5555 curmad('X', newSVpvn(s-1,1));
cd81e915 5556 CURMAD('_', PL_thiswhite);
5db06880 5557 }
79072805 5558 force_next('}');
5db06880 5559#ifdef PERL_MAD
cd81e915 5560 if (!PL_thistoken)
6b29d1f5 5561 PL_thistoken = newSVpvs("");
5db06880 5562#endif
79072805 5563 TOKEN(';');
378cc40b
LW
5564 case '&':
5565 s++;
90771dc0 5566 if (*s++ == '&')
a0d0e21e 5567 AOPERATOR(ANDAND);
378cc40b 5568 s--;
3280af22 5569 if (PL_expect == XOPERATOR) {
041457d9
DM
5570 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5571 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5572 {
57843af0 5573 CopLINE_dec(PL_curcop);
f1f66076 5574 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5575 CopLINE_inc(PL_curcop);
463ee0b2 5576 }
79072805 5577 BAop(OP_BIT_AND);
463ee0b2 5578 }
79072805 5579
3280af22
NIS
5580 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5581 if (*PL_tokenbuf) {
5582 PL_expect = XOPERATOR;
5583 force_ident(PL_tokenbuf, '&');
463ee0b2 5584 }
79072805
LW
5585 else
5586 PREREF('&');
6154021b 5587 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5588 TERM('&');
5589
378cc40b
LW
5590 case '|':
5591 s++;
90771dc0 5592 if (*s++ == '|')
a0d0e21e 5593 AOPERATOR(OROR);
378cc40b 5594 s--;
79072805 5595 BOop(OP_BIT_OR);
378cc40b
LW
5596 case '=':
5597 s++;
748a9306 5598 {
90771dc0
NC
5599 const char tmp = *s++;
5600 if (tmp == '=')
5601 Eop(OP_EQ);
5602 if (tmp == '>')
5603 OPERATOR(',');
5604 if (tmp == '~')
5605 PMop(OP_MATCH);
5606 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5607 && strchr("+-*/%.^&|<",tmp))
5608 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5609 "Reversed %c= operator",(int)tmp);
5610 s--;
5611 if (PL_expect == XSTATE && isALPHA(tmp) &&
5612 (s == PL_linestart+1 || s[-2] == '\n') )
5613 {
5614 if (PL_in_eval && !PL_rsfp) {
5615 d = PL_bufend;
5616 while (s < d) {
5617 if (*s++ == '\n') {
5618 incline(s);
5619 if (strnEQ(s,"=cut",4)) {
5620 s = strchr(s,'\n');
5621 if (s)
5622 s++;
5623 else
5624 s = d;
5625 incline(s);
5626 goto retry;
5627 }
5628 }
a5f75d66 5629 }
90771dc0 5630 goto retry;
a5f75d66 5631 }
5db06880
NC
5632#ifdef PERL_MAD
5633 if (PL_madskills) {
cd81e915 5634 if (!PL_thiswhite)
6b29d1f5 5635 PL_thiswhite = newSVpvs("");
cd81e915 5636 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5637 PL_bufend - PL_linestart);
5638 }
5639#endif
90771dc0
NC
5640 s = PL_bufend;
5641 PL_doextract = TRUE;
5642 goto retry;
a5f75d66 5643 }
a0d0e21e 5644 }
3280af22 5645 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5646 const char *t = s;
51882d45 5647#ifdef PERL_STRICT_CR
c35e046a 5648 while (SPACE_OR_TAB(*t))
51882d45 5649#else
c35e046a 5650 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5651#endif
c35e046a 5652 t++;
a0d0e21e
LW
5653 if (*t == '\n' || *t == '#') {
5654 s--;
3280af22 5655 PL_expect = XBLOCK;
a0d0e21e
LW
5656 goto leftbracket;
5657 }
79072805 5658 }
6154021b 5659 pl_yylval.ival = 0;
a0d0e21e 5660 OPERATOR(ASSIGNOP);
378cc40b
LW
5661 case '!':
5662 s++;
90771dc0
NC
5663 {
5664 const char tmp = *s++;
5665 if (tmp == '=') {
5666 /* was this !=~ where !~ was meant?
5667 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5668
5669 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5670 const char *t = s+1;
5671
5672 while (t < PL_bufend && isSPACE(*t))
5673 ++t;
5674
5675 if (*t == '/' || *t == '?' ||
5676 ((*t == 'm' || *t == 's' || *t == 'y')
5677 && !isALNUM(t[1])) ||
5678 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5679 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5680 "!=~ should be !~");
5681 }
5682 Eop(OP_NE);
5683 }
5684 if (tmp == '~')
5685 PMop(OP_NOT);
5686 }
378cc40b
LW
5687 s--;
5688 OPERATOR('!');
5689 case '<':
3280af22 5690 if (PL_expect != XOPERATOR) {
93a17b20 5691 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5692 check_uni();
79072805
LW
5693 if (s[1] == '<')
5694 s = scan_heredoc(s);
5695 else
5696 s = scan_inputsymbol(s);
5697 TERM(sublex_start());
378cc40b
LW
5698 }
5699 s++;
90771dc0
NC
5700 {
5701 char tmp = *s++;
5702 if (tmp == '<')
5703 SHop(OP_LEFT_SHIFT);
5704 if (tmp == '=') {
5705 tmp = *s++;
5706 if (tmp == '>')
5707 Eop(OP_NCMP);
5708 s--;
5709 Rop(OP_LE);
5710 }
395c3793 5711 }
378cc40b 5712 s--;
79072805 5713 Rop(OP_LT);
378cc40b
LW
5714 case '>':
5715 s++;
90771dc0
NC
5716 {
5717 const char tmp = *s++;
5718 if (tmp == '>')
5719 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5720 else if (tmp == '=')
90771dc0
NC
5721 Rop(OP_GE);
5722 }
378cc40b 5723 s--;
79072805 5724 Rop(OP_GT);
378cc40b
LW
5725
5726 case '$':
bbce6d69 5727 CLINE;
5728
3280af22
NIS
5729 if (PL_expect == XOPERATOR) {
5730 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5731 return deprecate_commaless_var_list();
a0d0e21e 5732 }
8990e307 5733 }
a0d0e21e 5734
c0b977fd 5735 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5736 PL_tokenbuf[0] = '@';
376b8730
SM
5737 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5738 sizeof PL_tokenbuf - 1, FALSE);
5739 if (PL_expect == XOPERATOR)
5740 no_op("Array length", s);
3280af22 5741 if (!PL_tokenbuf[1])
a0d0e21e 5742 PREREF(DOLSHARP);
3280af22
NIS
5743 PL_expect = XOPERATOR;
5744 PL_pending_ident = '#';
463ee0b2 5745 TOKEN(DOLSHARP);
79072805 5746 }
bbce6d69 5747
3280af22 5748 PL_tokenbuf[0] = '$';
376b8730
SM
5749 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5750 sizeof PL_tokenbuf - 1, FALSE);
5751 if (PL_expect == XOPERATOR)
5752 no_op("Scalar", s);
3280af22
NIS
5753 if (!PL_tokenbuf[1]) {
5754 if (s == PL_bufend)
bbce6d69 5755 yyerror("Final $ should be \\$ or $name");
5756 PREREF('$');
8990e307 5757 }
a0d0e21e 5758
bbce6d69 5759 /* This kludge not intended to be bulletproof. */
3280af22 5760 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5761 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5762 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5763 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5764 TERM(THING);
5765 }
5766
ff68c719 5767 d = s;
90771dc0
NC
5768 {
5769 const char tmp = *s;
ae28bb2a 5770 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5771 s = SKIPSPACE1(s);
ff68c719 5772
90771dc0
NC
5773 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5774 && intuit_more(s)) {
5775 if (*s == '[') {
5776 PL_tokenbuf[0] = '@';
5777 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5778 char *t = s+1;
5779
5780 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5781 t++;
90771dc0 5782 if (*t++ == ',') {
29595ff2 5783 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5784 while (t < PL_bufend && *t != ']')
5785 t++;
9014280d 5786 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5787 "Multidimensional syntax %.*s not supported",
36c7798d 5788 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5789 }
748a9306 5790 }
93a17b20 5791 }
90771dc0
NC
5792 else if (*s == '{') {
5793 char *t;
5794 PL_tokenbuf[0] = '%';
5795 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5796 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5797 {
5798 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5799 do {
5800 t++;
5801 } while (isSPACE(*t));
90771dc0 5802 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5803 STRLEN len;
90771dc0 5804 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5805 &len);
c35e046a
AL
5806 while (isSPACE(*t))
5807 t++;
780a5241 5808 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5809 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5810 "You need to quote \"%s\"",
5811 tmpbuf);
5812 }
5813 }
5814 }
93a17b20 5815 }
bbce6d69 5816
90771dc0
NC
5817 PL_expect = XOPERATOR;
5818 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5819 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5820 if (!islop || PL_last_lop_op == OP_GREPSTART)
5821 PL_expect = XOPERATOR;
5822 else if (strchr("$@\"'`q", *s))
5823 PL_expect = XTERM; /* e.g. print $fh "foo" */
5824 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5825 PL_expect = XTERM; /* e.g. print $fh &sub */
5826 else if (isIDFIRST_lazy_if(s,UTF)) {
5827 char tmpbuf[sizeof PL_tokenbuf];
5828 int t2;
5829 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5830 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5831 /* binary operators exclude handle interpretations */
5832 switch (t2) {
5833 case -KEY_x:
5834 case -KEY_eq:
5835 case -KEY_ne:
5836 case -KEY_gt:
5837 case -KEY_lt:
5838 case -KEY_ge:
5839 case -KEY_le:
5840 case -KEY_cmp:
5841 break;
5842 default:
5843 PL_expect = XTERM; /* e.g. print $fh length() */
5844 break;
5845 }
5846 }
5847 else {
5848 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5849 }
5850 }
90771dc0
NC
5851 else if (isDIGIT(*s))
5852 PL_expect = XTERM; /* e.g. print $fh 3 */
5853 else if (*s == '.' && isDIGIT(s[1]))
5854 PL_expect = XTERM; /* e.g. print $fh .3 */
5855 else if ((*s == '?' || *s == '-' || *s == '+')
5856 && !isSPACE(s[1]) && s[1] != '=')
5857 PL_expect = XTERM; /* e.g. print $fh -1 */
5858 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5859 && s[1] != '/')
5860 PL_expect = XTERM; /* e.g. print $fh /.../
5861 XXX except DORDOR operator
5862 */
5863 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5864 && s[2] != '=')
5865 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5866 }
bbce6d69 5867 }
3280af22 5868 PL_pending_ident = '$';
79072805 5869 TOKEN('$');
378cc40b
LW
5870
5871 case '@':
3280af22 5872 if (PL_expect == XOPERATOR)
bbce6d69 5873 no_op("Array", s);
3280af22
NIS
5874 PL_tokenbuf[0] = '@';
5875 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5876 if (!PL_tokenbuf[1]) {
bbce6d69 5877 PREREF('@');
5878 }
3280af22 5879 if (PL_lex_state == LEX_NORMAL)
29595ff2 5880 s = SKIPSPACE1(s);
3280af22 5881 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5882 if (*s == '{')
3280af22 5883 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5884
5885 /* Warn about @ where they meant $. */
041457d9
DM
5886 if (*s == '[' || *s == '{') {
5887 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5888 const char *t = s + 1;
7e2040f0 5889 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5890 t++;
5891 if (*t == '}' || *t == ']') {
5892 t++;
29595ff2 5893 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5894 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5895 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5896 (int)(t-PL_bufptr), PL_bufptr,
5897 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5898 }
93a17b20
LW
5899 }
5900 }
463ee0b2 5901 }
3280af22 5902 PL_pending_ident = '@';
79072805 5903 TERM('@');
378cc40b 5904
c963b151 5905 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5906 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5907 s += 2;
5908 AOPERATOR(DORDOR);
5909 }
c963b151 5910 case '?': /* may either be conditional or pattern */
be25f609 5911 if (PL_expect == XOPERATOR) {
90771dc0 5912 char tmp = *s++;
c963b151 5913 if(tmp == '?') {
be25f609 5914 OPERATOR('?');
c963b151
BD
5915 }
5916 else {
5917 tmp = *s++;
5918 if(tmp == '/') {
5919 /* A // operator. */
5920 AOPERATOR(DORDOR);
5921 }
5922 else {
5923 s--;
5924 Mop(OP_DIVIDE);
5925 }
5926 }
5927 }
5928 else {
5929 /* Disable warning on "study /blah/" */
5930 if (PL_oldoldbufptr == PL_last_uni
5931 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5932 || memNE(PL_last_uni, "study", 5)
5933 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5934 ))
5935 check_uni();
5936 s = scan_pat(s,OP_MATCH);
5937 TERM(sublex_start());
5938 }
378cc40b
LW
5939
5940 case '.':
51882d45
GS
5941 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5942#ifdef PERL_STRICT_CR
5943 && s[1] == '\n'
5944#else
5945 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5946#endif
5947 && (s == PL_linestart || s[-1] == '\n') )
5948 {
3280af22
NIS
5949 PL_lex_formbrack = 0;
5950 PL_expect = XSTATE;
79072805
LW
5951 goto rightbracket;
5952 }
be25f609 5953 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5954 s += 3;
5955 OPERATOR(YADAYADA);
5956 }
3280af22 5957 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5958 char tmp = *s++;
a687059c
LW
5959 if (*s == tmp) {
5960 s++;
2f3197b3
LW
5961 if (*s == tmp) {
5962 s++;
6154021b 5963 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5964 }
5965 else
6154021b 5966 pl_yylval.ival = 0;
378cc40b 5967 OPERATOR(DOTDOT);
a687059c 5968 }
79072805 5969 Aop(OP_CONCAT);
378cc40b
LW
5970 }
5971 /* FALL THROUGH */
5972 case '0': case '1': case '2': case '3': case '4':
5973 case '5': case '6': case '7': case '8': case '9':
6154021b 5974 s = scan_num(s, &pl_yylval);
931e0695 5975 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5976 if (PL_expect == XOPERATOR)
8990e307 5977 no_op("Number",s);
79072805
LW
5978 TERM(THING);
5979
5980 case '\'':
5db06880 5981 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5982 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5983 if (PL_expect == XOPERATOR) {
5984 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5985 return deprecate_commaless_var_list();
a0d0e21e 5986 }
463ee0b2 5987 else
8990e307 5988 no_op("String",s);
463ee0b2 5989 }
79072805 5990 if (!s)
d4c19fe8 5991 missingterm(NULL);
6154021b 5992 pl_yylval.ival = OP_CONST;
79072805
LW
5993 TERM(sublex_start());
5994
5995 case '"':
5db06880 5996 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5997 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5998 if (PL_expect == XOPERATOR) {
5999 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6000 return deprecate_commaless_var_list();
a0d0e21e 6001 }
463ee0b2 6002 else
8990e307 6003 no_op("String",s);
463ee0b2 6004 }
79072805 6005 if (!s)
d4c19fe8 6006 missingterm(NULL);
6154021b 6007 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6008 /* FIXME. I think that this can be const if char *d is replaced by
6009 more localised variables. */
3280af22 6010 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6011 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6012 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6013 break;
6014 }
6015 }
79072805
LW
6016 TERM(sublex_start());
6017
6018 case '`':
5db06880 6019 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6020 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6021 if (PL_expect == XOPERATOR)
8990e307 6022 no_op("Backticks",s);
79072805 6023 if (!s)
d4c19fe8 6024 missingterm(NULL);
9b201d7d 6025 readpipe_override();
79072805
LW
6026 TERM(sublex_start());
6027
6028 case '\\':
6029 s++;
a2a5de95
NC
6030 if (PL_lex_inwhat && isDIGIT(*s))
6031 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6032 *s, *s);
3280af22 6033 if (PL_expect == XOPERATOR)
8990e307 6034 no_op("Backslash",s);
79072805
LW
6035 OPERATOR(REFGEN);
6036
a7cb1f99 6037 case 'v':
e526c9e6 6038 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6039 char *start = s + 2;
dd629d5b 6040 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6041 start++;
6042 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6043 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6044 TERM(THING);
6045 }
e526c9e6 6046 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6047 else if (!isALPHA(*start) && (PL_expect == XTERM
6048 || PL_expect == XREF || PL_expect == XSTATE
6049 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6050 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6051 if (!gv) {
6154021b 6052 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6053 TERM(THING);
6054 }
6055 }
a7cb1f99
GS
6056 }
6057 goto keylookup;
79072805 6058 case 'x':
3280af22 6059 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6060 s++;
6061 Mop(OP_REPEAT);
2f3197b3 6062 }
79072805
LW
6063 goto keylookup;
6064
378cc40b 6065 case '_':
79072805
LW
6066 case 'a': case 'A':
6067 case 'b': case 'B':
6068 case 'c': case 'C':
6069 case 'd': case 'D':
6070 case 'e': case 'E':
6071 case 'f': case 'F':
6072 case 'g': case 'G':
6073 case 'h': case 'H':
6074 case 'i': case 'I':
6075 case 'j': case 'J':
6076 case 'k': case 'K':
6077 case 'l': case 'L':
6078 case 'm': case 'M':
6079 case 'n': case 'N':
6080 case 'o': case 'O':
6081 case 'p': case 'P':
6082 case 'q': case 'Q':
6083 case 'r': case 'R':
6084 case 's': case 'S':
6085 case 't': case 'T':
6086 case 'u': case 'U':
a7cb1f99 6087 case 'V':
79072805
LW
6088 case 'w': case 'W':
6089 case 'X':
6090 case 'y': case 'Y':
6091 case 'z': case 'Z':
6092
49dc05e3 6093 keylookup: {
88e1f1a2 6094 bool anydelim;
90771dc0 6095 I32 tmp;
10edeb5d
JH
6096
6097 orig_keyword = 0;
6098 gv = NULL;
6099 gvp = NULL;
49dc05e3 6100
3280af22
NIS
6101 PL_bufptr = s;
6102 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6103
6104 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6105 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6106 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6107 (PL_tokenbuf[0] == 'q' &&
6108 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6109
6110 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6111 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6112 goto just_a_word;
6113
3643fb5f 6114 d = s;
3280af22 6115 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6116 d++; /* no comments skipped here, or s### is misparsed */
6117
748a9306 6118 /* Is this a word before a => operator? */
1c3923b3 6119 if (*d == '=' && d[1] == '>') {
748a9306 6120 CLINE;
6154021b 6121 pl_yylval.opval
d0a148a6
NC
6122 = (OP*)newSVOP(OP_CONST, 0,
6123 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6124 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6125 TERM(WORD);
6126 }
6127
88e1f1a2
JV
6128 /* Check for plugged-in keyword */
6129 {
6130 OP *o;
6131 int result;
6132 char *saved_bufptr = PL_bufptr;
6133 PL_bufptr = s;
6134 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6135 s = PL_bufptr;
6136 if (result == KEYWORD_PLUGIN_DECLINE) {
6137 /* not a plugged-in keyword */
6138 PL_bufptr = saved_bufptr;
6139 } else if (result == KEYWORD_PLUGIN_STMT) {
6140 pl_yylval.opval = o;
6141 CLINE;
6142 PL_expect = XSTATE;
6143 return REPORT(PLUGSTMT);
6144 } else if (result == KEYWORD_PLUGIN_EXPR) {
6145 pl_yylval.opval = o;
6146 CLINE;
6147 PL_expect = XOPERATOR;
6148 return REPORT(PLUGEXPR);
6149 } else {
6150 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6151 PL_tokenbuf);
6152 }
6153 }
6154
6155 /* Check for built-in keyword */
6156 tmp = keyword(PL_tokenbuf, len, 0);
6157
6158 /* Is this a label? */
6159 if (!anydelim && PL_expect == XSTATE
6160 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6161 s = d + 1;
6162 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6163 CLINE;
6164 TOKEN(LABEL);
6165 }
6166
a0d0e21e 6167 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6168 GV *ogv = NULL; /* override (winner) */
6169 GV *hgv = NULL; /* hidden (loser) */
3280af22 6170 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6171 CV *cv;
90e5519e 6172 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6173 (cv = GvCVu(gv)))
6174 {
6175 if (GvIMPORTED_CV(gv))
6176 ogv = gv;
6177 else if (! CvMETHOD(cv))
6178 hgv = gv;
6179 }
6180 if (!ogv &&
3280af22 6181 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6182 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6183 GvCVu(gv) && GvIMPORTED_CV(gv))
6184 {
6185 ogv = gv;
6186 }
6187 }
6188 if (ogv) {
30fe34ed 6189 orig_keyword = tmp;
56f7f34b 6190 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6191 }
6192 else if (gv && !gvp
6193 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6194 && GvCVu(gv))
6e7b2336
GS
6195 {
6196 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6197 }
56f7f34b
CS
6198 else { /* no override */
6199 tmp = -tmp;
a2a5de95
NC
6200 if (tmp == KEY_dump) {
6201 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6202 "dump() better written as CORE::dump()");
ac206dc8 6203 }
a0714e2c 6204 gv = NULL;
56f7f34b 6205 gvp = 0;
a2a5de95
NC
6206 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6207 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6208 "Ambiguous call resolved as CORE::%s(), "
6209 "qualify as such or use &",
6210 GvENAME(hgv));
49dc05e3 6211 }
a0d0e21e
LW
6212 }
6213
6214 reserved_word:
6215 switch (tmp) {
79072805
LW
6216
6217 default: /* not a keyword */
0bfa2a8a
NC
6218 /* Trade off - by using this evil construction we can pull the
6219 variable gv into the block labelled keylookup. If not, then
6220 we have to give it function scope so that the goto from the
6221 earlier ':' case doesn't bypass the initialisation. */
6222 if (0) {
6223 just_a_word_zero_gv:
6224 gv = NULL;
6225 gvp = NULL;
8bee0991 6226 orig_keyword = 0;
0bfa2a8a 6227 }
93a17b20 6228 just_a_word: {
96e4d5b1 6229 SV *sv;
ce29ac45 6230 int pkgname = 0;
f54cb97a 6231 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6232 OP *rv2cv_op;
5069cc75 6233 CV *cv;
5db06880 6234#ifdef PERL_MAD
cd81e915 6235 SV *nextPL_nextwhite = 0;
5db06880
NC
6236#endif
6237
8990e307
LW
6238
6239 /* Get the rest if it looks like a package qualifier */
6240
155aba94 6241 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6242 STRLEN morelen;
3280af22 6243 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6244 TRUE, &morelen);
6245 if (!morelen)
cea2e8a9 6246 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6247 *s == '\'' ? "'" : "::");
c3e0f903 6248 len += morelen;
ce29ac45 6249 pkgname = 1;
a0d0e21e 6250 }
8990e307 6251
3280af22
NIS
6252 if (PL_expect == XOPERATOR) {
6253 if (PL_bufptr == PL_linestart) {
57843af0 6254 CopLINE_dec(PL_curcop);
f1f66076 6255 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6256 CopLINE_inc(PL_curcop);
463ee0b2
LW
6257 }
6258 else
54310121 6259 no_op("Bareword",s);
463ee0b2 6260 }
8990e307 6261
c3e0f903
GS
6262 /* Look for a subroutine with this name in current package,
6263 unless name is "Foo::", in which case Foo is a bearword
6264 (and a package name). */
6265
5db06880 6266 if (len > 2 && !PL_madskills &&
3280af22 6267 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6268 {
f776e3cd 6269 if (ckWARN(WARN_BAREWORD)
90e5519e 6270 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6271 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6272 "Bareword \"%s\" refers to nonexistent package",
3280af22 6273 PL_tokenbuf);
c3e0f903 6274 len -= 2;
3280af22 6275 PL_tokenbuf[len] = '\0';
a0714e2c 6276 gv = NULL;
c3e0f903
GS
6277 gvp = 0;
6278 }
6279 else {
62d55b22
NC
6280 if (!gv) {
6281 /* Mustn't actually add anything to a symbol table.
6282 But also don't want to "initialise" any placeholder
6283 constants that might already be there into full
6284 blown PVGVs with attached PVCV. */
90e5519e
NC
6285 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6286 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6287 }
b3d904f3 6288 len = 0;
c3e0f903
GS
6289 }
6290
6291 /* if we saw a global override before, get the right name */
8990e307 6292
49dc05e3 6293 if (gvp) {
396482e1 6294 sv = newSVpvs("CORE::GLOBAL::");
3280af22 6295 sv_catpv(sv,PL_tokenbuf);
49dc05e3 6296 }
8a7a129d
NC
6297 else {
6298 /* If len is 0, newSVpv does strlen(), which is correct.
6299 If len is non-zero, then it will be the true length,
6300 and so the scalar will be created correctly. */
6301 sv = newSVpv(PL_tokenbuf,len);
6302 }
5db06880 6303#ifdef PERL_MAD
cd81e915
NC
6304 if (PL_madskills && !PL_thistoken) {
6305 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6306 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6307 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6308 }
6309#endif
8990e307 6310
a0d0e21e
LW
6311 /* Presume this is going to be a bareword of some sort. */
6312
6313 CLINE;
6154021b
RGS
6314 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6315 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
6316 /* UTF-8 package name? */
6317 if (UTF && !IN_BYTES &&
95a20fc0 6318 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 6319 SvUTF8_on(sv);
a0d0e21e 6320
c3e0f903
GS
6321 /* And if "Foo::", then that's what it certainly is. */
6322
6323 if (len)
6324 goto safe_bareword;
6325
f7461760
Z
6326 cv = NULL;
6327 {
6328 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6329 const_op->op_private = OPpCONST_BARE;
6330 rv2cv_op = newCVREF(0, const_op);
6331 }
6332 if (rv2cv_op->op_type == OP_RV2CV &&
6333 (rv2cv_op->op_flags & OPf_KIDS)) {
6334 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6335 switch (rv_op->op_type) {
6336 case OP_CONST: {
6337 SV *sv = cSVOPx_sv(rv_op);
6338 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6339 cv = (CV*)SvRV(sv);
6340 } break;
6341 case OP_GV: {
6342 GV *gv = cGVOPx_gv(rv_op);
6343 CV *maybe_cv = GvCVu(gv);
6344 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6345 cv = maybe_cv;
6346 } break;
6347 }
6348 }
5069cc75 6349
8990e307
LW
6350 /* See if it's the indirect object for a list operator. */
6351
3280af22
NIS
6352 if (PL_oldoldbufptr &&
6353 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6354 (PL_oldoldbufptr == PL_last_lop
6355 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6356 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6357 (PL_expect == XREF ||
6358 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6359 {
748a9306
LW
6360 bool immediate_paren = *s == '(';
6361
a0d0e21e 6362 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6363 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6364#ifdef PERL_MAD
cd81e915 6365 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6366#endif
a0d0e21e
LW
6367
6368 /* Two barewords in a row may indicate method call. */
6369
62d55b22 6370 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6371 (tmp = intuit_method(s, gv, cv))) {
6372 op_free(rv2cv_op);
bbf60fe6 6373 return REPORT(tmp);
f7461760 6374 }
a0d0e21e
LW
6375
6376 /* If not a declared subroutine, it's an indirect object. */
6377 /* (But it's an indir obj regardless for sort.) */
7294df96 6378 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6379
7294df96
RGS
6380 if (
6381 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6382 (!cv &&
a9ef352a 6383 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6384 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6385 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6386 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6387 )
a9ef352a 6388 {
3280af22 6389 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6390 goto bareword;
93a17b20
LW
6391 }
6392 }
8990e307 6393
3280af22 6394 PL_expect = XOPERATOR;
5db06880
NC
6395#ifdef PERL_MAD
6396 if (isSPACE(*s))
cd81e915
NC
6397 s = SKIPSPACE2(s,nextPL_nextwhite);
6398 PL_nextwhite = nextPL_nextwhite;
5db06880 6399#else
8990e307 6400 s = skipspace(s);
5db06880 6401#endif
1c3923b3
GS
6402
6403 /* Is this a word before a => operator? */
ce29ac45 6404 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6405 op_free(rv2cv_op);
1c3923b3 6406 CLINE;
6154021b 6407 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6408 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6409 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6410 TERM(WORD);
6411 }
6412
6413 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6414 if (*s == '(') {
79072805 6415 CLINE;
5069cc75 6416 if (cv) {
c35e046a
AL
6417 d = s + 1;
6418 while (SPACE_OR_TAB(*d))
6419 d++;
f7461760 6420 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6421 s = d + 1;
c631f32b 6422 goto its_constant;
96e4d5b1 6423 }
6424 }
5db06880
NC
6425#ifdef PERL_MAD
6426 if (PL_madskills) {
cd81e915
NC
6427 PL_nextwhite = PL_thiswhite;
6428 PL_thiswhite = 0;
5db06880 6429 }
cd81e915 6430 start_force(PL_curforce);
5db06880 6431#endif
6154021b 6432 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6433 PL_expect = XOPERATOR;
5db06880
NC
6434#ifdef PERL_MAD
6435 if (PL_madskills) {
cd81e915
NC
6436 PL_nextwhite = nextPL_nextwhite;
6437 curmad('X', PL_thistoken);
6b29d1f5 6438 PL_thistoken = newSVpvs("");
5db06880
NC
6439 }
6440#endif
f7461760 6441 op_free(rv2cv_op);
93a17b20 6442 force_next(WORD);
6154021b 6443 pl_yylval.ival = 0;
463ee0b2 6444 TOKEN('&');
79072805 6445 }
93a17b20 6446
a0d0e21e 6447 /* If followed by var or block, call it a method (unless sub) */
8990e307 6448
f7461760
Z
6449 if ((*s == '$' || *s == '{') && !cv) {
6450 op_free(rv2cv_op);
3280af22
NIS
6451 PL_last_lop = PL_oldbufptr;
6452 PL_last_lop_op = OP_METHOD;
93a17b20 6453 PREBLOCK(METHOD);
463ee0b2
LW
6454 }
6455
8990e307
LW
6456 /* If followed by a bareword, see if it looks like indir obj. */
6457
30fe34ed
RGS
6458 if (!orig_keyword
6459 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6460 && (tmp = intuit_method(s, gv, cv))) {
6461 op_free(rv2cv_op);
bbf60fe6 6462 return REPORT(tmp);
f7461760 6463 }
93a17b20 6464
8990e307
LW
6465 /* Not a method, so call it a subroutine (if defined) */
6466
5069cc75 6467 if (cv) {
9b387841
NC
6468 if (lastchar == '-')
6469 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6470 "Ambiguous use of -%s resolved as -&%s()",
6471 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6472 /* Check for a constant sub */
f7461760 6473 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6474 its_constant:
f7461760 6475 op_free(rv2cv_op);
6154021b
RGS
6476 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6477 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6478 pl_yylval.opval->op_private = 0;
96e4d5b1 6479 TOKEN(WORD);
89bfa8cd 6480 }
6481
6154021b 6482 op_free(pl_yylval.opval);
f7461760 6483 pl_yylval.opval = rv2cv_op;
6154021b 6484 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6485 PL_last_lop = PL_oldbufptr;
bf848113 6486 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6487 /* Is there a prototype? */
5db06880
NC
6488 if (
6489#ifdef PERL_MAD
6490 cv &&
6491#endif
d9f2850e
RGS
6492 SvPOK(cv))
6493 {
5f66b61c 6494 STRLEN protolen;
daba3364 6495 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6496 if (!protolen)
4633a7c4 6497 TERM(FUNC0SUB);
8c28b960 6498 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
4633a7c4 6499 OPERATOR(UNIOPSUB);
0f5d0394
AE
6500 while (*proto == ';')
6501 proto++;
7a52d87a 6502 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6503 if (PL_curstash)
6504 sv_setpvs(PL_subname, "__ANON__");
6505 else
6506 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6507 PREBLOCK(LSTOPSUB);
6508 }
a9ef352a 6509 }
5db06880
NC
6510#ifdef PERL_MAD
6511 {
6512 if (PL_madskills) {
cd81e915
NC
6513 PL_nextwhite = PL_thiswhite;
6514 PL_thiswhite = 0;
5db06880 6515 }
cd81e915 6516 start_force(PL_curforce);
6154021b 6517 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6518 PL_expect = XTERM;
6519 if (PL_madskills) {
cd81e915
NC
6520 PL_nextwhite = nextPL_nextwhite;
6521 curmad('X', PL_thistoken);
6b29d1f5 6522 PL_thistoken = newSVpvs("");
5db06880
NC
6523 }
6524 force_next(WORD);
6525 TOKEN(NOAMP);
6526 }
6527 }
6528
6529 /* Guess harder when madskills require "best effort". */
6530 if (PL_madskills && (!gv || !GvCVu(gv))) {
6531 int probable_sub = 0;
6532 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6533 probable_sub = 1;
6534 else if (isALPHA(*s)) {
6535 char tmpbuf[1024];
6536 STRLEN tmplen;
6537 d = s;
6538 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6539 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6540 probable_sub = 1;
6541 else {
6542 while (d < PL_bufend && isSPACE(*d))
6543 d++;
6544 if (*d == '=' && d[1] == '>')
6545 probable_sub = 1;
6546 }
6547 }
6548 if (probable_sub) {
7a6d04f4 6549 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6550 op_free(pl_yylval.opval);
f7461760 6551 pl_yylval.opval = rv2cv_op;
6154021b 6552 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6553 PL_last_lop = PL_oldbufptr;
6554 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6555 PL_nextwhite = PL_thiswhite;
6556 PL_thiswhite = 0;
6557 start_force(PL_curforce);
6154021b 6558 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6559 PL_expect = XTERM;
cd81e915
NC
6560 PL_nextwhite = nextPL_nextwhite;
6561 curmad('X', PL_thistoken);
6b29d1f5 6562 PL_thistoken = newSVpvs("");
5db06880
NC
6563 force_next(WORD);
6564 TOKEN(NOAMP);
6565 }
6566#else
6154021b 6567 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6568 PL_expect = XTERM;
8990e307
LW
6569 force_next(WORD);
6570 TOKEN(NOAMP);
5db06880 6571#endif
8990e307 6572 }
748a9306 6573
8990e307
LW
6574 /* Call it a bare word */
6575
5603f27d 6576 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6577 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6578 else {
9a073a1d
RGS
6579 bareword:
6580 /* after "print" and similar functions (corresponding to
6581 * "F? L" in opcode.pl), whatever wasn't already parsed as
6582 * a filehandle should be subject to "strict subs".
6583 * Likewise for the optional indirect-object argument to system
6584 * or exec, which can't be a bareword */
6585 if ((PL_last_lop_op == OP_PRINT
6586 || PL_last_lop_op == OP_PRTF
6587 || PL_last_lop_op == OP_SAY
6588 || PL_last_lop_op == OP_SYSTEM
6589 || PL_last_lop_op == OP_EXEC)
6590 && (PL_hints & HINT_STRICT_SUBS))
6591 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6592 if (lastchar != '-') {
6593 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6594 d = PL_tokenbuf;
6595 while (isLOWER(*d))
6596 d++;
da51bb9b 6597 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6598 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6599 PL_tokenbuf);
6600 }
748a9306
LW
6601 }
6602 }
f7461760 6603 op_free(rv2cv_op);
c3e0f903
GS
6604
6605 safe_bareword:
9b387841
NC
6606 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6607 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6608 "Operator or semicolon missing before %c%s",
6609 lastchar, PL_tokenbuf);
6610 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6611 "Ambiguous use of %c resolved as operator %c",
6612 lastchar, lastchar);
748a9306 6613 }
93a17b20 6614 TOKEN(WORD);
79072805 6615 }
79072805 6616
68dc0745 6617 case KEY___FILE__:
6154021b 6618 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6619 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6620 TERM(THING);
6621
79072805 6622 case KEY___LINE__:
6154021b 6623 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6624 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6625 TERM(THING);
68dc0745 6626
6627 case KEY___PACKAGE__:
6154021b 6628 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6629 (PL_curstash
5aaec2b4 6630 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6631 : &PL_sv_undef));
79072805 6632 TERM(THING);
79072805 6633
e50aee73 6634 case KEY___DATA__:
79072805
LW
6635 case KEY___END__: {
6636 GV *gv;
3280af22 6637 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6638 const char *pname = "main";
3280af22 6639 if (PL_tokenbuf[2] == 'D')
bfcb3514 6640 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6641 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6642 SVt_PVIO);
a5f75d66 6643 GvMULTI_on(gv);
79072805 6644 if (!GvIO(gv))
a0d0e21e 6645 GvIOp(gv) = newIO();
3280af22 6646 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6647#if defined(HAS_FCNTL) && defined(F_SETFD)
6648 {
f54cb97a 6649 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6650 fcntl(fd,F_SETFD,fd >= 3);
6651 }
79072805 6652#endif
fd049845 6653 /* Mark this internal pseudo-handle as clean */
6654 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6655 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6656 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6657 else
50952442 6658 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6659#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6660 /* if the script was opened in binmode, we need to revert
53129d29 6661 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6662 * XXX this is a questionable hack at best. */
53129d29
GS
6663 if (PL_bufend-PL_bufptr > 2
6664 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6665 {
6666 Off_t loc = 0;
50952442 6667 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6668 loc = PerlIO_tell(PL_rsfp);
6669 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6670 }
2986a63f
JH
6671#ifdef NETWARE
6672 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6673#else
c39cd008 6674 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6675#endif /* NETWARE */
1143fce0
JH
6676#ifdef PERLIO_IS_STDIO /* really? */
6677# if defined(__BORLANDC__)
cb359b41
JH
6678 /* XXX see note in do_binmode() */
6679 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6680# endif
6681#endif
c39cd008
GS
6682 if (loc > 0)
6683 PerlIO_seek(PL_rsfp, loc, 0);
6684 }
6685 }
6686#endif
7948272d 6687#ifdef PERLIO_LAYERS
52d2e0f4
JH
6688 if (!IN_BYTES) {
6689 if (UTF)
6690 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6691 else if (PL_encoding) {
6692 SV *name;
6693 dSP;
6694 ENTER;
6695 SAVETMPS;
6696 PUSHMARK(sp);
6697 EXTEND(SP, 1);
6698 XPUSHs(PL_encoding);
6699 PUTBACK;
6700 call_method("name", G_SCALAR);
6701 SPAGAIN;
6702 name = POPs;
6703 PUTBACK;
bfed75c6 6704 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6705 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6706 SVfARG(name)));
52d2e0f4
JH
6707 FREETMPS;
6708 LEAVE;
6709 }
6710 }
7948272d 6711#endif
5db06880
NC
6712#ifdef PERL_MAD
6713 if (PL_madskills) {
cd81e915
NC
6714 if (PL_realtokenstart >= 0) {
6715 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6716 if (!PL_endwhite)
6b29d1f5 6717 PL_endwhite = newSVpvs("");
cd81e915
NC
6718 sv_catsv(PL_endwhite, PL_thiswhite);
6719 PL_thiswhite = 0;
6720 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6721 PL_realtokenstart = -1;
5db06880 6722 }
5cc814fd
NC
6723 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6724 != NULL) ;
5db06880
NC
6725 }
6726#endif
4608196e 6727 PL_rsfp = NULL;
79072805
LW
6728 }
6729 goto fake_eof;
e929a76b 6730 }
de3bb511 6731
8990e307 6732 case KEY_AUTOLOAD:
ed6116ce 6733 case KEY_DESTROY:
79072805 6734 case KEY_BEGIN:
3c10abe3 6735 case KEY_UNITCHECK:
7d30b5c4 6736 case KEY_CHECK:
7d07dbc2 6737 case KEY_INIT:
7d30b5c4 6738 case KEY_END:
3280af22
NIS
6739 if (PL_expect == XSTATE) {
6740 s = PL_bufptr;
93a17b20 6741 goto really_sub;
79072805
LW
6742 }
6743 goto just_a_word;
6744
a0d0e21e
LW
6745 case KEY_CORE:
6746 if (*s == ':' && s[1] == ':') {
6747 s += 2;
748a9306 6748 d = s;
3280af22 6749 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6750 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6751 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6752 if (tmp < 0)
6753 tmp = -tmp;
850e8516 6754 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6755 /* that's a way to remember we saw "CORE::" */
850e8516 6756 orig_keyword = tmp;
a0d0e21e
LW
6757 goto reserved_word;
6758 }
6759 goto just_a_word;
6760
463ee0b2
LW
6761 case KEY_abs:
6762 UNI(OP_ABS);
6763
79072805
LW
6764 case KEY_alarm:
6765 UNI(OP_ALARM);
6766
6767 case KEY_accept:
a0d0e21e 6768 LOP(OP_ACCEPT,XTERM);
79072805 6769
463ee0b2
LW
6770 case KEY_and:
6771 OPERATOR(ANDOP);
6772
79072805 6773 case KEY_atan2:
a0d0e21e 6774 LOP(OP_ATAN2,XTERM);
85e6fe83 6775
79072805 6776 case KEY_bind:
a0d0e21e 6777 LOP(OP_BIND,XTERM);
79072805
LW
6778
6779 case KEY_binmode:
1c1fc3ea 6780 LOP(OP_BINMODE,XTERM);
79072805
LW
6781
6782 case KEY_bless:
a0d0e21e 6783 LOP(OP_BLESS,XTERM);
79072805 6784
0d863452
RH
6785 case KEY_break:
6786 FUN0(OP_BREAK);
6787
79072805
LW
6788 case KEY_chop:
6789 UNI(OP_CHOP);
6790
6791 case KEY_continue:
0d863452
RH
6792 /* When 'use switch' is in effect, continue has a dual
6793 life as a control operator. */
6794 {
ef89dcc3 6795 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6796 PREBLOCK(CONTINUE);
6797 else {
6798 /* We have to disambiguate the two senses of
6799 "continue". If the next token is a '{' then
6800 treat it as the start of a continue block;
6801 otherwise treat it as a control operator.
6802 */
6803 s = skipspace(s);
6804 if (*s == '{')
79072805 6805 PREBLOCK(CONTINUE);
0d863452
RH
6806 else
6807 FUN0(OP_CONTINUE);
6808 }
6809 }
79072805
LW
6810
6811 case KEY_chdir:
fafc274c
NC
6812 /* may use HOME */
6813 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6814 UNI(OP_CHDIR);
6815
6816 case KEY_close:
6817 UNI(OP_CLOSE);
6818
6819 case KEY_closedir:
6820 UNI(OP_CLOSEDIR);
6821
6822 case KEY_cmp:
6823 Eop(OP_SCMP);
6824
6825 case KEY_caller:
6826 UNI(OP_CALLER);
6827
6828 case KEY_crypt:
6829#ifdef FCRYPT
f4c556ac
GS
6830 if (!PL_cryptseen) {
6831 PL_cryptseen = TRUE;
de3bb511 6832 init_des();
f4c556ac 6833 }
a687059c 6834#endif
a0d0e21e 6835 LOP(OP_CRYPT,XTERM);
79072805
LW
6836
6837 case KEY_chmod:
a0d0e21e 6838 LOP(OP_CHMOD,XTERM);
79072805
LW
6839
6840 case KEY_chown:
a0d0e21e 6841 LOP(OP_CHOWN,XTERM);
79072805
LW
6842
6843 case KEY_connect:
a0d0e21e 6844 LOP(OP_CONNECT,XTERM);
79072805 6845
463ee0b2
LW
6846 case KEY_chr:
6847 UNI(OP_CHR);
6848
79072805
LW
6849 case KEY_cos:
6850 UNI(OP_COS);
6851
6852 case KEY_chroot:
6853 UNI(OP_CHROOT);
6854
0d863452
RH
6855 case KEY_default:
6856 PREBLOCK(DEFAULT);
6857
79072805 6858 case KEY_do:
29595ff2 6859 s = SKIPSPACE1(s);
79072805 6860 if (*s == '{')
a0d0e21e 6861 PRETERMBLOCK(DO);
79072805 6862 if (*s != '\'')
89c5585f 6863 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6864 if (orig_keyword == KEY_do) {
6865 orig_keyword = 0;
6154021b 6866 pl_yylval.ival = 1;
850e8516
RGS
6867 }
6868 else
6154021b 6869 pl_yylval.ival = 0;
378cc40b 6870 OPERATOR(DO);
79072805
LW
6871
6872 case KEY_die:
3280af22 6873 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6874 LOP(OP_DIE,XTERM);
79072805
LW
6875
6876 case KEY_defined:
6877 UNI(OP_DEFINED);
6878
6879 case KEY_delete:
a0d0e21e 6880 UNI(OP_DELETE);
79072805
LW
6881
6882 case KEY_dbmopen:
5c1737d1 6883 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6884 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6885
6886 case KEY_dbmclose:
6887 UNI(OP_DBMCLOSE);
6888
6889 case KEY_dump:
a0d0e21e 6890 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6891 LOOPX(OP_DUMP);
6892
6893 case KEY_else:
6894 PREBLOCK(ELSE);
6895
6896 case KEY_elsif:
6154021b 6897 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6898 OPERATOR(ELSIF);
6899
6900 case KEY_eq:
6901 Eop(OP_SEQ);
6902
a0d0e21e
LW
6903 case KEY_exists:
6904 UNI(OP_EXISTS);
4e553d73 6905
79072805 6906 case KEY_exit:
5db06880
NC
6907 if (PL_madskills)
6908 UNI(OP_INT);
79072805
LW
6909 UNI(OP_EXIT);
6910
6911 case KEY_eval:
29595ff2 6912 s = SKIPSPACE1(s);
32e2a35d
RGS
6913 if (*s == '{') { /* block eval */
6914 PL_expect = XTERMBLOCK;
6915 UNIBRACK(OP_ENTERTRY);
6916 }
6917 else { /* string eval */
6918 PL_expect = XTERM;
6919 UNIBRACK(OP_ENTEREVAL);
6920 }
79072805
LW
6921
6922 case KEY_eof:
6923 UNI(OP_EOF);
6924
6925 case KEY_exp:
6926 UNI(OP_EXP);
6927
6928 case KEY_each:
6929 UNI(OP_EACH);
6930
6931 case KEY_exec:
a0d0e21e 6932 LOP(OP_EXEC,XREF);
79072805
LW
6933
6934 case KEY_endhostent:
6935 FUN0(OP_EHOSTENT);
6936
6937 case KEY_endnetent:
6938 FUN0(OP_ENETENT);
6939
6940 case KEY_endservent:
6941 FUN0(OP_ESERVENT);
6942
6943 case KEY_endprotoent:
6944 FUN0(OP_EPROTOENT);
6945
6946 case KEY_endpwent:
6947 FUN0(OP_EPWENT);
6948
6949 case KEY_endgrent:
6950 FUN0(OP_EGRENT);
6951
6952 case KEY_for:
6953 case KEY_foreach:
6154021b 6954 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6955 s = SKIPSPACE1(s);
7e2040f0 6956 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6957 char *p = s;
5db06880
NC
6958#ifdef PERL_MAD
6959 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6960#endif
6961
3280af22 6962 if ((PL_bufend - p) >= 3 &&
55497cff 6963 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6964 p += 2;
77ca0c92
LW
6965 else if ((PL_bufend - p) >= 4 &&
6966 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6967 p += 3;
29595ff2 6968 p = PEEKSPACE(p);
7e2040f0 6969 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6970 p = scan_ident(p, PL_bufend,
6971 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6972 p = PEEKSPACE(p);
77ca0c92
LW
6973 }
6974 if (*p != '$')
cea2e8a9 6975 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6976#ifdef PERL_MAD
6977 s = SvPVX(PL_linestr) + soff;
6978#endif
55497cff 6979 }
79072805
LW
6980 OPERATOR(FOR);
6981
6982 case KEY_formline:
a0d0e21e 6983 LOP(OP_FORMLINE,XTERM);
79072805
LW
6984
6985 case KEY_fork:
6986 FUN0(OP_FORK);
6987
6988 case KEY_fcntl:
a0d0e21e 6989 LOP(OP_FCNTL,XTERM);
79072805
LW
6990
6991 case KEY_fileno:
6992 UNI(OP_FILENO);
6993
6994 case KEY_flock:
a0d0e21e 6995 LOP(OP_FLOCK,XTERM);
79072805
LW
6996
6997 case KEY_gt:
6998 Rop(OP_SGT);
6999
7000 case KEY_ge:
7001 Rop(OP_SGE);
7002
7003 case KEY_grep:
2c38e13d 7004 LOP(OP_GREPSTART, XREF);
79072805
LW
7005
7006 case KEY_goto:
a0d0e21e 7007 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7008 LOOPX(OP_GOTO);
7009
7010 case KEY_gmtime:
7011 UNI(OP_GMTIME);
7012
7013 case KEY_getc:
6f33ba73 7014 UNIDOR(OP_GETC);
79072805
LW
7015
7016 case KEY_getppid:
7017 FUN0(OP_GETPPID);
7018
7019 case KEY_getpgrp:
7020 UNI(OP_GETPGRP);
7021
7022 case KEY_getpriority:
a0d0e21e 7023 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7024
7025 case KEY_getprotobyname:
7026 UNI(OP_GPBYNAME);
7027
7028 case KEY_getprotobynumber:
a0d0e21e 7029 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7030
7031 case KEY_getprotoent:
7032 FUN0(OP_GPROTOENT);
7033
7034 case KEY_getpwent:
7035 FUN0(OP_GPWENT);
7036
7037 case KEY_getpwnam:
ff68c719 7038 UNI(OP_GPWNAM);
79072805
LW
7039
7040 case KEY_getpwuid:
ff68c719 7041 UNI(OP_GPWUID);
79072805
LW
7042
7043 case KEY_getpeername:
7044 UNI(OP_GETPEERNAME);
7045
7046 case KEY_gethostbyname:
7047 UNI(OP_GHBYNAME);
7048
7049 case KEY_gethostbyaddr:
a0d0e21e 7050 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7051
7052 case KEY_gethostent:
7053 FUN0(OP_GHOSTENT);
7054
7055 case KEY_getnetbyname:
7056 UNI(OP_GNBYNAME);
7057
7058 case KEY_getnetbyaddr:
a0d0e21e 7059 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7060
7061 case KEY_getnetent:
7062 FUN0(OP_GNETENT);
7063
7064 case KEY_getservbyname:
a0d0e21e 7065 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7066
7067 case KEY_getservbyport:
a0d0e21e 7068 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7069
7070 case KEY_getservent:
7071 FUN0(OP_GSERVENT);
7072
7073 case KEY_getsockname:
7074 UNI(OP_GETSOCKNAME);
7075
7076 case KEY_getsockopt:
a0d0e21e 7077 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7078
7079 case KEY_getgrent:
7080 FUN0(OP_GGRENT);
7081
7082 case KEY_getgrnam:
ff68c719 7083 UNI(OP_GGRNAM);
79072805
LW
7084
7085 case KEY_getgrgid:
ff68c719 7086 UNI(OP_GGRGID);
79072805
LW
7087
7088 case KEY_getlogin:
7089 FUN0(OP_GETLOGIN);
7090
0d863452 7091 case KEY_given:
6154021b 7092 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7093 OPERATOR(GIVEN);
7094
93a17b20 7095 case KEY_glob:
a0d0e21e 7096 LOP(OP_GLOB,XTERM);
93a17b20 7097
79072805
LW
7098 case KEY_hex:
7099 UNI(OP_HEX);
7100
7101 case KEY_if:
6154021b 7102 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7103 OPERATOR(IF);
7104
7105 case KEY_index:
a0d0e21e 7106 LOP(OP_INDEX,XTERM);
79072805
LW
7107
7108 case KEY_int:
7109 UNI(OP_INT);
7110
7111 case KEY_ioctl:
a0d0e21e 7112 LOP(OP_IOCTL,XTERM);
79072805
LW
7113
7114 case KEY_join:
a0d0e21e 7115 LOP(OP_JOIN,XTERM);
79072805
LW
7116
7117 case KEY_keys:
7118 UNI(OP_KEYS);
7119
7120 case KEY_kill:
a0d0e21e 7121 LOP(OP_KILL,XTERM);
79072805
LW
7122
7123 case KEY_last:
a0d0e21e 7124 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7125 LOOPX(OP_LAST);
4e553d73 7126
79072805
LW
7127 case KEY_lc:
7128 UNI(OP_LC);
7129
7130 case KEY_lcfirst:
7131 UNI(OP_LCFIRST);
7132
7133 case KEY_local:
6154021b 7134 pl_yylval.ival = 0;
79072805
LW
7135 OPERATOR(LOCAL);
7136
7137 case KEY_length:
7138 UNI(OP_LENGTH);
7139
7140 case KEY_lt:
7141 Rop(OP_SLT);
7142
7143 case KEY_le:
7144 Rop(OP_SLE);
7145
7146 case KEY_localtime:
7147 UNI(OP_LOCALTIME);
7148
7149 case KEY_log:
7150 UNI(OP_LOG);
7151
7152 case KEY_link:
a0d0e21e 7153 LOP(OP_LINK,XTERM);
79072805
LW
7154
7155 case KEY_listen:
a0d0e21e 7156 LOP(OP_LISTEN,XTERM);
79072805 7157
c0329465
MB
7158 case KEY_lock:
7159 UNI(OP_LOCK);
7160
79072805
LW
7161 case KEY_lstat:
7162 UNI(OP_LSTAT);
7163
7164 case KEY_m:
8782bef2 7165 s = scan_pat(s,OP_MATCH);
79072805
LW
7166 TERM(sublex_start());
7167
a0d0e21e 7168 case KEY_map:
2c38e13d 7169 LOP(OP_MAPSTART, XREF);
4e4e412b 7170
79072805 7171 case KEY_mkdir:
a0d0e21e 7172 LOP(OP_MKDIR,XTERM);
79072805
LW
7173
7174 case KEY_msgctl:
a0d0e21e 7175 LOP(OP_MSGCTL,XTERM);
79072805
LW
7176
7177 case KEY_msgget:
a0d0e21e 7178 LOP(OP_MSGGET,XTERM);
79072805
LW
7179
7180 case KEY_msgrcv:
a0d0e21e 7181 LOP(OP_MSGRCV,XTERM);
79072805
LW
7182
7183 case KEY_msgsnd:
a0d0e21e 7184 LOP(OP_MSGSND,XTERM);
79072805 7185
77ca0c92 7186 case KEY_our:
93a17b20 7187 case KEY_my:
952306ac 7188 case KEY_state:
eac04b2e 7189 PL_in_my = (U16)tmp;
29595ff2 7190 s = SKIPSPACE1(s);
7e2040f0 7191 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7192#ifdef PERL_MAD
7193 char* start = s;
7194#endif
3280af22 7195 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7196 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7197 goto really_sub;
def3634b 7198 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7199 if (!PL_in_my_stash) {
c750a3ec 7200 char tmpbuf[1024];
3280af22 7201 PL_bufptr = s;
d9fad198 7202 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7203 yyerror(tmpbuf);
7204 }
5db06880
NC
7205#ifdef PERL_MAD
7206 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7207 sv_catsv(PL_thistoken, PL_nextwhite);
7208 PL_nextwhite = 0;
7209 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7210 }
7211#endif
c750a3ec 7212 }
6154021b 7213 pl_yylval.ival = 1;
55497cff 7214 OPERATOR(MY);
93a17b20 7215
79072805 7216 case KEY_next:
a0d0e21e 7217 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7218 LOOPX(OP_NEXT);
7219
7220 case KEY_ne:
7221 Eop(OP_SNE);
7222
a0d0e21e 7223 case KEY_no:
468aa647 7224 s = tokenize_use(0, s);
a0d0e21e
LW
7225 OPERATOR(USE);
7226
7227 case KEY_not:
29595ff2 7228 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7229 FUN1(OP_NOT);
7230 else
7231 OPERATOR(NOTOP);
a0d0e21e 7232
79072805 7233 case KEY_open:
29595ff2 7234 s = SKIPSPACE1(s);
7e2040f0 7235 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7236 const char *t;
c35e046a
AL
7237 for (d = s; isALNUM_lazy_if(d,UTF);)
7238 d++;
7239 for (t=d; isSPACE(*t);)
7240 t++;
e2ab214b 7241 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7242 /* [perl #16184] */
7243 && !(t[0] == '=' && t[1] == '>')
7244 ) {
5f66b61c 7245 int parms_len = (int)(d-s);
9014280d 7246 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7247 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7248 parms_len, s, parms_len, s);
66fbe8fb 7249 }
93a17b20 7250 }
a0d0e21e 7251 LOP(OP_OPEN,XTERM);
79072805 7252
463ee0b2 7253 case KEY_or:
6154021b 7254 pl_yylval.ival = OP_OR;
463ee0b2
LW
7255 OPERATOR(OROP);
7256
79072805
LW
7257 case KEY_ord:
7258 UNI(OP_ORD);
7259
7260 case KEY_oct:
7261 UNI(OP_OCT);
7262
7263 case KEY_opendir:
a0d0e21e 7264 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7265
7266 case KEY_print:
3280af22 7267 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7268 LOP(OP_PRINT,XREF);
79072805
LW
7269
7270 case KEY_printf:
3280af22 7271 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7272 LOP(OP_PRTF,XREF);
79072805 7273
c07a80fd 7274 case KEY_prototype:
7275 UNI(OP_PROTOTYPE);
7276
79072805 7277 case KEY_push:
a0d0e21e 7278 LOP(OP_PUSH,XTERM);
79072805
LW
7279
7280 case KEY_pop:
6f33ba73 7281 UNIDOR(OP_POP);
79072805 7282
a0d0e21e 7283 case KEY_pos:
6f33ba73 7284 UNIDOR(OP_POS);
4e553d73 7285
79072805 7286 case KEY_pack:
a0d0e21e 7287 LOP(OP_PACK,XTERM);
79072805
LW
7288
7289 case KEY_package:
a0d0e21e 7290 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7291 s = SKIPSPACE1(s);
91152fc1 7292 s = force_strict_version(s);
4e4da3ac 7293 PL_lex_expect = XBLOCK;
79072805
LW
7294 OPERATOR(PACKAGE);
7295
7296 case KEY_pipe:
a0d0e21e 7297 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7298
7299 case KEY_q:
5db06880 7300 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7301 if (!s)
d4c19fe8 7302 missingterm(NULL);
6154021b 7303 pl_yylval.ival = OP_CONST;
79072805
LW
7304 TERM(sublex_start());
7305
a0d0e21e
LW
7306 case KEY_quotemeta:
7307 UNI(OP_QUOTEMETA);
7308
8990e307 7309 case KEY_qw:
5db06880 7310 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7311 if (!s)
d4c19fe8 7312 missingterm(NULL);
3480a8d2 7313 PL_expect = XOPERATOR;
8127e0e3
GS
7314 force_next(')');
7315 if (SvCUR(PL_lex_stuff)) {
5f66b61c 7316 OP *words = NULL;
8127e0e3 7317 int warned = 0;
3280af22 7318 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7319 while (len) {
d4c19fe8
AL
7320 for (; isSPACE(*d) && len; --len, ++d)
7321 /**/;
8127e0e3 7322 if (len) {
d4c19fe8 7323 SV *sv;
f54cb97a 7324 const char *b = d;
e476b1b5 7325 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7326 for (; !isSPACE(*d) && len; --len, ++d) {
7327 if (*d == ',') {
9014280d 7328 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7329 "Possible attempt to separate words with commas");
7330 ++warned;
7331 }
7332 else if (*d == '#') {
9014280d 7333 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7334 "Possible attempt to put comments in qw() list");
7335 ++warned;
7336 }
7337 }
7338 }
7339 else {
d4c19fe8
AL
7340 for (; !isSPACE(*d) && len; --len, ++d)
7341 /**/;
8127e0e3 7342 }
740cce10 7343 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7344 words = append_elem(OP_LIST, words,
7948272d 7345 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7346 }
7347 }
8127e0e3 7348 if (words) {
cd81e915 7349 start_force(PL_curforce);
9ded7720 7350 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7351 force_next(THING);
7352 }
55497cff 7353 }
37fd879b 7354 if (PL_lex_stuff) {
8127e0e3 7355 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7356 PL_lex_stuff = NULL;
37fd879b 7357 }
3280af22 7358 PL_expect = XTERM;
8127e0e3 7359 TOKEN('(');
8990e307 7360
79072805 7361 case KEY_qq:
5db06880 7362 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7363 if (!s)
d4c19fe8 7364 missingterm(NULL);
6154021b 7365 pl_yylval.ival = OP_STRINGIFY;
3280af22 7366 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7367 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7368 TERM(sublex_start());
7369
8782bef2
GB
7370 case KEY_qr:
7371 s = scan_pat(s,OP_QR);
7372 TERM(sublex_start());
7373
79072805 7374 case KEY_qx:
5db06880 7375 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7376 if (!s)
d4c19fe8 7377 missingterm(NULL);
9b201d7d 7378 readpipe_override();
79072805
LW
7379 TERM(sublex_start());
7380
7381 case KEY_return:
7382 OLDLOP(OP_RETURN);
7383
7384 case KEY_require:
29595ff2 7385 s = SKIPSPACE1(s);
e759cc13
RGS
7386 if (isDIGIT(*s)) {
7387 s = force_version(s, FALSE);
a7cb1f99 7388 }
e759cc13
RGS
7389 else if (*s != 'v' || !isDIGIT(s[1])
7390 || (s = force_version(s, TRUE), *s == 'v'))
7391 {
a7cb1f99
GS
7392 *PL_tokenbuf = '\0';
7393 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7394 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7395 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7396 else if (*s == '<')
7397 yyerror("<> should be quotes");
7398 }
a72a1c8b
RGS
7399 if (orig_keyword == KEY_require) {
7400 orig_keyword = 0;
6154021b 7401 pl_yylval.ival = 1;
a72a1c8b
RGS
7402 }
7403 else
6154021b 7404 pl_yylval.ival = 0;
a72a1c8b
RGS
7405 PL_expect = XTERM;
7406 PL_bufptr = s;
7407 PL_last_uni = PL_oldbufptr;
7408 PL_last_lop_op = OP_REQUIRE;
7409 s = skipspace(s);
7410 return REPORT( (int)REQUIRE );
79072805
LW
7411
7412 case KEY_reset:
7413 UNI(OP_RESET);
7414
7415 case KEY_redo:
a0d0e21e 7416 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7417 LOOPX(OP_REDO);
7418
7419 case KEY_rename:
a0d0e21e 7420 LOP(OP_RENAME,XTERM);
79072805
LW
7421
7422 case KEY_rand:
7423 UNI(OP_RAND);
7424
7425 case KEY_rmdir:
7426 UNI(OP_RMDIR);
7427
7428 case KEY_rindex:
a0d0e21e 7429 LOP(OP_RINDEX,XTERM);
79072805
LW
7430
7431 case KEY_read:
a0d0e21e 7432 LOP(OP_READ,XTERM);
79072805
LW
7433
7434 case KEY_readdir:
7435 UNI(OP_READDIR);
7436
93a17b20 7437 case KEY_readline:
6f33ba73 7438 UNIDOR(OP_READLINE);
93a17b20
LW
7439
7440 case KEY_readpipe:
0858480c 7441 UNIDOR(OP_BACKTICK);
93a17b20 7442
79072805
LW
7443 case KEY_rewinddir:
7444 UNI(OP_REWINDDIR);
7445
7446 case KEY_recv:
a0d0e21e 7447 LOP(OP_RECV,XTERM);
79072805
LW
7448
7449 case KEY_reverse:
a0d0e21e 7450 LOP(OP_REVERSE,XTERM);
79072805
LW
7451
7452 case KEY_readlink:
6f33ba73 7453 UNIDOR(OP_READLINK);
79072805
LW
7454
7455 case KEY_ref:
7456 UNI(OP_REF);
7457
7458 case KEY_s:
7459 s = scan_subst(s);
6154021b 7460 if (pl_yylval.opval)
79072805
LW
7461 TERM(sublex_start());
7462 else
7463 TOKEN(1); /* force error */
7464
0d863452
RH
7465 case KEY_say:
7466 checkcomma(s,PL_tokenbuf,"filehandle");
7467 LOP(OP_SAY,XREF);
7468
a0d0e21e
LW
7469 case KEY_chomp:
7470 UNI(OP_CHOMP);
4e553d73 7471
79072805
LW
7472 case KEY_scalar:
7473 UNI(OP_SCALAR);
7474
7475 case KEY_select:
a0d0e21e 7476 LOP(OP_SELECT,XTERM);
79072805
LW
7477
7478 case KEY_seek:
a0d0e21e 7479 LOP(OP_SEEK,XTERM);
79072805
LW
7480
7481 case KEY_semctl:
a0d0e21e 7482 LOP(OP_SEMCTL,XTERM);
79072805
LW
7483
7484 case KEY_semget:
a0d0e21e 7485 LOP(OP_SEMGET,XTERM);
79072805
LW
7486
7487 case KEY_semop:
a0d0e21e 7488 LOP(OP_SEMOP,XTERM);
79072805
LW
7489
7490 case KEY_send:
a0d0e21e 7491 LOP(OP_SEND,XTERM);
79072805
LW
7492
7493 case KEY_setpgrp:
a0d0e21e 7494 LOP(OP_SETPGRP,XTERM);
79072805
LW
7495
7496 case KEY_setpriority:
a0d0e21e 7497 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7498
7499 case KEY_sethostent:
ff68c719 7500 UNI(OP_SHOSTENT);
79072805
LW
7501
7502 case KEY_setnetent:
ff68c719 7503 UNI(OP_SNETENT);
79072805
LW
7504
7505 case KEY_setservent:
ff68c719 7506 UNI(OP_SSERVENT);
79072805
LW
7507
7508 case KEY_setprotoent:
ff68c719 7509 UNI(OP_SPROTOENT);
79072805
LW
7510
7511 case KEY_setpwent:
7512 FUN0(OP_SPWENT);
7513
7514 case KEY_setgrent:
7515 FUN0(OP_SGRENT);
7516
7517 case KEY_seekdir:
a0d0e21e 7518 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7519
7520 case KEY_setsockopt:
a0d0e21e 7521 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7522
7523 case KEY_shift:
6f33ba73 7524 UNIDOR(OP_SHIFT);
79072805
LW
7525
7526 case KEY_shmctl:
a0d0e21e 7527 LOP(OP_SHMCTL,XTERM);
79072805
LW
7528
7529 case KEY_shmget:
a0d0e21e 7530 LOP(OP_SHMGET,XTERM);
79072805
LW
7531
7532 case KEY_shmread:
a0d0e21e 7533 LOP(OP_SHMREAD,XTERM);
79072805
LW
7534
7535 case KEY_shmwrite:
a0d0e21e 7536 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7537
7538 case KEY_shutdown:
a0d0e21e 7539 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7540
7541 case KEY_sin:
7542 UNI(OP_SIN);
7543
7544 case KEY_sleep:
7545 UNI(OP_SLEEP);
7546
7547 case KEY_socket:
a0d0e21e 7548 LOP(OP_SOCKET,XTERM);
79072805
LW
7549
7550 case KEY_socketpair:
a0d0e21e 7551 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7552
7553 case KEY_sort:
3280af22 7554 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7555 s = SKIPSPACE1(s);
79072805 7556 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7557 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7558 PL_expect = XTERM;
15f0808c 7559 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7560 LOP(OP_SORT,XREF);
79072805
LW
7561
7562 case KEY_split:
a0d0e21e 7563 LOP(OP_SPLIT,XTERM);
79072805
LW
7564
7565 case KEY_sprintf:
a0d0e21e 7566 LOP(OP_SPRINTF,XTERM);
79072805
LW
7567
7568 case KEY_splice:
a0d0e21e 7569 LOP(OP_SPLICE,XTERM);
79072805
LW
7570
7571 case KEY_sqrt:
7572 UNI(OP_SQRT);
7573
7574 case KEY_srand:
7575 UNI(OP_SRAND);
7576
7577 case KEY_stat:
7578 UNI(OP_STAT);
7579
7580 case KEY_study:
79072805
LW
7581 UNI(OP_STUDY);
7582
7583 case KEY_substr:
a0d0e21e 7584 LOP(OP_SUBSTR,XTERM);
79072805
LW
7585
7586 case KEY_format:
7587 case KEY_sub:
93a17b20 7588 really_sub:
09bef843 7589 {
3280af22 7590 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7591 SSize_t tboffset = 0;
09bef843 7592 expectation attrful;
28cc6278 7593 bool have_name, have_proto;
f54cb97a 7594 const int key = tmp;
09bef843 7595
5db06880
NC
7596#ifdef PERL_MAD
7597 SV *tmpwhite = 0;
7598
cd81e915 7599 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7600 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7601 PL_thistoken = 0;
5db06880
NC
7602
7603 d = s;
7604 s = SKIPSPACE2(s,tmpwhite);
7605#else
09bef843 7606 s = skipspace(s);
5db06880 7607#endif
09bef843 7608
7e2040f0 7609 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7610 (*s == ':' && s[1] == ':'))
7611 {
5db06880 7612#ifdef PERL_MAD
4f61fd4b 7613 SV *nametoke = NULL;
5db06880
NC
7614#endif
7615
09bef843
SB
7616 PL_expect = XBLOCK;
7617 attrful = XATTRBLOCK;
b1b65b59
JH
7618 /* remember buffer pos'n for later force_word */
7619 tboffset = s - PL_oldbufptr;
09bef843 7620 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7621#ifdef PERL_MAD
7622 if (PL_madskills)
7623 nametoke = newSVpvn(s, d - s);
7624#endif
6502358f
NC
7625 if (memchr(tmpbuf, ':', len))
7626 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7627 else {
7628 sv_setsv(PL_subname,PL_curstname);
396482e1 7629 sv_catpvs(PL_subname,"::");
09bef843
SB
7630 sv_catpvn(PL_subname,tmpbuf,len);
7631 }
09bef843 7632 have_name = TRUE;
5db06880
NC
7633
7634#ifdef PERL_MAD
7635
7636 start_force(0);
7637 CURMAD('X', nametoke);
7638 CURMAD('_', tmpwhite);
7639 (void) force_word(PL_oldbufptr + tboffset, WORD,
7640 FALSE, TRUE, TRUE);
7641
7642 s = SKIPSPACE2(d,tmpwhite);
7643#else
7644 s = skipspace(d);
7645#endif
09bef843 7646 }
463ee0b2 7647 else {
09bef843
SB
7648 if (key == KEY_my)
7649 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7650 PL_expect = XTERMBLOCK;
7651 attrful = XATTRTERM;
76f68e9b 7652 sv_setpvs(PL_subname,"?");
09bef843 7653 have_name = FALSE;
463ee0b2 7654 }
4633a7c4 7655
09bef843
SB
7656 if (key == KEY_format) {
7657 if (*s == '=')
7658 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7659#ifdef PERL_MAD
cd81e915 7660 PL_thistoken = subtoken;
5db06880
NC
7661 s = d;
7662#else
09bef843 7663 if (have_name)
b1b65b59
JH
7664 (void) force_word(PL_oldbufptr + tboffset, WORD,
7665 FALSE, TRUE, TRUE);
5db06880 7666#endif
09bef843
SB
7667 OPERATOR(FORMAT);
7668 }
79072805 7669
09bef843
SB
7670 /* Look for a prototype */
7671 if (*s == '(') {
d9f2850e
RGS
7672 char *p;
7673 bool bad_proto = FALSE;
9e8d7757
RB
7674 bool in_brackets = FALSE;
7675 char greedy_proto = ' ';
7676 bool proto_after_greedy_proto = FALSE;
7677 bool must_be_last = FALSE;
7678 bool underscore = FALSE;
aef2a98a 7679 bool seen_underscore = FALSE;
197afce1 7680 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7681
5db06880 7682 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7683 if (!s)
09bef843 7684 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7685 /* strip spaces and check for bad characters */
09bef843
SB
7686 d = SvPVX(PL_lex_stuff);
7687 tmp = 0;
d9f2850e
RGS
7688 for (p = d; *p; ++p) {
7689 if (!isSPACE(*p)) {
7690 d[tmp++] = *p;
9e8d7757 7691
197afce1 7692 if (warnillegalproto) {
9e8d7757
RB
7693 if (must_be_last)
7694 proto_after_greedy_proto = TRUE;
7695 if (!strchr("$@%*;[]&\\_", *p)) {
7696 bad_proto = TRUE;
7697 }
7698 else {
7699 if ( underscore ) {
7700 if ( *p != ';' )
7701 bad_proto = TRUE;
7702 underscore = FALSE;
7703 }
7704 if ( *p == '[' ) {
7705 in_brackets = TRUE;
7706 }
7707 else if ( *p == ']' ) {
7708 in_brackets = FALSE;
7709 }
7710 else if ( (*p == '@' || *p == '%') &&
7711 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7712 !in_brackets ) {
7713 must_be_last = TRUE;
7714 greedy_proto = *p;
7715 }
7716 else if ( *p == '_' ) {
aef2a98a 7717 underscore = seen_underscore = TRUE;
9e8d7757
RB
7718 }
7719 }
7720 }
d37a9538 7721 }
09bef843 7722 }
d9f2850e 7723 d[tmp] = '\0';
9e8d7757 7724 if (proto_after_greedy_proto)
197afce1 7725 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7726 "Prototype after '%c' for %"SVf" : %s",
7727 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7728 if (bad_proto)
197afce1 7729 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7730 "Illegal character %sin prototype for %"SVf" : %s",
7731 seen_underscore ? "after '_' " : "",
be2597df 7732 SVfARG(PL_subname), d);
b162af07 7733 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7734 have_proto = TRUE;
68dc0745 7735
5db06880
NC
7736#ifdef PERL_MAD
7737 start_force(0);
cd81e915 7738 CURMAD('q', PL_thisopen);
5db06880 7739 CURMAD('_', tmpwhite);
cd81e915
NC
7740 CURMAD('=', PL_thisstuff);
7741 CURMAD('Q', PL_thisclose);
5db06880
NC
7742 NEXTVAL_NEXTTOKE.opval =
7743 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7744 PL_lex_stuff = NULL;
5db06880
NC
7745 force_next(THING);
7746
7747 s = SKIPSPACE2(s,tmpwhite);
7748#else
09bef843 7749 s = skipspace(s);
5db06880 7750#endif
4633a7c4 7751 }
09bef843
SB
7752 else
7753 have_proto = FALSE;
7754
7755 if (*s == ':' && s[1] != ':')
7756 PL_expect = attrful;
8e742a20
MHM
7757 else if (*s != '{' && key == KEY_sub) {
7758 if (!have_name)
7759 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7760 else if (*s != ';' && *s != '}')
be2597df 7761 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7762 }
09bef843 7763
5db06880
NC
7764#ifdef PERL_MAD
7765 start_force(0);
7766 if (tmpwhite) {
7767 if (PL_madskills)
6b29d1f5 7768 curmad('^', newSVpvs(""));
5db06880
NC
7769 CURMAD('_', tmpwhite);
7770 }
7771 force_next(0);
7772
cd81e915 7773 PL_thistoken = subtoken;
5db06880 7774#else
09bef843 7775 if (have_proto) {
9ded7720 7776 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7777 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7778 PL_lex_stuff = NULL;
09bef843 7779 force_next(THING);
68dc0745 7780 }
5db06880 7781#endif
09bef843 7782 if (!have_name) {
49a54bbe
NC
7783 if (PL_curstash)
7784 sv_setpvs(PL_subname, "__ANON__");
7785 else
7786 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7787 TOKEN(ANONSUB);
4633a7c4 7788 }
5db06880 7789#ifndef PERL_MAD
b1b65b59
JH
7790 (void) force_word(PL_oldbufptr + tboffset, WORD,
7791 FALSE, TRUE, TRUE);
5db06880 7792#endif
09bef843
SB
7793 if (key == KEY_my)
7794 TOKEN(MYSUB);
7795 TOKEN(SUB);
4633a7c4 7796 }
79072805
LW
7797
7798 case KEY_system:
a0d0e21e 7799 LOP(OP_SYSTEM,XREF);
79072805
LW
7800
7801 case KEY_symlink:
a0d0e21e 7802 LOP(OP_SYMLINK,XTERM);
79072805
LW
7803
7804 case KEY_syscall:
a0d0e21e 7805 LOP(OP_SYSCALL,XTERM);
79072805 7806
c07a80fd 7807 case KEY_sysopen:
7808 LOP(OP_SYSOPEN,XTERM);
7809
137443ea 7810 case KEY_sysseek:
7811 LOP(OP_SYSSEEK,XTERM);
7812
79072805 7813 case KEY_sysread:
a0d0e21e 7814 LOP(OP_SYSREAD,XTERM);
79072805
LW
7815
7816 case KEY_syswrite:
a0d0e21e 7817 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7818
7819 case KEY_tr:
7820 s = scan_trans(s);
7821 TERM(sublex_start());
7822
7823 case KEY_tell:
7824 UNI(OP_TELL);
7825
7826 case KEY_telldir:
7827 UNI(OP_TELLDIR);
7828
463ee0b2 7829 case KEY_tie:
a0d0e21e 7830 LOP(OP_TIE,XTERM);
463ee0b2 7831
c07a80fd 7832 case KEY_tied:
7833 UNI(OP_TIED);
7834
79072805
LW
7835 case KEY_time:
7836 FUN0(OP_TIME);
7837
7838 case KEY_times:
7839 FUN0(OP_TMS);
7840
7841 case KEY_truncate:
a0d0e21e 7842 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7843
7844 case KEY_uc:
7845 UNI(OP_UC);
7846
7847 case KEY_ucfirst:
7848 UNI(OP_UCFIRST);
7849
463ee0b2
LW
7850 case KEY_untie:
7851 UNI(OP_UNTIE);
7852
79072805 7853 case KEY_until:
6154021b 7854 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7855 OPERATOR(UNTIL);
7856
7857 case KEY_unless:
6154021b 7858 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7859 OPERATOR(UNLESS);
7860
7861 case KEY_unlink:
a0d0e21e 7862 LOP(OP_UNLINK,XTERM);
79072805
LW
7863
7864 case KEY_undef:
6f33ba73 7865 UNIDOR(OP_UNDEF);
79072805
LW
7866
7867 case KEY_unpack:
a0d0e21e 7868 LOP(OP_UNPACK,XTERM);
79072805
LW
7869
7870 case KEY_utime:
a0d0e21e 7871 LOP(OP_UTIME,XTERM);
79072805
LW
7872
7873 case KEY_umask:
6f33ba73 7874 UNIDOR(OP_UMASK);
79072805
LW
7875
7876 case KEY_unshift:
a0d0e21e
LW
7877 LOP(OP_UNSHIFT,XTERM);
7878
7879 case KEY_use:
468aa647 7880 s = tokenize_use(1, s);
a0d0e21e 7881 OPERATOR(USE);
79072805
LW
7882
7883 case KEY_values:
7884 UNI(OP_VALUES);
7885
7886 case KEY_vec:
a0d0e21e 7887 LOP(OP_VEC,XTERM);
79072805 7888
0d863452 7889 case KEY_when:
6154021b 7890 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7891 OPERATOR(WHEN);
7892
79072805 7893 case KEY_while:
6154021b 7894 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7895 OPERATOR(WHILE);
7896
7897 case KEY_warn:
3280af22 7898 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7899 LOP(OP_WARN,XTERM);
79072805
LW
7900
7901 case KEY_wait:
7902 FUN0(OP_WAIT);
7903
7904 case KEY_waitpid:
a0d0e21e 7905 LOP(OP_WAITPID,XTERM);
79072805
LW
7906
7907 case KEY_wantarray:
7908 FUN0(OP_WANTARRAY);
7909
7910 case KEY_write:
9d116dd7
JH
7911#ifdef EBCDIC
7912 {
df3728a2
JH
7913 char ctl_l[2];
7914 ctl_l[0] = toCTRL('L');
7915 ctl_l[1] = '\0';
fafc274c 7916 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7917 }
7918#else
fafc274c
NC
7919 /* Make sure $^L is defined */
7920 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7921#endif
79072805
LW
7922 UNI(OP_ENTERWRITE);
7923
7924 case KEY_x:
3280af22 7925 if (PL_expect == XOPERATOR)
79072805
LW
7926 Mop(OP_REPEAT);
7927 check_uni();
7928 goto just_a_word;
7929
a0d0e21e 7930 case KEY_xor:
6154021b 7931 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7932 OPERATOR(OROP);
7933
79072805
LW
7934 case KEY_y:
7935 s = scan_trans(s);
7936 TERM(sublex_start());
7937 }
49dc05e3 7938 }}
79072805 7939}
bf4acbe4
GS
7940#ifdef __SC__
7941#pragma segment Main
7942#endif
79072805 7943
e930465f
JH
7944static int
7945S_pending_ident(pTHX)
8eceec63 7946{
97aff369 7947 dVAR;
8eceec63 7948 register char *d;
bbd11bfc 7949 PADOFFSET tmp = 0;
8eceec63
SC
7950 /* pit holds the identifier we read and pending_ident is reset */
7951 char pit = PL_pending_ident;
9bde8eb0
NC
7952 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7953 /* All routes through this function want to know if there is a colon. */
c099d646 7954 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7955 PL_pending_ident = 0;
7956
cd81e915 7957 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7958 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7959 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7960
7961 /* if we're in a my(), we can't allow dynamics here.
7962 $foo'bar has already been turned into $foo::bar, so
7963 just check for colons.
7964
7965 if it's a legal name, the OP is a PADANY.
7966 */
7967 if (PL_in_my) {
7968 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7969 if (has_colon)
8eceec63
SC
7970 yyerror(Perl_form(aTHX_ "No package name allowed for "
7971 "variable %s in \"our\"",
7972 PL_tokenbuf));
d6447115 7973 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7974 }
7975 else {
9bde8eb0 7976 if (has_colon)
952306ac
RGS
7977 yyerror(Perl_form(aTHX_ PL_no_myglob,
7978 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7979
6154021b 7980 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7981 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7982 return PRIVATEREF;
7983 }
7984 }
7985
7986 /*
7987 build the ops for accesses to a my() variable.
7988
7989 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7990 then used in a comparison. This catches most, but not
7991 all cases. For instance, it catches
7992 sort { my($a); $a <=> $b }
7993 but not
7994 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7995 (although why you'd do that is anyone's guess).
7996 */
7997
9bde8eb0 7998 if (!has_colon) {
8716503d 7999 if (!PL_in_my)
f8f98e0a 8000 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8001 if (tmp != NOT_IN_PAD) {
8eceec63 8002 /* might be an "our" variable" */
00b1698f 8003 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8004 /* build ops for a bareword */
b64e5050
AL
8005 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8006 HEK * const stashname = HvNAME_HEK(stash);
8007 SV * const sym = newSVhek(stashname);
396482e1 8008 sv_catpvs(sym, "::");
9bde8eb0 8009 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8010 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8011 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8012 gv_fetchsv(sym,
8eceec63
SC
8013 (PL_in_eval
8014 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8015 : GV_ADDMULTI
8eceec63
SC
8016 ),
8017 ((PL_tokenbuf[0] == '$') ? SVt_PV
8018 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8019 : SVt_PVHV));
8020 return WORD;
8021 }
8022
8023 /* if it's a sort block and they're naming $a or $b */
8024 if (PL_last_lop_op == OP_SORT &&
8025 PL_tokenbuf[0] == '$' &&
8026 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8027 && !PL_tokenbuf[2])
8028 {
8029 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8030 d < PL_bufend && *d != '\n';
8031 d++)
8032 {
8033 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8034 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8035 PL_tokenbuf);
8036 }
8037 }
8038 }
8039
6154021b
RGS
8040 pl_yylval.opval = newOP(OP_PADANY, 0);
8041 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8042 return PRIVATEREF;
8043 }
8044 }
8045
8046 /*
8047 Whine if they've said @foo in a doublequoted string,
8048 and @foo isn't a variable we can find in the symbol
8049 table.
8050 */
d824713b
NC
8051 if (ckWARN(WARN_AMBIGUOUS) &&
8052 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8053 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8054 SVt_PVAV);
8eceec63 8055 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8056 /* DO NOT warn for @- and @+ */
8057 && !( PL_tokenbuf[2] == '\0' &&
8058 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8059 )
8eceec63
SC
8060 {
8061 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8062 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8063 "Possible unintended interpolation of %s in string",
8064 PL_tokenbuf);
8eceec63
SC
8065 }
8066 }
8067
8068 /* build ops for a bareword */
6154021b 8069 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8070 tokenbuf_len - 1));
6154021b 8071 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8072 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8073 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8074 ((PL_tokenbuf[0] == '$') ? SVt_PV
8075 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8076 : SVt_PVHV));
8eceec63
SC
8077 return WORD;
8078}
8079
4c3bbe0f
MHM
8080/*
8081 * The following code was generated by perl_keyword.pl.
8082 */
e2e1dd5a 8083
79072805 8084I32
5458a98a 8085Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8086{
952306ac 8087 dVAR;
7918f24d
NC
8088
8089 PERL_ARGS_ASSERT_KEYWORD;
8090
4c3bbe0f
MHM
8091 switch (len)
8092 {
8093 case 1: /* 5 tokens of length 1 */
8094 switch (name[0])
e2e1dd5a 8095 {
4c3bbe0f
MHM
8096 case 'm':
8097 { /* m */
8098 return KEY_m;
8099 }
8100
4c3bbe0f
MHM
8101 case 'q':
8102 { /* q */
8103 return KEY_q;
8104 }
8105
4c3bbe0f
MHM
8106 case 's':
8107 { /* s */
8108 return KEY_s;
8109 }
8110
4c3bbe0f
MHM
8111 case 'x':
8112 { /* x */
8113 return -KEY_x;
8114 }
8115
4c3bbe0f
MHM
8116 case 'y':
8117 { /* y */
8118 return KEY_y;
8119 }
8120
4c3bbe0f
MHM
8121 default:
8122 goto unknown;
e2e1dd5a 8123 }
4c3bbe0f
MHM
8124
8125 case 2: /* 18 tokens of length 2 */
8126 switch (name[0])
e2e1dd5a 8127 {
4c3bbe0f
MHM
8128 case 'd':
8129 if (name[1] == 'o')
8130 { /* do */
8131 return KEY_do;
8132 }
8133
8134 goto unknown;
8135
8136 case 'e':
8137 if (name[1] == 'q')
8138 { /* eq */
8139 return -KEY_eq;
8140 }
8141
8142 goto unknown;
8143
8144 case 'g':
8145 switch (name[1])
8146 {
8147 case 'e':
8148 { /* ge */
8149 return -KEY_ge;
8150 }
8151
4c3bbe0f
MHM
8152 case 't':
8153 { /* gt */
8154 return -KEY_gt;
8155 }
8156
4c3bbe0f
MHM
8157 default:
8158 goto unknown;
8159 }
8160
8161 case 'i':
8162 if (name[1] == 'f')
8163 { /* if */
8164 return KEY_if;
8165 }
8166
8167 goto unknown;
8168
8169 case 'l':
8170 switch (name[1])
8171 {
8172 case 'c':
8173 { /* lc */
8174 return -KEY_lc;
8175 }
8176
4c3bbe0f
MHM
8177 case 'e':
8178 { /* le */
8179 return -KEY_le;
8180 }
8181
4c3bbe0f
MHM
8182 case 't':
8183 { /* lt */
8184 return -KEY_lt;
8185 }
8186
4c3bbe0f
MHM
8187 default:
8188 goto unknown;
8189 }
8190
8191 case 'm':
8192 if (name[1] == 'y')
8193 { /* my */
8194 return KEY_my;
8195 }
8196
8197 goto unknown;
8198
8199 case 'n':
8200 switch (name[1])
8201 {
8202 case 'e':
8203 { /* ne */
8204 return -KEY_ne;
8205 }
8206
4c3bbe0f
MHM
8207 case 'o':
8208 { /* no */
8209 return KEY_no;
8210 }
8211
4c3bbe0f
MHM
8212 default:
8213 goto unknown;
8214 }
8215
8216 case 'o':
8217 if (name[1] == 'r')
8218 { /* or */
8219 return -KEY_or;
8220 }
8221
8222 goto unknown;
8223
8224 case 'q':
8225 switch (name[1])
8226 {
8227 case 'q':
8228 { /* qq */
8229 return KEY_qq;
8230 }
8231
4c3bbe0f
MHM
8232 case 'r':
8233 { /* qr */
8234 return KEY_qr;
8235 }
8236
4c3bbe0f
MHM
8237 case 'w':
8238 { /* qw */
8239 return KEY_qw;
8240 }
8241
4c3bbe0f
MHM
8242 case 'x':
8243 { /* qx */
8244 return KEY_qx;
8245 }
8246
4c3bbe0f
MHM
8247 default:
8248 goto unknown;
8249 }
8250
8251 case 't':
8252 if (name[1] == 'r')
8253 { /* tr */
8254 return KEY_tr;
8255 }
8256
8257 goto unknown;
8258
8259 case 'u':
8260 if (name[1] == 'c')
8261 { /* uc */
8262 return -KEY_uc;
8263 }
8264
8265 goto unknown;
8266
8267 default:
8268 goto unknown;
e2e1dd5a 8269 }
4c3bbe0f 8270
0d863452 8271 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8272 switch (name[0])
e2e1dd5a 8273 {
4c3bbe0f
MHM
8274 case 'E':
8275 if (name[1] == 'N' &&
8276 name[2] == 'D')
8277 { /* END */
8278 return KEY_END;
8279 }
8280
8281 goto unknown;
8282
8283 case 'a':
8284 switch (name[1])
8285 {
8286 case 'b':
8287 if (name[2] == 's')
8288 { /* abs */
8289 return -KEY_abs;
8290 }
8291
8292 goto unknown;
8293
8294 case 'n':
8295 if (name[2] == 'd')
8296 { /* and */
8297 return -KEY_and;
8298 }
8299
8300 goto unknown;
8301
8302 default:
8303 goto unknown;
8304 }
8305
8306 case 'c':
8307 switch (name[1])
8308 {
8309 case 'h':
8310 if (name[2] == 'r')
8311 { /* chr */
8312 return -KEY_chr;
8313 }
8314
8315 goto unknown;
8316
8317 case 'm':
8318 if (name[2] == 'p')
8319 { /* cmp */
8320 return -KEY_cmp;
8321 }
8322
8323 goto unknown;
8324
8325 case 'o':
8326 if (name[2] == 's')
8327 { /* cos */
8328 return -KEY_cos;
8329 }
8330
8331 goto unknown;
8332
8333 default:
8334 goto unknown;
8335 }
8336
8337 case 'd':
8338 if (name[1] == 'i' &&
8339 name[2] == 'e')
8340 { /* die */
8341 return -KEY_die;
8342 }
8343
8344 goto unknown;
8345
8346 case 'e':
8347 switch (name[1])
8348 {
8349 case 'o':
8350 if (name[2] == 'f')
8351 { /* eof */
8352 return -KEY_eof;
8353 }
8354
8355 goto unknown;
8356
4c3bbe0f
MHM
8357 case 'x':
8358 if (name[2] == 'p')
8359 { /* exp */
8360 return -KEY_exp;
8361 }
8362
8363 goto unknown;
8364
8365 default:
8366 goto unknown;
8367 }
8368
8369 case 'f':
8370 if (name[1] == 'o' &&
8371 name[2] == 'r')
8372 { /* for */
8373 return KEY_for;
8374 }
8375
8376 goto unknown;
8377
8378 case 'h':
8379 if (name[1] == 'e' &&
8380 name[2] == 'x')
8381 { /* hex */
8382 return -KEY_hex;
8383 }
8384
8385 goto unknown;
8386
8387 case 'i':
8388 if (name[1] == 'n' &&
8389 name[2] == 't')
8390 { /* int */
8391 return -KEY_int;
8392 }
8393
8394 goto unknown;
8395
8396 case 'l':
8397 if (name[1] == 'o' &&
8398 name[2] == 'g')
8399 { /* log */
8400 return -KEY_log;
8401 }
8402
8403 goto unknown;
8404
8405 case 'm':
8406 if (name[1] == 'a' &&
8407 name[2] == 'p')
8408 { /* map */
8409 return KEY_map;
8410 }
8411
8412 goto unknown;
8413
8414 case 'n':
8415 if (name[1] == 'o' &&
8416 name[2] == 't')
8417 { /* not */
8418 return -KEY_not;
8419 }
8420
8421 goto unknown;
8422
8423 case 'o':
8424 switch (name[1])
8425 {
8426 case 'c':
8427 if (name[2] == 't')
8428 { /* oct */
8429 return -KEY_oct;
8430 }
8431
8432 goto unknown;
8433
8434 case 'r':
8435 if (name[2] == 'd')
8436 { /* ord */
8437 return -KEY_ord;
8438 }
8439
8440 goto unknown;
8441
8442 case 'u':
8443 if (name[2] == 'r')
8444 { /* our */
8445 return KEY_our;
8446 }
8447
8448 goto unknown;
8449
8450 default:
8451 goto unknown;
8452 }
8453
8454 case 'p':
8455 if (name[1] == 'o')
8456 {
8457 switch (name[2])
8458 {
8459 case 'p':
8460 { /* pop */
8461 return -KEY_pop;
8462 }
8463
4c3bbe0f
MHM
8464 case 's':
8465 { /* pos */
8466 return KEY_pos;
8467 }
8468
4c3bbe0f
MHM
8469 default:
8470 goto unknown;
8471 }
8472 }
8473
8474 goto unknown;
8475
8476 case 'r':
8477 if (name[1] == 'e' &&
8478 name[2] == 'f')
8479 { /* ref */
8480 return -KEY_ref;
8481 }
8482
8483 goto unknown;
8484
8485 case 's':
8486 switch (name[1])
8487 {
0d863452
RH
8488 case 'a':
8489 if (name[2] == 'y')
8490 { /* say */
e3e804c9 8491 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8492 }
8493
8494 goto unknown;
8495
4c3bbe0f
MHM
8496 case 'i':
8497 if (name[2] == 'n')
8498 { /* sin */
8499 return -KEY_sin;
8500 }
8501
8502 goto unknown;
8503
8504 case 'u':
8505 if (name[2] == 'b')
8506 { /* sub */
8507 return KEY_sub;
8508 }
8509
8510 goto unknown;
8511
8512 default:
8513 goto unknown;
8514 }
8515
8516 case 't':
8517 if (name[1] == 'i' &&
8518 name[2] == 'e')
8519 { /* tie */
1db4d195 8520 return -KEY_tie;
4c3bbe0f
MHM
8521 }
8522
8523 goto unknown;
8524
8525 case 'u':
8526 if (name[1] == 's' &&
8527 name[2] == 'e')
8528 { /* use */
8529 return KEY_use;
8530 }
8531
8532 goto unknown;
8533
8534 case 'v':
8535 if (name[1] == 'e' &&
8536 name[2] == 'c')
8537 { /* vec */
8538 return -KEY_vec;
8539 }
8540
8541 goto unknown;
8542
8543 case 'x':
8544 if (name[1] == 'o' &&
8545 name[2] == 'r')
8546 { /* xor */
8547 return -KEY_xor;
8548 }
8549
8550 goto unknown;
8551
8552 default:
8553 goto unknown;
e2e1dd5a 8554 }
4c3bbe0f 8555
0d863452 8556 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8557 switch (name[0])
e2e1dd5a 8558 {
4c3bbe0f
MHM
8559 case 'C':
8560 if (name[1] == 'O' &&
8561 name[2] == 'R' &&
8562 name[3] == 'E')
8563 { /* CORE */
8564 return -KEY_CORE;
8565 }
8566
8567 goto unknown;
8568
8569 case 'I':
8570 if (name[1] == 'N' &&
8571 name[2] == 'I' &&
8572 name[3] == 'T')
8573 { /* INIT */
8574 return KEY_INIT;
8575 }
8576
8577 goto unknown;
8578
8579 case 'b':
8580 if (name[1] == 'i' &&
8581 name[2] == 'n' &&
8582 name[3] == 'd')
8583 { /* bind */
8584 return -KEY_bind;
8585 }
8586
8587 goto unknown;
8588
8589 case 'c':
8590 if (name[1] == 'h' &&
8591 name[2] == 'o' &&
8592 name[3] == 'p')
8593 { /* chop */
8594 return -KEY_chop;
8595 }
8596
8597 goto unknown;
8598
8599 case 'd':
8600 if (name[1] == 'u' &&
8601 name[2] == 'm' &&
8602 name[3] == 'p')
8603 { /* dump */
8604 return -KEY_dump;
8605 }
8606
8607 goto unknown;
8608
8609 case 'e':
8610 switch (name[1])
8611 {
8612 case 'a':
8613 if (name[2] == 'c' &&
8614 name[3] == 'h')
8615 { /* each */
8616 return -KEY_each;
8617 }
8618
8619 goto unknown;
8620
8621 case 'l':
8622 if (name[2] == 's' &&
8623 name[3] == 'e')
8624 { /* else */
8625 return KEY_else;
8626 }
8627
8628 goto unknown;
8629
8630 case 'v':
8631 if (name[2] == 'a' &&
8632 name[3] == 'l')
8633 { /* eval */
8634 return KEY_eval;
8635 }
8636
8637 goto unknown;
8638
8639 case 'x':
8640 switch (name[2])
8641 {
8642 case 'e':
8643 if (name[3] == 'c')
8644 { /* exec */
8645 return -KEY_exec;
8646 }
8647
8648 goto unknown;
8649
8650 case 'i':
8651 if (name[3] == 't')
8652 { /* exit */
8653 return -KEY_exit;
8654 }
8655
8656 goto unknown;
8657
8658 default:
8659 goto unknown;
8660 }
8661
8662 default:
8663 goto unknown;
8664 }
8665
8666 case 'f':
8667 if (name[1] == 'o' &&
8668 name[2] == 'r' &&
8669 name[3] == 'k')
8670 { /* fork */
8671 return -KEY_fork;
8672 }
8673
8674 goto unknown;
8675
8676 case 'g':
8677 switch (name[1])
8678 {
8679 case 'e':
8680 if (name[2] == 't' &&
8681 name[3] == 'c')
8682 { /* getc */
8683 return -KEY_getc;
8684 }
8685
8686 goto unknown;
8687
8688 case 'l':
8689 if (name[2] == 'o' &&
8690 name[3] == 'b')
8691 { /* glob */
8692 return KEY_glob;
8693 }
8694
8695 goto unknown;
8696
8697 case 'o':
8698 if (name[2] == 't' &&
8699 name[3] == 'o')
8700 { /* goto */
8701 return KEY_goto;
8702 }
8703
8704 goto unknown;
8705
8706 case 'r':
8707 if (name[2] == 'e' &&
8708 name[3] == 'p')
8709 { /* grep */
8710 return KEY_grep;
8711 }
8712
8713 goto unknown;
8714
8715 default:
8716 goto unknown;
8717 }
8718
8719 case 'j':
8720 if (name[1] == 'o' &&
8721 name[2] == 'i' &&
8722 name[3] == 'n')
8723 { /* join */
8724 return -KEY_join;
8725 }
8726
8727 goto unknown;
8728
8729 case 'k':
8730 switch (name[1])
8731 {
8732 case 'e':
8733 if (name[2] == 'y' &&
8734 name[3] == 's')
8735 { /* keys */
8736 return -KEY_keys;
8737 }
8738
8739 goto unknown;
8740
8741 case 'i':
8742 if (name[2] == 'l' &&
8743 name[3] == 'l')
8744 { /* kill */
8745 return -KEY_kill;
8746 }
8747
8748 goto unknown;
8749
8750 default:
8751 goto unknown;
8752 }
8753
8754 case 'l':
8755 switch (name[1])
8756 {
8757 case 'a':
8758 if (name[2] == 's' &&
8759 name[3] == 't')
8760 { /* last */
8761 return KEY_last;
8762 }
8763
8764 goto unknown;
8765
8766 case 'i':
8767 if (name[2] == 'n' &&
8768 name[3] == 'k')
8769 { /* link */
8770 return -KEY_link;
8771 }
8772
8773 goto unknown;
8774
8775 case 'o':
8776 if (name[2] == 'c' &&
8777 name[3] == 'k')
8778 { /* lock */
8779 return -KEY_lock;
8780 }
8781
8782 goto unknown;
8783
8784 default:
8785 goto unknown;
8786 }
8787
8788 case 'n':
8789 if (name[1] == 'e' &&
8790 name[2] == 'x' &&
8791 name[3] == 't')
8792 { /* next */
8793 return KEY_next;
8794 }
8795
8796 goto unknown;
8797
8798 case 'o':
8799 if (name[1] == 'p' &&
8800 name[2] == 'e' &&
8801 name[3] == 'n')
8802 { /* open */
8803 return -KEY_open;
8804 }
8805
8806 goto unknown;
8807
8808 case 'p':
8809 switch (name[1])
8810 {
8811 case 'a':
8812 if (name[2] == 'c' &&
8813 name[3] == 'k')
8814 { /* pack */
8815 return -KEY_pack;
8816 }
8817
8818 goto unknown;
8819
8820 case 'i':
8821 if (name[2] == 'p' &&
8822 name[3] == 'e')
8823 { /* pipe */
8824 return -KEY_pipe;
8825 }
8826
8827 goto unknown;
8828
8829 case 'u':
8830 if (name[2] == 's' &&
8831 name[3] == 'h')
8832 { /* push */
8833 return -KEY_push;
8834 }
8835
8836 goto unknown;
8837
8838 default:
8839 goto unknown;
8840 }
8841
8842 case 'r':
8843 switch (name[1])
8844 {
8845 case 'a':
8846 if (name[2] == 'n' &&
8847 name[3] == 'd')
8848 { /* rand */
8849 return -KEY_rand;
8850 }
8851
8852 goto unknown;
8853
8854 case 'e':
8855 switch (name[2])
8856 {
8857 case 'a':
8858 if (name[3] == 'd')
8859 { /* read */
8860 return -KEY_read;
8861 }
8862
8863 goto unknown;
8864
8865 case 'c':
8866 if (name[3] == 'v')
8867 { /* recv */
8868 return -KEY_recv;
8869 }
8870
8871 goto unknown;
8872
8873 case 'd':
8874 if (name[3] == 'o')
8875 { /* redo */
8876 return KEY_redo;
8877 }
8878
8879 goto unknown;
8880
8881 default:
8882 goto unknown;
8883 }
8884
8885 default:
8886 goto unknown;
8887 }
8888
8889 case 's':
8890 switch (name[1])
8891 {
8892 case 'e':
8893 switch (name[2])
8894 {
8895 case 'e':
8896 if (name[3] == 'k')
8897 { /* seek */
8898 return -KEY_seek;
8899 }
8900
8901 goto unknown;
8902
8903 case 'n':
8904 if (name[3] == 'd')
8905 { /* send */
8906 return -KEY_send;
8907 }
8908
8909 goto unknown;
8910
8911 default:
8912 goto unknown;
8913 }
8914
8915 case 'o':
8916 if (name[2] == 'r' &&
8917 name[3] == 't')
8918 { /* sort */
8919 return KEY_sort;
8920 }
8921
8922 goto unknown;
8923
8924 case 'q':
8925 if (name[2] == 'r' &&
8926 name[3] == 't')
8927 { /* sqrt */
8928 return -KEY_sqrt;
8929 }
8930
8931 goto unknown;
8932
8933 case 't':
8934 if (name[2] == 'a' &&
8935 name[3] == 't')
8936 { /* stat */
8937 return -KEY_stat;
8938 }
8939
8940 goto unknown;
8941
8942 default:
8943 goto unknown;
8944 }
8945
8946 case 't':
8947 switch (name[1])
8948 {
8949 case 'e':
8950 if (name[2] == 'l' &&
8951 name[3] == 'l')
8952 { /* tell */
8953 return -KEY_tell;
8954 }
8955
8956 goto unknown;
8957
8958 case 'i':
8959 switch (name[2])
8960 {
8961 case 'e':
8962 if (name[3] == 'd')
8963 { /* tied */
1db4d195 8964 return -KEY_tied;
4c3bbe0f
MHM
8965 }
8966
8967 goto unknown;
8968
8969 case 'm':
8970 if (name[3] == 'e')
8971 { /* time */
8972 return -KEY_time;
8973 }
8974
8975 goto unknown;
8976
8977 default:
8978 goto unknown;
8979 }
8980
8981 default:
8982 goto unknown;
8983 }
8984
8985 case 'w':
0d863452 8986 switch (name[1])
4c3bbe0f 8987 {
0d863452 8988 case 'a':
952306ac
RGS
8989 switch (name[2])
8990 {
8991 case 'i':
8992 if (name[3] == 't')
8993 { /* wait */
8994 return -KEY_wait;
8995 }
4c3bbe0f 8996
952306ac 8997 goto unknown;
4c3bbe0f 8998
952306ac
RGS
8999 case 'r':
9000 if (name[3] == 'n')
9001 { /* warn */
9002 return -KEY_warn;
9003 }
4c3bbe0f 9004
952306ac 9005 goto unknown;
4c3bbe0f 9006
952306ac
RGS
9007 default:
9008 goto unknown;
9009 }
0d863452
RH
9010
9011 case 'h':
9012 if (name[2] == 'e' &&
9013 name[3] == 'n')
9014 { /* when */
5458a98a 9015 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9016 }
4c3bbe0f 9017
952306ac 9018 goto unknown;
4c3bbe0f 9019
952306ac
RGS
9020 default:
9021 goto unknown;
9022 }
4c3bbe0f 9023
0d863452
RH
9024 default:
9025 goto unknown;
9026 }
9027
952306ac 9028 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9029 switch (name[0])
e2e1dd5a 9030 {
4c3bbe0f
MHM
9031 case 'B':
9032 if (name[1] == 'E' &&
9033 name[2] == 'G' &&
9034 name[3] == 'I' &&
9035 name[4] == 'N')
9036 { /* BEGIN */
9037 return KEY_BEGIN;
9038 }
9039
9040 goto unknown;
9041
9042 case 'C':
9043 if (name[1] == 'H' &&
9044 name[2] == 'E' &&
9045 name[3] == 'C' &&
9046 name[4] == 'K')
9047 { /* CHECK */
9048 return KEY_CHECK;
9049 }
9050
9051 goto unknown;
9052
9053 case 'a':
9054 switch (name[1])
9055 {
9056 case 'l':
9057 if (name[2] == 'a' &&
9058 name[3] == 'r' &&
9059 name[4] == 'm')
9060 { /* alarm */
9061 return -KEY_alarm;
9062 }
9063
9064 goto unknown;
9065
9066 case 't':
9067 if (name[2] == 'a' &&
9068 name[3] == 'n' &&
9069 name[4] == '2')
9070 { /* atan2 */
9071 return -KEY_atan2;
9072 }
9073
9074 goto unknown;
9075
9076 default:
9077 goto unknown;
9078 }
9079
9080 case 'b':
0d863452
RH
9081 switch (name[1])
9082 {
9083 case 'l':
9084 if (name[2] == 'e' &&
952306ac
RGS
9085 name[3] == 's' &&
9086 name[4] == 's')
9087 { /* bless */
9088 return -KEY_bless;
9089 }
4c3bbe0f 9090
952306ac 9091 goto unknown;
4c3bbe0f 9092
0d863452
RH
9093 case 'r':
9094 if (name[2] == 'e' &&
9095 name[3] == 'a' &&
9096 name[4] == 'k')
9097 { /* break */
5458a98a 9098 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9099 }
9100
9101 goto unknown;
9102
9103 default:
9104 goto unknown;
9105 }
9106
4c3bbe0f
MHM
9107 case 'c':
9108 switch (name[1])
9109 {
9110 case 'h':
9111 switch (name[2])
9112 {
9113 case 'd':
9114 if (name[3] == 'i' &&
9115 name[4] == 'r')
9116 { /* chdir */
9117 return -KEY_chdir;
9118 }
9119
9120 goto unknown;
9121
9122 case 'm':
9123 if (name[3] == 'o' &&
9124 name[4] == 'd')
9125 { /* chmod */
9126 return -KEY_chmod;
9127 }
9128
9129 goto unknown;
9130
9131 case 'o':
9132 switch (name[3])
9133 {
9134 case 'm':
9135 if (name[4] == 'p')
9136 { /* chomp */
9137 return -KEY_chomp;
9138 }
9139
9140 goto unknown;
9141
9142 case 'w':
9143 if (name[4] == 'n')
9144 { /* chown */
9145 return -KEY_chown;
9146 }
9147
9148 goto unknown;
9149
9150 default:
9151 goto unknown;
9152 }
9153
9154 default:
9155 goto unknown;
9156 }
9157
9158 case 'l':
9159 if (name[2] == 'o' &&
9160 name[3] == 's' &&
9161 name[4] == 'e')
9162 { /* close */
9163 return -KEY_close;
9164 }
9165
9166 goto unknown;
9167
9168 case 'r':
9169 if (name[2] == 'y' &&
9170 name[3] == 'p' &&
9171 name[4] == 't')
9172 { /* crypt */
9173 return -KEY_crypt;
9174 }
9175
9176 goto unknown;
9177
9178 default:
9179 goto unknown;
9180 }
9181
9182 case 'e':
9183 if (name[1] == 'l' &&
9184 name[2] == 's' &&
9185 name[3] == 'i' &&
9186 name[4] == 'f')
9187 { /* elsif */
9188 return KEY_elsif;
9189 }
9190
9191 goto unknown;
9192
9193 case 'f':
9194 switch (name[1])
9195 {
9196 case 'c':
9197 if (name[2] == 'n' &&
9198 name[3] == 't' &&
9199 name[4] == 'l')
9200 { /* fcntl */
9201 return -KEY_fcntl;
9202 }
9203
9204 goto unknown;
9205
9206 case 'l':
9207 if (name[2] == 'o' &&
9208 name[3] == 'c' &&
9209 name[4] == 'k')
9210 { /* flock */
9211 return -KEY_flock;
9212 }
9213
9214 goto unknown;
9215
9216 default:
9217 goto unknown;
9218 }
9219
0d863452
RH
9220 case 'g':
9221 if (name[1] == 'i' &&
9222 name[2] == 'v' &&
9223 name[3] == 'e' &&
9224 name[4] == 'n')
9225 { /* given */
5458a98a 9226 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9227 }
9228
9229 goto unknown;
9230
4c3bbe0f
MHM
9231 case 'i':
9232 switch (name[1])
9233 {
9234 case 'n':
9235 if (name[2] == 'd' &&
9236 name[3] == 'e' &&
9237 name[4] == 'x')
9238 { /* index */
9239 return -KEY_index;
9240 }
9241
9242 goto unknown;
9243
9244 case 'o':
9245 if (name[2] == 'c' &&
9246 name[3] == 't' &&
9247 name[4] == 'l')
9248 { /* ioctl */
9249 return -KEY_ioctl;
9250 }
9251
9252 goto unknown;
9253
9254 default:
9255 goto unknown;
9256 }
9257
9258 case 'l':
9259 switch (name[1])
9260 {
9261 case 'o':
9262 if (name[2] == 'c' &&
9263 name[3] == 'a' &&
9264 name[4] == 'l')
9265 { /* local */
9266 return KEY_local;
9267 }
9268
9269 goto unknown;
9270
9271 case 's':
9272 if (name[2] == 't' &&
9273 name[3] == 'a' &&
9274 name[4] == 't')
9275 { /* lstat */
9276 return -KEY_lstat;
9277 }
9278
9279 goto unknown;
9280
9281 default:
9282 goto unknown;
9283 }
9284
9285 case 'm':
9286 if (name[1] == 'k' &&
9287 name[2] == 'd' &&
9288 name[3] == 'i' &&
9289 name[4] == 'r')
9290 { /* mkdir */
9291 return -KEY_mkdir;
9292 }
9293
9294 goto unknown;
9295
9296 case 'p':
9297 if (name[1] == 'r' &&
9298 name[2] == 'i' &&
9299 name[3] == 'n' &&
9300 name[4] == 't')
9301 { /* print */
9302 return KEY_print;
9303 }
9304
9305 goto unknown;
9306
9307 case 'r':
9308 switch (name[1])
9309 {
9310 case 'e':
9311 if (name[2] == 's' &&
9312 name[3] == 'e' &&
9313 name[4] == 't')
9314 { /* reset */
9315 return -KEY_reset;
9316 }
9317
9318 goto unknown;
9319
9320 case 'm':
9321 if (name[2] == 'd' &&
9322 name[3] == 'i' &&
9323 name[4] == 'r')
9324 { /* rmdir */
9325 return -KEY_rmdir;
9326 }
9327
9328 goto unknown;
9329
9330 default:
9331 goto unknown;
9332 }
9333
9334 case 's':
9335 switch (name[1])
9336 {
9337 case 'e':
9338 if (name[2] == 'm' &&
9339 name[3] == 'o' &&
9340 name[4] == 'p')
9341 { /* semop */
9342 return -KEY_semop;
9343 }
9344
9345 goto unknown;
9346
9347 case 'h':
9348 if (name[2] == 'i' &&
9349 name[3] == 'f' &&
9350 name[4] == 't')
9351 { /* shift */
9352 return -KEY_shift;
9353 }
9354
9355 goto unknown;
9356
9357 case 'l':
9358 if (name[2] == 'e' &&
9359 name[3] == 'e' &&
9360 name[4] == 'p')
9361 { /* sleep */
9362 return -KEY_sleep;
9363 }
9364
9365 goto unknown;
9366
9367 case 'p':
9368 if (name[2] == 'l' &&
9369 name[3] == 'i' &&
9370 name[4] == 't')
9371 { /* split */
9372 return KEY_split;
9373 }
9374
9375 goto unknown;
9376
9377 case 'r':
9378 if (name[2] == 'a' &&
9379 name[3] == 'n' &&
9380 name[4] == 'd')
9381 { /* srand */
9382 return -KEY_srand;
9383 }
9384
9385 goto unknown;
9386
9387 case 't':
952306ac
RGS
9388 switch (name[2])
9389 {
9390 case 'a':
9391 if (name[3] == 't' &&
9392 name[4] == 'e')
9393 { /* state */
5458a98a 9394 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9395 }
4c3bbe0f 9396
952306ac
RGS
9397 goto unknown;
9398
9399 case 'u':
9400 if (name[3] == 'd' &&
9401 name[4] == 'y')
9402 { /* study */
9403 return KEY_study;
9404 }
9405
9406 goto unknown;
9407
9408 default:
9409 goto unknown;
9410 }
4c3bbe0f
MHM
9411
9412 default:
9413 goto unknown;
9414 }
9415
9416 case 't':
9417 if (name[1] == 'i' &&
9418 name[2] == 'm' &&
9419 name[3] == 'e' &&
9420 name[4] == 's')
9421 { /* times */
9422 return -KEY_times;
9423 }
9424
9425 goto unknown;
9426
9427 case 'u':
9428 switch (name[1])
9429 {
9430 case 'm':
9431 if (name[2] == 'a' &&
9432 name[3] == 's' &&
9433 name[4] == 'k')
9434 { /* umask */
9435 return -KEY_umask;
9436 }
9437
9438 goto unknown;
9439
9440 case 'n':
9441 switch (name[2])
9442 {
9443 case 'd':
9444 if (name[3] == 'e' &&
9445 name[4] == 'f')
9446 { /* undef */
9447 return KEY_undef;
9448 }
9449
9450 goto unknown;
9451
9452 case 't':
9453 if (name[3] == 'i')
9454 {
9455 switch (name[4])
9456 {
9457 case 'e':
9458 { /* untie */
1db4d195 9459 return -KEY_untie;
4c3bbe0f
MHM
9460 }
9461
4c3bbe0f
MHM
9462 case 'l':
9463 { /* until */
9464 return KEY_until;
9465 }
9466
4c3bbe0f
MHM
9467 default:
9468 goto unknown;
9469 }
9470 }
9471
9472 goto unknown;
9473
9474 default:
9475 goto unknown;
9476 }
9477
9478 case 't':
9479 if (name[2] == 'i' &&
9480 name[3] == 'm' &&
9481 name[4] == 'e')
9482 { /* utime */
9483 return -KEY_utime;
9484 }
9485
9486 goto unknown;
9487
9488 default:
9489 goto unknown;
9490 }
9491
9492 case 'w':
9493 switch (name[1])
9494 {
9495 case 'h':
9496 if (name[2] == 'i' &&
9497 name[3] == 'l' &&
9498 name[4] == 'e')
9499 { /* while */
9500 return KEY_while;
9501 }
9502
9503 goto unknown;
9504
9505 case 'r':
9506 if (name[2] == 'i' &&
9507 name[3] == 't' &&
9508 name[4] == 'e')
9509 { /* write */
9510 return -KEY_write;
9511 }
9512
9513 goto unknown;
9514
9515 default:
9516 goto unknown;
9517 }
9518
9519 default:
9520 goto unknown;
e2e1dd5a 9521 }
4c3bbe0f
MHM
9522
9523 case 6: /* 33 tokens of length 6 */
9524 switch (name[0])
9525 {
9526 case 'a':
9527 if (name[1] == 'c' &&
9528 name[2] == 'c' &&
9529 name[3] == 'e' &&
9530 name[4] == 'p' &&
9531 name[5] == 't')
9532 { /* accept */
9533 return -KEY_accept;
9534 }
9535
9536 goto unknown;
9537
9538 case 'c':
9539 switch (name[1])
9540 {
9541 case 'a':
9542 if (name[2] == 'l' &&
9543 name[3] == 'l' &&
9544 name[4] == 'e' &&
9545 name[5] == 'r')
9546 { /* caller */
9547 return -KEY_caller;
9548 }
9549
9550 goto unknown;
9551
9552 case 'h':
9553 if (name[2] == 'r' &&
9554 name[3] == 'o' &&
9555 name[4] == 'o' &&
9556 name[5] == 't')
9557 { /* chroot */
9558 return -KEY_chroot;
9559 }
9560
9561 goto unknown;
9562
9563 default:
9564 goto unknown;
9565 }
9566
9567 case 'd':
9568 if (name[1] == 'e' &&
9569 name[2] == 'l' &&
9570 name[3] == 'e' &&
9571 name[4] == 't' &&
9572 name[5] == 'e')
9573 { /* delete */
9574 return KEY_delete;
9575 }
9576
9577 goto unknown;
9578
9579 case 'e':
9580 switch (name[1])
9581 {
9582 case 'l':
9583 if (name[2] == 's' &&
9584 name[3] == 'e' &&
9585 name[4] == 'i' &&
9586 name[5] == 'f')
9587 { /* elseif */
9b387841 9588 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9589 }
9590
9591 goto unknown;
9592
9593 case 'x':
9594 if (name[2] == 'i' &&
9595 name[3] == 's' &&
9596 name[4] == 't' &&
9597 name[5] == 's')
9598 { /* exists */
9599 return KEY_exists;
9600 }
9601
9602 goto unknown;
9603
9604 default:
9605 goto unknown;
9606 }
9607
9608 case 'f':
9609 switch (name[1])
9610 {
9611 case 'i':
9612 if (name[2] == 'l' &&
9613 name[3] == 'e' &&
9614 name[4] == 'n' &&
9615 name[5] == 'o')
9616 { /* fileno */
9617 return -KEY_fileno;
9618 }
9619
9620 goto unknown;
9621
9622 case 'o':
9623 if (name[2] == 'r' &&
9624 name[3] == 'm' &&
9625 name[4] == 'a' &&
9626 name[5] == 't')
9627 { /* format */
9628 return KEY_format;
9629 }
9630
9631 goto unknown;
9632
9633 default:
9634 goto unknown;
9635 }
9636
9637 case 'g':
9638 if (name[1] == 'm' &&
9639 name[2] == 't' &&
9640 name[3] == 'i' &&
9641 name[4] == 'm' &&
9642 name[5] == 'e')
9643 { /* gmtime */
9644 return -KEY_gmtime;
9645 }
9646
9647 goto unknown;
9648
9649 case 'l':
9650 switch (name[1])
9651 {
9652 case 'e':
9653 if (name[2] == 'n' &&
9654 name[3] == 'g' &&
9655 name[4] == 't' &&
9656 name[5] == 'h')
9657 { /* length */
9658 return -KEY_length;
9659 }
9660
9661 goto unknown;
9662
9663 case 'i':
9664 if (name[2] == 's' &&
9665 name[3] == 't' &&
9666 name[4] == 'e' &&
9667 name[5] == 'n')
9668 { /* listen */
9669 return -KEY_listen;
9670 }
9671
9672 goto unknown;
9673
9674 default:
9675 goto unknown;
9676 }
9677
9678 case 'm':
9679 if (name[1] == 's' &&
9680 name[2] == 'g')
9681 {
9682 switch (name[3])
9683 {
9684 case 'c':
9685 if (name[4] == 't' &&
9686 name[5] == 'l')
9687 { /* msgctl */
9688 return -KEY_msgctl;
9689 }
9690
9691 goto unknown;
9692
9693 case 'g':
9694 if (name[4] == 'e' &&
9695 name[5] == 't')
9696 { /* msgget */
9697 return -KEY_msgget;
9698 }
9699
9700 goto unknown;
9701
9702 case 'r':
9703 if (name[4] == 'c' &&
9704 name[5] == 'v')
9705 { /* msgrcv */
9706 return -KEY_msgrcv;
9707 }
9708
9709 goto unknown;
9710
9711 case 's':
9712 if (name[4] == 'n' &&
9713 name[5] == 'd')
9714 { /* msgsnd */
9715 return -KEY_msgsnd;
9716 }
9717
9718 goto unknown;
9719
9720 default:
9721 goto unknown;
9722 }
9723 }
9724
9725 goto unknown;
9726
9727 case 'p':
9728 if (name[1] == 'r' &&
9729 name[2] == 'i' &&
9730 name[3] == 'n' &&
9731 name[4] == 't' &&
9732 name[5] == 'f')
9733 { /* printf */
9734 return KEY_printf;
9735 }
9736
9737 goto unknown;
9738
9739 case 'r':
9740 switch (name[1])
9741 {
9742 case 'e':
9743 switch (name[2])
9744 {
9745 case 'n':
9746 if (name[3] == 'a' &&
9747 name[4] == 'm' &&
9748 name[5] == 'e')
9749 { /* rename */
9750 return -KEY_rename;
9751 }
9752
9753 goto unknown;
9754
9755 case 't':
9756 if (name[3] == 'u' &&
9757 name[4] == 'r' &&
9758 name[5] == 'n')
9759 { /* return */
9760 return KEY_return;
9761 }
9762
9763 goto unknown;
9764
9765 default:
9766 goto unknown;
9767 }
9768
9769 case 'i':
9770 if (name[2] == 'n' &&
9771 name[3] == 'd' &&
9772 name[4] == 'e' &&
9773 name[5] == 'x')
9774 { /* rindex */
9775 return -KEY_rindex;
9776 }
9777
9778 goto unknown;
9779
9780 default:
9781 goto unknown;
9782 }
9783
9784 case 's':
9785 switch (name[1])
9786 {
9787 case 'c':
9788 if (name[2] == 'a' &&
9789 name[3] == 'l' &&
9790 name[4] == 'a' &&
9791 name[5] == 'r')
9792 { /* scalar */
9793 return KEY_scalar;
9794 }
9795
9796 goto unknown;
9797
9798 case 'e':
9799 switch (name[2])
9800 {
9801 case 'l':
9802 if (name[3] == 'e' &&
9803 name[4] == 'c' &&
9804 name[5] == 't')
9805 { /* select */
9806 return -KEY_select;
9807 }
9808
9809 goto unknown;
9810
9811 case 'm':
9812 switch (name[3])
9813 {
9814 case 'c':
9815 if (name[4] == 't' &&
9816 name[5] == 'l')
9817 { /* semctl */
9818 return -KEY_semctl;
9819 }
9820
9821 goto unknown;
9822
9823 case 'g':
9824 if (name[4] == 'e' &&
9825 name[5] == 't')
9826 { /* semget */
9827 return -KEY_semget;
9828 }
9829
9830 goto unknown;
9831
9832 default:
9833 goto unknown;
9834 }
9835
9836 default:
9837 goto unknown;
9838 }
9839
9840 case 'h':
9841 if (name[2] == 'm')
9842 {
9843 switch (name[3])
9844 {
9845 case 'c':
9846 if (name[4] == 't' &&
9847 name[5] == 'l')
9848 { /* shmctl */
9849 return -KEY_shmctl;
9850 }
9851
9852 goto unknown;
9853
9854 case 'g':
9855 if (name[4] == 'e' &&
9856 name[5] == 't')
9857 { /* shmget */
9858 return -KEY_shmget;
9859 }
9860
9861 goto unknown;
9862
9863 default:
9864 goto unknown;
9865 }
9866 }
9867
9868 goto unknown;
9869
9870 case 'o':
9871 if (name[2] == 'c' &&
9872 name[3] == 'k' &&
9873 name[4] == 'e' &&
9874 name[5] == 't')
9875 { /* socket */
9876 return -KEY_socket;
9877 }
9878
9879 goto unknown;
9880
9881 case 'p':
9882 if (name[2] == 'l' &&
9883 name[3] == 'i' &&
9884 name[4] == 'c' &&
9885 name[5] == 'e')
9886 { /* splice */
9887 return -KEY_splice;
9888 }
9889
9890 goto unknown;
9891
9892 case 'u':
9893 if (name[2] == 'b' &&
9894 name[3] == 's' &&
9895 name[4] == 't' &&
9896 name[5] == 'r')
9897 { /* substr */
9898 return -KEY_substr;
9899 }
9900
9901 goto unknown;
9902
9903 case 'y':
9904 if (name[2] == 's' &&
9905 name[3] == 't' &&
9906 name[4] == 'e' &&
9907 name[5] == 'm')
9908 { /* system */
9909 return -KEY_system;
9910 }
9911
9912 goto unknown;
9913
9914 default:
9915 goto unknown;
9916 }
9917
9918 case 'u':
9919 if (name[1] == 'n')
9920 {
9921 switch (name[2])
9922 {
9923 case 'l':
9924 switch (name[3])
9925 {
9926 case 'e':
9927 if (name[4] == 's' &&
9928 name[5] == 's')
9929 { /* unless */
9930 return KEY_unless;
9931 }
9932
9933 goto unknown;
9934
9935 case 'i':
9936 if (name[4] == 'n' &&
9937 name[5] == 'k')
9938 { /* unlink */
9939 return -KEY_unlink;
9940 }
9941
9942 goto unknown;
9943
9944 default:
9945 goto unknown;
9946 }
9947
9948 case 'p':
9949 if (name[3] == 'a' &&
9950 name[4] == 'c' &&
9951 name[5] == 'k')
9952 { /* unpack */
9953 return -KEY_unpack;
9954 }
9955
9956 goto unknown;
9957
9958 default:
9959 goto unknown;
9960 }
9961 }
9962
9963 goto unknown;
9964
9965 case 'v':
9966 if (name[1] == 'a' &&
9967 name[2] == 'l' &&
9968 name[3] == 'u' &&
9969 name[4] == 'e' &&
9970 name[5] == 's')
9971 { /* values */
9972 return -KEY_values;
9973 }
9974
9975 goto unknown;
9976
9977 default:
9978 goto unknown;
e2e1dd5a 9979 }
4c3bbe0f 9980
0d863452 9981 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9982 switch (name[0])
9983 {
9984 case 'D':
9985 if (name[1] == 'E' &&
9986 name[2] == 'S' &&
9987 name[3] == 'T' &&
9988 name[4] == 'R' &&
9989 name[5] == 'O' &&
9990 name[6] == 'Y')
9991 { /* DESTROY */
9992 return KEY_DESTROY;
9993 }
9994
9995 goto unknown;
9996
9997 case '_':
9998 if (name[1] == '_' &&
9999 name[2] == 'E' &&
10000 name[3] == 'N' &&
10001 name[4] == 'D' &&
10002 name[5] == '_' &&
10003 name[6] == '_')
10004 { /* __END__ */
10005 return KEY___END__;
10006 }
10007
10008 goto unknown;
10009
10010 case 'b':
10011 if (name[1] == 'i' &&
10012 name[2] == 'n' &&
10013 name[3] == 'm' &&
10014 name[4] == 'o' &&
10015 name[5] == 'd' &&
10016 name[6] == 'e')
10017 { /* binmode */
10018 return -KEY_binmode;
10019 }
10020
10021 goto unknown;
10022
10023 case 'c':
10024 if (name[1] == 'o' &&
10025 name[2] == 'n' &&
10026 name[3] == 'n' &&
10027 name[4] == 'e' &&
10028 name[5] == 'c' &&
10029 name[6] == 't')
10030 { /* connect */
10031 return -KEY_connect;
10032 }
10033
10034 goto unknown;
10035
10036 case 'd':
10037 switch (name[1])
10038 {
10039 case 'b':
10040 if (name[2] == 'm' &&
10041 name[3] == 'o' &&
10042 name[4] == 'p' &&
10043 name[5] == 'e' &&
10044 name[6] == 'n')
10045 { /* dbmopen */
10046 return -KEY_dbmopen;
10047 }
10048
10049 goto unknown;
10050
10051 case 'e':
0d863452
RH
10052 if (name[2] == 'f')
10053 {
10054 switch (name[3])
10055 {
10056 case 'a':
10057 if (name[4] == 'u' &&
10058 name[5] == 'l' &&
10059 name[6] == 't')
10060 { /* default */
5458a98a 10061 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10062 }
10063
10064 goto unknown;
10065
10066 case 'i':
10067 if (name[4] == 'n' &&
952306ac
RGS
10068 name[5] == 'e' &&
10069 name[6] == 'd')
10070 { /* defined */
10071 return KEY_defined;
10072 }
4c3bbe0f 10073
952306ac 10074 goto unknown;
4c3bbe0f 10075
952306ac
RGS
10076 default:
10077 goto unknown;
10078 }
0d863452
RH
10079 }
10080
10081 goto unknown;
10082
10083 default:
10084 goto unknown;
10085 }
4c3bbe0f
MHM
10086
10087 case 'f':
10088 if (name[1] == 'o' &&
10089 name[2] == 'r' &&
10090 name[3] == 'e' &&
10091 name[4] == 'a' &&
10092 name[5] == 'c' &&
10093 name[6] == 'h')
10094 { /* foreach */
10095 return KEY_foreach;
10096 }
10097
10098 goto unknown;
10099
10100 case 'g':
10101 if (name[1] == 'e' &&
10102 name[2] == 't' &&
10103 name[3] == 'p')
10104 {
10105 switch (name[4])
10106 {
10107 case 'g':
10108 if (name[5] == 'r' &&
10109 name[6] == 'p')
10110 { /* getpgrp */
10111 return -KEY_getpgrp;
10112 }
10113
10114 goto unknown;
10115
10116 case 'p':
10117 if (name[5] == 'i' &&
10118 name[6] == 'd')
10119 { /* getppid */
10120 return -KEY_getppid;
10121 }
10122
10123 goto unknown;
10124
10125 default:
10126 goto unknown;
10127 }
10128 }
10129
10130 goto unknown;
10131
10132 case 'l':
10133 if (name[1] == 'c' &&
10134 name[2] == 'f' &&
10135 name[3] == 'i' &&
10136 name[4] == 'r' &&
10137 name[5] == 's' &&
10138 name[6] == 't')
10139 { /* lcfirst */
10140 return -KEY_lcfirst;
10141 }
10142
10143 goto unknown;
10144
10145 case 'o':
10146 if (name[1] == 'p' &&
10147 name[2] == 'e' &&
10148 name[3] == 'n' &&
10149 name[4] == 'd' &&
10150 name[5] == 'i' &&
10151 name[6] == 'r')
10152 { /* opendir */
10153 return -KEY_opendir;
10154 }
10155
10156 goto unknown;
10157
10158 case 'p':
10159 if (name[1] == 'a' &&
10160 name[2] == 'c' &&
10161 name[3] == 'k' &&
10162 name[4] == 'a' &&
10163 name[5] == 'g' &&
10164 name[6] == 'e')
10165 { /* package */
10166 return KEY_package;
10167 }
10168
10169 goto unknown;
10170
10171 case 'r':
10172 if (name[1] == 'e')
10173 {
10174 switch (name[2])
10175 {
10176 case 'a':
10177 if (name[3] == 'd' &&
10178 name[4] == 'd' &&
10179 name[5] == 'i' &&
10180 name[6] == 'r')
10181 { /* readdir */
10182 return -KEY_readdir;
10183 }
10184
10185 goto unknown;
10186
10187 case 'q':
10188 if (name[3] == 'u' &&
10189 name[4] == 'i' &&
10190 name[5] == 'r' &&
10191 name[6] == 'e')
10192 { /* require */
10193 return KEY_require;
10194 }
10195
10196 goto unknown;
10197
10198 case 'v':
10199 if (name[3] == 'e' &&
10200 name[4] == 'r' &&
10201 name[5] == 's' &&
10202 name[6] == 'e')
10203 { /* reverse */
10204 return -KEY_reverse;
10205 }
10206
10207 goto unknown;
10208
10209 default:
10210 goto unknown;
10211 }
10212 }
10213
10214 goto unknown;
10215
10216 case 's':
10217 switch (name[1])
10218 {
10219 case 'e':
10220 switch (name[2])
10221 {
10222 case 'e':
10223 if (name[3] == 'k' &&
10224 name[4] == 'd' &&
10225 name[5] == 'i' &&
10226 name[6] == 'r')
10227 { /* seekdir */
10228 return -KEY_seekdir;
10229 }
10230
10231 goto unknown;
10232
10233 case 't':
10234 if (name[3] == 'p' &&
10235 name[4] == 'g' &&
10236 name[5] == 'r' &&
10237 name[6] == 'p')
10238 { /* setpgrp */
10239 return -KEY_setpgrp;
10240 }
10241
10242 goto unknown;
10243
10244 default:
10245 goto unknown;
10246 }
10247
10248 case 'h':
10249 if (name[2] == 'm' &&
10250 name[3] == 'r' &&
10251 name[4] == 'e' &&
10252 name[5] == 'a' &&
10253 name[6] == 'd')
10254 { /* shmread */
10255 return -KEY_shmread;
10256 }
10257
10258 goto unknown;
10259
10260 case 'p':
10261 if (name[2] == 'r' &&
10262 name[3] == 'i' &&
10263 name[4] == 'n' &&
10264 name[5] == 't' &&
10265 name[6] == 'f')
10266 { /* sprintf */
10267 return -KEY_sprintf;
10268 }
10269
10270 goto unknown;
10271
10272 case 'y':
10273 switch (name[2])
10274 {
10275 case 'm':
10276 if (name[3] == 'l' &&
10277 name[4] == 'i' &&
10278 name[5] == 'n' &&
10279 name[6] == 'k')
10280 { /* symlink */
10281 return -KEY_symlink;
10282 }
10283
10284 goto unknown;
10285
10286 case 's':
10287 switch (name[3])
10288 {
10289 case 'c':
10290 if (name[4] == 'a' &&
10291 name[5] == 'l' &&
10292 name[6] == 'l')
10293 { /* syscall */
10294 return -KEY_syscall;
10295 }
10296
10297 goto unknown;
10298
10299 case 'o':
10300 if (name[4] == 'p' &&
10301 name[5] == 'e' &&
10302 name[6] == 'n')
10303 { /* sysopen */
10304 return -KEY_sysopen;
10305 }
10306
10307 goto unknown;
10308
10309 case 'r':
10310 if (name[4] == 'e' &&
10311 name[5] == 'a' &&
10312 name[6] == 'd')
10313 { /* sysread */
10314 return -KEY_sysread;
10315 }
10316
10317 goto unknown;
10318
10319 case 's':
10320 if (name[4] == 'e' &&
10321 name[5] == 'e' &&
10322 name[6] == 'k')
10323 { /* sysseek */
10324 return -KEY_sysseek;
10325 }
10326
10327 goto unknown;
10328
10329 default:
10330 goto unknown;
10331 }
10332
10333 default:
10334 goto unknown;
10335 }
10336
10337 default:
10338 goto unknown;
10339 }
10340
10341 case 't':
10342 if (name[1] == 'e' &&
10343 name[2] == 'l' &&
10344 name[3] == 'l' &&
10345 name[4] == 'd' &&
10346 name[5] == 'i' &&
10347 name[6] == 'r')
10348 { /* telldir */
10349 return -KEY_telldir;
10350 }
10351
10352 goto unknown;
10353
10354 case 'u':
10355 switch (name[1])
10356 {
10357 case 'c':
10358 if (name[2] == 'f' &&
10359 name[3] == 'i' &&
10360 name[4] == 'r' &&
10361 name[5] == 's' &&
10362 name[6] == 't')
10363 { /* ucfirst */
10364 return -KEY_ucfirst;
10365 }
10366
10367 goto unknown;
10368
10369 case 'n':
10370 if (name[2] == 's' &&
10371 name[3] == 'h' &&
10372 name[4] == 'i' &&
10373 name[5] == 'f' &&
10374 name[6] == 't')
10375 { /* unshift */
10376 return -KEY_unshift;
10377 }
10378
10379 goto unknown;
10380
10381 default:
10382 goto unknown;
10383 }
10384
10385 case 'w':
10386 if (name[1] == 'a' &&
10387 name[2] == 'i' &&
10388 name[3] == 't' &&
10389 name[4] == 'p' &&
10390 name[5] == 'i' &&
10391 name[6] == 'd')
10392 { /* waitpid */
10393 return -KEY_waitpid;
10394 }
10395
10396 goto unknown;
10397
10398 default:
10399 goto unknown;
10400 }
10401
10402 case 8: /* 26 tokens of length 8 */
10403 switch (name[0])
10404 {
10405 case 'A':
10406 if (name[1] == 'U' &&
10407 name[2] == 'T' &&
10408 name[3] == 'O' &&
10409 name[4] == 'L' &&
10410 name[5] == 'O' &&
10411 name[6] == 'A' &&
10412 name[7] == 'D')
10413 { /* AUTOLOAD */
10414 return KEY_AUTOLOAD;
10415 }
10416
10417 goto unknown;
10418
10419 case '_':
10420 if (name[1] == '_')
10421 {
10422 switch (name[2])
10423 {
10424 case 'D':
10425 if (name[3] == 'A' &&
10426 name[4] == 'T' &&
10427 name[5] == 'A' &&
10428 name[6] == '_' &&
10429 name[7] == '_')
10430 { /* __DATA__ */
10431 return KEY___DATA__;
10432 }
10433
10434 goto unknown;
10435
10436 case 'F':
10437 if (name[3] == 'I' &&
10438 name[4] == 'L' &&
10439 name[5] == 'E' &&
10440 name[6] == '_' &&
10441 name[7] == '_')
10442 { /* __FILE__ */
10443 return -KEY___FILE__;
10444 }
10445
10446 goto unknown;
10447
10448 case 'L':
10449 if (name[3] == 'I' &&
10450 name[4] == 'N' &&
10451 name[5] == 'E' &&
10452 name[6] == '_' &&
10453 name[7] == '_')
10454 { /* __LINE__ */
10455 return -KEY___LINE__;
10456 }
10457
10458 goto unknown;
10459
10460 default:
10461 goto unknown;
10462 }
10463 }
10464
10465 goto unknown;
10466
10467 case 'c':
10468 switch (name[1])
10469 {
10470 case 'l':
10471 if (name[2] == 'o' &&
10472 name[3] == 's' &&
10473 name[4] == 'e' &&
10474 name[5] == 'd' &&
10475 name[6] == 'i' &&
10476 name[7] == 'r')
10477 { /* closedir */
10478 return -KEY_closedir;
10479 }
10480
10481 goto unknown;
10482
10483 case 'o':
10484 if (name[2] == 'n' &&
10485 name[3] == 't' &&
10486 name[4] == 'i' &&
10487 name[5] == 'n' &&
10488 name[6] == 'u' &&
10489 name[7] == 'e')
10490 { /* continue */
10491 return -KEY_continue;
10492 }
10493
10494 goto unknown;
10495
10496 default:
10497 goto unknown;
10498 }
10499
10500 case 'd':
10501 if (name[1] == 'b' &&
10502 name[2] == 'm' &&
10503 name[3] == 'c' &&
10504 name[4] == 'l' &&
10505 name[5] == 'o' &&
10506 name[6] == 's' &&
10507 name[7] == 'e')
10508 { /* dbmclose */
10509 return -KEY_dbmclose;
10510 }
10511
10512 goto unknown;
10513
10514 case 'e':
10515 if (name[1] == 'n' &&
10516 name[2] == 'd')
10517 {
10518 switch (name[3])
10519 {
10520 case 'g':
10521 if (name[4] == 'r' &&
10522 name[5] == 'e' &&
10523 name[6] == 'n' &&
10524 name[7] == 't')
10525 { /* endgrent */
10526 return -KEY_endgrent;
10527 }
10528
10529 goto unknown;
10530
10531 case 'p':
10532 if (name[4] == 'w' &&
10533 name[5] == 'e' &&
10534 name[6] == 'n' &&
10535 name[7] == 't')
10536 { /* endpwent */
10537 return -KEY_endpwent;
10538 }
10539
10540 goto unknown;
10541
10542 default:
10543 goto unknown;
10544 }
10545 }
10546
10547 goto unknown;
10548
10549 case 'f':
10550 if (name[1] == 'o' &&
10551 name[2] == 'r' &&
10552 name[3] == 'm' &&
10553 name[4] == 'l' &&
10554 name[5] == 'i' &&
10555 name[6] == 'n' &&
10556 name[7] == 'e')
10557 { /* formline */
10558 return -KEY_formline;
10559 }
10560
10561 goto unknown;
10562
10563 case 'g':
10564 if (name[1] == 'e' &&
10565 name[2] == 't')
10566 {
10567 switch (name[3])
10568 {
10569 case 'g':
10570 if (name[4] == 'r')
10571 {
10572 switch (name[5])
10573 {
10574 case 'e':
10575 if (name[6] == 'n' &&
10576 name[7] == 't')
10577 { /* getgrent */
10578 return -KEY_getgrent;
10579 }
10580
10581 goto unknown;
10582
10583 case 'g':
10584 if (name[6] == 'i' &&
10585 name[7] == 'd')
10586 { /* getgrgid */
10587 return -KEY_getgrgid;
10588 }
10589
10590 goto unknown;
10591
10592 case 'n':
10593 if (name[6] == 'a' &&
10594 name[7] == 'm')
10595 { /* getgrnam */
10596 return -KEY_getgrnam;
10597 }
10598
10599 goto unknown;
10600
10601 default:
10602 goto unknown;
10603 }
10604 }
10605
10606 goto unknown;
10607
10608 case 'l':
10609 if (name[4] == 'o' &&
10610 name[5] == 'g' &&
10611 name[6] == 'i' &&
10612 name[7] == 'n')
10613 { /* getlogin */
10614 return -KEY_getlogin;
10615 }
10616
10617 goto unknown;
10618
10619 case 'p':
10620 if (name[4] == 'w')
10621 {
10622 switch (name[5])
10623 {
10624 case 'e':
10625 if (name[6] == 'n' &&
10626 name[7] == 't')
10627 { /* getpwent */
10628 return -KEY_getpwent;
10629 }
10630
10631 goto unknown;
10632
10633 case 'n':
10634 if (name[6] == 'a' &&
10635 name[7] == 'm')
10636 { /* getpwnam */
10637 return -KEY_getpwnam;
10638 }
10639
10640 goto unknown;
10641
10642 case 'u':
10643 if (name[6] == 'i' &&
10644 name[7] == 'd')
10645 { /* getpwuid */
10646 return -KEY_getpwuid;
10647 }
10648
10649 goto unknown;
10650
10651 default:
10652 goto unknown;
10653 }
10654 }
10655
10656 goto unknown;
10657
10658 default:
10659 goto unknown;
10660 }
10661 }
10662
10663 goto unknown;
10664
10665 case 'r':
10666 if (name[1] == 'e' &&
10667 name[2] == 'a' &&
10668 name[3] == 'd')
10669 {
10670 switch (name[4])
10671 {
10672 case 'l':
10673 if (name[5] == 'i' &&
10674 name[6] == 'n')
10675 {
10676 switch (name[7])
10677 {
10678 case 'e':
10679 { /* readline */
10680 return -KEY_readline;
10681 }
10682
4c3bbe0f
MHM
10683 case 'k':
10684 { /* readlink */
10685 return -KEY_readlink;
10686 }
10687
4c3bbe0f
MHM
10688 default:
10689 goto unknown;
10690 }
10691 }
10692
10693 goto unknown;
10694
10695 case 'p':
10696 if (name[5] == 'i' &&
10697 name[6] == 'p' &&
10698 name[7] == 'e')
10699 { /* readpipe */
10700 return -KEY_readpipe;
10701 }
10702
10703 goto unknown;
10704
10705 default:
10706 goto unknown;
10707 }
10708 }
10709
10710 goto unknown;
10711
10712 case 's':
10713 switch (name[1])
10714 {
10715 case 'e':
10716 if (name[2] == 't')
10717 {
10718 switch (name[3])
10719 {
10720 case 'g':
10721 if (name[4] == 'r' &&
10722 name[5] == 'e' &&
10723 name[6] == 'n' &&
10724 name[7] == 't')
10725 { /* setgrent */
10726 return -KEY_setgrent;
10727 }
10728
10729 goto unknown;
10730
10731 case 'p':
10732 if (name[4] == 'w' &&
10733 name[5] == 'e' &&
10734 name[6] == 'n' &&
10735 name[7] == 't')
10736 { /* setpwent */
10737 return -KEY_setpwent;
10738 }
10739
10740 goto unknown;
10741
10742 default:
10743 goto unknown;
10744 }
10745 }
10746
10747 goto unknown;
10748
10749 case 'h':
10750 switch (name[2])
10751 {
10752 case 'm':
10753 if (name[3] == 'w' &&
10754 name[4] == 'r' &&
10755 name[5] == 'i' &&
10756 name[6] == 't' &&
10757 name[7] == 'e')
10758 { /* shmwrite */
10759 return -KEY_shmwrite;
10760 }
10761
10762 goto unknown;
10763
10764 case 'u':
10765 if (name[3] == 't' &&
10766 name[4] == 'd' &&
10767 name[5] == 'o' &&
10768 name[6] == 'w' &&
10769 name[7] == 'n')
10770 { /* shutdown */
10771 return -KEY_shutdown;
10772 }
10773
10774 goto unknown;
10775
10776 default:
10777 goto unknown;
10778 }
10779
10780 case 'y':
10781 if (name[2] == 's' &&
10782 name[3] == 'w' &&
10783 name[4] == 'r' &&
10784 name[5] == 'i' &&
10785 name[6] == 't' &&
10786 name[7] == 'e')
10787 { /* syswrite */
10788 return -KEY_syswrite;
10789 }
10790
10791 goto unknown;
10792
10793 default:
10794 goto unknown;
10795 }
10796
10797 case 't':
10798 if (name[1] == 'r' &&
10799 name[2] == 'u' &&
10800 name[3] == 'n' &&
10801 name[4] == 'c' &&
10802 name[5] == 'a' &&
10803 name[6] == 't' &&
10804 name[7] == 'e')
10805 { /* truncate */
10806 return -KEY_truncate;
10807 }
10808
10809 goto unknown;
10810
10811 default:
10812 goto unknown;
10813 }
10814
3c10abe3 10815 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10816 switch (name[0])
10817 {
3c10abe3
AG
10818 case 'U':
10819 if (name[1] == 'N' &&
10820 name[2] == 'I' &&
10821 name[3] == 'T' &&
10822 name[4] == 'C' &&
10823 name[5] == 'H' &&
10824 name[6] == 'E' &&
10825 name[7] == 'C' &&
10826 name[8] == 'K')
10827 { /* UNITCHECK */
10828 return KEY_UNITCHECK;
10829 }
10830
10831 goto unknown;
10832
4c3bbe0f
MHM
10833 case 'e':
10834 if (name[1] == 'n' &&
10835 name[2] == 'd' &&
10836 name[3] == 'n' &&
10837 name[4] == 'e' &&
10838 name[5] == 't' &&
10839 name[6] == 'e' &&
10840 name[7] == 'n' &&
10841 name[8] == 't')
10842 { /* endnetent */
10843 return -KEY_endnetent;
10844 }
10845
10846 goto unknown;
10847
10848 case 'g':
10849 if (name[1] == 'e' &&
10850 name[2] == 't' &&
10851 name[3] == 'n' &&
10852 name[4] == 'e' &&
10853 name[5] == 't' &&
10854 name[6] == 'e' &&
10855 name[7] == 'n' &&
10856 name[8] == 't')
10857 { /* getnetent */
10858 return -KEY_getnetent;
10859 }
10860
10861 goto unknown;
10862
10863 case 'l':
10864 if (name[1] == 'o' &&
10865 name[2] == 'c' &&
10866 name[3] == 'a' &&
10867 name[4] == 'l' &&
10868 name[5] == 't' &&
10869 name[6] == 'i' &&
10870 name[7] == 'm' &&
10871 name[8] == 'e')
10872 { /* localtime */
10873 return -KEY_localtime;
10874 }
10875
10876 goto unknown;
10877
10878 case 'p':
10879 if (name[1] == 'r' &&
10880 name[2] == 'o' &&
10881 name[3] == 't' &&
10882 name[4] == 'o' &&
10883 name[5] == 't' &&
10884 name[6] == 'y' &&
10885 name[7] == 'p' &&
10886 name[8] == 'e')
10887 { /* prototype */
10888 return KEY_prototype;
10889 }
10890
10891 goto unknown;
10892
10893 case 'q':
10894 if (name[1] == 'u' &&
10895 name[2] == 'o' &&
10896 name[3] == 't' &&
10897 name[4] == 'e' &&
10898 name[5] == 'm' &&
10899 name[6] == 'e' &&
10900 name[7] == 't' &&
10901 name[8] == 'a')
10902 { /* quotemeta */
10903 return -KEY_quotemeta;
10904 }
10905
10906 goto unknown;
10907
10908 case 'r':
10909 if (name[1] == 'e' &&
10910 name[2] == 'w' &&
10911 name[3] == 'i' &&
10912 name[4] == 'n' &&
10913 name[5] == 'd' &&
10914 name[6] == 'd' &&
10915 name[7] == 'i' &&
10916 name[8] == 'r')
10917 { /* rewinddir */
10918 return -KEY_rewinddir;
10919 }
10920
10921 goto unknown;
10922
10923 case 's':
10924 if (name[1] == 'e' &&
10925 name[2] == 't' &&
10926 name[3] == 'n' &&
10927 name[4] == 'e' &&
10928 name[5] == 't' &&
10929 name[6] == 'e' &&
10930 name[7] == 'n' &&
10931 name[8] == 't')
10932 { /* setnetent */
10933 return -KEY_setnetent;
10934 }
10935
10936 goto unknown;
10937
10938 case 'w':
10939 if (name[1] == 'a' &&
10940 name[2] == 'n' &&
10941 name[3] == 't' &&
10942 name[4] == 'a' &&
10943 name[5] == 'r' &&
10944 name[6] == 'r' &&
10945 name[7] == 'a' &&
10946 name[8] == 'y')
10947 { /* wantarray */
10948 return -KEY_wantarray;
10949 }
10950
10951 goto unknown;
10952
10953 default:
10954 goto unknown;
10955 }
10956
10957 case 10: /* 9 tokens of length 10 */
10958 switch (name[0])
10959 {
10960 case 'e':
10961 if (name[1] == 'n' &&
10962 name[2] == 'd')
10963 {
10964 switch (name[3])
10965 {
10966 case 'h':
10967 if (name[4] == 'o' &&
10968 name[5] == 's' &&
10969 name[6] == 't' &&
10970 name[7] == 'e' &&
10971 name[8] == 'n' &&
10972 name[9] == 't')
10973 { /* endhostent */
10974 return -KEY_endhostent;
10975 }
10976
10977 goto unknown;
10978
10979 case 's':
10980 if (name[4] == 'e' &&
10981 name[5] == 'r' &&
10982 name[6] == 'v' &&
10983 name[7] == 'e' &&
10984 name[8] == 'n' &&
10985 name[9] == 't')
10986 { /* endservent */
10987 return -KEY_endservent;
10988 }
10989
10990 goto unknown;
10991
10992 default:
10993 goto unknown;
10994 }
10995 }
10996
10997 goto unknown;
10998
10999 case 'g':
11000 if (name[1] == 'e' &&
11001 name[2] == 't')
11002 {
11003 switch (name[3])
11004 {
11005 case 'h':
11006 if (name[4] == 'o' &&
11007 name[5] == 's' &&
11008 name[6] == 't' &&
11009 name[7] == 'e' &&
11010 name[8] == 'n' &&
11011 name[9] == 't')
11012 { /* gethostent */
11013 return -KEY_gethostent;
11014 }
11015
11016 goto unknown;
11017
11018 case 's':
11019 switch (name[4])
11020 {
11021 case 'e':
11022 if (name[5] == 'r' &&
11023 name[6] == 'v' &&
11024 name[7] == 'e' &&
11025 name[8] == 'n' &&
11026 name[9] == 't')
11027 { /* getservent */
11028 return -KEY_getservent;
11029 }
11030
11031 goto unknown;
11032
11033 case 'o':
11034 if (name[5] == 'c' &&
11035 name[6] == 'k' &&
11036 name[7] == 'o' &&
11037 name[8] == 'p' &&
11038 name[9] == 't')
11039 { /* getsockopt */
11040 return -KEY_getsockopt;
11041 }
11042
11043 goto unknown;
11044
11045 default:
11046 goto unknown;
11047 }
11048
11049 default:
11050 goto unknown;
11051 }
11052 }
11053
11054 goto unknown;
11055
11056 case 's':
11057 switch (name[1])
11058 {
11059 case 'e':
11060 if (name[2] == 't')
11061 {
11062 switch (name[3])
11063 {
11064 case 'h':
11065 if (name[4] == 'o' &&
11066 name[5] == 's' &&
11067 name[6] == 't' &&
11068 name[7] == 'e' &&
11069 name[8] == 'n' &&
11070 name[9] == 't')
11071 { /* sethostent */
11072 return -KEY_sethostent;
11073 }
11074
11075 goto unknown;
11076
11077 case 's':
11078 switch (name[4])
11079 {
11080 case 'e':
11081 if (name[5] == 'r' &&
11082 name[6] == 'v' &&
11083 name[7] == 'e' &&
11084 name[8] == 'n' &&
11085 name[9] == 't')
11086 { /* setservent */
11087 return -KEY_setservent;
11088 }
11089
11090 goto unknown;
11091
11092 case 'o':
11093 if (name[5] == 'c' &&
11094 name[6] == 'k' &&
11095 name[7] == 'o' &&
11096 name[8] == 'p' &&
11097 name[9] == 't')
11098 { /* setsockopt */
11099 return -KEY_setsockopt;
11100 }
11101
11102 goto unknown;
11103
11104 default:
11105 goto unknown;
11106 }
11107
11108 default:
11109 goto unknown;
11110 }
11111 }
11112
11113 goto unknown;
11114
11115 case 'o':
11116 if (name[2] == 'c' &&
11117 name[3] == 'k' &&
11118 name[4] == 'e' &&
11119 name[5] == 't' &&
11120 name[6] == 'p' &&
11121 name[7] == 'a' &&
11122 name[8] == 'i' &&
11123 name[9] == 'r')
11124 { /* socketpair */
11125 return -KEY_socketpair;
11126 }
11127
11128 goto unknown;
11129
11130 default:
11131 goto unknown;
11132 }
11133
11134 default:
11135 goto unknown;
e2e1dd5a 11136 }
4c3bbe0f
MHM
11137
11138 case 11: /* 8 tokens of length 11 */
11139 switch (name[0])
11140 {
11141 case '_':
11142 if (name[1] == '_' &&
11143 name[2] == 'P' &&
11144 name[3] == 'A' &&
11145 name[4] == 'C' &&
11146 name[5] == 'K' &&
11147 name[6] == 'A' &&
11148 name[7] == 'G' &&
11149 name[8] == 'E' &&
11150 name[9] == '_' &&
11151 name[10] == '_')
11152 { /* __PACKAGE__ */
11153 return -KEY___PACKAGE__;
11154 }
11155
11156 goto unknown;
11157
11158 case 'e':
11159 if (name[1] == 'n' &&
11160 name[2] == 'd' &&
11161 name[3] == 'p' &&
11162 name[4] == 'r' &&
11163 name[5] == 'o' &&
11164 name[6] == 't' &&
11165 name[7] == 'o' &&
11166 name[8] == 'e' &&
11167 name[9] == 'n' &&
11168 name[10] == 't')
11169 { /* endprotoent */
11170 return -KEY_endprotoent;
11171 }
11172
11173 goto unknown;
11174
11175 case 'g':
11176 if (name[1] == 'e' &&
11177 name[2] == 't')
11178 {
11179 switch (name[3])
11180 {
11181 case 'p':
11182 switch (name[4])
11183 {
11184 case 'e':
11185 if (name[5] == 'e' &&
11186 name[6] == 'r' &&
11187 name[7] == 'n' &&
11188 name[8] == 'a' &&
11189 name[9] == 'm' &&
11190 name[10] == 'e')
11191 { /* getpeername */
11192 return -KEY_getpeername;
11193 }
11194
11195 goto unknown;
11196
11197 case 'r':
11198 switch (name[5])
11199 {
11200 case 'i':
11201 if (name[6] == 'o' &&
11202 name[7] == 'r' &&
11203 name[8] == 'i' &&
11204 name[9] == 't' &&
11205 name[10] == 'y')
11206 { /* getpriority */
11207 return -KEY_getpriority;
11208 }
11209
11210 goto unknown;
11211
11212 case 'o':
11213 if (name[6] == 't' &&
11214 name[7] == 'o' &&
11215 name[8] == 'e' &&
11216 name[9] == 'n' &&
11217 name[10] == 't')
11218 { /* getprotoent */
11219 return -KEY_getprotoent;
11220 }
11221
11222 goto unknown;
11223
11224 default:
11225 goto unknown;
11226 }
11227
11228 default:
11229 goto unknown;
11230 }
11231
11232 case 's':
11233 if (name[4] == 'o' &&
11234 name[5] == 'c' &&
11235 name[6] == 'k' &&
11236 name[7] == 'n' &&
11237 name[8] == 'a' &&
11238 name[9] == 'm' &&
11239 name[10] == 'e')
11240 { /* getsockname */
11241 return -KEY_getsockname;
11242 }
11243
11244 goto unknown;
11245
11246 default:
11247 goto unknown;
11248 }
11249 }
11250
11251 goto unknown;
11252
11253 case 's':
11254 if (name[1] == 'e' &&
11255 name[2] == 't' &&
11256 name[3] == 'p' &&
11257 name[4] == 'r')
11258 {
11259 switch (name[5])
11260 {
11261 case 'i':
11262 if (name[6] == 'o' &&
11263 name[7] == 'r' &&
11264 name[8] == 'i' &&
11265 name[9] == 't' &&
11266 name[10] == 'y')
11267 { /* setpriority */
11268 return -KEY_setpriority;
11269 }
11270
11271 goto unknown;
11272
11273 case 'o':
11274 if (name[6] == 't' &&
11275 name[7] == 'o' &&
11276 name[8] == 'e' &&
11277 name[9] == 'n' &&
11278 name[10] == 't')
11279 { /* setprotoent */
11280 return -KEY_setprotoent;
11281 }
11282
11283 goto unknown;
11284
11285 default:
11286 goto unknown;
11287 }
11288 }
11289
11290 goto unknown;
11291
11292 default:
11293 goto unknown;
e2e1dd5a 11294 }
4c3bbe0f
MHM
11295
11296 case 12: /* 2 tokens of length 12 */
11297 if (name[0] == 'g' &&
11298 name[1] == 'e' &&
11299 name[2] == 't' &&
11300 name[3] == 'n' &&
11301 name[4] == 'e' &&
11302 name[5] == 't' &&
11303 name[6] == 'b' &&
11304 name[7] == 'y')
11305 {
11306 switch (name[8])
11307 {
11308 case 'a':
11309 if (name[9] == 'd' &&
11310 name[10] == 'd' &&
11311 name[11] == 'r')
11312 { /* getnetbyaddr */
11313 return -KEY_getnetbyaddr;
11314 }
11315
11316 goto unknown;
11317
11318 case 'n':
11319 if (name[9] == 'a' &&
11320 name[10] == 'm' &&
11321 name[11] == 'e')
11322 { /* getnetbyname */
11323 return -KEY_getnetbyname;
11324 }
11325
11326 goto unknown;
11327
11328 default:
11329 goto unknown;
11330 }
e2e1dd5a 11331 }
4c3bbe0f
MHM
11332
11333 goto unknown;
11334
11335 case 13: /* 4 tokens of length 13 */
11336 if (name[0] == 'g' &&
11337 name[1] == 'e' &&
11338 name[2] == 't')
11339 {
11340 switch (name[3])
11341 {
11342 case 'h':
11343 if (name[4] == 'o' &&
11344 name[5] == 's' &&
11345 name[6] == 't' &&
11346 name[7] == 'b' &&
11347 name[8] == 'y')
11348 {
11349 switch (name[9])
11350 {
11351 case 'a':
11352 if (name[10] == 'd' &&
11353 name[11] == 'd' &&
11354 name[12] == 'r')
11355 { /* gethostbyaddr */
11356 return -KEY_gethostbyaddr;
11357 }
11358
11359 goto unknown;
11360
11361 case 'n':
11362 if (name[10] == 'a' &&
11363 name[11] == 'm' &&
11364 name[12] == 'e')
11365 { /* gethostbyname */
11366 return -KEY_gethostbyname;
11367 }
11368
11369 goto unknown;
11370
11371 default:
11372 goto unknown;
11373 }
11374 }
11375
11376 goto unknown;
11377
11378 case 's':
11379 if (name[4] == 'e' &&
11380 name[5] == 'r' &&
11381 name[6] == 'v' &&
11382 name[7] == 'b' &&
11383 name[8] == 'y')
11384 {
11385 switch (name[9])
11386 {
11387 case 'n':
11388 if (name[10] == 'a' &&
11389 name[11] == 'm' &&
11390 name[12] == 'e')
11391 { /* getservbyname */
11392 return -KEY_getservbyname;
11393 }
11394
11395 goto unknown;
11396
11397 case 'p':
11398 if (name[10] == 'o' &&
11399 name[11] == 'r' &&
11400 name[12] == 't')
11401 { /* getservbyport */
11402 return -KEY_getservbyport;
11403 }
11404
11405 goto unknown;
11406
11407 default:
11408 goto unknown;
11409 }
11410 }
11411
11412 goto unknown;
11413
11414 default:
11415 goto unknown;
11416 }
e2e1dd5a 11417 }
4c3bbe0f
MHM
11418
11419 goto unknown;
11420
11421 case 14: /* 1 tokens of length 14 */
11422 if (name[0] == 'g' &&
11423 name[1] == 'e' &&
11424 name[2] == 't' &&
11425 name[3] == 'p' &&
11426 name[4] == 'r' &&
11427 name[5] == 'o' &&
11428 name[6] == 't' &&
11429 name[7] == 'o' &&
11430 name[8] == 'b' &&
11431 name[9] == 'y' &&
11432 name[10] == 'n' &&
11433 name[11] == 'a' &&
11434 name[12] == 'm' &&
11435 name[13] == 'e')
11436 { /* getprotobyname */
11437 return -KEY_getprotobyname;
11438 }
11439
11440 goto unknown;
11441
11442 case 16: /* 1 tokens of length 16 */
11443 if (name[0] == 'g' &&
11444 name[1] == 'e' &&
11445 name[2] == 't' &&
11446 name[3] == 'p' &&
11447 name[4] == 'r' &&
11448 name[5] == 'o' &&
11449 name[6] == 't' &&
11450 name[7] == 'o' &&
11451 name[8] == 'b' &&
11452 name[9] == 'y' &&
11453 name[10] == 'n' &&
11454 name[11] == 'u' &&
11455 name[12] == 'm' &&
11456 name[13] == 'b' &&
11457 name[14] == 'e' &&
11458 name[15] == 'r')
11459 { /* getprotobynumber */
11460 return -KEY_getprotobynumber;
11461 }
11462
11463 goto unknown;
11464
11465 default:
11466 goto unknown;
e2e1dd5a 11467 }
4c3bbe0f
MHM
11468
11469unknown:
e2e1dd5a 11470 return 0;
a687059c
LW
11471}
11472
76e3520e 11473STATIC void
c94115d8 11474S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11475{
97aff369 11476 dVAR;
2f3197b3 11477
7918f24d
NC
11478 PERL_ARGS_ASSERT_CHECKCOMMA;
11479
d008e5eb 11480 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11481 if (ckWARN(WARN_SYNTAX)) {
11482 int level = 1;
26ff0806 11483 const char *w;
d008e5eb
GS
11484 for (w = s+2; *w && level; w++) {
11485 if (*w == '(')
11486 ++level;
11487 else if (*w == ')')
11488 --level;
11489 }
888fea98
NC
11490 while (isSPACE(*w))
11491 ++w;
b1439985
RGS
11492 /* the list of chars below is for end of statements or
11493 * block / parens, boolean operators (&&, ||, //) and branch
11494 * constructs (or, and, if, until, unless, while, err, for).
11495 * Not a very solid hack... */
11496 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11497 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11498 "%s (...) interpreted as function",name);
d008e5eb 11499 }
2f3197b3 11500 }
3280af22 11501 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11502 s++;
a687059c
LW
11503 if (*s == '(')
11504 s++;
3280af22 11505 while (s < PL_bufend && isSPACE(*s))
a687059c 11506 s++;
7e2040f0 11507 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11508 const char * const w = s++;
7e2040f0 11509 while (isALNUM_lazy_if(s,UTF))
a687059c 11510 s++;
3280af22 11511 while (s < PL_bufend && isSPACE(*s))
a687059c 11512 s++;
e929a76b 11513 if (*s == ',') {
c94115d8 11514 GV* gv;
5458a98a 11515 if (keyword(w, s - w, 0))
e929a76b 11516 return;
c94115d8
NC
11517
11518 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11519 if (gv && GvCVu(gv))
abbb3198 11520 return;
cea2e8a9 11521 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11522 }
11523 }
11524}
11525
423cee85
JH
11526/* Either returns sv, or mortalizes sv and returns a new SV*.
11527 Best used as sv=new_constant(..., sv, ...).
11528 If s, pv are NULL, calls subroutine with one argument,
11529 and type is used with error messages only. */
11530
b3ac6de7 11531STATIC SV *
eb0d8d16
NC
11532S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11533 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11534{
27da23d5 11535 dVAR; dSP;
890ce7af 11536 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11537 SV *res;
b3ac6de7
IZ
11538 SV **cvp;
11539 SV *cv, *typesv;
89e33a05 11540 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11541
7918f24d
NC
11542 PERL_ARGS_ASSERT_NEW_CONSTANT;
11543
f0af216f 11544 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11545 SV *msg;
11546
10edeb5d
JH
11547 why2 = (const char *)
11548 (strEQ(key,"charnames")
11549 ? "(possibly a missing \"use charnames ...\")"
11550 : "");
4e553d73 11551 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11552 (type ? type: "undef"), why2);
11553
11554 /* This is convoluted and evil ("goto considered harmful")
11555 * but I do not understand the intricacies of all the different
11556 * failure modes of %^H in here. The goal here is to make
11557 * the most probable error message user-friendly. --jhi */
11558
11559 goto msgdone;
11560
423cee85 11561 report:
4e553d73 11562 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11563 (type ? type: "undef"), why1, why2, why3);
41ab332f 11564 msgdone:
95a20fc0 11565 yyerror(SvPVX_const(msg));
423cee85
JH
11566 SvREFCNT_dec(msg);
11567 return sv;
11568 }
ff3f963a
KW
11569
11570 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11571 if (PL_error_count > 0 && strEQ(key,"charnames"))
11572 return &PL_sv_undef;
ff3f963a 11573
eb0d8d16 11574 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11575 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11576 why1 = "$^H{";
11577 why2 = key;
f0af216f 11578 why3 = "} is not defined";
423cee85 11579 goto report;
b3ac6de7
IZ
11580 }
11581 sv_2mortal(sv); /* Parent created it permanently */
11582 cv = *cvp;
423cee85 11583 if (!pv && s)
59cd0e26 11584 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11585 if (type && pv)
59cd0e26 11586 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11587 else
423cee85 11588 typesv = &PL_sv_undef;
4e553d73 11589
e788e7d3 11590 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11591 ENTER ;
11592 SAVETMPS;
4e553d73 11593
423cee85 11594 PUSHMARK(SP) ;
a5845cb7 11595 EXTEND(sp, 3);
423cee85
JH
11596 if (pv)
11597 PUSHs(pv);
b3ac6de7 11598 PUSHs(sv);
423cee85
JH
11599 if (pv)
11600 PUSHs(typesv);
b3ac6de7 11601 PUTBACK;
423cee85 11602 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11603
423cee85 11604 SPAGAIN ;
4e553d73 11605
423cee85 11606 /* Check the eval first */
9b0e499b 11607 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11608 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11609 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11610 (void)POPs;
b37c2d43 11611 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11612 }
11613 else {
11614 res = POPs;
b37c2d43 11615 SvREFCNT_inc_simple_void(res);
423cee85 11616 }
4e553d73 11617
423cee85
JH
11618 PUTBACK ;
11619 FREETMPS ;
11620 LEAVE ;
b3ac6de7 11621 POPSTACK;
4e553d73 11622
b3ac6de7 11623 if (!SvOK(res)) {
423cee85
JH
11624 why1 = "Call to &{$^H{";
11625 why2 = key;
f0af216f 11626 why3 = "}} did not return a defined value";
423cee85
JH
11627 sv = res;
11628 goto report;
9b0e499b 11629 }
423cee85 11630
9b0e499b 11631 return res;
b3ac6de7 11632}
4e553d73 11633
d0a148a6
NC
11634/* Returns a NUL terminated string, with the length of the string written to
11635 *slp
11636 */
76e3520e 11637STATIC char *
cea2e8a9 11638S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11639{
97aff369 11640 dVAR;
463ee0b2 11641 register char *d = dest;
890ce7af 11642 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11643
11644 PERL_ARGS_ASSERT_SCAN_WORD;
11645
463ee0b2 11646 for (;;) {
8903cb82 11647 if (d >= e)
cea2e8a9 11648 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11649 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11650 *d++ = *s++;
c35e046a 11651 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11652 *d++ = ':';
11653 *d++ = ':';
11654 s++;
11655 }
c35e046a 11656 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11657 *d++ = *s++;
11658 *d++ = *s++;
11659 }
fd400ab9 11660 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11661 char *t = s + UTF8SKIP(s);
c35e046a 11662 size_t len;
fd400ab9 11663 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11664 t += UTF8SKIP(t);
c35e046a
AL
11665 len = t - s;
11666 if (d + len > e)
cea2e8a9 11667 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11668 Copy(s, d, len, char);
11669 d += len;
a0ed51b3
LW
11670 s = t;
11671 }
463ee0b2
LW
11672 else {
11673 *d = '\0';
11674 *slp = d - dest;
11675 return s;
e929a76b 11676 }
378cc40b
LW
11677 }
11678}
11679
76e3520e 11680STATIC char *
f54cb97a 11681S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11682{
97aff369 11683 dVAR;
6136c704 11684 char *bracket = NULL;
748a9306 11685 char funny = *s++;
6136c704 11686 register char *d = dest;
0b3da58d 11687 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11688
7918f24d
NC
11689 PERL_ARGS_ASSERT_SCAN_IDENT;
11690
a0d0e21e 11691 if (isSPACE(*s))
29595ff2 11692 s = PEEKSPACE(s);
de3bb511 11693 if (isDIGIT(*s)) {
8903cb82 11694 while (isDIGIT(*s)) {
11695 if (d >= e)
cea2e8a9 11696 Perl_croak(aTHX_ ident_too_long);
378cc40b 11697 *d++ = *s++;
8903cb82 11698 }
378cc40b
LW
11699 }
11700 else {
463ee0b2 11701 for (;;) {
8903cb82 11702 if (d >= e)
cea2e8a9 11703 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11704 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11705 *d++ = *s++;
7e2040f0 11706 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11707 *d++ = ':';
11708 *d++ = ':';
11709 s++;
11710 }
a0d0e21e 11711 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11712 *d++ = *s++;
11713 *d++ = *s++;
11714 }
fd400ab9 11715 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11716 char *t = s + UTF8SKIP(s);
fd400ab9 11717 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11718 t += UTF8SKIP(t);
11719 if (d + (t - s) > e)
cea2e8a9 11720 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11721 Copy(s, d, t - s, char);
11722 d += t - s;
11723 s = t;
11724 }
463ee0b2
LW
11725 else
11726 break;
11727 }
378cc40b
LW
11728 }
11729 *d = '\0';
11730 d = dest;
79072805 11731 if (*d) {
3280af22
NIS
11732 if (PL_lex_state != LEX_NORMAL)
11733 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11734 return s;
378cc40b 11735 }
748a9306 11736 if (*s == '$' && s[1] &&
3792a11b 11737 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11738 {
4810e5ec 11739 return s;
5cd24f17 11740 }
79072805
LW
11741 if (*s == '{') {
11742 bracket = s;
11743 s++;
11744 }
11745 else if (ck_uni)
11746 check_uni();
93a17b20 11747 if (s < send)
79072805
LW
11748 *d = *s++;
11749 d[1] = '\0';
2b92dfce 11750 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11751 *d = toCTRL(*s);
11752 s++;
de3bb511 11753 }
79072805 11754 if (bracket) {
748a9306 11755 if (isSPACE(s[-1])) {
fa83b5b6 11756 while (s < send) {
f54cb97a 11757 const char ch = *s++;
bf4acbe4 11758 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11759 *d = ch;
11760 break;
11761 }
11762 }
748a9306 11763 }
7e2040f0 11764 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11765 d++;
a0ed51b3 11766 if (UTF) {
6136c704
AL
11767 char *end = s;
11768 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11769 end += UTF8SKIP(end);
11770 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11771 end += UTF8SKIP(end);
a0ed51b3 11772 }
6136c704
AL
11773 Copy(s, d, end - s, char);
11774 d += end - s;
11775 s = end;
a0ed51b3
LW
11776 }
11777 else {
2b92dfce 11778 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11779 *d++ = *s++;
2b92dfce 11780 if (d >= e)
cea2e8a9 11781 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11782 }
79072805 11783 *d = '\0';
c35e046a
AL
11784 while (s < send && SPACE_OR_TAB(*s))
11785 s++;
ff68c719 11786 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11787 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11788 const char * const brack =
11789 (const char *)
11790 ((*s == '[') ? "[...]" : "{...}");
9014280d 11791 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11792 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11793 funny, dest, brack, funny, dest, brack);
11794 }
79072805 11795 bracket++;
a0be28da 11796 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11797 return s;
11798 }
4e553d73
NIS
11799 }
11800 /* Handle extended ${^Foo} variables
2b92dfce
GS
11801 * 1999-02-27 mjd-perl-patch@plover.com */
11802 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11803 && isALNUM(*s))
11804 {
11805 d++;
11806 while (isALNUM(*s) && d < e) {
11807 *d++ = *s++;
11808 }
11809 if (d >= e)
cea2e8a9 11810 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11811 *d = '\0';
79072805
LW
11812 }
11813 if (*s == '}') {
11814 s++;
7df0d042 11815 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11816 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11817 PL_expect = XREF;
11818 }
d008e5eb 11819 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11820 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11821 (keyword(dest, d - dest, 0)
11822 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11823 {
c35e046a
AL
11824 if (funny == '#')
11825 funny = '@';
9014280d 11826 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11827 "Ambiguous use of %c{%s} resolved to %c%s",
11828 funny, dest, funny, dest);
11829 }
11830 }
79072805
LW
11831 }
11832 else {
11833 s = bracket; /* let the parser handle it */
93a17b20 11834 *dest = '\0';
79072805
LW
11835 }
11836 }
3280af22
NIS
11837 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11838 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11839 return s;
11840}
11841
879d0c72
NC
11842static U32
11843S_pmflag(U32 pmfl, const char ch) {
11844 switch (ch) {
11845 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11846 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11847 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11848 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11849 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11850 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11851 }
11852 return pmfl;
11853}
11854
76e3520e 11855STATIC char *
cea2e8a9 11856S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11857{
97aff369 11858 dVAR;
79072805 11859 PMOP *pm;
5db06880 11860 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11861 const char * const valid_flags =
a20207d7 11862 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11863#ifdef PERL_MAD
11864 char *modstart;
11865#endif
11866
7918f24d 11867 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11868
25c09cbf 11869 if (!s) {
6136c704 11870 const char * const delimiter = skipspace(start);
10edeb5d
JH
11871 Perl_croak(aTHX_
11872 (const char *)
11873 (*delimiter == '?'
11874 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11875 : "Search pattern not terminated" ));
25c09cbf 11876 }
bbce6d69 11877
8782bef2 11878 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11879 if (PL_multi_open == '?') {
11880 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11881 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11882
11883 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11884 allows us to restrict the list needed by reset to just the ??
11885 matches. */
11886 assert(type != OP_TRANS);
11887 if (PL_curstash) {
daba3364 11888 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11889 U32 elements;
11890 if (!mg) {
daba3364 11891 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11892 0);
11893 }
11894 elements = mg->mg_len / sizeof(PMOP**);
11895 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11896 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11897 mg->mg_len = elements * sizeof(PMOP**);
11898 PmopSTASH_set(pm,PL_curstash);
11899 }
11900 }
5db06880
NC
11901#ifdef PERL_MAD
11902 modstart = s;
11903#endif
6136c704 11904 while (*s && strchr(valid_flags, *s))
879d0c72 11905 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11906
11907 if (isALNUM(*s)) {
11908 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11909 "Having no space between pattern and following word is deprecated");
11910
11911 }
5db06880
NC
11912#ifdef PERL_MAD
11913 if (PL_madskills && modstart != s) {
11914 SV* tmptoken = newSVpvn(modstart, s - modstart);
11915 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11916 }
11917#endif
4ac733c9 11918 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11919 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11920 {
a2a5de95
NC
11921 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11922 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11923 }
11924
3280af22 11925 PL_lex_op = (OP*)pm;
6154021b 11926 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11927 return s;
11928}
11929
76e3520e 11930STATIC char *
cea2e8a9 11931S_scan_subst(pTHX_ char *start)
79072805 11932{
27da23d5 11933 dVAR;
a0d0e21e 11934 register char *s;
79072805 11935 register PMOP *pm;
4fdae800 11936 I32 first_start;
79072805 11937 I32 es = 0;
5db06880
NC
11938#ifdef PERL_MAD
11939 char *modstart;
11940#endif
79072805 11941
7918f24d
NC
11942 PERL_ARGS_ASSERT_SCAN_SUBST;
11943
6154021b 11944 pl_yylval.ival = OP_NULL;
79072805 11945
5db06880 11946 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11947
37fd879b 11948 if (!s)
cea2e8a9 11949 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11950
3280af22 11951 if (s[-1] == PL_multi_open)
79072805 11952 s--;
5db06880
NC
11953#ifdef PERL_MAD
11954 if (PL_madskills) {
cd81e915
NC
11955 CURMAD('q', PL_thisopen);
11956 CURMAD('_', PL_thiswhite);
11957 CURMAD('E', PL_thisstuff);
11958 CURMAD('Q', PL_thisclose);
11959 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11960 }
11961#endif
79072805 11962
3280af22 11963 first_start = PL_multi_start;
5db06880 11964 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11965 if (!s) {
37fd879b 11966 if (PL_lex_stuff) {
3280af22 11967 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11968 PL_lex_stuff = NULL;
37fd879b 11969 }
cea2e8a9 11970 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11971 }
3280af22 11972 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11973
79072805 11974 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11975
11976#ifdef PERL_MAD
11977 if (PL_madskills) {
cd81e915
NC
11978 CURMAD('z', PL_thisopen);
11979 CURMAD('R', PL_thisstuff);
11980 CURMAD('Z', PL_thisclose);
5db06880
NC
11981 }
11982 modstart = s;
11983#endif
11984
48c036b1 11985 while (*s) {
a20207d7 11986 if (*s == EXEC_PAT_MOD) {
a687059c 11987 s++;
2f3197b3 11988 es++;
a687059c 11989 }
a20207d7 11990 else if (strchr(S_PAT_MODS, *s))
879d0c72 11991 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
11992 else {
11993 if (isALNUM(*s)) {
11994 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11995 "Having no space between pattern and following word is deprecated");
11996
11997 }
48c036b1 11998 break;
aa78b661 11999 }
378cc40b 12000 }
79072805 12001
5db06880
NC
12002#ifdef PERL_MAD
12003 if (PL_madskills) {
12004 if (modstart != s)
12005 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12006 append_madprops(PL_thismad, (OP*)pm, 0);
12007 PL_thismad = 0;
5db06880
NC
12008 }
12009#endif
a2a5de95
NC
12010 if ((pm->op_pmflags & PMf_CONTINUE)) {
12011 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12012 }
12013
79072805 12014 if (es) {
6136c704
AL
12015 SV * const repl = newSVpvs("");
12016
0244c3a4
GS
12017 PL_sublex_info.super_bufptr = s;
12018 PL_sublex_info.super_bufend = PL_bufend;
12019 PL_multi_end = 0;
79072805 12020 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12021 while (es-- > 0) {
12022 if (es)
12023 sv_catpvs(repl, "eval ");
12024 else
12025 sv_catpvs(repl, "do ");
12026 }
6f43d98f 12027 sv_catpvs(repl, "{");
3280af22 12028 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12029 if (strchr(SvPVX(PL_lex_repl), '#'))
12030 sv_catpvs(repl, "\n");
12031 sv_catpvs(repl, "}");
25da4f38 12032 SvEVALED_on(repl);
3280af22
NIS
12033 SvREFCNT_dec(PL_lex_repl);
12034 PL_lex_repl = repl;
378cc40b 12035 }
79072805 12036
3280af22 12037 PL_lex_op = (OP*)pm;
6154021b 12038 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12039 return s;
12040}
12041
76e3520e 12042STATIC char *
cea2e8a9 12043S_scan_trans(pTHX_ char *start)
378cc40b 12044{
97aff369 12045 dVAR;
a0d0e21e 12046 register char* s;
11343788 12047 OP *o;
79072805 12048 short *tbl;
b84c11c8
NC
12049 U8 squash;
12050 U8 del;
12051 U8 complement;
5db06880
NC
12052#ifdef PERL_MAD
12053 char *modstart;
12054#endif
79072805 12055
7918f24d
NC
12056 PERL_ARGS_ASSERT_SCAN_TRANS;
12057
6154021b 12058 pl_yylval.ival = OP_NULL;
79072805 12059
5db06880 12060 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12061 if (!s)
cea2e8a9 12062 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12063
3280af22 12064 if (s[-1] == PL_multi_open)
2f3197b3 12065 s--;
5db06880
NC
12066#ifdef PERL_MAD
12067 if (PL_madskills) {
cd81e915
NC
12068 CURMAD('q', PL_thisopen);
12069 CURMAD('_', PL_thiswhite);
12070 CURMAD('E', PL_thisstuff);
12071 CURMAD('Q', PL_thisclose);
12072 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12073 }
12074#endif
2f3197b3 12075
5db06880 12076 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12077 if (!s) {
37fd879b 12078 if (PL_lex_stuff) {
3280af22 12079 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12080 PL_lex_stuff = NULL;
37fd879b 12081 }
cea2e8a9 12082 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12083 }
5db06880 12084 if (PL_madskills) {
cd81e915
NC
12085 CURMAD('z', PL_thisopen);
12086 CURMAD('R', PL_thisstuff);
12087 CURMAD('Z', PL_thisclose);
5db06880 12088 }
79072805 12089
a0ed51b3 12090 complement = del = squash = 0;
5db06880
NC
12091#ifdef PERL_MAD
12092 modstart = s;
12093#endif
7a1e2023
NC
12094 while (1) {
12095 switch (*s) {
12096 case 'c':
79072805 12097 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12098 break;
12099 case 'd':
a0ed51b3 12100 del = OPpTRANS_DELETE;
7a1e2023
NC
12101 break;
12102 case 's':
79072805 12103 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12104 break;
12105 default:
12106 goto no_more;
12107 }
395c3793
LW
12108 s++;
12109 }
7a1e2023 12110 no_more:
8973db79 12111
aa1f7c5b 12112 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12113 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12114 o->op_private &= ~OPpTRANS_ALL;
12115 o->op_private |= del|squash|complement|
7948272d
NIS
12116 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12117 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12118
3280af22 12119 PL_lex_op = o;
6154021b 12120 pl_yylval.ival = OP_TRANS;
5db06880
NC
12121
12122#ifdef PERL_MAD
12123 if (PL_madskills) {
12124 if (modstart != s)
12125 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12126 append_madprops(PL_thismad, o, 0);
12127 PL_thismad = 0;
5db06880
NC
12128 }
12129#endif
12130
79072805
LW
12131 return s;
12132}
12133
76e3520e 12134STATIC char *
cea2e8a9 12135S_scan_heredoc(pTHX_ register char *s)
79072805 12136{
97aff369 12137 dVAR;
79072805
LW
12138 SV *herewas;
12139 I32 op_type = OP_SCALAR;
12140 I32 len;
12141 SV *tmpstr;
12142 char term;
73d840c0 12143 const char *found_newline;
79072805 12144 register char *d;
fc36a67e 12145 register char *e;
4633a7c4 12146 char *peek;
f54cb97a 12147 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12148#ifdef PERL_MAD
12149 I32 stuffstart = s - SvPVX(PL_linestr);
12150 char *tstart;
12151
cd81e915 12152 PL_realtokenstart = -1;
5db06880 12153#endif
79072805 12154
7918f24d
NC
12155 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12156
79072805 12157 s += 2;
3280af22
NIS
12158 d = PL_tokenbuf;
12159 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12160 if (!outer)
79072805 12161 *d++ = '\n';
c35e046a
AL
12162 peek = s;
12163 while (SPACE_OR_TAB(*peek))
12164 peek++;
3792a11b 12165 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12166 s = peek;
79072805 12167 term = *s++;
3280af22 12168 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12169 d += len;
3280af22 12170 if (s < PL_bufend)
79072805 12171 s++;
79072805
LW
12172 }
12173 else {
12174 if (*s == '\\')
12175 s++, term = '\'';
12176 else
12177 term = '"';
7e2040f0 12178 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12179 deprecate("bare << to mean <<\"\"");
7e2040f0 12180 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12181 if (d < e)
12182 *d++ = *s;
12183 }
12184 }
3280af22 12185 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12186 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12187 *d++ = '\n';
12188 *d = '\0';
3280af22 12189 len = d - PL_tokenbuf;
5db06880
NC
12190
12191#ifdef PERL_MAD
12192 if (PL_madskills) {
12193 tstart = PL_tokenbuf + !outer;
cd81e915 12194 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12195 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12196 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12197 stuffstart = s - SvPVX(PL_linestr);
12198 }
12199#endif
6a27c188 12200#ifndef PERL_STRICT_CR
f63a84b2
LW
12201 d = strchr(s, '\r');
12202 if (d) {
b464bac0 12203 char * const olds = s;
f63a84b2 12204 s = d;
3280af22 12205 while (s < PL_bufend) {
f63a84b2
LW
12206 if (*s == '\r') {
12207 *d++ = '\n';
12208 if (*++s == '\n')
12209 s++;
12210 }
12211 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12212 *d++ = *s++;
12213 s++;
12214 }
12215 else
12216 *d++ = *s++;
12217 }
12218 *d = '\0';
3280af22 12219 PL_bufend = d;
95a20fc0 12220 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12221 s = olds;
12222 }
12223#endif
5db06880
NC
12224#ifdef PERL_MAD
12225 found_newline = 0;
12226#endif
10edeb5d 12227 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12228 herewas = newSVpvn(s,PL_bufend-s);
12229 }
12230 else {
5db06880
NC
12231#ifdef PERL_MAD
12232 herewas = newSVpvn(s-1,found_newline-s+1);
12233#else
73d840c0
AL
12234 s--;
12235 herewas = newSVpvn(s,found_newline-s);
5db06880 12236#endif
73d840c0 12237 }
5db06880
NC
12238#ifdef PERL_MAD
12239 if (PL_madskills) {
12240 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12241 if (PL_thisstuff)
12242 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12243 else
cd81e915 12244 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12245 }
12246#endif
79072805 12247 s += SvCUR(herewas);
748a9306 12248
5db06880
NC
12249#ifdef PERL_MAD
12250 stuffstart = s - SvPVX(PL_linestr);
12251
12252 if (found_newline)
12253 s--;
12254#endif
12255
7d0a29fe
NC
12256 tmpstr = newSV_type(SVt_PVIV);
12257 SvGROW(tmpstr, 80);
748a9306 12258 if (term == '\'') {
79072805 12259 op_type = OP_CONST;
45977657 12260 SvIV_set(tmpstr, -1);
748a9306
LW
12261 }
12262 else if (term == '`') {
79072805 12263 op_type = OP_BACKTICK;
45977657 12264 SvIV_set(tmpstr, '\\');
748a9306 12265 }
79072805
LW
12266
12267 CLINE;
57843af0 12268 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12269 PL_multi_open = PL_multi_close = '<';
12270 term = *PL_tokenbuf;
0244c3a4 12271 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12272 char * const bufptr = PL_sublex_info.super_bufptr;
12273 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12274 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12275 s = strchr(bufptr, '\n');
12276 if (!s)
12277 s = bufend;
12278 d = s;
12279 while (s < bufend &&
12280 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12281 if (*s++ == '\n')
57843af0 12282 CopLINE_inc(PL_curcop);
0244c3a4
GS
12283 }
12284 if (s >= bufend) {
eb160463 12285 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12286 missingterm(PL_tokenbuf);
12287 }
12288 sv_setpvn(herewas,bufptr,d-bufptr+1);
12289 sv_setpvn(tmpstr,d+1,s-d);
12290 s += len - 1;
12291 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12292 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12293
12294 s = olds;
12295 goto retval;
12296 }
12297 else if (!outer) {
79072805 12298 d = s;
3280af22
NIS
12299 while (s < PL_bufend &&
12300 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12301 if (*s++ == '\n')
57843af0 12302 CopLINE_inc(PL_curcop);
79072805 12303 }
3280af22 12304 if (s >= PL_bufend) {
eb160463 12305 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12306 missingterm(PL_tokenbuf);
79072805
LW
12307 }
12308 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12309#ifdef PERL_MAD
12310 if (PL_madskills) {
cd81e915
NC
12311 if (PL_thisstuff)
12312 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12313 else
cd81e915 12314 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12315 stuffstart = s - SvPVX(PL_linestr);
12316 }
12317#endif
79072805 12318 s += len - 1;
57843af0 12319 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12320
3280af22
NIS
12321 sv_catpvn(herewas,s,PL_bufend-s);
12322 sv_setsv(PL_linestr,herewas);
12323 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12324 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12325 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12326 }
12327 else
76f68e9b 12328 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12329 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12330#ifdef PERL_MAD
12331 if (PL_madskills) {
12332 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12333 if (PL_thisstuff)
12334 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12335 else
cd81e915 12336 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12337 }
12338#endif
f0e67a1d 12339 PL_bufptr = s;
17cc9359 12340 CopLINE_inc(PL_curcop);
f0e67a1d 12341 if (!outer || !lex_next_chunk(0)) {
eb160463 12342 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12343 missingterm(PL_tokenbuf);
79072805 12344 }
17cc9359 12345 CopLINE_dec(PL_curcop);
f0e67a1d 12346 s = PL_bufptr;
5db06880
NC
12347#ifdef PERL_MAD
12348 stuffstart = s - SvPVX(PL_linestr);
12349#endif
57843af0 12350 CopLINE_inc(PL_curcop);
3280af22 12351 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12352 PL_last_lop = PL_last_uni = NULL;
6a27c188 12353#ifndef PERL_STRICT_CR
3280af22 12354 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12355 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12356 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12357 {
3280af22
NIS
12358 PL_bufend[-2] = '\n';
12359 PL_bufend--;
95a20fc0 12360 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12361 }
3280af22
NIS
12362 else if (PL_bufend[-1] == '\r')
12363 PL_bufend[-1] = '\n';
f63a84b2 12364 }
3280af22
NIS
12365 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12366 PL_bufend[-1] = '\n';
f63a84b2 12367#endif
3280af22 12368 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12369 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12370 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12371 sv_catsv(PL_linestr,herewas);
12372 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12373 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12374 }
12375 else {
3280af22
NIS
12376 s = PL_bufend;
12377 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12378 }
12379 }
79072805 12380 s++;
0244c3a4 12381retval:
57843af0 12382 PL_multi_end = CopLINE(PL_curcop);
79072805 12383 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12384 SvPV_shrink_to_cur(tmpstr);
79072805 12385 }
8990e307 12386 SvREFCNT_dec(herewas);
2f31ce75 12387 if (!IN_BYTES) {
95a20fc0 12388 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12389 SvUTF8_on(tmpstr);
12390 else if (PL_encoding)
12391 sv_recode_to_utf8(tmpstr, PL_encoding);
12392 }
3280af22 12393 PL_lex_stuff = tmpstr;
6154021b 12394 pl_yylval.ival = op_type;
79072805
LW
12395 return s;
12396}
12397
02aa26ce
NT
12398/* scan_inputsymbol
12399 takes: current position in input buffer
12400 returns: new position in input buffer
6154021b 12401 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12402
12403 This code handles:
12404
12405 <> read from ARGV
12406 <FH> read from filehandle
12407 <pkg::FH> read from package qualified filehandle
12408 <pkg'FH> read from package qualified filehandle
12409 <$fh> read from filehandle in $fh
12410 <*.h> filename glob
12411
12412*/
12413
76e3520e 12414STATIC char *
cea2e8a9 12415S_scan_inputsymbol(pTHX_ char *start)
79072805 12416{
97aff369 12417 dVAR;
02aa26ce 12418 register char *s = start; /* current position in buffer */
1b420867 12419 char *end;
79072805 12420 I32 len;
6136c704
AL
12421 char *d = PL_tokenbuf; /* start of temp holding space */
12422 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12423
7918f24d
NC
12424 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12425
1b420867
GS
12426 end = strchr(s, '\n');
12427 if (!end)
12428 end = PL_bufend;
12429 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12430
12431 /* die if we didn't have space for the contents of the <>,
1b420867 12432 or if it didn't end, or if we see a newline
02aa26ce
NT
12433 */
12434
bb7a0f54 12435 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12436 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12437 if (s >= end)
cea2e8a9 12438 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12439
fc36a67e 12440 s++;
02aa26ce
NT
12441
12442 /* check for <$fh>
12443 Remember, only scalar variables are interpreted as filehandles by
12444 this code. Anything more complex (e.g., <$fh{$num}>) will be
12445 treated as a glob() call.
12446 This code makes use of the fact that except for the $ at the front,
12447 a scalar variable and a filehandle look the same.
12448 */
4633a7c4 12449 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12450
12451 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12452 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12453 d++;
02aa26ce
NT
12454
12455 /* If we've tried to read what we allow filehandles to look like, and
12456 there's still text left, then it must be a glob() and not a getline.
12457 Use scan_str to pull out the stuff between the <> and treat it
12458 as nothing more than a string.
12459 */
12460
3280af22 12461 if (d - PL_tokenbuf != len) {
6154021b 12462 pl_yylval.ival = OP_GLOB;
5db06880 12463 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12464 if (!s)
cea2e8a9 12465 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12466 return s;
12467 }
395c3793 12468 else {
9b3023bc 12469 bool readline_overriden = FALSE;
6136c704 12470 GV *gv_readline;
9b3023bc 12471 GV **gvp;
02aa26ce 12472 /* we're in a filehandle read situation */
3280af22 12473 d = PL_tokenbuf;
02aa26ce
NT
12474
12475 /* turn <> into <ARGV> */
79072805 12476 if (!len)
689badd5 12477 Copy("ARGV",d,5,char);
02aa26ce 12478
9b3023bc 12479 /* Check whether readline() is overriden */
fafc274c 12480 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12481 if ((gv_readline
ba979b31 12482 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12483 ||
017a3ce5 12484 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12485 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12486 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12487 readline_overriden = TRUE;
12488
02aa26ce
NT
12489 /* if <$fh>, create the ops to turn the variable into a
12490 filehandle
12491 */
79072805 12492 if (*d == '$') {
02aa26ce
NT
12493 /* try to find it in the pad for this block, otherwise find
12494 add symbol table ops
12495 */
f8f98e0a 12496 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12497 if (tmp != NOT_IN_PAD) {
00b1698f 12498 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12499 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12500 HEK * const stashname = HvNAME_HEK(stash);
12501 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12502 sv_catpvs(sym, "::");
f558d5af
JH
12503 sv_catpv(sym, d+1);
12504 d = SvPVX(sym);
12505 goto intro_sym;
12506 }
12507 else {
6136c704 12508 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12509 o->op_targ = tmp;
9b3023bc
RGS
12510 PL_lex_op = readline_overriden
12511 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12512 append_elem(OP_LIST, o,
12513 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12514 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12515 }
a0d0e21e
LW
12516 }
12517 else {
f558d5af
JH
12518 GV *gv;
12519 ++d;
12520intro_sym:
12521 gv = gv_fetchpv(d,
12522 (PL_in_eval
12523 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12524 : GV_ADDMULTI),
f558d5af 12525 SVt_PV);
9b3023bc
RGS
12526 PL_lex_op = readline_overriden
12527 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12528 append_elem(OP_LIST,
12529 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12530 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12531 : (OP*)newUNOP(OP_READLINE, 0,
12532 newUNOP(OP_RV2SV, 0,
12533 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12534 }
7c6fadd6
RGS
12535 if (!readline_overriden)
12536 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12537 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12538 pl_yylval.ival = OP_NULL;
79072805 12539 }
02aa26ce
NT
12540
12541 /* If it's none of the above, it must be a literal filehandle
12542 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12543 else {
6136c704 12544 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12545 PL_lex_op = readline_overriden
12546 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12547 append_elem(OP_LIST,
12548 newGVOP(OP_GV, 0, gv),
12549 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12550 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12551 pl_yylval.ival = OP_NULL;
79072805
LW
12552 }
12553 }
02aa26ce 12554
79072805
LW
12555 return s;
12556}
12557
02aa26ce
NT
12558
12559/* scan_str
12560 takes: start position in buffer
09bef843
SB
12561 keep_quoted preserve \ on the embedded delimiter(s)
12562 keep_delims preserve the delimiters around the string
02aa26ce
NT
12563 returns: position to continue reading from buffer
12564 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12565 updates the read buffer.
12566
12567 This subroutine pulls a string out of the input. It is called for:
12568 q single quotes q(literal text)
12569 ' single quotes 'literal text'
12570 qq double quotes qq(interpolate $here please)
12571 " double quotes "interpolate $here please"
12572 qx backticks qx(/bin/ls -l)
12573 ` backticks `/bin/ls -l`
12574 qw quote words @EXPORT_OK = qw( func() $spam )
12575 m// regexp match m/this/
12576 s/// regexp substitute s/this/that/
12577 tr/// string transliterate tr/this/that/
12578 y/// string transliterate y/this/that/
12579 ($*@) sub prototypes sub foo ($)
09bef843 12580 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12581 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12582
12583 In most of these cases (all but <>, patterns and transliterate)
12584 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12585 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12586 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12587 calls scan_str().
4e553d73 12588
02aa26ce
NT
12589 It skips whitespace before the string starts, and treats the first
12590 character as the delimiter. If the delimiter is one of ([{< then
12591 the corresponding "close" character )]}> is used as the closing
12592 delimiter. It allows quoting of delimiters, and if the string has
12593 balanced delimiters ([{<>}]) it allows nesting.
12594
37fd879b
HS
12595 On success, the SV with the resulting string is put into lex_stuff or,
12596 if that is already non-NULL, into lex_repl. The second case occurs only
12597 when parsing the RHS of the special constructs s/// and tr/// (y///).
12598 For convenience, the terminating delimiter character is stuffed into
12599 SvIVX of the SV.
02aa26ce
NT
12600*/
12601
76e3520e 12602STATIC char *
09bef843 12603S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12604{
97aff369 12605 dVAR;
02aa26ce 12606 SV *sv; /* scalar value: string */
d3fcec1f 12607 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12608 register char *s = start; /* current position in the buffer */
12609 register char term; /* terminating character */
12610 register char *to; /* current position in the sv's data */
12611 I32 brackets = 1; /* bracket nesting level */
89491803 12612 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12613 I32 termcode; /* terminating char. code */
89ebb4a3 12614 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12615 STRLEN termlen; /* length of terminating string */
0331ef07 12616 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12617#ifdef PERL_MAD
12618 int stuffstart;
12619 char *tstart;
12620#endif
02aa26ce 12621
7918f24d
NC
12622 PERL_ARGS_ASSERT_SCAN_STR;
12623
02aa26ce 12624 /* skip space before the delimiter */
29595ff2
NC
12625 if (isSPACE(*s)) {
12626 s = PEEKSPACE(s);
12627 }
02aa26ce 12628
5db06880 12629#ifdef PERL_MAD
cd81e915
NC
12630 if (PL_realtokenstart >= 0) {
12631 stuffstart = PL_realtokenstart;
12632 PL_realtokenstart = -1;
5db06880
NC
12633 }
12634 else
12635 stuffstart = start - SvPVX(PL_linestr);
12636#endif
02aa26ce 12637 /* mark where we are, in case we need to report errors */
79072805 12638 CLINE;
02aa26ce
NT
12639
12640 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12641 term = *s;
220e2d4e
IH
12642 if (!UTF) {
12643 termcode = termstr[0] = term;
12644 termlen = 1;
12645 }
12646 else {
f3b9ce0f 12647 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12648 Copy(s, termstr, termlen, U8);
12649 if (!UTF8_IS_INVARIANT(term))
12650 has_utf8 = TRUE;
12651 }
b1c7b182 12652
02aa26ce 12653 /* mark where we are */
57843af0 12654 PL_multi_start = CopLINE(PL_curcop);
3280af22 12655 PL_multi_open = term;
02aa26ce
NT
12656
12657 /* find corresponding closing delimiter */
93a17b20 12658 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12659 termcode = termstr[0] = term = tmps[5];
12660
3280af22 12661 PL_multi_close = term;
79072805 12662
561b68a9
SH
12663 /* create a new SV to hold the contents. 79 is the SV's initial length.
12664 What a random number. */
7d0a29fe
NC
12665 sv = newSV_type(SVt_PVIV);
12666 SvGROW(sv, 80);
45977657 12667 SvIV_set(sv, termcode);
a0d0e21e 12668 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12669
12670 /* move past delimiter and try to read a complete string */
09bef843 12671 if (keep_delims)
220e2d4e
IH
12672 sv_catpvn(sv, s, termlen);
12673 s += termlen;
5db06880
NC
12674#ifdef PERL_MAD
12675 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12676 if (!PL_thisopen && !keep_delims) {
12677 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12678 stuffstart = s - SvPVX(PL_linestr);
12679 }
12680#endif
93a17b20 12681 for (;;) {
220e2d4e
IH
12682 if (PL_encoding && !UTF) {
12683 bool cont = TRUE;
12684
12685 while (cont) {
95a20fc0 12686 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12687 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12688 &offset, (char*)termstr, termlen);
6136c704
AL
12689 const char * const ns = SvPVX_const(PL_linestr) + offset;
12690 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12691
12692 for (; s < ns; s++) {
12693 if (*s == '\n' && !PL_rsfp)
12694 CopLINE_inc(PL_curcop);
12695 }
12696 if (!found)
12697 goto read_more_line;
12698 else {
12699 /* handle quoted delimiters */
52327caf 12700 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12701 const char *t;
95a20fc0 12702 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12703 t--;
12704 if ((svlast-1 - t) % 2) {
12705 if (!keep_quoted) {
12706 *(svlast-1) = term;
12707 *svlast = '\0';
12708 SvCUR_set(sv, SvCUR(sv) - 1);
12709 }
12710 continue;
12711 }
12712 }
12713 if (PL_multi_open == PL_multi_close) {
12714 cont = FALSE;
12715 }
12716 else {
f54cb97a
AL
12717 const char *t;
12718 char *w;
0331ef07 12719 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12720 /* At here, all closes are "was quoted" one,
12721 so we don't check PL_multi_close. */
12722 if (*t == '\\') {
12723 if (!keep_quoted && *(t+1) == PL_multi_open)
12724 t++;
12725 else
12726 *w++ = *t++;
12727 }
12728 else if (*t == PL_multi_open)
12729 brackets++;
12730
12731 *w = *t;
12732 }
12733 if (w < t) {
12734 *w++ = term;
12735 *w = '\0';
95a20fc0 12736 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12737 }
0331ef07 12738 last_off = w - SvPVX(sv);
220e2d4e
IH
12739 if (--brackets <= 0)
12740 cont = FALSE;
12741 }
12742 }
12743 }
12744 if (!keep_delims) {
12745 SvCUR_set(sv, SvCUR(sv) - 1);
12746 *SvEND(sv) = '\0';
12747 }
12748 break;
12749 }
12750
02aa26ce 12751 /* extend sv if need be */
3280af22 12752 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12753 /* set 'to' to the next character in the sv's string */
463ee0b2 12754 to = SvPVX(sv)+SvCUR(sv);
09bef843 12755
02aa26ce 12756 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12757 if (PL_multi_open == PL_multi_close) {
12758 for (; s < PL_bufend; s++,to++) {
02aa26ce 12759 /* embedded newlines increment the current line number */
3280af22 12760 if (*s == '\n' && !PL_rsfp)
57843af0 12761 CopLINE_inc(PL_curcop);
02aa26ce 12762 /* handle quoted delimiters */
3280af22 12763 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12764 if (!keep_quoted && s[1] == term)
a0d0e21e 12765 s++;
02aa26ce 12766 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12767 else
12768 *to++ = *s++;
12769 }
02aa26ce
NT
12770 /* terminate when run out of buffer (the for() condition), or
12771 have found the terminator */
220e2d4e
IH
12772 else if (*s == term) {
12773 if (termlen == 1)
12774 break;
f3b9ce0f 12775 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12776 break;
12777 }
63cd0674 12778 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12779 has_utf8 = TRUE;
93a17b20
LW
12780 *to = *s;
12781 }
12782 }
02aa26ce
NT
12783
12784 /* if the terminator isn't the same as the start character (e.g.,
12785 matched brackets), we have to allow more in the quoting, and
12786 be prepared for nested brackets.
12787 */
93a17b20 12788 else {
02aa26ce 12789 /* read until we run out of string, or we find the terminator */
3280af22 12790 for (; s < PL_bufend; s++,to++) {
02aa26ce 12791 /* embedded newlines increment the line count */
3280af22 12792 if (*s == '\n' && !PL_rsfp)
57843af0 12793 CopLINE_inc(PL_curcop);
02aa26ce 12794 /* backslashes can escape the open or closing characters */
3280af22 12795 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12796 if (!keep_quoted &&
12797 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12798 s++;
12799 else
12800 *to++ = *s++;
12801 }
02aa26ce 12802 /* allow nested opens and closes */
3280af22 12803 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12804 break;
3280af22 12805 else if (*s == PL_multi_open)
93a17b20 12806 brackets++;
63cd0674 12807 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12808 has_utf8 = TRUE;
93a17b20
LW
12809 *to = *s;
12810 }
12811 }
02aa26ce 12812 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12813 *to = '\0';
95a20fc0 12814 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12815
02aa26ce
NT
12816 /*
12817 * this next chunk reads more into the buffer if we're not done yet
12818 */
12819
b1c7b182
GS
12820 if (s < PL_bufend)
12821 break; /* handle case where we are done yet :-) */
79072805 12822
6a27c188 12823#ifndef PERL_STRICT_CR
95a20fc0 12824 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12825 if ((to[-2] == '\r' && to[-1] == '\n') ||
12826 (to[-2] == '\n' && to[-1] == '\r'))
12827 {
f63a84b2
LW
12828 to[-2] = '\n';
12829 to--;
95a20fc0 12830 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12831 }
12832 else if (to[-1] == '\r')
12833 to[-1] = '\n';
12834 }
95a20fc0 12835 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12836 to[-1] = '\n';
12837#endif
12838
220e2d4e 12839 read_more_line:
02aa26ce
NT
12840 /* if we're out of file, or a read fails, bail and reset the current
12841 line marker so we can report where the unterminated string began
12842 */
5db06880
NC
12843#ifdef PERL_MAD
12844 if (PL_madskills) {
c35e046a 12845 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12846 if (PL_thisstuff)
12847 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12848 else
cd81e915 12849 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12850 }
12851#endif
f0e67a1d
Z
12852 CopLINE_inc(PL_curcop);
12853 PL_bufptr = PL_bufend;
12854 if (!lex_next_chunk(0)) {
c07a80fd 12855 sv_free(sv);
eb160463 12856 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12857 return NULL;
79072805 12858 }
f0e67a1d 12859 s = PL_bufptr;
5db06880
NC
12860#ifdef PERL_MAD
12861 stuffstart = 0;
12862#endif
378cc40b 12863 }
4e553d73 12864
02aa26ce
NT
12865 /* at this point, we have successfully read the delimited string */
12866
220e2d4e 12867 if (!PL_encoding || UTF) {
5db06880
NC
12868#ifdef PERL_MAD
12869 if (PL_madskills) {
c35e046a 12870 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12871 const int len = s - tstart;
cd81e915 12872 if (PL_thisstuff)
c35e046a 12873 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12874 else
c35e046a 12875 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12876 if (!PL_thisclose && !keep_delims)
12877 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12878 }
12879#endif
12880
220e2d4e
IH
12881 if (keep_delims)
12882 sv_catpvn(sv, s, termlen);
12883 s += termlen;
12884 }
5db06880
NC
12885#ifdef PERL_MAD
12886 else {
12887 if (PL_madskills) {
c35e046a
AL
12888 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12889 const int len = s - tstart - termlen;
cd81e915 12890 if (PL_thisstuff)
c35e046a 12891 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12892 else
c35e046a 12893 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12894 if (!PL_thisclose && !keep_delims)
12895 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12896 }
12897 }
12898#endif
220e2d4e 12899 if (has_utf8 || PL_encoding)
b1c7b182 12900 SvUTF8_on(sv);
d0063567 12901
57843af0 12902 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12903
12904 /* if we allocated too much space, give some back */
93a17b20
LW
12905 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12906 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12907 SvPV_renew(sv, SvLEN(sv));
79072805 12908 }
02aa26ce
NT
12909
12910 /* decide whether this is the first or second quoted string we've read
12911 for this op
12912 */
4e553d73 12913
3280af22
NIS
12914 if (PL_lex_stuff)
12915 PL_lex_repl = sv;
79072805 12916 else
3280af22 12917 PL_lex_stuff = sv;
378cc40b
LW
12918 return s;
12919}
12920
02aa26ce
NT
12921/*
12922 scan_num
12923 takes: pointer to position in buffer
12924 returns: pointer to new position in buffer
6154021b 12925 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12926
12927 Read a number in any of the formats that Perl accepts:
12928
7fd134d9
JH
12929 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12930 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12931 0b[01](_?[01])*
12932 0[0-7](_?[0-7])*
12933 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12934
3280af22 12935 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12936 thing it reads.
12937
12938 If it reads a number without a decimal point or an exponent, it will
12939 try converting the number to an integer and see if it can do so
12940 without loss of precision.
12941*/
4e553d73 12942
378cc40b 12943char *
bfed75c6 12944Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12945{
97aff369 12946 dVAR;
bfed75c6 12947 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12948 register char *d; /* destination in temp buffer */
12949 register char *e; /* end of temp buffer */
86554af2 12950 NV nv; /* number read, as a double */
a0714e2c 12951 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12952 bool floatit; /* boolean: int or float? */
cbbf8932 12953 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12954 static char const number_too_long[] = "Number too long";
378cc40b 12955
7918f24d
NC
12956 PERL_ARGS_ASSERT_SCAN_NUM;
12957
02aa26ce
NT
12958 /* We use the first character to decide what type of number this is */
12959
378cc40b 12960 switch (*s) {
79072805 12961 default:
cea2e8a9 12962 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12963
02aa26ce 12964 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12965 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12966 case '0':
12967 {
02aa26ce
NT
12968 /* variables:
12969 u holds the "number so far"
4f19785b
WSI
12970 shift the power of 2 of the base
12971 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12972 overflowed was the number more than we can hold?
12973
12974 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12975 we in octal/hex/binary?" indicator to disallow hex characters
12976 when in octal mode.
02aa26ce 12977 */
9e24b6e2
JH
12978 NV n = 0.0;
12979 UV u = 0;
79072805 12980 I32 shift;
9e24b6e2 12981 bool overflowed = FALSE;
61f33854 12982 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12983 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12984 static const char* const bases[5] =
12985 { "", "binary", "", "octal", "hexadecimal" };
12986 static const char* const Bases[5] =
12987 { "", "Binary", "", "Octal", "Hexadecimal" };
12988 static const char* const maxima[5] =
12989 { "",
12990 "0b11111111111111111111111111111111",
12991 "",
12992 "037777777777",
12993 "0xffffffff" };
bfed75c6 12994 const char *base, *Base, *max;
378cc40b 12995
02aa26ce 12996 /* check for hex */
a674e8db 12997 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
12998 shift = 4;
12999 s += 2;
61f33854 13000 just_zero = FALSE;
a674e8db 13001 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13002 shift = 1;
13003 s += 2;
61f33854 13004 just_zero = FALSE;
378cc40b 13005 }
02aa26ce 13006 /* check for a decimal in disguise */
b78218b7 13007 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13008 goto decimal;
02aa26ce 13009 /* so it must be octal */
928753ea 13010 else {
378cc40b 13011 shift = 3;
928753ea
JH
13012 s++;
13013 }
13014
13015 if (*s == '_') {
a2a5de95 13016 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13017 "Misplaced _ in number");
13018 lastub = s++;
13019 }
9e24b6e2
JH
13020
13021 base = bases[shift];
13022 Base = Bases[shift];
13023 max = maxima[shift];
02aa26ce 13024
4f19785b 13025 /* read the rest of the number */
378cc40b 13026 for (;;) {
9e24b6e2 13027 /* x is used in the overflow test,
893fe2c2 13028 b is the digit we're adding on. */
9e24b6e2 13029 UV x, b;
55497cff 13030
378cc40b 13031 switch (*s) {
02aa26ce
NT
13032
13033 /* if we don't mention it, we're done */
378cc40b
LW
13034 default:
13035 goto out;
02aa26ce 13036
928753ea 13037 /* _ are ignored -- but warned about if consecutive */
de3bb511 13038 case '_':
a2a5de95
NC
13039 if (lastub && s == lastub + 1)
13040 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13041 "Misplaced _ in number");
928753ea 13042 lastub = s++;
de3bb511 13043 break;
02aa26ce
NT
13044
13045 /* 8 and 9 are not octal */
378cc40b 13046 case '8': case '9':
4f19785b 13047 if (shift == 3)
cea2e8a9 13048 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13049 /* FALL THROUGH */
02aa26ce
NT
13050
13051 /* octal digits */
4f19785b 13052 case '2': case '3': case '4':
378cc40b 13053 case '5': case '6': case '7':
4f19785b 13054 if (shift == 1)
cea2e8a9 13055 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13056 /* FALL THROUGH */
13057
13058 case '0': case '1':
02aa26ce 13059 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13060 goto digit;
02aa26ce
NT
13061
13062 /* hex digits */
378cc40b
LW
13063 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13064 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13065 /* make sure they said 0x */
378cc40b
LW
13066 if (shift != 4)
13067 goto out;
55497cff 13068 b = (*s++ & 7) + 9;
02aa26ce
NT
13069
13070 /* Prepare to put the digit we have onto the end
13071 of the number so far. We check for overflows.
13072 */
13073
55497cff 13074 digit:
61f33854 13075 just_zero = FALSE;
9e24b6e2
JH
13076 if (!overflowed) {
13077 x = u << shift; /* make room for the digit */
13078
13079 if ((x >> shift) != u
13080 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13081 overflowed = TRUE;
13082 n = (NV) u;
9b387841
NC
13083 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13084 "Integer overflow in %s number",
13085 base);
9e24b6e2
JH
13086 } else
13087 u = x | b; /* add the digit to the end */
13088 }
13089 if (overflowed) {
13090 n *= nvshift[shift];
13091 /* If an NV has not enough bits in its
13092 * mantissa to represent an UV this summing of
13093 * small low-order numbers is a waste of time
13094 * (because the NV cannot preserve the
13095 * low-order bits anyway): we could just
13096 * remember when did we overflow and in the
13097 * end just multiply n by the right
13098 * amount. */
13099 n += (NV) b;
55497cff 13100 }
378cc40b
LW
13101 break;
13102 }
13103 }
02aa26ce
NT
13104
13105 /* if we get here, we had success: make a scalar value from
13106 the number.
13107 */
378cc40b 13108 out:
928753ea
JH
13109
13110 /* final misplaced underbar check */
13111 if (s[-1] == '_') {
a2a5de95 13112 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13113 }
13114
9e24b6e2 13115 if (overflowed) {
a2a5de95
NC
13116 if (n > 4294967295.0)
13117 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13118 "%s number > %s non-portable",
13119 Base, max);
b081dd7e 13120 sv = newSVnv(n);
9e24b6e2
JH
13121 }
13122 else {
15041a67 13123#if UVSIZE > 4
a2a5de95
NC
13124 if (u > 0xffffffff)
13125 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13126 "%s number > %s non-portable",
13127 Base, max);
2cc4c2dc 13128#endif
b081dd7e 13129 sv = newSVuv(u);
9e24b6e2 13130 }
61f33854 13131 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13132 sv = new_constant(start, s - start, "integer",
eb0d8d16 13133 sv, NULL, NULL, 0);
61f33854 13134 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13135 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13136 }
13137 break;
02aa26ce
NT
13138
13139 /*
13140 handle decimal numbers.
13141 we're also sent here when we read a 0 as the first digit
13142 */
378cc40b
LW
13143 case '1': case '2': case '3': case '4': case '5':
13144 case '6': case '7': case '8': case '9': case '.':
13145 decimal:
3280af22
NIS
13146 d = PL_tokenbuf;
13147 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13148 floatit = FALSE;
02aa26ce
NT
13149
13150 /* read next group of digits and _ and copy into d */
de3bb511 13151 while (isDIGIT(*s) || *s == '_') {
4e553d73 13152 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13153 if -w is on
13154 */
93a17b20 13155 if (*s == '_') {
a2a5de95
NC
13156 if (lastub && s == lastub + 1)
13157 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13158 "Misplaced _ in number");
928753ea 13159 lastub = s++;
93a17b20 13160 }
fc36a67e 13161 else {
02aa26ce 13162 /* check for end of fixed-length buffer */
fc36a67e 13163 if (d >= e)
cea2e8a9 13164 Perl_croak(aTHX_ number_too_long);
02aa26ce 13165 /* if we're ok, copy the character */
378cc40b 13166 *d++ = *s++;
fc36a67e 13167 }
378cc40b 13168 }
02aa26ce
NT
13169
13170 /* final misplaced underbar check */
928753ea 13171 if (lastub && s == lastub + 1) {
a2a5de95 13172 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13173 }
02aa26ce
NT
13174
13175 /* read a decimal portion if there is one. avoid
13176 3..5 being interpreted as the number 3. followed
13177 by .5
13178 */
2f3197b3 13179 if (*s == '.' && s[1] != '.') {
79072805 13180 floatit = TRUE;
378cc40b 13181 *d++ = *s++;
02aa26ce 13182
928753ea 13183 if (*s == '_') {
a2a5de95
NC
13184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13185 "Misplaced _ in number");
928753ea
JH
13186 lastub = s;
13187 }
13188
13189 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13190 */
fc36a67e 13191 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13192 /* fixed length buffer check */
fc36a67e 13193 if (d >= e)
cea2e8a9 13194 Perl_croak(aTHX_ number_too_long);
928753ea 13195 if (*s == '_') {
a2a5de95
NC
13196 if (lastub && s == lastub + 1)
13197 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13198 "Misplaced _ in number");
928753ea
JH
13199 lastub = s;
13200 }
13201 else
fc36a67e 13202 *d++ = *s;
378cc40b 13203 }
928753ea
JH
13204 /* fractional part ending in underbar? */
13205 if (s[-1] == '_') {
a2a5de95
NC
13206 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13207 "Misplaced _ in number");
928753ea 13208 }
dd629d5b
GS
13209 if (*s == '.' && isDIGIT(s[1])) {
13210 /* oops, it's really a v-string, but without the "v" */
f4758303 13211 s = start;
dd629d5b
GS
13212 goto vstring;
13213 }
378cc40b 13214 }
02aa26ce
NT
13215
13216 /* read exponent part, if present */
3792a11b 13217 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13218 floatit = TRUE;
13219 s++;
02aa26ce
NT
13220
13221 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13222 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13223
7fd134d9
JH
13224 /* stray preinitial _ */
13225 if (*s == '_') {
a2a5de95
NC
13226 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13227 "Misplaced _ in number");
7fd134d9
JH
13228 lastub = s++;
13229 }
13230
02aa26ce 13231 /* allow positive or negative exponent */
378cc40b
LW
13232 if (*s == '+' || *s == '-')
13233 *d++ = *s++;
02aa26ce 13234
7fd134d9
JH
13235 /* stray initial _ */
13236 if (*s == '_') {
a2a5de95
NC
13237 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13238 "Misplaced _ in number");
7fd134d9
JH
13239 lastub = s++;
13240 }
13241
7fd134d9
JH
13242 /* read digits of exponent */
13243 while (isDIGIT(*s) || *s == '_') {
13244 if (isDIGIT(*s)) {
13245 if (d >= e)
13246 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13247 *d++ = *s++;
7fd134d9
JH
13248 }
13249 else {
041457d9 13250 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13251 (!isDIGIT(s[1]) && s[1] != '_')))
13252 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13253 "Misplaced _ in number");
b3b48e3e 13254 lastub = s++;
7fd134d9 13255 }
7fd134d9 13256 }
378cc40b 13257 }
02aa26ce 13258
02aa26ce 13259
0b7fceb9 13260 /*
58bb9ec3
NC
13261 We try to do an integer conversion first if no characters
13262 indicating "float" have been found.
0b7fceb9
MU
13263 */
13264
13265 if (!floatit) {
58bb9ec3 13266 UV uv;
6136c704 13267 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13268
13269 if (flags == IS_NUMBER_IN_UV) {
13270 if (uv <= IV_MAX)
b081dd7e 13271 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13272 else
b081dd7e 13273 sv = newSVuv(uv);
58bb9ec3
NC
13274 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13275 if (uv <= (UV) IV_MIN)
b081dd7e 13276 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13277 else
13278 floatit = TRUE;
13279 } else
13280 floatit = TRUE;
13281 }
0b7fceb9 13282 if (floatit) {
58bb9ec3
NC
13283 /* terminate the string */
13284 *d = '\0';
86554af2 13285 nv = Atof(PL_tokenbuf);
b081dd7e 13286 sv = newSVnv(nv);
86554af2 13287 }
86554af2 13288
eb0d8d16
NC
13289 if ( floatit
13290 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13291 const char *const key = floatit ? "float" : "integer";
13292 const STRLEN keylen = floatit ? 5 : 7;
13293 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13294 key, keylen, sv, NULL, NULL, 0);
13295 }
378cc40b 13296 break;
0b7fceb9 13297
e312add1 13298 /* if it starts with a v, it could be a v-string */
a7cb1f99 13299 case 'v':
dd629d5b 13300vstring:
561b68a9 13301 sv = newSV(5); /* preallocate storage space */
65b06e02 13302 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13303 break;
79072805 13304 }
a687059c 13305
02aa26ce
NT
13306 /* make the op for the constant and return */
13307
a86a20aa 13308 if (sv)
b73d6f50 13309 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13310 else
5f66b61c 13311 lvalp->opval = NULL;
a687059c 13312
73d840c0 13313 return (char *)s;
378cc40b
LW
13314}
13315
76e3520e 13316STATIC char *
cea2e8a9 13317S_scan_formline(pTHX_ register char *s)
378cc40b 13318{
97aff369 13319 dVAR;
79072805 13320 register char *eol;
378cc40b 13321 register char *t;
6136c704 13322 SV * const stuff = newSVpvs("");
79072805 13323 bool needargs = FALSE;
c5ee2135 13324 bool eofmt = FALSE;
5db06880
NC
13325#ifdef PERL_MAD
13326 char *tokenstart = s;
4f61fd4b
JC
13327 SV* savewhite = NULL;
13328
5db06880 13329 if (PL_madskills) {
cd81e915
NC
13330 savewhite = PL_thiswhite;
13331 PL_thiswhite = 0;
5db06880
NC
13332 }
13333#endif
378cc40b 13334
7918f24d
NC
13335 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13336
79072805 13337 while (!needargs) {
a1b95068 13338 if (*s == '.') {
c35e046a 13339 t = s+1;
51882d45 13340#ifdef PERL_STRICT_CR
c35e046a
AL
13341 while (SPACE_OR_TAB(*t))
13342 t++;
51882d45 13343#else
c35e046a
AL
13344 while (SPACE_OR_TAB(*t) || *t == '\r')
13345 t++;
51882d45 13346#endif
c5ee2135
WL
13347 if (*t == '\n' || t == PL_bufend) {
13348 eofmt = TRUE;
79072805 13349 break;
c5ee2135 13350 }
79072805 13351 }
3280af22 13352 if (PL_in_eval && !PL_rsfp) {
07409e01 13353 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13354 if (!eol++)
3280af22 13355 eol = PL_bufend;
0f85fab0
LW
13356 }
13357 else
3280af22 13358 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13359 if (*s != '#') {
a0d0e21e
LW
13360 for (t = s; t < eol; t++) {
13361 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13362 needargs = FALSE;
13363 goto enough; /* ~~ must be first line in formline */
378cc40b 13364 }
a0d0e21e
LW
13365 if (*t == '@' || *t == '^')
13366 needargs = TRUE;
378cc40b 13367 }
7121b347
MG
13368 if (eol > s) {
13369 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13370#ifndef PERL_STRICT_CR
7121b347
MG
13371 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13372 char *end = SvPVX(stuff) + SvCUR(stuff);
13373 end[-2] = '\n';
13374 end[-1] = '\0';
b162af07 13375 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13376 }
2dc4c65b 13377#endif
7121b347
MG
13378 }
13379 else
13380 break;
79072805 13381 }
95a20fc0 13382 s = (char*)eol;
3280af22 13383 if (PL_rsfp) {
f0e67a1d 13384 bool got_some;
5db06880
NC
13385#ifdef PERL_MAD
13386 if (PL_madskills) {
cd81e915
NC
13387 if (PL_thistoken)
13388 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13389 else
cd81e915 13390 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13391 }
13392#endif
f0e67a1d
Z
13393 PL_bufptr = PL_bufend;
13394 CopLINE_inc(PL_curcop);
13395 got_some = lex_next_chunk(0);
13396 CopLINE_dec(PL_curcop);
13397 s = PL_bufptr;
5db06880 13398#ifdef PERL_MAD
f0e67a1d 13399 tokenstart = PL_bufptr;
5db06880 13400#endif
f0e67a1d 13401 if (!got_some)
378cc40b 13402 break;
378cc40b 13403 }
463ee0b2 13404 incline(s);
79072805 13405 }
a0d0e21e
LW
13406 enough:
13407 if (SvCUR(stuff)) {
3280af22 13408 PL_expect = XTERM;
79072805 13409 if (needargs) {
3280af22 13410 PL_lex_state = LEX_NORMAL;
cd81e915 13411 start_force(PL_curforce);
9ded7720 13412 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13413 force_next(',');
13414 }
a0d0e21e 13415 else
3280af22 13416 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13417 if (!IN_BYTES) {
95a20fc0 13418 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13419 SvUTF8_on(stuff);
13420 else if (PL_encoding)
13421 sv_recode_to_utf8(stuff, PL_encoding);
13422 }
cd81e915 13423 start_force(PL_curforce);
9ded7720 13424 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13425 force_next(THING);
cd81e915 13426 start_force(PL_curforce);
9ded7720 13427 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13428 force_next(LSTOP);
378cc40b 13429 }
79072805 13430 else {
8990e307 13431 SvREFCNT_dec(stuff);
c5ee2135
WL
13432 if (eofmt)
13433 PL_lex_formbrack = 0;
3280af22 13434 PL_bufptr = s;
79072805 13435 }
5db06880
NC
13436#ifdef PERL_MAD
13437 if (PL_madskills) {
cd81e915
NC
13438 if (PL_thistoken)
13439 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13440 else
cd81e915
NC
13441 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13442 PL_thiswhite = savewhite;
5db06880
NC
13443 }
13444#endif
79072805 13445 return s;
378cc40b 13446}
a687059c 13447
ba6d6ac9 13448I32
864dbfa3 13449Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13450{
97aff369 13451 dVAR;
a3b680e6 13452 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13453 CV* const outsidecv = PL_compcv;
8990e307 13454
3280af22
NIS
13455 if (PL_compcv) {
13456 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13457 }
7766f137 13458 SAVEI32(PL_subline);
3280af22 13459 save_item(PL_subname);
3280af22 13460 SAVESPTR(PL_compcv);
3280af22 13461
ea726b52 13462 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13463 CvFLAGS(PL_compcv) |= flags;
13464
57843af0 13465 PL_subline = CopLINE(PL_curcop);
dd2155a4 13466 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13467 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13468 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13469
8990e307
LW
13470 return oldsavestack_ix;
13471}
13472
084592ab
CN
13473#ifdef __SC__
13474#pragma segment Perl_yylex
13475#endif
af41e527
NC
13476static int
13477S_yywarn(pTHX_ const char *const s)
8990e307 13478{
97aff369 13479 dVAR;
7918f24d
NC
13480
13481 PERL_ARGS_ASSERT_YYWARN;
13482
faef0170 13483 PL_in_eval |= EVAL_WARNONLY;
748a9306 13484 yyerror(s);
faef0170 13485 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13486 return 0;
8990e307
LW
13487}
13488
13489int
15f169a1 13490Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13491{
97aff369 13492 dVAR;
bfed75c6
AL
13493 const char *where = NULL;
13494 const char *context = NULL;
68dc0745 13495 int contlen = -1;
46fc3d4c 13496 SV *msg;
5912531f 13497 int yychar = PL_parser->yychar;
463ee0b2 13498
7918f24d
NC
13499 PERL_ARGS_ASSERT_YYERROR;
13500
3280af22 13501 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13502 where = "at EOF";
8bcfe651
TM
13503 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13504 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13505 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13506 /*
13507 Only for NetWare:
13508 The code below is removed for NetWare because it abends/crashes on NetWare
13509 when the script has error such as not having the closing quotes like:
13510 if ($var eq "value)
13511 Checking of white spaces is anyway done in NetWare code.
13512 */
13513#ifndef NETWARE
3280af22
NIS
13514 while (isSPACE(*PL_oldoldbufptr))
13515 PL_oldoldbufptr++;
f355267c 13516#endif
3280af22
NIS
13517 context = PL_oldoldbufptr;
13518 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13519 }
8bcfe651
TM
13520 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13521 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13522 /*
13523 Only for NetWare:
13524 The code below is removed for NetWare because it abends/crashes on NetWare
13525 when the script has error such as not having the closing quotes like:
13526 if ($var eq "value)
13527 Checking of white spaces is anyway done in NetWare code.
13528 */
13529#ifndef NETWARE
3280af22
NIS
13530 while (isSPACE(*PL_oldbufptr))
13531 PL_oldbufptr++;
f355267c 13532#endif
3280af22
NIS
13533 context = PL_oldbufptr;
13534 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13535 }
13536 else if (yychar > 255)
68dc0745 13537 where = "next token ???";
12fbd33b 13538 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13539 if (PL_lex_state == LEX_NORMAL ||
13540 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13541 where = "at end of line";
3280af22 13542 else if (PL_lex_inpat)
68dc0745 13543 where = "within pattern";
463ee0b2 13544 else
68dc0745 13545 where = "within string";
463ee0b2 13546 }
46fc3d4c 13547 else {
84bafc02 13548 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13549 if (yychar < 32)
cea2e8a9 13550 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13551 else if (isPRINT_LC(yychar)) {
88c9ea1e 13552 const char string = yychar;
5e7aa789
NC
13553 sv_catpvn(where_sv, &string, 1);
13554 }
463ee0b2 13555 else
cea2e8a9 13556 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13557 where = SvPVX_const(where_sv);
463ee0b2 13558 }
46fc3d4c 13559 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13560 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13561 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13562 if (context)
cea2e8a9 13563 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13564 else
cea2e8a9 13565 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13566 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13567 Perl_sv_catpvf(aTHX_ msg,
57def98f 13568 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13569 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13570 PL_multi_end = 0;
a0d0e21e 13571 }
500960a6 13572 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13573 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13574 }
463ee0b2 13575 else
5a844595 13576 qerror(msg);
c7d6bfb2
GS
13577 if (PL_error_count >= 10) {
13578 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13579 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13580 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13581 else
13582 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13583 OutCopFILE(PL_curcop));
c7d6bfb2 13584 }
3280af22 13585 PL_in_my = 0;
5c284bb0 13586 PL_in_my_stash = NULL;
463ee0b2
LW
13587 return 0;
13588}
084592ab
CN
13589#ifdef __SC__
13590#pragma segment Main
13591#endif
4e35701f 13592
b250498f 13593STATIC char*
3ae08724 13594S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13595{
97aff369 13596 dVAR;
f54cb97a 13597 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13598
13599 PERL_ARGS_ASSERT_SWALLOW_BOM;
13600
7aa207d6 13601 switch (s[0]) {
4e553d73
NIS
13602 case 0xFF:
13603 if (s[1] == 0xFE) {
ee6ba15d 13604 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13605 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13606 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13607#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13608 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13609 s += 2;
dea0fc0b 13610 if (PL_bufend > (char*)s) {
81a923f4 13611 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13612 }
b250498f 13613#else
ee6ba15d 13614 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13615#endif
01ec43d0
GS
13616 }
13617 break;
78ae23f5 13618 case 0xFE:
7aa207d6 13619 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13620#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13621 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13622 s += 2;
13623 if (PL_bufend > (char *)s) {
81a923f4 13624 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13625 }
b250498f 13626#else
ee6ba15d 13627 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13628#endif
01ec43d0
GS
13629 }
13630 break;
3ae08724
GS
13631 case 0xEF:
13632 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13633 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13634 s += 3; /* UTF-8 */
13635 }
13636 break;
13637 case 0:
7aa207d6
JH
13638 if (slen > 3) {
13639 if (s[1] == 0) {
13640 if (s[2] == 0xFE && s[3] == 0xFF) {
13641 /* UTF-32 big-endian */
ee6ba15d 13642 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13643 }
13644 }
13645 else if (s[2] == 0 && s[3] != 0) {
13646 /* Leading bytes
13647 * 00 xx 00 xx
13648 * are a good indicator of UTF-16BE. */
ee6ba15d 13649#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13650 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13651 s = add_utf16_textfilter(s, FALSE);
13652#else
13653 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13654#endif
7aa207d6 13655 }
01ec43d0 13656 }
e294cc5d
JH
13657#ifdef EBCDIC
13658 case 0xDD:
13659 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13660 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13661 s += 4; /* UTF-8 */
13662 }
13663 break;
13664#endif
13665
7aa207d6
JH
13666 default:
13667 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13668 /* Leading bytes
13669 * xx 00 xx 00
13670 * are a good indicator of UTF-16LE. */
ee6ba15d 13671#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13672 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13673 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13674#else
13675 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13676#endif
7aa207d6 13677 }
01ec43d0 13678 }
b8f84bb2 13679 return (char*)s;
b250498f 13680}
4755096e 13681
6e3aabd6
GS
13682
13683#ifndef PERL_NO_UTF16_FILTER
13684static I32
a28af015 13685S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13686{
97aff369 13687 dVAR;
f3040f2c 13688 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13689 /* We re-use this each time round, throwing the contents away before we
13690 return. */
2a773401 13691 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13692 SV *const utf8_buffer = filter;
c28d6105 13693 IV status = IoPAGE(filter);
f2338a2e 13694 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13695 I32 retval;
c8b0cbae 13696
c85ae797
NC
13697 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13698
c8b0cbae
NC
13699 /* As we're automatically added, at the lowest level, and hence only called
13700 from this file, we can be sure that we're not called in block mode. Hence
13701 don't bother writing code to deal with block mode. */
13702 if (maxlen) {
13703 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13704 }
c28d6105
NC
13705 if (status < 0) {
13706 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13707 }
1de9afcd 13708 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13709 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13710 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13711 reverse ? 'l' : 'b', idx, maxlen, status,
13712 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13713
13714 while (1) {
13715 STRLEN chars;
13716 STRLEN have;
dea0fc0b 13717 I32 newlen;
2a773401 13718 U8 *end;
c28d6105
NC
13719 /* First, look in our buffer of existing UTF-8 data: */
13720 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13721
13722 if (nl) {
13723 ++nl;
13724 } else if (status == 0) {
13725 /* EOF */
13726 IoPAGE(filter) = 0;
13727 nl = SvEND(utf8_buffer);
13728 }
13729 if (nl) {
d2d1d4de
NC
13730 STRLEN got = nl - SvPVX(utf8_buffer);
13731 /* Did we have anything to append? */
13732 retval = got != 0;
13733 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13734 /* Everything else in this code works just fine if SVp_POK isn't
13735 set. This, however, needs it, and we need it to work, else
13736 we loop infinitely because the buffer is never consumed. */
13737 sv_chop(utf8_buffer, nl);
13738 break;
13739 }
ba77e4cc 13740
c28d6105
NC
13741 /* OK, not a complete line there, so need to read some more UTF-16.
13742 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13743 while (1) {
13744 if (status <= 0)
13745 break;
13746 if (SvCUR(utf16_buffer) >= 2) {
13747 /* Location of the high octet of the last complete code point.
13748 Gosh, UTF-16 is a pain. All the benefits of variable length,
13749 *coupled* with all the benefits of partial reads and
13750 endianness. */
13751 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13752 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13753
13754 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13755 break;
13756 }
13757
13758 /* We have the first half of a surrogate. Read more. */
13759 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13760 }
c28d6105 13761
c28d6105
NC
13762 status = FILTER_READ(idx + 1, utf16_buffer,
13763 160 + (SvCUR(utf16_buffer) & 1));
13764 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13765 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13766 if (status < 0) {
13767 /* Error */
13768 IoPAGE(filter) = status;
13769 return status;
13770 }
13771 }
13772
13773 chars = SvCUR(utf16_buffer) >> 1;
13774 have = SvCUR(utf8_buffer);
13775 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13776
aa6dbd60 13777 if (reverse) {
c28d6105
NC
13778 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13779 (U8*)SvPVX_const(utf8_buffer) + have,
13780 chars * 2, &newlen);
aa6dbd60 13781 } else {
2a773401 13782 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13783 (U8*)SvPVX_const(utf8_buffer) + have,
13784 chars * 2, &newlen);
2a773401 13785 }
c28d6105 13786 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13787 *end = '\0';
c28d6105 13788
e07286ed
NC
13789 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13790 it's private to us, and utf16_to_utf8{,reversed} take a
13791 (pointer,length) pair, rather than a NUL-terminated string. */
13792 if(SvCUR(utf16_buffer) & 1) {
13793 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13794 SvCUR_set(utf16_buffer, 1);
13795 } else {
13796 SvCUR_set(utf16_buffer, 0);
13797 }
2a773401 13798 }
c28d6105
NC
13799 DEBUG_P(PerlIO_printf(Perl_debug_log,
13800 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13801 status,
13802 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13803 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13804 return retval;
6e3aabd6 13805}
81a923f4
NC
13806
13807static U8 *
13808S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13809{
2a773401 13810 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13811
c85ae797
NC
13812 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13813
c28d6105 13814 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13815 sv_setpvs(filter, "");
2a773401 13816 IoLINES(filter) = reversed;
c28d6105
NC
13817 IoPAGE(filter) = 1; /* Not EOF */
13818
13819 /* Sadly, we have to return a valid pointer, come what may, so we have to
13820 ignore any error return from this. */
13821 SvCUR_set(PL_linestr, 0);
13822 if (FILTER_READ(0, PL_linestr, 0)) {
13823 SvUTF8_on(PL_linestr);
81a923f4 13824 } else {
c28d6105 13825 SvUTF8_on(PL_linestr);
81a923f4 13826 }
c28d6105 13827 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13828 return (U8*)SvPVX(PL_linestr);
13829}
6e3aabd6 13830#endif
9f4817db 13831
f333445c
JP
13832/*
13833Returns a pointer to the next character after the parsed
13834vstring, as well as updating the passed in sv.
13835
13836Function must be called like
13837
561b68a9 13838 sv = newSV(5);
65b06e02 13839 s = scan_vstring(s,e,sv);
f333445c 13840
65b06e02 13841where s and e are the start and end of the string.
f333445c
JP
13842The sv should already be large enough to store the vstring
13843passed in, for performance reasons.
13844
13845*/
13846
13847char *
15f169a1 13848Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13849{
97aff369 13850 dVAR;
bfed75c6
AL
13851 const char *pos = s;
13852 const char *start = s;
7918f24d
NC
13853
13854 PERL_ARGS_ASSERT_SCAN_VSTRING;
13855
f333445c 13856 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13857 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13858 pos++;
f333445c
JP
13859 if ( *pos != '.') {
13860 /* this may not be a v-string if followed by => */
bfed75c6 13861 const char *next = pos;
65b06e02 13862 while (next < e && isSPACE(*next))
8fc7bb1c 13863 ++next;
65b06e02 13864 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13865 /* return string not v-string */
13866 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13867 return (char *)pos;
f333445c
JP
13868 }
13869 }
13870
13871 if (!isALPHA(*pos)) {
89ebb4a3 13872 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13873
d4c19fe8
AL
13874 if (*s == 'v')
13875 s++; /* get past 'v' */
f333445c 13876
76f68e9b 13877 sv_setpvs(sv, "");
f333445c
JP
13878
13879 for (;;) {
d4c19fe8 13880 /* this is atoi() that tolerates underscores */
0bd48802
AL
13881 U8 *tmpend;
13882 UV rev = 0;
d4c19fe8
AL
13883 const char *end = pos;
13884 UV mult = 1;
13885 while (--end >= s) {
13886 if (*end != '_') {
13887 const UV orev = rev;
f333445c
JP
13888 rev += (*end - '0') * mult;
13889 mult *= 10;
9b387841
NC
13890 if (orev > rev)
13891 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13892 "Integer overflow in decimal number");
f333445c
JP
13893 }
13894 }
13895#ifdef EBCDIC
13896 if (rev > 0x7FFFFFFF)
13897 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13898#endif
13899 /* Append native character for the rev point */
13900 tmpend = uvchr_to_utf8(tmpbuf, rev);
13901 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13902 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13903 SvUTF8_on(sv);
65b06e02 13904 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13905 s = ++pos;
13906 else {
13907 s = pos;
13908 break;
13909 }
65b06e02 13910 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13911 pos++;
13912 }
13913 SvPOK_on(sv);
13914 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13915 SvRMAGICAL_on(sv);
13916 }
73d840c0 13917 return (char *)s;
f333445c
JP
13918}
13919
88e1f1a2
JV
13920int
13921Perl_keyword_plugin_standard(pTHX_
13922 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13923{
13924 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13925 PERL_UNUSED_CONTEXT;
13926 PERL_UNUSED_ARG(keyword_ptr);
13927 PERL_UNUSED_ARG(keyword_len);
13928 PERL_UNUSED_ARG(op_ptr);
13929 return KEYWORD_PLUGIN_DECLINE;
13930}
13931
1da4ca5f
NC
13932/*
13933 * Local variables:
13934 * c-indentation-style: bsd
13935 * c-basic-offset: 4
13936 * indent-tabs-mode: t
13937 * End:
13938 *
37442d52
RGS
13939 * ex: set ts=8 sts=4 sw=4 noet:
13940 */