This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add /d, /l, /u (infixed) regex modifiers
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
51#define PL_lex_brackstack (PL_parser->lex_brackstack)
52#define PL_lex_casemods (PL_parser->lex_casemods)
53#define PL_lex_casestack (PL_parser->lex_casestack)
54#define PL_lex_defer (PL_parser->lex_defer)
55#define PL_lex_dojoin (PL_parser->lex_dojoin)
56#define PL_lex_expect (PL_parser->lex_expect)
57#define PL_lex_formbrack (PL_parser->lex_formbrack)
58#define PL_lex_inpat (PL_parser->lex_inpat)
59#define PL_lex_inwhat (PL_parser->lex_inwhat)
60#define PL_lex_op (PL_parser->lex_op)
61#define PL_lex_repl (PL_parser->lex_repl)
62#define PL_lex_starts (PL_parser->lex_starts)
63#define PL_lex_stuff (PL_parser->lex_stuff)
64#define PL_multi_start (PL_parser->multi_start)
65#define PL_multi_open (PL_parser->multi_open)
66#define PL_multi_close (PL_parser->multi_close)
67#define PL_pending_ident (PL_parser->pending_ident)
68#define PL_preambled (PL_parser->preambled)
69#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 70#define PL_linestr (PL_parser->linestr)
c2598295
DM
71#define PL_expect (PL_parser->expect)
72#define PL_copline (PL_parser->copline)
f06b5848
DM
73#define PL_bufptr (PL_parser->bufptr)
74#define PL_oldbufptr (PL_parser->oldbufptr)
75#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76#define PL_linestart (PL_parser->linestart)
77#define PL_bufend (PL_parser->bufend)
78#define PL_last_uni (PL_parser->last_uni)
79#define PL_last_lop (PL_parser->last_lop)
80#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 81#define PL_lex_state (PL_parser->lex_state)
2f9285f8 82#define PL_rsfp (PL_parser->rsfp)
5486870f 83#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
84#define PL_in_my (PL_parser->in_my)
85#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 86#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 87#define PL_multi_end (PL_parser->multi_end)
13765c85 88#define PL_error_count (PL_parser->error_count)
199e78b7
DM
89
90#ifdef PERL_MAD
91# define PL_endwhite (PL_parser->endwhite)
92# define PL_faketokens (PL_parser->faketokens)
93# define PL_lasttoke (PL_parser->lasttoke)
94# define PL_nextwhite (PL_parser->nextwhite)
95# define PL_realtokenstart (PL_parser->realtokenstart)
96# define PL_skipwhite (PL_parser->skipwhite)
97# define PL_thisclose (PL_parser->thisclose)
98# define PL_thismad (PL_parser->thismad)
99# define PL_thisopen (PL_parser->thisopen)
100# define PL_thisstuff (PL_parser->thisstuff)
101# define PL_thistoken (PL_parser->thistoken)
102# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
103# define PL_thiswhite (PL_parser->thiswhite)
104# define PL_nexttoke (PL_parser->nexttoke)
105# define PL_curforce (PL_parser->curforce)
106#else
107# define PL_nexttoke (PL_parser->nexttoke)
108# define PL_nexttype (PL_parser->nexttype)
109# define PL_nextval (PL_parser->nextval)
199e78b7
DM
110#endif
111
16173588
NC
112/* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
114static int
115S_pending_ident(pTHX);
199e78b7 116
0bd48802 117static const char ident_too_long[] = "Identifier too long";
8903cb82 118
29595ff2 119#ifdef PERL_MAD
29595ff2 120# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 121# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 122#else
5db06880 123# define CURMAD(slot,sv)
9ded7720 124# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
125#endif
126
9059aa12
LW
127#define XFAKEBRACK 128
128#define XENUMMASK 127
129
39e02b42
JH
130#ifdef USE_UTF8_SCRIPTS
131# define UTF (!IN_BYTES)
2b9d42f0 132#else
746b446a 133# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 134#endif
a0ed51b3 135
b1fc3636
CJ
136/* The maximum number of characters preceding the unrecognized one to display */
137#define UNRECOGNIZED_PRECEDE_COUNT 10
138
61f0cdd9 139/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
140 * 1999-02-27 mjd-perl-patch@plover.com */
141#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
142
bf4acbe4 143#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 144
ffb4593c
NT
145/* LEX_* are values for PL_lex_state, the state of the lexer.
146 * They are arranged oddly so that the guard on the switch statement
79072805
LW
147 * can get by with a single comparison (if the compiler is smart enough).
148 */
149
fb73857a 150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
b6007c36
DM
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
79072805 168
bbf60fe6 169#ifdef DEBUGGING
27da23d5 170static const char* const lex_state_names[] = {
bbf60fe6
DM
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
79072805
LW
185#ifdef ff_next
186#undef ff_next
d48672a2
LW
187#endif
188
79072805 189#include "keywords.h"
fe14fcc3 190
ffb4593c
NT
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
ae986130
LW
193#ifdef CLINE
194#undef CLINE
195#endif
57843af0 196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 197
5db06880 198#ifdef PERL_MAD
29595ff2
NC
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
ffb4593c
NT
210/*
211 * Convenience functions to return different tokens and prime the
9cbb5ea2 212 * lexer for the next token. They all take an argument.
ffb4593c
NT
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
2d2e263d 224 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
225 * BOop : bitwise or or xor
226 * BAop : bitwise and
227 * SHop : shift operator
228 * PWop : power operator
9cbb5ea2 229 * PMop : pattern-matching operator
ffb4593c
NT
230 * Aop : addition-level operator
231 * Mop : multiplication-level operator
232 * Eop : equality-testing operator
e5edeb50 233 * Rop : relational operator <= != gt
ffb4593c
NT
234 *
235 * Also see LOP and lop() below.
236 */
237
998054bd 238#ifdef DEBUGGING /* Serve -DT. */
704d4215 239# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 240#else
bbf60fe6 241# define REPORT(retval) (retval)
998054bd
SC
242#endif
243
bbf60fe6
DM
244#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
245#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
246#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
247#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
248#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
249#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
250#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
251#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
252#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
253#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
254#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
255#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
256#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
257#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
258#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
259#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
260#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
261#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
262#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
263#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 264
a687059c
LW
265/* This bit of chicanery makes a unary function followed by
266 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
267 * The UNIDOR macro is for unary functions that can be followed by the //
268 * operator (such as C<shift // 0>).
a687059c 269 */
376fcdbf 270#define UNI2(f,x) { \
6154021b 271 pl_yylval.ival = f; \
376fcdbf
AL
272 PL_expect = x; \
273 PL_bufptr = s; \
274 PL_last_uni = PL_oldbufptr; \
275 PL_last_lop_op = f; \
276 if (*s == '(') \
277 return REPORT( (int)FUNC1 ); \
29595ff2 278 s = PEEKSPACE(s); \
376fcdbf
AL
279 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
280 }
6f33ba73
RGS
281#define UNI(f) UNI2(f,XTERM)
282#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 283
376fcdbf 284#define UNIBRACK(f) { \
6154021b 285 pl_yylval.ival = f; \
376fcdbf
AL
286 PL_bufptr = s; \
287 PL_last_uni = PL_oldbufptr; \
288 if (*s == '(') \
289 return REPORT( (int)FUNC1 ); \
29595ff2 290 s = PEEKSPACE(s); \
376fcdbf
AL
291 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
292 }
79072805 293
9f68db38 294/* grandfather return to old style */
6154021b 295#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 296
8fa7f367
JH
297#ifdef DEBUGGING
298
6154021b 299/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
300enum token_type {
301 TOKENTYPE_NONE,
302 TOKENTYPE_IVAL,
6154021b 303 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
304 TOKENTYPE_PVAL,
305 TOKENTYPE_OPVAL,
306 TOKENTYPE_GVVAL
307};
308
6d4a66ac
NC
309static struct debug_tokens {
310 const int token;
311 enum token_type type;
312 const char *name;
313} const debug_tokens[] =
9041c2e3 314{
bbf60fe6
DM
315 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
316 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
317 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
318 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
319 { ARROW, TOKENTYPE_NONE, "ARROW" },
320 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
321 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
322 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
323 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
324 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 325 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
326 { DO, TOKENTYPE_NONE, "DO" },
327 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
328 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
329 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
330 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
331 { ELSE, TOKENTYPE_NONE, "ELSE" },
332 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
333 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
334 { FOR, TOKENTYPE_IVAL, "FOR" },
335 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
336 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
337 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
338 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
339 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
340 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 341 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
342 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
343 { IF, TOKENTYPE_IVAL, "IF" },
344 { LABEL, TOKENTYPE_PVAL, "LABEL" },
345 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
346 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
347 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
348 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
349 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
350 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
351 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
352 { MY, TOKENTYPE_IVAL, "MY" },
353 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
354 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
355 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
356 { OROP, TOKENTYPE_IVAL, "OROP" },
357 { OROR, TOKENTYPE_NONE, "OROR" },
358 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
359 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
360 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
361 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
362 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
363 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
364 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
365 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
366 { PREINC, TOKENTYPE_NONE, "PREINC" },
367 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
368 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
369 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
370 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
371 { SUB, TOKENTYPE_NONE, "SUB" },
372 { THING, TOKENTYPE_OPVAL, "THING" },
373 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
374 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
375 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
376 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
377 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
378 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 379 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
380 { WHILE, TOKENTYPE_IVAL, "WHILE" },
381 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 382 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 383 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
384};
385
6154021b 386/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 387
bbf60fe6 388STATIC int
704d4215 389S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 390{
97aff369 391 dVAR;
7918f24d
NC
392
393 PERL_ARGS_ASSERT_TOKEREPORT;
394
bbf60fe6 395 if (DEBUG_T_TEST) {
bd61b366 396 const char *name = NULL;
bbf60fe6 397 enum token_type type = TOKENTYPE_NONE;
f54cb97a 398 const struct debug_tokens *p;
396482e1 399 SV* const report = newSVpvs("<== ");
bbf60fe6 400
f54cb97a 401 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
402 if (p->token == (int)rv) {
403 name = p->name;
404 type = p->type;
405 break;
406 }
407 }
408 if (name)
54667de8 409 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
410 else if ((char)rv > ' ' && (char)rv < '~')
411 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
412 else if (!rv)
396482e1 413 sv_catpvs(report, "EOF");
bbf60fe6
DM
414 else
415 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
416 switch (type) {
417 case TOKENTYPE_NONE:
418 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
419 break;
420 case TOKENTYPE_IVAL:
704d4215 421 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
422 break;
423 case TOKENTYPE_OPNUM:
424 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 425 PL_op_name[lvalp->ival]);
bbf60fe6
DM
426 break;
427 case TOKENTYPE_PVAL:
704d4215 428 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
429 break;
430 case TOKENTYPE_OPVAL:
704d4215 431 if (lvalp->opval) {
401441c0 432 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
433 PL_op_name[lvalp->opval->op_type]);
434 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 435 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 436 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
437 }
438
439 }
401441c0 440 else
396482e1 441 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
442 break;
443 }
b6007c36 444 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
445 };
446 return (int)rv;
998054bd
SC
447}
448
b6007c36
DM
449
450/* print the buffer with suitable escapes */
451
452STATIC void
15f169a1 453S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 454{
396482e1 455 SV* const tmp = newSVpvs("");
7918f24d
NC
456
457 PERL_ARGS_ASSERT_PRINTBUF;
458
b6007c36
DM
459 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
460 SvREFCNT_dec(tmp);
461}
462
8fa7f367
JH
463#endif
464
8290c323
NC
465static int
466S_deprecate_commaless_var_list(pTHX) {
467 PL_expect = XTERM;
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
470}
471
ffb4593c
NT
472/*
473 * S_ao
474 *
c963b151
BD
475 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
476 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
477 */
478
76e3520e 479STATIC int
cea2e8a9 480S_ao(pTHX_ int toketype)
a0d0e21e 481{
97aff369 482 dVAR;
3280af22
NIS
483 if (*PL_bufptr == '=') {
484 PL_bufptr++;
a0d0e21e 485 if (toketype == ANDAND)
6154021b 486 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 487 else if (toketype == OROR)
6154021b 488 pl_yylval.ival = OP_ORASSIGN;
c963b151 489 else if (toketype == DORDOR)
6154021b 490 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
491 toketype = ASSIGNOP;
492 }
493 return toketype;
494}
495
ffb4593c
NT
496/*
497 * S_no_op
498 * When Perl expects an operator and finds something else, no_op
499 * prints the warning. It always prints "<something> found where
500 * operator expected. It prints "Missing semicolon on previous line?"
501 * if the surprise occurs at the start of the line. "do you need to
502 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
503 * where the compiler doesn't know if foo is a method call or a function.
504 * It prints "Missing operator before end of line" if there's nothing
505 * after the missing operator, or "... before <...>" if there is something
506 * after the missing operator.
507 */
508
76e3520e 509STATIC void
15f169a1 510S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 511{
97aff369 512 dVAR;
9d4ba2ae
AL
513 char * const oldbp = PL_bufptr;
514 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 515
7918f24d
NC
516 PERL_ARGS_ASSERT_NO_OP;
517
1189a94a
GS
518 if (!s)
519 s = oldbp;
07c798fb 520 else
1189a94a 521 PL_bufptr = s;
cea2e8a9 522 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
523 if (ckWARN_d(WARN_SYNTAX)) {
524 if (is_first)
525 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
526 "\t(Missing semicolon on previous line?)\n");
527 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 528 const char *t;
c35e046a
AL
529 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
530 NOOP;
56da5a46
RGS
531 if (t < PL_bufptr && isSPACE(*t))
532 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
533 "\t(Do you need to predeclare %.*s?)\n",
551405c4 534 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
535 }
536 else {
537 assert(s >= oldbp);
538 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 539 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 540 }
07c798fb 541 }
3280af22 542 PL_bufptr = oldbp;
8990e307
LW
543}
544
ffb4593c
NT
545/*
546 * S_missingterm
547 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 548 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
549 * If we're in a delimited string and the delimiter is a control
550 * character, it's reformatted into a two-char sequence like ^C.
551 * This is fatal.
552 */
553
76e3520e 554STATIC void
cea2e8a9 555S_missingterm(pTHX_ char *s)
8990e307 556{
97aff369 557 dVAR;
8990e307
LW
558 char tmpbuf[3];
559 char q;
560 if (s) {
9d4ba2ae 561 char * const nl = strrchr(s,'\n');
d2719217 562 if (nl)
8990e307
LW
563 *nl = '\0';
564 }
463559e7 565 else if (isCNTRL(PL_multi_close)) {
8990e307 566 *tmpbuf = '^';
585ec06d 567 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
568 tmpbuf[2] = '\0';
569 s = tmpbuf;
570 }
571 else {
eb160463 572 *tmpbuf = (char)PL_multi_close;
8990e307
LW
573 tmpbuf[1] = '\0';
574 s = tmpbuf;
575 }
576 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 577 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 578}
79072805 579
ef89dcc3 580#define FEATURE_IS_ENABLED(name) \
0d863452 581 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 582 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 583/* The longest string we pass in. */
1863b879 584#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 585
0d863452
RH
586/*
587 * S_feature_is_enabled
588 * Check whether the named feature is enabled.
589 */
590STATIC bool
15f169a1 591S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 592{
97aff369 593 dVAR;
0d863452 594 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 595 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
596
597 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
598
4a731d7b
NC
599 assert(namelen <= MAX_FEATURE_LEN);
600 memcpy(&he_name[8], name, namelen);
d4c19fe8 601
7b9ef140 602 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
603}
604
ffb4593c 605/*
9cbb5ea2
GS
606 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
607 * utf16-to-utf8-reversed.
ffb4593c
NT
608 */
609
c39cd008
GS
610#ifdef PERL_CR_FILTER
611static void
612strip_return(SV *sv)
613{
95a20fc0 614 register const char *s = SvPVX_const(sv);
9d4ba2ae 615 register const char * const e = s + SvCUR(sv);
7918f24d
NC
616
617 PERL_ARGS_ASSERT_STRIP_RETURN;
618
c39cd008
GS
619 /* outer loop optimized to do nothing if there are no CR-LFs */
620 while (s < e) {
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
624 *d++ = *s++;
625 while (s < e) {
626 if (*s == '\r' && s[1] == '\n')
627 s++;
628 *d++ = *s++;
629 }
630 SvCUR(sv) -= s - d;
631 return;
632 }
633 }
634}
a868473f 635
76e3520e 636STATIC I32
c39cd008 637S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 638{
f54cb97a 639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
640 if (count > 0 && !maxlen)
641 strip_return(sv);
642 return count;
a868473f
NIS
643}
644#endif
645
199e78b7
DM
646
647
ffb4593c
NT
648/*
649 * Perl_lex_start
5486870f 650 *
e3abe207 651 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
652 *
653 * rsfp is the opened file handle to read from (if any),
654 *
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
657 *
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
ffb4593c
NT
660 */
661
a0d0e21e 662void
5486870f 663Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 664{
97aff369 665 dVAR;
6ef55633 666 const char *s = NULL;
8990e307 667 STRLEN len;
5486870f 668 yy_parser *parser, *oparser;
acdf0a21
DM
669
670 /* create and initialise a parser */
671
199e78b7 672 Newxz(parser, 1, yy_parser);
5486870f 673 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
674 PL_parser = parser;
675
28ac2b49
Z
676 parser->stack = NULL;
677 parser->ps = NULL;
678 parser->stack_size = 0;
acdf0a21 679
e3abe207
DM
680 /* on scope exit, free this parser and restore any outer one */
681 SAVEPARSER(parser);
7c4baf47 682 parser->saved_curcop = PL_curcop;
e3abe207 683
acdf0a21 684 /* initialise lexer state */
8990e307 685
fb205e7a
DM
686#ifdef PERL_MAD
687 parser->curforce = -1;
688#else
689 parser->nexttoke = 0;
690#endif
ca4cfd28 691 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 692 parser->copline = NOLINE;
5afb0a62 693 parser->lex_state = LEX_NORMAL;
c2598295 694 parser->expect = XSTATE;
2f9285f8 695 parser->rsfp = rsfp;
56b27c9a 696 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 697 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 698
199e78b7
DM
699 Newx(parser->lex_brackstack, 120, char);
700 Newx(parser->lex_casestack, 12, char);
701 *parser->lex_casestack = '\0';
02b34bbe 702
10efb74f
NC
703 if (line) {
704 s = SvPV_const(line, len);
705 } else {
706 len = 0;
707 }
bdc0bf6f 708
10efb74f 709 if (!len) {
bdc0bf6f 710 parser->linestr = newSVpvs("\n;");
3e5c0189 711 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
719a9bb0
NC
712 /* avoid tie/overload weirdness */
713 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 714 if (s[len-1] != ';')
bdc0bf6f 715 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
716 } else {
717 SvTEMP_off(line);
718 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 719 parser->linestr = line;
8990e307 720 }
f06b5848
DM
721 parser->oldoldbufptr =
722 parser->oldbufptr =
723 parser->bufptr =
724 parser->linestart = SvPVX(parser->linestr);
725 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
726 parser->last_lop = parser->last_uni = NULL;
79072805 727}
a687059c 728
e3abe207
DM
729
730/* delete a parser object */
731
732void
733Perl_parser_free(pTHX_ const yy_parser *parser)
734{
7918f24d
NC
735 PERL_ARGS_ASSERT_PARSER_FREE;
736
7c4baf47 737 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
738 SvREFCNT_dec(parser->linestr);
739
2f9285f8
DM
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
799361c3
SH
742 else if (parser->rsfp && (!parser->old_parser ||
743 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 744 PerlIO_close(parser->rsfp);
5486870f 745 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 746
e3abe207
DM
747 Safefree(parser->lex_brackstack);
748 Safefree(parser->lex_casestack);
749 PL_parser = parser->old_parser;
750 Safefree(parser);
751}
752
753
ffb4593c
NT
754/*
755 * Perl_lex_end
9cbb5ea2
GS
756 * Finalizer for lexing operations. Must be called when the parser is
757 * done with the lexer.
ffb4593c
NT
758 */
759
463ee0b2 760void
864dbfa3 761Perl_lex_end(pTHX)
463ee0b2 762{
97aff369 763 dVAR;
3280af22 764 PL_doextract = FALSE;
463ee0b2
LW
765}
766
ffb4593c 767/*
f0e67a1d
Z
768=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
769
770Buffer scalar containing the chunk currently under consideration of the
771text currently being lexed. This is always a plain string scalar (for
772which C<SvPOK> is true). It is not intended to be used as a scalar by
773normal scalar means; instead refer to the buffer directly by the pointer
774variables described below.
775
776The lexer maintains various C<char*> pointers to things in the
777C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
778reallocated, all of these pointers must be updated. Don't attempt to
779do this manually, but rather use L</lex_grow_linestr> if you need to
780reallocate the buffer.
781
782The content of the text chunk in the buffer is commonly exactly one
783complete line of input, up to and including a newline terminator,
784but there are situations where it is otherwise. The octets of the
785buffer may be intended to be interpreted as either UTF-8 or Latin-1.
786The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
787flag on this scalar, which may disagree with it.
788
789For direct examination of the buffer, the variable
790L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
791lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
792of these pointers is usually preferable to examination of the scalar
793through normal scalar means.
794
795=for apidoc AmxU|char *|PL_parser-E<gt>bufend
796
797Direct pointer to the end of the chunk of text currently being lexed, the
798end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
799+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
800always located at the end of the buffer, and does not count as part of
801the buffer's contents.
802
803=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
804
805Points to the current position of lexing inside the lexer buffer.
806Characters around this point may be freely examined, within
807the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
808L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
809interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
810
811Lexing code (whether in the Perl core or not) moves this pointer past
812the characters that it consumes. It is also expected to perform some
813bookkeeping whenever a newline character is consumed. This movement
814can be more conveniently performed by the function L</lex_read_to>,
815which handles newlines appropriately.
816
817Interpretation of the buffer's octets can be abstracted out by
818using the slightly higher-level functions L</lex_peek_unichar> and
819L</lex_read_unichar>.
820
821=for apidoc AmxU|char *|PL_parser-E<gt>linestart
822
823Points to the start of the current line inside the lexer buffer.
824This is useful for indicating at which column an error occurred, and
825not much else. This must be updated by any lexing code that consumes
826a newline; the function L</lex_read_to> handles this detail.
827
828=cut
829*/
830
831/*
832=for apidoc Amx|bool|lex_bufutf8
833
834Indicates whether the octets in the lexer buffer
835(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
836of Unicode characters. If not, they should be interpreted as Latin-1
837characters. This is analogous to the C<SvUTF8> flag for scalars.
838
839In UTF-8 mode, it is not guaranteed that the lexer buffer actually
840contains valid UTF-8. Lexing code must be robust in the face of invalid
841encoding.
842
843The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
844is significant, but not the whole story regarding the input character
845encoding. Normally, when a file is being read, the scalar contains octets
846and its C<SvUTF8> flag is off, but the octets should be interpreted as
847UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
848however, the scalar may have the C<SvUTF8> flag on, and in this case its
849octets should be interpreted as UTF-8 unless the C<use bytes> pragma
850is in effect. This logic may change in the future; use this function
851instead of implementing the logic yourself.
852
853=cut
854*/
855
856bool
857Perl_lex_bufutf8(pTHX)
858{
859 return UTF;
860}
861
862/*
863=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
864
865Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
866at least I<len> octets (including terminating NUL). Returns a
867pointer to the reallocated buffer. This is necessary before making
868any direct modification of the buffer that would increase its length.
869L</lex_stuff_pvn> provides a more convenient way to insert text into
870the buffer.
871
872Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
873this function updates all of the lexer's variables that point directly
874into the buffer.
875
876=cut
877*/
878
879char *
880Perl_lex_grow_linestr(pTHX_ STRLEN len)
881{
882 SV *linestr;
883 char *buf;
884 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
885 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
886 linestr = PL_parser->linestr;
887 buf = SvPVX(linestr);
888 if (len <= SvLEN(linestr))
889 return buf;
890 bufend_pos = PL_parser->bufend - buf;
891 bufptr_pos = PL_parser->bufptr - buf;
892 oldbufptr_pos = PL_parser->oldbufptr - buf;
893 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
894 linestart_pos = PL_parser->linestart - buf;
895 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
896 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
897 buf = sv_grow(linestr, len);
898 PL_parser->bufend = buf + bufend_pos;
899 PL_parser->bufptr = buf + bufptr_pos;
900 PL_parser->oldbufptr = buf + oldbufptr_pos;
901 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
902 PL_parser->linestart = buf + linestart_pos;
903 if (PL_parser->last_uni)
904 PL_parser->last_uni = buf + last_uni_pos;
905 if (PL_parser->last_lop)
906 PL_parser->last_lop = buf + last_lop_pos;
907 return buf;
908}
909
910/*
83aa740e 911=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
912
913Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
914immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
915reallocating the buffer if necessary. This means that lexing code that
916runs later will see the characters as if they had appeared in the input.
917It is not recommended to do this as part of normal parsing, and most
918uses of this facility run the risk of the inserted characters being
919interpreted in an unintended manner.
920
921The string to be inserted is represented by I<len> octets starting
922at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
923according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
924The characters are recoded for the lexer buffer, according to how the
925buffer is currently being interpreted (L</lex_bufutf8>). If a string
926to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
927function is more convenient.
928
929=cut
930*/
931
932void
83aa740e 933Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 934{
749123ff 935 dVAR;
f0e67a1d
Z
936 char *bufptr;
937 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
938 if (flags & ~(LEX_STUFF_UTF8))
939 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
940 if (UTF) {
941 if (flags & LEX_STUFF_UTF8) {
942 goto plain_copy;
943 } else {
944 STRLEN highhalf = 0;
83aa740e 945 const char *p, *e = pv+len;
f0e67a1d
Z
946 for (p = pv; p != e; p++)
947 highhalf += !!(((U8)*p) & 0x80);
948 if (!highhalf)
949 goto plain_copy;
950 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
951 bufptr = PL_parser->bufptr;
952 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
953 SvCUR_set(PL_parser->linestr,
954 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
955 PL_parser->bufend += len+highhalf;
956 for (p = pv; p != e; p++) {
957 U8 c = (U8)*p;
958 if (c & 0x80) {
959 *bufptr++ = (char)(0xc0 | (c >> 6));
960 *bufptr++ = (char)(0x80 | (c & 0x3f));
961 } else {
962 *bufptr++ = (char)c;
963 }
964 }
965 }
966 } else {
967 if (flags & LEX_STUFF_UTF8) {
968 STRLEN highhalf = 0;
83aa740e 969 const char *p, *e = pv+len;
f0e67a1d
Z
970 for (p = pv; p != e; p++) {
971 U8 c = (U8)*p;
972 if (c >= 0xc4) {
973 Perl_croak(aTHX_ "Lexing code attempted to stuff "
974 "non-Latin-1 character into Latin-1 input");
975 } else if (c >= 0xc2 && p+1 != e &&
976 (((U8)p[1]) & 0xc0) == 0x80) {
977 p++;
978 highhalf++;
979 } else if (c >= 0x80) {
980 /* malformed UTF-8 */
981 ENTER;
982 SAVESPTR(PL_warnhook);
983 PL_warnhook = PERL_WARNHOOK_FATAL;
984 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
985 LEAVE;
986 }
987 }
988 if (!highhalf)
989 goto plain_copy;
990 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
991 bufptr = PL_parser->bufptr;
992 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
993 SvCUR_set(PL_parser->linestr,
994 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
995 PL_parser->bufend += len-highhalf;
996 for (p = pv; p != e; p++) {
997 U8 c = (U8)*p;
998 if (c & 0x80) {
999 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1000 p++;
1001 } else {
1002 *bufptr++ = (char)c;
1003 }
1004 }
1005 } else {
1006 plain_copy:
1007 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008 bufptr = PL_parser->bufptr;
1009 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1010 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1013 }
1014 }
1015}
1016
1017/*
1018=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022reallocating the buffer if necessary. This means that lexing code that
1023runs later will see the characters as if they had appeared in the input.
1024It is not recommended to do this as part of normal parsing, and most
1025uses of this facility run the risk of the inserted characters being
1026interpreted in an unintended manner.
1027
1028The string to be inserted is the string value of I<sv>. The characters
1029are recoded for the lexer buffer, according to how the buffer is currently
1030being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1031not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032need to construct a scalar.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039{
1040 char *pv;
1041 STRLEN len;
1042 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043 if (flags)
1044 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045 pv = SvPV(sv, len);
1046 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047}
1048
1049/*
1050=for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1054This hides the discarded text from any lexing code that runs later,
1055as if the text had never appeared.
1056
1057This is not the normal way to consume lexed text. For that, use
1058L</lex_read_to>.
1059
1060=cut
1061*/
1062
1063void
1064Perl_lex_unstuff(pTHX_ char *ptr)
1065{
1066 char *buf, *bufend;
1067 STRLEN unstuff_len;
1068 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069 buf = PL_parser->bufptr;
1070 if (ptr < buf)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072 if (ptr == buf)
1073 return;
1074 bufend = PL_parser->bufend;
1075 if (ptr > bufend)
1076 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077 unstuff_len = ptr - buf;
1078 Move(ptr, buf, bufend+1-ptr, char);
1079 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080 PL_parser->bufend = bufend - unstuff_len;
1081}
1082
1083/*
1084=for apidoc Amx|void|lex_read_to|char *ptr
1085
1086Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088performing the correct bookkeeping whenever a newline character is passed.
1089This is the normal way to consume lexed text.
1090
1091Interpretation of the buffer's octets can be abstracted out by
1092using the slightly higher-level functions L</lex_peek_unichar> and
1093L</lex_read_unichar>.
1094
1095=cut
1096*/
1097
1098void
1099Perl_lex_read_to(pTHX_ char *ptr)
1100{
1101 char *s;
1102 PERL_ARGS_ASSERT_LEX_READ_TO;
1103 s = PL_parser->bufptr;
1104 if (ptr < s || ptr > PL_parser->bufend)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106 for (; s != ptr; s++)
1107 if (*s == '\n') {
1108 CopLINE_inc(PL_curcop);
1109 PL_parser->linestart = s+1;
1110 }
1111 PL_parser->bufptr = ptr;
1112}
1113
1114/*
1115=for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118up to I<ptr>. The remaining content of the buffer will be moved, and
1119all pointers into the buffer updated appropriately. I<ptr> must not
1120be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121it is not permitted to discard text that has yet to be lexed.
1122
1123Normally it is not necessarily to do this directly, because it suffices to
1124use the implicit discarding behaviour of L</lex_next_chunk> and things
1125based on it. However, if a token stretches across multiple lines,
1f317c95 1126and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1127that purpose, then after completion of the token it would be wise to
1128explicitly discard the now-unneeded earlier lines, to avoid future
1129multi-line tokens growing the buffer without bound.
1130
1131=cut
1132*/
1133
1134void
1135Perl_lex_discard_to(pTHX_ char *ptr)
1136{
1137 char *buf;
1138 STRLEN discard_len;
1139 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140 buf = SvPVX(PL_parser->linestr);
1141 if (ptr < buf)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143 if (ptr == buf)
1144 return;
1145 if (ptr > PL_parser->bufptr)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147 discard_len = ptr - buf;
1148 if (PL_parser->oldbufptr < ptr)
1149 PL_parser->oldbufptr = ptr;
1150 if (PL_parser->oldoldbufptr < ptr)
1151 PL_parser->oldoldbufptr = ptr;
1152 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153 PL_parser->last_uni = NULL;
1154 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155 PL_parser->last_lop = NULL;
1156 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158 PL_parser->bufend -= discard_len;
1159 PL_parser->bufptr -= discard_len;
1160 PL_parser->oldbufptr -= discard_len;
1161 PL_parser->oldoldbufptr -= discard_len;
1162 if (PL_parser->last_uni)
1163 PL_parser->last_uni -= discard_len;
1164 if (PL_parser->last_lop)
1165 PL_parser->last_lop -= discard_len;
1166}
1167
1168/*
1169=for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171Reads in the next chunk of text to be lexed, appending it to
1172L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1173looked to the end of the current chunk and wants to know more. It is
1174usual, but not necessary, for lexing to have consumed the entirety of
1175the current chunk at this time.
1176
1177If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178chunk (i.e., the current chunk has been entirely consumed), normally the
1179current chunk will be discarded at the same time that the new chunk is
1180read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181will not be discarded. If the current chunk has not been entirely
1182consumed, then it will not be discarded regardless of the flag.
1183
1184Returns true if some new text was added to the buffer, or false if the
1185buffer has reached the end of the input text.
1186
1187=cut
1188*/
1189
1190#define LEX_FAKE_EOF 0x80000000
1191
1192bool
1193Perl_lex_next_chunk(pTHX_ U32 flags)
1194{
1195 SV *linestr;
1196 char *buf;
1197 STRLEN old_bufend_pos, new_bufend_pos;
1198 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1200 bool got_some_for_debugger = 0;
f0e67a1d
Z
1201 bool got_some;
1202 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1204 linestr = PL_parser->linestr;
1205 buf = SvPVX(linestr);
1206 if (!(flags & LEX_KEEP_PREVIOUS) &&
1207 PL_parser->bufptr == PL_parser->bufend) {
1208 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209 linestart_pos = 0;
1210 if (PL_parser->last_uni != PL_parser->bufend)
1211 PL_parser->last_uni = NULL;
1212 if (PL_parser->last_lop != PL_parser->bufend)
1213 PL_parser->last_lop = NULL;
1214 last_uni_pos = last_lop_pos = 0;
1215 *buf = 0;
1216 SvCUR(linestr) = 0;
1217 } else {
1218 old_bufend_pos = PL_parser->bufend - buf;
1219 bufptr_pos = PL_parser->bufptr - buf;
1220 oldbufptr_pos = PL_parser->oldbufptr - buf;
1221 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222 linestart_pos = PL_parser->linestart - buf;
1223 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225 }
1226 if (flags & LEX_FAKE_EOF) {
1227 goto eof;
1228 } else if (!PL_parser->rsfp) {
1229 got_some = 0;
1230 } else if (filter_gets(linestr, old_bufend_pos)) {
1231 got_some = 1;
17cc9359 1232 got_some_for_debugger = 1;
f0e67a1d 1233 } else {
580561a3
Z
1234 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1235 sv_setpvs(linestr, "");
f0e67a1d
Z
1236 eof:
1237 /* End of real input. Close filehandle (unless it was STDIN),
1238 * then add implicit termination.
1239 */
1240 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241 PerlIO_clearerr(PL_parser->rsfp);
1242 else if (PL_parser->rsfp)
1243 (void)PerlIO_close(PL_parser->rsfp);
1244 PL_parser->rsfp = NULL;
1245 PL_doextract = FALSE;
1246#ifdef PERL_MAD
1247 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248 PL_faketokens = 1;
1249#endif
1250 if (!PL_in_eval && PL_minus_p) {
1251 sv_catpvs(linestr,
1252 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253 PL_minus_n = PL_minus_p = 0;
1254 } else if (!PL_in_eval && PL_minus_n) {
1255 sv_catpvs(linestr, /*{*/";}");
1256 PL_minus_n = 0;
1257 } else
1258 sv_catpvs(linestr, ";");
1259 got_some = 1;
1260 }
1261 buf = SvPVX(linestr);
1262 new_bufend_pos = SvCUR(linestr);
1263 PL_parser->bufend = buf + new_bufend_pos;
1264 PL_parser->bufptr = buf + bufptr_pos;
1265 PL_parser->oldbufptr = buf + oldbufptr_pos;
1266 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267 PL_parser->linestart = buf + linestart_pos;
1268 if (PL_parser->last_uni)
1269 PL_parser->last_uni = buf + last_uni_pos;
1270 if (PL_parser->last_lop)
1271 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1272 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1273 PL_curstash != PL_debstash) {
1274 /* debugger active and we're not compiling the debugger code,
1275 * so store the line into the debugger's array of lines
1276 */
1277 update_debugger_info(NULL, buf+old_bufend_pos,
1278 new_bufend_pos-old_bufend_pos);
1279 }
1280 return got_some;
1281}
1282
1283/*
1284=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286Looks ahead one (Unicode) character in the text currently being lexed.
1287Returns the codepoint (unsigned integer value) of the next character,
1288or -1 if lexing has reached the end of the input text. To consume the
1289peeked character, use L</lex_read_unichar>.
1290
1291If the next character is in (or extends into) the next chunk of input
1292text, the next chunk will be read in. Normally the current chunk will be
1293discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294then the current chunk will not be discarded.
1295
1296If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297is encountered, an exception is generated.
1298
1299=cut
1300*/
1301
1302I32
1303Perl_lex_peek_unichar(pTHX_ U32 flags)
1304{
749123ff 1305 dVAR;
f0e67a1d
Z
1306 char *s, *bufend;
1307 if (flags & ~(LEX_KEEP_PREVIOUS))
1308 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1309 s = PL_parser->bufptr;
1310 bufend = PL_parser->bufend;
1311 if (UTF) {
1312 U8 head;
1313 I32 unichar;
1314 STRLEN len, retlen;
1315 if (s == bufend) {
1316 if (!lex_next_chunk(flags))
1317 return -1;
1318 s = PL_parser->bufptr;
1319 bufend = PL_parser->bufend;
1320 }
1321 head = (U8)*s;
1322 if (!(head & 0x80))
1323 return head;
1324 if (head & 0x40) {
1325 len = PL_utf8skip[head];
1326 while ((STRLEN)(bufend-s) < len) {
1327 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1328 break;
1329 s = PL_parser->bufptr;
1330 bufend = PL_parser->bufend;
1331 }
1332 }
1333 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1334 if (retlen == (STRLEN)-1) {
1335 /* malformed UTF-8 */
1336 ENTER;
1337 SAVESPTR(PL_warnhook);
1338 PL_warnhook = PERL_WARNHOOK_FATAL;
1339 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1340 LEAVE;
1341 }
1342 return unichar;
1343 } else {
1344 if (s == bufend) {
1345 if (!lex_next_chunk(flags))
1346 return -1;
1347 s = PL_parser->bufptr;
1348 }
1349 return (U8)*s;
1350 }
1351}
1352
1353/*
1354=for apidoc Amx|I32|lex_read_unichar|U32 flags
1355
1356Reads the next (Unicode) character in the text currently being lexed.
1357Returns the codepoint (unsigned integer value) of the character read,
1358and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1359if lexing has reached the end of the input text. To non-destructively
1360examine the next character, use L</lex_peek_unichar> instead.
1361
1362If the next character is in (or extends into) the next chunk of input
1363text, the next chunk will be read in. Normally the current chunk will be
1364discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1365then the current chunk will not be discarded.
1366
1367If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1368is encountered, an exception is generated.
1369
1370=cut
1371*/
1372
1373I32
1374Perl_lex_read_unichar(pTHX_ U32 flags)
1375{
1376 I32 c;
1377 if (flags & ~(LEX_KEEP_PREVIOUS))
1378 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1379 c = lex_peek_unichar(flags);
1380 if (c != -1) {
1381 if (c == '\n')
1382 CopLINE_inc(PL_curcop);
1383 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1384 }
1385 return c;
1386}
1387
1388/*
1389=for apidoc Amx|void|lex_read_space|U32 flags
1390
1391Reads optional spaces, in Perl style, in the text currently being
1392lexed. The spaces may include ordinary whitespace characters and
1393Perl-style comments. C<#line> directives are processed if encountered.
1394L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1395at a non-space character (or the end of the input text).
1396
1397If spaces extend into the next chunk of input text, the next chunk will
1398be read in. Normally the current chunk will be discarded at the same
1399time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1400chunk will not be discarded.
1401
1402=cut
1403*/
1404
f0998909
Z
1405#define LEX_NO_NEXT_CHUNK 0x80000000
1406
f0e67a1d
Z
1407void
1408Perl_lex_read_space(pTHX_ U32 flags)
1409{
1410 char *s, *bufend;
1411 bool need_incline = 0;
f0998909 1412 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1414#ifdef PERL_MAD
1415 if (PL_skipwhite) {
1416 sv_free(PL_skipwhite);
1417 PL_skipwhite = NULL;
1418 }
1419 if (PL_madskills)
1420 PL_skipwhite = newSVpvs("");
1421#endif /* PERL_MAD */
1422 s = PL_parser->bufptr;
1423 bufend = PL_parser->bufend;
1424 while (1) {
1425 char c = *s;
1426 if (c == '#') {
1427 do {
1428 c = *++s;
1429 } while (!(c == '\n' || (c == 0 && s == bufend)));
1430 } else if (c == '\n') {
1431 s++;
1432 PL_parser->linestart = s;
1433 if (s == bufend)
1434 need_incline = 1;
1435 else
1436 incline(s);
1437 } else if (isSPACE(c)) {
1438 s++;
1439 } else if (c == 0 && s == bufend) {
1440 bool got_more;
1441#ifdef PERL_MAD
1442 if (PL_madskills)
1443 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1444#endif /* PERL_MAD */
f0998909
Z
1445 if (flags & LEX_NO_NEXT_CHUNK)
1446 break;
f0e67a1d
Z
1447 PL_parser->bufptr = s;
1448 CopLINE_inc(PL_curcop);
1449 got_more = lex_next_chunk(flags);
1450 CopLINE_dec(PL_curcop);
1451 s = PL_parser->bufptr;
1452 bufend = PL_parser->bufend;
1453 if (!got_more)
1454 break;
1455 if (need_incline && PL_parser->rsfp) {
1456 incline(s);
1457 need_incline = 0;
1458 }
1459 } else {
1460 break;
1461 }
1462 }
1463#ifdef PERL_MAD
1464 if (PL_madskills)
1465 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1466#endif /* PERL_MAD */
1467 PL_parser->bufptr = s;
1468}
1469
1470/*
ffb4593c
NT
1471 * S_incline
1472 * This subroutine has nothing to do with tilting, whether at windmills
1473 * or pinball tables. Its name is short for "increment line". It
57843af0 1474 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1475 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1476 * # line 500 "foo.pm"
1477 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1478 */
1479
76e3520e 1480STATIC void
d9095cec 1481S_incline(pTHX_ const char *s)
463ee0b2 1482{
97aff369 1483 dVAR;
d9095cec
NC
1484 const char *t;
1485 const char *n;
1486 const char *e;
463ee0b2 1487
7918f24d
NC
1488 PERL_ARGS_ASSERT_INCLINE;
1489
57843af0 1490 CopLINE_inc(PL_curcop);
463ee0b2
LW
1491 if (*s++ != '#')
1492 return;
d4c19fe8
AL
1493 while (SPACE_OR_TAB(*s))
1494 s++;
73659bf1
GS
1495 if (strnEQ(s, "line", 4))
1496 s += 4;
1497 else
1498 return;
084592ab 1499 if (SPACE_OR_TAB(*s))
73659bf1 1500 s++;
4e553d73 1501 else
73659bf1 1502 return;
d4c19fe8
AL
1503 while (SPACE_OR_TAB(*s))
1504 s++;
463ee0b2
LW
1505 if (!isDIGIT(*s))
1506 return;
d4c19fe8 1507
463ee0b2
LW
1508 n = s;
1509 while (isDIGIT(*s))
1510 s++;
07714eb4 1511 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1512 return;
bf4acbe4 1513 while (SPACE_OR_TAB(*s))
463ee0b2 1514 s++;
73659bf1 1515 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1516 s++;
73659bf1
GS
1517 e = t + 1;
1518 }
463ee0b2 1519 else {
c35e046a
AL
1520 t = s;
1521 while (!isSPACE(*t))
1522 t++;
73659bf1 1523 e = t;
463ee0b2 1524 }
bf4acbe4 1525 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1526 e++;
1527 if (*e != '\n' && *e != '\0')
1528 return; /* false alarm */
1529
f4dd75d9 1530 if (t - s > 0) {
d9095cec 1531 const STRLEN len = t - s;
8a5ee598 1532#ifndef USE_ITHREADS
19bad673
NC
1533 SV *const temp_sv = CopFILESV(PL_curcop);
1534 const char *cf;
1535 STRLEN tmplen;
1536
1537 if (temp_sv) {
1538 cf = SvPVX(temp_sv);
1539 tmplen = SvCUR(temp_sv);
1540 } else {
1541 cf = NULL;
1542 tmplen = 0;
1543 }
1544
42d9b98d 1545 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1546 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1547 * to *{"::_<newfilename"} */
44867030
NC
1548 /* However, the long form of evals is only turned on by the
1549 debugger - usually they're "(eval %lu)" */
1550 char smallbuf[128];
1551 char *tmpbuf;
1552 GV **gvp;
d9095cec 1553 STRLEN tmplen2 = len;
798b63bc 1554 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1555 tmpbuf = smallbuf;
1556 else
2ae0db35 1557 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1558 tmpbuf[0] = '_';
1559 tmpbuf[1] = '<';
2ae0db35 1560 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1561 tmplen += 2;
8a5ee598
RGS
1562 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1563 if (gvp) {
44867030
NC
1564 char *tmpbuf2;
1565 GV *gv2;
1566
1567 if (tmplen2 + 2 <= sizeof smallbuf)
1568 tmpbuf2 = smallbuf;
1569 else
1570 Newx(tmpbuf2, tmplen2 + 2, char);
1571
1572 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1573 /* Either they malloc'd it, or we malloc'd it,
1574 so no prefix is present in ours. */
1575 tmpbuf2[0] = '_';
1576 tmpbuf2[1] = '<';
1577 }
1578
1579 memcpy(tmpbuf2 + 2, s, tmplen2);
1580 tmplen2 += 2;
1581
8a5ee598 1582 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1583 if (!isGV(gv2)) {
8a5ee598 1584 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1585 /* adjust ${"::_<newfilename"} to store the new file name */
1586 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1587 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1588 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1589 }
44867030
NC
1590
1591 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1592 }
e66cf94c 1593 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1594 }
8a5ee598 1595#endif
05ec9bb3 1596 CopFILE_free(PL_curcop);
d9095cec 1597 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1598 }
57843af0 1599 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1600}
1601
29595ff2 1602#ifdef PERL_MAD
cd81e915 1603/* skip space before PL_thistoken */
29595ff2
NC
1604
1605STATIC char *
1606S_skipspace0(pTHX_ register char *s)
1607{
7918f24d
NC
1608 PERL_ARGS_ASSERT_SKIPSPACE0;
1609
29595ff2
NC
1610 s = skipspace(s);
1611 if (!PL_madskills)
1612 return s;
cd81e915
NC
1613 if (PL_skipwhite) {
1614 if (!PL_thiswhite)
6b29d1f5 1615 PL_thiswhite = newSVpvs("");
cd81e915
NC
1616 sv_catsv(PL_thiswhite, PL_skipwhite);
1617 sv_free(PL_skipwhite);
1618 PL_skipwhite = 0;
1619 }
1620 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1621 return s;
1622}
1623
cd81e915 1624/* skip space after PL_thistoken */
29595ff2
NC
1625
1626STATIC char *
1627S_skipspace1(pTHX_ register char *s)
1628{
d4c19fe8 1629 const char *start = s;
29595ff2
NC
1630 I32 startoff = start - SvPVX(PL_linestr);
1631
7918f24d
NC
1632 PERL_ARGS_ASSERT_SKIPSPACE1;
1633
29595ff2
NC
1634 s = skipspace(s);
1635 if (!PL_madskills)
1636 return s;
1637 start = SvPVX(PL_linestr) + startoff;
cd81e915 1638 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1639 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1640 PL_thistoken = newSVpvn(tstart, start - tstart);
1641 }
1642 PL_realtokenstart = -1;
1643 if (PL_skipwhite) {
1644 if (!PL_nextwhite)
6b29d1f5 1645 PL_nextwhite = newSVpvs("");
cd81e915
NC
1646 sv_catsv(PL_nextwhite, PL_skipwhite);
1647 sv_free(PL_skipwhite);
1648 PL_skipwhite = 0;
29595ff2
NC
1649 }
1650 return s;
1651}
1652
1653STATIC char *
1654S_skipspace2(pTHX_ register char *s, SV **svp)
1655{
c35e046a
AL
1656 char *start;
1657 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1658 const I32 startoff = s - SvPVX(PL_linestr);
1659
7918f24d
NC
1660 PERL_ARGS_ASSERT_SKIPSPACE2;
1661
29595ff2
NC
1662 s = skipspace(s);
1663 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1664 if (!PL_madskills || !svp)
1665 return s;
1666 start = SvPVX(PL_linestr) + startoff;
cd81e915 1667 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1668 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1669 PL_thistoken = newSVpvn(tstart, start - tstart);
1670 PL_realtokenstart = -1;
29595ff2 1671 }
cd81e915 1672 if (PL_skipwhite) {
29595ff2 1673 if (!*svp)
6b29d1f5 1674 *svp = newSVpvs("");
cd81e915
NC
1675 sv_setsv(*svp, PL_skipwhite);
1676 sv_free(PL_skipwhite);
1677 PL_skipwhite = 0;
29595ff2
NC
1678 }
1679
1680 return s;
1681}
1682#endif
1683
80a702cd 1684STATIC void
15f169a1 1685S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1686{
1687 AV *av = CopFILEAVx(PL_curcop);
1688 if (av) {
b9f83d2f 1689 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1690 if (orig_sv)
1691 sv_setsv(sv, orig_sv);
1692 else
1693 sv_setpvn(sv, buf, len);
80a702cd
RGS
1694 (void)SvIOK_on(sv);
1695 SvIV_set(sv, 0);
1696 av_store(av, (I32)CopLINE(PL_curcop), sv);
1697 }
1698}
1699
ffb4593c
NT
1700/*
1701 * S_skipspace
1702 * Called to gobble the appropriate amount and type of whitespace.
1703 * Skips comments as well.
1704 */
1705
76e3520e 1706STATIC char *
cea2e8a9 1707S_skipspace(pTHX_ register char *s)
a687059c 1708{
5db06880 1709#ifdef PERL_MAD
f0e67a1d
Z
1710 char *start = s;
1711#endif /* PERL_MAD */
7918f24d 1712 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1713#ifdef PERL_MAD
cd81e915
NC
1714 if (PL_skipwhite) {
1715 sv_free(PL_skipwhite);
f0e67a1d 1716 PL_skipwhite = NULL;
5db06880 1717 }
f0e67a1d 1718#endif /* PERL_MAD */
3280af22 1719 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1720 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1721 s++;
f0e67a1d
Z
1722 } else {
1723 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1724 PL_bufptr = s;
f0998909
Z
1725 lex_read_space(LEX_KEEP_PREVIOUS |
1726 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1727 LEX_NO_NEXT_CHUNK : 0));
3280af22 1728 s = PL_bufptr;
f0e67a1d
Z
1729 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1730 if (PL_linestart > PL_bufptr)
1731 PL_bufptr = PL_linestart;
1732 return s;
463ee0b2 1733 }
5db06880 1734#ifdef PERL_MAD
f0e67a1d
Z
1735 if (PL_madskills)
1736 PL_skipwhite = newSVpvn(start, s-start);
1737#endif /* PERL_MAD */
5db06880 1738 return s;
a687059c 1739}
378cc40b 1740
ffb4593c
NT
1741/*
1742 * S_check_uni
1743 * Check the unary operators to ensure there's no ambiguity in how they're
1744 * used. An ambiguous piece of code would be:
1745 * rand + 5
1746 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1747 * the +5 is its argument.
1748 */
1749
76e3520e 1750STATIC void
cea2e8a9 1751S_check_uni(pTHX)
ba106d47 1752{
97aff369 1753 dVAR;
d4c19fe8
AL
1754 const char *s;
1755 const char *t;
2f3197b3 1756
3280af22 1757 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1758 return;
3280af22
NIS
1759 while (isSPACE(*PL_last_uni))
1760 PL_last_uni++;
c35e046a
AL
1761 s = PL_last_uni;
1762 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1763 s++;
3280af22 1764 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1765 return;
6136c704 1766
9b387841
NC
1767 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1768 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1769 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1770}
1771
ffb4593c
NT
1772/*
1773 * LOP : macro to build a list operator. Its behaviour has been replaced
1774 * with a subroutine, S_lop() for which LOP is just another name.
1775 */
1776
a0d0e21e
LW
1777#define LOP(f,x) return lop(f,x,s)
1778
ffb4593c
NT
1779/*
1780 * S_lop
1781 * Build a list operator (or something that might be one). The rules:
1782 * - if we have a next token, then it's a list operator [why?]
1783 * - if the next thing is an opening paren, then it's a function
1784 * - else it's a list operator
1785 */
1786
76e3520e 1787STATIC I32
a0be28da 1788S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1789{
97aff369 1790 dVAR;
7918f24d
NC
1791
1792 PERL_ARGS_ASSERT_LOP;
1793
6154021b 1794 pl_yylval.ival = f;
35c8bce7 1795 CLINE;
3280af22
NIS
1796 PL_expect = x;
1797 PL_bufptr = s;
1798 PL_last_lop = PL_oldbufptr;
eb160463 1799 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1800#ifdef PERL_MAD
1801 if (PL_lasttoke)
1802 return REPORT(LSTOP);
1803#else
3280af22 1804 if (PL_nexttoke)
bbf60fe6 1805 return REPORT(LSTOP);
5db06880 1806#endif
79072805 1807 if (*s == '(')
bbf60fe6 1808 return REPORT(FUNC);
29595ff2 1809 s = PEEKSPACE(s);
79072805 1810 if (*s == '(')
bbf60fe6 1811 return REPORT(FUNC);
79072805 1812 else
bbf60fe6 1813 return REPORT(LSTOP);
79072805
LW
1814}
1815
5db06880
NC
1816#ifdef PERL_MAD
1817 /*
1818 * S_start_force
1819 * Sets up for an eventual force_next(). start_force(0) basically does
1820 * an unshift, while start_force(-1) does a push. yylex removes items
1821 * on the "pop" end.
1822 */
1823
1824STATIC void
1825S_start_force(pTHX_ int where)
1826{
1827 int i;
1828
cd81e915 1829 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1830 where = PL_lasttoke;
cd81e915
NC
1831 assert(PL_curforce < 0 || PL_curforce == where);
1832 if (PL_curforce != where) {
5db06880
NC
1833 for (i = PL_lasttoke; i > where; --i) {
1834 PL_nexttoke[i] = PL_nexttoke[i-1];
1835 }
1836 PL_lasttoke++;
1837 }
cd81e915 1838 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1839 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1840 PL_curforce = where;
1841 if (PL_nextwhite) {
5db06880 1842 if (PL_madskills)
6b29d1f5 1843 curmad('^', newSVpvs(""));
cd81e915 1844 CURMAD('_', PL_nextwhite);
5db06880
NC
1845 }
1846}
1847
1848STATIC void
1849S_curmad(pTHX_ char slot, SV *sv)
1850{
1851 MADPROP **where;
1852
1853 if (!sv)
1854 return;
cd81e915
NC
1855 if (PL_curforce < 0)
1856 where = &PL_thismad;
5db06880 1857 else
cd81e915 1858 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1859
cd81e915 1860 if (PL_faketokens)
76f68e9b 1861 sv_setpvs(sv, "");
5db06880
NC
1862 else {
1863 if (!IN_BYTES) {
1864 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1865 SvUTF8_on(sv);
1866 else if (PL_encoding) {
1867 sv_recode_to_utf8(sv, PL_encoding);
1868 }
1869 }
1870 }
1871
1872 /* keep a slot open for the head of the list? */
1873 if (slot != '_' && *where && (*where)->mad_key == '^') {
1874 (*where)->mad_key = slot;
daba3364 1875 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1876 (*where)->mad_val = (void*)sv;
1877 }
1878 else
1879 addmad(newMADsv(slot, sv), where, 0);
1880}
1881#else
b3f24c00
MHM
1882# define start_force(where) NOOP
1883# define curmad(slot, sv) NOOP
5db06880
NC
1884#endif
1885
ffb4593c
NT
1886/*
1887 * S_force_next
9cbb5ea2 1888 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1889 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1890 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1891 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1892 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1893 */
1894
4e553d73 1895STATIC void
cea2e8a9 1896S_force_next(pTHX_ I32 type)
79072805 1897{
97aff369 1898 dVAR;
704d4215
GG
1899#ifdef DEBUGGING
1900 if (DEBUG_T_TEST) {
1901 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1902 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1903 }
1904#endif
5db06880 1905#ifdef PERL_MAD
cd81e915 1906 if (PL_curforce < 0)
5db06880 1907 start_force(PL_lasttoke);
cd81e915 1908 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1909 if (PL_lex_state != LEX_KNOWNEXT)
1910 PL_lex_defer = PL_lex_state;
1911 PL_lex_state = LEX_KNOWNEXT;
1912 PL_lex_expect = PL_expect;
cd81e915 1913 PL_curforce = -1;
5db06880 1914#else
3280af22
NIS
1915 PL_nexttype[PL_nexttoke] = type;
1916 PL_nexttoke++;
1917 if (PL_lex_state != LEX_KNOWNEXT) {
1918 PL_lex_defer = PL_lex_state;
1919 PL_lex_expect = PL_expect;
1920 PL_lex_state = LEX_KNOWNEXT;
79072805 1921 }
5db06880 1922#endif
79072805
LW
1923}
1924
28ac2b49
Z
1925void
1926Perl_yyunlex(pTHX)
1927{
1928 if (PL_parser->yychar != YYEMPTY) {
1929 start_force(-1);
1930 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1931 force_next(PL_parser->yychar);
1932 PL_parser->yychar = YYEMPTY;
1933 }
1934}
1935
d0a148a6 1936STATIC SV *
15f169a1 1937S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1938{
97aff369 1939 dVAR;
740cce10 1940 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1941 !IN_BYTES
1942 && UTF
1943 && !is_ascii_string((const U8*)start, len)
740cce10 1944 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1945 return sv;
1946}
1947
ffb4593c
NT
1948/*
1949 * S_force_word
1950 * When the lexer knows the next thing is a word (for instance, it has
1951 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1952 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1953 * lookahead.
ffb4593c
NT
1954 *
1955 * Arguments:
b1b65b59 1956 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1957 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1958 * int check_keyword : if true, Perl checks to make sure the word isn't
1959 * a keyword (do this if the word is a label, e.g. goto FOO)
1960 * int allow_pack : if true, : characters will also be allowed (require,
1961 * use, etc. do this)
9cbb5ea2 1962 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1963 */
1964
76e3520e 1965STATIC char *
cea2e8a9 1966S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1967{
97aff369 1968 dVAR;
463ee0b2
LW
1969 register char *s;
1970 STRLEN len;
4e553d73 1971
7918f24d
NC
1972 PERL_ARGS_ASSERT_FORCE_WORD;
1973
29595ff2 1974 start = SKIPSPACE1(start);
463ee0b2 1975 s = start;
7e2040f0 1976 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1977 (allow_pack && *s == ':') ||
15f0808c 1978 (allow_initial_tick && *s == '\'') )
a0d0e21e 1979 {
3280af22 1980 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1981 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1982 return start;
cd81e915 1983 start_force(PL_curforce);
5db06880
NC
1984 if (PL_madskills)
1985 curmad('X', newSVpvn(start,s-start));
463ee0b2 1986 if (token == METHOD) {
29595ff2 1987 s = SKIPSPACE1(s);
463ee0b2 1988 if (*s == '(')
3280af22 1989 PL_expect = XTERM;
463ee0b2 1990 else {
3280af22 1991 PL_expect = XOPERATOR;
463ee0b2 1992 }
79072805 1993 }
e74e6b3d 1994 if (PL_madskills)
63575281 1995 curmad('g', newSVpvs( "forced" ));
9ded7720 1996 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1997 = (OP*)newSVOP(OP_CONST,0,
1998 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1999 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2000 force_next(token);
2001 }
2002 return s;
2003}
2004
ffb4593c
NT
2005/*
2006 * S_force_ident
9cbb5ea2 2007 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2008 * text only contains the "foo" portion. The first argument is a pointer
2009 * to the "foo", and the second argument is the type symbol to prefix.
2010 * Forces the next token to be a "WORD".
9cbb5ea2 2011 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2012 */
2013
76e3520e 2014STATIC void
bfed75c6 2015S_force_ident(pTHX_ register const char *s, int kind)
79072805 2016{
97aff369 2017 dVAR;
7918f24d
NC
2018
2019 PERL_ARGS_ASSERT_FORCE_IDENT;
2020
c35e046a 2021 if (*s) {
90e5519e
NC
2022 const STRLEN len = strlen(s);
2023 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2024 start_force(PL_curforce);
9ded7720 2025 NEXTVAL_NEXTTOKE.opval = o;
79072805 2026 force_next(WORD);
748a9306 2027 if (kind) {
11343788 2028 o->op_private = OPpCONST_ENTERED;
55497cff 2029 /* XXX see note in pp_entereval() for why we forgo typo
2030 warnings if the symbol must be introduced in an eval.
2031 GSAR 96-10-12 */
90e5519e
NC
2032 gv_fetchpvn_flags(s, len,
2033 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2034 : GV_ADD,
2035 kind == '$' ? SVt_PV :
2036 kind == '@' ? SVt_PVAV :
2037 kind == '%' ? SVt_PVHV :
a0d0e21e 2038 SVt_PVGV
90e5519e 2039 );
748a9306 2040 }
79072805
LW
2041 }
2042}
2043
1571675a
GS
2044NV
2045Perl_str_to_version(pTHX_ SV *sv)
2046{
2047 NV retval = 0.0;
2048 NV nshift = 1.0;
2049 STRLEN len;
cfd0369c 2050 const char *start = SvPV_const(sv,len);
9d4ba2ae 2051 const char * const end = start + len;
504618e9 2052 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2053
2054 PERL_ARGS_ASSERT_STR_TO_VERSION;
2055
1571675a 2056 while (start < end) {
ba210ebe 2057 STRLEN skip;
1571675a
GS
2058 UV n;
2059 if (utf)
9041c2e3 2060 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2061 else {
2062 n = *(U8*)start;
2063 skip = 1;
2064 }
2065 retval += ((NV)n)/nshift;
2066 start += skip;
2067 nshift *= 1000;
2068 }
2069 return retval;
2070}
2071
4e553d73 2072/*
ffb4593c
NT
2073 * S_force_version
2074 * Forces the next token to be a version number.
e759cc13
RGS
2075 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2076 * and if "guessing" is TRUE, then no new token is created (and the caller
2077 * must use an alternative parsing method).
ffb4593c
NT
2078 */
2079
76e3520e 2080STATIC char *
e759cc13 2081S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2082{
97aff369 2083 dVAR;
5f66b61c 2084 OP *version = NULL;
44dcb63b 2085 char *d;
5db06880
NC
2086#ifdef PERL_MAD
2087 I32 startoff = s - SvPVX(PL_linestr);
2088#endif
89bfa8cd 2089
7918f24d
NC
2090 PERL_ARGS_ASSERT_FORCE_VERSION;
2091
29595ff2 2092 s = SKIPSPACE1(s);
89bfa8cd 2093
44dcb63b 2094 d = s;
dd629d5b 2095 if (*d == 'v')
44dcb63b 2096 d++;
44dcb63b 2097 if (isDIGIT(*d)) {
e759cc13
RGS
2098 while (isDIGIT(*d) || *d == '_' || *d == '.')
2099 d++;
5db06880
NC
2100#ifdef PERL_MAD
2101 if (PL_madskills) {
cd81e915 2102 start_force(PL_curforce);
5db06880
NC
2103 curmad('X', newSVpvn(s,d-s));
2104 }
2105#endif
4e4da3ac 2106 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2107 SV *ver;
8d08d9ba
DG
2108#ifdef USE_LOCALE_NUMERIC
2109 char *loc = setlocale(LC_NUMERIC, "C");
2110#endif
6154021b 2111 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2112#ifdef USE_LOCALE_NUMERIC
2113 setlocale(LC_NUMERIC, loc);
2114#endif
6154021b 2115 version = pl_yylval.opval;
dd629d5b
GS
2116 ver = cSVOPx(version)->op_sv;
2117 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2118 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2119 SvNV_set(ver, str_to_version(ver));
1571675a 2120 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2121 }
89bfa8cd 2122 }
5db06880
NC
2123 else if (guessing) {
2124#ifdef PERL_MAD
2125 if (PL_madskills) {
cd81e915
NC
2126 sv_free(PL_nextwhite); /* let next token collect whitespace */
2127 PL_nextwhite = 0;
5db06880
NC
2128 s = SvPVX(PL_linestr) + startoff;
2129 }
2130#endif
e759cc13 2131 return s;
5db06880 2132 }
89bfa8cd 2133 }
2134
5db06880
NC
2135#ifdef PERL_MAD
2136 if (PL_madskills && !version) {
cd81e915
NC
2137 sv_free(PL_nextwhite); /* let next token collect whitespace */
2138 PL_nextwhite = 0;
5db06880
NC
2139 s = SvPVX(PL_linestr) + startoff;
2140 }
2141#endif
89bfa8cd 2142 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2143 start_force(PL_curforce);
9ded7720 2144 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2145 force_next(WORD);
89bfa8cd 2146
e759cc13 2147 return s;
89bfa8cd 2148}
2149
ffb4593c 2150/*
91152fc1
DG
2151 * S_force_strict_version
2152 * Forces the next token to be a version number using strict syntax rules.
2153 */
2154
2155STATIC char *
2156S_force_strict_version(pTHX_ char *s)
2157{
2158 dVAR;
2159 OP *version = NULL;
2160#ifdef PERL_MAD
2161 I32 startoff = s - SvPVX(PL_linestr);
2162#endif
2163 const char *errstr = NULL;
2164
2165 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2166
2167 while (isSPACE(*s)) /* leading whitespace */
2168 s++;
2169
2170 if (is_STRICT_VERSION(s,&errstr)) {
2171 SV *ver = newSV(0);
2172 s = (char *)scan_version(s, ver, 0);
2173 version = newSVOP(OP_CONST, 0, ver);
2174 }
4e4da3ac
Z
2175 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2176 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2177 {
91152fc1
DG
2178 PL_bufptr = s;
2179 if (errstr)
2180 yyerror(errstr); /* version required */
2181 return s;
2182 }
2183
2184#ifdef PERL_MAD
2185 if (PL_madskills && !version) {
2186 sv_free(PL_nextwhite); /* let next token collect whitespace */
2187 PL_nextwhite = 0;
2188 s = SvPVX(PL_linestr) + startoff;
2189 }
2190#endif
2191 /* NOTE: The parser sees the package name and the VERSION swapped */
2192 start_force(PL_curforce);
2193 NEXTVAL_NEXTTOKE.opval = version;
2194 force_next(WORD);
2195
2196 return s;
2197}
2198
2199/*
ffb4593c
NT
2200 * S_tokeq
2201 * Tokenize a quoted string passed in as an SV. It finds the next
2202 * chunk, up to end of string or a backslash. It may make a new
2203 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2204 * turns \\ into \.
2205 */
2206
76e3520e 2207STATIC SV *
cea2e8a9 2208S_tokeq(pTHX_ SV *sv)
79072805 2209{
97aff369 2210 dVAR;
79072805
LW
2211 register char *s;
2212 register char *send;
2213 register char *d;
b3ac6de7
IZ
2214 STRLEN len = 0;
2215 SV *pv = sv;
79072805 2216
7918f24d
NC
2217 PERL_ARGS_ASSERT_TOKEQ;
2218
79072805 2219 if (!SvLEN(sv))
b3ac6de7 2220 goto finish;
79072805 2221
a0d0e21e 2222 s = SvPV_force(sv, len);
21a311ee 2223 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2224 goto finish;
463ee0b2 2225 send = s + len;
79072805
LW
2226 while (s < send && *s != '\\')
2227 s++;
2228 if (s == send)
b3ac6de7 2229 goto finish;
79072805 2230 d = s;
be4731d2 2231 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2232 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2233 }
79072805
LW
2234 while (s < send) {
2235 if (*s == '\\') {
a0d0e21e 2236 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2237 s++; /* all that, just for this */
2238 }
2239 *d++ = *s++;
2240 }
2241 *d = '\0';
95a20fc0 2242 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2243 finish:
3280af22 2244 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2245 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2246 return sv;
2247}
2248
ffb4593c
NT
2249/*
2250 * Now come three functions related to double-quote context,
2251 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2252 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2253 * interact with PL_lex_state, and create fake ( ... ) argument lists
2254 * to handle functions and concatenation.
2255 * They assume that whoever calls them will be setting up a fake
2256 * join call, because each subthing puts a ',' after it. This lets
2257 * "lower \luPpEr"
2258 * become
2259 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2260 *
2261 * (I'm not sure whether the spurious commas at the end of lcfirst's
2262 * arguments and join's arguments are created or not).
2263 */
2264
2265/*
2266 * S_sublex_start
6154021b 2267 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2268 *
2269 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2270 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2271 *
2272 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2273 *
2274 * Everything else becomes a FUNC.
2275 *
2276 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2277 * had an OP_CONST or OP_READLINE). This just sets us up for a
2278 * call to S_sublex_push().
2279 */
2280
76e3520e 2281STATIC I32
cea2e8a9 2282S_sublex_start(pTHX)
79072805 2283{
97aff369 2284 dVAR;
6154021b 2285 register const I32 op_type = pl_yylval.ival;
79072805
LW
2286
2287 if (op_type == OP_NULL) {
6154021b 2288 pl_yylval.opval = PL_lex_op;
5f66b61c 2289 PL_lex_op = NULL;
79072805
LW
2290 return THING;
2291 }
2292 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2293 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2294
2295 if (SvTYPE(sv) == SVt_PVIV) {
2296 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2297 STRLEN len;
96a5add6 2298 const char * const p = SvPV_const(sv, len);
740cce10 2299 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2300 SvREFCNT_dec(sv);
2301 sv = nsv;
4e553d73 2302 }
6154021b 2303 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2304 PL_lex_stuff = NULL;
6f33ba73
RGS
2305 /* Allow <FH> // "foo" */
2306 if (op_type == OP_READLINE)
2307 PL_expect = XTERMORDORDOR;
79072805
LW
2308 return THING;
2309 }
e3f73d4e
RGS
2310 else if (op_type == OP_BACKTICK && PL_lex_op) {
2311 /* readpipe() vas overriden */
2312 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2313 pl_yylval.opval = PL_lex_op;
9b201d7d 2314 PL_lex_op = NULL;
e3f73d4e
RGS
2315 PL_lex_stuff = NULL;
2316 return THING;
2317 }
79072805 2318
3280af22 2319 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2320 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2321 PL_sublex_info.sub_op = PL_lex_op;
2322 PL_lex_state = LEX_INTERPPUSH;
55497cff 2323
3280af22
NIS
2324 PL_expect = XTERM;
2325 if (PL_lex_op) {
6154021b 2326 pl_yylval.opval = PL_lex_op;
5f66b61c 2327 PL_lex_op = NULL;
55497cff 2328 return PMFUNC;
2329 }
2330 else
2331 return FUNC;
2332}
2333
ffb4593c
NT
2334/*
2335 * S_sublex_push
2336 * Create a new scope to save the lexing state. The scope will be
2337 * ended in S_sublex_done. Returns a '(', starting the function arguments
2338 * to the uc, lc, etc. found before.
2339 * Sets PL_lex_state to LEX_INTERPCONCAT.
2340 */
2341
76e3520e 2342STATIC I32
cea2e8a9 2343S_sublex_push(pTHX)
55497cff 2344{
27da23d5 2345 dVAR;
f46d017c 2346 ENTER;
55497cff 2347
3280af22 2348 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2349 SAVEBOOL(PL_lex_dojoin);
3280af22 2350 SAVEI32(PL_lex_brackets);
3280af22
NIS
2351 SAVEI32(PL_lex_casemods);
2352 SAVEI32(PL_lex_starts);
651b5b28 2353 SAVEI8(PL_lex_state);
7766f137 2354 SAVEVPTR(PL_lex_inpat);
98246f1e 2355 SAVEI16(PL_lex_inwhat);
57843af0 2356 SAVECOPLINE(PL_curcop);
3280af22 2357 SAVEPPTR(PL_bufptr);
8452ff4b 2358 SAVEPPTR(PL_bufend);
3280af22
NIS
2359 SAVEPPTR(PL_oldbufptr);
2360 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2361 SAVEPPTR(PL_last_lop);
2362 SAVEPPTR(PL_last_uni);
3280af22
NIS
2363 SAVEPPTR(PL_linestart);
2364 SAVESPTR(PL_linestr);
8edd5f42
RGS
2365 SAVEGENERICPV(PL_lex_brackstack);
2366 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2367
2368 PL_linestr = PL_lex_stuff;
a0714e2c 2369 PL_lex_stuff = NULL;
3280af22 2370
9cbb5ea2
GS
2371 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2372 = SvPVX(PL_linestr);
3280af22 2373 PL_bufend += SvCUR(PL_linestr);
bd61b366 2374 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2375 SAVEFREESV(PL_linestr);
2376
2377 PL_lex_dojoin = FALSE;
2378 PL_lex_brackets = 0;
a02a5408
JC
2379 Newx(PL_lex_brackstack, 120, char);
2380 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2381 PL_lex_casemods = 0;
2382 *PL_lex_casestack = '\0';
2383 PL_lex_starts = 0;
2384 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2385 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2386
2387 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2388 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2389 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2390 else
5f66b61c 2391 PL_lex_inpat = NULL;
79072805 2392
55497cff 2393 return '(';
79072805
LW
2394}
2395
ffb4593c
NT
2396/*
2397 * S_sublex_done
2398 * Restores lexer state after a S_sublex_push.
2399 */
2400
76e3520e 2401STATIC I32
cea2e8a9 2402S_sublex_done(pTHX)
79072805 2403{
27da23d5 2404 dVAR;
3280af22 2405 if (!PL_lex_starts++) {
396482e1 2406 SV * const sv = newSVpvs("");
9aa983d2
JH
2407 if (SvUTF8(PL_linestr))
2408 SvUTF8_on(sv);
3280af22 2409 PL_expect = XOPERATOR;
6154021b 2410 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2411 return THING;
2412 }
2413
3280af22
NIS
2414 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2415 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2416 return yylex();
79072805
LW
2417 }
2418
ffb4593c 2419 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2420 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2421 PL_linestr = PL_lex_repl;
2422 PL_lex_inpat = 0;
2423 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2424 PL_bufend += SvCUR(PL_linestr);
bd61b366 2425 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2426 SAVEFREESV(PL_linestr);
2427 PL_lex_dojoin = FALSE;
2428 PL_lex_brackets = 0;
3280af22
NIS
2429 PL_lex_casemods = 0;
2430 *PL_lex_casestack = '\0';
2431 PL_lex_starts = 0;
25da4f38 2432 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2433 PL_lex_state = LEX_INTERPNORMAL;
2434 PL_lex_starts++;
e9fa98b2
HS
2435 /* we don't clear PL_lex_repl here, so that we can check later
2436 whether this is an evalled subst; that means we rely on the
2437 logic to ensure sublex_done() is called again only via the
2438 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2439 }
e9fa98b2 2440 else {
3280af22 2441 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2442 PL_lex_repl = NULL;
e9fa98b2 2443 }
79072805 2444 return ',';
ffed7fef
LW
2445 }
2446 else {
5db06880
NC
2447#ifdef PERL_MAD
2448 if (PL_madskills) {
cd81e915
NC
2449 if (PL_thiswhite) {
2450 if (!PL_endwhite)
6b29d1f5 2451 PL_endwhite = newSVpvs("");
cd81e915
NC
2452 sv_catsv(PL_endwhite, PL_thiswhite);
2453 PL_thiswhite = 0;
2454 }
2455 if (PL_thistoken)
76f68e9b 2456 sv_setpvs(PL_thistoken,"");
5db06880 2457 else
cd81e915 2458 PL_realtokenstart = -1;
5db06880
NC
2459 }
2460#endif
f46d017c 2461 LEAVE;
3280af22
NIS
2462 PL_bufend = SvPVX(PL_linestr);
2463 PL_bufend += SvCUR(PL_linestr);
2464 PL_expect = XOPERATOR;
09bef843 2465 PL_sublex_info.sub_inwhat = 0;
79072805 2466 return ')';
ffed7fef
LW
2467 }
2468}
2469
02aa26ce
NT
2470/*
2471 scan_const
2472
2473 Extracts a pattern, double-quoted string, or transliteration. This
2474 is terrifying code.
2475
94def140 2476 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2477 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2478 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2479
94def140
TS
2480 Returns a pointer to the character scanned up to. If this is
2481 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2482 successfully parsed), will leave an OP for the substring scanned
6154021b 2483 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2484 by looking at the next characters herself.
2485
02aa26ce
NT
2486 In patterns:
2487 backslashes:
ff3f963a 2488 constants: \N{NAME} only
02aa26ce
NT
2489 case and quoting: \U \Q \E
2490 stops on @ and $, but not for $ as tail anchor
2491
2492 In transliterations:
2493 characters are VERY literal, except for - not at the start or end
94def140
TS
2494 of the string, which indicates a range. If the range is in bytes,
2495 scan_const expands the range to the full set of intermediate
2496 characters. If the range is in utf8, the hyphen is replaced with
2497 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2498
2499 In double-quoted strings:
2500 backslashes:
2501 double-quoted style: \r and \n
ff3f963a 2502 constants: \x31, etc.
94def140 2503 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2504 case and quoting: \U \Q \E
2505 stops on @ and $
2506
2507 scan_const does *not* construct ops to handle interpolated strings.
2508 It stops processing as soon as it finds an embedded $ or @ variable
2509 and leaves it to the caller to work out what's going on.
2510
94def140
TS
2511 embedded arrays (whether in pattern or not) could be:
2512 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2513
2514 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2515
2516 $ in pattern could be $foo or could be tail anchor. Assumption:
2517 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2518 followed by one of "()| \r\n\t"
02aa26ce
NT
2519
2520 \1 (backreferences) are turned into $1
2521
2522 The structure of the code is
2523 while (there's a character to process) {
94def140
TS
2524 handle transliteration ranges
2525 skip regexp comments /(?#comment)/ and codes /(?{code})/
2526 skip #-initiated comments in //x patterns
2527 check for embedded arrays
02aa26ce
NT
2528 check for embedded scalars
2529 if (backslash) {
94def140 2530 deprecate \1 in substitution replacements
02aa26ce
NT
2531 handle string-changing backslashes \l \U \Q \E, etc.
2532 switch (what was escaped) {
94def140 2533 handle \- in a transliteration (becomes a literal -)
ff3f963a 2534 if a pattern and not \N{, go treat as regular character
94def140
TS
2535 handle \132 (octal characters)
2536 handle \x15 and \x{1234} (hex characters)
ff3f963a 2537 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2538 handle \cV (control characters)
2539 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2540 } (end switch)
77a135fe 2541 continue
02aa26ce 2542 } (end if backslash)
77a135fe 2543 handle regular character
02aa26ce 2544 } (end while character to read)
4e553d73 2545
02aa26ce
NT
2546*/
2547
76e3520e 2548STATIC char *
cea2e8a9 2549S_scan_const(pTHX_ char *start)
79072805 2550{
97aff369 2551 dVAR;
3280af22 2552 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2553 SV *sv = newSV(send - start); /* sv for the constant. See
2554 note below on sizing. */
02aa26ce
NT
2555 register char *s = start; /* start of the constant */
2556 register char *d = SvPVX(sv); /* destination for copies */
2557 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2558 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2559 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2560 I32 this_utf8 = UTF; /* Is the source string assumed
2561 to be UTF8? But, this can
2562 show as true when the source
2563 isn't utf8, as for example
2564 when it is entirely composed
2565 of hex constants */
2566
2567 /* Note on sizing: The scanned constant is placed into sv, which is
2568 * initialized by newSV() assuming one byte of output for every byte of
2569 * input. This routine expects newSV() to allocate an extra byte for a
2570 * trailing NUL, which this routine will append if it gets to the end of
2571 * the input. There may be more bytes of input than output (eg., \N{LATIN
2572 * CAPITAL LETTER A}), or more output than input if the constant ends up
2573 * recoded to utf8, but each time a construct is found that might increase
2574 * the needed size, SvGROW() is called. Its size parameter each time is
2575 * based on the best guess estimate at the time, namely the length used so
2576 * far, plus the length the current construct will occupy, plus room for
2577 * the trailing NUL, plus one byte for every input byte still unscanned */
2578
012bcf8d 2579 UV uv;
4c3a8340
TS
2580#ifdef EBCDIC
2581 UV literal_endpoint = 0;
e294cc5d 2582 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2583#endif
012bcf8d 2584
7918f24d
NC
2585 PERL_ARGS_ASSERT_SCAN_CONST;
2586
2b9d42f0
NIS
2587 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2588 /* If we are doing a trans and we know we want UTF8 set expectation */
2589 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2590 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2591 }
2592
2593
79072805 2594 while (s < send || dorange) {
ff3f963a 2595
02aa26ce 2596 /* get transliterations out of the way (they're most literal) */
3280af22 2597 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2598 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2599 if (dorange) {
1ba5c669
JH
2600 I32 i; /* current expanded character */
2601 I32 min; /* first character in range */
2602 I32 max; /* last character in range */
02aa26ce 2603
e294cc5d
JH
2604#ifdef EBCDIC
2605 UV uvmax = 0;
2606#endif
2607
2608 if (has_utf8
2609#ifdef EBCDIC
2610 && !native_range
2611#endif
2612 ) {
9d4ba2ae 2613 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2614 char *e = d++;
2615 while (e-- > c)
2616 *(e + 1) = *e;
25716404 2617 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2618 /* mark the range as done, and continue */
2619 dorange = FALSE;
2620 didrange = TRUE;
2621 continue;
2622 }
2b9d42f0 2623
95a20fc0 2624 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2625#ifdef EBCDIC
2626 SvGROW(sv,
2627 SvLEN(sv) + (has_utf8 ?
2628 (512 - UTF_CONTINUATION_MARK +
2629 UNISKIP(0x100))
2630 : 256));
2631 /* How many two-byte within 0..255: 128 in UTF-8,
2632 * 96 in UTF-8-mod. */
2633#else
9cbb5ea2 2634 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2635#endif
9cbb5ea2 2636 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2637#ifdef EBCDIC
2638 if (has_utf8) {
2639 int j;
2640 for (j = 0; j <= 1; j++) {
2641 char * const c = (char*)utf8_hop((U8*)d, -1);
2642 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2643 if (j)
2644 min = (U8)uv;
2645 else if (uv < 256)
2646 max = (U8)uv;
2647 else {
2648 max = (U8)0xff; /* only to \xff */
2649 uvmax = uv; /* \x{100} to uvmax */
2650 }
2651 d = c; /* eat endpoint chars */
2652 }
2653 }
2654 else {
2655#endif
2656 d -= 2; /* eat the first char and the - */
2657 min = (U8)*d; /* first char in range */
2658 max = (U8)d[1]; /* last char in range */
2659#ifdef EBCDIC
2660 }
2661#endif
8ada0baa 2662
c2e66d9e 2663 if (min > max) {
01ec43d0 2664 Perl_croak(aTHX_
d1573ac7 2665 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2666 (char)min, (char)max);
c2e66d9e
GS
2667 }
2668
c7f1f016 2669#ifdef EBCDIC
4c3a8340
TS
2670 if (literal_endpoint == 2 &&
2671 ((isLOWER(min) && isLOWER(max)) ||
2672 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2673 if (isLOWER(min)) {
2674 for (i = min; i <= max; i++)
2675 if (isLOWER(i))
db42d148 2676 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2677 } else {
2678 for (i = min; i <= max; i++)
2679 if (isUPPER(i))
db42d148 2680 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2681 }
2682 }
2683 else
2684#endif
2685 for (i = min; i <= max; i++)
e294cc5d
JH
2686#ifdef EBCDIC
2687 if (has_utf8) {
2688 const U8 ch = (U8)NATIVE_TO_UTF(i);
2689 if (UNI_IS_INVARIANT(ch))
2690 *d++ = (U8)i;
2691 else {
2692 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2693 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2694 }
2695 }
2696 else
2697#endif
2698 *d++ = (char)i;
2699
2700#ifdef EBCDIC
2701 if (uvmax) {
2702 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2703 if (uvmax > 0x101)
2704 *d++ = (char)UTF_TO_NATIVE(0xff);
2705 if (uvmax > 0x100)
2706 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2707 }
2708#endif
02aa26ce
NT
2709
2710 /* mark the range as done, and continue */
79072805 2711 dorange = FALSE;
01ec43d0 2712 didrange = TRUE;
4c3a8340
TS
2713#ifdef EBCDIC
2714 literal_endpoint = 0;
2715#endif
79072805 2716 continue;
4e553d73 2717 }
02aa26ce
NT
2718
2719 /* range begins (ignore - as first or last char) */
79072805 2720 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2721 if (didrange) {
1fafa243 2722 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2723 }
e294cc5d
JH
2724 if (has_utf8
2725#ifdef EBCDIC
2726 && !native_range
2727#endif
2728 ) {
25716404 2729 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2730 s++;
2731 continue;
2732 }
79072805
LW
2733 dorange = TRUE;
2734 s++;
01ec43d0
GS
2735 }
2736 else {
2737 didrange = FALSE;
4c3a8340
TS
2738#ifdef EBCDIC
2739 literal_endpoint = 0;
e294cc5d 2740 native_range = TRUE;
4c3a8340 2741#endif
01ec43d0 2742 }
79072805 2743 }
02aa26ce
NT
2744
2745 /* if we get here, we're not doing a transliteration */
2746
0f5d15d6
IZ
2747 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2748 except for the last char, which will be done separately. */
3280af22 2749 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2750 if (s[2] == '#') {
e994fd66 2751 while (s+1 < send && *s != ')')
db42d148 2752 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2753 }
2754 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2755 || (s[2] == '?' && s[3] == '{'))
155aba94 2756 {
cc6b7395 2757 I32 count = 1;
0f5d15d6 2758 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2759 char c;
2760
d9f97599
GS
2761 while (count && (c = *regparse)) {
2762 if (c == '\\' && regparse[1])
2763 regparse++;
4e553d73 2764 else if (c == '{')
cc6b7395 2765 count++;
4e553d73 2766 else if (c == '}')
cc6b7395 2767 count--;
d9f97599 2768 regparse++;
cc6b7395 2769 }
e994fd66 2770 if (*regparse != ')')
5bdf89e7 2771 regparse--; /* Leave one char for continuation. */
0f5d15d6 2772 while (s < regparse)
db42d148 2773 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2774 }
748a9306 2775 }
02aa26ce
NT
2776
2777 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2778 else if (*s == '#' && PL_lex_inpat &&
2779 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2780 while (s+1 < send && *s != '\n')
db42d148 2781 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2782 }
02aa26ce 2783
5d1d4326 2784 /* check for embedded arrays
da6eedaa 2785 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2786 */
1749ea0d
TS
2787 else if (*s == '@' && s[1]) {
2788 if (isALNUM_lazy_if(s+1,UTF))
2789 break;
2790 if (strchr(":'{$", s[1]))
2791 break;
2792 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2793 break; /* in regexp, neither @+ nor @- are interpolated */
2794 }
02aa26ce
NT
2795
2796 /* check for embedded scalars. only stop if we're sure it's a
2797 variable.
2798 */
79072805 2799 else if (*s == '$') {
3280af22 2800 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2801 break;
77772344 2802 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2803 if (s[1] == '\\') {
2804 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2805 "Possible unintended interpolation of $\\ in regex");
77772344 2806 }
79072805 2807 break; /* in regexp, $ might be tail anchor */
77772344 2808 }
79072805 2809 }
02aa26ce 2810
2b9d42f0
NIS
2811 /* End of else if chain - OP_TRANS rejoin rest */
2812
02aa26ce 2813 /* backslashes */
79072805 2814 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2815 char* e; /* Can be used for ending '}', etc. */
2816
79072805 2817 s++;
02aa26ce 2818
7d0fc23c
KW
2819 /* warn on \1 - \9 in substitution replacements, but note that \11
2820 * is an octal; and \19 is \1 followed by '9' */
3280af22 2821 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2822 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2823 {
a2a5de95 2824 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2825 *--s = '$';
2826 break;
2827 }
02aa26ce
NT
2828
2829 /* string-change backslash escapes */
3280af22 2830 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2831 --s;
2832 break;
2833 }
ff3f963a
KW
2834 /* In a pattern, process \N, but skip any other backslash escapes.
2835 * This is because we don't want to translate an escape sequence
2836 * into a meta symbol and have the regex compiler use the meta
2837 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2838 * in spite of this, we do have to process \N here while the proper
2839 * charnames handler is in scope. See bugs #56444 and #62056.
2840 * There is a complication because \N in a pattern may also stand
2841 * for 'match a non-nl', and not mean a charname, in which case its
2842 * processing should be deferred to the regex compiler. To be a
2843 * charname it must be followed immediately by a '{', and not look
2844 * like \N followed by a curly quantifier, i.e., not something like
2845 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2846 * quantifier */
2847 else if (PL_lex_inpat
2848 && (*s != 'N'
2849 || s[1] != '{'
2850 || regcurly(s + 1)))
2851 {
cc74c5bd
TS
2852 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2853 goto default_action;
2854 }
02aa26ce 2855
79072805 2856 switch (*s) {
02aa26ce
NT
2857
2858 /* quoted - in transliterations */
79072805 2859 case '-':
3280af22 2860 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2861 *d++ = *s++;
2862 continue;
2863 }
2864 /* FALL THROUGH */
2865 default:
11b8faa4 2866 {
a2a5de95
NC
2867 if ((isALPHA(*s) || isDIGIT(*s)))
2868 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2869 "Unrecognized escape \\%c passed through",
2870 *s);
11b8faa4 2871 /* default action is to copy the quoted character */
f9a63242 2872 goto default_action;
11b8faa4 2873 }
02aa26ce 2874
632403cc 2875 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2876 case '0': case '1': case '2': case '3':
2877 case '4': case '5': case '6': case '7':
ba210ebe 2878 {
53305cf1
NC
2879 I32 flags = 0;
2880 STRLEN len = 3;
77a135fe 2881 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2882 s += len;
2883 }
012bcf8d 2884 goto NUM_ESCAPE_INSERT;
02aa26ce 2885
f0a2b745
KW
2886 /* eg. \o{24} indicates the octal constant \024 */
2887 case 'o':
2888 {
2889 STRLEN len;
454155d9 2890 const char* error;
f0a2b745 2891
454155d9 2892 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2893 s += len;
454155d9 2894 if (! valid) {
f0a2b745
KW
2895 yyerror(error);
2896 continue;
2897 }
2898 goto NUM_ESCAPE_INSERT;
2899 }
2900
77a135fe 2901 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2902 case 'x':
a0ed51b3
LW
2903 ++s;
2904 if (*s == '{') {
9d4ba2ae 2905 char* const e = strchr(s, '}');
a4c04bdc
NC
2906 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2907 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2908 STRLEN len;
355860ce 2909
53305cf1 2910 ++s;
adaeee49 2911 if (!e) {
a0ed51b3 2912 yyerror("Missing right brace on \\x{}");
355860ce 2913 continue;
ba210ebe 2914 }
53305cf1 2915 len = e - s;
77a135fe 2916 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2917 s = e + 1;
a0ed51b3
LW
2918 }
2919 else {
ba210ebe 2920 {
53305cf1 2921 STRLEN len = 2;
a4c04bdc 2922 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2923 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2924 s += len;
2925 }
012bcf8d
GS
2926 }
2927
2928 NUM_ESCAPE_INSERT:
ff3f963a
KW
2929 /* Insert oct or hex escaped character. There will always be
2930 * enough room in sv since such escapes will be longer than any
2931 * UTF-8 sequence they can end up as, except if they force us
2932 * to recode the rest of the string into utf8 */
ba7cea30 2933
77a135fe 2934 /* Here uv is the ordinal of the next character being added in
ff3f963a 2935 * unicode (converted from native). */
77a135fe 2936 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2937 if (!has_utf8 && uv > 255) {
77a135fe
KW
2938 /* Might need to recode whatever we have accumulated so
2939 * far if it contains any chars variant in utf8 or
2940 * utf-ebcdic. */
2941
2942 SvCUR_set(sv, d - SvPVX_const(sv));
2943 SvPOK_on(sv);
2944 *d = '\0';
77a135fe 2945 /* See Note on sizing above. */
7bf79863
KW
2946 sv_utf8_upgrade_flags_grow(sv,
2947 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2948 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2949 d = SvPVX(sv) + SvCUR(sv);
2950 has_utf8 = TRUE;
012bcf8d
GS
2951 }
2952
77a135fe
KW
2953 if (has_utf8) {
2954 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2955 if (PL_lex_inwhat == OP_TRANS &&
2956 PL_sublex_info.sub_op) {
2957 PL_sublex_info.sub_op->op_private |=
2958 (PL_lex_repl ? OPpTRANS_FROM_UTF
2959 : OPpTRANS_TO_UTF);
f9a63242 2960 }
e294cc5d
JH
2961#ifdef EBCDIC
2962 if (uv > 255 && !dorange)
2963 native_range = FALSE;
2964#endif
012bcf8d 2965 }
a0ed51b3 2966 else {
012bcf8d 2967 *d++ = (char)uv;
a0ed51b3 2968 }
012bcf8d
GS
2969 }
2970 else {
c4d5f83a 2971 *d++ = (char) uv;
a0ed51b3 2972 }
79072805 2973 continue;
02aa26ce 2974
4a2d328f 2975 case 'N':
ff3f963a
KW
2976 /* In a non-pattern \N must be a named character, like \N{LATIN
2977 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2978 * mean to match a non-newline. For non-patterns, named
2979 * characters are converted to their string equivalents. In
2980 * patterns, named characters are not converted to their
2981 * ultimate forms for the same reasons that other escapes
2982 * aren't. Instead, they are converted to the \N{U+...} form
2983 * to get the value from the charnames that is in effect right
2984 * now, while preserving the fact that it was a named character
2985 * so that the regex compiler knows this */
2986
2987 /* This section of code doesn't generally use the
2988 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2989 * a close examination of this macro and determined it is a
2990 * no-op except on utfebcdic variant characters. Every
2991 * character generated by this that would normally need to be
2992 * enclosed by this macro is invariant, so the macro is not
2993 * needed, and would complicate use of copy(). There are other
2994 * parts of this file where the macro is used inconsistently,
2995 * but are saved by it being a no-op */
2996
2997 /* The structure of this section of code (besides checking for
2998 * errors and upgrading to utf8) is:
2999 * Further disambiguate between the two meanings of \N, and if
3000 * not a charname, go process it elsewhere
0a96133f
KW
3001 * If of form \N{U+...}, pass it through if a pattern;
3002 * otherwise convert to utf8
3003 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3004 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3005
3006 /* Here, s points to the 'N'; the test below is guaranteed to
3007 * succeed if we are being called on a pattern as we already
3008 * know from a test above that the next character is a '{'.
3009 * On a non-pattern \N must mean 'named sequence, which
3010 * requires braces */
3011 s++;
3012 if (*s != '{') {
3013 yyerror("Missing braces on \\N{}");
3014 continue;
3015 }
3016 s++;
3017
0a96133f 3018 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3019 if (! (e = strchr(s, '}'))) {
3020 if (! PL_lex_inpat) {
5777a3f7 3021 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3022 } else {
3023 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3024 }
0a96133f 3025 continue;
ff3f963a 3026 }
cddc7ef4 3027
ff3f963a 3028 /* Here it looks like a named character */
cddc7ef4 3029
ff3f963a
KW
3030 if (PL_lex_inpat) {
3031
3032 /* XXX This block is temporary code. \N{} implies that the
3033 * pattern is to have Unicode semantics, and therefore
3034 * currently has to be encoded in utf8. By putting it in
3035 * utf8 now, we save a whole pass in the regular expression
3036 * compiler. Once that code is changed so Unicode
3037 * semantics doesn't necessarily have to be in utf8, this
3038 * block should be removed */
3039 if (!has_utf8) {
77a135fe 3040 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3041 SvPOK_on(sv);
e4f3eed8 3042 *d = '\0';
77a135fe 3043 /* See Note on sizing above. */
7bf79863 3044 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3045 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3046 /* 5 = '\N{' + cur char + NUL */
3047 (STRLEN)(send - s) + 5);
f08d6ad9 3048 d = SvPVX(sv) + SvCUR(sv);
89491803 3049 has_utf8 = TRUE;
ff3f963a
KW
3050 }
3051 }
423cee85 3052
ff3f963a
KW
3053 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3054 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3055 | PERL_SCAN_DISALLOW_PREFIX;
3056 STRLEN len;
3057
3058 /* For \N{U+...}, the '...' is a unicode value even on
3059 * EBCDIC machines */
3060 s += 2; /* Skip to next char after the 'U+' */
3061 len = e - s;
3062 uv = grok_hex(s, &len, &flags, NULL);
3063 if (len == 0 || len != (STRLEN)(e - s)) {
3064 yyerror("Invalid hexadecimal number in \\N{U+...}");
3065 s = e + 1;
3066 continue;
3067 }
3068
3069 if (PL_lex_inpat) {
3070
3071 /* Pass through to the regex compiler unchanged. The
3072 * reason we evaluated the number above is to make sure
0a96133f 3073 * there wasn't a syntax error. */
ff3f963a
KW
3074 s -= 5; /* Include the '\N{U+' */
3075 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3076 d += e - s + 1;
3077 }
3078 else { /* Not a pattern: convert the hex to string */
3079
3080 /* If destination is not in utf8, unconditionally
3081 * recode it to be so. This is because \N{} implies
3082 * Unicode semantics, and scalars have to be in utf8
3083 * to guarantee those semantics */
3084 if (! has_utf8) {
3085 SvCUR_set(sv, d - SvPVX_const(sv));
3086 SvPOK_on(sv);
3087 *d = '\0';
3088 /* See Note on sizing above. */
3089 sv_utf8_upgrade_flags_grow(
3090 sv,
3091 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3092 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3093 d = SvPVX(sv) + SvCUR(sv);
3094 has_utf8 = TRUE;
3095 }
3096
3097 /* Add the string to the output */
3098 if (UNI_IS_INVARIANT(uv)) {
3099 *d++ = (char) uv;
3100 }
3101 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3102 }
3103 }
3104 else { /* Here is \N{NAME} but not \N{U+...}. */
3105
3106 SV *res; /* result from charnames */
3107 const char *str; /* the string in 'res' */
3108 STRLEN len; /* its length */
3109
3110 /* Get the value for NAME */
3111 res = newSVpvn(s, e - s);
3112 res = new_constant( NULL, 0, "charnames",
3113 /* includes all of: \N{...} */
3114 res, NULL, s - 3, e - s + 4 );
3115
3116 /* Most likely res will be in utf8 already since the
3117 * standard charnames uses pack U, but a custom translator
3118 * can leave it otherwise, so make sure. XXX This can be
3119 * revisited to not have charnames use utf8 for characters
3120 * that don't need it when regexes don't have to be in utf8
3121 * for Unicode semantics. If doing so, remember EBCDIC */
3122 sv_utf8_upgrade(res);
3123 str = SvPV_const(res, len);
3124
3125 /* Don't accept malformed input */
3126 if (! is_utf8_string((U8 *) str, len)) {
3127 yyerror("Malformed UTF-8 returned by \\N");
3128 }
3129 else if (PL_lex_inpat) {
3130
3131 if (! len) { /* The name resolved to an empty string */
3132 Copy("\\N{}", d, 4, char);
3133 d += 4;
3134 }
3135 else {
3136 /* In order to not lose information for the regex
3137 * compiler, pass the result in the specially made
3138 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3139 * the code points in hex of each character
3140 * returned by charnames */
3141
3142 const char *str_end = str + len;
3143 STRLEN char_length; /* cur char's byte length */
3144 STRLEN output_length; /* and the number of bytes
3145 after this is translated
3146 into hex digits */
3147 const STRLEN off = d - SvPVX_const(sv);
3148
3149 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3150 * max('U+', '.'); and 1 for NUL */
3151 char hex_string[2 * UTF8_MAXBYTES + 5];
3152
3153 /* Get the first character of the result. */
3154 U32 uv = utf8n_to_uvuni((U8 *) str,
3155 len,
3156 &char_length,
3157 UTF8_ALLOW_ANYUV);
3158
3159 /* The call to is_utf8_string() above hopefully
3160 * guarantees that there won't be an error. But
3161 * it's easy here to make sure. The function just
3162 * above warns and returns 0 if invalid utf8, but
3163 * it can also return 0 if the input is validly a
3164 * NUL. Disambiguate */
3165 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3166 uv = UNICODE_REPLACEMENT;
3167 }
3168
3169 /* Convert first code point to hex, including the
3170 * boiler plate before it */
3171 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3172 output_length = strlen(hex_string);
3173
3174 /* Make sure there is enough space to hold it */
3175 d = off + SvGROW(sv, off
3176 + output_length
3177 + (STRLEN)(send - e)
3178 + 2); /* '}' + NUL */
3179 /* And output it */
3180 Copy(hex_string, d, output_length, char);
3181 d += output_length;
3182
3183 /* For each subsequent character, append dot and
3184 * its ordinal in hex */
3185 while ((str += char_length) < str_end) {
3186 const STRLEN off = d - SvPVX_const(sv);
3187 U32 uv = utf8n_to_uvuni((U8 *) str,
3188 str_end - str,
3189 &char_length,
3190 UTF8_ALLOW_ANYUV);
3191 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3192 uv = UNICODE_REPLACEMENT;
3193 }
3194
3195 sprintf(hex_string, ".%X", (unsigned int) uv);
3196 output_length = strlen(hex_string);
3197
3198 d = off + SvGROW(sv, off
3199 + output_length
3200 + (STRLEN)(send - e)
3201 + 2); /* '}' + NUL */
3202 Copy(hex_string, d, output_length, char);
3203 d += output_length;
3204 }
3205
3206 *d++ = '}'; /* Done. Add the trailing brace */
3207 }
3208 }
3209 else { /* Here, not in a pattern. Convert the name to a
3210 * string. */
3211
3212 /* If destination is not in utf8, unconditionally
3213 * recode it to be so. This is because \N{} implies
3214 * Unicode semantics, and scalars have to be in utf8
3215 * to guarantee those semantics */
3216 if (! has_utf8) {
3217 SvCUR_set(sv, d - SvPVX_const(sv));
3218 SvPOK_on(sv);
3219 *d = '\0';
3220 /* See Note on sizing above. */
3221 sv_utf8_upgrade_flags_grow(sv,
3222 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3223 len + (STRLEN)(send - s) + 1);
3224 d = SvPVX(sv) + SvCUR(sv);
3225 has_utf8 = TRUE;
3226 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3227
3228 /* See Note on sizing above. (NOTE: SvCUR() is not
3229 * set correctly here). */
3230 const STRLEN off = d - SvPVX_const(sv);
3231 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3232 }
3233 Copy(str, d, len, char);
3234 d += len;
423cee85 3235 }
423cee85 3236 SvREFCNT_dec(res);
cb233ae3
KW
3237
3238 /* Deprecate non-approved name syntax */
3239 if (ckWARN_d(WARN_DEPRECATED)) {
3240 bool problematic = FALSE;
3241 char* i = s;
3242
3243 /* For non-ut8 input, look to see that the first
3244 * character is an alpha, then loop through the rest
3245 * checking that each is a continuation */
3246 if (! this_utf8) {
3247 if (! isALPHAU(*i)) problematic = TRUE;
3248 else for (i = s + 1; i < e; i++) {
3249 if (isCHARNAME_CONT(*i)) continue;
3250 problematic = TRUE;
3251 break;
3252 }
3253 }
3254 else {
3255 /* Similarly for utf8. For invariants can check
3256 * directly. We accept anything above the latin1
3257 * range because it is immaterial to Perl if it is
3258 * correct or not, and is expensive to check. But
3259 * it is fairly easy in the latin1 range to convert
3260 * the variants into a single character and check
3261 * those */
3262 if (UTF8_IS_INVARIANT(*i)) {
3263 if (! isALPHAU(*i)) problematic = TRUE;
3264 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3265 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3266 *(i+1)))))
3267 {
3268 problematic = TRUE;
3269 }
3270 }
3271 if (! problematic) for (i = s + UTF8SKIP(s);
3272 i < e;
3273 i+= UTF8SKIP(i))
3274 {
3275 if (UTF8_IS_INVARIANT(*i)) {
3276 if (isCHARNAME_CONT(*i)) continue;
3277 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3278 continue;
3279 } else if (isCHARNAME_CONT(
3280 UNI_TO_NATIVE(
3281 UTF8_ACCUMULATE(*i, *(i+1)))))
3282 {
3283 continue;
3284 }
3285 problematic = TRUE;
3286 break;
3287 }
3288 }
3289 if (problematic) {
6e1bad6c
KW
3290 /* The e-i passed to the final %.*s makes sure that
3291 * should the trailing NUL be missing that this
3292 * print won't run off the end of the string */
cb233ae3 3293 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3294 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3295 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3296 }
3297 }
3298 } /* End \N{NAME} */
ff3f963a
KW
3299#ifdef EBCDIC
3300 if (!dorange)
3301 native_range = FALSE; /* \N{} is defined to be Unicode */
3302#endif
3303 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3304 continue;
3305
02aa26ce 3306 /* \c is a control character */
79072805
LW
3307 case 'c':
3308 s++;
961ce445 3309 if (s < send) {
f9d13529 3310 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3311 }
961ce445
RGS
3312 else {
3313 yyerror("Missing control char name in \\c");
3314 }
79072805 3315 continue;
02aa26ce
NT
3316
3317 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3318 case 'b':
db42d148 3319 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3320 break;
3321 case 'n':
db42d148 3322 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3323 break;
3324 case 'r':
db42d148 3325 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3326 break;
3327 case 'f':
db42d148 3328 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3329 break;
3330 case 't':
db42d148 3331 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3332 break;
34a3fe2a 3333 case 'e':
db42d148 3334 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3335 break;
3336 case 'a':
db42d148 3337 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3338 break;
02aa26ce
NT
3339 } /* end switch */
3340
79072805
LW
3341 s++;
3342 continue;
02aa26ce 3343 } /* end if (backslash) */
4c3a8340
TS
3344#ifdef EBCDIC
3345 else
3346 literal_endpoint++;
3347#endif
02aa26ce 3348
f9a63242 3349 default_action:
77a135fe
KW
3350 /* If we started with encoded form, or already know we want it,
3351 then encode the next character */
3352 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3353 STRLEN len = 1;
77a135fe
KW
3354
3355
3356 /* One might think that it is wasted effort in the case of the
3357 * source being utf8 (this_utf8 == TRUE) to take the next character
3358 * in the source, convert it to an unsigned value, and then convert
3359 * it back again. But the source has not been validated here. The
3360 * routine that does the conversion checks for errors like
3361 * malformed utf8 */
3362
5f66b61c
AL
3363 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3364 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3365 if (!has_utf8) {
3366 SvCUR_set(sv, d - SvPVX_const(sv));
3367 SvPOK_on(sv);
3368 *d = '\0';
77a135fe 3369 /* See Note on sizing above. */
7bf79863
KW
3370 sv_utf8_upgrade_flags_grow(sv,
3371 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3372 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3373 d = SvPVX(sv) + SvCUR(sv);
3374 has_utf8 = TRUE;
3375 } else if (need > len) {
3376 /* encoded value larger than old, may need extra space (NOTE:
3377 * SvCUR() is not set correctly here). See Note on sizing
3378 * above. */
9d4ba2ae 3379 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3380 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3381 }
77a135fe
KW
3382 s += len;
3383
5f66b61c 3384 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3385#ifdef EBCDIC
3386 if (uv > 255 && !dorange)
3387 native_range = FALSE;
3388#endif
2b9d42f0
NIS
3389 }
3390 else {
3391 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3392 }
02aa26ce
NT
3393 } /* while loop to process each character */
3394
3395 /* terminate the string and set up the sv */
79072805 3396 *d = '\0';
95a20fc0 3397 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3398 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3399 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3400
79072805 3401 SvPOK_on(sv);
9f4817db 3402 if (PL_encoding && !has_utf8) {
d0063567
DK
3403 sv_recode_to_utf8(sv, PL_encoding);
3404 if (SvUTF8(sv))
3405 has_utf8 = TRUE;
9f4817db 3406 }
2b9d42f0 3407 if (has_utf8) {
7e2040f0 3408 SvUTF8_on(sv);
2b9d42f0 3409 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3410 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3411 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3412 }
3413 }
79072805 3414
02aa26ce 3415 /* shrink the sv if we allocated more than we used */
79072805 3416 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3417 SvPV_shrink_to_cur(sv);
79072805 3418 }
02aa26ce 3419
6154021b 3420 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3421 if (s > PL_bufptr) {
eb0d8d16
NC
3422 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3423 const char *const key = PL_lex_inpat ? "qr" : "q";
3424 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3425 const char *type;
3426 STRLEN typelen;
3427
3428 if (PL_lex_inwhat == OP_TRANS) {
3429 type = "tr";
3430 typelen = 2;
3431 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3432 type = "s";
3433 typelen = 1;
3434 } else {
3435 type = "qq";
3436 typelen = 2;
3437 }
3438
3439 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3440 type, typelen);
3441 }
6154021b 3442 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3443 } else
8990e307 3444 SvREFCNT_dec(sv);
79072805
LW
3445 return s;
3446}
3447
ffb4593c
NT
3448/* S_intuit_more
3449 * Returns TRUE if there's more to the expression (e.g., a subscript),
3450 * FALSE otherwise.
ffb4593c
NT
3451 *
3452 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3453 *
3454 * ->[ and ->{ return TRUE
3455 * { and [ outside a pattern are always subscripts, so return TRUE
3456 * if we're outside a pattern and it's not { or [, then return FALSE
3457 * if we're in a pattern and the first char is a {
3458 * {4,5} (any digits around the comma) returns FALSE
3459 * if we're in a pattern and the first char is a [
3460 * [] returns FALSE
3461 * [SOMETHING] has a funky algorithm to decide whether it's a
3462 * character class or not. It has to deal with things like
3463 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3464 * anything else returns TRUE
3465 */
3466
9cbb5ea2
GS
3467/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3468
76e3520e 3469STATIC int
cea2e8a9 3470S_intuit_more(pTHX_ register char *s)
79072805 3471{
97aff369 3472 dVAR;
7918f24d
NC
3473
3474 PERL_ARGS_ASSERT_INTUIT_MORE;
3475
3280af22 3476 if (PL_lex_brackets)
79072805
LW
3477 return TRUE;
3478 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3479 return TRUE;
3480 if (*s != '{' && *s != '[')
3481 return FALSE;
3280af22 3482 if (!PL_lex_inpat)
79072805
LW
3483 return TRUE;
3484
3485 /* In a pattern, so maybe we have {n,m}. */
3486 if (*s == '{') {
3487 s++;
3488 if (!isDIGIT(*s))
3489 return TRUE;
3490 while (isDIGIT(*s))
3491 s++;
3492 if (*s == ',')
3493 s++;
3494 while (isDIGIT(*s))
3495 s++;
3496 if (*s == '}')
3497 return FALSE;
3498 return TRUE;
3499
3500 }
3501
3502 /* On the other hand, maybe we have a character class */
3503
3504 s++;
3505 if (*s == ']' || *s == '^')
3506 return FALSE;
3507 else {
ffb4593c 3508 /* this is terrifying, and it works */
79072805
LW
3509 int weight = 2; /* let's weigh the evidence */
3510 char seen[256];
f27ffc4a 3511 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3512 const char * const send = strchr(s,']');
3280af22 3513 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3514
3515 if (!send) /* has to be an expression */
3516 return TRUE;
3517
3518 Zero(seen,256,char);
3519 if (*s == '$')
3520 weight -= 3;
3521 else if (isDIGIT(*s)) {
3522 if (s[1] != ']') {
3523 if (isDIGIT(s[1]) && s[2] == ']')
3524 weight -= 10;
3525 }
3526 else
3527 weight -= 100;
3528 }
3529 for (; s < send; s++) {
3530 last_un_char = un_char;
3531 un_char = (unsigned char)*s;
3532 switch (*s) {
3533 case '@':
3534 case '&':
3535 case '$':
3536 weight -= seen[un_char] * 10;
7e2040f0 3537 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3538 int len;
8903cb82 3539 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3540 len = (int)strlen(tmpbuf);
3541 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3542 weight -= 100;
3543 else
3544 weight -= 10;
3545 }
3546 else if (*s == '$' && s[1] &&
93a17b20
LW
3547 strchr("[#!%*<>()-=",s[1])) {
3548 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3549 weight -= 10;
3550 else
3551 weight -= 1;
3552 }
3553 break;
3554 case '\\':
3555 un_char = 254;
3556 if (s[1]) {
93a17b20 3557 if (strchr("wds]",s[1]))
79072805 3558 weight += 100;
10edeb5d 3559 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3560 weight += 1;
93a17b20 3561 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3562 weight += 40;
3563 else if (isDIGIT(s[1])) {
3564 weight += 40;
3565 while (s[1] && isDIGIT(s[1]))
3566 s++;
3567 }
3568 }
3569 else
3570 weight += 100;
3571 break;
3572 case '-':
3573 if (s[1] == '\\')
3574 weight += 50;
93a17b20 3575 if (strchr("aA01! ",last_un_char))
79072805 3576 weight += 30;
93a17b20 3577 if (strchr("zZ79~",s[1]))
79072805 3578 weight += 30;
f27ffc4a
GS
3579 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3580 weight -= 5; /* cope with negative subscript */
79072805
LW
3581 break;
3582 default:
3792a11b
NC
3583 if (!isALNUM(last_un_char)
3584 && !(last_un_char == '$' || last_un_char == '@'
3585 || last_un_char == '&')
3586 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3587 char *d = tmpbuf;
3588 while (isALPHA(*s))
3589 *d++ = *s++;
3590 *d = '\0';
5458a98a 3591 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3592 weight -= 150;
3593 }
3594 if (un_char == last_un_char + 1)
3595 weight += 5;
3596 weight -= seen[un_char];
3597 break;
3598 }
3599 seen[un_char]++;
3600 }
3601 if (weight >= 0) /* probably a character class */
3602 return FALSE;
3603 }
3604
3605 return TRUE;
3606}
ffed7fef 3607
ffb4593c
NT
3608/*
3609 * S_intuit_method
3610 *
3611 * Does all the checking to disambiguate
3612 * foo bar
3613 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3614 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3615 *
3616 * First argument is the stuff after the first token, e.g. "bar".
3617 *
3618 * Not a method if bar is a filehandle.
3619 * Not a method if foo is a subroutine prototyped to take a filehandle.
3620 * Not a method if it's really "Foo $bar"
3621 * Method if it's "foo $bar"
3622 * Not a method if it's really "print foo $bar"
3623 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3624 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3625 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3626 * =>
3627 */
3628
76e3520e 3629STATIC int
62d55b22 3630S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3631{
97aff369 3632 dVAR;
a0d0e21e 3633 char *s = start + (*start == '$');
3280af22 3634 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3635 STRLEN len;
3636 GV* indirgv;
5db06880
NC
3637#ifdef PERL_MAD
3638 int soff;
3639#endif
a0d0e21e 3640
7918f24d
NC
3641 PERL_ARGS_ASSERT_INTUIT_METHOD;
3642
a0d0e21e 3643 if (gv) {
62d55b22 3644 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3645 return 0;
62d55b22
NC
3646 if (cv) {
3647 if (SvPOK(cv)) {
3648 const char *proto = SvPVX_const(cv);
3649 if (proto) {
3650 if (*proto == ';')
3651 proto++;
3652 if (*proto == '*')
3653 return 0;
3654 }
b6c543e3
IZ
3655 }
3656 } else
c35e046a 3657 gv = NULL;
a0d0e21e 3658 }
8903cb82 3659 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3660 /* start is the beginning of the possible filehandle/object,
3661 * and s is the end of it
3662 * tmpbuf is a copy of it
3663 */
3664
a0d0e21e 3665 if (*start == '$') {
3ef1310e
RGS
3666 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3667 isUPPER(*PL_tokenbuf))
a0d0e21e 3668 return 0;
5db06880
NC
3669#ifdef PERL_MAD
3670 len = start - SvPVX(PL_linestr);
3671#endif
29595ff2 3672 s = PEEKSPACE(s);
f0092767 3673#ifdef PERL_MAD
5db06880
NC
3674 start = SvPVX(PL_linestr) + len;
3675#endif
3280af22
NIS
3676 PL_bufptr = start;
3677 PL_expect = XREF;
a0d0e21e
LW
3678 return *s == '(' ? FUNCMETH : METHOD;
3679 }
5458a98a 3680 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3681 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3682 len -= 2;
3683 tmpbuf[len] = '\0';
5db06880
NC
3684#ifdef PERL_MAD
3685 soff = s - SvPVX(PL_linestr);
3686#endif
c3e0f903
GS
3687 goto bare_package;
3688 }
90e5519e 3689 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3690 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3691 return 0;
3692 /* filehandle or package name makes it a method */
da51bb9b 3693 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3694#ifdef PERL_MAD
3695 soff = s - SvPVX(PL_linestr);
3696#endif
29595ff2 3697 s = PEEKSPACE(s);
3280af22 3698 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3699 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3700 bare_package:
cd81e915 3701 start_force(PL_curforce);
9ded7720 3702 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3703 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3704 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3705 if (PL_madskills)
3706 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3707 PL_expect = XTERM;
a0d0e21e 3708 force_next(WORD);
3280af22 3709 PL_bufptr = s;
5db06880
NC
3710#ifdef PERL_MAD
3711 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3712#endif
a0d0e21e
LW
3713 return *s == '(' ? FUNCMETH : METHOD;
3714 }
3715 }
3716 return 0;
3717}
3718
16d20bd9 3719/* Encoded script support. filter_add() effectively inserts a
4e553d73 3720 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3721 * Note that the filter function only applies to the current source file
3722 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3723 *
3724 * The datasv parameter (which may be NULL) can be used to pass
3725 * private data to this instance of the filter. The filter function
3726 * can recover the SV using the FILTER_DATA macro and use it to
3727 * store private buffers and state information.
3728 *
3729 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3730 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3731 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3732 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3733 * private use must be set using malloc'd pointers.
3734 */
16d20bd9
AD
3735
3736SV *
864dbfa3 3737Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3738{
97aff369 3739 dVAR;
f4c556ac 3740 if (!funcp)
a0714e2c 3741 return NULL;
f4c556ac 3742
5486870f
DM
3743 if (!PL_parser)
3744 return NULL;
3745
3280af22
NIS
3746 if (!PL_rsfp_filters)
3747 PL_rsfp_filters = newAV();
16d20bd9 3748 if (!datasv)
561b68a9 3749 datasv = newSV(0);
862a34c6 3750 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3751 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3752 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3753 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3754 FPTR2DPTR(void *, IoANY(datasv)),
3755 SvPV_nolen(datasv)));
3280af22
NIS
3756 av_unshift(PL_rsfp_filters, 1);
3757 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3758 return(datasv);
3759}
4e553d73 3760
16d20bd9
AD
3761
3762/* Delete most recently added instance of this filter function. */
a0d0e21e 3763void
864dbfa3 3764Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3765{
97aff369 3766 dVAR;
e0c19803 3767 SV *datasv;
24801a4b 3768
7918f24d
NC
3769 PERL_ARGS_ASSERT_FILTER_DEL;
3770
33073adb 3771#ifdef DEBUGGING
55662e27
JH
3772 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3773 FPTR2DPTR(void*, funcp)));
33073adb 3774#endif
5486870f 3775 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3776 return;
3777 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3778 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3779 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3780 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3781
16d20bd9
AD
3782 return;
3783 }
3784 /* we need to search for the correct entry and clear it */
cea2e8a9 3785 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3786}
3787
3788
1de9afcd
RGS
3789/* Invoke the idxth filter function for the current rsfp. */
3790/* maxlen 0 = read one text line */
16d20bd9 3791I32
864dbfa3 3792Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3793{
97aff369 3794 dVAR;
16d20bd9
AD
3795 filter_t funcp;
3796 SV *datasv = NULL;
f482118e
NC
3797 /* This API is bad. It should have been using unsigned int for maxlen.
3798 Not sure if we want to change the API, but if not we should sanity
3799 check the value here. */
39cd7a59
NC
3800 const unsigned int correct_length
3801 = maxlen < 0 ?
3802#ifdef PERL_MICRO
3803 0x7FFFFFFF
3804#else
3805 INT_MAX
3806#endif
3807 : maxlen;
e50aee73 3808
7918f24d
NC
3809 PERL_ARGS_ASSERT_FILTER_READ;
3810
5486870f 3811 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3812 return -1;
1de9afcd 3813 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3814 /* Provide a default input filter to make life easy. */
3815 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3816 DEBUG_P(PerlIO_printf(Perl_debug_log,
3817 "filter_read %d: from rsfp\n", idx));
f482118e 3818 if (correct_length) {
16d20bd9
AD
3819 /* Want a block */
3820 int len ;
f54cb97a 3821 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3822
3823 /* ensure buf_sv is large enough */
881d8f0a 3824 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3825 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3826 correct_length)) <= 0) {
3280af22 3827 if (PerlIO_error(PL_rsfp))
37120919
AD
3828 return -1; /* error */
3829 else
3830 return 0 ; /* end of file */
3831 }
16d20bd9 3832 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3833 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3834 } else {
3835 /* Want a line */
3280af22
NIS
3836 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3837 if (PerlIO_error(PL_rsfp))
37120919
AD
3838 return -1; /* error */
3839 else
3840 return 0 ; /* end of file */
3841 }
16d20bd9
AD
3842 }
3843 return SvCUR(buf_sv);
3844 }
3845 /* Skip this filter slot if filter has been deleted */
1de9afcd 3846 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3847 DEBUG_P(PerlIO_printf(Perl_debug_log,
3848 "filter_read %d: skipped (filter deleted)\n",
3849 idx));
f482118e 3850 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3851 }
3852 /* Get function pointer hidden within datasv */
8141890a 3853 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3854 DEBUG_P(PerlIO_printf(Perl_debug_log,
3855 "filter_read %d: via function %p (%s)\n",
ca0270c4 3856 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3857 /* Call function. The function is expected to */
3858 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3859 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3860 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3861}
3862
76e3520e 3863STATIC char *
5cc814fd 3864S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3865{
97aff369 3866 dVAR;
7918f24d
NC
3867
3868 PERL_ARGS_ASSERT_FILTER_GETS;
3869
c39cd008 3870#ifdef PERL_CR_FILTER
3280af22 3871 if (!PL_rsfp_filters) {
c39cd008 3872 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3873 }
3874#endif
3280af22 3875 if (PL_rsfp_filters) {
55497cff 3876 if (!append)
3877 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3878 if (FILTER_READ(0, sv, 0) > 0)
3879 return ( SvPVX(sv) ) ;
3880 else
bd61b366 3881 return NULL ;
16d20bd9 3882 }
9d116dd7 3883 else
5cc814fd 3884 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3885}
3886
01ec43d0 3887STATIC HV *
9bde8eb0 3888S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3889{
97aff369 3890 dVAR;
def3634b
GS
3891 GV *gv;
3892
7918f24d
NC
3893 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3894
01ec43d0 3895 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3896 return PL_curstash;
3897
3898 if (len > 2 &&
3899 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3900 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3901 {
3902 return GvHV(gv); /* Foo:: */
def3634b
GS
3903 }
3904
3905 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3906 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3907 if (gv && GvCV(gv)) {
3908 SV * const sv = cv_const_sv(GvCV(gv));
3909 if (sv)
9bde8eb0 3910 pkgname = SvPV_const(sv, len);
def3634b
GS
3911 }
3912
9bde8eb0 3913 return gv_stashpvn(pkgname, len, 0);
def3634b 3914}
a0d0e21e 3915
e3f73d4e
RGS
3916/*
3917 * S_readpipe_override
3918 * Check whether readpipe() is overriden, and generates the appropriate
3919 * optree, provided sublex_start() is called afterwards.
3920 */
3921STATIC void
1d51329b 3922S_readpipe_override(pTHX)
e3f73d4e
RGS
3923{
3924 GV **gvp;
3925 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3926 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3927 if ((gv_readpipe
3928 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3929 ||
3930 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3931 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3932 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3933 {
3934 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3935 append_elem(OP_LIST,
3936 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3937 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3938 }
e3f73d4e
RGS
3939}
3940
5db06880
NC
3941#ifdef PERL_MAD
3942 /*
3943 * Perl_madlex
3944 * The intent of this yylex wrapper is to minimize the changes to the
3945 * tokener when we aren't interested in collecting madprops. It remains
3946 * to be seen how successful this strategy will be...
3947 */
3948
3949int
3950Perl_madlex(pTHX)
3951{
3952 int optype;
3953 char *s = PL_bufptr;
3954
cd81e915
NC
3955 /* make sure PL_thiswhite is initialized */
3956 PL_thiswhite = 0;
3957 PL_thismad = 0;
5db06880 3958
cd81e915 3959 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 3960 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
3961 return S_pending_ident(aTHX);
3962
3963 /* previous token ate up our whitespace? */
cd81e915
NC
3964 if (!PL_lasttoke && PL_nextwhite) {
3965 PL_thiswhite = PL_nextwhite;
3966 PL_nextwhite = 0;
5db06880
NC
3967 }
3968
3969 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3970 PL_realtokenstart = -1;
3971 PL_thistoken = 0;
5db06880
NC
3972 optype = yylex();
3973 s = PL_bufptr;
cd81e915 3974 assert(PL_curforce < 0);
5db06880 3975
cd81e915
NC
3976 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3977 if (!PL_thistoken) {
3978 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3979 PL_thistoken = newSVpvs("");
5db06880 3980 else {
c35e046a 3981 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3982 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3983 }
3984 }
cd81e915
NC
3985 if (PL_thismad) /* install head */
3986 CURMAD('X', PL_thistoken);
5db06880
NC
3987 }
3988
3989 /* last whitespace of a sublex? */
cd81e915
NC
3990 if (optype == ')' && PL_endwhite) {
3991 CURMAD('X', PL_endwhite);
5db06880
NC
3992 }
3993
cd81e915 3994 if (!PL_thismad) {
5db06880
NC
3995
3996 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3997 if (!PL_thiswhite && !PL_endwhite && !optype) {
3998 sv_free(PL_thistoken);
3999 PL_thistoken = 0;
5db06880
NC
4000 return 0;
4001 }
4002
4003 /* put off final whitespace till peg */
4004 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4005 PL_nextwhite = PL_thiswhite;
4006 PL_thiswhite = 0;
5db06880 4007 }
cd81e915
NC
4008 else if (PL_thisopen) {
4009 CURMAD('q', PL_thisopen);
4010 if (PL_thistoken)
4011 sv_free(PL_thistoken);
4012 PL_thistoken = 0;
5db06880
NC
4013 }
4014 else {
4015 /* Store actual token text as madprop X */
cd81e915 4016 CURMAD('X', PL_thistoken);
5db06880
NC
4017 }
4018
cd81e915 4019 if (PL_thiswhite) {
5db06880 4020 /* add preceding whitespace as madprop _ */
cd81e915 4021 CURMAD('_', PL_thiswhite);
5db06880
NC
4022 }
4023
cd81e915 4024 if (PL_thisstuff) {
5db06880 4025 /* add quoted material as madprop = */
cd81e915 4026 CURMAD('=', PL_thisstuff);
5db06880
NC
4027 }
4028
cd81e915 4029 if (PL_thisclose) {
5db06880 4030 /* add terminating quote as madprop Q */
cd81e915 4031 CURMAD('Q', PL_thisclose);
5db06880
NC
4032 }
4033 }
4034
4035 /* special processing based on optype */
4036
4037 switch (optype) {
4038
4039 /* opval doesn't need a TOKEN since it can already store mp */
4040 case WORD:
4041 case METHOD:
4042 case FUNCMETH:
4043 case THING:
4044 case PMFUNC:
4045 case PRIVATEREF:
4046 case FUNC0SUB:
4047 case UNIOPSUB:
4048 case LSTOPSUB:
6154021b
RGS
4049 if (pl_yylval.opval)
4050 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4051 PL_thismad = 0;
5db06880
NC
4052 return optype;
4053
4054 /* fake EOF */
4055 case 0:
4056 optype = PEG;
cd81e915
NC
4057 if (PL_endwhite) {
4058 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4059 PL_endwhite = 0;
5db06880
NC
4060 }
4061 break;
4062
4063 case ']':
4064 case '}':
cd81e915 4065 if (PL_faketokens)
5db06880
NC
4066 break;
4067 /* remember any fake bracket that lexer is about to discard */
4068 if (PL_lex_brackets == 1 &&
4069 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4070 {
4071 s = PL_bufptr;
4072 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4073 s++;
4074 if (*s == '}') {
cd81e915
NC
4075 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4076 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4077 PL_thiswhite = 0;
5db06880
NC
4078 PL_bufptr = s - 1;
4079 break; /* don't bother looking for trailing comment */
4080 }
4081 else
4082 s = PL_bufptr;
4083 }
4084 if (optype == ']')
4085 break;
4086 /* FALLTHROUGH */
4087
4088 /* attach a trailing comment to its statement instead of next token */
4089 case ';':
cd81e915 4090 if (PL_faketokens)
5db06880
NC
4091 break;
4092 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4093 s = PL_bufptr;
4094 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4095 s++;
4096 if (*s == '\n' || *s == '#') {
4097 while (s < PL_bufend && *s != '\n')
4098 s++;
4099 if (s < PL_bufend)
4100 s++;
cd81e915
NC
4101 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4102 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4103 PL_thiswhite = 0;
5db06880
NC
4104 PL_bufptr = s;
4105 }
4106 }
4107 break;
4108
4109 /* pval */
4110 case LABEL:
4111 break;
4112
4113 /* ival */
4114 default:
4115 break;
4116
4117 }
4118
4119 /* Create new token struct. Note: opvals return early above. */
6154021b 4120 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4121 PL_thismad = 0;
5db06880
NC
4122 return optype;
4123}
4124#endif
4125
468aa647 4126STATIC char *
cc6ed77d 4127S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4128 dVAR;
7918f24d
NC
4129
4130 PERL_ARGS_ASSERT_TOKENIZE_USE;
4131
468aa647
RGS
4132 if (PL_expect != XSTATE)
4133 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4134 is_use ? "use" : "no"));
29595ff2 4135 s = SKIPSPACE1(s);
468aa647
RGS
4136 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4137 s = force_version(s, TRUE);
17c59fdf
VP
4138 if (*s == ';' || *s == '}'
4139 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4140 start_force(PL_curforce);
9ded7720 4141 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4142 force_next(WORD);
4143 }
4144 else if (*s == 'v') {
4145 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4146 s = force_version(s, FALSE);
4147 }
4148 }
4149 else {
4150 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4151 s = force_version(s, FALSE);
4152 }
6154021b 4153 pl_yylval.ival = is_use;
468aa647
RGS
4154 return s;
4155}
748a9306 4156#ifdef DEBUGGING
27da23d5 4157 static const char* const exp_name[] =
09bef843 4158 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4159 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4160 };
748a9306 4161#endif
463ee0b2 4162
02aa26ce
NT
4163/*
4164 yylex
4165
4166 Works out what to call the token just pulled out of the input
4167 stream. The yacc parser takes care of taking the ops we return and
4168 stitching them into a tree.
4169
4170 Returns:
4171 PRIVATEREF
4172
4173 Structure:
4174 if read an identifier
4175 if we're in a my declaration
4176 croak if they tried to say my($foo::bar)
4177 build the ops for a my() declaration
4178 if it's an access to a my() variable
4179 are we in a sort block?
4180 croak if my($a); $a <=> $b
4181 build ops for access to a my() variable
4182 if in a dq string, and they've said @foo and we can't find @foo
4183 croak
4184 build ops for a bareword
4185 if we already built the token before, use it.
4186*/
4187
20141f0e 4188
dba4d153
JH
4189#ifdef __SC__
4190#pragma segment Perl_yylex
4191#endif
dba4d153 4192int
dba4d153 4193Perl_yylex(pTHX)
20141f0e 4194{
97aff369 4195 dVAR;
3afc138a 4196 register char *s = PL_bufptr;
378cc40b 4197 register char *d;
463ee0b2 4198 STRLEN len;
aa7440fb 4199 bool bof = FALSE;
580561a3 4200 U32 fake_eof = 0;
a687059c 4201
10edeb5d
JH
4202 /* orig_keyword, gvp, and gv are initialized here because
4203 * jump to the label just_a_word_zero can bypass their
4204 * initialization later. */
4205 I32 orig_keyword = 0;
4206 GV *gv = NULL;
4207 GV **gvp = NULL;
4208
bbf60fe6 4209 DEBUG_T( {
396482e1 4210 SV* tmp = newSVpvs("");
b6007c36
DM
4211 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4212 (IV)CopLINE(PL_curcop),
4213 lex_state_names[PL_lex_state],
4214 exp_name[PL_expect],
4215 pv_display(tmp, s, strlen(s), 0, 60));
4216 SvREFCNT_dec(tmp);
bbf60fe6 4217 } );
02aa26ce 4218 /* check if there's an identifier for us to look at */
28ac2b49 4219 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4220 return REPORT(S_pending_ident(aTHX));
bbce6d69 4221
02aa26ce
NT
4222 /* no identifier pending identification */
4223
3280af22 4224 switch (PL_lex_state) {
79072805
LW
4225#ifdef COMMENTARY
4226 case LEX_NORMAL: /* Some compilers will produce faster */
4227 case LEX_INTERPNORMAL: /* code if we comment these out. */
4228 break;
4229#endif
4230
09bef843 4231 /* when we've already built the next token, just pull it out of the queue */
79072805 4232 case LEX_KNOWNEXT:
5db06880
NC
4233#ifdef PERL_MAD
4234 PL_lasttoke--;
6154021b 4235 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4236 if (PL_madskills) {
cd81e915 4237 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4238 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4239 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4240 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4241 PL_thismad->mad_val = 0;
4242 mad_free(PL_thismad);
4243 PL_thismad = 0;
5db06880
NC
4244 }
4245 }
4246 if (!PL_lasttoke) {
4247 PL_lex_state = PL_lex_defer;
4248 PL_expect = PL_lex_expect;
4249 PL_lex_defer = LEX_NORMAL;
4250 if (!PL_nexttoke[PL_lasttoke].next_type)
4251 return yylex();
4252 }
4253#else
3280af22 4254 PL_nexttoke--;
6154021b 4255 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4256 if (!PL_nexttoke) {
4257 PL_lex_state = PL_lex_defer;
4258 PL_expect = PL_lex_expect;
4259 PL_lex_defer = LEX_NORMAL;
463ee0b2 4260 }
5db06880
NC
4261#endif
4262#ifdef PERL_MAD
4263 /* FIXME - can these be merged? */
4264 return(PL_nexttoke[PL_lasttoke].next_type);
4265#else
bbf60fe6 4266 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4267#endif
79072805 4268
02aa26ce 4269 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4270 when we get here, PL_bufptr is at the \
02aa26ce 4271 */
79072805
LW
4272 case LEX_INTERPCASEMOD:
4273#ifdef DEBUGGING
3280af22 4274 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4275 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4276#endif
02aa26ce 4277 /* handle \E or end of string */
3280af22 4278 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4279 /* if at a \E */
3280af22 4280 if (PL_lex_casemods) {
f54cb97a 4281 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4282 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4283
3792a11b
NC
4284 if (PL_bufptr != PL_bufend
4285 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4286 PL_bufptr += 2;
4287 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4288#ifdef PERL_MAD
4289 if (PL_madskills)
6b29d1f5 4290 PL_thistoken = newSVpvs("\\E");
5db06880 4291#endif
a0d0e21e 4292 }
bbf60fe6 4293 return REPORT(')');
79072805 4294 }
5db06880
NC
4295#ifdef PERL_MAD
4296 while (PL_bufptr != PL_bufend &&
4297 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4298 if (!PL_thiswhite)
6b29d1f5 4299 PL_thiswhite = newSVpvs("");
cd81e915 4300 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4301 PL_bufptr += 2;
4302 }
4303#else
3280af22
NIS
4304 if (PL_bufptr != PL_bufend)
4305 PL_bufptr += 2;
5db06880 4306#endif
3280af22 4307 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4308 return yylex();
79072805
LW
4309 }
4310 else {
607df283 4311 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4312 "### Saw case modifier\n"); });
3280af22 4313 s = PL_bufptr + 1;
6e909404 4314 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4315#ifdef PERL_MAD
cd81e915 4316 if (!PL_thiswhite)
6b29d1f5 4317 PL_thiswhite = newSVpvs("");
cd81e915 4318 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4319#endif
89122651 4320 PL_bufptr = s + 3;
6e909404
JH
4321 PL_lex_state = LEX_INTERPCONCAT;
4322 return yylex();
a0d0e21e 4323 }
6e909404 4324 else {
90771dc0 4325 I32 tmp;
5db06880
NC
4326 if (!PL_madskills) /* when just compiling don't need correct */
4327 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4328 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4329 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4330 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4331 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4332 return REPORT(')');
6e909404
JH
4333 }
4334 if (PL_lex_casemods > 10)
4335 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4336 PL_lex_casestack[PL_lex_casemods++] = *s;
4337 PL_lex_casestack[PL_lex_casemods] = '\0';
4338 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4339 start_force(PL_curforce);
9ded7720 4340 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4341 force_next('(');
cd81e915 4342 start_force(PL_curforce);
6e909404 4343 if (*s == 'l')
9ded7720 4344 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4345 else if (*s == 'u')
9ded7720 4346 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4347 else if (*s == 'L')
9ded7720 4348 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4349 else if (*s == 'U')
9ded7720 4350 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4351 else if (*s == 'Q')
9ded7720 4352 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4353 else
4354 Perl_croak(aTHX_ "panic: yylex");
5db06880 4355 if (PL_madskills) {
a5849ce5
NC
4356 SV* const tmpsv = newSVpvs("\\ ");
4357 /* replace the space with the character we want to escape
4358 */
4359 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4360 curmad('_', tmpsv);
4361 }
6e909404 4362 PL_bufptr = s + 1;
a0d0e21e 4363 }
79072805 4364 force_next(FUNC);
3280af22
NIS
4365 if (PL_lex_starts) {
4366 s = PL_bufptr;
4367 PL_lex_starts = 0;
5db06880
NC
4368#ifdef PERL_MAD
4369 if (PL_madskills) {
cd81e915
NC
4370 if (PL_thistoken)
4371 sv_free(PL_thistoken);
6b29d1f5 4372 PL_thistoken = newSVpvs("");
5db06880
NC
4373 }
4374#endif
131b3ad0
DM
4375 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4376 if (PL_lex_casemods == 1 && PL_lex_inpat)
4377 OPERATOR(',');
4378 else
4379 Aop(OP_CONCAT);
79072805
LW
4380 }
4381 else
cea2e8a9 4382 return yylex();
79072805
LW
4383 }
4384
55497cff 4385 case LEX_INTERPPUSH:
bbf60fe6 4386 return REPORT(sublex_push());
55497cff 4387
79072805 4388 case LEX_INTERPSTART:
3280af22 4389 if (PL_bufptr == PL_bufend)
bbf60fe6 4390 return REPORT(sublex_done());
607df283 4391 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4392 "### Interpolated variable\n"); });
3280af22
NIS
4393 PL_expect = XTERM;
4394 PL_lex_dojoin = (*PL_bufptr == '@');
4395 PL_lex_state = LEX_INTERPNORMAL;
4396 if (PL_lex_dojoin) {
cd81e915 4397 start_force(PL_curforce);
9ded7720 4398 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4399 force_next(',');
cd81e915 4400 start_force(PL_curforce);
a0d0e21e 4401 force_ident("\"", '$');
cd81e915 4402 start_force(PL_curforce);
9ded7720 4403 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4404 force_next('$');
cd81e915 4405 start_force(PL_curforce);
9ded7720 4406 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4407 force_next('(');
cd81e915 4408 start_force(PL_curforce);
9ded7720 4409 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4410 force_next(FUNC);
4411 }
3280af22
NIS
4412 if (PL_lex_starts++) {
4413 s = PL_bufptr;
5db06880
NC
4414#ifdef PERL_MAD
4415 if (PL_madskills) {
cd81e915
NC
4416 if (PL_thistoken)
4417 sv_free(PL_thistoken);
6b29d1f5 4418 PL_thistoken = newSVpvs("");
5db06880
NC
4419 }
4420#endif
131b3ad0
DM
4421 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4422 if (!PL_lex_casemods && PL_lex_inpat)
4423 OPERATOR(',');
4424 else
4425 Aop(OP_CONCAT);
79072805 4426 }
cea2e8a9 4427 return yylex();
79072805
LW
4428
4429 case LEX_INTERPENDMAYBE:
3280af22
NIS
4430 if (intuit_more(PL_bufptr)) {
4431 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4432 break;
4433 }
4434 /* FALL THROUGH */
4435
4436 case LEX_INTERPEND:
3280af22
NIS
4437 if (PL_lex_dojoin) {
4438 PL_lex_dojoin = FALSE;
4439 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4440#ifdef PERL_MAD
4441 if (PL_madskills) {
cd81e915
NC
4442 if (PL_thistoken)
4443 sv_free(PL_thistoken);
6b29d1f5 4444 PL_thistoken = newSVpvs("");
5db06880
NC
4445 }
4446#endif
bbf60fe6 4447 return REPORT(')');
79072805 4448 }
43a16006 4449 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4450 && SvEVALED(PL_lex_repl))
43a16006 4451 {
e9fa98b2 4452 if (PL_bufptr != PL_bufend)
cea2e8a9 4453 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4454 PL_lex_repl = NULL;
e9fa98b2 4455 }
79072805
LW
4456 /* FALLTHROUGH */
4457 case LEX_INTERPCONCAT:
4458#ifdef DEBUGGING
3280af22 4459 if (PL_lex_brackets)
cea2e8a9 4460 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4461#endif
3280af22 4462 if (PL_bufptr == PL_bufend)
bbf60fe6 4463 return REPORT(sublex_done());
79072805 4464
3280af22
NIS
4465 if (SvIVX(PL_linestr) == '\'') {
4466 SV *sv = newSVsv(PL_linestr);
4467 if (!PL_lex_inpat)
76e3520e 4468 sv = tokeq(sv);
3280af22 4469 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4470 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4471 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4472 s = PL_bufend;
79072805
LW
4473 }
4474 else {
3280af22 4475 s = scan_const(PL_bufptr);
79072805 4476 if (*s == '\\')
3280af22 4477 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4478 else
3280af22 4479 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4480 }
4481
3280af22 4482 if (s != PL_bufptr) {
cd81e915 4483 start_force(PL_curforce);
5db06880
NC
4484 if (PL_madskills) {
4485 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4486 }
6154021b 4487 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4488 PL_expect = XTERM;
79072805 4489 force_next(THING);
131b3ad0 4490 if (PL_lex_starts++) {
5db06880
NC
4491#ifdef PERL_MAD
4492 if (PL_madskills) {
cd81e915
NC
4493 if (PL_thistoken)
4494 sv_free(PL_thistoken);
6b29d1f5 4495 PL_thistoken = newSVpvs("");
5db06880
NC
4496 }
4497#endif
131b3ad0
DM
4498 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4499 if (!PL_lex_casemods && PL_lex_inpat)
4500 OPERATOR(',');
4501 else
4502 Aop(OP_CONCAT);
4503 }
79072805 4504 else {
3280af22 4505 PL_bufptr = s;
cea2e8a9 4506 return yylex();
79072805
LW
4507 }
4508 }
4509
cea2e8a9 4510 return yylex();
a0d0e21e 4511 case LEX_FORMLINE:
3280af22
NIS
4512 PL_lex_state = LEX_NORMAL;
4513 s = scan_formline(PL_bufptr);
4514 if (!PL_lex_formbrack)
a0d0e21e
LW
4515 goto rightbracket;
4516 OPERATOR(';');
79072805
LW
4517 }
4518
3280af22
NIS
4519 s = PL_bufptr;
4520 PL_oldoldbufptr = PL_oldbufptr;
4521 PL_oldbufptr = s;
463ee0b2
LW
4522
4523 retry:
5db06880 4524#ifdef PERL_MAD
cd81e915
NC
4525 if (PL_thistoken) {
4526 sv_free(PL_thistoken);
4527 PL_thistoken = 0;
5db06880 4528 }
cd81e915 4529 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4530#endif
378cc40b
LW
4531 switch (*s) {
4532 default:
7e2040f0 4533 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4534 goto keylookup;
b1fc3636
CJ
4535 {
4536 unsigned char c = *s;
4537 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4538 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4539 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4540 } else {
4541 d = PL_linestart;
4542 }
4543 *s = '\0';
4544 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4545 }
e929a76b
LW
4546 case 4:
4547 case 26:
4548 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4549 case 0:
5db06880
NC
4550#ifdef PERL_MAD
4551 if (PL_madskills)
cd81e915 4552 PL_faketokens = 0;
5db06880 4553#endif
3280af22
NIS
4554 if (!PL_rsfp) {
4555 PL_last_uni = 0;
4556 PL_last_lop = 0;
c5ee2135 4557 if (PL_lex_brackets) {
10edeb5d
JH
4558 yyerror((const char *)
4559 (PL_lex_formbrack
4560 ? "Format not terminated"
4561 : "Missing right curly or square bracket"));
c5ee2135 4562 }
4e553d73 4563 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4564 "### Tokener got EOF\n");
5f80b19c 4565 } );
79072805 4566 TOKEN(0);
463ee0b2 4567 }
3280af22 4568 if (s++ < PL_bufend)
a687059c 4569 goto retry; /* ignore stray nulls */
3280af22
NIS
4570 PL_last_uni = 0;
4571 PL_last_lop = 0;
4572 if (!PL_in_eval && !PL_preambled) {
4573 PL_preambled = TRUE;
5db06880
NC
4574#ifdef PERL_MAD
4575 if (PL_madskills)
cd81e915 4576 PL_faketokens = 1;
5db06880 4577#endif
5ab7ff98
NC
4578 if (PL_perldb) {
4579 /* Generate a string of Perl code to load the debugger.
4580 * If PERL5DB is set, it will return the contents of that,
4581 * otherwise a compile-time require of perl5db.pl. */
4582
4583 const char * const pdb = PerlEnv_getenv("PERL5DB");
4584
4585 if (pdb) {
4586 sv_setpv(PL_linestr, pdb);
4587 sv_catpvs(PL_linestr,";");
4588 } else {
4589 SETERRNO(0,SS_NORMAL);
4590 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4591 }
4592 } else
4593 sv_setpvs(PL_linestr,"");
c62eb204
NC
4594 if (PL_preambleav) {
4595 SV **svp = AvARRAY(PL_preambleav);
4596 SV **const end = svp + AvFILLp(PL_preambleav);
4597 while(svp <= end) {
4598 sv_catsv(PL_linestr, *svp);
4599 ++svp;
396482e1 4600 sv_catpvs(PL_linestr, ";");
91b7def8 4601 }
daba3364 4602 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4603 PL_preambleav = NULL;
91b7def8 4604 }
9f639728
FR
4605 if (PL_minus_E)
4606 sv_catpvs(PL_linestr,
4607 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4608 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4609 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4610 if (PL_minus_l)
396482e1 4611 sv_catpvs(PL_linestr,"chomp;");
3280af22 4612 if (PL_minus_a) {
3280af22 4613 if (PL_minus_F) {
3792a11b
NC
4614 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4615 || *PL_splitstr == '"')
3280af22 4616 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4617 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4618 else {
c8ef6a4b
NC
4619 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4620 bytes can be used as quoting characters. :-) */
dd374669 4621 const char *splits = PL_splitstr;
91d456ae 4622 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4623 do {
4624 /* Need to \ \s */
dd374669
AL
4625 if (*splits == '\\')
4626 sv_catpvn(PL_linestr, splits, 1);
4627 sv_catpvn(PL_linestr, splits, 1);
4628 } while (*splits++);
48c4c863
NC
4629 /* This loop will embed the trailing NUL of
4630 PL_linestr as the last thing it does before
4631 terminating. */
396482e1 4632 sv_catpvs(PL_linestr, ");");
54310121 4633 }
2304df62
AD
4634 }
4635 else
396482e1 4636 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4637 }
79072805 4638 }
396482e1 4639 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4640 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4641 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4642 PL_last_lop = PL_last_uni = NULL;
65269a95 4643 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4644 update_debugger_info(PL_linestr, NULL, 0);
79072805 4645 goto retry;
a687059c 4646 }
e929a76b 4647 do {
580561a3
Z
4648 fake_eof = 0;
4649 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4650 if (0) {
7e28d3af 4651 fake_eof:
f0e67a1d
Z
4652 fake_eof = LEX_FAKE_EOF;
4653 }
4654 PL_bufptr = PL_bufend;
17cc9359 4655 CopLINE_inc(PL_curcop);
f0e67a1d 4656 if (!lex_next_chunk(fake_eof)) {
17cc9359 4657 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4658 s = PL_bufptr;
4659 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4660 }
17cc9359 4661 CopLINE_dec(PL_curcop);
5db06880 4662#ifdef PERL_MAD
f0e67a1d 4663 if (!PL_rsfp)
cd81e915 4664 PL_realtokenstart = -1;
5db06880 4665#endif
f0e67a1d 4666 s = PL_bufptr;
7aa207d6
JH
4667 /* If it looks like the start of a BOM or raw UTF-16,
4668 * check if it in fact is. */
580561a3 4669 if (bof && PL_rsfp &&
7aa207d6
JH
4670 (*s == 0 ||
4671 *(U8*)s == 0xEF ||
4672 *(U8*)s >= 0xFE ||
4673 s[1] == 0)) {
eb160463 4674 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4675 if (bof) {
3280af22 4676 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4677 s = swallow_bom((U8*)s);
e929a76b 4678 }
378cc40b 4679 }
3280af22 4680 if (PL_doextract) {
a0d0e21e 4681 /* Incest with pod. */
5db06880
NC
4682#ifdef PERL_MAD
4683 if (PL_madskills)
cd81e915 4684 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4685#endif
01a57ef7 4686 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4687 sv_setpvs(PL_linestr, "");
3280af22
NIS
4688 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4689 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4690 PL_last_lop = PL_last_uni = NULL;
3280af22 4691 PL_doextract = FALSE;
a0d0e21e 4692 }
4e553d73 4693 }
85613cab
Z
4694 if (PL_rsfp)
4695 incline(s);
3280af22
NIS
4696 } while (PL_doextract);
4697 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4698 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4699 PL_last_lop = PL_last_uni = NULL;
57843af0 4700 if (CopLINE(PL_curcop) == 1) {
3280af22 4701 while (s < PL_bufend && isSPACE(*s))
79072805 4702 s++;
a0d0e21e 4703 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4704 s++;
5db06880
NC
4705#ifdef PERL_MAD
4706 if (PL_madskills)
cd81e915 4707 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4708#endif
bd61b366 4709 d = NULL;
3280af22 4710 if (!PL_in_eval) {
44a8e56a 4711 if (*s == '#' && *(s+1) == '!')
4712 d = s + 2;
4713#ifdef ALTERNATE_SHEBANG
4714 else {
bfed75c6 4715 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4716 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4717 d = s + (sizeof(as) - 1);
4718 }
4719#endif /* ALTERNATE_SHEBANG */
4720 }
4721 if (d) {
b8378b72 4722 char *ipath;
774d564b 4723 char *ipathend;
b8378b72 4724
774d564b 4725 while (isSPACE(*d))
b8378b72
CS
4726 d++;
4727 ipath = d;
774d564b 4728 while (*d && !isSPACE(*d))
4729 d++;
4730 ipathend = d;
4731
4732#ifdef ARG_ZERO_IS_SCRIPT
4733 if (ipathend > ipath) {
4734 /*
4735 * HP-UX (at least) sets argv[0] to the script name,
4736 * which makes $^X incorrect. And Digital UNIX and Linux,
4737 * at least, set argv[0] to the basename of the Perl
4738 * interpreter. So, having found "#!", we'll set it right.
4739 */
fafc274c
NC
4740 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4741 SVt_PV)); /* $^X */
774d564b 4742 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4743 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4744 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4745 SvSETMAGIC(x);
4746 }
556c1dec
JH
4747 else {
4748 STRLEN blen;
4749 STRLEN llen;
cfd0369c 4750 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4751 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4752 if (llen < blen) {
4753 bstart += blen - llen;
4754 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4755 sv_setpvn(x, ipath, ipathend - ipath);
4756 SvSETMAGIC(x);
4757 }
4758 }
4759 }
774d564b 4760 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4761 }
774d564b 4762#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4763
4764 /*
4765 * Look for options.
4766 */
748a9306 4767 d = instr(s,"perl -");
84e30d1a 4768 if (!d) {
748a9306 4769 d = instr(s,"perl");
84e30d1a
GS
4770#if defined(DOSISH)
4771 /* avoid getting into infinite loops when shebang
4772 * line contains "Perl" rather than "perl" */
4773 if (!d) {
4774 for (d = ipathend-4; d >= ipath; --d) {
4775 if ((*d == 'p' || *d == 'P')
4776 && !ibcmp(d, "perl", 4))
4777 {
4778 break;
4779 }
4780 }
4781 if (d < ipath)
bd61b366 4782 d = NULL;
84e30d1a
GS
4783 }
4784#endif
4785 }
44a8e56a 4786#ifdef ALTERNATE_SHEBANG
4787 /*
4788 * If the ALTERNATE_SHEBANG on this system starts with a
4789 * character that can be part of a Perl expression, then if
4790 * we see it but not "perl", we're probably looking at the
4791 * start of Perl code, not a request to hand off to some
4792 * other interpreter. Similarly, if "perl" is there, but
4793 * not in the first 'word' of the line, we assume the line
4794 * contains the start of the Perl program.
44a8e56a 4795 */
4796 if (d && *s != '#') {
f54cb97a 4797 const char *c = ipath;
44a8e56a 4798 while (*c && !strchr("; \t\r\n\f\v#", *c))
4799 c++;
4800 if (c < d)
bd61b366 4801 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4802 else
4803 *s = '#'; /* Don't try to parse shebang line */
4804 }
774d564b 4805#endif /* ALTERNATE_SHEBANG */
748a9306 4806 if (!d &&
44a8e56a 4807 *s == '#' &&
774d564b 4808 ipathend > ipath &&
3280af22 4809 !PL_minus_c &&
748a9306 4810 !instr(s,"indir") &&
3280af22 4811 instr(PL_origargv[0],"perl"))
748a9306 4812 {
27da23d5 4813 dVAR;
9f68db38 4814 char **newargv;
9f68db38 4815
774d564b 4816 *ipathend = '\0';
4817 s = ipathend + 1;
3280af22 4818 while (s < PL_bufend && isSPACE(*s))
9f68db38 4819 s++;
3280af22 4820 if (s < PL_bufend) {
d85f917e 4821 Newx(newargv,PL_origargc+3,char*);
9f68db38 4822 newargv[1] = s;
3280af22 4823 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4824 s++;
4825 *s = '\0';
3280af22 4826 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4827 }
4828 else
3280af22 4829 newargv = PL_origargv;
774d564b 4830 newargv[0] = ipath;
b35112e7 4831 PERL_FPU_PRE_EXEC
b4748376 4832 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4833 PERL_FPU_POST_EXEC
cea2e8a9 4834 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4835 }
748a9306 4836 if (d) {
c35e046a
AL
4837 while (*d && !isSPACE(*d))
4838 d++;
4839 while (SPACE_OR_TAB(*d))
4840 d++;
748a9306
LW
4841
4842 if (*d++ == '-') {
f54cb97a 4843 const bool switches_done = PL_doswitches;
fb993905
GA
4844 const U32 oldpdb = PL_perldb;
4845 const bool oldn = PL_minus_n;
4846 const bool oldp = PL_minus_p;
c7030b81 4847 const char *d1 = d;
fb993905 4848
8cc95fdb 4849 do {
4ba71d51
FC
4850 bool baduni = FALSE;
4851 if (*d1 == 'C') {
bd0ab00d
NC
4852 const char *d2 = d1 + 1;
4853 if (parse_unicode_opts((const char **)&d2)
4854 != PL_unicode)
4855 baduni = TRUE;
4ba71d51
FC
4856 }
4857 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4858 const char * const m = d1;
4859 while (*d1 && !isSPACE(*d1))
4860 d1++;
cea2e8a9 4861 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4862 (int)(d1 - m), m);
8cc95fdb 4863 }
c7030b81
NC
4864 d1 = moreswitches(d1);
4865 } while (d1);
f0b2cf55
YST
4866 if (PL_doswitches && !switches_done) {
4867 int argc = PL_origargc;
4868 char **argv = PL_origargv;
4869 do {
4870 argc--,argv++;
4871 } while (argc && argv[0][0] == '-' && argv[0][1]);
4872 init_argv_symbols(argc,argv);
4873 }
65269a95 4874 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4875 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4876 /* if we have already added "LINE: while (<>) {",
4877 we must not do it again */
748a9306 4878 {
76f68e9b 4879 sv_setpvs(PL_linestr, "");
3280af22
NIS
4880 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4881 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4882 PL_last_lop = PL_last_uni = NULL;
3280af22 4883 PL_preambled = FALSE;
65269a95 4884 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4885 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4886 goto retry;
4887 }
a0d0e21e 4888 }
79072805 4889 }
9f68db38 4890 }
79072805 4891 }
3280af22
NIS
4892 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4893 PL_bufptr = s;
4894 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4895 return yylex();
ae986130 4896 }
378cc40b 4897 goto retry;
4fdae800 4898 case '\r':
6a27c188 4899#ifdef PERL_STRICT_CR
cea2e8a9 4900 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4901 Perl_croak(aTHX_
cc507455 4902 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4903#endif
4fdae800 4904 case ' ': case '\t': case '\f': case 013:
5db06880 4905#ifdef PERL_MAD
cd81e915 4906 PL_realtokenstart = -1;
ac372eb8
RD
4907 if (!PL_thiswhite)
4908 PL_thiswhite = newSVpvs("");
4909 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4910#endif
ac372eb8 4911 s++;
378cc40b 4912 goto retry;
378cc40b 4913 case '#':
e929a76b 4914 case '\n':
5db06880 4915#ifdef PERL_MAD
cd81e915 4916 PL_realtokenstart = -1;
5db06880 4917 if (PL_madskills)
cd81e915 4918 PL_faketokens = 0;
5db06880 4919#endif
3280af22 4920 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4921 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4922 /* handle eval qq[#line 1 "foo"\n ...] */
4923 CopLINE_dec(PL_curcop);
4924 incline(s);
4925 }
5db06880
NC
4926 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4927 s = SKIPSPACE0(s);
4928 if (!PL_in_eval || PL_rsfp)
4929 incline(s);
4930 }
4931 else {
4932 d = s;
4933 while (d < PL_bufend && *d != '\n')
4934 d++;
4935 if (d < PL_bufend)
4936 d++;
4937 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4938 Perl_croak(aTHX_ "panic: input overflow");
4939#ifdef PERL_MAD
4940 if (PL_madskills)
cd81e915 4941 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4942#endif
4943 s = d;
4944 incline(s);
4945 }
3280af22
NIS
4946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4947 PL_bufptr = s;
4948 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4949 return yylex();
a687059c 4950 }
378cc40b 4951 }
a687059c 4952 else {
5db06880
NC
4953#ifdef PERL_MAD
4954 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4955 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4956 PL_faketokens = 0;
5db06880
NC
4957 s = SKIPSPACE0(s);
4958 TOKEN(PEG); /* make sure any #! line is accessible */
4959 }
4960 s = SKIPSPACE0(s);
4961 }
4962 else {
4963/* if (PL_madskills && PL_lex_formbrack) { */
4964 d = s;
4965 while (d < PL_bufend && *d != '\n')
4966 d++;
4967 if (d < PL_bufend)
4968 d++;
4969 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4970 Perl_croak(aTHX_ "panic: input overflow");
4971 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4972 if (!PL_thiswhite)
6b29d1f5 4973 PL_thiswhite = newSVpvs("");
5db06880 4974 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4975 sv_setpvs(PL_thiswhite, "");
cd81e915 4976 PL_faketokens = 0;
5db06880 4977 }
cd81e915 4978 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4979 }
4980 s = d;
4981/* }
4982 *s = '\0';
4983 PL_bufend = s; */
4984 }
4985#else
378cc40b 4986 *s = '\0';
3280af22 4987 PL_bufend = s;
5db06880 4988#endif
a687059c 4989 }
378cc40b
LW
4990 goto retry;
4991 case '-':
79072805 4992 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4993 I32 ftst = 0;
90771dc0 4994 char tmp;
e5edeb50 4995
378cc40b 4996 s++;
3280af22 4997 PL_bufptr = s;
748a9306
LW
4998 tmp = *s++;
4999
bf4acbe4 5000 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5001 s++;
5002
5003 if (strnEQ(s,"=>",2)) {
3280af22 5004 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5005 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5006 OPERATOR('-'); /* unary minus */
5007 }
3280af22 5008 PL_last_uni = PL_oldbufptr;
748a9306 5009 switch (tmp) {
e5edeb50
JH
5010 case 'r': ftst = OP_FTEREAD; break;
5011 case 'w': ftst = OP_FTEWRITE; break;
5012 case 'x': ftst = OP_FTEEXEC; break;
5013 case 'o': ftst = OP_FTEOWNED; break;
5014 case 'R': ftst = OP_FTRREAD; break;
5015 case 'W': ftst = OP_FTRWRITE; break;
5016 case 'X': ftst = OP_FTREXEC; break;
5017 case 'O': ftst = OP_FTROWNED; break;
5018 case 'e': ftst = OP_FTIS; break;
5019 case 'z': ftst = OP_FTZERO; break;
5020 case 's': ftst = OP_FTSIZE; break;
5021 case 'f': ftst = OP_FTFILE; break;
5022 case 'd': ftst = OP_FTDIR; break;
5023 case 'l': ftst = OP_FTLINK; break;
5024 case 'p': ftst = OP_FTPIPE; break;
5025 case 'S': ftst = OP_FTSOCK; break;
5026 case 'u': ftst = OP_FTSUID; break;
5027 case 'g': ftst = OP_FTSGID; break;
5028 case 'k': ftst = OP_FTSVTX; break;
5029 case 'b': ftst = OP_FTBLK; break;
5030 case 'c': ftst = OP_FTCHR; break;
5031 case 't': ftst = OP_FTTTY; break;
5032 case 'T': ftst = OP_FTTEXT; break;
5033 case 'B': ftst = OP_FTBINARY; break;
5034 case 'M': case 'A': case 'C':
fafc274c 5035 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5036 switch (tmp) {
5037 case 'M': ftst = OP_FTMTIME; break;
5038 case 'A': ftst = OP_FTATIME; break;
5039 case 'C': ftst = OP_FTCTIME; break;
5040 default: break;
5041 }
5042 break;
378cc40b 5043 default:
378cc40b
LW
5044 break;
5045 }
e5edeb50 5046 if (ftst) {
eb160463 5047 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5048 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5049 "### Saw file test %c\n", (int)tmp);
5f80b19c 5050 } );
e5edeb50
JH
5051 FTST(ftst);
5052 }
5053 else {
5054 /* Assume it was a minus followed by a one-letter named
5055 * subroutine call (or a -bareword), then. */
95c31fe3 5056 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5057 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5058 (int) tmp);
5f80b19c 5059 } );
3cf7b4c4 5060 s = --PL_bufptr;
e5edeb50 5061 }
378cc40b 5062 }
90771dc0
NC
5063 {
5064 const char tmp = *s++;
5065 if (*s == tmp) {
5066 s++;
5067 if (PL_expect == XOPERATOR)
5068 TERM(POSTDEC);
5069 else
5070 OPERATOR(PREDEC);
5071 }
5072 else if (*s == '>') {
5073 s++;
29595ff2 5074 s = SKIPSPACE1(s);
90771dc0
NC
5075 if (isIDFIRST_lazy_if(s,UTF)) {
5076 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5077 TOKEN(ARROW);
5078 }
5079 else if (*s == '$')
5080 OPERATOR(ARROW);
5081 else
5082 TERM(ARROW);
5083 }
3280af22 5084 if (PL_expect == XOPERATOR)
90771dc0
NC
5085 Aop(OP_SUBTRACT);
5086 else {
5087 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5088 check_uni();
5089 OPERATOR('-'); /* unary minus */
79072805 5090 }
2f3197b3 5091 }
79072805 5092
378cc40b 5093 case '+':
90771dc0
NC
5094 {
5095 const char tmp = *s++;
5096 if (*s == tmp) {
5097 s++;
5098 if (PL_expect == XOPERATOR)
5099 TERM(POSTINC);
5100 else
5101 OPERATOR(PREINC);
5102 }
3280af22 5103 if (PL_expect == XOPERATOR)
90771dc0
NC
5104 Aop(OP_ADD);
5105 else {
5106 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5107 check_uni();
5108 OPERATOR('+');
5109 }
2f3197b3 5110 }
a687059c 5111
378cc40b 5112 case '*':
3280af22
NIS
5113 if (PL_expect != XOPERATOR) {
5114 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5115 PL_expect = XOPERATOR;
5116 force_ident(PL_tokenbuf, '*');
5117 if (!*PL_tokenbuf)
a0d0e21e 5118 PREREF('*');
79072805 5119 TERM('*');
a687059c 5120 }
79072805
LW
5121 s++;
5122 if (*s == '*') {
a687059c 5123 s++;
79072805 5124 PWop(OP_POW);
a687059c 5125 }
79072805
LW
5126 Mop(OP_MULTIPLY);
5127
378cc40b 5128 case '%':
3280af22 5129 if (PL_expect == XOPERATOR) {
bbce6d69 5130 ++s;
5131 Mop(OP_MODULO);
a687059c 5132 }
3280af22 5133 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5134 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5135 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5136 if (!PL_tokenbuf[1]) {
bbce6d69 5137 PREREF('%');
a687059c 5138 }
3280af22 5139 PL_pending_ident = '%';
bbce6d69 5140 TERM('%');
a687059c 5141
378cc40b 5142 case '^':
79072805 5143 s++;
a0d0e21e 5144 BOop(OP_BIT_XOR);
79072805 5145 case '[':
3280af22 5146 PL_lex_brackets++;
df3467db
IG
5147 {
5148 const char tmp = *s++;
5149 OPERATOR(tmp);
5150 }
378cc40b 5151 case '~':
0d863452 5152 if (s[1] == '~'
3e7dd34d 5153 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5154 {
5155 s += 2;
5156 Eop(OP_SMARTMATCH);
5157 }
378cc40b 5158 case ',':
90771dc0
NC
5159 {
5160 const char tmp = *s++;
5161 OPERATOR(tmp);
5162 }
a0d0e21e
LW
5163 case ':':
5164 if (s[1] == ':') {
5165 len = 0;
0bfa2a8a 5166 goto just_a_word_zero_gv;
a0d0e21e
LW
5167 }
5168 s++;
09bef843
SB
5169 switch (PL_expect) {
5170 OP *attrs;
5db06880
NC
5171#ifdef PERL_MAD
5172 I32 stuffstart;
5173#endif
09bef843
SB
5174 case XOPERATOR:
5175 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5176 break;
5177 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5178 if (*s == '=') {
5179 deprecate(":= for an empty attribute list");
5180 }
09bef843
SB
5181 goto grabattrs;
5182 case XATTRBLOCK:
5183 PL_expect = XBLOCK;
5184 goto grabattrs;
5185 case XATTRTERM:
5186 PL_expect = XTERMBLOCK;
5187 grabattrs:
5db06880
NC
5188#ifdef PERL_MAD
5189 stuffstart = s - SvPVX(PL_linestr) - 1;
5190#endif
29595ff2 5191 s = PEEKSPACE(s);
5f66b61c 5192 attrs = NULL;
7e2040f0 5193 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5194 I32 tmp;
5cc237b8 5195 SV *sv;
09bef843 5196 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5197 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5198 if (tmp < 0) tmp = -tmp;
5199 switch (tmp) {
5200 case KEY_or:
5201 case KEY_and:
5202 case KEY_for:
11baf631 5203 case KEY_foreach:
f9829d6b
GS
5204 case KEY_unless:
5205 case KEY_if:
5206 case KEY_while:
5207 case KEY_until:
5208 goto got_attrs;
5209 default:
5210 break;
5211 }
5212 }
5cc237b8 5213 sv = newSVpvn(s, len);
09bef843
SB
5214 if (*d == '(') {
5215 d = scan_str(d,TRUE,TRUE);
5216 if (!d) {
09bef843
SB
5217 /* MUST advance bufptr here to avoid bogus
5218 "at end of line" context messages from yyerror().
5219 */
5220 PL_bufptr = s + len;
5221 yyerror("Unterminated attribute parameter in attribute list");
5222 if (attrs)
5223 op_free(attrs);
5cc237b8 5224 sv_free(sv);
bbf60fe6 5225 return REPORT(0); /* EOF indicator */
09bef843
SB
5226 }
5227 }
5228 if (PL_lex_stuff) {
09bef843
SB
5229 sv_catsv(sv, PL_lex_stuff);
5230 attrs = append_elem(OP_LIST, attrs,
5231 newSVOP(OP_CONST, 0, sv));
5232 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5233 PL_lex_stuff = NULL;
09bef843
SB
5234 }
5235 else {
5cc237b8
BS
5236 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5237 sv_free(sv);
1108974d 5238 if (PL_in_my == KEY_our) {
df9a6019 5239 deprecate(":unique");
1108974d 5240 }
bfed75c6 5241 else
371fce9b
DM
5242 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5243 }
5244
d3cea301
SB
5245 /* NOTE: any CV attrs applied here need to be part of
5246 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5247 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5248 sv_free(sv);
78f9721b 5249 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5250 }
5251 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5252 sv_free(sv);
8e5dadda 5253 deprecate(":locked");
5cc237b8
BS
5254 }
5255 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5256 sv_free(sv);
78f9721b 5257 CvMETHOD_on(PL_compcv);
5cc237b8 5258 }
78f9721b
SM
5259 /* After we've set the flags, it could be argued that
5260 we don't need to do the attributes.pm-based setting
5261 process, and shouldn't bother appending recognized
d3cea301
SB
5262 flags. To experiment with that, uncomment the
5263 following "else". (Note that's already been
5264 uncommented. That keeps the above-applied built-in
5265 attributes from being intercepted (and possibly
5266 rejected) by a package's attribute routines, but is
5267 justified by the performance win for the common case
5268 of applying only built-in attributes.) */
0256094b 5269 else
78f9721b
SM
5270 attrs = append_elem(OP_LIST, attrs,
5271 newSVOP(OP_CONST, 0,
5cc237b8 5272 sv));
09bef843 5273 }
29595ff2 5274 s = PEEKSPACE(d);
0120eecf 5275 if (*s == ':' && s[1] != ':')
29595ff2 5276 s = PEEKSPACE(s+1);
0120eecf
GS
5277 else if (s == d)
5278 break; /* require real whitespace or :'s */
29595ff2 5279 /* XXX losing whitespace on sequential attributes here */
09bef843 5280 }
90771dc0
NC
5281 {
5282 const char tmp
5283 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5284 if (*s != ';' && *s != '}' && *s != tmp
5285 && (tmp != '=' || *s != ')')) {
5286 const char q = ((*s == '\'') ? '"' : '\'');
5287 /* If here for an expression, and parsed no attrs, back
5288 off. */
5289 if (tmp == '=' && !attrs) {
5290 s = PL_bufptr;
5291 break;
5292 }
5293 /* MUST advance bufptr here to avoid bogus "at end of line"
5294 context messages from yyerror().
5295 */
5296 PL_bufptr = s;
10edeb5d
JH
5297 yyerror( (const char *)
5298 (*s
5299 ? Perl_form(aTHX_ "Invalid separator character "
5300 "%c%c%c in attribute list", q, *s, q)
5301 : "Unterminated attribute list" ) );
90771dc0
NC
5302 if (attrs)
5303 op_free(attrs);
5304 OPERATOR(':');
09bef843 5305 }
09bef843 5306 }
f9829d6b 5307 got_attrs:
09bef843 5308 if (attrs) {
cd81e915 5309 start_force(PL_curforce);
9ded7720 5310 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5311 CURMAD('_', PL_nextwhite);
89122651 5312 force_next(THING);
5db06880
NC
5313 }
5314#ifdef PERL_MAD
5315 if (PL_madskills) {
cd81e915 5316 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5317 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5318 }
5db06880 5319#endif
09bef843
SB
5320 TOKEN(COLONATTR);
5321 }
a0d0e21e 5322 OPERATOR(':');
8990e307
LW
5323 case '(':
5324 s++;
3280af22
NIS
5325 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5326 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5327 else
3280af22 5328 PL_expect = XTERM;
29595ff2 5329 s = SKIPSPACE1(s);
a0d0e21e 5330 TOKEN('(');
378cc40b 5331 case ';':
f4dd75d9 5332 CLINE;
90771dc0
NC
5333 {
5334 const char tmp = *s++;
5335 OPERATOR(tmp);
5336 }
378cc40b 5337 case ')':
90771dc0
NC
5338 {
5339 const char tmp = *s++;
29595ff2 5340 s = SKIPSPACE1(s);
90771dc0
NC
5341 if (*s == '{')
5342 PREBLOCK(tmp);
5343 TERM(tmp);
5344 }
79072805
LW
5345 case ']':
5346 s++;
3280af22 5347 if (PL_lex_brackets <= 0)
d98d5fff 5348 yyerror("Unmatched right square bracket");
463ee0b2 5349 else
3280af22
NIS
5350 --PL_lex_brackets;
5351 if (PL_lex_state == LEX_INTERPNORMAL) {
5352 if (PL_lex_brackets == 0) {
02255c60
FC
5353 if (*s == '-' && s[1] == '>')
5354 PL_lex_state = LEX_INTERPENDMAYBE;
5355 else if (*s != '[' && *s != '{')
3280af22 5356 PL_lex_state = LEX_INTERPEND;
79072805
LW
5357 }
5358 }
4633a7c4 5359 TERM(']');
79072805
LW
5360 case '{':
5361 leftbracket:
79072805 5362 s++;
3280af22 5363 if (PL_lex_brackets > 100) {
8edd5f42 5364 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5365 }
3280af22 5366 switch (PL_expect) {
a0d0e21e 5367 case XTERM:
3280af22 5368 if (PL_lex_formbrack) {
a0d0e21e
LW
5369 s--;
5370 PRETERMBLOCK(DO);
5371 }
3280af22
NIS
5372 if (PL_oldoldbufptr == PL_last_lop)
5373 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5374 else
3280af22 5375 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5376 OPERATOR(HASHBRACK);
a0d0e21e 5377 case XOPERATOR:
bf4acbe4 5378 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5379 s++;
44a8e56a 5380 d = s;
3280af22
NIS
5381 PL_tokenbuf[0] = '\0';
5382 if (d < PL_bufend && *d == '-') {
5383 PL_tokenbuf[0] = '-';
44a8e56a 5384 d++;
bf4acbe4 5385 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5386 d++;
5387 }
7e2040f0 5388 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5389 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5390 FALSE, &len);
bf4acbe4 5391 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5392 d++;
5393 if (*d == '}') {
f54cb97a 5394 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5395 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5396 if (minus)
5397 force_next('-');
748a9306
LW
5398 }
5399 }
5400 /* FALL THROUGH */
09bef843 5401 case XATTRBLOCK:
748a9306 5402 case XBLOCK:
3280af22
NIS
5403 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5404 PL_expect = XSTATE;
a0d0e21e 5405 break;
09bef843 5406 case XATTRTERM:
a0d0e21e 5407 case XTERMBLOCK:
3280af22
NIS
5408 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5409 PL_expect = XSTATE;
a0d0e21e
LW
5410 break;
5411 default: {
f54cb97a 5412 const char *t;
3280af22
NIS
5413 if (PL_oldoldbufptr == PL_last_lop)
5414 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5415 else
3280af22 5416 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5417 s = SKIPSPACE1(s);
8452ff4b
SB
5418 if (*s == '}') {
5419 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5420 PL_expect = XTERM;
5421 /* This hack is to get the ${} in the message. */
5422 PL_bufptr = s+1;
5423 yyerror("syntax error");
5424 break;
5425 }
a0d0e21e 5426 OPERATOR(HASHBRACK);
8452ff4b 5427 }
b8a4b1be
GS
5428 /* This hack serves to disambiguate a pair of curlies
5429 * as being a block or an anon hash. Normally, expectation
5430 * determines that, but in cases where we're not in a
5431 * position to expect anything in particular (like inside
5432 * eval"") we have to resolve the ambiguity. This code
5433 * covers the case where the first term in the curlies is a
5434 * quoted string. Most other cases need to be explicitly
a0288114 5435 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5436 * curly in order to force resolution as an anon hash.
5437 *
5438 * XXX should probably propagate the outer expectation
5439 * into eval"" to rely less on this hack, but that could
5440 * potentially break current behavior of eval"".
5441 * GSAR 97-07-21
5442 */
5443 t = s;
5444 if (*s == '\'' || *s == '"' || *s == '`') {
5445 /* common case: get past first string, handling escapes */
3280af22 5446 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5447 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5448 t++;
5449 t++;
a0d0e21e 5450 }
b8a4b1be 5451 else if (*s == 'q') {
3280af22 5452 if (++t < PL_bufend
b8a4b1be 5453 && (!isALNUM(*t)
3280af22 5454 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5455 && !isALNUM(*t))))
5456 {
abc667d1 5457 /* skip q//-like construct */
f54cb97a 5458 const char *tmps;
b8a4b1be
GS
5459 char open, close, term;
5460 I32 brackets = 1;
5461
3280af22 5462 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5463 t++;
abc667d1
DM
5464 /* check for q => */
5465 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5466 OPERATOR(HASHBRACK);
5467 }
b8a4b1be
GS
5468 term = *t;
5469 open = term;
5470 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5471 term = tmps[5];
5472 close = term;
5473 if (open == close)
3280af22
NIS
5474 for (t++; t < PL_bufend; t++) {
5475 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5476 t++;
6d07e5e9 5477 else if (*t == open)
b8a4b1be
GS
5478 break;
5479 }
abc667d1 5480 else {
3280af22
NIS
5481 for (t++; t < PL_bufend; t++) {
5482 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5483 t++;
6d07e5e9 5484 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5485 break;
5486 else if (*t == open)
5487 brackets++;
5488 }
abc667d1
DM
5489 }
5490 t++;
b8a4b1be 5491 }
abc667d1
DM
5492 else
5493 /* skip plain q word */
5494 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5495 t += UTF8SKIP(t);
a0d0e21e 5496 }
7e2040f0 5497 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5498 t += UTF8SKIP(t);
7e2040f0 5499 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5500 t += UTF8SKIP(t);
a0d0e21e 5501 }
3280af22 5502 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5503 t++;
b8a4b1be
GS
5504 /* if comma follows first term, call it an anon hash */
5505 /* XXX it could be a comma expression with loop modifiers */
3280af22 5506 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5507 || (*t == '=' && t[1] == '>')))
a0d0e21e 5508 OPERATOR(HASHBRACK);
3280af22 5509 if (PL_expect == XREF)
4e4e412b 5510 PL_expect = XTERM;
a0d0e21e 5511 else {
3280af22
NIS
5512 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5513 PL_expect = XSTATE;
a0d0e21e 5514 }
8990e307 5515 }
a0d0e21e 5516 break;
463ee0b2 5517 }
6154021b 5518 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5519 if (isSPACE(*s) || *s == '#')
3280af22 5520 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5521 TOKEN('{');
378cc40b 5522 case '}':
79072805
LW
5523 rightbracket:
5524 s++;
3280af22 5525 if (PL_lex_brackets <= 0)
d98d5fff 5526 yyerror("Unmatched right curly bracket");
463ee0b2 5527 else
3280af22 5528 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5529 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5530 PL_lex_formbrack = 0;
5531 if (PL_lex_state == LEX_INTERPNORMAL) {
5532 if (PL_lex_brackets == 0) {
9059aa12
LW
5533 if (PL_expect & XFAKEBRACK) {
5534 PL_expect &= XENUMMASK;
3280af22
NIS
5535 PL_lex_state = LEX_INTERPEND;
5536 PL_bufptr = s;
5db06880
NC
5537#if 0
5538 if (PL_madskills) {
cd81e915 5539 if (!PL_thiswhite)
6b29d1f5 5540 PL_thiswhite = newSVpvs("");
76f68e9b 5541 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5542 }
5543#endif
cea2e8a9 5544 return yylex(); /* ignore fake brackets */
79072805 5545 }
fa83b5b6 5546 if (*s == '-' && s[1] == '>')
3280af22 5547 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5548 else if (*s != '[' && *s != '{')
3280af22 5549 PL_lex_state = LEX_INTERPEND;
79072805
LW
5550 }
5551 }
9059aa12
LW
5552 if (PL_expect & XFAKEBRACK) {
5553 PL_expect &= XENUMMASK;
3280af22 5554 PL_bufptr = s;
cea2e8a9 5555 return yylex(); /* ignore fake brackets */
748a9306 5556 }
cd81e915 5557 start_force(PL_curforce);
5db06880
NC
5558 if (PL_madskills) {
5559 curmad('X', newSVpvn(s-1,1));
cd81e915 5560 CURMAD('_', PL_thiswhite);
5db06880 5561 }
79072805 5562 force_next('}');
5db06880 5563#ifdef PERL_MAD
cd81e915 5564 if (!PL_thistoken)
6b29d1f5 5565 PL_thistoken = newSVpvs("");
5db06880 5566#endif
79072805 5567 TOKEN(';');
378cc40b
LW
5568 case '&':
5569 s++;
90771dc0 5570 if (*s++ == '&')
a0d0e21e 5571 AOPERATOR(ANDAND);
378cc40b 5572 s--;
3280af22 5573 if (PL_expect == XOPERATOR) {
041457d9
DM
5574 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5575 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5576 {
57843af0 5577 CopLINE_dec(PL_curcop);
f1f66076 5578 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5579 CopLINE_inc(PL_curcop);
463ee0b2 5580 }
79072805 5581 BAop(OP_BIT_AND);
463ee0b2 5582 }
79072805 5583
3280af22
NIS
5584 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5585 if (*PL_tokenbuf) {
5586 PL_expect = XOPERATOR;
5587 force_ident(PL_tokenbuf, '&');
463ee0b2 5588 }
79072805
LW
5589 else
5590 PREREF('&');
6154021b 5591 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5592 TERM('&');
5593
378cc40b
LW
5594 case '|':
5595 s++;
90771dc0 5596 if (*s++ == '|')
a0d0e21e 5597 AOPERATOR(OROR);
378cc40b 5598 s--;
79072805 5599 BOop(OP_BIT_OR);
378cc40b
LW
5600 case '=':
5601 s++;
748a9306 5602 {
90771dc0
NC
5603 const char tmp = *s++;
5604 if (tmp == '=')
5605 Eop(OP_EQ);
5606 if (tmp == '>')
5607 OPERATOR(',');
5608 if (tmp == '~')
5609 PMop(OP_MATCH);
5610 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5611 && strchr("+-*/%.^&|<",tmp))
5612 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5613 "Reversed %c= operator",(int)tmp);
5614 s--;
5615 if (PL_expect == XSTATE && isALPHA(tmp) &&
5616 (s == PL_linestart+1 || s[-2] == '\n') )
5617 {
5618 if (PL_in_eval && !PL_rsfp) {
5619 d = PL_bufend;
5620 while (s < d) {
5621 if (*s++ == '\n') {
5622 incline(s);
5623 if (strnEQ(s,"=cut",4)) {
5624 s = strchr(s,'\n');
5625 if (s)
5626 s++;
5627 else
5628 s = d;
5629 incline(s);
5630 goto retry;
5631 }
5632 }
a5f75d66 5633 }
90771dc0 5634 goto retry;
a5f75d66 5635 }
5db06880
NC
5636#ifdef PERL_MAD
5637 if (PL_madskills) {
cd81e915 5638 if (!PL_thiswhite)
6b29d1f5 5639 PL_thiswhite = newSVpvs("");
cd81e915 5640 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5641 PL_bufend - PL_linestart);
5642 }
5643#endif
90771dc0
NC
5644 s = PL_bufend;
5645 PL_doextract = TRUE;
5646 goto retry;
a5f75d66 5647 }
a0d0e21e 5648 }
3280af22 5649 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5650 const char *t = s;
51882d45 5651#ifdef PERL_STRICT_CR
c35e046a 5652 while (SPACE_OR_TAB(*t))
51882d45 5653#else
c35e046a 5654 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5655#endif
c35e046a 5656 t++;
a0d0e21e
LW
5657 if (*t == '\n' || *t == '#') {
5658 s--;
3280af22 5659 PL_expect = XBLOCK;
a0d0e21e
LW
5660 goto leftbracket;
5661 }
79072805 5662 }
6154021b 5663 pl_yylval.ival = 0;
a0d0e21e 5664 OPERATOR(ASSIGNOP);
378cc40b
LW
5665 case '!':
5666 s++;
90771dc0
NC
5667 {
5668 const char tmp = *s++;
5669 if (tmp == '=') {
5670 /* was this !=~ where !~ was meant?
5671 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5672
5673 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5674 const char *t = s+1;
5675
5676 while (t < PL_bufend && isSPACE(*t))
5677 ++t;
5678
5679 if (*t == '/' || *t == '?' ||
5680 ((*t == 'm' || *t == 's' || *t == 'y')
5681 && !isALNUM(t[1])) ||
5682 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5683 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5684 "!=~ should be !~");
5685 }
5686 Eop(OP_NE);
5687 }
5688 if (tmp == '~')
5689 PMop(OP_NOT);
5690 }
378cc40b
LW
5691 s--;
5692 OPERATOR('!');
5693 case '<':
3280af22 5694 if (PL_expect != XOPERATOR) {
93a17b20 5695 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5696 check_uni();
79072805
LW
5697 if (s[1] == '<')
5698 s = scan_heredoc(s);
5699 else
5700 s = scan_inputsymbol(s);
5701 TERM(sublex_start());
378cc40b
LW
5702 }
5703 s++;
90771dc0
NC
5704 {
5705 char tmp = *s++;
5706 if (tmp == '<')
5707 SHop(OP_LEFT_SHIFT);
5708 if (tmp == '=') {
5709 tmp = *s++;
5710 if (tmp == '>')
5711 Eop(OP_NCMP);
5712 s--;
5713 Rop(OP_LE);
5714 }
395c3793 5715 }
378cc40b 5716 s--;
79072805 5717 Rop(OP_LT);
378cc40b
LW
5718 case '>':
5719 s++;
90771dc0
NC
5720 {
5721 const char tmp = *s++;
5722 if (tmp == '>')
5723 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5724 else if (tmp == '=')
90771dc0
NC
5725 Rop(OP_GE);
5726 }
378cc40b 5727 s--;
79072805 5728 Rop(OP_GT);
378cc40b
LW
5729
5730 case '$':
bbce6d69 5731 CLINE;
5732
3280af22
NIS
5733 if (PL_expect == XOPERATOR) {
5734 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5735 return deprecate_commaless_var_list();
a0d0e21e 5736 }
8990e307 5737 }
a0d0e21e 5738
c0b977fd 5739 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5740 PL_tokenbuf[0] = '@';
376b8730
SM
5741 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5742 sizeof PL_tokenbuf - 1, FALSE);
5743 if (PL_expect == XOPERATOR)
5744 no_op("Array length", s);
3280af22 5745 if (!PL_tokenbuf[1])
a0d0e21e 5746 PREREF(DOLSHARP);
3280af22
NIS
5747 PL_expect = XOPERATOR;
5748 PL_pending_ident = '#';
463ee0b2 5749 TOKEN(DOLSHARP);
79072805 5750 }
bbce6d69 5751
3280af22 5752 PL_tokenbuf[0] = '$';
376b8730
SM
5753 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5754 sizeof PL_tokenbuf - 1, FALSE);
5755 if (PL_expect == XOPERATOR)
5756 no_op("Scalar", s);
3280af22
NIS
5757 if (!PL_tokenbuf[1]) {
5758 if (s == PL_bufend)
bbce6d69 5759 yyerror("Final $ should be \\$ or $name");
5760 PREREF('$');
8990e307 5761 }
a0d0e21e 5762
bbce6d69 5763 /* This kludge not intended to be bulletproof. */
3280af22 5764 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5765 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5766 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5767 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5768 TERM(THING);
5769 }
5770
ff68c719 5771 d = s;
90771dc0
NC
5772 {
5773 const char tmp = *s;
ae28bb2a 5774 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5775 s = SKIPSPACE1(s);
ff68c719 5776
90771dc0
NC
5777 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5778 && intuit_more(s)) {
5779 if (*s == '[') {
5780 PL_tokenbuf[0] = '@';
5781 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5782 char *t = s+1;
5783
5784 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5785 t++;
90771dc0 5786 if (*t++ == ',') {
29595ff2 5787 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5788 while (t < PL_bufend && *t != ']')
5789 t++;
9014280d 5790 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5791 "Multidimensional syntax %.*s not supported",
36c7798d 5792 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5793 }
748a9306 5794 }
93a17b20 5795 }
90771dc0
NC
5796 else if (*s == '{') {
5797 char *t;
5798 PL_tokenbuf[0] = '%';
5799 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5800 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5801 {
5802 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5803 do {
5804 t++;
5805 } while (isSPACE(*t));
90771dc0 5806 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5807 STRLEN len;
90771dc0 5808 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5809 &len);
c35e046a
AL
5810 while (isSPACE(*t))
5811 t++;
780a5241 5812 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5813 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5814 "You need to quote \"%s\"",
5815 tmpbuf);
5816 }
5817 }
5818 }
93a17b20 5819 }
bbce6d69 5820
90771dc0
NC
5821 PL_expect = XOPERATOR;
5822 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5823 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5824 if (!islop || PL_last_lop_op == OP_GREPSTART)
5825 PL_expect = XOPERATOR;
5826 else if (strchr("$@\"'`q", *s))
5827 PL_expect = XTERM; /* e.g. print $fh "foo" */
5828 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5829 PL_expect = XTERM; /* e.g. print $fh &sub */
5830 else if (isIDFIRST_lazy_if(s,UTF)) {
5831 char tmpbuf[sizeof PL_tokenbuf];
5832 int t2;
5833 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5834 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5835 /* binary operators exclude handle interpretations */
5836 switch (t2) {
5837 case -KEY_x:
5838 case -KEY_eq:
5839 case -KEY_ne:
5840 case -KEY_gt:
5841 case -KEY_lt:
5842 case -KEY_ge:
5843 case -KEY_le:
5844 case -KEY_cmp:
5845 break;
5846 default:
5847 PL_expect = XTERM; /* e.g. print $fh length() */
5848 break;
5849 }
5850 }
5851 else {
5852 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5853 }
5854 }
90771dc0
NC
5855 else if (isDIGIT(*s))
5856 PL_expect = XTERM; /* e.g. print $fh 3 */
5857 else if (*s == '.' && isDIGIT(s[1]))
5858 PL_expect = XTERM; /* e.g. print $fh .3 */
5859 else if ((*s == '?' || *s == '-' || *s == '+')
5860 && !isSPACE(s[1]) && s[1] != '=')
5861 PL_expect = XTERM; /* e.g. print $fh -1 */
5862 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5863 && s[1] != '/')
5864 PL_expect = XTERM; /* e.g. print $fh /.../
5865 XXX except DORDOR operator
5866 */
5867 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5868 && s[2] != '=')
5869 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5870 }
bbce6d69 5871 }
3280af22 5872 PL_pending_ident = '$';
79072805 5873 TOKEN('$');
378cc40b
LW
5874
5875 case '@':
3280af22 5876 if (PL_expect == XOPERATOR)
bbce6d69 5877 no_op("Array", s);
3280af22
NIS
5878 PL_tokenbuf[0] = '@';
5879 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5880 if (!PL_tokenbuf[1]) {
bbce6d69 5881 PREREF('@');
5882 }
3280af22 5883 if (PL_lex_state == LEX_NORMAL)
29595ff2 5884 s = SKIPSPACE1(s);
3280af22 5885 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5886 if (*s == '{')
3280af22 5887 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5888
5889 /* Warn about @ where they meant $. */
041457d9
DM
5890 if (*s == '[' || *s == '{') {
5891 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5892 const char *t = s + 1;
7e2040f0 5893 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5894 t++;
5895 if (*t == '}' || *t == ']') {
5896 t++;
29595ff2 5897 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5898 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5899 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5900 (int)(t-PL_bufptr), PL_bufptr,
5901 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5902 }
93a17b20
LW
5903 }
5904 }
463ee0b2 5905 }
3280af22 5906 PL_pending_ident = '@';
79072805 5907 TERM('@');
378cc40b 5908
c963b151 5909 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5910 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5911 s += 2;
5912 AOPERATOR(DORDOR);
5913 }
c963b151 5914 case '?': /* may either be conditional or pattern */
be25f609 5915 if (PL_expect == XOPERATOR) {
90771dc0 5916 char tmp = *s++;
c963b151 5917 if(tmp == '?') {
be25f609 5918 OPERATOR('?');
c963b151
BD
5919 }
5920 else {
5921 tmp = *s++;
5922 if(tmp == '/') {
5923 /* A // operator. */
5924 AOPERATOR(DORDOR);
5925 }
5926 else {
5927 s--;
5928 Mop(OP_DIVIDE);
5929 }
5930 }
5931 }
5932 else {
5933 /* Disable warning on "study /blah/" */
5934 if (PL_oldoldbufptr == PL_last_uni
5935 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5936 || memNE(PL_last_uni, "study", 5)
5937 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5938 ))
5939 check_uni();
5940 s = scan_pat(s,OP_MATCH);
5941 TERM(sublex_start());
5942 }
378cc40b
LW
5943
5944 case '.':
51882d45
GS
5945 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5946#ifdef PERL_STRICT_CR
5947 && s[1] == '\n'
5948#else
5949 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5950#endif
5951 && (s == PL_linestart || s[-1] == '\n') )
5952 {
3280af22
NIS
5953 PL_lex_formbrack = 0;
5954 PL_expect = XSTATE;
79072805
LW
5955 goto rightbracket;
5956 }
be25f609 5957 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5958 s += 3;
5959 OPERATOR(YADAYADA);
5960 }
3280af22 5961 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5962 char tmp = *s++;
a687059c
LW
5963 if (*s == tmp) {
5964 s++;
2f3197b3
LW
5965 if (*s == tmp) {
5966 s++;
6154021b 5967 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5968 }
5969 else
6154021b 5970 pl_yylval.ival = 0;
378cc40b 5971 OPERATOR(DOTDOT);
a687059c 5972 }
79072805 5973 Aop(OP_CONCAT);
378cc40b
LW
5974 }
5975 /* FALL THROUGH */
5976 case '0': case '1': case '2': case '3': case '4':
5977 case '5': case '6': case '7': case '8': case '9':
6154021b 5978 s = scan_num(s, &pl_yylval);
931e0695 5979 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5980 if (PL_expect == XOPERATOR)
8990e307 5981 no_op("Number",s);
79072805
LW
5982 TERM(THING);
5983
5984 case '\'':
5db06880 5985 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5986 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5987 if (PL_expect == XOPERATOR) {
5988 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5989 return deprecate_commaless_var_list();
a0d0e21e 5990 }
463ee0b2 5991 else
8990e307 5992 no_op("String",s);
463ee0b2 5993 }
79072805 5994 if (!s)
d4c19fe8 5995 missingterm(NULL);
6154021b 5996 pl_yylval.ival = OP_CONST;
79072805
LW
5997 TERM(sublex_start());
5998
5999 case '"':
5db06880 6000 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6001 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6002 if (PL_expect == XOPERATOR) {
6003 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6004 return deprecate_commaless_var_list();
a0d0e21e 6005 }
463ee0b2 6006 else
8990e307 6007 no_op("String",s);
463ee0b2 6008 }
79072805 6009 if (!s)
d4c19fe8 6010 missingterm(NULL);
6154021b 6011 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6012 /* FIXME. I think that this can be const if char *d is replaced by
6013 more localised variables. */
3280af22 6014 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6015 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6016 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6017 break;
6018 }
6019 }
79072805
LW
6020 TERM(sublex_start());
6021
6022 case '`':
5db06880 6023 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6024 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6025 if (PL_expect == XOPERATOR)
8990e307 6026 no_op("Backticks",s);
79072805 6027 if (!s)
d4c19fe8 6028 missingterm(NULL);
9b201d7d 6029 readpipe_override();
79072805
LW
6030 TERM(sublex_start());
6031
6032 case '\\':
6033 s++;
a2a5de95
NC
6034 if (PL_lex_inwhat && isDIGIT(*s))
6035 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6036 *s, *s);
3280af22 6037 if (PL_expect == XOPERATOR)
8990e307 6038 no_op("Backslash",s);
79072805
LW
6039 OPERATOR(REFGEN);
6040
a7cb1f99 6041 case 'v':
e526c9e6 6042 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6043 char *start = s + 2;
dd629d5b 6044 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6045 start++;
6046 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6047 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6048 TERM(THING);
6049 }
e526c9e6 6050 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6051 else if (!isALPHA(*start) && (PL_expect == XTERM
6052 || PL_expect == XREF || PL_expect == XSTATE
6053 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6054 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6055 if (!gv) {
6154021b 6056 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6057 TERM(THING);
6058 }
6059 }
a7cb1f99
GS
6060 }
6061 goto keylookup;
79072805 6062 case 'x':
3280af22 6063 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6064 s++;
6065 Mop(OP_REPEAT);
2f3197b3 6066 }
79072805
LW
6067 goto keylookup;
6068
378cc40b 6069 case '_':
79072805
LW
6070 case 'a': case 'A':
6071 case 'b': case 'B':
6072 case 'c': case 'C':
6073 case 'd': case 'D':
6074 case 'e': case 'E':
6075 case 'f': case 'F':
6076 case 'g': case 'G':
6077 case 'h': case 'H':
6078 case 'i': case 'I':
6079 case 'j': case 'J':
6080 case 'k': case 'K':
6081 case 'l': case 'L':
6082 case 'm': case 'M':
6083 case 'n': case 'N':
6084 case 'o': case 'O':
6085 case 'p': case 'P':
6086 case 'q': case 'Q':
6087 case 'r': case 'R':
6088 case 's': case 'S':
6089 case 't': case 'T':
6090 case 'u': case 'U':
a7cb1f99 6091 case 'V':
79072805
LW
6092 case 'w': case 'W':
6093 case 'X':
6094 case 'y': case 'Y':
6095 case 'z': case 'Z':
6096
49dc05e3 6097 keylookup: {
88e1f1a2 6098 bool anydelim;
90771dc0 6099 I32 tmp;
10edeb5d
JH
6100
6101 orig_keyword = 0;
6102 gv = NULL;
6103 gvp = NULL;
49dc05e3 6104
3280af22
NIS
6105 PL_bufptr = s;
6106 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6107
6108 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6109 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6110 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6111 (PL_tokenbuf[0] == 'q' &&
6112 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6113
6114 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6115 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6116 goto just_a_word;
6117
3643fb5f 6118 d = s;
3280af22 6119 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6120 d++; /* no comments skipped here, or s### is misparsed */
6121
748a9306 6122 /* Is this a word before a => operator? */
1c3923b3 6123 if (*d == '=' && d[1] == '>') {
748a9306 6124 CLINE;
6154021b 6125 pl_yylval.opval
d0a148a6
NC
6126 = (OP*)newSVOP(OP_CONST, 0,
6127 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6128 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6129 TERM(WORD);
6130 }
6131
88e1f1a2
JV
6132 /* Check for plugged-in keyword */
6133 {
6134 OP *o;
6135 int result;
6136 char *saved_bufptr = PL_bufptr;
6137 PL_bufptr = s;
16c91539 6138 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6139 s = PL_bufptr;
6140 if (result == KEYWORD_PLUGIN_DECLINE) {
6141 /* not a plugged-in keyword */
6142 PL_bufptr = saved_bufptr;
6143 } else if (result == KEYWORD_PLUGIN_STMT) {
6144 pl_yylval.opval = o;
6145 CLINE;
6146 PL_expect = XSTATE;
6147 return REPORT(PLUGSTMT);
6148 } else if (result == KEYWORD_PLUGIN_EXPR) {
6149 pl_yylval.opval = o;
6150 CLINE;
6151 PL_expect = XOPERATOR;
6152 return REPORT(PLUGEXPR);
6153 } else {
6154 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6155 PL_tokenbuf);
6156 }
6157 }
6158
6159 /* Check for built-in keyword */
6160 tmp = keyword(PL_tokenbuf, len, 0);
6161
6162 /* Is this a label? */
6163 if (!anydelim && PL_expect == XSTATE
6164 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6165 s = d + 1;
6166 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6167 CLINE;
6168 TOKEN(LABEL);
6169 }
6170
a0d0e21e 6171 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6172 GV *ogv = NULL; /* override (winner) */
6173 GV *hgv = NULL; /* hidden (loser) */
3280af22 6174 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6175 CV *cv;
90e5519e 6176 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6177 (cv = GvCVu(gv)))
6178 {
6179 if (GvIMPORTED_CV(gv))
6180 ogv = gv;
6181 else if (! CvMETHOD(cv))
6182 hgv = gv;
6183 }
6184 if (!ogv &&
3280af22 6185 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6186 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6187 GvCVu(gv) && GvIMPORTED_CV(gv))
6188 {
6189 ogv = gv;
6190 }
6191 }
6192 if (ogv) {
30fe34ed 6193 orig_keyword = tmp;
56f7f34b 6194 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6195 }
6196 else if (gv && !gvp
6197 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6198 && GvCVu(gv))
6e7b2336
GS
6199 {
6200 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6201 }
56f7f34b
CS
6202 else { /* no override */
6203 tmp = -tmp;
a2a5de95
NC
6204 if (tmp == KEY_dump) {
6205 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6206 "dump() better written as CORE::dump()");
ac206dc8 6207 }
a0714e2c 6208 gv = NULL;
56f7f34b 6209 gvp = 0;
a2a5de95
NC
6210 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6211 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6212 "Ambiguous call resolved as CORE::%s(), "
6213 "qualify as such or use &",
6214 GvENAME(hgv));
49dc05e3 6215 }
a0d0e21e
LW
6216 }
6217
6218 reserved_word:
6219 switch (tmp) {
79072805
LW
6220
6221 default: /* not a keyword */
0bfa2a8a
NC
6222 /* Trade off - by using this evil construction we can pull the
6223 variable gv into the block labelled keylookup. If not, then
6224 we have to give it function scope so that the goto from the
6225 earlier ':' case doesn't bypass the initialisation. */
6226 if (0) {
6227 just_a_word_zero_gv:
6228 gv = NULL;
6229 gvp = NULL;
8bee0991 6230 orig_keyword = 0;
0bfa2a8a 6231 }
93a17b20 6232 just_a_word: {
96e4d5b1 6233 SV *sv;
ce29ac45 6234 int pkgname = 0;
f54cb97a 6235 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6236 OP *rv2cv_op;
5069cc75 6237 CV *cv;
5db06880 6238#ifdef PERL_MAD
cd81e915 6239 SV *nextPL_nextwhite = 0;
5db06880
NC
6240#endif
6241
8990e307
LW
6242
6243 /* Get the rest if it looks like a package qualifier */
6244
155aba94 6245 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6246 STRLEN morelen;
3280af22 6247 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6248 TRUE, &morelen);
6249 if (!morelen)
cea2e8a9 6250 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6251 *s == '\'' ? "'" : "::");
c3e0f903 6252 len += morelen;
ce29ac45 6253 pkgname = 1;
a0d0e21e 6254 }
8990e307 6255
3280af22
NIS
6256 if (PL_expect == XOPERATOR) {
6257 if (PL_bufptr == PL_linestart) {
57843af0 6258 CopLINE_dec(PL_curcop);
f1f66076 6259 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6260 CopLINE_inc(PL_curcop);
463ee0b2
LW
6261 }
6262 else
54310121 6263 no_op("Bareword",s);
463ee0b2 6264 }
8990e307 6265
c3e0f903
GS
6266 /* Look for a subroutine with this name in current package,
6267 unless name is "Foo::", in which case Foo is a bearword
6268 (and a package name). */
6269
5db06880 6270 if (len > 2 && !PL_madskills &&
3280af22 6271 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6272 {
f776e3cd 6273 if (ckWARN(WARN_BAREWORD)
90e5519e 6274 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6275 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6276 "Bareword \"%s\" refers to nonexistent package",
3280af22 6277 PL_tokenbuf);
c3e0f903 6278 len -= 2;
3280af22 6279 PL_tokenbuf[len] = '\0';
a0714e2c 6280 gv = NULL;
c3e0f903
GS
6281 gvp = 0;
6282 }
6283 else {
62d55b22
NC
6284 if (!gv) {
6285 /* Mustn't actually add anything to a symbol table.
6286 But also don't want to "initialise" any placeholder
6287 constants that might already be there into full
6288 blown PVGVs with attached PVCV. */
90e5519e
NC
6289 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6290 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6291 }
b3d904f3 6292 len = 0;
c3e0f903
GS
6293 }
6294
6295 /* if we saw a global override before, get the right name */
8990e307 6296
37bb7629
EB
6297 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6298 len ? len : strlen(PL_tokenbuf));
49dc05e3 6299 if (gvp) {
37bb7629 6300 SV * const tmp_sv = sv;
396482e1 6301 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6302 sv_catsv(sv, tmp_sv);
6303 SvREFCNT_dec(tmp_sv);
8a7a129d 6304 }
37bb7629 6305
5db06880 6306#ifdef PERL_MAD
cd81e915
NC
6307 if (PL_madskills && !PL_thistoken) {
6308 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6309 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6310 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6311 }
6312#endif
8990e307 6313
a0d0e21e 6314 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6315 CLINE;
6154021b
RGS
6316 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6317 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6318
c3e0f903 6319 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6320 if (len)
6321 goto safe_bareword;
6322
f7461760
Z
6323 cv = NULL;
6324 {
6325 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6326 const_op->op_private = OPpCONST_BARE;
6327 rv2cv_op = newCVREF(0, const_op);
6328 }
6329 if (rv2cv_op->op_type == OP_RV2CV &&
6330 (rv2cv_op->op_flags & OPf_KIDS)) {
6331 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6332 switch (rv_op->op_type) {
6333 case OP_CONST: {
6334 SV *sv = cSVOPx_sv(rv_op);
6335 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6336 cv = (CV*)SvRV(sv);
6337 } break;
6338 case OP_GV: {
6339 GV *gv = cGVOPx_gv(rv_op);
6340 CV *maybe_cv = GvCVu(gv);
6341 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6342 cv = maybe_cv;
6343 } break;
6344 }
6345 }
5069cc75 6346
8990e307
LW
6347 /* See if it's the indirect object for a list operator. */
6348
3280af22
NIS
6349 if (PL_oldoldbufptr &&
6350 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6351 (PL_oldoldbufptr == PL_last_lop
6352 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6353 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6354 (PL_expect == XREF ||
6355 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6356 {
748a9306
LW
6357 bool immediate_paren = *s == '(';
6358
a0d0e21e 6359 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6360 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6361#ifdef PERL_MAD
cd81e915 6362 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6363#endif
a0d0e21e
LW
6364
6365 /* Two barewords in a row may indicate method call. */
6366
62d55b22 6367 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6368 (tmp = intuit_method(s, gv, cv))) {
6369 op_free(rv2cv_op);
bbf60fe6 6370 return REPORT(tmp);
f7461760 6371 }
a0d0e21e
LW
6372
6373 /* If not a declared subroutine, it's an indirect object. */
6374 /* (But it's an indir obj regardless for sort.) */
7294df96 6375 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6376
7294df96
RGS
6377 if (
6378 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6379 (!cv &&
a9ef352a 6380 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6381 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6382 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6383 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6384 )
a9ef352a 6385 {
3280af22 6386 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6387 goto bareword;
93a17b20
LW
6388 }
6389 }
8990e307 6390
3280af22 6391 PL_expect = XOPERATOR;
5db06880
NC
6392#ifdef PERL_MAD
6393 if (isSPACE(*s))
cd81e915
NC
6394 s = SKIPSPACE2(s,nextPL_nextwhite);
6395 PL_nextwhite = nextPL_nextwhite;
5db06880 6396#else
8990e307 6397 s = skipspace(s);
5db06880 6398#endif
1c3923b3
GS
6399
6400 /* Is this a word before a => operator? */
ce29ac45 6401 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6402 op_free(rv2cv_op);
1c3923b3 6403 CLINE;
6154021b 6404 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6405 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6406 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6407 TERM(WORD);
6408 }
6409
6410 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6411 if (*s == '(') {
79072805 6412 CLINE;
5069cc75 6413 if (cv) {
c35e046a
AL
6414 d = s + 1;
6415 while (SPACE_OR_TAB(*d))
6416 d++;
f7461760 6417 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6418 s = d + 1;
c631f32b 6419 goto its_constant;
96e4d5b1 6420 }
6421 }
5db06880
NC
6422#ifdef PERL_MAD
6423 if (PL_madskills) {
cd81e915
NC
6424 PL_nextwhite = PL_thiswhite;
6425 PL_thiswhite = 0;
5db06880 6426 }
cd81e915 6427 start_force(PL_curforce);
5db06880 6428#endif
6154021b 6429 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6430 PL_expect = XOPERATOR;
5db06880
NC
6431#ifdef PERL_MAD
6432 if (PL_madskills) {
cd81e915
NC
6433 PL_nextwhite = nextPL_nextwhite;
6434 curmad('X', PL_thistoken);
6b29d1f5 6435 PL_thistoken = newSVpvs("");
5db06880
NC
6436 }
6437#endif
f7461760 6438 op_free(rv2cv_op);
93a17b20 6439 force_next(WORD);
6154021b 6440 pl_yylval.ival = 0;
463ee0b2 6441 TOKEN('&');
79072805 6442 }
93a17b20 6443
a0d0e21e 6444 /* If followed by var or block, call it a method (unless sub) */
8990e307 6445
f7461760
Z
6446 if ((*s == '$' || *s == '{') && !cv) {
6447 op_free(rv2cv_op);
3280af22
NIS
6448 PL_last_lop = PL_oldbufptr;
6449 PL_last_lop_op = OP_METHOD;
93a17b20 6450 PREBLOCK(METHOD);
463ee0b2
LW
6451 }
6452
8990e307
LW
6453 /* If followed by a bareword, see if it looks like indir obj. */
6454
30fe34ed
RGS
6455 if (!orig_keyword
6456 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6457 && (tmp = intuit_method(s, gv, cv))) {
6458 op_free(rv2cv_op);
bbf60fe6 6459 return REPORT(tmp);
f7461760 6460 }
93a17b20 6461
8990e307
LW
6462 /* Not a method, so call it a subroutine (if defined) */
6463
5069cc75 6464 if (cv) {
9b387841
NC
6465 if (lastchar == '-')
6466 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6467 "Ambiguous use of -%s resolved as -&%s()",
6468 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6469 /* Check for a constant sub */
f7461760 6470 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6471 its_constant:
f7461760 6472 op_free(rv2cv_op);
6154021b
RGS
6473 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6474 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6475 pl_yylval.opval->op_private = 0;
96e4d5b1 6476 TOKEN(WORD);
89bfa8cd 6477 }
6478
6154021b 6479 op_free(pl_yylval.opval);
f7461760 6480 pl_yylval.opval = rv2cv_op;
6154021b 6481 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6482 PL_last_lop = PL_oldbufptr;
bf848113 6483 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6484 /* Is there a prototype? */
5db06880
NC
6485 if (
6486#ifdef PERL_MAD
6487 cv &&
6488#endif
d9f2850e
RGS
6489 SvPOK(cv))
6490 {
5f66b61c 6491 STRLEN protolen;
daba3364 6492 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6493 if (!protolen)
4633a7c4 6494 TERM(FUNC0SUB);
0f5d0394
AE
6495 while (*proto == ';')
6496 proto++;
649d02de
FC
6497 if (
6498 (
6499 (
6500 *proto == '$' || *proto == '_'
6501 || *proto == '*'
6502 )
6503 && proto[1] == '\0'
6504 )
6505 || (
6506 *proto == '\\' && proto[1] && proto[2] == '\0'
6507 )
6508 )
6509 OPERATOR(UNIOPSUB);
6510 if (*proto == '\\' && proto[1] == '[') {
6511 const char *p = proto + 2;
6512 while(*p && *p != ']')
6513 ++p;
6514 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6515 }
7a52d87a 6516 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6517 if (PL_curstash)
6518 sv_setpvs(PL_subname, "__ANON__");
6519 else
6520 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6521 PREBLOCK(LSTOPSUB);
6522 }
a9ef352a 6523 }
5db06880
NC
6524#ifdef PERL_MAD
6525 {
6526 if (PL_madskills) {
cd81e915
NC
6527 PL_nextwhite = PL_thiswhite;
6528 PL_thiswhite = 0;
5db06880 6529 }
cd81e915 6530 start_force(PL_curforce);
6154021b 6531 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6532 PL_expect = XTERM;
6533 if (PL_madskills) {
cd81e915
NC
6534 PL_nextwhite = nextPL_nextwhite;
6535 curmad('X', PL_thistoken);
6b29d1f5 6536 PL_thistoken = newSVpvs("");
5db06880
NC
6537 }
6538 force_next(WORD);
6539 TOKEN(NOAMP);
6540 }
6541 }
6542
6543 /* Guess harder when madskills require "best effort". */
6544 if (PL_madskills && (!gv || !GvCVu(gv))) {
6545 int probable_sub = 0;
6546 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6547 probable_sub = 1;
6548 else if (isALPHA(*s)) {
6549 char tmpbuf[1024];
6550 STRLEN tmplen;
6551 d = s;
6552 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6553 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6554 probable_sub = 1;
6555 else {
6556 while (d < PL_bufend && isSPACE(*d))
6557 d++;
6558 if (*d == '=' && d[1] == '>')
6559 probable_sub = 1;
6560 }
6561 }
6562 if (probable_sub) {
7a6d04f4 6563 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6564 op_free(pl_yylval.opval);
f7461760 6565 pl_yylval.opval = rv2cv_op;
6154021b 6566 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6567 PL_last_lop = PL_oldbufptr;
6568 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6569 PL_nextwhite = PL_thiswhite;
6570 PL_thiswhite = 0;
6571 start_force(PL_curforce);
6154021b 6572 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6573 PL_expect = XTERM;
cd81e915
NC
6574 PL_nextwhite = nextPL_nextwhite;
6575 curmad('X', PL_thistoken);
6b29d1f5 6576 PL_thistoken = newSVpvs("");
5db06880
NC
6577 force_next(WORD);
6578 TOKEN(NOAMP);
6579 }
6580#else
6154021b 6581 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6582 PL_expect = XTERM;
8990e307
LW
6583 force_next(WORD);
6584 TOKEN(NOAMP);
5db06880 6585#endif
8990e307 6586 }
748a9306 6587
8990e307
LW
6588 /* Call it a bare word */
6589
5603f27d 6590 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6591 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6592 else {
9a073a1d
RGS
6593 bareword:
6594 /* after "print" and similar functions (corresponding to
6595 * "F? L" in opcode.pl), whatever wasn't already parsed as
6596 * a filehandle should be subject to "strict subs".
6597 * Likewise for the optional indirect-object argument to system
6598 * or exec, which can't be a bareword */
6599 if ((PL_last_lop_op == OP_PRINT
6600 || PL_last_lop_op == OP_PRTF
6601 || PL_last_lop_op == OP_SAY
6602 || PL_last_lop_op == OP_SYSTEM
6603 || PL_last_lop_op == OP_EXEC)
6604 && (PL_hints & HINT_STRICT_SUBS))
6605 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6606 if (lastchar != '-') {
6607 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6608 d = PL_tokenbuf;
6609 while (isLOWER(*d))
6610 d++;
da51bb9b 6611 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6612 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6613 PL_tokenbuf);
6614 }
748a9306
LW
6615 }
6616 }
f7461760 6617 op_free(rv2cv_op);
c3e0f903
GS
6618
6619 safe_bareword:
9b387841
NC
6620 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6621 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6622 "Operator or semicolon missing before %c%s",
6623 lastchar, PL_tokenbuf);
6624 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625 "Ambiguous use of %c resolved as operator %c",
6626 lastchar, lastchar);
748a9306 6627 }
93a17b20 6628 TOKEN(WORD);
79072805 6629 }
79072805 6630
68dc0745 6631 case KEY___FILE__:
6154021b 6632 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6633 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6634 TERM(THING);
6635
79072805 6636 case KEY___LINE__:
6154021b 6637 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6638 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6639 TERM(THING);
68dc0745 6640
6641 case KEY___PACKAGE__:
6154021b 6642 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6643 (PL_curstash
5aaec2b4 6644 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6645 : &PL_sv_undef));
79072805 6646 TERM(THING);
79072805 6647
e50aee73 6648 case KEY___DATA__:
79072805
LW
6649 case KEY___END__: {
6650 GV *gv;
3280af22 6651 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6652 const char *pname = "main";
3280af22 6653 if (PL_tokenbuf[2] == 'D')
bfcb3514 6654 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6655 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6656 SVt_PVIO);
a5f75d66 6657 GvMULTI_on(gv);
79072805 6658 if (!GvIO(gv))
a0d0e21e 6659 GvIOp(gv) = newIO();
3280af22 6660 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6661#if defined(HAS_FCNTL) && defined(F_SETFD)
6662 {
f54cb97a 6663 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6664 fcntl(fd,F_SETFD,fd >= 3);
6665 }
79072805 6666#endif
fd049845 6667 /* Mark this internal pseudo-handle as clean */
6668 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6669 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6670 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6671 else
50952442 6672 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6673#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6674 /* if the script was opened in binmode, we need to revert
53129d29 6675 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6676 * XXX this is a questionable hack at best. */
53129d29
GS
6677 if (PL_bufend-PL_bufptr > 2
6678 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6679 {
6680 Off_t loc = 0;
50952442 6681 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6682 loc = PerlIO_tell(PL_rsfp);
6683 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6684 }
2986a63f
JH
6685#ifdef NETWARE
6686 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6687#else
c39cd008 6688 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6689#endif /* NETWARE */
1143fce0
JH
6690#ifdef PERLIO_IS_STDIO /* really? */
6691# if defined(__BORLANDC__)
cb359b41
JH
6692 /* XXX see note in do_binmode() */
6693 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6694# endif
6695#endif
c39cd008
GS
6696 if (loc > 0)
6697 PerlIO_seek(PL_rsfp, loc, 0);
6698 }
6699 }
6700#endif
7948272d 6701#ifdef PERLIO_LAYERS
52d2e0f4
JH
6702 if (!IN_BYTES) {
6703 if (UTF)
6704 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6705 else if (PL_encoding) {
6706 SV *name;
6707 dSP;
6708 ENTER;
6709 SAVETMPS;
6710 PUSHMARK(sp);
6711 EXTEND(SP, 1);
6712 XPUSHs(PL_encoding);
6713 PUTBACK;
6714 call_method("name", G_SCALAR);
6715 SPAGAIN;
6716 name = POPs;
6717 PUTBACK;
bfed75c6 6718 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6719 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6720 SVfARG(name)));
52d2e0f4
JH
6721 FREETMPS;
6722 LEAVE;
6723 }
6724 }
7948272d 6725#endif
5db06880
NC
6726#ifdef PERL_MAD
6727 if (PL_madskills) {
cd81e915
NC
6728 if (PL_realtokenstart >= 0) {
6729 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6730 if (!PL_endwhite)
6b29d1f5 6731 PL_endwhite = newSVpvs("");
cd81e915
NC
6732 sv_catsv(PL_endwhite, PL_thiswhite);
6733 PL_thiswhite = 0;
6734 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6735 PL_realtokenstart = -1;
5db06880 6736 }
5cc814fd
NC
6737 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6738 != NULL) ;
5db06880
NC
6739 }
6740#endif
4608196e 6741 PL_rsfp = NULL;
79072805
LW
6742 }
6743 goto fake_eof;
e929a76b 6744 }
de3bb511 6745
8990e307 6746 case KEY_AUTOLOAD:
ed6116ce 6747 case KEY_DESTROY:
79072805 6748 case KEY_BEGIN:
3c10abe3 6749 case KEY_UNITCHECK:
7d30b5c4 6750 case KEY_CHECK:
7d07dbc2 6751 case KEY_INIT:
7d30b5c4 6752 case KEY_END:
3280af22
NIS
6753 if (PL_expect == XSTATE) {
6754 s = PL_bufptr;
93a17b20 6755 goto really_sub;
79072805
LW
6756 }
6757 goto just_a_word;
6758
a0d0e21e
LW
6759 case KEY_CORE:
6760 if (*s == ':' && s[1] == ':') {
6761 s += 2;
748a9306 6762 d = s;
3280af22 6763 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6764 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6765 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6766 if (tmp < 0)
6767 tmp = -tmp;
850e8516 6768 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6769 /* that's a way to remember we saw "CORE::" */
850e8516 6770 orig_keyword = tmp;
a0d0e21e
LW
6771 goto reserved_word;
6772 }
6773 goto just_a_word;
6774
463ee0b2
LW
6775 case KEY_abs:
6776 UNI(OP_ABS);
6777
79072805
LW
6778 case KEY_alarm:
6779 UNI(OP_ALARM);
6780
6781 case KEY_accept:
a0d0e21e 6782 LOP(OP_ACCEPT,XTERM);
79072805 6783
463ee0b2
LW
6784 case KEY_and:
6785 OPERATOR(ANDOP);
6786
79072805 6787 case KEY_atan2:
a0d0e21e 6788 LOP(OP_ATAN2,XTERM);
85e6fe83 6789
79072805 6790 case KEY_bind:
a0d0e21e 6791 LOP(OP_BIND,XTERM);
79072805
LW
6792
6793 case KEY_binmode:
1c1fc3ea 6794 LOP(OP_BINMODE,XTERM);
79072805
LW
6795
6796 case KEY_bless:
a0d0e21e 6797 LOP(OP_BLESS,XTERM);
79072805 6798
0d863452
RH
6799 case KEY_break:
6800 FUN0(OP_BREAK);
6801
79072805
LW
6802 case KEY_chop:
6803 UNI(OP_CHOP);
6804
6805 case KEY_continue:
0d863452
RH
6806 /* When 'use switch' is in effect, continue has a dual
6807 life as a control operator. */
6808 {
ef89dcc3 6809 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6810 PREBLOCK(CONTINUE);
6811 else {
6812 /* We have to disambiguate the two senses of
6813 "continue". If the next token is a '{' then
6814 treat it as the start of a continue block;
6815 otherwise treat it as a control operator.
6816 */
6817 s = skipspace(s);
6818 if (*s == '{')
79072805 6819 PREBLOCK(CONTINUE);
0d863452
RH
6820 else
6821 FUN0(OP_CONTINUE);
6822 }
6823 }
79072805
LW
6824
6825 case KEY_chdir:
fafc274c
NC
6826 /* may use HOME */
6827 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6828 UNI(OP_CHDIR);
6829
6830 case KEY_close:
6831 UNI(OP_CLOSE);
6832
6833 case KEY_closedir:
6834 UNI(OP_CLOSEDIR);
6835
6836 case KEY_cmp:
6837 Eop(OP_SCMP);
6838
6839 case KEY_caller:
6840 UNI(OP_CALLER);
6841
6842 case KEY_crypt:
6843#ifdef FCRYPT
f4c556ac
GS
6844 if (!PL_cryptseen) {
6845 PL_cryptseen = TRUE;
de3bb511 6846 init_des();
f4c556ac 6847 }
a687059c 6848#endif
a0d0e21e 6849 LOP(OP_CRYPT,XTERM);
79072805
LW
6850
6851 case KEY_chmod:
a0d0e21e 6852 LOP(OP_CHMOD,XTERM);
79072805
LW
6853
6854 case KEY_chown:
a0d0e21e 6855 LOP(OP_CHOWN,XTERM);
79072805
LW
6856
6857 case KEY_connect:
a0d0e21e 6858 LOP(OP_CONNECT,XTERM);
79072805 6859
463ee0b2
LW
6860 case KEY_chr:
6861 UNI(OP_CHR);
6862
79072805
LW
6863 case KEY_cos:
6864 UNI(OP_COS);
6865
6866 case KEY_chroot:
6867 UNI(OP_CHROOT);
6868
0d863452
RH
6869 case KEY_default:
6870 PREBLOCK(DEFAULT);
6871
79072805 6872 case KEY_do:
29595ff2 6873 s = SKIPSPACE1(s);
79072805 6874 if (*s == '{')
a0d0e21e 6875 PRETERMBLOCK(DO);
79072805 6876 if (*s != '\'')
89c5585f 6877 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6878 if (orig_keyword == KEY_do) {
6879 orig_keyword = 0;
6154021b 6880 pl_yylval.ival = 1;
850e8516
RGS
6881 }
6882 else
6154021b 6883 pl_yylval.ival = 0;
378cc40b 6884 OPERATOR(DO);
79072805
LW
6885
6886 case KEY_die:
3280af22 6887 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6888 LOP(OP_DIE,XTERM);
79072805
LW
6889
6890 case KEY_defined:
6891 UNI(OP_DEFINED);
6892
6893 case KEY_delete:
a0d0e21e 6894 UNI(OP_DELETE);
79072805
LW
6895
6896 case KEY_dbmopen:
5c1737d1 6897 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6898 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6899
6900 case KEY_dbmclose:
6901 UNI(OP_DBMCLOSE);
6902
6903 case KEY_dump:
a0d0e21e 6904 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6905 LOOPX(OP_DUMP);
6906
6907 case KEY_else:
6908 PREBLOCK(ELSE);
6909
6910 case KEY_elsif:
6154021b 6911 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6912 OPERATOR(ELSIF);
6913
6914 case KEY_eq:
6915 Eop(OP_SEQ);
6916
a0d0e21e
LW
6917 case KEY_exists:
6918 UNI(OP_EXISTS);
4e553d73 6919
79072805 6920 case KEY_exit:
5db06880
NC
6921 if (PL_madskills)
6922 UNI(OP_INT);
79072805
LW
6923 UNI(OP_EXIT);
6924
6925 case KEY_eval:
29595ff2 6926 s = SKIPSPACE1(s);
32e2a35d
RGS
6927 if (*s == '{') { /* block eval */
6928 PL_expect = XTERMBLOCK;
6929 UNIBRACK(OP_ENTERTRY);
6930 }
6931 else { /* string eval */
6932 PL_expect = XTERM;
6933 UNIBRACK(OP_ENTEREVAL);
6934 }
79072805
LW
6935
6936 case KEY_eof:
6937 UNI(OP_EOF);
6938
6939 case KEY_exp:
6940 UNI(OP_EXP);
6941
6942 case KEY_each:
6943 UNI(OP_EACH);
6944
6945 case KEY_exec:
a0d0e21e 6946 LOP(OP_EXEC,XREF);
79072805
LW
6947
6948 case KEY_endhostent:
6949 FUN0(OP_EHOSTENT);
6950
6951 case KEY_endnetent:
6952 FUN0(OP_ENETENT);
6953
6954 case KEY_endservent:
6955 FUN0(OP_ESERVENT);
6956
6957 case KEY_endprotoent:
6958 FUN0(OP_EPROTOENT);
6959
6960 case KEY_endpwent:
6961 FUN0(OP_EPWENT);
6962
6963 case KEY_endgrent:
6964 FUN0(OP_EGRENT);
6965
6966 case KEY_for:
6967 case KEY_foreach:
6154021b 6968 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6969 s = SKIPSPACE1(s);
7e2040f0 6970 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6971 char *p = s;
5db06880
NC
6972#ifdef PERL_MAD
6973 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6974#endif
6975
3280af22 6976 if ((PL_bufend - p) >= 3 &&
55497cff 6977 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6978 p += 2;
77ca0c92
LW
6979 else if ((PL_bufend - p) >= 4 &&
6980 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6981 p += 3;
29595ff2 6982 p = PEEKSPACE(p);
7e2040f0 6983 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6984 p = scan_ident(p, PL_bufend,
6985 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6986 p = PEEKSPACE(p);
77ca0c92
LW
6987 }
6988 if (*p != '$')
cea2e8a9 6989 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6990#ifdef PERL_MAD
6991 s = SvPVX(PL_linestr) + soff;
6992#endif
55497cff 6993 }
79072805
LW
6994 OPERATOR(FOR);
6995
6996 case KEY_formline:
a0d0e21e 6997 LOP(OP_FORMLINE,XTERM);
79072805
LW
6998
6999 case KEY_fork:
7000 FUN0(OP_FORK);
7001
7002 case KEY_fcntl:
a0d0e21e 7003 LOP(OP_FCNTL,XTERM);
79072805
LW
7004
7005 case KEY_fileno:
7006 UNI(OP_FILENO);
7007
7008 case KEY_flock:
a0d0e21e 7009 LOP(OP_FLOCK,XTERM);
79072805
LW
7010
7011 case KEY_gt:
7012 Rop(OP_SGT);
7013
7014 case KEY_ge:
7015 Rop(OP_SGE);
7016
7017 case KEY_grep:
2c38e13d 7018 LOP(OP_GREPSTART, XREF);
79072805
LW
7019
7020 case KEY_goto:
a0d0e21e 7021 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7022 LOOPX(OP_GOTO);
7023
7024 case KEY_gmtime:
7025 UNI(OP_GMTIME);
7026
7027 case KEY_getc:
6f33ba73 7028 UNIDOR(OP_GETC);
79072805
LW
7029
7030 case KEY_getppid:
7031 FUN0(OP_GETPPID);
7032
7033 case KEY_getpgrp:
7034 UNI(OP_GETPGRP);
7035
7036 case KEY_getpriority:
a0d0e21e 7037 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7038
7039 case KEY_getprotobyname:
7040 UNI(OP_GPBYNAME);
7041
7042 case KEY_getprotobynumber:
a0d0e21e 7043 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7044
7045 case KEY_getprotoent:
7046 FUN0(OP_GPROTOENT);
7047
7048 case KEY_getpwent:
7049 FUN0(OP_GPWENT);
7050
7051 case KEY_getpwnam:
ff68c719 7052 UNI(OP_GPWNAM);
79072805
LW
7053
7054 case KEY_getpwuid:
ff68c719 7055 UNI(OP_GPWUID);
79072805
LW
7056
7057 case KEY_getpeername:
7058 UNI(OP_GETPEERNAME);
7059
7060 case KEY_gethostbyname:
7061 UNI(OP_GHBYNAME);
7062
7063 case KEY_gethostbyaddr:
a0d0e21e 7064 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7065
7066 case KEY_gethostent:
7067 FUN0(OP_GHOSTENT);
7068
7069 case KEY_getnetbyname:
7070 UNI(OP_GNBYNAME);
7071
7072 case KEY_getnetbyaddr:
a0d0e21e 7073 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7074
7075 case KEY_getnetent:
7076 FUN0(OP_GNETENT);
7077
7078 case KEY_getservbyname:
a0d0e21e 7079 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7080
7081 case KEY_getservbyport:
a0d0e21e 7082 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7083
7084 case KEY_getservent:
7085 FUN0(OP_GSERVENT);
7086
7087 case KEY_getsockname:
7088 UNI(OP_GETSOCKNAME);
7089
7090 case KEY_getsockopt:
a0d0e21e 7091 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7092
7093 case KEY_getgrent:
7094 FUN0(OP_GGRENT);
7095
7096 case KEY_getgrnam:
ff68c719 7097 UNI(OP_GGRNAM);
79072805
LW
7098
7099 case KEY_getgrgid:
ff68c719 7100 UNI(OP_GGRGID);
79072805
LW
7101
7102 case KEY_getlogin:
7103 FUN0(OP_GETLOGIN);
7104
0d863452 7105 case KEY_given:
6154021b 7106 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7107 OPERATOR(GIVEN);
7108
93a17b20 7109 case KEY_glob:
a0d0e21e 7110 LOP(OP_GLOB,XTERM);
93a17b20 7111
79072805
LW
7112 case KEY_hex:
7113 UNI(OP_HEX);
7114
7115 case KEY_if:
6154021b 7116 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7117 OPERATOR(IF);
7118
7119 case KEY_index:
a0d0e21e 7120 LOP(OP_INDEX,XTERM);
79072805
LW
7121
7122 case KEY_int:
7123 UNI(OP_INT);
7124
7125 case KEY_ioctl:
a0d0e21e 7126 LOP(OP_IOCTL,XTERM);
79072805
LW
7127
7128 case KEY_join:
a0d0e21e 7129 LOP(OP_JOIN,XTERM);
79072805
LW
7130
7131 case KEY_keys:
7132 UNI(OP_KEYS);
7133
7134 case KEY_kill:
a0d0e21e 7135 LOP(OP_KILL,XTERM);
79072805
LW
7136
7137 case KEY_last:
a0d0e21e 7138 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7139 LOOPX(OP_LAST);
4e553d73 7140
79072805
LW
7141 case KEY_lc:
7142 UNI(OP_LC);
7143
7144 case KEY_lcfirst:
7145 UNI(OP_LCFIRST);
7146
7147 case KEY_local:
6154021b 7148 pl_yylval.ival = 0;
79072805
LW
7149 OPERATOR(LOCAL);
7150
7151 case KEY_length:
7152 UNI(OP_LENGTH);
7153
7154 case KEY_lt:
7155 Rop(OP_SLT);
7156
7157 case KEY_le:
7158 Rop(OP_SLE);
7159
7160 case KEY_localtime:
7161 UNI(OP_LOCALTIME);
7162
7163 case KEY_log:
7164 UNI(OP_LOG);
7165
7166 case KEY_link:
a0d0e21e 7167 LOP(OP_LINK,XTERM);
79072805
LW
7168
7169 case KEY_listen:
a0d0e21e 7170 LOP(OP_LISTEN,XTERM);
79072805 7171
c0329465
MB
7172 case KEY_lock:
7173 UNI(OP_LOCK);
7174
79072805
LW
7175 case KEY_lstat:
7176 UNI(OP_LSTAT);
7177
7178 case KEY_m:
8782bef2 7179 s = scan_pat(s,OP_MATCH);
79072805
LW
7180 TERM(sublex_start());
7181
a0d0e21e 7182 case KEY_map:
2c38e13d 7183 LOP(OP_MAPSTART, XREF);
4e4e412b 7184
79072805 7185 case KEY_mkdir:
a0d0e21e 7186 LOP(OP_MKDIR,XTERM);
79072805
LW
7187
7188 case KEY_msgctl:
a0d0e21e 7189 LOP(OP_MSGCTL,XTERM);
79072805
LW
7190
7191 case KEY_msgget:
a0d0e21e 7192 LOP(OP_MSGGET,XTERM);
79072805
LW
7193
7194 case KEY_msgrcv:
a0d0e21e 7195 LOP(OP_MSGRCV,XTERM);
79072805
LW
7196
7197 case KEY_msgsnd:
a0d0e21e 7198 LOP(OP_MSGSND,XTERM);
79072805 7199
77ca0c92 7200 case KEY_our:
93a17b20 7201 case KEY_my:
952306ac 7202 case KEY_state:
eac04b2e 7203 PL_in_my = (U16)tmp;
29595ff2 7204 s = SKIPSPACE1(s);
7e2040f0 7205 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7206#ifdef PERL_MAD
7207 char* start = s;
7208#endif
3280af22 7209 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7210 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7211 goto really_sub;
def3634b 7212 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7213 if (!PL_in_my_stash) {
c750a3ec 7214 char tmpbuf[1024];
3280af22 7215 PL_bufptr = s;
d9fad198 7216 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7217 yyerror(tmpbuf);
7218 }
5db06880
NC
7219#ifdef PERL_MAD
7220 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7221 sv_catsv(PL_thistoken, PL_nextwhite);
7222 PL_nextwhite = 0;
7223 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7224 }
7225#endif
c750a3ec 7226 }
6154021b 7227 pl_yylval.ival = 1;
55497cff 7228 OPERATOR(MY);
93a17b20 7229
79072805 7230 case KEY_next:
a0d0e21e 7231 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7232 LOOPX(OP_NEXT);
7233
7234 case KEY_ne:
7235 Eop(OP_SNE);
7236
a0d0e21e 7237 case KEY_no:
468aa647 7238 s = tokenize_use(0, s);
a0d0e21e
LW
7239 OPERATOR(USE);
7240
7241 case KEY_not:
29595ff2 7242 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7243 FUN1(OP_NOT);
7244 else
7245 OPERATOR(NOTOP);
a0d0e21e 7246
79072805 7247 case KEY_open:
29595ff2 7248 s = SKIPSPACE1(s);
7e2040f0 7249 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7250 const char *t;
c35e046a
AL
7251 for (d = s; isALNUM_lazy_if(d,UTF);)
7252 d++;
7253 for (t=d; isSPACE(*t);)
7254 t++;
e2ab214b 7255 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7256 /* [perl #16184] */
7257 && !(t[0] == '=' && t[1] == '>')
7258 ) {
5f66b61c 7259 int parms_len = (int)(d-s);
9014280d 7260 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7261 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7262 parms_len, s, parms_len, s);
66fbe8fb 7263 }
93a17b20 7264 }
a0d0e21e 7265 LOP(OP_OPEN,XTERM);
79072805 7266
463ee0b2 7267 case KEY_or:
6154021b 7268 pl_yylval.ival = OP_OR;
463ee0b2
LW
7269 OPERATOR(OROP);
7270
79072805
LW
7271 case KEY_ord:
7272 UNI(OP_ORD);
7273
7274 case KEY_oct:
7275 UNI(OP_OCT);
7276
7277 case KEY_opendir:
a0d0e21e 7278 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7279
7280 case KEY_print:
3280af22 7281 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7282 LOP(OP_PRINT,XREF);
79072805
LW
7283
7284 case KEY_printf:
3280af22 7285 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7286 LOP(OP_PRTF,XREF);
79072805 7287
c07a80fd 7288 case KEY_prototype:
7289 UNI(OP_PROTOTYPE);
7290
79072805 7291 case KEY_push:
a0d0e21e 7292 LOP(OP_PUSH,XTERM);
79072805
LW
7293
7294 case KEY_pop:
6f33ba73 7295 UNIDOR(OP_POP);
79072805 7296
a0d0e21e 7297 case KEY_pos:
6f33ba73 7298 UNIDOR(OP_POS);
4e553d73 7299
79072805 7300 case KEY_pack:
a0d0e21e 7301 LOP(OP_PACK,XTERM);
79072805
LW
7302
7303 case KEY_package:
a0d0e21e 7304 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7305 s = SKIPSPACE1(s);
91152fc1 7306 s = force_strict_version(s);
4e4da3ac 7307 PL_lex_expect = XBLOCK;
79072805
LW
7308 OPERATOR(PACKAGE);
7309
7310 case KEY_pipe:
a0d0e21e 7311 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7312
7313 case KEY_q:
5db06880 7314 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7315 if (!s)
d4c19fe8 7316 missingterm(NULL);
6154021b 7317 pl_yylval.ival = OP_CONST;
79072805
LW
7318 TERM(sublex_start());
7319
a0d0e21e
LW
7320 case KEY_quotemeta:
7321 UNI(OP_QUOTEMETA);
7322
ea25a9b2
Z
7323 case KEY_qw: {
7324 OP *words = NULL;
5db06880 7325 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7326 if (!s)
d4c19fe8 7327 missingterm(NULL);
3480a8d2 7328 PL_expect = XOPERATOR;
8127e0e3 7329 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7330 int warned = 0;
3280af22 7331 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7332 while (len) {
d4c19fe8
AL
7333 for (; isSPACE(*d) && len; --len, ++d)
7334 /**/;
8127e0e3 7335 if (len) {
d4c19fe8 7336 SV *sv;
f54cb97a 7337 const char *b = d;
e476b1b5 7338 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7339 for (; !isSPACE(*d) && len; --len, ++d) {
7340 if (*d == ',') {
9014280d 7341 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7342 "Possible attempt to separate words with commas");
7343 ++warned;
7344 }
7345 else if (*d == '#') {
9014280d 7346 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7347 "Possible attempt to put comments in qw() list");
7348 ++warned;
7349 }
7350 }
7351 }
7352 else {
d4c19fe8
AL
7353 for (; !isSPACE(*d) && len; --len, ++d)
7354 /**/;
8127e0e3 7355 }
740cce10 7356 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7357 words = append_elem(OP_LIST, words,
7948272d 7358 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7359 }
7360 }
7361 }
ea25a9b2
Z
7362 if (!words)
7363 words = newNULLLIST();
37fd879b 7364 if (PL_lex_stuff) {
8127e0e3 7365 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7366 PL_lex_stuff = NULL;
37fd879b 7367 }
ea25a9b2
Z
7368 PL_expect = XOPERATOR;
7369 pl_yylval.opval = sawparens(words);
7370 TOKEN(QWLIST);
7371 }
8990e307 7372
79072805 7373 case KEY_qq:
5db06880 7374 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7375 if (!s)
d4c19fe8 7376 missingterm(NULL);
6154021b 7377 pl_yylval.ival = OP_STRINGIFY;
3280af22 7378 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7379 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7380 TERM(sublex_start());
7381
8782bef2
GB
7382 case KEY_qr:
7383 s = scan_pat(s,OP_QR);
7384 TERM(sublex_start());
7385
79072805 7386 case KEY_qx:
5db06880 7387 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7388 if (!s)
d4c19fe8 7389 missingterm(NULL);
9b201d7d 7390 readpipe_override();
79072805
LW
7391 TERM(sublex_start());
7392
7393 case KEY_return:
7394 OLDLOP(OP_RETURN);
7395
7396 case KEY_require:
29595ff2 7397 s = SKIPSPACE1(s);
e759cc13
RGS
7398 if (isDIGIT(*s)) {
7399 s = force_version(s, FALSE);
a7cb1f99 7400 }
e759cc13
RGS
7401 else if (*s != 'v' || !isDIGIT(s[1])
7402 || (s = force_version(s, TRUE), *s == 'v'))
7403 {
a7cb1f99
GS
7404 *PL_tokenbuf = '\0';
7405 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7406 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7407 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7408 else if (*s == '<')
7409 yyerror("<> should be quotes");
7410 }
a72a1c8b
RGS
7411 if (orig_keyword == KEY_require) {
7412 orig_keyword = 0;
6154021b 7413 pl_yylval.ival = 1;
a72a1c8b
RGS
7414 }
7415 else
6154021b 7416 pl_yylval.ival = 0;
a72a1c8b
RGS
7417 PL_expect = XTERM;
7418 PL_bufptr = s;
7419 PL_last_uni = PL_oldbufptr;
7420 PL_last_lop_op = OP_REQUIRE;
7421 s = skipspace(s);
7422 return REPORT( (int)REQUIRE );
79072805
LW
7423
7424 case KEY_reset:
7425 UNI(OP_RESET);
7426
7427 case KEY_redo:
a0d0e21e 7428 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7429 LOOPX(OP_REDO);
7430
7431 case KEY_rename:
a0d0e21e 7432 LOP(OP_RENAME,XTERM);
79072805
LW
7433
7434 case KEY_rand:
7435 UNI(OP_RAND);
7436
7437 case KEY_rmdir:
7438 UNI(OP_RMDIR);
7439
7440 case KEY_rindex:
a0d0e21e 7441 LOP(OP_RINDEX,XTERM);
79072805
LW
7442
7443 case KEY_read:
a0d0e21e 7444 LOP(OP_READ,XTERM);
79072805
LW
7445
7446 case KEY_readdir:
7447 UNI(OP_READDIR);
7448
93a17b20 7449 case KEY_readline:
6f33ba73 7450 UNIDOR(OP_READLINE);
93a17b20
LW
7451
7452 case KEY_readpipe:
0858480c 7453 UNIDOR(OP_BACKTICK);
93a17b20 7454
79072805
LW
7455 case KEY_rewinddir:
7456 UNI(OP_REWINDDIR);
7457
7458 case KEY_recv:
a0d0e21e 7459 LOP(OP_RECV,XTERM);
79072805
LW
7460
7461 case KEY_reverse:
a0d0e21e 7462 LOP(OP_REVERSE,XTERM);
79072805
LW
7463
7464 case KEY_readlink:
6f33ba73 7465 UNIDOR(OP_READLINK);
79072805
LW
7466
7467 case KEY_ref:
7468 UNI(OP_REF);
7469
7470 case KEY_s:
7471 s = scan_subst(s);
6154021b 7472 if (pl_yylval.opval)
79072805
LW
7473 TERM(sublex_start());
7474 else
7475 TOKEN(1); /* force error */
7476
0d863452
RH
7477 case KEY_say:
7478 checkcomma(s,PL_tokenbuf,"filehandle");
7479 LOP(OP_SAY,XREF);
7480
a0d0e21e
LW
7481 case KEY_chomp:
7482 UNI(OP_CHOMP);
4e553d73 7483
79072805
LW
7484 case KEY_scalar:
7485 UNI(OP_SCALAR);
7486
7487 case KEY_select:
a0d0e21e 7488 LOP(OP_SELECT,XTERM);
79072805
LW
7489
7490 case KEY_seek:
a0d0e21e 7491 LOP(OP_SEEK,XTERM);
79072805
LW
7492
7493 case KEY_semctl:
a0d0e21e 7494 LOP(OP_SEMCTL,XTERM);
79072805
LW
7495
7496 case KEY_semget:
a0d0e21e 7497 LOP(OP_SEMGET,XTERM);
79072805
LW
7498
7499 case KEY_semop:
a0d0e21e 7500 LOP(OP_SEMOP,XTERM);
79072805
LW
7501
7502 case KEY_send:
a0d0e21e 7503 LOP(OP_SEND,XTERM);
79072805
LW
7504
7505 case KEY_setpgrp:
a0d0e21e 7506 LOP(OP_SETPGRP,XTERM);
79072805
LW
7507
7508 case KEY_setpriority:
a0d0e21e 7509 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7510
7511 case KEY_sethostent:
ff68c719 7512 UNI(OP_SHOSTENT);
79072805
LW
7513
7514 case KEY_setnetent:
ff68c719 7515 UNI(OP_SNETENT);
79072805
LW
7516
7517 case KEY_setservent:
ff68c719 7518 UNI(OP_SSERVENT);
79072805
LW
7519
7520 case KEY_setprotoent:
ff68c719 7521 UNI(OP_SPROTOENT);
79072805
LW
7522
7523 case KEY_setpwent:
7524 FUN0(OP_SPWENT);
7525
7526 case KEY_setgrent:
7527 FUN0(OP_SGRENT);
7528
7529 case KEY_seekdir:
a0d0e21e 7530 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7531
7532 case KEY_setsockopt:
a0d0e21e 7533 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7534
7535 case KEY_shift:
6f33ba73 7536 UNIDOR(OP_SHIFT);
79072805
LW
7537
7538 case KEY_shmctl:
a0d0e21e 7539 LOP(OP_SHMCTL,XTERM);
79072805
LW
7540
7541 case KEY_shmget:
a0d0e21e 7542 LOP(OP_SHMGET,XTERM);
79072805
LW
7543
7544 case KEY_shmread:
a0d0e21e 7545 LOP(OP_SHMREAD,XTERM);
79072805
LW
7546
7547 case KEY_shmwrite:
a0d0e21e 7548 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7549
7550 case KEY_shutdown:
a0d0e21e 7551 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7552
7553 case KEY_sin:
7554 UNI(OP_SIN);
7555
7556 case KEY_sleep:
7557 UNI(OP_SLEEP);
7558
7559 case KEY_socket:
a0d0e21e 7560 LOP(OP_SOCKET,XTERM);
79072805
LW
7561
7562 case KEY_socketpair:
a0d0e21e 7563 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7564
7565 case KEY_sort:
3280af22 7566 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7567 s = SKIPSPACE1(s);
79072805 7568 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7569 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7570 PL_expect = XTERM;
15f0808c 7571 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7572 LOP(OP_SORT,XREF);
79072805
LW
7573
7574 case KEY_split:
a0d0e21e 7575 LOP(OP_SPLIT,XTERM);
79072805
LW
7576
7577 case KEY_sprintf:
a0d0e21e 7578 LOP(OP_SPRINTF,XTERM);
79072805
LW
7579
7580 case KEY_splice:
a0d0e21e 7581 LOP(OP_SPLICE,XTERM);
79072805
LW
7582
7583 case KEY_sqrt:
7584 UNI(OP_SQRT);
7585
7586 case KEY_srand:
7587 UNI(OP_SRAND);
7588
7589 case KEY_stat:
7590 UNI(OP_STAT);
7591
7592 case KEY_study:
79072805
LW
7593 UNI(OP_STUDY);
7594
7595 case KEY_substr:
a0d0e21e 7596 LOP(OP_SUBSTR,XTERM);
79072805
LW
7597
7598 case KEY_format:
7599 case KEY_sub:
93a17b20 7600 really_sub:
09bef843 7601 {
3280af22 7602 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7603 SSize_t tboffset = 0;
09bef843 7604 expectation attrful;
28cc6278 7605 bool have_name, have_proto;
f54cb97a 7606 const int key = tmp;
09bef843 7607
5db06880
NC
7608#ifdef PERL_MAD
7609 SV *tmpwhite = 0;
7610
cd81e915 7611 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7612 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7613 PL_thistoken = 0;
5db06880
NC
7614
7615 d = s;
7616 s = SKIPSPACE2(s,tmpwhite);
7617#else
09bef843 7618 s = skipspace(s);
5db06880 7619#endif
09bef843 7620
7e2040f0 7621 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7622 (*s == ':' && s[1] == ':'))
7623 {
5db06880 7624#ifdef PERL_MAD
4f61fd4b 7625 SV *nametoke = NULL;
5db06880
NC
7626#endif
7627
09bef843
SB
7628 PL_expect = XBLOCK;
7629 attrful = XATTRBLOCK;
b1b65b59
JH
7630 /* remember buffer pos'n for later force_word */
7631 tboffset = s - PL_oldbufptr;
09bef843 7632 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7633#ifdef PERL_MAD
7634 if (PL_madskills)
7635 nametoke = newSVpvn(s, d - s);
7636#endif
6502358f
NC
7637 if (memchr(tmpbuf, ':', len))
7638 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7639 else {
7640 sv_setsv(PL_subname,PL_curstname);
396482e1 7641 sv_catpvs(PL_subname,"::");
09bef843
SB
7642 sv_catpvn(PL_subname,tmpbuf,len);
7643 }
09bef843 7644 have_name = TRUE;
5db06880
NC
7645
7646#ifdef PERL_MAD
7647
7648 start_force(0);
7649 CURMAD('X', nametoke);
7650 CURMAD('_', tmpwhite);
7651 (void) force_word(PL_oldbufptr + tboffset, WORD,
7652 FALSE, TRUE, TRUE);
7653
7654 s = SKIPSPACE2(d,tmpwhite);
7655#else
7656 s = skipspace(d);
7657#endif
09bef843 7658 }
463ee0b2 7659 else {
09bef843
SB
7660 if (key == KEY_my)
7661 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7662 PL_expect = XTERMBLOCK;
7663 attrful = XATTRTERM;
76f68e9b 7664 sv_setpvs(PL_subname,"?");
09bef843 7665 have_name = FALSE;
463ee0b2 7666 }
4633a7c4 7667
09bef843
SB
7668 if (key == KEY_format) {
7669 if (*s == '=')
7670 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7671#ifdef PERL_MAD
cd81e915 7672 PL_thistoken = subtoken;
5db06880
NC
7673 s = d;
7674#else
09bef843 7675 if (have_name)
b1b65b59
JH
7676 (void) force_word(PL_oldbufptr + tboffset, WORD,
7677 FALSE, TRUE, TRUE);
5db06880 7678#endif
09bef843
SB
7679 OPERATOR(FORMAT);
7680 }
79072805 7681
09bef843
SB
7682 /* Look for a prototype */
7683 if (*s == '(') {
d9f2850e
RGS
7684 char *p;
7685 bool bad_proto = FALSE;
9e8d7757
RB
7686 bool in_brackets = FALSE;
7687 char greedy_proto = ' ';
7688 bool proto_after_greedy_proto = FALSE;
7689 bool must_be_last = FALSE;
7690 bool underscore = FALSE;
aef2a98a 7691 bool seen_underscore = FALSE;
197afce1 7692 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7693
5db06880 7694 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7695 if (!s)
09bef843 7696 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7697 /* strip spaces and check for bad characters */
09bef843
SB
7698 d = SvPVX(PL_lex_stuff);
7699 tmp = 0;
d9f2850e
RGS
7700 for (p = d; *p; ++p) {
7701 if (!isSPACE(*p)) {
7702 d[tmp++] = *p;
9e8d7757 7703
197afce1 7704 if (warnillegalproto) {
9e8d7757
RB
7705 if (must_be_last)
7706 proto_after_greedy_proto = TRUE;
7707 if (!strchr("$@%*;[]&\\_", *p)) {
7708 bad_proto = TRUE;
7709 }
7710 else {
7711 if ( underscore ) {
7712 if ( *p != ';' )
7713 bad_proto = TRUE;
7714 underscore = FALSE;
7715 }
7716 if ( *p == '[' ) {
7717 in_brackets = TRUE;
7718 }
7719 else if ( *p == ']' ) {
7720 in_brackets = FALSE;
7721 }
7722 else if ( (*p == '@' || *p == '%') &&
7723 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7724 !in_brackets ) {
7725 must_be_last = TRUE;
7726 greedy_proto = *p;
7727 }
7728 else if ( *p == '_' ) {
aef2a98a 7729 underscore = seen_underscore = TRUE;
9e8d7757
RB
7730 }
7731 }
7732 }
d37a9538 7733 }
09bef843 7734 }
d9f2850e 7735 d[tmp] = '\0';
9e8d7757 7736 if (proto_after_greedy_proto)
197afce1 7737 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7738 "Prototype after '%c' for %"SVf" : %s",
7739 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7740 if (bad_proto)
197afce1 7741 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7742 "Illegal character %sin prototype for %"SVf" : %s",
7743 seen_underscore ? "after '_' " : "",
be2597df 7744 SVfARG(PL_subname), d);
b162af07 7745 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7746 have_proto = TRUE;
68dc0745 7747
5db06880
NC
7748#ifdef PERL_MAD
7749 start_force(0);
cd81e915 7750 CURMAD('q', PL_thisopen);
5db06880 7751 CURMAD('_', tmpwhite);
cd81e915
NC
7752 CURMAD('=', PL_thisstuff);
7753 CURMAD('Q', PL_thisclose);
5db06880
NC
7754 NEXTVAL_NEXTTOKE.opval =
7755 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7756 PL_lex_stuff = NULL;
5db06880
NC
7757 force_next(THING);
7758
7759 s = SKIPSPACE2(s,tmpwhite);
7760#else
09bef843 7761 s = skipspace(s);
5db06880 7762#endif
4633a7c4 7763 }
09bef843
SB
7764 else
7765 have_proto = FALSE;
7766
7767 if (*s == ':' && s[1] != ':')
7768 PL_expect = attrful;
8e742a20
MHM
7769 else if (*s != '{' && key == KEY_sub) {
7770 if (!have_name)
7771 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7772 else if (*s != ';' && *s != '}')
be2597df 7773 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7774 }
09bef843 7775
5db06880
NC
7776#ifdef PERL_MAD
7777 start_force(0);
7778 if (tmpwhite) {
7779 if (PL_madskills)
6b29d1f5 7780 curmad('^', newSVpvs(""));
5db06880
NC
7781 CURMAD('_', tmpwhite);
7782 }
7783 force_next(0);
7784
cd81e915 7785 PL_thistoken = subtoken;
5db06880 7786#else
09bef843 7787 if (have_proto) {
9ded7720 7788 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7789 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7790 PL_lex_stuff = NULL;
09bef843 7791 force_next(THING);
68dc0745 7792 }
5db06880 7793#endif
09bef843 7794 if (!have_name) {
49a54bbe
NC
7795 if (PL_curstash)
7796 sv_setpvs(PL_subname, "__ANON__");
7797 else
7798 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7799 TOKEN(ANONSUB);
4633a7c4 7800 }
5db06880 7801#ifndef PERL_MAD
b1b65b59
JH
7802 (void) force_word(PL_oldbufptr + tboffset, WORD,
7803 FALSE, TRUE, TRUE);
5db06880 7804#endif
09bef843
SB
7805 if (key == KEY_my)
7806 TOKEN(MYSUB);
7807 TOKEN(SUB);
4633a7c4 7808 }
79072805
LW
7809
7810 case KEY_system:
a0d0e21e 7811 LOP(OP_SYSTEM,XREF);
79072805
LW
7812
7813 case KEY_symlink:
a0d0e21e 7814 LOP(OP_SYMLINK,XTERM);
79072805
LW
7815
7816 case KEY_syscall:
a0d0e21e 7817 LOP(OP_SYSCALL,XTERM);
79072805 7818
c07a80fd 7819 case KEY_sysopen:
7820 LOP(OP_SYSOPEN,XTERM);
7821
137443ea 7822 case KEY_sysseek:
7823 LOP(OP_SYSSEEK,XTERM);
7824
79072805 7825 case KEY_sysread:
a0d0e21e 7826 LOP(OP_SYSREAD,XTERM);
79072805
LW
7827
7828 case KEY_syswrite:
a0d0e21e 7829 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7830
7831 case KEY_tr:
7832 s = scan_trans(s);
7833 TERM(sublex_start());
7834
7835 case KEY_tell:
7836 UNI(OP_TELL);
7837
7838 case KEY_telldir:
7839 UNI(OP_TELLDIR);
7840
463ee0b2 7841 case KEY_tie:
a0d0e21e 7842 LOP(OP_TIE,XTERM);
463ee0b2 7843
c07a80fd 7844 case KEY_tied:
7845 UNI(OP_TIED);
7846
79072805
LW
7847 case KEY_time:
7848 FUN0(OP_TIME);
7849
7850 case KEY_times:
7851 FUN0(OP_TMS);
7852
7853 case KEY_truncate:
a0d0e21e 7854 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7855
7856 case KEY_uc:
7857 UNI(OP_UC);
7858
7859 case KEY_ucfirst:
7860 UNI(OP_UCFIRST);
7861
463ee0b2
LW
7862 case KEY_untie:
7863 UNI(OP_UNTIE);
7864
79072805 7865 case KEY_until:
6154021b 7866 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7867 OPERATOR(UNTIL);
7868
7869 case KEY_unless:
6154021b 7870 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7871 OPERATOR(UNLESS);
7872
7873 case KEY_unlink:
a0d0e21e 7874 LOP(OP_UNLINK,XTERM);
79072805
LW
7875
7876 case KEY_undef:
6f33ba73 7877 UNIDOR(OP_UNDEF);
79072805
LW
7878
7879 case KEY_unpack:
a0d0e21e 7880 LOP(OP_UNPACK,XTERM);
79072805
LW
7881
7882 case KEY_utime:
a0d0e21e 7883 LOP(OP_UTIME,XTERM);
79072805
LW
7884
7885 case KEY_umask:
6f33ba73 7886 UNIDOR(OP_UMASK);
79072805
LW
7887
7888 case KEY_unshift:
a0d0e21e
LW
7889 LOP(OP_UNSHIFT,XTERM);
7890
7891 case KEY_use:
468aa647 7892 s = tokenize_use(1, s);
a0d0e21e 7893 OPERATOR(USE);
79072805
LW
7894
7895 case KEY_values:
7896 UNI(OP_VALUES);
7897
7898 case KEY_vec:
a0d0e21e 7899 LOP(OP_VEC,XTERM);
79072805 7900
0d863452 7901 case KEY_when:
6154021b 7902 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7903 OPERATOR(WHEN);
7904
79072805 7905 case KEY_while:
6154021b 7906 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7907 OPERATOR(WHILE);
7908
7909 case KEY_warn:
3280af22 7910 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7911 LOP(OP_WARN,XTERM);
79072805
LW
7912
7913 case KEY_wait:
7914 FUN0(OP_WAIT);
7915
7916 case KEY_waitpid:
a0d0e21e 7917 LOP(OP_WAITPID,XTERM);
79072805
LW
7918
7919 case KEY_wantarray:
7920 FUN0(OP_WANTARRAY);
7921
7922 case KEY_write:
9d116dd7
JH
7923#ifdef EBCDIC
7924 {
df3728a2
JH
7925 char ctl_l[2];
7926 ctl_l[0] = toCTRL('L');
7927 ctl_l[1] = '\0';
fafc274c 7928 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7929 }
7930#else
fafc274c
NC
7931 /* Make sure $^L is defined */
7932 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7933#endif
79072805
LW
7934 UNI(OP_ENTERWRITE);
7935
7936 case KEY_x:
3280af22 7937 if (PL_expect == XOPERATOR)
79072805
LW
7938 Mop(OP_REPEAT);
7939 check_uni();
7940 goto just_a_word;
7941
a0d0e21e 7942 case KEY_xor:
6154021b 7943 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7944 OPERATOR(OROP);
7945
79072805
LW
7946 case KEY_y:
7947 s = scan_trans(s);
7948 TERM(sublex_start());
7949 }
49dc05e3 7950 }}
79072805 7951}
bf4acbe4
GS
7952#ifdef __SC__
7953#pragma segment Main
7954#endif
79072805 7955
e930465f
JH
7956static int
7957S_pending_ident(pTHX)
8eceec63 7958{
97aff369 7959 dVAR;
8eceec63 7960 register char *d;
bbd11bfc 7961 PADOFFSET tmp = 0;
8eceec63
SC
7962 /* pit holds the identifier we read and pending_ident is reset */
7963 char pit = PL_pending_ident;
9bde8eb0
NC
7964 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7965 /* All routes through this function want to know if there is a colon. */
c099d646 7966 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7967 PL_pending_ident = 0;
7968
cd81e915 7969 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7970 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7971 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7972
7973 /* if we're in a my(), we can't allow dynamics here.
7974 $foo'bar has already been turned into $foo::bar, so
7975 just check for colons.
7976
7977 if it's a legal name, the OP is a PADANY.
7978 */
7979 if (PL_in_my) {
7980 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7981 if (has_colon)
8eceec63
SC
7982 yyerror(Perl_form(aTHX_ "No package name allowed for "
7983 "variable %s in \"our\"",
7984 PL_tokenbuf));
d6447115 7985 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7986 }
7987 else {
9bde8eb0 7988 if (has_colon)
952306ac
RGS
7989 yyerror(Perl_form(aTHX_ PL_no_myglob,
7990 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7991
6154021b 7992 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7993 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7994 return PRIVATEREF;
7995 }
7996 }
7997
7998 /*
7999 build the ops for accesses to a my() variable.
8000
8001 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8002 then used in a comparison. This catches most, but not
8003 all cases. For instance, it catches
8004 sort { my($a); $a <=> $b }
8005 but not
8006 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8007 (although why you'd do that is anyone's guess).
8008 */
8009
9bde8eb0 8010 if (!has_colon) {
8716503d 8011 if (!PL_in_my)
f8f98e0a 8012 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8013 if (tmp != NOT_IN_PAD) {
8eceec63 8014 /* might be an "our" variable" */
00b1698f 8015 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8016 /* build ops for a bareword */
b64e5050
AL
8017 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8018 HEK * const stashname = HvNAME_HEK(stash);
8019 SV * const sym = newSVhek(stashname);
396482e1 8020 sv_catpvs(sym, "::");
9bde8eb0 8021 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8022 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8023 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8024 gv_fetchsv(sym,
8eceec63
SC
8025 (PL_in_eval
8026 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8027 : GV_ADDMULTI
8eceec63
SC
8028 ),
8029 ((PL_tokenbuf[0] == '$') ? SVt_PV
8030 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8031 : SVt_PVHV));
8032 return WORD;
8033 }
8034
8035 /* if it's a sort block and they're naming $a or $b */
8036 if (PL_last_lop_op == OP_SORT &&
8037 PL_tokenbuf[0] == '$' &&
8038 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8039 && !PL_tokenbuf[2])
8040 {
8041 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8042 d < PL_bufend && *d != '\n';
8043 d++)
8044 {
8045 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8046 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8047 PL_tokenbuf);
8048 }
8049 }
8050 }
8051
6154021b
RGS
8052 pl_yylval.opval = newOP(OP_PADANY, 0);
8053 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8054 return PRIVATEREF;
8055 }
8056 }
8057
8058 /*
8059 Whine if they've said @foo in a doublequoted string,
8060 and @foo isn't a variable we can find in the symbol
8061 table.
8062 */
d824713b
NC
8063 if (ckWARN(WARN_AMBIGUOUS) &&
8064 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8065 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8066 SVt_PVAV);
8eceec63 8067 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8068 /* DO NOT warn for @- and @+ */
8069 && !( PL_tokenbuf[2] == '\0' &&
8070 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8071 )
8eceec63
SC
8072 {
8073 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8074 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8075 "Possible unintended interpolation of %s in string",
8076 PL_tokenbuf);
8eceec63
SC
8077 }
8078 }
8079
8080 /* build ops for a bareword */
6154021b 8081 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8082 tokenbuf_len - 1));
6154021b 8083 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8084 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8085 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8086 ((PL_tokenbuf[0] == '$') ? SVt_PV
8087 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8088 : SVt_PVHV));
8eceec63
SC
8089 return WORD;
8090}
8091
4c3bbe0f
MHM
8092/*
8093 * The following code was generated by perl_keyword.pl.
8094 */
e2e1dd5a 8095
79072805 8096I32
5458a98a 8097Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8098{
952306ac 8099 dVAR;
7918f24d
NC
8100
8101 PERL_ARGS_ASSERT_KEYWORD;
8102
4c3bbe0f
MHM
8103 switch (len)
8104 {
8105 case 1: /* 5 tokens of length 1 */
8106 switch (name[0])
e2e1dd5a 8107 {
4c3bbe0f
MHM
8108 case 'm':
8109 { /* m */
8110 return KEY_m;
8111 }
8112
4c3bbe0f
MHM
8113 case 'q':
8114 { /* q */
8115 return KEY_q;
8116 }
8117
4c3bbe0f
MHM
8118 case 's':
8119 { /* s */
8120 return KEY_s;
8121 }
8122
4c3bbe0f
MHM
8123 case 'x':
8124 { /* x */
8125 return -KEY_x;
8126 }
8127
4c3bbe0f
MHM
8128 case 'y':
8129 { /* y */
8130 return KEY_y;
8131 }
8132
4c3bbe0f
MHM
8133 default:
8134 goto unknown;
e2e1dd5a 8135 }
4c3bbe0f
MHM
8136
8137 case 2: /* 18 tokens of length 2 */
8138 switch (name[0])
e2e1dd5a 8139 {
4c3bbe0f
MHM
8140 case 'd':
8141 if (name[1] == 'o')
8142 { /* do */
8143 return KEY_do;
8144 }
8145
8146 goto unknown;
8147
8148 case 'e':
8149 if (name[1] == 'q')
8150 { /* eq */
8151 return -KEY_eq;
8152 }
8153
8154 goto unknown;
8155
8156 case 'g':
8157 switch (name[1])
8158 {
8159 case 'e':
8160 { /* ge */
8161 return -KEY_ge;
8162 }
8163
4c3bbe0f
MHM
8164 case 't':
8165 { /* gt */
8166 return -KEY_gt;
8167 }
8168
4c3bbe0f
MHM
8169 default:
8170 goto unknown;
8171 }
8172
8173 case 'i':
8174 if (name[1] == 'f')
8175 { /* if */
8176 return KEY_if;
8177 }
8178
8179 goto unknown;
8180
8181 case 'l':
8182 switch (name[1])
8183 {
8184 case 'c':
8185 { /* lc */
8186 return -KEY_lc;
8187 }
8188
4c3bbe0f
MHM
8189 case 'e':
8190 { /* le */
8191 return -KEY_le;
8192 }
8193
4c3bbe0f
MHM
8194 case 't':
8195 { /* lt */
8196 return -KEY_lt;
8197 }
8198
4c3bbe0f
MHM
8199 default:
8200 goto unknown;
8201 }
8202
8203 case 'm':
8204 if (name[1] == 'y')
8205 { /* my */
8206 return KEY_my;
8207 }
8208
8209 goto unknown;
8210
8211 case 'n':
8212 switch (name[1])
8213 {
8214 case 'e':
8215 { /* ne */
8216 return -KEY_ne;
8217 }
8218
4c3bbe0f
MHM
8219 case 'o':
8220 { /* no */
8221 return KEY_no;
8222 }
8223
4c3bbe0f
MHM
8224 default:
8225 goto unknown;
8226 }
8227
8228 case 'o':
8229 if (name[1] == 'r')
8230 { /* or */
8231 return -KEY_or;
8232 }
8233
8234 goto unknown;
8235
8236 case 'q':
8237 switch (name[1])
8238 {
8239 case 'q':
8240 { /* qq */
8241 return KEY_qq;
8242 }
8243
4c3bbe0f
MHM
8244 case 'r':
8245 { /* qr */
8246 return KEY_qr;
8247 }
8248
4c3bbe0f
MHM
8249 case 'w':
8250 { /* qw */
8251 return KEY_qw;
8252 }
8253
4c3bbe0f
MHM
8254 case 'x':
8255 { /* qx */
8256 return KEY_qx;
8257 }
8258
4c3bbe0f
MHM
8259 default:
8260 goto unknown;
8261 }
8262
8263 case 't':
8264 if (name[1] == 'r')
8265 { /* tr */
8266 return KEY_tr;
8267 }
8268
8269 goto unknown;
8270
8271 case 'u':
8272 if (name[1] == 'c')
8273 { /* uc */
8274 return -KEY_uc;
8275 }
8276
8277 goto unknown;
8278
8279 default:
8280 goto unknown;
e2e1dd5a 8281 }
4c3bbe0f 8282
0d863452 8283 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8284 switch (name[0])
e2e1dd5a 8285 {
4c3bbe0f
MHM
8286 case 'E':
8287 if (name[1] == 'N' &&
8288 name[2] == 'D')
8289 { /* END */
8290 return KEY_END;
8291 }
8292
8293 goto unknown;
8294
8295 case 'a':
8296 switch (name[1])
8297 {
8298 case 'b':
8299 if (name[2] == 's')
8300 { /* abs */
8301 return -KEY_abs;
8302 }
8303
8304 goto unknown;
8305
8306 case 'n':
8307 if (name[2] == 'd')
8308 { /* and */
8309 return -KEY_and;
8310 }
8311
8312 goto unknown;
8313
8314 default:
8315 goto unknown;
8316 }
8317
8318 case 'c':
8319 switch (name[1])
8320 {
8321 case 'h':
8322 if (name[2] == 'r')
8323 { /* chr */
8324 return -KEY_chr;
8325 }
8326
8327 goto unknown;
8328
8329 case 'm':
8330 if (name[2] == 'p')
8331 { /* cmp */
8332 return -KEY_cmp;
8333 }
8334
8335 goto unknown;
8336
8337 case 'o':
8338 if (name[2] == 's')
8339 { /* cos */
8340 return -KEY_cos;
8341 }
8342
8343 goto unknown;
8344
8345 default:
8346 goto unknown;
8347 }
8348
8349 case 'd':
8350 if (name[1] == 'i' &&
8351 name[2] == 'e')
8352 { /* die */
8353 return -KEY_die;
8354 }
8355
8356 goto unknown;
8357
8358 case 'e':
8359 switch (name[1])
8360 {
8361 case 'o':
8362 if (name[2] == 'f')
8363 { /* eof */
8364 return -KEY_eof;
8365 }
8366
8367 goto unknown;
8368
4c3bbe0f
MHM
8369 case 'x':
8370 if (name[2] == 'p')
8371 { /* exp */
8372 return -KEY_exp;
8373 }
8374
8375 goto unknown;
8376
8377 default:
8378 goto unknown;
8379 }
8380
8381 case 'f':
8382 if (name[1] == 'o' &&
8383 name[2] == 'r')
8384 { /* for */
8385 return KEY_for;
8386 }
8387
8388 goto unknown;
8389
8390 case 'h':
8391 if (name[1] == 'e' &&
8392 name[2] == 'x')
8393 { /* hex */
8394 return -KEY_hex;
8395 }
8396
8397 goto unknown;
8398
8399 case 'i':
8400 if (name[1] == 'n' &&
8401 name[2] == 't')
8402 { /* int */
8403 return -KEY_int;
8404 }
8405
8406 goto unknown;
8407
8408 case 'l':
8409 if (name[1] == 'o' &&
8410 name[2] == 'g')
8411 { /* log */
8412 return -KEY_log;
8413 }
8414
8415 goto unknown;
8416
8417 case 'm':
8418 if (name[1] == 'a' &&
8419 name[2] == 'p')
8420 { /* map */
8421 return KEY_map;
8422 }
8423
8424 goto unknown;
8425
8426 case 'n':
8427 if (name[1] == 'o' &&
8428 name[2] == 't')
8429 { /* not */
8430 return -KEY_not;
8431 }
8432
8433 goto unknown;
8434
8435 case 'o':
8436 switch (name[1])
8437 {
8438 case 'c':
8439 if (name[2] == 't')
8440 { /* oct */
8441 return -KEY_oct;
8442 }
8443
8444 goto unknown;
8445
8446 case 'r':
8447 if (name[2] == 'd')
8448 { /* ord */
8449 return -KEY_ord;
8450 }
8451
8452 goto unknown;
8453
8454 case 'u':
8455 if (name[2] == 'r')
8456 { /* our */
8457 return KEY_our;
8458 }
8459
8460 goto unknown;
8461
8462 default:
8463 goto unknown;
8464 }
8465
8466 case 'p':
8467 if (name[1] == 'o')
8468 {
8469 switch (name[2])
8470 {
8471 case 'p':
8472 { /* pop */
8473 return -KEY_pop;
8474 }
8475
4c3bbe0f
MHM
8476 case 's':
8477 { /* pos */
8478 return KEY_pos;
8479 }
8480
4c3bbe0f
MHM
8481 default:
8482 goto unknown;
8483 }
8484 }
8485
8486 goto unknown;
8487
8488 case 'r':
8489 if (name[1] == 'e' &&
8490 name[2] == 'f')
8491 { /* ref */
8492 return -KEY_ref;
8493 }
8494
8495 goto unknown;
8496
8497 case 's':
8498 switch (name[1])
8499 {
0d863452
RH
8500 case 'a':
8501 if (name[2] == 'y')
8502 { /* say */
e3e804c9 8503 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8504 }
8505
8506 goto unknown;
8507
4c3bbe0f
MHM
8508 case 'i':
8509 if (name[2] == 'n')
8510 { /* sin */
8511 return -KEY_sin;
8512 }
8513
8514 goto unknown;
8515
8516 case 'u':
8517 if (name[2] == 'b')
8518 { /* sub */
8519 return KEY_sub;
8520 }
8521
8522 goto unknown;
8523
8524 default:
8525 goto unknown;
8526 }
8527
8528 case 't':
8529 if (name[1] == 'i' &&
8530 name[2] == 'e')
8531 { /* tie */
1db4d195 8532 return -KEY_tie;
4c3bbe0f
MHM
8533 }
8534
8535 goto unknown;
8536
8537 case 'u':
8538 if (name[1] == 's' &&
8539 name[2] == 'e')
8540 { /* use */
8541 return KEY_use;
8542 }
8543
8544 goto unknown;
8545
8546 case 'v':
8547 if (name[1] == 'e' &&
8548 name[2] == 'c')
8549 { /* vec */
8550 return -KEY_vec;
8551 }
8552
8553 goto unknown;
8554
8555 case 'x':
8556 if (name[1] == 'o' &&
8557 name[2] == 'r')
8558 { /* xor */
8559 return -KEY_xor;
8560 }
8561
8562 goto unknown;
8563
8564 default:
8565 goto unknown;
e2e1dd5a 8566 }
4c3bbe0f 8567
0d863452 8568 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8569 switch (name[0])
e2e1dd5a 8570 {
4c3bbe0f
MHM
8571 case 'C':
8572 if (name[1] == 'O' &&
8573 name[2] == 'R' &&
8574 name[3] == 'E')
8575 { /* CORE */
8576 return -KEY_CORE;
8577 }
8578
8579 goto unknown;
8580
8581 case 'I':
8582 if (name[1] == 'N' &&
8583 name[2] == 'I' &&
8584 name[3] == 'T')
8585 { /* INIT */
8586 return KEY_INIT;
8587 }
8588
8589 goto unknown;
8590
8591 case 'b':
8592 if (name[1] == 'i' &&
8593 name[2] == 'n' &&
8594 name[3] == 'd')
8595 { /* bind */
8596 return -KEY_bind;
8597 }
8598
8599 goto unknown;
8600
8601 case 'c':
8602 if (name[1] == 'h' &&
8603 name[2] == 'o' &&
8604 name[3] == 'p')
8605 { /* chop */
8606 return -KEY_chop;
8607 }
8608
8609 goto unknown;
8610
8611 case 'd':
8612 if (name[1] == 'u' &&
8613 name[2] == 'm' &&
8614 name[3] == 'p')
8615 { /* dump */
8616 return -KEY_dump;
8617 }
8618
8619 goto unknown;
8620
8621 case 'e':
8622 switch (name[1])
8623 {
8624 case 'a':
8625 if (name[2] == 'c' &&
8626 name[3] == 'h')
8627 { /* each */
8628 return -KEY_each;
8629 }
8630
8631 goto unknown;
8632
8633 case 'l':
8634 if (name[2] == 's' &&
8635 name[3] == 'e')
8636 { /* else */
8637 return KEY_else;
8638 }
8639
8640 goto unknown;
8641
8642 case 'v':
8643 if (name[2] == 'a' &&
8644 name[3] == 'l')
8645 { /* eval */
8646 return KEY_eval;
8647 }
8648
8649 goto unknown;
8650
8651 case 'x':
8652 switch (name[2])
8653 {
8654 case 'e':
8655 if (name[3] == 'c')
8656 { /* exec */
8657 return -KEY_exec;
8658 }
8659
8660 goto unknown;
8661
8662 case 'i':
8663 if (name[3] == 't')
8664 { /* exit */
8665 return -KEY_exit;
8666 }
8667
8668 goto unknown;
8669
8670 default:
8671 goto unknown;
8672 }
8673
8674 default:
8675 goto unknown;
8676 }
8677
8678 case 'f':
8679 if (name[1] == 'o' &&
8680 name[2] == 'r' &&
8681 name[3] == 'k')
8682 { /* fork */
8683 return -KEY_fork;
8684 }
8685
8686 goto unknown;
8687
8688 case 'g':
8689 switch (name[1])
8690 {
8691 case 'e':
8692 if (name[2] == 't' &&
8693 name[3] == 'c')
8694 { /* getc */
8695 return -KEY_getc;
8696 }
8697
8698 goto unknown;
8699
8700 case 'l':
8701 if (name[2] == 'o' &&
8702 name[3] == 'b')
8703 { /* glob */
8704 return KEY_glob;
8705 }
8706
8707 goto unknown;
8708
8709 case 'o':
8710 if (name[2] == 't' &&
8711 name[3] == 'o')
8712 { /* goto */
8713 return KEY_goto;
8714 }
8715
8716 goto unknown;
8717
8718 case 'r':
8719 if (name[2] == 'e' &&
8720 name[3] == 'p')
8721 { /* grep */
8722 return KEY_grep;
8723 }
8724
8725 goto unknown;
8726
8727 default:
8728 goto unknown;
8729 }
8730
8731 case 'j':
8732 if (name[1] == 'o' &&
8733 name[2] == 'i' &&
8734 name[3] == 'n')
8735 { /* join */
8736 return -KEY_join;
8737 }
8738
8739 goto unknown;
8740
8741 case 'k':
8742 switch (name[1])
8743 {
8744 case 'e':
8745 if (name[2] == 'y' &&
8746 name[3] == 's')
8747 { /* keys */
8748 return -KEY_keys;
8749 }
8750
8751 goto unknown;
8752
8753 case 'i':
8754 if (name[2] == 'l' &&
8755 name[3] == 'l')
8756 { /* kill */
8757 return -KEY_kill;
8758 }
8759
8760 goto unknown;
8761
8762 default:
8763 goto unknown;
8764 }
8765
8766 case 'l':
8767 switch (name[1])
8768 {
8769 case 'a':
8770 if (name[2] == 's' &&
8771 name[3] == 't')
8772 { /* last */
8773 return KEY_last;
8774 }
8775
8776 goto unknown;
8777
8778 case 'i':
8779 if (name[2] == 'n' &&
8780 name[3] == 'k')
8781 { /* link */
8782 return -KEY_link;
8783 }
8784
8785 goto unknown;
8786
8787 case 'o':
8788 if (name[2] == 'c' &&
8789 name[3] == 'k')
8790 { /* lock */
8791 return -KEY_lock;
8792 }
8793
8794 goto unknown;
8795
8796 default:
8797 goto unknown;
8798 }
8799
8800 case 'n':
8801 if (name[1] == 'e' &&
8802 name[2] == 'x' &&
8803 name[3] == 't')
8804 { /* next */
8805 return KEY_next;
8806 }
8807
8808 goto unknown;
8809
8810 case 'o':
8811 if (name[1] == 'p' &&
8812 name[2] == 'e' &&
8813 name[3] == 'n')
8814 { /* open */
8815 return -KEY_open;
8816 }
8817
8818 goto unknown;
8819
8820 case 'p':
8821 switch (name[1])
8822 {
8823 case 'a':
8824 if (name[2] == 'c' &&
8825 name[3] == 'k')
8826 { /* pack */
8827 return -KEY_pack;
8828 }
8829
8830 goto unknown;
8831
8832 case 'i':
8833 if (name[2] == 'p' &&
8834 name[3] == 'e')
8835 { /* pipe */
8836 return -KEY_pipe;
8837 }
8838
8839 goto unknown;
8840
8841 case 'u':
8842 if (name[2] == 's' &&
8843 name[3] == 'h')
8844 { /* push */
8845 return -KEY_push;
8846 }
8847
8848 goto unknown;
8849
8850 default:
8851 goto unknown;
8852 }
8853
8854 case 'r':
8855 switch (name[1])
8856 {
8857 case 'a':
8858 if (name[2] == 'n' &&
8859 name[3] == 'd')
8860 { /* rand */
8861 return -KEY_rand;
8862 }
8863
8864 goto unknown;
8865
8866 case 'e':
8867 switch (name[2])
8868 {
8869 case 'a':
8870 if (name[3] == 'd')
8871 { /* read */
8872 return -KEY_read;
8873 }
8874
8875 goto unknown;
8876
8877 case 'c':
8878 if (name[3] == 'v')
8879 { /* recv */
8880 return -KEY_recv;
8881 }
8882
8883 goto unknown;
8884
8885 case 'd':
8886 if (name[3] == 'o')
8887 { /* redo */
8888 return KEY_redo;
8889 }
8890
8891 goto unknown;
8892
8893 default:
8894 goto unknown;
8895 }
8896
8897 default:
8898 goto unknown;
8899 }
8900
8901 case 's':
8902 switch (name[1])
8903 {
8904 case 'e':
8905 switch (name[2])
8906 {
8907 case 'e':
8908 if (name[3] == 'k')
8909 { /* seek */
8910 return -KEY_seek;
8911 }
8912
8913 goto unknown;
8914
8915 case 'n':
8916 if (name[3] == 'd')
8917 { /* send */
8918 return -KEY_send;
8919 }
8920
8921 goto unknown;
8922
8923 default:
8924 goto unknown;
8925 }
8926
8927 case 'o':
8928 if (name[2] == 'r' &&
8929 name[3] == 't')
8930 { /* sort */
8931 return KEY_sort;
8932 }
8933
8934 goto unknown;
8935
8936 case 'q':
8937 if (name[2] == 'r' &&
8938 name[3] == 't')
8939 { /* sqrt */
8940 return -KEY_sqrt;
8941 }
8942
8943 goto unknown;
8944
8945 case 't':
8946 if (name[2] == 'a' &&
8947 name[3] == 't')
8948 { /* stat */
8949 return -KEY_stat;
8950 }
8951
8952 goto unknown;
8953
8954 default:
8955 goto unknown;
8956 }
8957
8958 case 't':
8959 switch (name[1])
8960 {
8961 case 'e':
8962 if (name[2] == 'l' &&
8963 name[3] == 'l')
8964 { /* tell */
8965 return -KEY_tell;
8966 }
8967
8968 goto unknown;
8969
8970 case 'i':
8971 switch (name[2])
8972 {
8973 case 'e':
8974 if (name[3] == 'd')
8975 { /* tied */
1db4d195 8976 return -KEY_tied;
4c3bbe0f
MHM
8977 }
8978
8979 goto unknown;
8980
8981 case 'm':
8982 if (name[3] == 'e')
8983 { /* time */
8984 return -KEY_time;
8985 }
8986
8987 goto unknown;
8988
8989 default:
8990 goto unknown;
8991 }
8992
8993 default:
8994 goto unknown;
8995 }
8996
8997 case 'w':
0d863452 8998 switch (name[1])
4c3bbe0f 8999 {
0d863452 9000 case 'a':
952306ac
RGS
9001 switch (name[2])
9002 {
9003 case 'i':
9004 if (name[3] == 't')
9005 { /* wait */
9006 return -KEY_wait;
9007 }
4c3bbe0f 9008
952306ac 9009 goto unknown;
4c3bbe0f 9010
952306ac
RGS
9011 case 'r':
9012 if (name[3] == 'n')
9013 { /* warn */
9014 return -KEY_warn;
9015 }
4c3bbe0f 9016
952306ac 9017 goto unknown;
4c3bbe0f 9018
952306ac
RGS
9019 default:
9020 goto unknown;
9021 }
0d863452
RH
9022
9023 case 'h':
9024 if (name[2] == 'e' &&
9025 name[3] == 'n')
9026 { /* when */
5458a98a 9027 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9028 }
4c3bbe0f 9029
952306ac 9030 goto unknown;
4c3bbe0f 9031
952306ac
RGS
9032 default:
9033 goto unknown;
9034 }
4c3bbe0f 9035
0d863452
RH
9036 default:
9037 goto unknown;
9038 }
9039
952306ac 9040 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9041 switch (name[0])
e2e1dd5a 9042 {
4c3bbe0f
MHM
9043 case 'B':
9044 if (name[1] == 'E' &&
9045 name[2] == 'G' &&
9046 name[3] == 'I' &&
9047 name[4] == 'N')
9048 { /* BEGIN */
9049 return KEY_BEGIN;
9050 }
9051
9052 goto unknown;
9053
9054 case 'C':
9055 if (name[1] == 'H' &&
9056 name[2] == 'E' &&
9057 name[3] == 'C' &&
9058 name[4] == 'K')
9059 { /* CHECK */
9060 return KEY_CHECK;
9061 }
9062
9063 goto unknown;
9064
9065 case 'a':
9066 switch (name[1])
9067 {
9068 case 'l':
9069 if (name[2] == 'a' &&
9070 name[3] == 'r' &&
9071 name[4] == 'm')
9072 { /* alarm */
9073 return -KEY_alarm;
9074 }
9075
9076 goto unknown;
9077
9078 case 't':
9079 if (name[2] == 'a' &&
9080 name[3] == 'n' &&
9081 name[4] == '2')
9082 { /* atan2 */
9083 return -KEY_atan2;
9084 }
9085
9086 goto unknown;
9087
9088 default:
9089 goto unknown;
9090 }
9091
9092 case 'b':
0d863452
RH
9093 switch (name[1])
9094 {
9095 case 'l':
9096 if (name[2] == 'e' &&
952306ac
RGS
9097 name[3] == 's' &&
9098 name[4] == 's')
9099 { /* bless */
9100 return -KEY_bless;
9101 }
4c3bbe0f 9102
952306ac 9103 goto unknown;
4c3bbe0f 9104
0d863452
RH
9105 case 'r':
9106 if (name[2] == 'e' &&
9107 name[3] == 'a' &&
9108 name[4] == 'k')
9109 { /* break */
5458a98a 9110 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9111 }
9112
9113 goto unknown;
9114
9115 default:
9116 goto unknown;
9117 }
9118
4c3bbe0f
MHM
9119 case 'c':
9120 switch (name[1])
9121 {
9122 case 'h':
9123 switch (name[2])
9124 {
9125 case 'd':
9126 if (name[3] == 'i' &&
9127 name[4] == 'r')
9128 { /* chdir */
9129 return -KEY_chdir;
9130 }
9131
9132 goto unknown;
9133
9134 case 'm':
9135 if (name[3] == 'o' &&
9136 name[4] == 'd')
9137 { /* chmod */
9138 return -KEY_chmod;
9139 }
9140
9141 goto unknown;
9142
9143 case 'o':
9144 switch (name[3])
9145 {
9146 case 'm':
9147 if (name[4] == 'p')
9148 { /* chomp */
9149 return -KEY_chomp;
9150 }
9151
9152 goto unknown;
9153
9154 case 'w':
9155 if (name[4] == 'n')
9156 { /* chown */
9157 return -KEY_chown;
9158 }
9159
9160 goto unknown;
9161
9162 default:
9163 goto unknown;
9164 }
9165
9166 default:
9167 goto unknown;
9168 }
9169
9170 case 'l':
9171 if (name[2] == 'o' &&
9172 name[3] == 's' &&
9173 name[4] == 'e')
9174 { /* close */
9175 return -KEY_close;
9176 }
9177
9178 goto unknown;
9179
9180 case 'r':
9181 if (name[2] == 'y' &&
9182 name[3] == 'p' &&
9183 name[4] == 't')
9184 { /* crypt */
9185 return -KEY_crypt;
9186 }
9187
9188 goto unknown;
9189
9190 default:
9191 goto unknown;
9192 }
9193
9194 case 'e':
9195 if (name[1] == 'l' &&
9196 name[2] == 's' &&
9197 name[3] == 'i' &&
9198 name[4] == 'f')
9199 { /* elsif */
9200 return KEY_elsif;
9201 }
9202
9203 goto unknown;
9204
9205 case 'f':
9206 switch (name[1])
9207 {
9208 case 'c':
9209 if (name[2] == 'n' &&
9210 name[3] == 't' &&
9211 name[4] == 'l')
9212 { /* fcntl */
9213 return -KEY_fcntl;
9214 }
9215
9216 goto unknown;
9217
9218 case 'l':
9219 if (name[2] == 'o' &&
9220 name[3] == 'c' &&
9221 name[4] == 'k')
9222 { /* flock */
9223 return -KEY_flock;
9224 }
9225
9226 goto unknown;
9227
9228 default:
9229 goto unknown;
9230 }
9231
0d863452
RH
9232 case 'g':
9233 if (name[1] == 'i' &&
9234 name[2] == 'v' &&
9235 name[3] == 'e' &&
9236 name[4] == 'n')
9237 { /* given */
5458a98a 9238 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9239 }
9240
9241 goto unknown;
9242
4c3bbe0f
MHM
9243 case 'i':
9244 switch (name[1])
9245 {
9246 case 'n':
9247 if (name[2] == 'd' &&
9248 name[3] == 'e' &&
9249 name[4] == 'x')
9250 { /* index */
9251 return -KEY_index;
9252 }
9253
9254 goto unknown;
9255
9256 case 'o':
9257 if (name[2] == 'c' &&
9258 name[3] == 't' &&
9259 name[4] == 'l')
9260 { /* ioctl */
9261 return -KEY_ioctl;
9262 }
9263
9264 goto unknown;
9265
9266 default:
9267 goto unknown;
9268 }
9269
9270 case 'l':
9271 switch (name[1])
9272 {
9273 case 'o':
9274 if (name[2] == 'c' &&
9275 name[3] == 'a' &&
9276 name[4] == 'l')
9277 { /* local */
9278 return KEY_local;
9279 }
9280
9281 goto unknown;
9282
9283 case 's':
9284 if (name[2] == 't' &&
9285 name[3] == 'a' &&
9286 name[4] == 't')
9287 { /* lstat */
9288 return -KEY_lstat;
9289 }
9290
9291 goto unknown;
9292
9293 default:
9294 goto unknown;
9295 }
9296
9297 case 'm':
9298 if (name[1] == 'k' &&
9299 name[2] == 'd' &&
9300 name[3] == 'i' &&
9301 name[4] == 'r')
9302 { /* mkdir */
9303 return -KEY_mkdir;
9304 }
9305
9306 goto unknown;
9307
9308 case 'p':
9309 if (name[1] == 'r' &&
9310 name[2] == 'i' &&
9311 name[3] == 'n' &&
9312 name[4] == 't')
9313 { /* print */
9314 return KEY_print;
9315 }
9316
9317 goto unknown;
9318
9319 case 'r':
9320 switch (name[1])
9321 {
9322 case 'e':
9323 if (name[2] == 's' &&
9324 name[3] == 'e' &&
9325 name[4] == 't')
9326 { /* reset */
9327 return -KEY_reset;
9328 }
9329
9330 goto unknown;
9331
9332 case 'm':
9333 if (name[2] == 'd' &&
9334 name[3] == 'i' &&
9335 name[4] == 'r')
9336 { /* rmdir */
9337 return -KEY_rmdir;
9338 }
9339
9340 goto unknown;
9341
9342 default:
9343 goto unknown;
9344 }
9345
9346 case 's':
9347 switch (name[1])
9348 {
9349 case 'e':
9350 if (name[2] == 'm' &&
9351 name[3] == 'o' &&
9352 name[4] == 'p')
9353 { /* semop */
9354 return -KEY_semop;
9355 }
9356
9357 goto unknown;
9358
9359 case 'h':
9360 if (name[2] == 'i' &&
9361 name[3] == 'f' &&
9362 name[4] == 't')
9363 { /* shift */
9364 return -KEY_shift;
9365 }
9366
9367 goto unknown;
9368
9369 case 'l':
9370 if (name[2] == 'e' &&
9371 name[3] == 'e' &&
9372 name[4] == 'p')
9373 { /* sleep */
9374 return -KEY_sleep;
9375 }
9376
9377 goto unknown;
9378
9379 case 'p':
9380 if (name[2] == 'l' &&
9381 name[3] == 'i' &&
9382 name[4] == 't')
9383 { /* split */
9384 return KEY_split;
9385 }
9386
9387 goto unknown;
9388
9389 case 'r':
9390 if (name[2] == 'a' &&
9391 name[3] == 'n' &&
9392 name[4] == 'd')
9393 { /* srand */
9394 return -KEY_srand;
9395 }
9396
9397 goto unknown;
9398
9399 case 't':
952306ac
RGS
9400 switch (name[2])
9401 {
9402 case 'a':
9403 if (name[3] == 't' &&
9404 name[4] == 'e')
9405 { /* state */
5458a98a 9406 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9407 }
4c3bbe0f 9408
952306ac
RGS
9409 goto unknown;
9410
9411 case 'u':
9412 if (name[3] == 'd' &&
9413 name[4] == 'y')
9414 { /* study */
9415 return KEY_study;
9416 }
9417
9418 goto unknown;
9419
9420 default:
9421 goto unknown;
9422 }
4c3bbe0f
MHM
9423
9424 default:
9425 goto unknown;
9426 }
9427
9428 case 't':
9429 if (name[1] == 'i' &&
9430 name[2] == 'm' &&
9431 name[3] == 'e' &&
9432 name[4] == 's')
9433 { /* times */
9434 return -KEY_times;
9435 }
9436
9437 goto unknown;
9438
9439 case 'u':
9440 switch (name[1])
9441 {
9442 case 'm':
9443 if (name[2] == 'a' &&
9444 name[3] == 's' &&
9445 name[4] == 'k')
9446 { /* umask */
9447 return -KEY_umask;
9448 }
9449
9450 goto unknown;
9451
9452 case 'n':
9453 switch (name[2])
9454 {
9455 case 'd':
9456 if (name[3] == 'e' &&
9457 name[4] == 'f')
9458 { /* undef */
9459 return KEY_undef;
9460 }
9461
9462 goto unknown;
9463
9464 case 't':
9465 if (name[3] == 'i')
9466 {
9467 switch (name[4])
9468 {
9469 case 'e':
9470 { /* untie */
1db4d195 9471 return -KEY_untie;
4c3bbe0f
MHM
9472 }
9473
4c3bbe0f
MHM
9474 case 'l':
9475 { /* until */
9476 return KEY_until;
9477 }
9478
4c3bbe0f
MHM
9479 default:
9480 goto unknown;
9481 }
9482 }
9483
9484 goto unknown;
9485
9486 default:
9487 goto unknown;
9488 }
9489
9490 case 't':
9491 if (name[2] == 'i' &&
9492 name[3] == 'm' &&
9493 name[4] == 'e')
9494 { /* utime */
9495 return -KEY_utime;
9496 }
9497
9498 goto unknown;
9499
9500 default:
9501 goto unknown;
9502 }
9503
9504 case 'w':
9505 switch (name[1])
9506 {
9507 case 'h':
9508 if (name[2] == 'i' &&
9509 name[3] == 'l' &&
9510 name[4] == 'e')
9511 { /* while */
9512 return KEY_while;
9513 }
9514
9515 goto unknown;
9516
9517 case 'r':
9518 if (name[2] == 'i' &&
9519 name[3] == 't' &&
9520 name[4] == 'e')
9521 { /* write */
9522 return -KEY_write;
9523 }
9524
9525 goto unknown;
9526
9527 default:
9528 goto unknown;
9529 }
9530
9531 default:
9532 goto unknown;
e2e1dd5a 9533 }
4c3bbe0f
MHM
9534
9535 case 6: /* 33 tokens of length 6 */
9536 switch (name[0])
9537 {
9538 case 'a':
9539 if (name[1] == 'c' &&
9540 name[2] == 'c' &&
9541 name[3] == 'e' &&
9542 name[4] == 'p' &&
9543 name[5] == 't')
9544 { /* accept */
9545 return -KEY_accept;
9546 }
9547
9548 goto unknown;
9549
9550 case 'c':
9551 switch (name[1])
9552 {
9553 case 'a':
9554 if (name[2] == 'l' &&
9555 name[3] == 'l' &&
9556 name[4] == 'e' &&
9557 name[5] == 'r')
9558 { /* caller */
9559 return -KEY_caller;
9560 }
9561
9562 goto unknown;
9563
9564 case 'h':
9565 if (name[2] == 'r' &&
9566 name[3] == 'o' &&
9567 name[4] == 'o' &&
9568 name[5] == 't')
9569 { /* chroot */
9570 return -KEY_chroot;
9571 }
9572
9573 goto unknown;
9574
9575 default:
9576 goto unknown;
9577 }
9578
9579 case 'd':
9580 if (name[1] == 'e' &&
9581 name[2] == 'l' &&
9582 name[3] == 'e' &&
9583 name[4] == 't' &&
9584 name[5] == 'e')
9585 { /* delete */
9586 return KEY_delete;
9587 }
9588
9589 goto unknown;
9590
9591 case 'e':
9592 switch (name[1])
9593 {
9594 case 'l':
9595 if (name[2] == 's' &&
9596 name[3] == 'e' &&
9597 name[4] == 'i' &&
9598 name[5] == 'f')
9599 { /* elseif */
9b387841 9600 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9601 }
9602
9603 goto unknown;
9604
9605 case 'x':
9606 if (name[2] == 'i' &&
9607 name[3] == 's' &&
9608 name[4] == 't' &&
9609 name[5] == 's')
9610 { /* exists */
9611 return KEY_exists;
9612 }
9613
9614 goto unknown;
9615
9616 default:
9617 goto unknown;
9618 }
9619
9620 case 'f':
9621 switch (name[1])
9622 {
9623 case 'i':
9624 if (name[2] == 'l' &&
9625 name[3] == 'e' &&
9626 name[4] == 'n' &&
9627 name[5] == 'o')
9628 { /* fileno */
9629 return -KEY_fileno;
9630 }
9631
9632 goto unknown;
9633
9634 case 'o':
9635 if (name[2] == 'r' &&
9636 name[3] == 'm' &&
9637 name[4] == 'a' &&
9638 name[5] == 't')
9639 { /* format */
9640 return KEY_format;
9641 }
9642
9643 goto unknown;
9644
9645 default:
9646 goto unknown;
9647 }
9648
9649 case 'g':
9650 if (name[1] == 'm' &&
9651 name[2] == 't' &&
9652 name[3] == 'i' &&
9653 name[4] == 'm' &&
9654 name[5] == 'e')
9655 { /* gmtime */
9656 return -KEY_gmtime;
9657 }
9658
9659 goto unknown;
9660
9661 case 'l':
9662 switch (name[1])
9663 {
9664 case 'e':
9665 if (name[2] == 'n' &&
9666 name[3] == 'g' &&
9667 name[4] == 't' &&
9668 name[5] == 'h')
9669 { /* length */
9670 return -KEY_length;
9671 }
9672
9673 goto unknown;
9674
9675 case 'i':
9676 if (name[2] == 's' &&
9677 name[3] == 't' &&
9678 name[4] == 'e' &&
9679 name[5] == 'n')
9680 { /* listen */
9681 return -KEY_listen;
9682 }
9683
9684 goto unknown;
9685
9686 default:
9687 goto unknown;
9688 }
9689
9690 case 'm':
9691 if (name[1] == 's' &&
9692 name[2] == 'g')
9693 {
9694 switch (name[3])
9695 {
9696 case 'c':
9697 if (name[4] == 't' &&
9698 name[5] == 'l')
9699 { /* msgctl */
9700 return -KEY_msgctl;
9701 }
9702
9703 goto unknown;
9704
9705 case 'g':
9706 if (name[4] == 'e' &&
9707 name[5] == 't')
9708 { /* msgget */
9709 return -KEY_msgget;
9710 }
9711
9712 goto unknown;
9713
9714 case 'r':
9715 if (name[4] == 'c' &&
9716 name[5] == 'v')
9717 { /* msgrcv */
9718 return -KEY_msgrcv;
9719 }
9720
9721 goto unknown;
9722
9723 case 's':
9724 if (name[4] == 'n' &&
9725 name[5] == 'd')
9726 { /* msgsnd */
9727 return -KEY_msgsnd;
9728 }
9729
9730 goto unknown;
9731
9732 default:
9733 goto unknown;
9734 }
9735 }
9736
9737 goto unknown;
9738
9739 case 'p':
9740 if (name[1] == 'r' &&
9741 name[2] == 'i' &&
9742 name[3] == 'n' &&
9743 name[4] == 't' &&
9744 name[5] == 'f')
9745 { /* printf */
9746 return KEY_printf;
9747 }
9748
9749 goto unknown;
9750
9751 case 'r':
9752 switch (name[1])
9753 {
9754 case 'e':
9755 switch (name[2])
9756 {
9757 case 'n':
9758 if (name[3] == 'a' &&
9759 name[4] == 'm' &&
9760 name[5] == 'e')
9761 { /* rename */
9762 return -KEY_rename;
9763 }
9764
9765 goto unknown;
9766
9767 case 't':
9768 if (name[3] == 'u' &&
9769 name[4] == 'r' &&
9770 name[5] == 'n')
9771 { /* return */
9772 return KEY_return;
9773 }
9774
9775 goto unknown;
9776
9777 default:
9778 goto unknown;
9779 }
9780
9781 case 'i':
9782 if (name[2] == 'n' &&
9783 name[3] == 'd' &&
9784 name[4] == 'e' &&
9785 name[5] == 'x')
9786 { /* rindex */
9787 return -KEY_rindex;
9788 }
9789
9790 goto unknown;
9791
9792 default:
9793 goto unknown;
9794 }
9795
9796 case 's':
9797 switch (name[1])
9798 {
9799 case 'c':
9800 if (name[2] == 'a' &&
9801 name[3] == 'l' &&
9802 name[4] == 'a' &&
9803 name[5] == 'r')
9804 { /* scalar */
9805 return KEY_scalar;
9806 }
9807
9808 goto unknown;
9809
9810 case 'e':
9811 switch (name[2])
9812 {
9813 case 'l':
9814 if (name[3] == 'e' &&
9815 name[4] == 'c' &&
9816 name[5] == 't')
9817 { /* select */
9818 return -KEY_select;
9819 }
9820
9821 goto unknown;
9822
9823 case 'm':
9824 switch (name[3])
9825 {
9826 case 'c':
9827 if (name[4] == 't' &&
9828 name[5] == 'l')
9829 { /* semctl */
9830 return -KEY_semctl;
9831 }
9832
9833 goto unknown;
9834
9835 case 'g':
9836 if (name[4] == 'e' &&
9837 name[5] == 't')
9838 { /* semget */
9839 return -KEY_semget;
9840 }
9841
9842 goto unknown;
9843
9844 default:
9845 goto unknown;
9846 }
9847
9848 default:
9849 goto unknown;
9850 }
9851
9852 case 'h':
9853 if (name[2] == 'm')
9854 {
9855 switch (name[3])
9856 {
9857 case 'c':
9858 if (name[4] == 't' &&
9859 name[5] == 'l')
9860 { /* shmctl */
9861 return -KEY_shmctl;
9862 }
9863
9864 goto unknown;
9865
9866 case 'g':
9867 if (name[4] == 'e' &&
9868 name[5] == 't')
9869 { /* shmget */
9870 return -KEY_shmget;
9871 }
9872
9873 goto unknown;
9874
9875 default:
9876 goto unknown;
9877 }
9878 }
9879
9880 goto unknown;
9881
9882 case 'o':
9883 if (name[2] == 'c' &&
9884 name[3] == 'k' &&
9885 name[4] == 'e' &&
9886 name[5] == 't')
9887 { /* socket */
9888 return -KEY_socket;
9889 }
9890
9891 goto unknown;
9892
9893 case 'p':
9894 if (name[2] == 'l' &&
9895 name[3] == 'i' &&
9896 name[4] == 'c' &&
9897 name[5] == 'e')
9898 { /* splice */
9899 return -KEY_splice;
9900 }
9901
9902 goto unknown;
9903
9904 case 'u':
9905 if (name[2] == 'b' &&
9906 name[3] == 's' &&
9907 name[4] == 't' &&
9908 name[5] == 'r')
9909 { /* substr */
9910 return -KEY_substr;
9911 }
9912
9913 goto unknown;
9914
9915 case 'y':
9916 if (name[2] == 's' &&
9917 name[3] == 't' &&
9918 name[4] == 'e' &&
9919 name[5] == 'm')
9920 { /* system */
9921 return -KEY_system;
9922 }
9923
9924 goto unknown;
9925
9926 default:
9927 goto unknown;
9928 }
9929
9930 case 'u':
9931 if (name[1] == 'n')
9932 {
9933 switch (name[2])
9934 {
9935 case 'l':
9936 switch (name[3])
9937 {
9938 case 'e':
9939 if (name[4] == 's' &&
9940 name[5] == 's')
9941 { /* unless */
9942 return KEY_unless;
9943 }
9944
9945 goto unknown;
9946
9947 case 'i':
9948 if (name[4] == 'n' &&
9949 name[5] == 'k')
9950 { /* unlink */
9951 return -KEY_unlink;
9952 }
9953
9954 goto unknown;
9955
9956 default:
9957 goto unknown;
9958 }
9959
9960 case 'p':
9961 if (name[3] == 'a' &&
9962 name[4] == 'c' &&
9963 name[5] == 'k')
9964 { /* unpack */
9965 return -KEY_unpack;
9966 }
9967
9968 goto unknown;
9969
9970 default:
9971 goto unknown;
9972 }
9973 }
9974
9975 goto unknown;
9976
9977 case 'v':
9978 if (name[1] == 'a' &&
9979 name[2] == 'l' &&
9980 name[3] == 'u' &&
9981 name[4] == 'e' &&
9982 name[5] == 's')
9983 { /* values */
9984 return -KEY_values;
9985 }
9986
9987 goto unknown;
9988
9989 default:
9990 goto unknown;
e2e1dd5a 9991 }
4c3bbe0f 9992
0d863452 9993 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9994 switch (name[0])
9995 {
9996 case 'D':
9997 if (name[1] == 'E' &&
9998 name[2] == 'S' &&
9999 name[3] == 'T' &&
10000 name[4] == 'R' &&
10001 name[5] == 'O' &&
10002 name[6] == 'Y')
10003 { /* DESTROY */
10004 return KEY_DESTROY;
10005 }
10006
10007 goto unknown;
10008
10009 case '_':
10010 if (name[1] == '_' &&
10011 name[2] == 'E' &&
10012 name[3] == 'N' &&
10013 name[4] == 'D' &&
10014 name[5] == '_' &&
10015 name[6] == '_')
10016 { /* __END__ */
10017 return KEY___END__;
10018 }
10019
10020 goto unknown;
10021
10022 case 'b':
10023 if (name[1] == 'i' &&
10024 name[2] == 'n' &&
10025 name[3] == 'm' &&
10026 name[4] == 'o' &&
10027 name[5] == 'd' &&
10028 name[6] == 'e')
10029 { /* binmode */
10030 return -KEY_binmode;
10031 }
10032
10033 goto unknown;
10034
10035 case 'c':
10036 if (name[1] == 'o' &&
10037 name[2] == 'n' &&
10038 name[3] == 'n' &&
10039 name[4] == 'e' &&
10040 name[5] == 'c' &&
10041 name[6] == 't')
10042 { /* connect */
10043 return -KEY_connect;
10044 }
10045
10046 goto unknown;
10047
10048 case 'd':
10049 switch (name[1])
10050 {
10051 case 'b':
10052 if (name[2] == 'm' &&
10053 name[3] == 'o' &&
10054 name[4] == 'p' &&
10055 name[5] == 'e' &&
10056 name[6] == 'n')
10057 { /* dbmopen */
10058 return -KEY_dbmopen;
10059 }
10060
10061 goto unknown;
10062
10063 case 'e':
0d863452
RH
10064 if (name[2] == 'f')
10065 {
10066 switch (name[3])
10067 {
10068 case 'a':
10069 if (name[4] == 'u' &&
10070 name[5] == 'l' &&
10071 name[6] == 't')
10072 { /* default */
5458a98a 10073 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10074 }
10075
10076 goto unknown;
10077
10078 case 'i':
10079 if (name[4] == 'n' &&
952306ac
RGS
10080 name[5] == 'e' &&
10081 name[6] == 'd')
10082 { /* defined */
10083 return KEY_defined;
10084 }
4c3bbe0f 10085
952306ac 10086 goto unknown;
4c3bbe0f 10087
952306ac
RGS
10088 default:
10089 goto unknown;
10090 }
0d863452
RH
10091 }
10092
10093 goto unknown;
10094
10095 default:
10096 goto unknown;
10097 }
4c3bbe0f
MHM
10098
10099 case 'f':
10100 if (name[1] == 'o' &&
10101 name[2] == 'r' &&
10102 name[3] == 'e' &&
10103 name[4] == 'a' &&
10104 name[5] == 'c' &&
10105 name[6] == 'h')
10106 { /* foreach */
10107 return KEY_foreach;
10108 }
10109
10110 goto unknown;
10111
10112 case 'g':
10113 if (name[1] == 'e' &&
10114 name[2] == 't' &&
10115 name[3] == 'p')
10116 {
10117 switch (name[4])
10118 {
10119 case 'g':
10120 if (name[5] == 'r' &&
10121 name[6] == 'p')
10122 { /* getpgrp */
10123 return -KEY_getpgrp;
10124 }
10125
10126 goto unknown;
10127
10128 case 'p':
10129 if (name[5] == 'i' &&
10130 name[6] == 'd')
10131 { /* getppid */
10132 return -KEY_getppid;
10133 }
10134
10135 goto unknown;
10136
10137 default:
10138 goto unknown;
10139 }
10140 }
10141
10142 goto unknown;
10143
10144 case 'l':
10145 if (name[1] == 'c' &&
10146 name[2] == 'f' &&
10147 name[3] == 'i' &&
10148 name[4] == 'r' &&
10149 name[5] == 's' &&
10150 name[6] == 't')
10151 { /* lcfirst */
10152 return -KEY_lcfirst;
10153 }
10154
10155 goto unknown;
10156
10157 case 'o':
10158 if (name[1] == 'p' &&
10159 name[2] == 'e' &&
10160 name[3] == 'n' &&
10161 name[4] == 'd' &&
10162 name[5] == 'i' &&
10163 name[6] == 'r')
10164 { /* opendir */
10165 return -KEY_opendir;
10166 }
10167
10168 goto unknown;
10169
10170 case 'p':
10171 if (name[1] == 'a' &&
10172 name[2] == 'c' &&
10173 name[3] == 'k' &&
10174 name[4] == 'a' &&
10175 name[5] == 'g' &&
10176 name[6] == 'e')
10177 { /* package */
10178 return KEY_package;
10179 }
10180
10181 goto unknown;
10182
10183 case 'r':
10184 if (name[1] == 'e')
10185 {
10186 switch (name[2])
10187 {
10188 case 'a':
10189 if (name[3] == 'd' &&
10190 name[4] == 'd' &&
10191 name[5] == 'i' &&
10192 name[6] == 'r')
10193 { /* readdir */
10194 return -KEY_readdir;
10195 }
10196
10197 goto unknown;
10198
10199 case 'q':
10200 if (name[3] == 'u' &&
10201 name[4] == 'i' &&
10202 name[5] == 'r' &&
10203 name[6] == 'e')
10204 { /* require */
10205 return KEY_require;
10206 }
10207
10208 goto unknown;
10209
10210 case 'v':
10211 if (name[3] == 'e' &&
10212 name[4] == 'r' &&
10213 name[5] == 's' &&
10214 name[6] == 'e')
10215 { /* reverse */
10216 return -KEY_reverse;
10217 }
10218
10219 goto unknown;
10220
10221 default:
10222 goto unknown;
10223 }
10224 }
10225
10226 goto unknown;
10227
10228 case 's':
10229 switch (name[1])
10230 {
10231 case 'e':
10232 switch (name[2])
10233 {
10234 case 'e':
10235 if (name[3] == 'k' &&
10236 name[4] == 'd' &&
10237 name[5] == 'i' &&
10238 name[6] == 'r')
10239 { /* seekdir */
10240 return -KEY_seekdir;
10241 }
10242
10243 goto unknown;
10244
10245 case 't':
10246 if (name[3] == 'p' &&
10247 name[4] == 'g' &&
10248 name[5] == 'r' &&
10249 name[6] == 'p')
10250 { /* setpgrp */
10251 return -KEY_setpgrp;
10252 }
10253
10254 goto unknown;
10255
10256 default:
10257 goto unknown;
10258 }
10259
10260 case 'h':
10261 if (name[2] == 'm' &&
10262 name[3] == 'r' &&
10263 name[4] == 'e' &&
10264 name[5] == 'a' &&
10265 name[6] == 'd')
10266 { /* shmread */
10267 return -KEY_shmread;
10268 }
10269
10270 goto unknown;
10271
10272 case 'p':
10273 if (name[2] == 'r' &&
10274 name[3] == 'i' &&
10275 name[4] == 'n' &&
10276 name[5] == 't' &&
10277 name[6] == 'f')
10278 { /* sprintf */
10279 return -KEY_sprintf;
10280 }
10281
10282 goto unknown;
10283
10284 case 'y':
10285 switch (name[2])
10286 {
10287 case 'm':
10288 if (name[3] == 'l' &&
10289 name[4] == 'i' &&
10290 name[5] == 'n' &&
10291 name[6] == 'k')
10292 { /* symlink */
10293 return -KEY_symlink;
10294 }
10295
10296 goto unknown;
10297
10298 case 's':
10299 switch (name[3])
10300 {
10301 case 'c':
10302 if (name[4] == 'a' &&
10303 name[5] == 'l' &&
10304 name[6] == 'l')
10305 { /* syscall */
10306 return -KEY_syscall;
10307 }
10308
10309 goto unknown;
10310
10311 case 'o':
10312 if (name[4] == 'p' &&
10313 name[5] == 'e' &&
10314 name[6] == 'n')
10315 { /* sysopen */
10316 return -KEY_sysopen;
10317 }
10318
10319 goto unknown;
10320
10321 case 'r':
10322 if (name[4] == 'e' &&
10323 name[5] == 'a' &&
10324 name[6] == 'd')
10325 { /* sysread */
10326 return -KEY_sysread;
10327 }
10328
10329 goto unknown;
10330
10331 case 's':
10332 if (name[4] == 'e' &&
10333 name[5] == 'e' &&
10334 name[6] == 'k')
10335 { /* sysseek */
10336 return -KEY_sysseek;
10337 }
10338
10339 goto unknown;
10340
10341 default:
10342 goto unknown;
10343 }
10344
10345 default:
10346 goto unknown;
10347 }
10348
10349 default:
10350 goto unknown;
10351 }
10352
10353 case 't':
10354 if (name[1] == 'e' &&
10355 name[2] == 'l' &&
10356 name[3] == 'l' &&
10357 name[4] == 'd' &&
10358 name[5] == 'i' &&
10359 name[6] == 'r')
10360 { /* telldir */
10361 return -KEY_telldir;
10362 }
10363
10364 goto unknown;
10365
10366 case 'u':
10367 switch (name[1])
10368 {
10369 case 'c':
10370 if (name[2] == 'f' &&
10371 name[3] == 'i' &&
10372 name[4] == 'r' &&
10373 name[5] == 's' &&
10374 name[6] == 't')
10375 { /* ucfirst */
10376 return -KEY_ucfirst;
10377 }
10378
10379 goto unknown;
10380
10381 case 'n':
10382 if (name[2] == 's' &&
10383 name[3] == 'h' &&
10384 name[4] == 'i' &&
10385 name[5] == 'f' &&
10386 name[6] == 't')
10387 { /* unshift */
10388 return -KEY_unshift;
10389 }
10390
10391 goto unknown;
10392
10393 default:
10394 goto unknown;
10395 }
10396
10397 case 'w':
10398 if (name[1] == 'a' &&
10399 name[2] == 'i' &&
10400 name[3] == 't' &&
10401 name[4] == 'p' &&
10402 name[5] == 'i' &&
10403 name[6] == 'd')
10404 { /* waitpid */
10405 return -KEY_waitpid;
10406 }
10407
10408 goto unknown;
10409
10410 default:
10411 goto unknown;
10412 }
10413
10414 case 8: /* 26 tokens of length 8 */
10415 switch (name[0])
10416 {
10417 case 'A':
10418 if (name[1] == 'U' &&
10419 name[2] == 'T' &&
10420 name[3] == 'O' &&
10421 name[4] == 'L' &&
10422 name[5] == 'O' &&
10423 name[6] == 'A' &&
10424 name[7] == 'D')
10425 { /* AUTOLOAD */
10426 return KEY_AUTOLOAD;
10427 }
10428
10429 goto unknown;
10430
10431 case '_':
10432 if (name[1] == '_')
10433 {
10434 switch (name[2])
10435 {
10436 case 'D':
10437 if (name[3] == 'A' &&
10438 name[4] == 'T' &&
10439 name[5] == 'A' &&
10440 name[6] == '_' &&
10441 name[7] == '_')
10442 { /* __DATA__ */
10443 return KEY___DATA__;
10444 }
10445
10446 goto unknown;
10447
10448 case 'F':
10449 if (name[3] == 'I' &&
10450 name[4] == 'L' &&
10451 name[5] == 'E' &&
10452 name[6] == '_' &&
10453 name[7] == '_')
10454 { /* __FILE__ */
10455 return -KEY___FILE__;
10456 }
10457
10458 goto unknown;
10459
10460 case 'L':
10461 if (name[3] == 'I' &&
10462 name[4] == 'N' &&
10463 name[5] == 'E' &&
10464 name[6] == '_' &&
10465 name[7] == '_')
10466 { /* __LINE__ */
10467 return -KEY___LINE__;
10468 }
10469
10470 goto unknown;
10471
10472 default:
10473 goto unknown;
10474 }
10475 }
10476
10477 goto unknown;
10478
10479 case 'c':
10480 switch (name[1])
10481 {
10482 case 'l':
10483 if (name[2] == 'o' &&
10484 name[3] == 's' &&
10485 name[4] == 'e' &&
10486 name[5] == 'd' &&
10487 name[6] == 'i' &&
10488 name[7] == 'r')
10489 { /* closedir */
10490 return -KEY_closedir;
10491 }
10492
10493 goto unknown;
10494
10495 case 'o':
10496 if (name[2] == 'n' &&
10497 name[3] == 't' &&
10498 name[4] == 'i' &&
10499 name[5] == 'n' &&
10500 name[6] == 'u' &&
10501 name[7] == 'e')
10502 { /* continue */
10503 return -KEY_continue;
10504 }
10505
10506 goto unknown;
10507
10508 default:
10509 goto unknown;
10510 }
10511
10512 case 'd':
10513 if (name[1] == 'b' &&
10514 name[2] == 'm' &&
10515 name[3] == 'c' &&
10516 name[4] == 'l' &&
10517 name[5] == 'o' &&
10518 name[6] == 's' &&
10519 name[7] == 'e')
10520 { /* dbmclose */
10521 return -KEY_dbmclose;
10522 }
10523
10524 goto unknown;
10525
10526 case 'e':
10527 if (name[1] == 'n' &&
10528 name[2] == 'd')
10529 {
10530 switch (name[3])
10531 {
10532 case 'g':
10533 if (name[4] == 'r' &&
10534 name[5] == 'e' &&
10535 name[6] == 'n' &&
10536 name[7] == 't')
10537 { /* endgrent */
10538 return -KEY_endgrent;
10539 }
10540
10541 goto unknown;
10542
10543 case 'p':
10544 if (name[4] == 'w' &&
10545 name[5] == 'e' &&
10546 name[6] == 'n' &&
10547 name[7] == 't')
10548 { /* endpwent */
10549 return -KEY_endpwent;
10550 }
10551
10552 goto unknown;
10553
10554 default:
10555 goto unknown;
10556 }
10557 }
10558
10559 goto unknown;
10560
10561 case 'f':
10562 if (name[1] == 'o' &&
10563 name[2] == 'r' &&
10564 name[3] == 'm' &&
10565 name[4] == 'l' &&
10566 name[5] == 'i' &&
10567 name[6] == 'n' &&
10568 name[7] == 'e')
10569 { /* formline */
10570 return -KEY_formline;
10571 }
10572
10573 goto unknown;
10574
10575 case 'g':
10576 if (name[1] == 'e' &&
10577 name[2] == 't')
10578 {
10579 switch (name[3])
10580 {
10581 case 'g':
10582 if (name[4] == 'r')
10583 {
10584 switch (name[5])
10585 {
10586 case 'e':
10587 if (name[6] == 'n' &&
10588 name[7] == 't')
10589 { /* getgrent */
10590 return -KEY_getgrent;
10591 }
10592
10593 goto unknown;
10594
10595 case 'g':
10596 if (name[6] == 'i' &&
10597 name[7] == 'd')
10598 { /* getgrgid */
10599 return -KEY_getgrgid;
10600 }
10601
10602 goto unknown;
10603
10604 case 'n':
10605 if (name[6] == 'a' &&
10606 name[7] == 'm')
10607 { /* getgrnam */
10608 return -KEY_getgrnam;
10609 }
10610
10611 goto unknown;
10612
10613 default:
10614 goto unknown;
10615 }
10616 }
10617
10618 goto unknown;
10619
10620 case 'l':
10621 if (name[4] == 'o' &&
10622 name[5] == 'g' &&
10623 name[6] == 'i' &&
10624 name[7] == 'n')
10625 { /* getlogin */
10626 return -KEY_getlogin;
10627 }
10628
10629 goto unknown;
10630
10631 case 'p':
10632 if (name[4] == 'w')
10633 {
10634 switch (name[5])
10635 {
10636 case 'e':
10637 if (name[6] == 'n' &&
10638 name[7] == 't')
10639 { /* getpwent */
10640 return -KEY_getpwent;
10641 }
10642
10643 goto unknown;
10644
10645 case 'n':
10646 if (name[6] == 'a' &&
10647 name[7] == 'm')
10648 { /* getpwnam */
10649 return -KEY_getpwnam;
10650 }
10651
10652 goto unknown;
10653
10654 case 'u':
10655 if (name[6] == 'i' &&
10656 name[7] == 'd')
10657 { /* getpwuid */
10658 return -KEY_getpwuid;
10659 }
10660
10661 goto unknown;
10662
10663 default:
10664 goto unknown;
10665 }
10666 }
10667
10668 goto unknown;
10669
10670 default:
10671 goto unknown;
10672 }
10673 }
10674
10675 goto unknown;
10676
10677 case 'r':
10678 if (name[1] == 'e' &&
10679 name[2] == 'a' &&
10680 name[3] == 'd')
10681 {
10682 switch (name[4])
10683 {
10684 case 'l':
10685 if (name[5] == 'i' &&
10686 name[6] == 'n')
10687 {
10688 switch (name[7])
10689 {
10690 case 'e':
10691 { /* readline */
10692 return -KEY_readline;
10693 }
10694
4c3bbe0f
MHM
10695 case 'k':
10696 { /* readlink */
10697 return -KEY_readlink;
10698 }
10699
4c3bbe0f
MHM
10700 default:
10701 goto unknown;
10702 }
10703 }
10704
10705 goto unknown;
10706
10707 case 'p':
10708 if (name[5] == 'i' &&
10709 name[6] == 'p' &&
10710 name[7] == 'e')
10711 { /* readpipe */
10712 return -KEY_readpipe;
10713 }
10714
10715 goto unknown;
10716
10717 default:
10718 goto unknown;
10719 }
10720 }
10721
10722 goto unknown;
10723
10724 case 's':
10725 switch (name[1])
10726 {
10727 case 'e':
10728 if (name[2] == 't')
10729 {
10730 switch (name[3])
10731 {
10732 case 'g':
10733 if (name[4] == 'r' &&
10734 name[5] == 'e' &&
10735 name[6] == 'n' &&
10736 name[7] == 't')
10737 { /* setgrent */
10738 return -KEY_setgrent;
10739 }
10740
10741 goto unknown;
10742
10743 case 'p':
10744 if (name[4] == 'w' &&
10745 name[5] == 'e' &&
10746 name[6] == 'n' &&
10747 name[7] == 't')
10748 { /* setpwent */
10749 return -KEY_setpwent;
10750 }
10751
10752 goto unknown;
10753
10754 default:
10755 goto unknown;
10756 }
10757 }
10758
10759 goto unknown;
10760
10761 case 'h':
10762 switch (name[2])
10763 {
10764 case 'm':
10765 if (name[3] == 'w' &&
10766 name[4] == 'r' &&
10767 name[5] == 'i' &&
10768 name[6] == 't' &&
10769 name[7] == 'e')
10770 { /* shmwrite */
10771 return -KEY_shmwrite;
10772 }
10773
10774 goto unknown;
10775
10776 case 'u':
10777 if (name[3] == 't' &&
10778 name[4] == 'd' &&
10779 name[5] == 'o' &&
10780 name[6] == 'w' &&
10781 name[7] == 'n')
10782 { /* shutdown */
10783 return -KEY_shutdown;
10784 }
10785
10786 goto unknown;
10787
10788 default:
10789 goto unknown;
10790 }
10791
10792 case 'y':
10793 if (name[2] == 's' &&
10794 name[3] == 'w' &&
10795 name[4] == 'r' &&
10796 name[5] == 'i' &&
10797 name[6] == 't' &&
10798 name[7] == 'e')
10799 { /* syswrite */
10800 return -KEY_syswrite;
10801 }
10802
10803 goto unknown;
10804
10805 default:
10806 goto unknown;
10807 }
10808
10809 case 't':
10810 if (name[1] == 'r' &&
10811 name[2] == 'u' &&
10812 name[3] == 'n' &&
10813 name[4] == 'c' &&
10814 name[5] == 'a' &&
10815 name[6] == 't' &&
10816 name[7] == 'e')
10817 { /* truncate */
10818 return -KEY_truncate;
10819 }
10820
10821 goto unknown;
10822
10823 default:
10824 goto unknown;
10825 }
10826
3c10abe3 10827 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10828 switch (name[0])
10829 {
3c10abe3
AG
10830 case 'U':
10831 if (name[1] == 'N' &&
10832 name[2] == 'I' &&
10833 name[3] == 'T' &&
10834 name[4] == 'C' &&
10835 name[5] == 'H' &&
10836 name[6] == 'E' &&
10837 name[7] == 'C' &&
10838 name[8] == 'K')
10839 { /* UNITCHECK */
10840 return KEY_UNITCHECK;
10841 }
10842
10843 goto unknown;
10844
4c3bbe0f
MHM
10845 case 'e':
10846 if (name[1] == 'n' &&
10847 name[2] == 'd' &&
10848 name[3] == 'n' &&
10849 name[4] == 'e' &&
10850 name[5] == 't' &&
10851 name[6] == 'e' &&
10852 name[7] == 'n' &&
10853 name[8] == 't')
10854 { /* endnetent */
10855 return -KEY_endnetent;
10856 }
10857
10858 goto unknown;
10859
10860 case 'g':
10861 if (name[1] == 'e' &&
10862 name[2] == 't' &&
10863 name[3] == 'n' &&
10864 name[4] == 'e' &&
10865 name[5] == 't' &&
10866 name[6] == 'e' &&
10867 name[7] == 'n' &&
10868 name[8] == 't')
10869 { /* getnetent */
10870 return -KEY_getnetent;
10871 }
10872
10873 goto unknown;
10874
10875 case 'l':
10876 if (name[1] == 'o' &&
10877 name[2] == 'c' &&
10878 name[3] == 'a' &&
10879 name[4] == 'l' &&
10880 name[5] == 't' &&
10881 name[6] == 'i' &&
10882 name[7] == 'm' &&
10883 name[8] == 'e')
10884 { /* localtime */
10885 return -KEY_localtime;
10886 }
10887
10888 goto unknown;
10889
10890 case 'p':
10891 if (name[1] == 'r' &&
10892 name[2] == 'o' &&
10893 name[3] == 't' &&
10894 name[4] == 'o' &&
10895 name[5] == 't' &&
10896 name[6] == 'y' &&
10897 name[7] == 'p' &&
10898 name[8] == 'e')
10899 { /* prototype */
10900 return KEY_prototype;
10901 }
10902
10903 goto unknown;
10904
10905 case 'q':
10906 if (name[1] == 'u' &&
10907 name[2] == 'o' &&
10908 name[3] == 't' &&
10909 name[4] == 'e' &&
10910 name[5] == 'm' &&
10911 name[6] == 'e' &&
10912 name[7] == 't' &&
10913 name[8] == 'a')
10914 { /* quotemeta */
10915 return -KEY_quotemeta;
10916 }
10917
10918 goto unknown;
10919
10920 case 'r':
10921 if (name[1] == 'e' &&
10922 name[2] == 'w' &&
10923 name[3] == 'i' &&
10924 name[4] == 'n' &&
10925 name[5] == 'd' &&
10926 name[6] == 'd' &&
10927 name[7] == 'i' &&
10928 name[8] == 'r')
10929 { /* rewinddir */
10930 return -KEY_rewinddir;
10931 }
10932
10933 goto unknown;
10934
10935 case 's':
10936 if (name[1] == 'e' &&
10937 name[2] == 't' &&
10938 name[3] == 'n' &&
10939 name[4] == 'e' &&
10940 name[5] == 't' &&
10941 name[6] == 'e' &&
10942 name[7] == 'n' &&
10943 name[8] == 't')
10944 { /* setnetent */
10945 return -KEY_setnetent;
10946 }
10947
10948 goto unknown;
10949
10950 case 'w':
10951 if (name[1] == 'a' &&
10952 name[2] == 'n' &&
10953 name[3] == 't' &&
10954 name[4] == 'a' &&
10955 name[5] == 'r' &&
10956 name[6] == 'r' &&
10957 name[7] == 'a' &&
10958 name[8] == 'y')
10959 { /* wantarray */
10960 return -KEY_wantarray;
10961 }
10962
10963 goto unknown;
10964
10965 default:
10966 goto unknown;
10967 }
10968
10969 case 10: /* 9 tokens of length 10 */
10970 switch (name[0])
10971 {
10972 case 'e':
10973 if (name[1] == 'n' &&
10974 name[2] == 'd')
10975 {
10976 switch (name[3])
10977 {
10978 case 'h':
10979 if (name[4] == 'o' &&
10980 name[5] == 's' &&
10981 name[6] == 't' &&
10982 name[7] == 'e' &&
10983 name[8] == 'n' &&
10984 name[9] == 't')
10985 { /* endhostent */
10986 return -KEY_endhostent;
10987 }
10988
10989 goto unknown;
10990
10991 case 's':
10992 if (name[4] == 'e' &&
10993 name[5] == 'r' &&
10994 name[6] == 'v' &&
10995 name[7] == 'e' &&
10996 name[8] == 'n' &&
10997 name[9] == 't')
10998 { /* endservent */
10999 return -KEY_endservent;
11000 }
11001
11002 goto unknown;
11003
11004 default:
11005 goto unknown;
11006 }
11007 }
11008
11009 goto unknown;
11010
11011 case 'g':
11012 if (name[1] == 'e' &&
11013 name[2] == 't')
11014 {
11015 switch (name[3])
11016 {
11017 case 'h':
11018 if (name[4] == 'o' &&
11019 name[5] == 's' &&
11020 name[6] == 't' &&
11021 name[7] == 'e' &&
11022 name[8] == 'n' &&
11023 name[9] == 't')
11024 { /* gethostent */
11025 return -KEY_gethostent;
11026 }
11027
11028 goto unknown;
11029
11030 case 's':
11031 switch (name[4])
11032 {
11033 case 'e':
11034 if (name[5] == 'r' &&
11035 name[6] == 'v' &&
11036 name[7] == 'e' &&
11037 name[8] == 'n' &&
11038 name[9] == 't')
11039 { /* getservent */
11040 return -KEY_getservent;
11041 }
11042
11043 goto unknown;
11044
11045 case 'o':
11046 if (name[5] == 'c' &&
11047 name[6] == 'k' &&
11048 name[7] == 'o' &&
11049 name[8] == 'p' &&
11050 name[9] == 't')
11051 { /* getsockopt */
11052 return -KEY_getsockopt;
11053 }
11054
11055 goto unknown;
11056
11057 default:
11058 goto unknown;
11059 }
11060
11061 default:
11062 goto unknown;
11063 }
11064 }
11065
11066 goto unknown;
11067
11068 case 's':
11069 switch (name[1])
11070 {
11071 case 'e':
11072 if (name[2] == 't')
11073 {
11074 switch (name[3])
11075 {
11076 case 'h':
11077 if (name[4] == 'o' &&
11078 name[5] == 's' &&
11079 name[6] == 't' &&
11080 name[7] == 'e' &&
11081 name[8] == 'n' &&
11082 name[9] == 't')
11083 { /* sethostent */
11084 return -KEY_sethostent;
11085 }
11086
11087 goto unknown;
11088
11089 case 's':
11090 switch (name[4])
11091 {
11092 case 'e':
11093 if (name[5] == 'r' &&
11094 name[6] == 'v' &&
11095 name[7] == 'e' &&
11096 name[8] == 'n' &&
11097 name[9] == 't')
11098 { /* setservent */
11099 return -KEY_setservent;
11100 }
11101
11102 goto unknown;
11103
11104 case 'o':
11105 if (name[5] == 'c' &&
11106 name[6] == 'k' &&
11107 name[7] == 'o' &&
11108 name[8] == 'p' &&
11109 name[9] == 't')
11110 { /* setsockopt */
11111 return -KEY_setsockopt;
11112 }
11113
11114 goto unknown;
11115
11116 default:
11117 goto unknown;
11118 }
11119
11120 default:
11121 goto unknown;
11122 }
11123 }
11124
11125 goto unknown;
11126
11127 case 'o':
11128 if (name[2] == 'c' &&
11129 name[3] == 'k' &&
11130 name[4] == 'e' &&
11131 name[5] == 't' &&
11132 name[6] == 'p' &&
11133 name[7] == 'a' &&
11134 name[8] == 'i' &&
11135 name[9] == 'r')
11136 { /* socketpair */
11137 return -KEY_socketpair;
11138 }
11139
11140 goto unknown;
11141
11142 default:
11143 goto unknown;
11144 }
11145
11146 default:
11147 goto unknown;
e2e1dd5a 11148 }
4c3bbe0f
MHM
11149
11150 case 11: /* 8 tokens of length 11 */
11151 switch (name[0])
11152 {
11153 case '_':
11154 if (name[1] == '_' &&
11155 name[2] == 'P' &&
11156 name[3] == 'A' &&
11157 name[4] == 'C' &&
11158 name[5] == 'K' &&
11159 name[6] == 'A' &&
11160 name[7] == 'G' &&
11161 name[8] == 'E' &&
11162 name[9] == '_' &&
11163 name[10] == '_')
11164 { /* __PACKAGE__ */
11165 return -KEY___PACKAGE__;
11166 }
11167
11168 goto unknown;
11169
11170 case 'e':
11171 if (name[1] == 'n' &&
11172 name[2] == 'd' &&
11173 name[3] == 'p' &&
11174 name[4] == 'r' &&
11175 name[5] == 'o' &&
11176 name[6] == 't' &&
11177 name[7] == 'o' &&
11178 name[8] == 'e' &&
11179 name[9] == 'n' &&
11180 name[10] == 't')
11181 { /* endprotoent */
11182 return -KEY_endprotoent;
11183 }
11184
11185 goto unknown;
11186
11187 case 'g':
11188 if (name[1] == 'e' &&
11189 name[2] == 't')
11190 {
11191 switch (name[3])
11192 {
11193 case 'p':
11194 switch (name[4])
11195 {
11196 case 'e':
11197 if (name[5] == 'e' &&
11198 name[6] == 'r' &&
11199 name[7] == 'n' &&
11200 name[8] == 'a' &&
11201 name[9] == 'm' &&
11202 name[10] == 'e')
11203 { /* getpeername */
11204 return -KEY_getpeername;
11205 }
11206
11207 goto unknown;
11208
11209 case 'r':
11210 switch (name[5])
11211 {
11212 case 'i':
11213 if (name[6] == 'o' &&
11214 name[7] == 'r' &&
11215 name[8] == 'i' &&
11216 name[9] == 't' &&
11217 name[10] == 'y')
11218 { /* getpriority */
11219 return -KEY_getpriority;
11220 }
11221
11222 goto unknown;
11223
11224 case 'o':
11225 if (name[6] == 't' &&
11226 name[7] == 'o' &&
11227 name[8] == 'e' &&
11228 name[9] == 'n' &&
11229 name[10] == 't')
11230 { /* getprotoent */
11231 return -KEY_getprotoent;
11232 }
11233
11234 goto unknown;
11235
11236 default:
11237 goto unknown;
11238 }
11239
11240 default:
11241 goto unknown;
11242 }
11243
11244 case 's':
11245 if (name[4] == 'o' &&
11246 name[5] == 'c' &&
11247 name[6] == 'k' &&
11248 name[7] == 'n' &&
11249 name[8] == 'a' &&
11250 name[9] == 'm' &&
11251 name[10] == 'e')
11252 { /* getsockname */
11253 return -KEY_getsockname;
11254 }
11255
11256 goto unknown;
11257
11258 default:
11259 goto unknown;
11260 }
11261 }
11262
11263 goto unknown;
11264
11265 case 's':
11266 if (name[1] == 'e' &&
11267 name[2] == 't' &&
11268 name[3] == 'p' &&
11269 name[4] == 'r')
11270 {
11271 switch (name[5])
11272 {
11273 case 'i':
11274 if (name[6] == 'o' &&
11275 name[7] == 'r' &&
11276 name[8] == 'i' &&
11277 name[9] == 't' &&
11278 name[10] == 'y')
11279 { /* setpriority */
11280 return -KEY_setpriority;
11281 }
11282
11283 goto unknown;
11284
11285 case 'o':
11286 if (name[6] == 't' &&
11287 name[7] == 'o' &&
11288 name[8] == 'e' &&
11289 name[9] == 'n' &&
11290 name[10] == 't')
11291 { /* setprotoent */
11292 return -KEY_setprotoent;
11293 }
11294
11295 goto unknown;
11296
11297 default:
11298 goto unknown;
11299 }
11300 }
11301
11302 goto unknown;
11303
11304 default:
11305 goto unknown;
e2e1dd5a 11306 }
4c3bbe0f
MHM
11307
11308 case 12: /* 2 tokens of length 12 */
11309 if (name[0] == 'g' &&
11310 name[1] == 'e' &&
11311 name[2] == 't' &&
11312 name[3] == 'n' &&
11313 name[4] == 'e' &&
11314 name[5] == 't' &&
11315 name[6] == 'b' &&
11316 name[7] == 'y')
11317 {
11318 switch (name[8])
11319 {
11320 case 'a':
11321 if (name[9] == 'd' &&
11322 name[10] == 'd' &&
11323 name[11] == 'r')
11324 { /* getnetbyaddr */
11325 return -KEY_getnetbyaddr;
11326 }
11327
11328 goto unknown;
11329
11330 case 'n':
11331 if (name[9] == 'a' &&
11332 name[10] == 'm' &&
11333 name[11] == 'e')
11334 { /* getnetbyname */
11335 return -KEY_getnetbyname;
11336 }
11337
11338 goto unknown;
11339
11340 default:
11341 goto unknown;
11342 }
e2e1dd5a 11343 }
4c3bbe0f
MHM
11344
11345 goto unknown;
11346
11347 case 13: /* 4 tokens of length 13 */
11348 if (name[0] == 'g' &&
11349 name[1] == 'e' &&
11350 name[2] == 't')
11351 {
11352 switch (name[3])
11353 {
11354 case 'h':
11355 if (name[4] == 'o' &&
11356 name[5] == 's' &&
11357 name[6] == 't' &&
11358 name[7] == 'b' &&
11359 name[8] == 'y')
11360 {
11361 switch (name[9])
11362 {
11363 case 'a':
11364 if (name[10] == 'd' &&
11365 name[11] == 'd' &&
11366 name[12] == 'r')
11367 { /* gethostbyaddr */
11368 return -KEY_gethostbyaddr;
11369 }
11370
11371 goto unknown;
11372
11373 case 'n':
11374 if (name[10] == 'a' &&
11375 name[11] == 'm' &&
11376 name[12] == 'e')
11377 { /* gethostbyname */
11378 return -KEY_gethostbyname;
11379 }
11380
11381 goto unknown;
11382
11383 default:
11384 goto unknown;
11385 }
11386 }
11387
11388 goto unknown;
11389
11390 case 's':
11391 if (name[4] == 'e' &&
11392 name[5] == 'r' &&
11393 name[6] == 'v' &&
11394 name[7] == 'b' &&
11395 name[8] == 'y')
11396 {
11397 switch (name[9])
11398 {
11399 case 'n':
11400 if (name[10] == 'a' &&
11401 name[11] == 'm' &&
11402 name[12] == 'e')
11403 { /* getservbyname */
11404 return -KEY_getservbyname;
11405 }
11406
11407 goto unknown;
11408
11409 case 'p':
11410 if (name[10] == 'o' &&
11411 name[11] == 'r' &&
11412 name[12] == 't')
11413 { /* getservbyport */
11414 return -KEY_getservbyport;
11415 }
11416
11417 goto unknown;
11418
11419 default:
11420 goto unknown;
11421 }
11422 }
11423
11424 goto unknown;
11425
11426 default:
11427 goto unknown;
11428 }
e2e1dd5a 11429 }
4c3bbe0f
MHM
11430
11431 goto unknown;
11432
11433 case 14: /* 1 tokens of length 14 */
11434 if (name[0] == 'g' &&
11435 name[1] == 'e' &&
11436 name[2] == 't' &&
11437 name[3] == 'p' &&
11438 name[4] == 'r' &&
11439 name[5] == 'o' &&
11440 name[6] == 't' &&
11441 name[7] == 'o' &&
11442 name[8] == 'b' &&
11443 name[9] == 'y' &&
11444 name[10] == 'n' &&
11445 name[11] == 'a' &&
11446 name[12] == 'm' &&
11447 name[13] == 'e')
11448 { /* getprotobyname */
11449 return -KEY_getprotobyname;
11450 }
11451
11452 goto unknown;
11453
11454 case 16: /* 1 tokens of length 16 */
11455 if (name[0] == 'g' &&
11456 name[1] == 'e' &&
11457 name[2] == 't' &&
11458 name[3] == 'p' &&
11459 name[4] == 'r' &&
11460 name[5] == 'o' &&
11461 name[6] == 't' &&
11462 name[7] == 'o' &&
11463 name[8] == 'b' &&
11464 name[9] == 'y' &&
11465 name[10] == 'n' &&
11466 name[11] == 'u' &&
11467 name[12] == 'm' &&
11468 name[13] == 'b' &&
11469 name[14] == 'e' &&
11470 name[15] == 'r')
11471 { /* getprotobynumber */
11472 return -KEY_getprotobynumber;
11473 }
11474
11475 goto unknown;
11476
11477 default:
11478 goto unknown;
e2e1dd5a 11479 }
4c3bbe0f
MHM
11480
11481unknown:
e2e1dd5a 11482 return 0;
a687059c
LW
11483}
11484
76e3520e 11485STATIC void
c94115d8 11486S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11487{
97aff369 11488 dVAR;
2f3197b3 11489
7918f24d
NC
11490 PERL_ARGS_ASSERT_CHECKCOMMA;
11491
d008e5eb 11492 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11493 if (ckWARN(WARN_SYNTAX)) {
11494 int level = 1;
26ff0806 11495 const char *w;
d008e5eb
GS
11496 for (w = s+2; *w && level; w++) {
11497 if (*w == '(')
11498 ++level;
11499 else if (*w == ')')
11500 --level;
11501 }
888fea98
NC
11502 while (isSPACE(*w))
11503 ++w;
b1439985
RGS
11504 /* the list of chars below is for end of statements or
11505 * block / parens, boolean operators (&&, ||, //) and branch
11506 * constructs (or, and, if, until, unless, while, err, for).
11507 * Not a very solid hack... */
11508 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11509 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11510 "%s (...) interpreted as function",name);
d008e5eb 11511 }
2f3197b3 11512 }
3280af22 11513 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11514 s++;
a687059c
LW
11515 if (*s == '(')
11516 s++;
3280af22 11517 while (s < PL_bufend && isSPACE(*s))
a687059c 11518 s++;
7e2040f0 11519 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11520 const char * const w = s++;
7e2040f0 11521 while (isALNUM_lazy_if(s,UTF))
a687059c 11522 s++;
3280af22 11523 while (s < PL_bufend && isSPACE(*s))
a687059c 11524 s++;
e929a76b 11525 if (*s == ',') {
c94115d8 11526 GV* gv;
5458a98a 11527 if (keyword(w, s - w, 0))
e929a76b 11528 return;
c94115d8
NC
11529
11530 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11531 if (gv && GvCVu(gv))
abbb3198 11532 return;
cea2e8a9 11533 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11534 }
11535 }
11536}
11537
423cee85
JH
11538/* Either returns sv, or mortalizes sv and returns a new SV*.
11539 Best used as sv=new_constant(..., sv, ...).
11540 If s, pv are NULL, calls subroutine with one argument,
11541 and type is used with error messages only. */
11542
b3ac6de7 11543STATIC SV *
eb0d8d16
NC
11544S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11545 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11546{
27da23d5 11547 dVAR; dSP;
890ce7af 11548 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11549 SV *res;
b3ac6de7
IZ
11550 SV **cvp;
11551 SV *cv, *typesv;
89e33a05 11552 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11553
7918f24d
NC
11554 PERL_ARGS_ASSERT_NEW_CONSTANT;
11555
f0af216f 11556 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11557 SV *msg;
11558
10edeb5d
JH
11559 why2 = (const char *)
11560 (strEQ(key,"charnames")
11561 ? "(possibly a missing \"use charnames ...\")"
11562 : "");
4e553d73 11563 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11564 (type ? type: "undef"), why2);
11565
11566 /* This is convoluted and evil ("goto considered harmful")
11567 * but I do not understand the intricacies of all the different
11568 * failure modes of %^H in here. The goal here is to make
11569 * the most probable error message user-friendly. --jhi */
11570
11571 goto msgdone;
11572
423cee85 11573 report:
4e553d73 11574 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11575 (type ? type: "undef"), why1, why2, why3);
41ab332f 11576 msgdone:
95a20fc0 11577 yyerror(SvPVX_const(msg));
423cee85
JH
11578 SvREFCNT_dec(msg);
11579 return sv;
11580 }
ff3f963a
KW
11581
11582 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11583 if (PL_error_count > 0 && strEQ(key,"charnames"))
11584 return &PL_sv_undef;
ff3f963a 11585
eb0d8d16 11586 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11587 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11588 why1 = "$^H{";
11589 why2 = key;
f0af216f 11590 why3 = "} is not defined";
423cee85 11591 goto report;
b3ac6de7
IZ
11592 }
11593 sv_2mortal(sv); /* Parent created it permanently */
11594 cv = *cvp;
423cee85 11595 if (!pv && s)
59cd0e26 11596 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11597 if (type && pv)
59cd0e26 11598 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11599 else
423cee85 11600 typesv = &PL_sv_undef;
4e553d73 11601
e788e7d3 11602 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11603 ENTER ;
11604 SAVETMPS;
4e553d73 11605
423cee85 11606 PUSHMARK(SP) ;
a5845cb7 11607 EXTEND(sp, 3);
423cee85
JH
11608 if (pv)
11609 PUSHs(pv);
b3ac6de7 11610 PUSHs(sv);
423cee85
JH
11611 if (pv)
11612 PUSHs(typesv);
b3ac6de7 11613 PUTBACK;
423cee85 11614 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11615
423cee85 11616 SPAGAIN ;
4e553d73 11617
423cee85 11618 /* Check the eval first */
9b0e499b 11619 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11620 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11621 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11622 (void)POPs;
b37c2d43 11623 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11624 }
11625 else {
11626 res = POPs;
b37c2d43 11627 SvREFCNT_inc_simple_void(res);
423cee85 11628 }
4e553d73 11629
423cee85
JH
11630 PUTBACK ;
11631 FREETMPS ;
11632 LEAVE ;
b3ac6de7 11633 POPSTACK;
4e553d73 11634
b3ac6de7 11635 if (!SvOK(res)) {
423cee85
JH
11636 why1 = "Call to &{$^H{";
11637 why2 = key;
f0af216f 11638 why3 = "}} did not return a defined value";
423cee85
JH
11639 sv = res;
11640 goto report;
9b0e499b 11641 }
423cee85 11642
9b0e499b 11643 return res;
b3ac6de7 11644}
4e553d73 11645
d0a148a6
NC
11646/* Returns a NUL terminated string, with the length of the string written to
11647 *slp
11648 */
76e3520e 11649STATIC char *
cea2e8a9 11650S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11651{
97aff369 11652 dVAR;
463ee0b2 11653 register char *d = dest;
890ce7af 11654 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11655
11656 PERL_ARGS_ASSERT_SCAN_WORD;
11657
463ee0b2 11658 for (;;) {
8903cb82 11659 if (d >= e)
cea2e8a9 11660 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11661 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11662 *d++ = *s++;
c35e046a 11663 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11664 *d++ = ':';
11665 *d++ = ':';
11666 s++;
11667 }
c35e046a 11668 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11669 *d++ = *s++;
11670 *d++ = *s++;
11671 }
fd400ab9 11672 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11673 char *t = s + UTF8SKIP(s);
c35e046a 11674 size_t len;
fd400ab9 11675 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11676 t += UTF8SKIP(t);
c35e046a
AL
11677 len = t - s;
11678 if (d + len > e)
cea2e8a9 11679 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11680 Copy(s, d, len, char);
11681 d += len;
a0ed51b3
LW
11682 s = t;
11683 }
463ee0b2
LW
11684 else {
11685 *d = '\0';
11686 *slp = d - dest;
11687 return s;
e929a76b 11688 }
378cc40b
LW
11689 }
11690}
11691
76e3520e 11692STATIC char *
f54cb97a 11693S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11694{
97aff369 11695 dVAR;
6136c704 11696 char *bracket = NULL;
748a9306 11697 char funny = *s++;
6136c704 11698 register char *d = dest;
0b3da58d 11699 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11700
7918f24d
NC
11701 PERL_ARGS_ASSERT_SCAN_IDENT;
11702
a0d0e21e 11703 if (isSPACE(*s))
29595ff2 11704 s = PEEKSPACE(s);
de3bb511 11705 if (isDIGIT(*s)) {
8903cb82 11706 while (isDIGIT(*s)) {
11707 if (d >= e)
cea2e8a9 11708 Perl_croak(aTHX_ ident_too_long);
378cc40b 11709 *d++ = *s++;
8903cb82 11710 }
378cc40b
LW
11711 }
11712 else {
463ee0b2 11713 for (;;) {
8903cb82 11714 if (d >= e)
cea2e8a9 11715 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11716 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11717 *d++ = *s++;
7e2040f0 11718 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11719 *d++ = ':';
11720 *d++ = ':';
11721 s++;
11722 }
a0d0e21e 11723 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11724 *d++ = *s++;
11725 *d++ = *s++;
11726 }
fd400ab9 11727 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11728 char *t = s + UTF8SKIP(s);
fd400ab9 11729 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11730 t += UTF8SKIP(t);
11731 if (d + (t - s) > e)
cea2e8a9 11732 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11733 Copy(s, d, t - s, char);
11734 d += t - s;
11735 s = t;
11736 }
463ee0b2
LW
11737 else
11738 break;
11739 }
378cc40b
LW
11740 }
11741 *d = '\0';
11742 d = dest;
79072805 11743 if (*d) {
3280af22
NIS
11744 if (PL_lex_state != LEX_NORMAL)
11745 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11746 return s;
378cc40b 11747 }
748a9306 11748 if (*s == '$' && s[1] &&
3792a11b 11749 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11750 {
4810e5ec 11751 return s;
5cd24f17 11752 }
79072805
LW
11753 if (*s == '{') {
11754 bracket = s;
11755 s++;
11756 }
11757 else if (ck_uni)
11758 check_uni();
93a17b20 11759 if (s < send)
79072805
LW
11760 *d = *s++;
11761 d[1] = '\0';
2b92dfce 11762 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11763 *d = toCTRL(*s);
11764 s++;
de3bb511 11765 }
79072805 11766 if (bracket) {
748a9306 11767 if (isSPACE(s[-1])) {
fa83b5b6 11768 while (s < send) {
f54cb97a 11769 const char ch = *s++;
bf4acbe4 11770 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11771 *d = ch;
11772 break;
11773 }
11774 }
748a9306 11775 }
7e2040f0 11776 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11777 d++;
a0ed51b3 11778 if (UTF) {
6136c704
AL
11779 char *end = s;
11780 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11781 end += UTF8SKIP(end);
11782 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11783 end += UTF8SKIP(end);
a0ed51b3 11784 }
6136c704
AL
11785 Copy(s, d, end - s, char);
11786 d += end - s;
11787 s = end;
a0ed51b3
LW
11788 }
11789 else {
2b92dfce 11790 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11791 *d++ = *s++;
2b92dfce 11792 if (d >= e)
cea2e8a9 11793 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11794 }
79072805 11795 *d = '\0';
c35e046a
AL
11796 while (s < send && SPACE_OR_TAB(*s))
11797 s++;
ff68c719 11798 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11799 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11800 const char * const brack =
11801 (const char *)
11802 ((*s == '[') ? "[...]" : "{...}");
9014280d 11803 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11804 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11805 funny, dest, brack, funny, dest, brack);
11806 }
79072805 11807 bracket++;
a0be28da 11808 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11809 return s;
11810 }
4e553d73
NIS
11811 }
11812 /* Handle extended ${^Foo} variables
2b92dfce
GS
11813 * 1999-02-27 mjd-perl-patch@plover.com */
11814 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11815 && isALNUM(*s))
11816 {
11817 d++;
11818 while (isALNUM(*s) && d < e) {
11819 *d++ = *s++;
11820 }
11821 if (d >= e)
cea2e8a9 11822 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11823 *d = '\0';
79072805
LW
11824 }
11825 if (*s == '}') {
11826 s++;
7df0d042 11827 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11828 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11829 PL_expect = XREF;
11830 }
d008e5eb 11831 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11832 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11833 (keyword(dest, d - dest, 0)
11834 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11835 {
c35e046a
AL
11836 if (funny == '#')
11837 funny = '@';
9014280d 11838 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11839 "Ambiguous use of %c{%s} resolved to %c%s",
11840 funny, dest, funny, dest);
11841 }
11842 }
79072805
LW
11843 }
11844 else {
11845 s = bracket; /* let the parser handle it */
93a17b20 11846 *dest = '\0';
79072805
LW
11847 }
11848 }
3280af22
NIS
11849 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11850 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11851 return s;
11852}
11853
879d0c72
NC
11854static U32
11855S_pmflag(U32 pmfl, const char ch) {
11856 switch (ch) {
11857 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11858 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11859 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11860 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11861 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11862 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11863 }
11864 return pmfl;
11865}
11866
76e3520e 11867STATIC char *
cea2e8a9 11868S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11869{
97aff369 11870 dVAR;
79072805 11871 PMOP *pm;
5db06880 11872 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11873 const char * const valid_flags =
a20207d7 11874 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11875#ifdef PERL_MAD
11876 char *modstart;
11877#endif
11878
7918f24d 11879 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11880
25c09cbf 11881 if (!s) {
6136c704 11882 const char * const delimiter = skipspace(start);
10edeb5d
JH
11883 Perl_croak(aTHX_
11884 (const char *)
11885 (*delimiter == '?'
11886 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11887 : "Search pattern not terminated" ));
25c09cbf 11888 }
bbce6d69 11889
8782bef2 11890 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11891 if (PL_multi_open == '?') {
11892 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11893 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11894
11895 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11896 allows us to restrict the list needed by reset to just the ??
11897 matches. */
11898 assert(type != OP_TRANS);
11899 if (PL_curstash) {
daba3364 11900 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11901 U32 elements;
11902 if (!mg) {
daba3364 11903 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11904 0);
11905 }
11906 elements = mg->mg_len / sizeof(PMOP**);
11907 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11908 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11909 mg->mg_len = elements * sizeof(PMOP**);
11910 PmopSTASH_set(pm,PL_curstash);
11911 }
11912 }
5db06880
NC
11913#ifdef PERL_MAD
11914 modstart = s;
11915#endif
6136c704 11916 while (*s && strchr(valid_flags, *s))
879d0c72 11917 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11918
11919 if (isALNUM(*s)) {
11920 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11921 "Having no space between pattern and following word is deprecated");
11922
11923 }
5db06880
NC
11924#ifdef PERL_MAD
11925 if (PL_madskills && modstart != s) {
11926 SV* tmptoken = newSVpvn(modstart, s - modstart);
11927 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11928 }
11929#endif
4ac733c9 11930 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11931 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11932 {
a2a5de95
NC
11933 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11934 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11935 }
11936
3280af22 11937 PL_lex_op = (OP*)pm;
6154021b 11938 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11939 return s;
11940}
11941
76e3520e 11942STATIC char *
cea2e8a9 11943S_scan_subst(pTHX_ char *start)
79072805 11944{
27da23d5 11945 dVAR;
a0d0e21e 11946 register char *s;
79072805 11947 register PMOP *pm;
4fdae800 11948 I32 first_start;
79072805 11949 I32 es = 0;
5db06880
NC
11950#ifdef PERL_MAD
11951 char *modstart;
11952#endif
79072805 11953
7918f24d
NC
11954 PERL_ARGS_ASSERT_SCAN_SUBST;
11955
6154021b 11956 pl_yylval.ival = OP_NULL;
79072805 11957
5db06880 11958 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11959
37fd879b 11960 if (!s)
cea2e8a9 11961 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11962
3280af22 11963 if (s[-1] == PL_multi_open)
79072805 11964 s--;
5db06880
NC
11965#ifdef PERL_MAD
11966 if (PL_madskills) {
cd81e915
NC
11967 CURMAD('q', PL_thisopen);
11968 CURMAD('_', PL_thiswhite);
11969 CURMAD('E', PL_thisstuff);
11970 CURMAD('Q', PL_thisclose);
11971 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11972 }
11973#endif
79072805 11974
3280af22 11975 first_start = PL_multi_start;
5db06880 11976 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11977 if (!s) {
37fd879b 11978 if (PL_lex_stuff) {
3280af22 11979 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11980 PL_lex_stuff = NULL;
37fd879b 11981 }
cea2e8a9 11982 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11983 }
3280af22 11984 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11985
79072805 11986 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11987
11988#ifdef PERL_MAD
11989 if (PL_madskills) {
cd81e915
NC
11990 CURMAD('z', PL_thisopen);
11991 CURMAD('R', PL_thisstuff);
11992 CURMAD('Z', PL_thisclose);
5db06880
NC
11993 }
11994 modstart = s;
11995#endif
11996
48c036b1 11997 while (*s) {
a20207d7 11998 if (*s == EXEC_PAT_MOD) {
a687059c 11999 s++;
2f3197b3 12000 es++;
a687059c 12001 }
a20207d7 12002 else if (strchr(S_PAT_MODS, *s))
879d0c72 12003 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
12004 else {
12005 if (isALNUM(*s)) {
12006 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12007 "Having no space between pattern and following word is deprecated");
12008
12009 }
48c036b1 12010 break;
aa78b661 12011 }
378cc40b 12012 }
79072805 12013
5db06880
NC
12014#ifdef PERL_MAD
12015 if (PL_madskills) {
12016 if (modstart != s)
12017 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12018 append_madprops(PL_thismad, (OP*)pm, 0);
12019 PL_thismad = 0;
5db06880
NC
12020 }
12021#endif
a2a5de95
NC
12022 if ((pm->op_pmflags & PMf_CONTINUE)) {
12023 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12024 }
12025
79072805 12026 if (es) {
6136c704
AL
12027 SV * const repl = newSVpvs("");
12028
0244c3a4
GS
12029 PL_sublex_info.super_bufptr = s;
12030 PL_sublex_info.super_bufend = PL_bufend;
12031 PL_multi_end = 0;
79072805 12032 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12033 while (es-- > 0) {
12034 if (es)
12035 sv_catpvs(repl, "eval ");
12036 else
12037 sv_catpvs(repl, "do ");
12038 }
6f43d98f 12039 sv_catpvs(repl, "{");
3280af22 12040 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12041 if (strchr(SvPVX(PL_lex_repl), '#'))
12042 sv_catpvs(repl, "\n");
12043 sv_catpvs(repl, "}");
25da4f38 12044 SvEVALED_on(repl);
3280af22
NIS
12045 SvREFCNT_dec(PL_lex_repl);
12046 PL_lex_repl = repl;
378cc40b 12047 }
79072805 12048
3280af22 12049 PL_lex_op = (OP*)pm;
6154021b 12050 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12051 return s;
12052}
12053
76e3520e 12054STATIC char *
cea2e8a9 12055S_scan_trans(pTHX_ char *start)
378cc40b 12056{
97aff369 12057 dVAR;
a0d0e21e 12058 register char* s;
11343788 12059 OP *o;
79072805 12060 short *tbl;
b84c11c8
NC
12061 U8 squash;
12062 U8 del;
12063 U8 complement;
5db06880
NC
12064#ifdef PERL_MAD
12065 char *modstart;
12066#endif
79072805 12067
7918f24d
NC
12068 PERL_ARGS_ASSERT_SCAN_TRANS;
12069
6154021b 12070 pl_yylval.ival = OP_NULL;
79072805 12071
5db06880 12072 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12073 if (!s)
cea2e8a9 12074 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12075
3280af22 12076 if (s[-1] == PL_multi_open)
2f3197b3 12077 s--;
5db06880
NC
12078#ifdef PERL_MAD
12079 if (PL_madskills) {
cd81e915
NC
12080 CURMAD('q', PL_thisopen);
12081 CURMAD('_', PL_thiswhite);
12082 CURMAD('E', PL_thisstuff);
12083 CURMAD('Q', PL_thisclose);
12084 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12085 }
12086#endif
2f3197b3 12087
5db06880 12088 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12089 if (!s) {
37fd879b 12090 if (PL_lex_stuff) {
3280af22 12091 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12092 PL_lex_stuff = NULL;
37fd879b 12093 }
cea2e8a9 12094 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12095 }
5db06880 12096 if (PL_madskills) {
cd81e915
NC
12097 CURMAD('z', PL_thisopen);
12098 CURMAD('R', PL_thisstuff);
12099 CURMAD('Z', PL_thisclose);
5db06880 12100 }
79072805 12101
a0ed51b3 12102 complement = del = squash = 0;
5db06880
NC
12103#ifdef PERL_MAD
12104 modstart = s;
12105#endif
7a1e2023
NC
12106 while (1) {
12107 switch (*s) {
12108 case 'c':
79072805 12109 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12110 break;
12111 case 'd':
a0ed51b3 12112 del = OPpTRANS_DELETE;
7a1e2023
NC
12113 break;
12114 case 's':
79072805 12115 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12116 break;
12117 default:
12118 goto no_more;
12119 }
395c3793
LW
12120 s++;
12121 }
7a1e2023 12122 no_more:
8973db79 12123
aa1f7c5b 12124 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12125 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12126 o->op_private &= ~OPpTRANS_ALL;
12127 o->op_private |= del|squash|complement|
7948272d
NIS
12128 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12129 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12130
3280af22 12131 PL_lex_op = o;
6154021b 12132 pl_yylval.ival = OP_TRANS;
5db06880
NC
12133
12134#ifdef PERL_MAD
12135 if (PL_madskills) {
12136 if (modstart != s)
12137 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12138 append_madprops(PL_thismad, o, 0);
12139 PL_thismad = 0;
5db06880
NC
12140 }
12141#endif
12142
79072805
LW
12143 return s;
12144}
12145
76e3520e 12146STATIC char *
cea2e8a9 12147S_scan_heredoc(pTHX_ register char *s)
79072805 12148{
97aff369 12149 dVAR;
79072805
LW
12150 SV *herewas;
12151 I32 op_type = OP_SCALAR;
12152 I32 len;
12153 SV *tmpstr;
12154 char term;
73d840c0 12155 const char *found_newline;
79072805 12156 register char *d;
fc36a67e 12157 register char *e;
4633a7c4 12158 char *peek;
f54cb97a 12159 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12160#ifdef PERL_MAD
12161 I32 stuffstart = s - SvPVX(PL_linestr);
12162 char *tstart;
12163
cd81e915 12164 PL_realtokenstart = -1;
5db06880 12165#endif
79072805 12166
7918f24d
NC
12167 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12168
79072805 12169 s += 2;
3280af22
NIS
12170 d = PL_tokenbuf;
12171 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12172 if (!outer)
79072805 12173 *d++ = '\n';
c35e046a
AL
12174 peek = s;
12175 while (SPACE_OR_TAB(*peek))
12176 peek++;
3792a11b 12177 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12178 s = peek;
79072805 12179 term = *s++;
3280af22 12180 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12181 d += len;
3280af22 12182 if (s < PL_bufend)
79072805 12183 s++;
79072805
LW
12184 }
12185 else {
12186 if (*s == '\\')
12187 s++, term = '\'';
12188 else
12189 term = '"';
7e2040f0 12190 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12191 deprecate("bare << to mean <<\"\"");
7e2040f0 12192 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12193 if (d < e)
12194 *d++ = *s;
12195 }
12196 }
3280af22 12197 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12198 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12199 *d++ = '\n';
12200 *d = '\0';
3280af22 12201 len = d - PL_tokenbuf;
5db06880
NC
12202
12203#ifdef PERL_MAD
12204 if (PL_madskills) {
12205 tstart = PL_tokenbuf + !outer;
cd81e915 12206 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12207 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12208 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12209 stuffstart = s - SvPVX(PL_linestr);
12210 }
12211#endif
6a27c188 12212#ifndef PERL_STRICT_CR
f63a84b2
LW
12213 d = strchr(s, '\r');
12214 if (d) {
b464bac0 12215 char * const olds = s;
f63a84b2 12216 s = d;
3280af22 12217 while (s < PL_bufend) {
f63a84b2
LW
12218 if (*s == '\r') {
12219 *d++ = '\n';
12220 if (*++s == '\n')
12221 s++;
12222 }
12223 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12224 *d++ = *s++;
12225 s++;
12226 }
12227 else
12228 *d++ = *s++;
12229 }
12230 *d = '\0';
3280af22 12231 PL_bufend = d;
95a20fc0 12232 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12233 s = olds;
12234 }
12235#endif
5db06880
NC
12236#ifdef PERL_MAD
12237 found_newline = 0;
12238#endif
10edeb5d 12239 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12240 herewas = newSVpvn(s,PL_bufend-s);
12241 }
12242 else {
5db06880
NC
12243#ifdef PERL_MAD
12244 herewas = newSVpvn(s-1,found_newline-s+1);
12245#else
73d840c0
AL
12246 s--;
12247 herewas = newSVpvn(s,found_newline-s);
5db06880 12248#endif
73d840c0 12249 }
5db06880
NC
12250#ifdef PERL_MAD
12251 if (PL_madskills) {
12252 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12253 if (PL_thisstuff)
12254 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12255 else
cd81e915 12256 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12257 }
12258#endif
79072805 12259 s += SvCUR(herewas);
748a9306 12260
5db06880
NC
12261#ifdef PERL_MAD
12262 stuffstart = s - SvPVX(PL_linestr);
12263
12264 if (found_newline)
12265 s--;
12266#endif
12267
7d0a29fe
NC
12268 tmpstr = newSV_type(SVt_PVIV);
12269 SvGROW(tmpstr, 80);
748a9306 12270 if (term == '\'') {
79072805 12271 op_type = OP_CONST;
45977657 12272 SvIV_set(tmpstr, -1);
748a9306
LW
12273 }
12274 else if (term == '`') {
79072805 12275 op_type = OP_BACKTICK;
45977657 12276 SvIV_set(tmpstr, '\\');
748a9306 12277 }
79072805
LW
12278
12279 CLINE;
57843af0 12280 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12281 PL_multi_open = PL_multi_close = '<';
12282 term = *PL_tokenbuf;
0244c3a4 12283 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12284 char * const bufptr = PL_sublex_info.super_bufptr;
12285 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12286 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12287 s = strchr(bufptr, '\n');
12288 if (!s)
12289 s = bufend;
12290 d = s;
12291 while (s < bufend &&
12292 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12293 if (*s++ == '\n')
57843af0 12294 CopLINE_inc(PL_curcop);
0244c3a4
GS
12295 }
12296 if (s >= bufend) {
eb160463 12297 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12298 missingterm(PL_tokenbuf);
12299 }
12300 sv_setpvn(herewas,bufptr,d-bufptr+1);
12301 sv_setpvn(tmpstr,d+1,s-d);
12302 s += len - 1;
12303 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12304 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12305
12306 s = olds;
12307 goto retval;
12308 }
12309 else if (!outer) {
79072805 12310 d = s;
3280af22
NIS
12311 while (s < PL_bufend &&
12312 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12313 if (*s++ == '\n')
57843af0 12314 CopLINE_inc(PL_curcop);
79072805 12315 }
3280af22 12316 if (s >= PL_bufend) {
eb160463 12317 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12318 missingterm(PL_tokenbuf);
79072805
LW
12319 }
12320 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12321#ifdef PERL_MAD
12322 if (PL_madskills) {
cd81e915
NC
12323 if (PL_thisstuff)
12324 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12325 else
cd81e915 12326 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12327 stuffstart = s - SvPVX(PL_linestr);
12328 }
12329#endif
79072805 12330 s += len - 1;
57843af0 12331 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12332
3280af22
NIS
12333 sv_catpvn(herewas,s,PL_bufend-s);
12334 sv_setsv(PL_linestr,herewas);
12335 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12336 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12337 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12338 }
12339 else
76f68e9b 12340 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12341 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12342#ifdef PERL_MAD
12343 if (PL_madskills) {
12344 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12345 if (PL_thisstuff)
12346 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12347 else
cd81e915 12348 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12349 }
12350#endif
f0e67a1d 12351 PL_bufptr = s;
17cc9359 12352 CopLINE_inc(PL_curcop);
f0e67a1d 12353 if (!outer || !lex_next_chunk(0)) {
eb160463 12354 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12355 missingterm(PL_tokenbuf);
79072805 12356 }
17cc9359 12357 CopLINE_dec(PL_curcop);
f0e67a1d 12358 s = PL_bufptr;
5db06880
NC
12359#ifdef PERL_MAD
12360 stuffstart = s - SvPVX(PL_linestr);
12361#endif
57843af0 12362 CopLINE_inc(PL_curcop);
3280af22 12363 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12364 PL_last_lop = PL_last_uni = NULL;
6a27c188 12365#ifndef PERL_STRICT_CR
3280af22 12366 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12367 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12368 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12369 {
3280af22
NIS
12370 PL_bufend[-2] = '\n';
12371 PL_bufend--;
95a20fc0 12372 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12373 }
3280af22
NIS
12374 else if (PL_bufend[-1] == '\r')
12375 PL_bufend[-1] = '\n';
f63a84b2 12376 }
3280af22
NIS
12377 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12378 PL_bufend[-1] = '\n';
f63a84b2 12379#endif
3280af22 12380 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12381 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12382 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12383 sv_catsv(PL_linestr,herewas);
12384 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12385 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12386 }
12387 else {
3280af22
NIS
12388 s = PL_bufend;
12389 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12390 }
12391 }
79072805 12392 s++;
0244c3a4 12393retval:
57843af0 12394 PL_multi_end = CopLINE(PL_curcop);
79072805 12395 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12396 SvPV_shrink_to_cur(tmpstr);
79072805 12397 }
8990e307 12398 SvREFCNT_dec(herewas);
2f31ce75 12399 if (!IN_BYTES) {
95a20fc0 12400 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12401 SvUTF8_on(tmpstr);
12402 else if (PL_encoding)
12403 sv_recode_to_utf8(tmpstr, PL_encoding);
12404 }
3280af22 12405 PL_lex_stuff = tmpstr;
6154021b 12406 pl_yylval.ival = op_type;
79072805
LW
12407 return s;
12408}
12409
02aa26ce
NT
12410/* scan_inputsymbol
12411 takes: current position in input buffer
12412 returns: new position in input buffer
6154021b 12413 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12414
12415 This code handles:
12416
12417 <> read from ARGV
12418 <FH> read from filehandle
12419 <pkg::FH> read from package qualified filehandle
12420 <pkg'FH> read from package qualified filehandle
12421 <$fh> read from filehandle in $fh
12422 <*.h> filename glob
12423
12424*/
12425
76e3520e 12426STATIC char *
cea2e8a9 12427S_scan_inputsymbol(pTHX_ char *start)
79072805 12428{
97aff369 12429 dVAR;
02aa26ce 12430 register char *s = start; /* current position in buffer */
1b420867 12431 char *end;
79072805 12432 I32 len;
6136c704
AL
12433 char *d = PL_tokenbuf; /* start of temp holding space */
12434 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12435
7918f24d
NC
12436 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12437
1b420867
GS
12438 end = strchr(s, '\n');
12439 if (!end)
12440 end = PL_bufend;
12441 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12442
12443 /* die if we didn't have space for the contents of the <>,
1b420867 12444 or if it didn't end, or if we see a newline
02aa26ce
NT
12445 */
12446
bb7a0f54 12447 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12448 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12449 if (s >= end)
cea2e8a9 12450 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12451
fc36a67e 12452 s++;
02aa26ce
NT
12453
12454 /* check for <$fh>
12455 Remember, only scalar variables are interpreted as filehandles by
12456 this code. Anything more complex (e.g., <$fh{$num}>) will be
12457 treated as a glob() call.
12458 This code makes use of the fact that except for the $ at the front,
12459 a scalar variable and a filehandle look the same.
12460 */
4633a7c4 12461 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12462
12463 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12464 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12465 d++;
02aa26ce
NT
12466
12467 /* If we've tried to read what we allow filehandles to look like, and
12468 there's still text left, then it must be a glob() and not a getline.
12469 Use scan_str to pull out the stuff between the <> and treat it
12470 as nothing more than a string.
12471 */
12472
3280af22 12473 if (d - PL_tokenbuf != len) {
6154021b 12474 pl_yylval.ival = OP_GLOB;
5db06880 12475 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12476 if (!s)
cea2e8a9 12477 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12478 return s;
12479 }
395c3793 12480 else {
9b3023bc 12481 bool readline_overriden = FALSE;
6136c704 12482 GV *gv_readline;
9b3023bc 12483 GV **gvp;
02aa26ce 12484 /* we're in a filehandle read situation */
3280af22 12485 d = PL_tokenbuf;
02aa26ce
NT
12486
12487 /* turn <> into <ARGV> */
79072805 12488 if (!len)
689badd5 12489 Copy("ARGV",d,5,char);
02aa26ce 12490
9b3023bc 12491 /* Check whether readline() is overriden */
fafc274c 12492 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12493 if ((gv_readline
ba979b31 12494 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12495 ||
017a3ce5 12496 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12497 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12498 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12499 readline_overriden = TRUE;
12500
02aa26ce
NT
12501 /* if <$fh>, create the ops to turn the variable into a
12502 filehandle
12503 */
79072805 12504 if (*d == '$') {
02aa26ce
NT
12505 /* try to find it in the pad for this block, otherwise find
12506 add symbol table ops
12507 */
f8f98e0a 12508 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12509 if (tmp != NOT_IN_PAD) {
00b1698f 12510 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12511 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12512 HEK * const stashname = HvNAME_HEK(stash);
12513 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12514 sv_catpvs(sym, "::");
f558d5af
JH
12515 sv_catpv(sym, d+1);
12516 d = SvPVX(sym);
12517 goto intro_sym;
12518 }
12519 else {
6136c704 12520 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12521 o->op_targ = tmp;
9b3023bc
RGS
12522 PL_lex_op = readline_overriden
12523 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12524 append_elem(OP_LIST, o,
12525 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12526 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12527 }
a0d0e21e
LW
12528 }
12529 else {
f558d5af
JH
12530 GV *gv;
12531 ++d;
12532intro_sym:
12533 gv = gv_fetchpv(d,
12534 (PL_in_eval
12535 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12536 : GV_ADDMULTI),
f558d5af 12537 SVt_PV);
9b3023bc
RGS
12538 PL_lex_op = readline_overriden
12539 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12540 append_elem(OP_LIST,
12541 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12542 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12543 : (OP*)newUNOP(OP_READLINE, 0,
12544 newUNOP(OP_RV2SV, 0,
12545 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12546 }
7c6fadd6
RGS
12547 if (!readline_overriden)
12548 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12549 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12550 pl_yylval.ival = OP_NULL;
79072805 12551 }
02aa26ce
NT
12552
12553 /* If it's none of the above, it must be a literal filehandle
12554 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12555 else {
6136c704 12556 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12557 PL_lex_op = readline_overriden
12558 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12559 append_elem(OP_LIST,
12560 newGVOP(OP_GV, 0, gv),
12561 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12562 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12563 pl_yylval.ival = OP_NULL;
79072805
LW
12564 }
12565 }
02aa26ce 12566
79072805
LW
12567 return s;
12568}
12569
02aa26ce
NT
12570
12571/* scan_str
12572 takes: start position in buffer
09bef843
SB
12573 keep_quoted preserve \ on the embedded delimiter(s)
12574 keep_delims preserve the delimiters around the string
02aa26ce
NT
12575 returns: position to continue reading from buffer
12576 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12577 updates the read buffer.
12578
12579 This subroutine pulls a string out of the input. It is called for:
12580 q single quotes q(literal text)
12581 ' single quotes 'literal text'
12582 qq double quotes qq(interpolate $here please)
12583 " double quotes "interpolate $here please"
12584 qx backticks qx(/bin/ls -l)
12585 ` backticks `/bin/ls -l`
12586 qw quote words @EXPORT_OK = qw( func() $spam )
12587 m// regexp match m/this/
12588 s/// regexp substitute s/this/that/
12589 tr/// string transliterate tr/this/that/
12590 y/// string transliterate y/this/that/
12591 ($*@) sub prototypes sub foo ($)
09bef843 12592 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12593 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12594
12595 In most of these cases (all but <>, patterns and transliterate)
12596 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12597 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12598 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12599 calls scan_str().
4e553d73 12600
02aa26ce
NT
12601 It skips whitespace before the string starts, and treats the first
12602 character as the delimiter. If the delimiter is one of ([{< then
12603 the corresponding "close" character )]}> is used as the closing
12604 delimiter. It allows quoting of delimiters, and if the string has
12605 balanced delimiters ([{<>}]) it allows nesting.
12606
37fd879b
HS
12607 On success, the SV with the resulting string is put into lex_stuff or,
12608 if that is already non-NULL, into lex_repl. The second case occurs only
12609 when parsing the RHS of the special constructs s/// and tr/// (y///).
12610 For convenience, the terminating delimiter character is stuffed into
12611 SvIVX of the SV.
02aa26ce
NT
12612*/
12613
76e3520e 12614STATIC char *
09bef843 12615S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12616{
97aff369 12617 dVAR;
02aa26ce 12618 SV *sv; /* scalar value: string */
d3fcec1f 12619 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12620 register char *s = start; /* current position in the buffer */
12621 register char term; /* terminating character */
12622 register char *to; /* current position in the sv's data */
12623 I32 brackets = 1; /* bracket nesting level */
89491803 12624 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12625 I32 termcode; /* terminating char. code */
89ebb4a3 12626 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12627 STRLEN termlen; /* length of terminating string */
0331ef07 12628 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12629#ifdef PERL_MAD
12630 int stuffstart;
12631 char *tstart;
12632#endif
02aa26ce 12633
7918f24d
NC
12634 PERL_ARGS_ASSERT_SCAN_STR;
12635
02aa26ce 12636 /* skip space before the delimiter */
29595ff2
NC
12637 if (isSPACE(*s)) {
12638 s = PEEKSPACE(s);
12639 }
02aa26ce 12640
5db06880 12641#ifdef PERL_MAD
cd81e915
NC
12642 if (PL_realtokenstart >= 0) {
12643 stuffstart = PL_realtokenstart;
12644 PL_realtokenstart = -1;
5db06880
NC
12645 }
12646 else
12647 stuffstart = start - SvPVX(PL_linestr);
12648#endif
02aa26ce 12649 /* mark where we are, in case we need to report errors */
79072805 12650 CLINE;
02aa26ce
NT
12651
12652 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12653 term = *s;
220e2d4e
IH
12654 if (!UTF) {
12655 termcode = termstr[0] = term;
12656 termlen = 1;
12657 }
12658 else {
f3b9ce0f 12659 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12660 Copy(s, termstr, termlen, U8);
12661 if (!UTF8_IS_INVARIANT(term))
12662 has_utf8 = TRUE;
12663 }
b1c7b182 12664
02aa26ce 12665 /* mark where we are */
57843af0 12666 PL_multi_start = CopLINE(PL_curcop);
3280af22 12667 PL_multi_open = term;
02aa26ce
NT
12668
12669 /* find corresponding closing delimiter */
93a17b20 12670 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12671 termcode = termstr[0] = term = tmps[5];
12672
3280af22 12673 PL_multi_close = term;
79072805 12674
561b68a9
SH
12675 /* create a new SV to hold the contents. 79 is the SV's initial length.
12676 What a random number. */
7d0a29fe
NC
12677 sv = newSV_type(SVt_PVIV);
12678 SvGROW(sv, 80);
45977657 12679 SvIV_set(sv, termcode);
a0d0e21e 12680 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12681
12682 /* move past delimiter and try to read a complete string */
09bef843 12683 if (keep_delims)
220e2d4e
IH
12684 sv_catpvn(sv, s, termlen);
12685 s += termlen;
5db06880
NC
12686#ifdef PERL_MAD
12687 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12688 if (!PL_thisopen && !keep_delims) {
12689 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12690 stuffstart = s - SvPVX(PL_linestr);
12691 }
12692#endif
93a17b20 12693 for (;;) {
220e2d4e
IH
12694 if (PL_encoding && !UTF) {
12695 bool cont = TRUE;
12696
12697 while (cont) {
95a20fc0 12698 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12699 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12700 &offset, (char*)termstr, termlen);
6136c704
AL
12701 const char * const ns = SvPVX_const(PL_linestr) + offset;
12702 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12703
12704 for (; s < ns; s++) {
12705 if (*s == '\n' && !PL_rsfp)
12706 CopLINE_inc(PL_curcop);
12707 }
12708 if (!found)
12709 goto read_more_line;
12710 else {
12711 /* handle quoted delimiters */
52327caf 12712 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12713 const char *t;
95a20fc0 12714 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12715 t--;
12716 if ((svlast-1 - t) % 2) {
12717 if (!keep_quoted) {
12718 *(svlast-1) = term;
12719 *svlast = '\0';
12720 SvCUR_set(sv, SvCUR(sv) - 1);
12721 }
12722 continue;
12723 }
12724 }
12725 if (PL_multi_open == PL_multi_close) {
12726 cont = FALSE;
12727 }
12728 else {
f54cb97a
AL
12729 const char *t;
12730 char *w;
0331ef07 12731 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12732 /* At here, all closes are "was quoted" one,
12733 so we don't check PL_multi_close. */
12734 if (*t == '\\') {
12735 if (!keep_quoted && *(t+1) == PL_multi_open)
12736 t++;
12737 else
12738 *w++ = *t++;
12739 }
12740 else if (*t == PL_multi_open)
12741 brackets++;
12742
12743 *w = *t;
12744 }
12745 if (w < t) {
12746 *w++ = term;
12747 *w = '\0';
95a20fc0 12748 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12749 }
0331ef07 12750 last_off = w - SvPVX(sv);
220e2d4e
IH
12751 if (--brackets <= 0)
12752 cont = FALSE;
12753 }
12754 }
12755 }
12756 if (!keep_delims) {
12757 SvCUR_set(sv, SvCUR(sv) - 1);
12758 *SvEND(sv) = '\0';
12759 }
12760 break;
12761 }
12762
02aa26ce 12763 /* extend sv if need be */
3280af22 12764 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12765 /* set 'to' to the next character in the sv's string */
463ee0b2 12766 to = SvPVX(sv)+SvCUR(sv);
09bef843 12767
02aa26ce 12768 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12769 if (PL_multi_open == PL_multi_close) {
12770 for (; s < PL_bufend; s++,to++) {
02aa26ce 12771 /* embedded newlines increment the current line number */
3280af22 12772 if (*s == '\n' && !PL_rsfp)
57843af0 12773 CopLINE_inc(PL_curcop);
02aa26ce 12774 /* handle quoted delimiters */
3280af22 12775 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12776 if (!keep_quoted && s[1] == term)
a0d0e21e 12777 s++;
02aa26ce 12778 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12779 else
12780 *to++ = *s++;
12781 }
02aa26ce
NT
12782 /* terminate when run out of buffer (the for() condition), or
12783 have found the terminator */
220e2d4e
IH
12784 else if (*s == term) {
12785 if (termlen == 1)
12786 break;
f3b9ce0f 12787 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12788 break;
12789 }
63cd0674 12790 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12791 has_utf8 = TRUE;
93a17b20
LW
12792 *to = *s;
12793 }
12794 }
02aa26ce
NT
12795
12796 /* if the terminator isn't the same as the start character (e.g.,
12797 matched brackets), we have to allow more in the quoting, and
12798 be prepared for nested brackets.
12799 */
93a17b20 12800 else {
02aa26ce 12801 /* read until we run out of string, or we find the terminator */
3280af22 12802 for (; s < PL_bufend; s++,to++) {
02aa26ce 12803 /* embedded newlines increment the line count */
3280af22 12804 if (*s == '\n' && !PL_rsfp)
57843af0 12805 CopLINE_inc(PL_curcop);
02aa26ce 12806 /* backslashes can escape the open or closing characters */
3280af22 12807 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12808 if (!keep_quoted &&
12809 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12810 s++;
12811 else
12812 *to++ = *s++;
12813 }
02aa26ce 12814 /* allow nested opens and closes */
3280af22 12815 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12816 break;
3280af22 12817 else if (*s == PL_multi_open)
93a17b20 12818 brackets++;
63cd0674 12819 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12820 has_utf8 = TRUE;
93a17b20
LW
12821 *to = *s;
12822 }
12823 }
02aa26ce 12824 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12825 *to = '\0';
95a20fc0 12826 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12827
02aa26ce
NT
12828 /*
12829 * this next chunk reads more into the buffer if we're not done yet
12830 */
12831
b1c7b182
GS
12832 if (s < PL_bufend)
12833 break; /* handle case where we are done yet :-) */
79072805 12834
6a27c188 12835#ifndef PERL_STRICT_CR
95a20fc0 12836 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12837 if ((to[-2] == '\r' && to[-1] == '\n') ||
12838 (to[-2] == '\n' && to[-1] == '\r'))
12839 {
f63a84b2
LW
12840 to[-2] = '\n';
12841 to--;
95a20fc0 12842 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12843 }
12844 else if (to[-1] == '\r')
12845 to[-1] = '\n';
12846 }
95a20fc0 12847 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12848 to[-1] = '\n';
12849#endif
12850
220e2d4e 12851 read_more_line:
02aa26ce
NT
12852 /* if we're out of file, or a read fails, bail and reset the current
12853 line marker so we can report where the unterminated string began
12854 */
5db06880
NC
12855#ifdef PERL_MAD
12856 if (PL_madskills) {
c35e046a 12857 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12858 if (PL_thisstuff)
12859 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12860 else
cd81e915 12861 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12862 }
12863#endif
f0e67a1d
Z
12864 CopLINE_inc(PL_curcop);
12865 PL_bufptr = PL_bufend;
12866 if (!lex_next_chunk(0)) {
c07a80fd 12867 sv_free(sv);
eb160463 12868 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12869 return NULL;
79072805 12870 }
f0e67a1d 12871 s = PL_bufptr;
5db06880
NC
12872#ifdef PERL_MAD
12873 stuffstart = 0;
12874#endif
378cc40b 12875 }
4e553d73 12876
02aa26ce
NT
12877 /* at this point, we have successfully read the delimited string */
12878
220e2d4e 12879 if (!PL_encoding || UTF) {
5db06880
NC
12880#ifdef PERL_MAD
12881 if (PL_madskills) {
c35e046a 12882 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12883 const int len = s - tstart;
cd81e915 12884 if (PL_thisstuff)
c35e046a 12885 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12886 else
c35e046a 12887 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12888 if (!PL_thisclose && !keep_delims)
12889 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12890 }
12891#endif
12892
220e2d4e
IH
12893 if (keep_delims)
12894 sv_catpvn(sv, s, termlen);
12895 s += termlen;
12896 }
5db06880
NC
12897#ifdef PERL_MAD
12898 else {
12899 if (PL_madskills) {
c35e046a
AL
12900 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12901 const int len = s - tstart - termlen;
cd81e915 12902 if (PL_thisstuff)
c35e046a 12903 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12904 else
c35e046a 12905 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12906 if (!PL_thisclose && !keep_delims)
12907 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12908 }
12909 }
12910#endif
220e2d4e 12911 if (has_utf8 || PL_encoding)
b1c7b182 12912 SvUTF8_on(sv);
d0063567 12913
57843af0 12914 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12915
12916 /* if we allocated too much space, give some back */
93a17b20
LW
12917 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12918 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12919 SvPV_renew(sv, SvLEN(sv));
79072805 12920 }
02aa26ce
NT
12921
12922 /* decide whether this is the first or second quoted string we've read
12923 for this op
12924 */
4e553d73 12925
3280af22
NIS
12926 if (PL_lex_stuff)
12927 PL_lex_repl = sv;
79072805 12928 else
3280af22 12929 PL_lex_stuff = sv;
378cc40b
LW
12930 return s;
12931}
12932
02aa26ce
NT
12933/*
12934 scan_num
12935 takes: pointer to position in buffer
12936 returns: pointer to new position in buffer
6154021b 12937 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12938
12939 Read a number in any of the formats that Perl accepts:
12940
7fd134d9
JH
12941 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12942 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12943 0b[01](_?[01])*
12944 0[0-7](_?[0-7])*
12945 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12946
3280af22 12947 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12948 thing it reads.
12949
12950 If it reads a number without a decimal point or an exponent, it will
12951 try converting the number to an integer and see if it can do so
12952 without loss of precision.
12953*/
4e553d73 12954
378cc40b 12955char *
bfed75c6 12956Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12957{
97aff369 12958 dVAR;
bfed75c6 12959 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12960 register char *d; /* destination in temp buffer */
12961 register char *e; /* end of temp buffer */
86554af2 12962 NV nv; /* number read, as a double */
a0714e2c 12963 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12964 bool floatit; /* boolean: int or float? */
cbbf8932 12965 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12966 static char const number_too_long[] = "Number too long";
378cc40b 12967
7918f24d
NC
12968 PERL_ARGS_ASSERT_SCAN_NUM;
12969
02aa26ce
NT
12970 /* We use the first character to decide what type of number this is */
12971
378cc40b 12972 switch (*s) {
79072805 12973 default:
cea2e8a9 12974 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12975
02aa26ce 12976 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12977 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12978 case '0':
12979 {
02aa26ce
NT
12980 /* variables:
12981 u holds the "number so far"
4f19785b
WSI
12982 shift the power of 2 of the base
12983 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12984 overflowed was the number more than we can hold?
12985
12986 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12987 we in octal/hex/binary?" indicator to disallow hex characters
12988 when in octal mode.
02aa26ce 12989 */
9e24b6e2
JH
12990 NV n = 0.0;
12991 UV u = 0;
79072805 12992 I32 shift;
9e24b6e2 12993 bool overflowed = FALSE;
61f33854 12994 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12995 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12996 static const char* const bases[5] =
12997 { "", "binary", "", "octal", "hexadecimal" };
12998 static const char* const Bases[5] =
12999 { "", "Binary", "", "Octal", "Hexadecimal" };
13000 static const char* const maxima[5] =
13001 { "",
13002 "0b11111111111111111111111111111111",
13003 "",
13004 "037777777777",
13005 "0xffffffff" };
bfed75c6 13006 const char *base, *Base, *max;
378cc40b 13007
02aa26ce 13008 /* check for hex */
a674e8db 13009 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
13010 shift = 4;
13011 s += 2;
61f33854 13012 just_zero = FALSE;
a674e8db 13013 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13014 shift = 1;
13015 s += 2;
61f33854 13016 just_zero = FALSE;
378cc40b 13017 }
02aa26ce 13018 /* check for a decimal in disguise */
b78218b7 13019 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13020 goto decimal;
02aa26ce 13021 /* so it must be octal */
928753ea 13022 else {
378cc40b 13023 shift = 3;
928753ea
JH
13024 s++;
13025 }
13026
13027 if (*s == '_') {
a2a5de95 13028 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13029 "Misplaced _ in number");
13030 lastub = s++;
13031 }
9e24b6e2
JH
13032
13033 base = bases[shift];
13034 Base = Bases[shift];
13035 max = maxima[shift];
02aa26ce 13036
4f19785b 13037 /* read the rest of the number */
378cc40b 13038 for (;;) {
9e24b6e2 13039 /* x is used in the overflow test,
893fe2c2 13040 b is the digit we're adding on. */
9e24b6e2 13041 UV x, b;
55497cff 13042
378cc40b 13043 switch (*s) {
02aa26ce
NT
13044
13045 /* if we don't mention it, we're done */
378cc40b
LW
13046 default:
13047 goto out;
02aa26ce 13048
928753ea 13049 /* _ are ignored -- but warned about if consecutive */
de3bb511 13050 case '_':
a2a5de95
NC
13051 if (lastub && s == lastub + 1)
13052 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13053 "Misplaced _ in number");
928753ea 13054 lastub = s++;
de3bb511 13055 break;
02aa26ce
NT
13056
13057 /* 8 and 9 are not octal */
378cc40b 13058 case '8': case '9':
4f19785b 13059 if (shift == 3)
cea2e8a9 13060 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13061 /* FALL THROUGH */
02aa26ce
NT
13062
13063 /* octal digits */
4f19785b 13064 case '2': case '3': case '4':
378cc40b 13065 case '5': case '6': case '7':
4f19785b 13066 if (shift == 1)
cea2e8a9 13067 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13068 /* FALL THROUGH */
13069
13070 case '0': case '1':
02aa26ce 13071 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13072 goto digit;
02aa26ce
NT
13073
13074 /* hex digits */
378cc40b
LW
13075 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13076 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13077 /* make sure they said 0x */
378cc40b
LW
13078 if (shift != 4)
13079 goto out;
55497cff 13080 b = (*s++ & 7) + 9;
02aa26ce
NT
13081
13082 /* Prepare to put the digit we have onto the end
13083 of the number so far. We check for overflows.
13084 */
13085
55497cff 13086 digit:
61f33854 13087 just_zero = FALSE;
9e24b6e2
JH
13088 if (!overflowed) {
13089 x = u << shift; /* make room for the digit */
13090
13091 if ((x >> shift) != u
13092 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13093 overflowed = TRUE;
13094 n = (NV) u;
9b387841
NC
13095 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13096 "Integer overflow in %s number",
13097 base);
9e24b6e2
JH
13098 } else
13099 u = x | b; /* add the digit to the end */
13100 }
13101 if (overflowed) {
13102 n *= nvshift[shift];
13103 /* If an NV has not enough bits in its
13104 * mantissa to represent an UV this summing of
13105 * small low-order numbers is a waste of time
13106 * (because the NV cannot preserve the
13107 * low-order bits anyway): we could just
13108 * remember when did we overflow and in the
13109 * end just multiply n by the right
13110 * amount. */
13111 n += (NV) b;
55497cff 13112 }
378cc40b
LW
13113 break;
13114 }
13115 }
02aa26ce
NT
13116
13117 /* if we get here, we had success: make a scalar value from
13118 the number.
13119 */
378cc40b 13120 out:
928753ea
JH
13121
13122 /* final misplaced underbar check */
13123 if (s[-1] == '_') {
a2a5de95 13124 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13125 }
13126
9e24b6e2 13127 if (overflowed) {
a2a5de95
NC
13128 if (n > 4294967295.0)
13129 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13130 "%s number > %s non-portable",
13131 Base, max);
b081dd7e 13132 sv = newSVnv(n);
9e24b6e2
JH
13133 }
13134 else {
15041a67 13135#if UVSIZE > 4
a2a5de95
NC
13136 if (u > 0xffffffff)
13137 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13138 "%s number > %s non-portable",
13139 Base, max);
2cc4c2dc 13140#endif
b081dd7e 13141 sv = newSVuv(u);
9e24b6e2 13142 }
61f33854 13143 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13144 sv = new_constant(start, s - start, "integer",
eb0d8d16 13145 sv, NULL, NULL, 0);
61f33854 13146 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13147 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13148 }
13149 break;
02aa26ce
NT
13150
13151 /*
13152 handle decimal numbers.
13153 we're also sent here when we read a 0 as the first digit
13154 */
378cc40b
LW
13155 case '1': case '2': case '3': case '4': case '5':
13156 case '6': case '7': case '8': case '9': case '.':
13157 decimal:
3280af22
NIS
13158 d = PL_tokenbuf;
13159 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13160 floatit = FALSE;
02aa26ce
NT
13161
13162 /* read next group of digits and _ and copy into d */
de3bb511 13163 while (isDIGIT(*s) || *s == '_') {
4e553d73 13164 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13165 if -w is on
13166 */
93a17b20 13167 if (*s == '_') {
a2a5de95
NC
13168 if (lastub && s == lastub + 1)
13169 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13170 "Misplaced _ in number");
928753ea 13171 lastub = s++;
93a17b20 13172 }
fc36a67e 13173 else {
02aa26ce 13174 /* check for end of fixed-length buffer */
fc36a67e 13175 if (d >= e)
cea2e8a9 13176 Perl_croak(aTHX_ number_too_long);
02aa26ce 13177 /* if we're ok, copy the character */
378cc40b 13178 *d++ = *s++;
fc36a67e 13179 }
378cc40b 13180 }
02aa26ce
NT
13181
13182 /* final misplaced underbar check */
928753ea 13183 if (lastub && s == lastub + 1) {
a2a5de95 13184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13185 }
02aa26ce
NT
13186
13187 /* read a decimal portion if there is one. avoid
13188 3..5 being interpreted as the number 3. followed
13189 by .5
13190 */
2f3197b3 13191 if (*s == '.' && s[1] != '.') {
79072805 13192 floatit = TRUE;
378cc40b 13193 *d++ = *s++;
02aa26ce 13194
928753ea 13195 if (*s == '_') {
a2a5de95
NC
13196 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13197 "Misplaced _ in number");
928753ea
JH
13198 lastub = s;
13199 }
13200
13201 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13202 */
fc36a67e 13203 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13204 /* fixed length buffer check */
fc36a67e 13205 if (d >= e)
cea2e8a9 13206 Perl_croak(aTHX_ number_too_long);
928753ea 13207 if (*s == '_') {
a2a5de95
NC
13208 if (lastub && s == lastub + 1)
13209 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13210 "Misplaced _ in number");
928753ea
JH
13211 lastub = s;
13212 }
13213 else
fc36a67e 13214 *d++ = *s;
378cc40b 13215 }
928753ea
JH
13216 /* fractional part ending in underbar? */
13217 if (s[-1] == '_') {
a2a5de95
NC
13218 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13219 "Misplaced _ in number");
928753ea 13220 }
dd629d5b
GS
13221 if (*s == '.' && isDIGIT(s[1])) {
13222 /* oops, it's really a v-string, but without the "v" */
f4758303 13223 s = start;
dd629d5b
GS
13224 goto vstring;
13225 }
378cc40b 13226 }
02aa26ce
NT
13227
13228 /* read exponent part, if present */
3792a11b 13229 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13230 floatit = TRUE;
13231 s++;
02aa26ce
NT
13232
13233 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13234 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13235
7fd134d9
JH
13236 /* stray preinitial _ */
13237 if (*s == '_') {
a2a5de95
NC
13238 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13239 "Misplaced _ in number");
7fd134d9
JH
13240 lastub = s++;
13241 }
13242
02aa26ce 13243 /* allow positive or negative exponent */
378cc40b
LW
13244 if (*s == '+' || *s == '-')
13245 *d++ = *s++;
02aa26ce 13246
7fd134d9
JH
13247 /* stray initial _ */
13248 if (*s == '_') {
a2a5de95
NC
13249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13250 "Misplaced _ in number");
7fd134d9
JH
13251 lastub = s++;
13252 }
13253
7fd134d9
JH
13254 /* read digits of exponent */
13255 while (isDIGIT(*s) || *s == '_') {
13256 if (isDIGIT(*s)) {
13257 if (d >= e)
13258 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13259 *d++ = *s++;
7fd134d9
JH
13260 }
13261 else {
041457d9 13262 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13263 (!isDIGIT(s[1]) && s[1] != '_')))
13264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13265 "Misplaced _ in number");
b3b48e3e 13266 lastub = s++;
7fd134d9 13267 }
7fd134d9 13268 }
378cc40b 13269 }
02aa26ce 13270
02aa26ce 13271
0b7fceb9 13272 /*
58bb9ec3
NC
13273 We try to do an integer conversion first if no characters
13274 indicating "float" have been found.
0b7fceb9
MU
13275 */
13276
13277 if (!floatit) {
58bb9ec3 13278 UV uv;
6136c704 13279 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13280
13281 if (flags == IS_NUMBER_IN_UV) {
13282 if (uv <= IV_MAX)
b081dd7e 13283 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13284 else
b081dd7e 13285 sv = newSVuv(uv);
58bb9ec3
NC
13286 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13287 if (uv <= (UV) IV_MIN)
b081dd7e 13288 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13289 else
13290 floatit = TRUE;
13291 } else
13292 floatit = TRUE;
13293 }
0b7fceb9 13294 if (floatit) {
58bb9ec3
NC
13295 /* terminate the string */
13296 *d = '\0';
86554af2 13297 nv = Atof(PL_tokenbuf);
b081dd7e 13298 sv = newSVnv(nv);
86554af2 13299 }
86554af2 13300
eb0d8d16
NC
13301 if ( floatit
13302 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13303 const char *const key = floatit ? "float" : "integer";
13304 const STRLEN keylen = floatit ? 5 : 7;
13305 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13306 key, keylen, sv, NULL, NULL, 0);
13307 }
378cc40b 13308 break;
0b7fceb9 13309
e312add1 13310 /* if it starts with a v, it could be a v-string */
a7cb1f99 13311 case 'v':
dd629d5b 13312vstring:
561b68a9 13313 sv = newSV(5); /* preallocate storage space */
65b06e02 13314 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13315 break;
79072805 13316 }
a687059c 13317
02aa26ce
NT
13318 /* make the op for the constant and return */
13319
a86a20aa 13320 if (sv)
b73d6f50 13321 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13322 else
5f66b61c 13323 lvalp->opval = NULL;
a687059c 13324
73d840c0 13325 return (char *)s;
378cc40b
LW
13326}
13327
76e3520e 13328STATIC char *
cea2e8a9 13329S_scan_formline(pTHX_ register char *s)
378cc40b 13330{
97aff369 13331 dVAR;
79072805 13332 register char *eol;
378cc40b 13333 register char *t;
6136c704 13334 SV * const stuff = newSVpvs("");
79072805 13335 bool needargs = FALSE;
c5ee2135 13336 bool eofmt = FALSE;
5db06880
NC
13337#ifdef PERL_MAD
13338 char *tokenstart = s;
4f61fd4b
JC
13339 SV* savewhite = NULL;
13340
5db06880 13341 if (PL_madskills) {
cd81e915
NC
13342 savewhite = PL_thiswhite;
13343 PL_thiswhite = 0;
5db06880
NC
13344 }
13345#endif
378cc40b 13346
7918f24d
NC
13347 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13348
79072805 13349 while (!needargs) {
a1b95068 13350 if (*s == '.') {
c35e046a 13351 t = s+1;
51882d45 13352#ifdef PERL_STRICT_CR
c35e046a
AL
13353 while (SPACE_OR_TAB(*t))
13354 t++;
51882d45 13355#else
c35e046a
AL
13356 while (SPACE_OR_TAB(*t) || *t == '\r')
13357 t++;
51882d45 13358#endif
c5ee2135
WL
13359 if (*t == '\n' || t == PL_bufend) {
13360 eofmt = TRUE;
79072805 13361 break;
c5ee2135 13362 }
79072805 13363 }
3280af22 13364 if (PL_in_eval && !PL_rsfp) {
07409e01 13365 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13366 if (!eol++)
3280af22 13367 eol = PL_bufend;
0f85fab0
LW
13368 }
13369 else
3280af22 13370 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13371 if (*s != '#') {
a0d0e21e
LW
13372 for (t = s; t < eol; t++) {
13373 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13374 needargs = FALSE;
13375 goto enough; /* ~~ must be first line in formline */
378cc40b 13376 }
a0d0e21e
LW
13377 if (*t == '@' || *t == '^')
13378 needargs = TRUE;
378cc40b 13379 }
7121b347
MG
13380 if (eol > s) {
13381 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13382#ifndef PERL_STRICT_CR
7121b347
MG
13383 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13384 char *end = SvPVX(stuff) + SvCUR(stuff);
13385 end[-2] = '\n';
13386 end[-1] = '\0';
b162af07 13387 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13388 }
2dc4c65b 13389#endif
7121b347
MG
13390 }
13391 else
13392 break;
79072805 13393 }
95a20fc0 13394 s = (char*)eol;
3280af22 13395 if (PL_rsfp) {
f0e67a1d 13396 bool got_some;
5db06880
NC
13397#ifdef PERL_MAD
13398 if (PL_madskills) {
cd81e915
NC
13399 if (PL_thistoken)
13400 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13401 else
cd81e915 13402 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13403 }
13404#endif
f0e67a1d
Z
13405 PL_bufptr = PL_bufend;
13406 CopLINE_inc(PL_curcop);
13407 got_some = lex_next_chunk(0);
13408 CopLINE_dec(PL_curcop);
13409 s = PL_bufptr;
5db06880 13410#ifdef PERL_MAD
f0e67a1d 13411 tokenstart = PL_bufptr;
5db06880 13412#endif
f0e67a1d 13413 if (!got_some)
378cc40b 13414 break;
378cc40b 13415 }
463ee0b2 13416 incline(s);
79072805 13417 }
a0d0e21e
LW
13418 enough:
13419 if (SvCUR(stuff)) {
3280af22 13420 PL_expect = XTERM;
79072805 13421 if (needargs) {
3280af22 13422 PL_lex_state = LEX_NORMAL;
cd81e915 13423 start_force(PL_curforce);
9ded7720 13424 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13425 force_next(',');
13426 }
a0d0e21e 13427 else
3280af22 13428 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13429 if (!IN_BYTES) {
95a20fc0 13430 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13431 SvUTF8_on(stuff);
13432 else if (PL_encoding)
13433 sv_recode_to_utf8(stuff, PL_encoding);
13434 }
cd81e915 13435 start_force(PL_curforce);
9ded7720 13436 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13437 force_next(THING);
cd81e915 13438 start_force(PL_curforce);
9ded7720 13439 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13440 force_next(LSTOP);
378cc40b 13441 }
79072805 13442 else {
8990e307 13443 SvREFCNT_dec(stuff);
c5ee2135
WL
13444 if (eofmt)
13445 PL_lex_formbrack = 0;
3280af22 13446 PL_bufptr = s;
79072805 13447 }
5db06880
NC
13448#ifdef PERL_MAD
13449 if (PL_madskills) {
cd81e915
NC
13450 if (PL_thistoken)
13451 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13452 else
cd81e915
NC
13453 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13454 PL_thiswhite = savewhite;
5db06880
NC
13455 }
13456#endif
79072805 13457 return s;
378cc40b 13458}
a687059c 13459
ba6d6ac9 13460I32
864dbfa3 13461Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13462{
97aff369 13463 dVAR;
a3b680e6 13464 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13465 CV* const outsidecv = PL_compcv;
8990e307 13466
3280af22
NIS
13467 if (PL_compcv) {
13468 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13469 }
7766f137 13470 SAVEI32(PL_subline);
3280af22 13471 save_item(PL_subname);
3280af22 13472 SAVESPTR(PL_compcv);
3280af22 13473
ea726b52 13474 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13475 CvFLAGS(PL_compcv) |= flags;
13476
57843af0 13477 PL_subline = CopLINE(PL_curcop);
dd2155a4 13478 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13479 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13480 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13481
8990e307
LW
13482 return oldsavestack_ix;
13483}
13484
084592ab
CN
13485#ifdef __SC__
13486#pragma segment Perl_yylex
13487#endif
af41e527
NC
13488static int
13489S_yywarn(pTHX_ const char *const s)
8990e307 13490{
97aff369 13491 dVAR;
7918f24d
NC
13492
13493 PERL_ARGS_ASSERT_YYWARN;
13494
faef0170 13495 PL_in_eval |= EVAL_WARNONLY;
748a9306 13496 yyerror(s);
faef0170 13497 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13498 return 0;
8990e307
LW
13499}
13500
13501int
15f169a1 13502Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13503{
97aff369 13504 dVAR;
bfed75c6
AL
13505 const char *where = NULL;
13506 const char *context = NULL;
68dc0745 13507 int contlen = -1;
46fc3d4c 13508 SV *msg;
5912531f 13509 int yychar = PL_parser->yychar;
463ee0b2 13510
7918f24d
NC
13511 PERL_ARGS_ASSERT_YYERROR;
13512
3280af22 13513 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13514 where = "at EOF";
8bcfe651
TM
13515 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13516 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13517 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13518 /*
13519 Only for NetWare:
13520 The code below is removed for NetWare because it abends/crashes on NetWare
13521 when the script has error such as not having the closing quotes like:
13522 if ($var eq "value)
13523 Checking of white spaces is anyway done in NetWare code.
13524 */
13525#ifndef NETWARE
3280af22
NIS
13526 while (isSPACE(*PL_oldoldbufptr))
13527 PL_oldoldbufptr++;
f355267c 13528#endif
3280af22
NIS
13529 context = PL_oldoldbufptr;
13530 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13531 }
8bcfe651
TM
13532 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13533 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13534 /*
13535 Only for NetWare:
13536 The code below is removed for NetWare because it abends/crashes on NetWare
13537 when the script has error such as not having the closing quotes like:
13538 if ($var eq "value)
13539 Checking of white spaces is anyway done in NetWare code.
13540 */
13541#ifndef NETWARE
3280af22
NIS
13542 while (isSPACE(*PL_oldbufptr))
13543 PL_oldbufptr++;
f355267c 13544#endif
3280af22
NIS
13545 context = PL_oldbufptr;
13546 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13547 }
13548 else if (yychar > 255)
68dc0745 13549 where = "next token ???";
12fbd33b 13550 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13551 if (PL_lex_state == LEX_NORMAL ||
13552 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13553 where = "at end of line";
3280af22 13554 else if (PL_lex_inpat)
68dc0745 13555 where = "within pattern";
463ee0b2 13556 else
68dc0745 13557 where = "within string";
463ee0b2 13558 }
46fc3d4c 13559 else {
84bafc02 13560 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13561 if (yychar < 32)
cea2e8a9 13562 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13563 else if (isPRINT_LC(yychar)) {
88c9ea1e 13564 const char string = yychar;
5e7aa789
NC
13565 sv_catpvn(where_sv, &string, 1);
13566 }
463ee0b2 13567 else
cea2e8a9 13568 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13569 where = SvPVX_const(where_sv);
463ee0b2 13570 }
46fc3d4c 13571 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13572 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13573 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13574 if (context)
cea2e8a9 13575 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13576 else
cea2e8a9 13577 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13578 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13579 Perl_sv_catpvf(aTHX_ msg,
57def98f 13580 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13581 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13582 PL_multi_end = 0;
a0d0e21e 13583 }
500960a6 13584 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13585 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13586 }
463ee0b2 13587 else
5a844595 13588 qerror(msg);
c7d6bfb2
GS
13589 if (PL_error_count >= 10) {
13590 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13591 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13592 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13593 else
13594 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13595 OutCopFILE(PL_curcop));
c7d6bfb2 13596 }
3280af22 13597 PL_in_my = 0;
5c284bb0 13598 PL_in_my_stash = NULL;
463ee0b2
LW
13599 return 0;
13600}
084592ab
CN
13601#ifdef __SC__
13602#pragma segment Main
13603#endif
4e35701f 13604
b250498f 13605STATIC char*
3ae08724 13606S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13607{
97aff369 13608 dVAR;
f54cb97a 13609 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13610
13611 PERL_ARGS_ASSERT_SWALLOW_BOM;
13612
7aa207d6 13613 switch (s[0]) {
4e553d73
NIS
13614 case 0xFF:
13615 if (s[1] == 0xFE) {
ee6ba15d 13616 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13617 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13618 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13619#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13620 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13621 s += 2;
dea0fc0b 13622 if (PL_bufend > (char*)s) {
81a923f4 13623 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13624 }
b250498f 13625#else
ee6ba15d 13626 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13627#endif
01ec43d0
GS
13628 }
13629 break;
78ae23f5 13630 case 0xFE:
7aa207d6 13631 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13632#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13633 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13634 s += 2;
13635 if (PL_bufend > (char *)s) {
81a923f4 13636 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13637 }
b250498f 13638#else
ee6ba15d 13639 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13640#endif
01ec43d0
GS
13641 }
13642 break;
3ae08724
GS
13643 case 0xEF:
13644 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13645 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13646 s += 3; /* UTF-8 */
13647 }
13648 break;
13649 case 0:
7aa207d6
JH
13650 if (slen > 3) {
13651 if (s[1] == 0) {
13652 if (s[2] == 0xFE && s[3] == 0xFF) {
13653 /* UTF-32 big-endian */
ee6ba15d 13654 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13655 }
13656 }
13657 else if (s[2] == 0 && s[3] != 0) {
13658 /* Leading bytes
13659 * 00 xx 00 xx
13660 * are a good indicator of UTF-16BE. */
ee6ba15d 13661#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13662 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13663 s = add_utf16_textfilter(s, FALSE);
13664#else
13665 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13666#endif
7aa207d6 13667 }
01ec43d0 13668 }
e294cc5d
JH
13669#ifdef EBCDIC
13670 case 0xDD:
13671 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13672 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13673 s += 4; /* UTF-8 */
13674 }
13675 break;
13676#endif
13677
7aa207d6
JH
13678 default:
13679 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13680 /* Leading bytes
13681 * xx 00 xx 00
13682 * are a good indicator of UTF-16LE. */
ee6ba15d 13683#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13684 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13685 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13686#else
13687 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13688#endif
7aa207d6 13689 }
01ec43d0 13690 }
b8f84bb2 13691 return (char*)s;
b250498f 13692}
4755096e 13693
6e3aabd6
GS
13694
13695#ifndef PERL_NO_UTF16_FILTER
13696static I32
a28af015 13697S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13698{
97aff369 13699 dVAR;
f3040f2c 13700 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13701 /* We re-use this each time round, throwing the contents away before we
13702 return. */
2a773401 13703 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13704 SV *const utf8_buffer = filter;
c28d6105 13705 IV status = IoPAGE(filter);
f2338a2e 13706 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13707 I32 retval;
c8b0cbae 13708
c85ae797
NC
13709 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13710
c8b0cbae
NC
13711 /* As we're automatically added, at the lowest level, and hence only called
13712 from this file, we can be sure that we're not called in block mode. Hence
13713 don't bother writing code to deal with block mode. */
13714 if (maxlen) {
13715 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13716 }
c28d6105
NC
13717 if (status < 0) {
13718 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13719 }
1de9afcd 13720 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13721 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13722 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13723 reverse ? 'l' : 'b', idx, maxlen, status,
13724 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13725
13726 while (1) {
13727 STRLEN chars;
13728 STRLEN have;
dea0fc0b 13729 I32 newlen;
2a773401 13730 U8 *end;
c28d6105
NC
13731 /* First, look in our buffer of existing UTF-8 data: */
13732 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13733
13734 if (nl) {
13735 ++nl;
13736 } else if (status == 0) {
13737 /* EOF */
13738 IoPAGE(filter) = 0;
13739 nl = SvEND(utf8_buffer);
13740 }
13741 if (nl) {
d2d1d4de
NC
13742 STRLEN got = nl - SvPVX(utf8_buffer);
13743 /* Did we have anything to append? */
13744 retval = got != 0;
13745 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13746 /* Everything else in this code works just fine if SVp_POK isn't
13747 set. This, however, needs it, and we need it to work, else
13748 we loop infinitely because the buffer is never consumed. */
13749 sv_chop(utf8_buffer, nl);
13750 break;
13751 }
ba77e4cc 13752
c28d6105
NC
13753 /* OK, not a complete line there, so need to read some more UTF-16.
13754 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13755 while (1) {
13756 if (status <= 0)
13757 break;
13758 if (SvCUR(utf16_buffer) >= 2) {
13759 /* Location of the high octet of the last complete code point.
13760 Gosh, UTF-16 is a pain. All the benefits of variable length,
13761 *coupled* with all the benefits of partial reads and
13762 endianness. */
13763 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13764 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13765
13766 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13767 break;
13768 }
13769
13770 /* We have the first half of a surrogate. Read more. */
13771 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13772 }
c28d6105 13773
c28d6105
NC
13774 status = FILTER_READ(idx + 1, utf16_buffer,
13775 160 + (SvCUR(utf16_buffer) & 1));
13776 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13777 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13778 if (status < 0) {
13779 /* Error */
13780 IoPAGE(filter) = status;
13781 return status;
13782 }
13783 }
13784
13785 chars = SvCUR(utf16_buffer) >> 1;
13786 have = SvCUR(utf8_buffer);
13787 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13788
aa6dbd60 13789 if (reverse) {
c28d6105
NC
13790 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13791 (U8*)SvPVX_const(utf8_buffer) + have,
13792 chars * 2, &newlen);
aa6dbd60 13793 } else {
2a773401 13794 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13795 (U8*)SvPVX_const(utf8_buffer) + have,
13796 chars * 2, &newlen);
2a773401 13797 }
c28d6105 13798 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13799 *end = '\0';
c28d6105 13800
e07286ed
NC
13801 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13802 it's private to us, and utf16_to_utf8{,reversed} take a
13803 (pointer,length) pair, rather than a NUL-terminated string. */
13804 if(SvCUR(utf16_buffer) & 1) {
13805 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13806 SvCUR_set(utf16_buffer, 1);
13807 } else {
13808 SvCUR_set(utf16_buffer, 0);
13809 }
2a773401 13810 }
c28d6105
NC
13811 DEBUG_P(PerlIO_printf(Perl_debug_log,
13812 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13813 status,
13814 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13815 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13816 return retval;
6e3aabd6 13817}
81a923f4
NC
13818
13819static U8 *
13820S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13821{
2a773401 13822 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13823
c85ae797
NC
13824 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13825
c28d6105 13826 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13827 sv_setpvs(filter, "");
2a773401 13828 IoLINES(filter) = reversed;
c28d6105
NC
13829 IoPAGE(filter) = 1; /* Not EOF */
13830
13831 /* Sadly, we have to return a valid pointer, come what may, so we have to
13832 ignore any error return from this. */
13833 SvCUR_set(PL_linestr, 0);
13834 if (FILTER_READ(0, PL_linestr, 0)) {
13835 SvUTF8_on(PL_linestr);
81a923f4 13836 } else {
c28d6105 13837 SvUTF8_on(PL_linestr);
81a923f4 13838 }
c28d6105 13839 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13840 return (U8*)SvPVX(PL_linestr);
13841}
6e3aabd6 13842#endif
9f4817db 13843
f333445c
JP
13844/*
13845Returns a pointer to the next character after the parsed
13846vstring, as well as updating the passed in sv.
13847
13848Function must be called like
13849
561b68a9 13850 sv = newSV(5);
65b06e02 13851 s = scan_vstring(s,e,sv);
f333445c 13852
65b06e02 13853where s and e are the start and end of the string.
f333445c
JP
13854The sv should already be large enough to store the vstring
13855passed in, for performance reasons.
13856
13857*/
13858
13859char *
15f169a1 13860Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13861{
97aff369 13862 dVAR;
bfed75c6
AL
13863 const char *pos = s;
13864 const char *start = s;
7918f24d
NC
13865
13866 PERL_ARGS_ASSERT_SCAN_VSTRING;
13867
f333445c 13868 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13869 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13870 pos++;
f333445c
JP
13871 if ( *pos != '.') {
13872 /* this may not be a v-string if followed by => */
bfed75c6 13873 const char *next = pos;
65b06e02 13874 while (next < e && isSPACE(*next))
8fc7bb1c 13875 ++next;
65b06e02 13876 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13877 /* return string not v-string */
13878 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13879 return (char *)pos;
f333445c
JP
13880 }
13881 }
13882
13883 if (!isALPHA(*pos)) {
89ebb4a3 13884 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13885
d4c19fe8
AL
13886 if (*s == 'v')
13887 s++; /* get past 'v' */
f333445c 13888
76f68e9b 13889 sv_setpvs(sv, "");
f333445c
JP
13890
13891 for (;;) {
d4c19fe8 13892 /* this is atoi() that tolerates underscores */
0bd48802
AL
13893 U8 *tmpend;
13894 UV rev = 0;
d4c19fe8
AL
13895 const char *end = pos;
13896 UV mult = 1;
13897 while (--end >= s) {
13898 if (*end != '_') {
13899 const UV orev = rev;
f333445c
JP
13900 rev += (*end - '0') * mult;
13901 mult *= 10;
9b387841
NC
13902 if (orev > rev)
13903 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13904 "Integer overflow in decimal number");
f333445c
JP
13905 }
13906 }
13907#ifdef EBCDIC
13908 if (rev > 0x7FFFFFFF)
13909 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13910#endif
13911 /* Append native character for the rev point */
13912 tmpend = uvchr_to_utf8(tmpbuf, rev);
13913 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13914 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13915 SvUTF8_on(sv);
65b06e02 13916 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13917 s = ++pos;
13918 else {
13919 s = pos;
13920 break;
13921 }
65b06e02 13922 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13923 pos++;
13924 }
13925 SvPOK_on(sv);
13926 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13927 SvRMAGICAL_on(sv);
13928 }
73d840c0 13929 return (char *)s;
f333445c
JP
13930}
13931
88e1f1a2
JV
13932int
13933Perl_keyword_plugin_standard(pTHX_
13934 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13935{
13936 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13937 PERL_UNUSED_CONTEXT;
13938 PERL_UNUSED_ARG(keyword_ptr);
13939 PERL_UNUSED_ARG(keyword_len);
13940 PERL_UNUSED_ARG(op_ptr);
13941 return KEYWORD_PLUGIN_DECLINE;
13942}
13943
1da4ca5f 13944/*
28ac2b49
Z
13945=for apidoc Amx|OP *|parse_fullstmt|U32 flags
13946
13947Parse a single complete Perl statement. This may be a normal imperative
13948statement, including optional label, or a declaration that has
13949compile-time effect. It is up to the caller to ensure that the dynamic
13950parser state (L</PL_parser> et al) is correctly set to reflect the source
13951of the code to be parsed and the lexical context for the statement.
13952
13953The op tree representing the statement is returned. This may be a
13954null pointer if the statement is null, for example if it was actually
13955a subroutine definition (which has compile-time side effects). If not
13956null, it will be the result of a L</newSTATEOP> call, normally including
13957a C<nextstate> or equivalent op.
13958
13959If an error occurs in parsing or compilation, in most cases a valid op
13960tree (most likely null) is returned anyway. The error is reflected in
13961the parser state, normally resulting in a single exception at the top
13962level of parsing which covers all the compilation errors that occurred.
13963Some compilation errors, however, will throw an exception immediately.
13964
13965The I<flags> parameter is reserved for future use, and must always
13966be zero.
13967
13968=cut
13969*/
13970
13971OP *
13972Perl_parse_fullstmt(pTHX_ U32 flags)
13973{
13974 OP *fullstmtop;
13975 if (flags)
13976 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13977 ENTER;
13978 SAVEVPTR(PL_eval_root);
13979 PL_eval_root = NULL;
13980 if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
13981 qerror(Perl_mess(aTHX_ "Parse error"));
13982 fullstmtop = PL_eval_root;
13983 LEAVE;
13984 return fullstmtop;
13985}
13986
ea25a9b2 13987void
f7e3d326 13988Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 13989{
f7e3d326 13990 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2
Z
13991 deprecate("qw(...) as parentheses");
13992 force_next(')');
13993 if (qwlist->op_type == OP_STUB) {
13994 op_free(qwlist);
13995 }
13996 else {
3d8e05a0 13997 start_force(PL_curforce);
ea25a9b2
Z
13998 NEXTVAL_NEXTTOKE.opval = qwlist;
13999 force_next(THING);
14000 }
14001 force_next('(');
14002}
14003
28ac2b49 14004/*
1da4ca5f
NC
14005 * Local variables:
14006 * c-indentation-style: bsd
14007 * c-basic-offset: 4
14008 * indent-tabs-mode: t
14009 * End:
14010 *
37442d52
RGS
14011 * ex: set ts=8 sts=4 sw=4 noet:
14012 */