This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
An implementation of change 29735 for blead (PL_curcop could be NULL)
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
52
53#ifdef PERL_MAD
54# define PL_endwhite (PL_parser->endwhite)
55# define PL_faketokens (PL_parser->faketokens)
56# define PL_lasttoke (PL_parser->lasttoke)
57# define PL_nextwhite (PL_parser->nextwhite)
58# define PL_realtokenstart (PL_parser->realtokenstart)
59# define PL_skipwhite (PL_parser->skipwhite)
60# define PL_thisclose (PL_parser->thisclose)
61# define PL_thismad (PL_parser->thismad)
62# define PL_thisopen (PL_parser->thisopen)
63# define PL_thisstuff (PL_parser->thisstuff)
64# define PL_thistoken (PL_parser->thistoken)
65# define PL_thiswhite (PL_parser->thiswhite)
66#endif
67
3cbf51f5
DM
68static int
69S_pending_ident(pTHX);
199e78b7 70
0bd48802 71static const char ident_too_long[] = "Identifier too long";
c445ea15 72static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 73
acfe0abc 74static void restore_rsfp(pTHX_ void *f);
6e3aabd6 75#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
76static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 78#endif
51371543 79
29595ff2 80#ifdef PERL_MAD
29595ff2 81# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 82# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 83#else
5db06880 84# define CURMAD(slot,sv)
9ded7720 85# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
86#endif
87
9059aa12
LW
88#define XFAKEBRACK 128
89#define XENUMMASK 127
90
39e02b42
JH
91#ifdef USE_UTF8_SCRIPTS
92# define UTF (!IN_BYTES)
2b9d42f0 93#else
746b446a 94# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 95#endif
a0ed51b3 96
61f0cdd9 97/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
98 * 1999-02-27 mjd-perl-patch@plover.com */
99#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
100
bf4acbe4
GS
101/* On MacOS, respect nonbreaking spaces */
102#ifdef MACOS_TRADITIONAL
103#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
104#else
105#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
106#endif
107
ffb4593c
NT
108/* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
79072805
LW
110 * can get by with a single comparison (if the compiler is smart enough).
111 */
112
fb73857a 113/* #define LEX_NOTPARSING 11 is done in perl.h. */
114
b6007c36
DM
115#define LEX_NORMAL 10 /* normal code (ie not within "...") */
116#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119#define LEX_INTERPSTART 6 /* expecting the start of a $var */
120
121 /* at end of code, eg "$x" followed by: */
122#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
124
125#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127#define LEX_INTERPCONST 2 /* NOT USED */
128#define LEX_FORMLINE 1 /* expecting a format line */
129#define LEX_KNOWNEXT 0 /* next token known; just return it */
130
79072805 131
bbf60fe6 132#ifdef DEBUGGING
27da23d5 133static const char* const lex_state_names[] = {
bbf60fe6
DM
134 "KNOWNEXT",
135 "FORMLINE",
136 "INTERPCONST",
137 "INTERPCONCAT",
138 "INTERPENDMAYBE",
139 "INTERPEND",
140 "INTERPSTART",
141 "INTERPPUSH",
142 "INTERPCASEMOD",
143 "INTERPNORMAL",
144 "NORMAL"
145};
146#endif
147
79072805
LW
148#ifdef ff_next
149#undef ff_next
d48672a2
LW
150#endif
151
79072805 152#include "keywords.h"
fe14fcc3 153
ffb4593c
NT
154/* CLINE is a macro that ensures PL_copline has a sane value */
155
ae986130
LW
156#ifdef CLINE
157#undef CLINE
158#endif
57843af0 159#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 160
5db06880 161#ifdef PERL_MAD
29595ff2
NC
162# define SKIPSPACE0(s) skipspace0(s)
163# define SKIPSPACE1(s) skipspace1(s)
164# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165# define PEEKSPACE(s) skipspace2(s,0)
166#else
167# define SKIPSPACE0(s) skipspace(s)
168# define SKIPSPACE1(s) skipspace(s)
169# define SKIPSPACE2(s,tsv) skipspace(s)
170# define PEEKSPACE(s) skipspace(s)
171#endif
172
ffb4593c
NT
173/*
174 * Convenience functions to return different tokens and prime the
9cbb5ea2 175 * lexer for the next token. They all take an argument.
ffb4593c
NT
176 *
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
2d2e263d 187 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
188 * BOop : bitwise or or xor
189 * BAop : bitwise and
190 * SHop : shift operator
191 * PWop : power operator
9cbb5ea2 192 * PMop : pattern-matching operator
ffb4593c
NT
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
e5edeb50 196 * Rop : relational operator <= != gt
ffb4593c
NT
197 *
198 * Also see LOP and lop() below.
199 */
200
998054bd 201#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 202# define REPORT(retval) tokereport((I32)retval)
998054bd 203#else
bbf60fe6 204# define REPORT(retval) (retval)
998054bd
SC
205#endif
206
bbf60fe6
DM
207#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 227
a687059c
LW
228/* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
a687059c 232 */
376fcdbf
AL
233#define UNI2(f,x) { \
234 yylval.ival = f; \
235 PL_expect = x; \
236 PL_bufptr = s; \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
239 if (*s == '(') \
240 return REPORT( (int)FUNC1 ); \
29595ff2 241 s = PEEKSPACE(s); \
376fcdbf
AL
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
243 }
6f33ba73
RGS
244#define UNI(f) UNI2(f,XTERM)
245#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 246
376fcdbf
AL
247#define UNIBRACK(f) { \
248 yylval.ival = f; \
249 PL_bufptr = s; \
250 PL_last_uni = PL_oldbufptr; \
251 if (*s == '(') \
252 return REPORT( (int)FUNC1 ); \
29595ff2 253 s = PEEKSPACE(s); \
376fcdbf
AL
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
255 }
79072805 256
9f68db38 257/* grandfather return to old style */
3280af22 258#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 259
8fa7f367
JH
260#ifdef DEBUGGING
261
bbf60fe6
DM
262/* how to interpret the yylval associated with the token */
263enum token_type {
264 TOKENTYPE_NONE,
265 TOKENTYPE_IVAL,
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
267 TOKENTYPE_PVAL,
268 TOKENTYPE_OPVAL,
269 TOKENTYPE_GVVAL
270};
271
6d4a66ac
NC
272static struct debug_tokens {
273 const int token;
274 enum token_type type;
275 const char *name;
276} const debug_tokens[] =
9041c2e3 277{
bbf60fe6
DM
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 343 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
344};
345
346/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 347
bbf60fe6 348STATIC int
f5bd084c 349S_tokereport(pTHX_ I32 rv)
bbf60fe6 350{
97aff369 351 dVAR;
bbf60fe6 352 if (DEBUG_T_TEST) {
bd61b366 353 const char *name = NULL;
bbf60fe6 354 enum token_type type = TOKENTYPE_NONE;
f54cb97a 355 const struct debug_tokens *p;
396482e1 356 SV* const report = newSVpvs("<== ");
bbf60fe6 357
f54cb97a 358 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
359 if (p->token == (int)rv) {
360 name = p->name;
361 type = p->type;
362 break;
363 }
364 }
365 if (name)
54667de8 366 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
369 else if (!rv)
396482e1 370 sv_catpvs(report, "EOF");
bbf60fe6
DM
371 else
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
373 switch (type) {
374 case TOKENTYPE_NONE:
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
376 break;
377 case TOKENTYPE_IVAL:
e4584336 378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
379 break;
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
383 break;
384 case TOKENTYPE_PVAL:
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
386 break;
387 case TOKENTYPE_OPVAL:
b6007c36 388 if (yylval.opval) {
401441c0 389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 390 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
394 }
395
396 }
401441c0 397 else
396482e1 398 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
399 break;
400 }
b6007c36 401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
402 };
403 return (int)rv;
998054bd
SC
404}
405
b6007c36
DM
406
407/* print the buffer with suitable escapes */
408
409STATIC void
410S_printbuf(pTHX_ const char* fmt, const char* s)
411{
396482e1 412 SV* const tmp = newSVpvs("");
b6007c36
DM
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
414 SvREFCNT_dec(tmp);
415}
416
8fa7f367
JH
417#endif
418
ffb4593c
NT
419/*
420 * S_ao
421 *
c963b151
BD
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
424 */
425
76e3520e 426STATIC int
cea2e8a9 427S_ao(pTHX_ int toketype)
a0d0e21e 428{
97aff369 429 dVAR;
3280af22
NIS
430 if (*PL_bufptr == '=') {
431 PL_bufptr++;
a0d0e21e
LW
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
c963b151
BD
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
438 toketype = ASSIGNOP;
439 }
440 return toketype;
441}
442
ffb4593c
NT
443/*
444 * S_no_op
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
454 */
455
76e3520e 456STATIC void
bfed75c6 457S_no_op(pTHX_ const char *what, char *s)
463ee0b2 458{
97aff369 459 dVAR;
9d4ba2ae
AL
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 462
1189a94a
GS
463 if (!s)
464 s = oldbp;
07c798fb 465 else
1189a94a 466 PL_bufptr = s;
cea2e8a9 467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
468 if (ckWARN_d(WARN_SYNTAX)) {
469 if (is_first)
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 473 const char *t;
c35e046a
AL
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
475 NOOP;
56da5a46
RGS
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
551405c4 479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
480 }
481 else {
482 assert(s >= oldbp);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 485 }
07c798fb 486 }
3280af22 487 PL_bufptr = oldbp;
8990e307
LW
488}
489
ffb4593c
NT
490/*
491 * S_missingterm
492 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 493 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
496 * This is fatal.
497 */
498
76e3520e 499STATIC void
cea2e8a9 500S_missingterm(pTHX_ char *s)
8990e307 501{
97aff369 502 dVAR;
8990e307
LW
503 char tmpbuf[3];
504 char q;
505 if (s) {
9d4ba2ae 506 char * const nl = strrchr(s,'\n');
d2719217 507 if (nl)
8990e307
LW
508 *nl = '\0';
509 }
9d116dd7
JH
510 else if (
511#ifdef EBCDIC
512 iscntrl(PL_multi_close)
513#else
514 PL_multi_close < 32 || PL_multi_close == 127
515#endif
516 ) {
8990e307 517 *tmpbuf = '^';
585ec06d 518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
519 tmpbuf[2] = '\0';
520 s = tmpbuf;
521 }
522 else {
eb160463 523 *tmpbuf = (char)PL_multi_close;
8990e307
LW
524 tmpbuf[1] = '\0';
525 s = tmpbuf;
526 }
527 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 529}
79072805 530
ef89dcc3 531#define FEATURE_IS_ENABLED(name) \
0d863452 532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
534/*
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
537 */
538STATIC bool
d4c19fe8 539S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 540{
97aff369 541 dVAR;
0d863452 542 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 543 char he_name[32] = "feature_";
6fca0082 544 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 545
7b9ef140 546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
547}
548
ffb4593c
NT
549/*
550 * Perl_deprecate
ffb4593c
NT
551 */
552
79072805 553void
bfed75c6 554Perl_deprecate(pTHX_ const char *s)
a0d0e21e 555{
599cee73 556 if (ckWARN(WARN_DEPRECATED))
9014280d 557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
558}
559
12bcd1a6 560void
bfed75c6 561Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
562{
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
565 /* */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
570
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
573 "Use of %s is deprecated", s);
574}
575
ffb4593c 576/*
9cbb5ea2
GS
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
ffb4593c
NT
579 */
580
c39cd008
GS
581#ifdef PERL_CR_FILTER
582static void
583strip_return(SV *sv)
584{
95a20fc0 585 register const char *s = SvPVX_const(sv);
9d4ba2ae 586 register const char * const e = s + SvCUR(sv);
c39cd008
GS
587 /* outer loop optimized to do nothing if there are no CR-LFs */
588 while (s < e) {
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
592 *d++ = *s++;
593 while (s < e) {
594 if (*s == '\r' && s[1] == '\n')
595 s++;
596 *d++ = *s++;
597 }
598 SvCUR(sv) -= s - d;
599 return;
600 }
601 }
602}
a868473f 603
76e3520e 604STATIC I32
c39cd008 605S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 606{
f54cb97a 607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
608 if (count > 0 && !maxlen)
609 strip_return(sv);
610 return count;
a868473f
NIS
611}
612#endif
613
199e78b7
DM
614
615
ffb4593c
NT
616/*
617 * Perl_lex_start
9cbb5ea2
GS
618 * Initialize variables. Uses the Perl save_stack to save its state (for
619 * recursive calls to the parser).
ffb4593c
NT
620 */
621
a0d0e21e 622void
864dbfa3 623Perl_lex_start(pTHX_ SV *line)
79072805 624{
97aff369 625 dVAR;
6ef55633 626 const char *s = NULL;
8990e307 627 STRLEN len;
acdf0a21
DM
628 yy_parser *parser;
629
630 /* create and initialise a parser */
631
199e78b7 632 Newxz(parser, 1, yy_parser);
acdf0a21
DM
633 parser->old_parser = PL_parser;
634 PL_parser = parser;
635
636 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
637 parser->ps = parser->stack;
638 parser->stack_size = YYINITDEPTH;
639
640 parser->stack->state = 0;
641 parser->yyerrstatus = 0;
642 parser->yychar = YYEMPTY; /* Cause a token to be read. */
643
644 /* initialise lexer state */
8990e307 645
3280af22 646 SAVEI32(PL_lex_state);
5db06880
NC
647#ifdef PERL_MAD
648 if (PL_lex_state == LEX_KNOWNEXT) {
199e78b7 649 I32 toke = parser->old_parser->lasttoke;
5db06880
NC
650 while (--toke >= 0) {
651 SAVEI32(PL_nexttoke[toke].next_type);
652 SAVEVPTR(PL_nexttoke[toke].next_val);
653 if (PL_madskills)
654 SAVEVPTR(PL_nexttoke[toke].next_mad);
655 }
5db06880 656 }
cd81e915 657 SAVEI32(PL_curforce);
5db06880 658#else
18b09519
GS
659 if (PL_lex_state == LEX_KNOWNEXT) {
660 I32 toke = PL_nexttoke;
661 while (--toke >= 0) {
662 SAVEI32(PL_nexttype[toke]);
663 SAVEVPTR(PL_nextval[toke]);
664 }
665 SAVEI32(PL_nexttoke);
18b09519 666 }
5db06880 667#endif
57843af0 668 SAVECOPLINE(PL_curcop);
3280af22
NIS
669 SAVEPPTR(PL_bufptr);
670 SAVEPPTR(PL_bufend);
671 SAVEPPTR(PL_oldbufptr);
672 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
673 SAVEPPTR(PL_last_lop);
674 SAVEPPTR(PL_last_uni);
3280af22
NIS
675 SAVEPPTR(PL_linestart);
676 SAVESPTR(PL_linestr);
c76ac1ee 677 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
bebdddfc 678 SAVEINT(PL_expect);
3280af22
NIS
679
680 PL_lex_state = LEX_NORMAL;
3280af22 681 PL_expect = XSTATE;
199e78b7
DM
682 Newx(parser->lex_brackstack, 120, char);
683 Newx(parser->lex_casestack, 12, char);
684 *parser->lex_casestack = '\0';
685#ifndef PERL_MAD
76be56bc 686 PL_nexttoke = 0;
5db06880 687#endif
02b34bbe 688
10efb74f
NC
689 if (line) {
690 s = SvPV_const(line, len);
691 } else {
692 len = 0;
693 }
694 if (!len) {
695 PL_linestr = newSVpvs("\n;");
696 } else if (SvREADONLY(line) || s[len-1] != ';') {
697 PL_linestr = newSVsv(line);
698 if (s[len-1] != ';')
0eb20fa2 699 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
700 } else {
701 SvTEMP_off(line);
702 SvREFCNT_inc_simple_void_NN(line);
703 PL_linestr = line;
8990e307 704 }
db4997f0
NC
705 /* PL_linestr needs to survive until end of scope, not just the next
706 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 707 SAVEFREESV(PL_linestr);
3280af22
NIS
708 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
709 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 710 PL_last_lop = PL_last_uni = NULL;
3280af22 711 PL_rsfp = 0;
79072805 712}
a687059c 713
ffb4593c
NT
714/*
715 * Perl_lex_end
9cbb5ea2
GS
716 * Finalizer for lexing operations. Must be called when the parser is
717 * done with the lexer.
ffb4593c
NT
718 */
719
463ee0b2 720void
864dbfa3 721Perl_lex_end(pTHX)
463ee0b2 722{
97aff369 723 dVAR;
3280af22 724 PL_doextract = FALSE;
463ee0b2
LW
725}
726
ffb4593c
NT
727/*
728 * S_incline
729 * This subroutine has nothing to do with tilting, whether at windmills
730 * or pinball tables. Its name is short for "increment line". It
57843af0 731 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 732 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
733 * # line 500 "foo.pm"
734 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
735 */
736
76e3520e 737STATIC void
cea2e8a9 738S_incline(pTHX_ char *s)
463ee0b2 739{
97aff369 740 dVAR;
463ee0b2
LW
741 char *t;
742 char *n;
73659bf1 743 char *e;
463ee0b2 744 char ch;
463ee0b2 745
57843af0 746 CopLINE_inc(PL_curcop);
463ee0b2
LW
747 if (*s++ != '#')
748 return;
d4c19fe8
AL
749 while (SPACE_OR_TAB(*s))
750 s++;
73659bf1
GS
751 if (strnEQ(s, "line", 4))
752 s += 4;
753 else
754 return;
084592ab 755 if (SPACE_OR_TAB(*s))
73659bf1 756 s++;
4e553d73 757 else
73659bf1 758 return;
d4c19fe8
AL
759 while (SPACE_OR_TAB(*s))
760 s++;
463ee0b2
LW
761 if (!isDIGIT(*s))
762 return;
d4c19fe8 763
463ee0b2
LW
764 n = s;
765 while (isDIGIT(*s))
766 s++;
bf4acbe4 767 while (SPACE_OR_TAB(*s))
463ee0b2 768 s++;
73659bf1 769 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 770 s++;
73659bf1
GS
771 e = t + 1;
772 }
463ee0b2 773 else {
c35e046a
AL
774 t = s;
775 while (!isSPACE(*t))
776 t++;
73659bf1 777 e = t;
463ee0b2 778 }
bf4acbe4 779 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
780 e++;
781 if (*e != '\n' && *e != '\0')
782 return; /* false alarm */
783
463ee0b2
LW
784 ch = *t;
785 *t = '\0';
f4dd75d9 786 if (t - s > 0) {
8a5ee598 787#ifndef USE_ITHREADS
c4420975 788 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
789 STRLEN tmplen = cf ? strlen(cf) : 0;
790 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
791 /* must copy *{"::_<(eval N)[oldfilename:L]"}
792 * to *{"::_<newfilename"} */
793 char smallbuf[256], smallbuf2[256];
794 char *tmpbuf, *tmpbuf2;
8a5ee598 795 GV **gvp, *gv2;
e66cf94c
RGS
796 STRLEN tmplen2 = strlen(s);
797 if (tmplen + 3 < sizeof smallbuf)
798 tmpbuf = smallbuf;
799 else
800 Newx(tmpbuf, tmplen + 3, char);
801 if (tmplen2 + 3 < sizeof smallbuf2)
802 tmpbuf2 = smallbuf2;
803 else
804 Newx(tmpbuf2, tmplen2 + 3, char);
805 tmpbuf[0] = tmpbuf2[0] = '_';
806 tmpbuf[1] = tmpbuf2[1] = '<';
807 memcpy(tmpbuf + 2, cf, ++tmplen);
808 memcpy(tmpbuf2 + 2, s, ++tmplen2);
809 ++tmplen; ++tmplen2;
8a5ee598
RGS
810 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
811 if (gvp) {
812 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 813 if (!isGV(gv2)) {
8a5ee598 814 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
815 /* adjust ${"::_<newfilename"} to store the new file name */
816 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
817 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
818 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
819 }
8a5ee598 820 }
e66cf94c
RGS
821 if (tmpbuf != smallbuf) Safefree(tmpbuf);
822 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
823 }
8a5ee598 824#endif
05ec9bb3 825 CopFILE_free(PL_curcop);
57843af0 826 CopFILE_set(PL_curcop, s);
f4dd75d9 827 }
463ee0b2 828 *t = ch;
57843af0 829 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
830}
831
29595ff2 832#ifdef PERL_MAD
cd81e915 833/* skip space before PL_thistoken */
29595ff2
NC
834
835STATIC char *
836S_skipspace0(pTHX_ register char *s)
837{
838 s = skipspace(s);
839 if (!PL_madskills)
840 return s;
cd81e915
NC
841 if (PL_skipwhite) {
842 if (!PL_thiswhite)
6b29d1f5 843 PL_thiswhite = newSVpvs("");
cd81e915
NC
844 sv_catsv(PL_thiswhite, PL_skipwhite);
845 sv_free(PL_skipwhite);
846 PL_skipwhite = 0;
847 }
848 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
849 return s;
850}
851
cd81e915 852/* skip space after PL_thistoken */
29595ff2
NC
853
854STATIC char *
855S_skipspace1(pTHX_ register char *s)
856{
d4c19fe8 857 const char *start = s;
29595ff2
NC
858 I32 startoff = start - SvPVX(PL_linestr);
859
860 s = skipspace(s);
861 if (!PL_madskills)
862 return s;
863 start = SvPVX(PL_linestr) + startoff;
cd81e915 864 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 865 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
866 PL_thistoken = newSVpvn(tstart, start - tstart);
867 }
868 PL_realtokenstart = -1;
869 if (PL_skipwhite) {
870 if (!PL_nextwhite)
6b29d1f5 871 PL_nextwhite = newSVpvs("");
cd81e915
NC
872 sv_catsv(PL_nextwhite, PL_skipwhite);
873 sv_free(PL_skipwhite);
874 PL_skipwhite = 0;
29595ff2
NC
875 }
876 return s;
877}
878
879STATIC char *
880S_skipspace2(pTHX_ register char *s, SV **svp)
881{
c35e046a
AL
882 char *start;
883 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
884 const I32 startoff = s - SvPVX(PL_linestr);
885
29595ff2
NC
886 s = skipspace(s);
887 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
888 if (!PL_madskills || !svp)
889 return s;
890 start = SvPVX(PL_linestr) + startoff;
cd81e915 891 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 892 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
893 PL_thistoken = newSVpvn(tstart, start - tstart);
894 PL_realtokenstart = -1;
29595ff2 895 }
cd81e915 896 if (PL_skipwhite) {
29595ff2 897 if (!*svp)
6b29d1f5 898 *svp = newSVpvs("");
cd81e915
NC
899 sv_setsv(*svp, PL_skipwhite);
900 sv_free(PL_skipwhite);
901 PL_skipwhite = 0;
29595ff2
NC
902 }
903
904 return s;
905}
906#endif
907
80a702cd
RGS
908STATIC void
909S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
910{
911 AV *av = CopFILEAVx(PL_curcop);
912 if (av) {
913 SV * const sv = newSV(0);
914 sv_upgrade(sv, SVt_PVMG);
915 sv_setpvn(sv, buf, len);
916 (void)SvIOK_on(sv);
917 SvIV_set(sv, 0);
918 av_store(av, (I32)CopLINE(PL_curcop), sv);
919 }
920}
921
922STATIC void
923S_update_debugger_info_sv(pTHX_ SV *orig_sv)
924{
925 AV *av = CopFILEAVx(PL_curcop);
926 if (av) {
927 SV * const sv = newSV(0);
928 sv_upgrade(sv, SVt_PVMG);
929 sv_setsv(sv, orig_sv);
930 (void)SvIOK_on(sv);
931 SvIV_set(sv, 0);
932 av_store(av, (I32)CopLINE(PL_curcop), sv);
933 }
934}
935
ffb4593c
NT
936/*
937 * S_skipspace
938 * Called to gobble the appropriate amount and type of whitespace.
939 * Skips comments as well.
940 */
941
76e3520e 942STATIC char *
cea2e8a9 943S_skipspace(pTHX_ register char *s)
a687059c 944{
97aff369 945 dVAR;
5db06880
NC
946#ifdef PERL_MAD
947 int curoff;
948 int startoff = s - SvPVX(PL_linestr);
949
cd81e915
NC
950 if (PL_skipwhite) {
951 sv_free(PL_skipwhite);
952 PL_skipwhite = 0;
5db06880
NC
953 }
954#endif
955
3280af22 956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 957 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 958 s++;
5db06880
NC
959#ifdef PERL_MAD
960 goto done;
961#else
463ee0b2 962 return s;
5db06880 963#endif
463ee0b2
LW
964 }
965 for (;;) {
fd049845 966 STRLEN prevlen;
09bef843 967 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 968 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
969 while (s < PL_bufend && isSPACE(*s)) {
970 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
971 incline(s);
972 }
ffb4593c
NT
973
974 /* comment */
3280af22
NIS
975 if (s < PL_bufend && *s == '#') {
976 while (s < PL_bufend && *s != '\n')
463ee0b2 977 s++;
60e6418e 978 if (s < PL_bufend) {
463ee0b2 979 s++;
60e6418e
GS
980 if (PL_in_eval && !PL_rsfp) {
981 incline(s);
982 continue;
983 }
984 }
463ee0b2 985 }
ffb4593c
NT
986
987 /* only continue to recharge the buffer if we're at the end
988 * of the buffer, we're not reading from a source filter, and
989 * we're in normal lexing mode
990 */
09bef843
SB
991 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
992 PL_lex_state == LEX_FORMLINE)
5db06880
NC
993#ifdef PERL_MAD
994 goto done;
995#else
463ee0b2 996 return s;
5db06880 997#endif
ffb4593c
NT
998
999 /* try to recharge the buffer */
5db06880
NC
1000#ifdef PERL_MAD
1001 curoff = s - SvPVX(PL_linestr);
1002#endif
1003
9cbb5ea2 1004 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1005 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1006 {
5db06880
NC
1007#ifdef PERL_MAD
1008 if (PL_madskills && curoff != startoff) {
cd81e915 1009 if (!PL_skipwhite)
6b29d1f5 1010 PL_skipwhite = newSVpvs("");
cd81e915 1011 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1012 curoff - startoff);
1013 }
1014
1015 /* mustn't throw out old stuff yet if madpropping */
1016 SvCUR(PL_linestr) = curoff;
1017 s = SvPVX(PL_linestr) + curoff;
1018 *s = 0;
1019 if (curoff && s[-1] == '\n')
1020 s[-1] = ' ';
1021#endif
1022
9cbb5ea2 1023 /* end of file. Add on the -p or -n magic */
cd81e915 1024 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1025 if (PL_minus_p) {
5db06880
NC
1026#ifdef PERL_MAD
1027 sv_catpv(PL_linestr,
1028 ";}continue{print or die qq(-p destination: $!\\n);}");
1029#else
01a19ab0
NC
1030 sv_setpv(PL_linestr,
1031 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1032#endif
3280af22 1033 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1034 }
01a19ab0 1035 else if (PL_minus_n) {
5db06880
NC
1036#ifdef PERL_MAD
1037 sv_catpvn(PL_linestr, ";}", 2);
1038#else
01a19ab0 1039 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1040#endif
01a19ab0
NC
1041 PL_minus_n = 0;
1042 }
a0d0e21e 1043 else
5db06880
NC
1044#ifdef PERL_MAD
1045 sv_catpvn(PL_linestr,";", 1);
1046#else
4147a61b 1047 sv_setpvn(PL_linestr,";", 1);
5db06880 1048#endif
ffb4593c
NT
1049
1050 /* reset variables for next time we lex */
9cbb5ea2 1051 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1052 = SvPVX(PL_linestr)
1053#ifdef PERL_MAD
1054 + curoff
1055#endif
1056 ;
3280af22 1057 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1058 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1059
1060 /* Close the filehandle. Could be from -P preprocessor,
1061 * STDIN, or a regular file. If we were reading code from
1062 * STDIN (because the commandline held no -e or filename)
1063 * then we don't close it, we reset it so the code can
1064 * read from STDIN too.
1065 */
1066
3280af22
NIS
1067 if (PL_preprocess && !PL_in_eval)
1068 (void)PerlProc_pclose(PL_rsfp);
1069 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1070 PerlIO_clearerr(PL_rsfp);
8990e307 1071 else
3280af22 1072 (void)PerlIO_close(PL_rsfp);
4608196e 1073 PL_rsfp = NULL;
463ee0b2
LW
1074 return s;
1075 }
ffb4593c
NT
1076
1077 /* not at end of file, so we only read another line */
09bef843
SB
1078 /* make corresponding updates to old pointers, for yyerror() */
1079 oldprevlen = PL_oldbufptr - PL_bufend;
1080 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1081 if (PL_last_uni)
1082 oldunilen = PL_last_uni - PL_bufend;
1083 if (PL_last_lop)
1084 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1085 PL_linestart = PL_bufptr = s + prevlen;
1086 PL_bufend = s + SvCUR(PL_linestr);
1087 s = PL_bufptr;
09bef843
SB
1088 PL_oldbufptr = s + oldprevlen;
1089 PL_oldoldbufptr = s + oldoldprevlen;
1090 if (PL_last_uni)
1091 PL_last_uni = s + oldunilen;
1092 if (PL_last_lop)
1093 PL_last_lop = s + oldloplen;
a0d0e21e 1094 incline(s);
ffb4593c
NT
1095
1096 /* debugger active and we're not compiling the debugger code,
1097 * so store the line into the debugger's array of lines
1098 */
80a702cd
RGS
1099 if (PERLDB_LINE && PL_curstash != PL_debstash)
1100 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1101 }
5db06880
NC
1102
1103#ifdef PERL_MAD
1104 done:
1105 if (PL_madskills) {
cd81e915 1106 if (!PL_skipwhite)
6b29d1f5 1107 PL_skipwhite = newSVpvs("");
5db06880
NC
1108 curoff = s - SvPVX(PL_linestr);
1109 if (curoff - startoff)
cd81e915 1110 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1111 curoff - startoff);
1112 }
1113 return s;
1114#endif
a687059c 1115}
378cc40b 1116
ffb4593c
NT
1117/*
1118 * S_check_uni
1119 * Check the unary operators to ensure there's no ambiguity in how they're
1120 * used. An ambiguous piece of code would be:
1121 * rand + 5
1122 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1123 * the +5 is its argument.
1124 */
1125
76e3520e 1126STATIC void
cea2e8a9 1127S_check_uni(pTHX)
ba106d47 1128{
97aff369 1129 dVAR;
d4c19fe8
AL
1130 const char *s;
1131 const char *t;
2f3197b3 1132
3280af22 1133 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1134 return;
3280af22
NIS
1135 while (isSPACE(*PL_last_uni))
1136 PL_last_uni++;
c35e046a
AL
1137 s = PL_last_uni;
1138 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1139 s++;
3280af22 1140 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1141 return;
6136c704 1142
0453d815 1143 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1144 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1145 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1146 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1147 }
2f3197b3
LW
1148}
1149
ffb4593c
NT
1150/*
1151 * LOP : macro to build a list operator. Its behaviour has been replaced
1152 * with a subroutine, S_lop() for which LOP is just another name.
1153 */
1154
a0d0e21e
LW
1155#define LOP(f,x) return lop(f,x,s)
1156
ffb4593c
NT
1157/*
1158 * S_lop
1159 * Build a list operator (or something that might be one). The rules:
1160 * - if we have a next token, then it's a list operator [why?]
1161 * - if the next thing is an opening paren, then it's a function
1162 * - else it's a list operator
1163 */
1164
76e3520e 1165STATIC I32
a0be28da 1166S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1167{
97aff369 1168 dVAR;
79072805 1169 yylval.ival = f;
35c8bce7 1170 CLINE;
3280af22
NIS
1171 PL_expect = x;
1172 PL_bufptr = s;
1173 PL_last_lop = PL_oldbufptr;
eb160463 1174 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1175#ifdef PERL_MAD
1176 if (PL_lasttoke)
1177 return REPORT(LSTOP);
1178#else
3280af22 1179 if (PL_nexttoke)
bbf60fe6 1180 return REPORT(LSTOP);
5db06880 1181#endif
79072805 1182 if (*s == '(')
bbf60fe6 1183 return REPORT(FUNC);
29595ff2 1184 s = PEEKSPACE(s);
79072805 1185 if (*s == '(')
bbf60fe6 1186 return REPORT(FUNC);
79072805 1187 else
bbf60fe6 1188 return REPORT(LSTOP);
79072805
LW
1189}
1190
5db06880
NC
1191#ifdef PERL_MAD
1192 /*
1193 * S_start_force
1194 * Sets up for an eventual force_next(). start_force(0) basically does
1195 * an unshift, while start_force(-1) does a push. yylex removes items
1196 * on the "pop" end.
1197 */
1198
1199STATIC void
1200S_start_force(pTHX_ int where)
1201{
1202 int i;
1203
cd81e915 1204 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1205 where = PL_lasttoke;
cd81e915
NC
1206 assert(PL_curforce < 0 || PL_curforce == where);
1207 if (PL_curforce != where) {
5db06880
NC
1208 for (i = PL_lasttoke; i > where; --i) {
1209 PL_nexttoke[i] = PL_nexttoke[i-1];
1210 }
1211 PL_lasttoke++;
1212 }
cd81e915 1213 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1214 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1215 PL_curforce = where;
1216 if (PL_nextwhite) {
5db06880 1217 if (PL_madskills)
6b29d1f5 1218 curmad('^', newSVpvs(""));
cd81e915 1219 CURMAD('_', PL_nextwhite);
5db06880
NC
1220 }
1221}
1222
1223STATIC void
1224S_curmad(pTHX_ char slot, SV *sv)
1225{
1226 MADPROP **where;
1227
1228 if (!sv)
1229 return;
cd81e915
NC
1230 if (PL_curforce < 0)
1231 where = &PL_thismad;
5db06880 1232 else
cd81e915 1233 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1234
cd81e915 1235 if (PL_faketokens)
5db06880
NC
1236 sv_setpvn(sv, "", 0);
1237 else {
1238 if (!IN_BYTES) {
1239 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1240 SvUTF8_on(sv);
1241 else if (PL_encoding) {
1242 sv_recode_to_utf8(sv, PL_encoding);
1243 }
1244 }
1245 }
1246
1247 /* keep a slot open for the head of the list? */
1248 if (slot != '_' && *where && (*where)->mad_key == '^') {
1249 (*where)->mad_key = slot;
1250 sv_free((*where)->mad_val);
1251 (*where)->mad_val = (void*)sv;
1252 }
1253 else
1254 addmad(newMADsv(slot, sv), where, 0);
1255}
1256#else
b3f24c00
MHM
1257# define start_force(where) NOOP
1258# define curmad(slot, sv) NOOP
5db06880
NC
1259#endif
1260
ffb4593c
NT
1261/*
1262 * S_force_next
9cbb5ea2 1263 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1264 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1265 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1266 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1267 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1268 */
1269
4e553d73 1270STATIC void
cea2e8a9 1271S_force_next(pTHX_ I32 type)
79072805 1272{
97aff369 1273 dVAR;
5db06880 1274#ifdef PERL_MAD
cd81e915 1275 if (PL_curforce < 0)
5db06880 1276 start_force(PL_lasttoke);
cd81e915 1277 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1278 if (PL_lex_state != LEX_KNOWNEXT)
1279 PL_lex_defer = PL_lex_state;
1280 PL_lex_state = LEX_KNOWNEXT;
1281 PL_lex_expect = PL_expect;
cd81e915 1282 PL_curforce = -1;
5db06880 1283#else
3280af22
NIS
1284 PL_nexttype[PL_nexttoke] = type;
1285 PL_nexttoke++;
1286 if (PL_lex_state != LEX_KNOWNEXT) {
1287 PL_lex_defer = PL_lex_state;
1288 PL_lex_expect = PL_expect;
1289 PL_lex_state = LEX_KNOWNEXT;
79072805 1290 }
5db06880 1291#endif
79072805
LW
1292}
1293
d0a148a6
NC
1294STATIC SV *
1295S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1296{
97aff369 1297 dVAR;
9d4ba2ae 1298 SV * const sv = newSVpvn(start,len);
bfed75c6 1299 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1300 SvUTF8_on(sv);
1301 return sv;
1302}
1303
ffb4593c
NT
1304/*
1305 * S_force_word
1306 * When the lexer knows the next thing is a word (for instance, it has
1307 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1308 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1309 * lookahead.
ffb4593c
NT
1310 *
1311 * Arguments:
b1b65b59 1312 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1313 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1314 * int check_keyword : if true, Perl checks to make sure the word isn't
1315 * a keyword (do this if the word is a label, e.g. goto FOO)
1316 * int allow_pack : if true, : characters will also be allowed (require,
1317 * use, etc. do this)
9cbb5ea2 1318 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1319 */
1320
76e3520e 1321STATIC char *
cea2e8a9 1322S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1323{
97aff369 1324 dVAR;
463ee0b2
LW
1325 register char *s;
1326 STRLEN len;
4e553d73 1327
29595ff2 1328 start = SKIPSPACE1(start);
463ee0b2 1329 s = start;
7e2040f0 1330 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1331 (allow_pack && *s == ':') ||
15f0808c 1332 (allow_initial_tick && *s == '\'') )
a0d0e21e 1333 {
3280af22 1334 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1335 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1336 return start;
cd81e915 1337 start_force(PL_curforce);
5db06880
NC
1338 if (PL_madskills)
1339 curmad('X', newSVpvn(start,s-start));
463ee0b2 1340 if (token == METHOD) {
29595ff2 1341 s = SKIPSPACE1(s);
463ee0b2 1342 if (*s == '(')
3280af22 1343 PL_expect = XTERM;
463ee0b2 1344 else {
3280af22 1345 PL_expect = XOPERATOR;
463ee0b2 1346 }
79072805 1347 }
9ded7720 1348 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1349 = (OP*)newSVOP(OP_CONST,0,
1350 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1351 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1352 force_next(token);
1353 }
1354 return s;
1355}
1356
ffb4593c
NT
1357/*
1358 * S_force_ident
9cbb5ea2 1359 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1360 * text only contains the "foo" portion. The first argument is a pointer
1361 * to the "foo", and the second argument is the type symbol to prefix.
1362 * Forces the next token to be a "WORD".
9cbb5ea2 1363 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1364 */
1365
76e3520e 1366STATIC void
bfed75c6 1367S_force_ident(pTHX_ register const char *s, int kind)
79072805 1368{
97aff369 1369 dVAR;
c35e046a 1370 if (*s) {
90e5519e
NC
1371 const STRLEN len = strlen(s);
1372 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1373 start_force(PL_curforce);
9ded7720 1374 NEXTVAL_NEXTTOKE.opval = o;
79072805 1375 force_next(WORD);
748a9306 1376 if (kind) {
11343788 1377 o->op_private = OPpCONST_ENTERED;
55497cff 1378 /* XXX see note in pp_entereval() for why we forgo typo
1379 warnings if the symbol must be introduced in an eval.
1380 GSAR 96-10-12 */
90e5519e
NC
1381 gv_fetchpvn_flags(s, len,
1382 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1383 : GV_ADD,
1384 kind == '$' ? SVt_PV :
1385 kind == '@' ? SVt_PVAV :
1386 kind == '%' ? SVt_PVHV :
a0d0e21e 1387 SVt_PVGV
90e5519e 1388 );
748a9306 1389 }
79072805
LW
1390 }
1391}
1392
1571675a
GS
1393NV
1394Perl_str_to_version(pTHX_ SV *sv)
1395{
1396 NV retval = 0.0;
1397 NV nshift = 1.0;
1398 STRLEN len;
cfd0369c 1399 const char *start = SvPV_const(sv,len);
9d4ba2ae 1400 const char * const end = start + len;
504618e9 1401 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1402 while (start < end) {
ba210ebe 1403 STRLEN skip;
1571675a
GS
1404 UV n;
1405 if (utf)
9041c2e3 1406 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1407 else {
1408 n = *(U8*)start;
1409 skip = 1;
1410 }
1411 retval += ((NV)n)/nshift;
1412 start += skip;
1413 nshift *= 1000;
1414 }
1415 return retval;
1416}
1417
4e553d73 1418/*
ffb4593c
NT
1419 * S_force_version
1420 * Forces the next token to be a version number.
e759cc13
RGS
1421 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1422 * and if "guessing" is TRUE, then no new token is created (and the caller
1423 * must use an alternative parsing method).
ffb4593c
NT
1424 */
1425
76e3520e 1426STATIC char *
e759cc13 1427S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1428{
97aff369 1429 dVAR;
5f66b61c 1430 OP *version = NULL;
44dcb63b 1431 char *d;
5db06880
NC
1432#ifdef PERL_MAD
1433 I32 startoff = s - SvPVX(PL_linestr);
1434#endif
89bfa8cd 1435
29595ff2 1436 s = SKIPSPACE1(s);
89bfa8cd 1437
44dcb63b 1438 d = s;
dd629d5b 1439 if (*d == 'v')
44dcb63b 1440 d++;
44dcb63b 1441 if (isDIGIT(*d)) {
e759cc13
RGS
1442 while (isDIGIT(*d) || *d == '_' || *d == '.')
1443 d++;
5db06880
NC
1444#ifdef PERL_MAD
1445 if (PL_madskills) {
cd81e915 1446 start_force(PL_curforce);
5db06880
NC
1447 curmad('X', newSVpvn(s,d-s));
1448 }
1449#endif
9f3d182e 1450 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1451 SV *ver;
b73d6f50 1452 s = scan_num(s, &yylval);
89bfa8cd 1453 version = yylval.opval;
dd629d5b
GS
1454 ver = cSVOPx(version)->op_sv;
1455 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1456 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1457 SvNV_set(ver, str_to_version(ver));
1571675a 1458 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1459 }
89bfa8cd 1460 }
5db06880
NC
1461 else if (guessing) {
1462#ifdef PERL_MAD
1463 if (PL_madskills) {
cd81e915
NC
1464 sv_free(PL_nextwhite); /* let next token collect whitespace */
1465 PL_nextwhite = 0;
5db06880
NC
1466 s = SvPVX(PL_linestr) + startoff;
1467 }
1468#endif
e759cc13 1469 return s;
5db06880 1470 }
89bfa8cd 1471 }
1472
5db06880
NC
1473#ifdef PERL_MAD
1474 if (PL_madskills && !version) {
cd81e915
NC
1475 sv_free(PL_nextwhite); /* let next token collect whitespace */
1476 PL_nextwhite = 0;
5db06880
NC
1477 s = SvPVX(PL_linestr) + startoff;
1478 }
1479#endif
89bfa8cd 1480 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1481 start_force(PL_curforce);
9ded7720 1482 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1483 force_next(WORD);
89bfa8cd 1484
e759cc13 1485 return s;
89bfa8cd 1486}
1487
ffb4593c
NT
1488/*
1489 * S_tokeq
1490 * Tokenize a quoted string passed in as an SV. It finds the next
1491 * chunk, up to end of string or a backslash. It may make a new
1492 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1493 * turns \\ into \.
1494 */
1495
76e3520e 1496STATIC SV *
cea2e8a9 1497S_tokeq(pTHX_ SV *sv)
79072805 1498{
97aff369 1499 dVAR;
79072805
LW
1500 register char *s;
1501 register char *send;
1502 register char *d;
b3ac6de7
IZ
1503 STRLEN len = 0;
1504 SV *pv = sv;
79072805
LW
1505
1506 if (!SvLEN(sv))
b3ac6de7 1507 goto finish;
79072805 1508
a0d0e21e 1509 s = SvPV_force(sv, len);
21a311ee 1510 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1511 goto finish;
463ee0b2 1512 send = s + len;
79072805
LW
1513 while (s < send && *s != '\\')
1514 s++;
1515 if (s == send)
b3ac6de7 1516 goto finish;
79072805 1517 d = s;
be4731d2 1518 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1519 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1520 if (SvUTF8(sv))
1521 SvUTF8_on(pv);
1522 }
79072805
LW
1523 while (s < send) {
1524 if (*s == '\\') {
a0d0e21e 1525 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1526 s++; /* all that, just for this */
1527 }
1528 *d++ = *s++;
1529 }
1530 *d = '\0';
95a20fc0 1531 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1532 finish:
3280af22 1533 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1534 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1535 return sv;
1536}
1537
ffb4593c
NT
1538/*
1539 * Now come three functions related to double-quote context,
1540 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1541 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1542 * interact with PL_lex_state, and create fake ( ... ) argument lists
1543 * to handle functions and concatenation.
1544 * They assume that whoever calls them will be setting up a fake
1545 * join call, because each subthing puts a ',' after it. This lets
1546 * "lower \luPpEr"
1547 * become
1548 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1549 *
1550 * (I'm not sure whether the spurious commas at the end of lcfirst's
1551 * arguments and join's arguments are created or not).
1552 */
1553
1554/*
1555 * S_sublex_start
1556 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1557 *
1558 * Pattern matching will set PL_lex_op to the pattern-matching op to
1559 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1560 *
1561 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1562 *
1563 * Everything else becomes a FUNC.
1564 *
1565 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1566 * had an OP_CONST or OP_READLINE). This just sets us up for a
1567 * call to S_sublex_push().
1568 */
1569
76e3520e 1570STATIC I32
cea2e8a9 1571S_sublex_start(pTHX)
79072805 1572{
97aff369 1573 dVAR;
0d46e09a 1574 register const I32 op_type = yylval.ival;
79072805
LW
1575
1576 if (op_type == OP_NULL) {
3280af22 1577 yylval.opval = PL_lex_op;
5f66b61c 1578 PL_lex_op = NULL;
79072805
LW
1579 return THING;
1580 }
1581 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1582 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1583
1584 if (SvTYPE(sv) == SVt_PVIV) {
1585 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1586 STRLEN len;
96a5add6 1587 const char * const p = SvPV_const(sv, len);
f54cb97a 1588 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1589 if (SvUTF8(sv))
1590 SvUTF8_on(nsv);
b3ac6de7
IZ
1591 SvREFCNT_dec(sv);
1592 sv = nsv;
4e553d73 1593 }
b3ac6de7 1594 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1595 PL_lex_stuff = NULL;
6f33ba73
RGS
1596 /* Allow <FH> // "foo" */
1597 if (op_type == OP_READLINE)
1598 PL_expect = XTERMORDORDOR;
79072805
LW
1599 return THING;
1600 }
e3f73d4e
RGS
1601 else if (op_type == OP_BACKTICK && PL_lex_op) {
1602 /* readpipe() vas overriden */
1603 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1604 yylval.opval = PL_lex_op;
9b201d7d 1605 PL_lex_op = NULL;
e3f73d4e
RGS
1606 PL_lex_stuff = NULL;
1607 return THING;
1608 }
79072805 1609
3280af22
NIS
1610 PL_sublex_info.super_state = PL_lex_state;
1611 PL_sublex_info.sub_inwhat = op_type;
1612 PL_sublex_info.sub_op = PL_lex_op;
1613 PL_lex_state = LEX_INTERPPUSH;
55497cff 1614
3280af22
NIS
1615 PL_expect = XTERM;
1616 if (PL_lex_op) {
1617 yylval.opval = PL_lex_op;
5f66b61c 1618 PL_lex_op = NULL;
55497cff 1619 return PMFUNC;
1620 }
1621 else
1622 return FUNC;
1623}
1624
ffb4593c
NT
1625/*
1626 * S_sublex_push
1627 * Create a new scope to save the lexing state. The scope will be
1628 * ended in S_sublex_done. Returns a '(', starting the function arguments
1629 * to the uc, lc, etc. found before.
1630 * Sets PL_lex_state to LEX_INTERPCONCAT.
1631 */
1632
76e3520e 1633STATIC I32
cea2e8a9 1634S_sublex_push(pTHX)
55497cff 1635{
27da23d5 1636 dVAR;
f46d017c 1637 ENTER;
55497cff 1638
3280af22
NIS
1639 PL_lex_state = PL_sublex_info.super_state;
1640 SAVEI32(PL_lex_dojoin);
1641 SAVEI32(PL_lex_brackets);
3280af22
NIS
1642 SAVEI32(PL_lex_casemods);
1643 SAVEI32(PL_lex_starts);
1644 SAVEI32(PL_lex_state);
7766f137 1645 SAVEVPTR(PL_lex_inpat);
3280af22 1646 SAVEI32(PL_lex_inwhat);
57843af0 1647 SAVECOPLINE(PL_curcop);
3280af22 1648 SAVEPPTR(PL_bufptr);
8452ff4b 1649 SAVEPPTR(PL_bufend);
3280af22
NIS
1650 SAVEPPTR(PL_oldbufptr);
1651 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1652 SAVEPPTR(PL_last_lop);
1653 SAVEPPTR(PL_last_uni);
3280af22
NIS
1654 SAVEPPTR(PL_linestart);
1655 SAVESPTR(PL_linestr);
8edd5f42
RGS
1656 SAVEGENERICPV(PL_lex_brackstack);
1657 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1658
1659 PL_linestr = PL_lex_stuff;
a0714e2c 1660 PL_lex_stuff = NULL;
3280af22 1661
9cbb5ea2
GS
1662 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1663 = SvPVX(PL_linestr);
3280af22 1664 PL_bufend += SvCUR(PL_linestr);
bd61b366 1665 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1666 SAVEFREESV(PL_linestr);
1667
1668 PL_lex_dojoin = FALSE;
1669 PL_lex_brackets = 0;
a02a5408
JC
1670 Newx(PL_lex_brackstack, 120, char);
1671 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1672 PL_lex_casemods = 0;
1673 *PL_lex_casestack = '\0';
1674 PL_lex_starts = 0;
1675 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1676 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1677
1678 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1679 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1680 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1681 else
5f66b61c 1682 PL_lex_inpat = NULL;
79072805 1683
55497cff 1684 return '(';
79072805
LW
1685}
1686
ffb4593c
NT
1687/*
1688 * S_sublex_done
1689 * Restores lexer state after a S_sublex_push.
1690 */
1691
76e3520e 1692STATIC I32
cea2e8a9 1693S_sublex_done(pTHX)
79072805 1694{
27da23d5 1695 dVAR;
3280af22 1696 if (!PL_lex_starts++) {
396482e1 1697 SV * const sv = newSVpvs("");
9aa983d2
JH
1698 if (SvUTF8(PL_linestr))
1699 SvUTF8_on(sv);
3280af22 1700 PL_expect = XOPERATOR;
9aa983d2 1701 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1702 return THING;
1703 }
1704
3280af22
NIS
1705 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1706 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1707 return yylex();
79072805
LW
1708 }
1709
ffb4593c 1710 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1711 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1712 PL_linestr = PL_lex_repl;
1713 PL_lex_inpat = 0;
1714 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1715 PL_bufend += SvCUR(PL_linestr);
bd61b366 1716 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1717 SAVEFREESV(PL_linestr);
1718 PL_lex_dojoin = FALSE;
1719 PL_lex_brackets = 0;
3280af22
NIS
1720 PL_lex_casemods = 0;
1721 *PL_lex_casestack = '\0';
1722 PL_lex_starts = 0;
25da4f38 1723 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1724 PL_lex_state = LEX_INTERPNORMAL;
1725 PL_lex_starts++;
e9fa98b2
HS
1726 /* we don't clear PL_lex_repl here, so that we can check later
1727 whether this is an evalled subst; that means we rely on the
1728 logic to ensure sublex_done() is called again only via the
1729 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1730 }
e9fa98b2 1731 else {
3280af22 1732 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1733 PL_lex_repl = NULL;
e9fa98b2 1734 }
79072805 1735 return ',';
ffed7fef
LW
1736 }
1737 else {
5db06880
NC
1738#ifdef PERL_MAD
1739 if (PL_madskills) {
cd81e915
NC
1740 if (PL_thiswhite) {
1741 if (!PL_endwhite)
6b29d1f5 1742 PL_endwhite = newSVpvs("");
cd81e915
NC
1743 sv_catsv(PL_endwhite, PL_thiswhite);
1744 PL_thiswhite = 0;
1745 }
1746 if (PL_thistoken)
1747 sv_setpvn(PL_thistoken,"",0);
5db06880 1748 else
cd81e915 1749 PL_realtokenstart = -1;
5db06880
NC
1750 }
1751#endif
f46d017c 1752 LEAVE;
3280af22
NIS
1753 PL_bufend = SvPVX(PL_linestr);
1754 PL_bufend += SvCUR(PL_linestr);
1755 PL_expect = XOPERATOR;
09bef843 1756 PL_sublex_info.sub_inwhat = 0;
79072805 1757 return ')';
ffed7fef
LW
1758 }
1759}
1760
02aa26ce
NT
1761/*
1762 scan_const
1763
1764 Extracts a pattern, double-quoted string, or transliteration. This
1765 is terrifying code.
1766
94def140 1767 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1768 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1769 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1770
94def140
TS
1771 Returns a pointer to the character scanned up to. If this is
1772 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1773 successfully parsed), will leave an OP for the substring scanned
1774 in yylval. Caller must intuit reason for not parsing further
1775 by looking at the next characters herself.
1776
02aa26ce
NT
1777 In patterns:
1778 backslashes:
1779 double-quoted style: \r and \n
1780 regexp special ones: \D \s
94def140
TS
1781 constants: \x31
1782 backrefs: \1
02aa26ce
NT
1783 case and quoting: \U \Q \E
1784 stops on @ and $, but not for $ as tail anchor
1785
1786 In transliterations:
1787 characters are VERY literal, except for - not at the start or end
94def140
TS
1788 of the string, which indicates a range. If the range is in bytes,
1789 scan_const expands the range to the full set of intermediate
1790 characters. If the range is in utf8, the hyphen is replaced with
1791 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1792
1793 In double-quoted strings:
1794 backslashes:
1795 double-quoted style: \r and \n
94def140
TS
1796 constants: \x31
1797 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1798 case and quoting: \U \Q \E
1799 stops on @ and $
1800
1801 scan_const does *not* construct ops to handle interpolated strings.
1802 It stops processing as soon as it finds an embedded $ or @ variable
1803 and leaves it to the caller to work out what's going on.
1804
94def140
TS
1805 embedded arrays (whether in pattern or not) could be:
1806 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1807
1808 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1809
1810 $ in pattern could be $foo or could be tail anchor. Assumption:
1811 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1812 followed by one of "()| \r\n\t"
02aa26ce
NT
1813
1814 \1 (backreferences) are turned into $1
1815
1816 The structure of the code is
1817 while (there's a character to process) {
94def140
TS
1818 handle transliteration ranges
1819 skip regexp comments /(?#comment)/ and codes /(?{code})/
1820 skip #-initiated comments in //x patterns
1821 check for embedded arrays
02aa26ce
NT
1822 check for embedded scalars
1823 if (backslash) {
94def140
TS
1824 leave intact backslashes from leaveit (below)
1825 deprecate \1 in substitution replacements
02aa26ce
NT
1826 handle string-changing backslashes \l \U \Q \E, etc.
1827 switch (what was escaped) {
94def140
TS
1828 handle \- in a transliteration (becomes a literal -)
1829 handle \132 (octal characters)
1830 handle \x15 and \x{1234} (hex characters)
1831 handle \N{name} (named characters)
1832 handle \cV (control characters)
1833 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1834 } (end switch)
1835 } (end if backslash)
1836 } (end while character to read)
4e553d73 1837
02aa26ce
NT
1838*/
1839
76e3520e 1840STATIC char *
cea2e8a9 1841S_scan_const(pTHX_ char *start)
79072805 1842{
97aff369 1843 dVAR;
3280af22 1844 register char *send = PL_bufend; /* end of the constant */
561b68a9 1845 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1846 register char *s = start; /* start of the constant */
1847 register char *d = SvPVX(sv); /* destination for copies */
1848 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1849 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1850 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1851 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1852 UV uv;
4c3a8340
TS
1853#ifdef EBCDIC
1854 UV literal_endpoint = 0;
e294cc5d 1855 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1856#endif
012bcf8d 1857
2b9d42f0
NIS
1858 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1859 /* If we are doing a trans and we know we want UTF8 set expectation */
1860 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1861 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1862 }
1863
1864
79072805 1865 while (s < send || dorange) {
02aa26ce 1866 /* get transliterations out of the way (they're most literal) */
3280af22 1867 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1868 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1869 if (dorange) {
1ba5c669
JH
1870 I32 i; /* current expanded character */
1871 I32 min; /* first character in range */
1872 I32 max; /* last character in range */
02aa26ce 1873
e294cc5d
JH
1874#ifdef EBCDIC
1875 UV uvmax = 0;
1876#endif
1877
1878 if (has_utf8
1879#ifdef EBCDIC
1880 && !native_range
1881#endif
1882 ) {
9d4ba2ae 1883 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1884 char *e = d++;
1885 while (e-- > c)
1886 *(e + 1) = *e;
25716404 1887 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1888 /* mark the range as done, and continue */
1889 dorange = FALSE;
1890 didrange = TRUE;
1891 continue;
1892 }
2b9d42f0 1893
95a20fc0 1894 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1895#ifdef EBCDIC
1896 SvGROW(sv,
1897 SvLEN(sv) + (has_utf8 ?
1898 (512 - UTF_CONTINUATION_MARK +
1899 UNISKIP(0x100))
1900 : 256));
1901 /* How many two-byte within 0..255: 128 in UTF-8,
1902 * 96 in UTF-8-mod. */
1903#else
9cbb5ea2 1904 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1905#endif
9cbb5ea2 1906 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1907#ifdef EBCDIC
1908 if (has_utf8) {
1909 int j;
1910 for (j = 0; j <= 1; j++) {
1911 char * const c = (char*)utf8_hop((U8*)d, -1);
1912 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1913 if (j)
1914 min = (U8)uv;
1915 else if (uv < 256)
1916 max = (U8)uv;
1917 else {
1918 max = (U8)0xff; /* only to \xff */
1919 uvmax = uv; /* \x{100} to uvmax */
1920 }
1921 d = c; /* eat endpoint chars */
1922 }
1923 }
1924 else {
1925#endif
1926 d -= 2; /* eat the first char and the - */
1927 min = (U8)*d; /* first char in range */
1928 max = (U8)d[1]; /* last char in range */
1929#ifdef EBCDIC
1930 }
1931#endif
8ada0baa 1932
c2e66d9e 1933 if (min > max) {
01ec43d0 1934 Perl_croak(aTHX_
d1573ac7 1935 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1936 (char)min, (char)max);
c2e66d9e
GS
1937 }
1938
c7f1f016 1939#ifdef EBCDIC
4c3a8340
TS
1940 if (literal_endpoint == 2 &&
1941 ((isLOWER(min) && isLOWER(max)) ||
1942 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1943 if (isLOWER(min)) {
1944 for (i = min; i <= max; i++)
1945 if (isLOWER(i))
db42d148 1946 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1947 } else {
1948 for (i = min; i <= max; i++)
1949 if (isUPPER(i))
db42d148 1950 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1951 }
1952 }
1953 else
1954#endif
1955 for (i = min; i <= max; i++)
e294cc5d
JH
1956#ifdef EBCDIC
1957 if (has_utf8) {
1958 const U8 ch = (U8)NATIVE_TO_UTF(i);
1959 if (UNI_IS_INVARIANT(ch))
1960 *d++ = (U8)i;
1961 else {
1962 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1963 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1964 }
1965 }
1966 else
1967#endif
1968 *d++ = (char)i;
1969
1970#ifdef EBCDIC
1971 if (uvmax) {
1972 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1973 if (uvmax > 0x101)
1974 *d++ = (char)UTF_TO_NATIVE(0xff);
1975 if (uvmax > 0x100)
1976 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1977 }
1978#endif
02aa26ce
NT
1979
1980 /* mark the range as done, and continue */
79072805 1981 dorange = FALSE;
01ec43d0 1982 didrange = TRUE;
4c3a8340
TS
1983#ifdef EBCDIC
1984 literal_endpoint = 0;
1985#endif
79072805 1986 continue;
4e553d73 1987 }
02aa26ce
NT
1988
1989 /* range begins (ignore - as first or last char) */
79072805 1990 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1991 if (didrange) {
1fafa243 1992 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1993 }
e294cc5d
JH
1994 if (has_utf8
1995#ifdef EBCDIC
1996 && !native_range
1997#endif
1998 ) {
25716404 1999 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2000 s++;
2001 continue;
2002 }
79072805
LW
2003 dorange = TRUE;
2004 s++;
01ec43d0
GS
2005 }
2006 else {
2007 didrange = FALSE;
4c3a8340
TS
2008#ifdef EBCDIC
2009 literal_endpoint = 0;
e294cc5d 2010 native_range = TRUE;
4c3a8340 2011#endif
01ec43d0 2012 }
79072805 2013 }
02aa26ce
NT
2014
2015 /* if we get here, we're not doing a transliteration */
2016
0f5d15d6
IZ
2017 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2018 except for the last char, which will be done separately. */
3280af22 2019 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2020 if (s[2] == '#') {
e994fd66 2021 while (s+1 < send && *s != ')')
db42d148 2022 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2023 }
2024 else if (s[2] == '{' /* This should match regcomp.c */
2025 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2026 {
cc6b7395 2027 I32 count = 1;
0f5d15d6 2028 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2029 char c;
2030
d9f97599
GS
2031 while (count && (c = *regparse)) {
2032 if (c == '\\' && regparse[1])
2033 regparse++;
4e553d73 2034 else if (c == '{')
cc6b7395 2035 count++;
4e553d73 2036 else if (c == '}')
cc6b7395 2037 count--;
d9f97599 2038 regparse++;
cc6b7395 2039 }
e994fd66 2040 if (*regparse != ')')
5bdf89e7 2041 regparse--; /* Leave one char for continuation. */
0f5d15d6 2042 while (s < regparse)
db42d148 2043 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2044 }
748a9306 2045 }
02aa26ce
NT
2046
2047 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2048 else if (*s == '#' && PL_lex_inpat &&
2049 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2050 while (s+1 < send && *s != '\n')
db42d148 2051 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2052 }
02aa26ce 2053
5d1d4326 2054 /* check for embedded arrays
da6eedaa 2055 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2056 */
1749ea0d
TS
2057 else if (*s == '@' && s[1]) {
2058 if (isALNUM_lazy_if(s+1,UTF))
2059 break;
2060 if (strchr(":'{$", s[1]))
2061 break;
2062 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2063 break; /* in regexp, neither @+ nor @- are interpolated */
2064 }
02aa26ce
NT
2065
2066 /* check for embedded scalars. only stop if we're sure it's a
2067 variable.
2068 */
79072805 2069 else if (*s == '$') {
3280af22 2070 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2071 break;
6002328a 2072 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2073 break; /* in regexp, $ might be tail anchor */
2074 }
02aa26ce 2075
2b9d42f0
NIS
2076 /* End of else if chain - OP_TRANS rejoin rest */
2077
02aa26ce 2078 /* backslashes */
79072805
LW
2079 if (*s == '\\' && s+1 < send) {
2080 s++;
02aa26ce 2081
02aa26ce 2082 /* deprecate \1 in strings and substitution replacements */
3280af22 2083 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2084 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2085 {
599cee73 2086 if (ckWARN(WARN_SYNTAX))
9014280d 2087 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2088 *--s = '$';
2089 break;
2090 }
02aa26ce
NT
2091
2092 /* string-change backslash escapes */
3280af22 2093 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2094 --s;
2095 break;
2096 }
cc74c5bd
TS
2097 /* skip any other backslash escapes in a pattern */
2098 else if (PL_lex_inpat) {
2099 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2100 goto default_action;
2101 }
02aa26ce
NT
2102
2103 /* if we get here, it's either a quoted -, or a digit */
79072805 2104 switch (*s) {
02aa26ce
NT
2105
2106 /* quoted - in transliterations */
79072805 2107 case '-':
3280af22 2108 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2109 *d++ = *s++;
2110 continue;
2111 }
2112 /* FALL THROUGH */
2113 default:
11b8faa4 2114 {
86f97054 2115 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2116 ckWARN(WARN_MISC))
9014280d 2117 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2118 "Unrecognized escape \\%c passed through",
2119 *s);
11b8faa4 2120 /* default action is to copy the quoted character */
f9a63242 2121 goto default_action;
11b8faa4 2122 }
02aa26ce
NT
2123
2124 /* \132 indicates an octal constant */
79072805
LW
2125 case '0': case '1': case '2': case '3':
2126 case '4': case '5': case '6': case '7':
ba210ebe 2127 {
53305cf1
NC
2128 I32 flags = 0;
2129 STRLEN len = 3;
2130 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2131 s += len;
2132 }
012bcf8d 2133 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2134
2135 /* \x24 indicates a hex constant */
79072805 2136 case 'x':
a0ed51b3
LW
2137 ++s;
2138 if (*s == '{') {
9d4ba2ae 2139 char* const e = strchr(s, '}');
a4c04bdc
NC
2140 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2141 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2142 STRLEN len;
355860ce 2143
53305cf1 2144 ++s;
adaeee49 2145 if (!e) {
a0ed51b3 2146 yyerror("Missing right brace on \\x{}");
355860ce 2147 continue;
ba210ebe 2148 }
53305cf1
NC
2149 len = e - s;
2150 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2151 s = e + 1;
a0ed51b3
LW
2152 }
2153 else {
ba210ebe 2154 {
53305cf1 2155 STRLEN len = 2;
a4c04bdc 2156 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2157 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2158 s += len;
2159 }
012bcf8d
GS
2160 }
2161
2162 NUM_ESCAPE_INSERT:
2163 /* Insert oct or hex escaped character.
301d3d20 2164 * There will always enough room in sv since such
db42d148 2165 * escapes will be longer than any UTF-8 sequence
301d3d20 2166 * they can end up as. */
ba7cea30 2167
c7f1f016
NIS
2168 /* We need to map to chars to ASCII before doing the tests
2169 to cover EBCDIC
2170 */
c4d5f83a 2171 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2172 if (!has_utf8 && uv > 255) {
301d3d20
JH
2173 /* Might need to recode whatever we have
2174 * accumulated so far if it contains any
2175 * hibit chars.
2176 *
2177 * (Can't we keep track of that and avoid
2178 * this rescan? --jhi)
012bcf8d 2179 */
c7f1f016 2180 int hicount = 0;
63cd0674
NIS
2181 U8 *c;
2182 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2183 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2184 hicount++;
db42d148 2185 }
012bcf8d 2186 }
63cd0674 2187 if (hicount) {
9d4ba2ae 2188 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2189 U8 *src, *dst;
2190 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2191 src = (U8 *)d - 1;
2192 dst = src+hicount;
2193 d += hicount;
cfd0369c 2194 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2195 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2196 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2197 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2198 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2199 }
2200 else {
63cd0674 2201 *dst-- = *src;
012bcf8d 2202 }
c7f1f016 2203 src--;
012bcf8d
GS
2204 }
2205 }
2206 }
2207
9aa983d2 2208 if (has_utf8 || uv > 255) {
9041c2e3 2209 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2210 has_utf8 = TRUE;
f9a63242
JH
2211 if (PL_lex_inwhat == OP_TRANS &&
2212 PL_sublex_info.sub_op) {
2213 PL_sublex_info.sub_op->op_private |=
2214 (PL_lex_repl ? OPpTRANS_FROM_UTF
2215 : OPpTRANS_TO_UTF);
f9a63242 2216 }
e294cc5d
JH
2217#ifdef EBCDIC
2218 if (uv > 255 && !dorange)
2219 native_range = FALSE;
2220#endif
012bcf8d 2221 }
a0ed51b3 2222 else {
012bcf8d 2223 *d++ = (char)uv;
a0ed51b3 2224 }
012bcf8d
GS
2225 }
2226 else {
c4d5f83a 2227 *d++ = (char) uv;
a0ed51b3 2228 }
79072805 2229 continue;
02aa26ce 2230
b239daa5 2231 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2232 case 'N':
55eda711 2233 ++s;
423cee85
JH
2234 if (*s == '{') {
2235 char* e = strchr(s, '}');
155aba94 2236 SV *res;
423cee85 2237 STRLEN len;
cfd0369c 2238 const char *str;
fc8cd66c 2239 SV *type;
4e553d73 2240
423cee85 2241 if (!e) {
5777a3f7 2242 yyerror("Missing right brace on \\N{}");
423cee85
JH
2243 e = s - 1;
2244 goto cont_scan;
2245 }
dbc0d4f2
JH
2246 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2247 /* \N{U+...} */
2248 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2249 PERL_SCAN_DISALLOW_PREFIX;
2250 s += 3;
2251 len = e - s;
2252 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2253 if ( e > s && len != (STRLEN)(e - s) ) {
2254 uv = 0xFFFD;
fc8cd66c 2255 }
dbc0d4f2
JH
2256 s = e + 1;
2257 goto NUM_ESCAPE_INSERT;
2258 }
55eda711 2259 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2260 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2261 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2262 res, NULL, SvPVX(type) );
2263 SvREFCNT_dec(type);
f9a63242
JH
2264 if (has_utf8)
2265 sv_utf8_upgrade(res);
cfd0369c 2266 str = SvPV_const(res,len);
1c47067b
JH
2267#ifdef EBCDIC_NEVER_MIND
2268 /* charnames uses pack U and that has been
2269 * recently changed to do the below uni->native
2270 * mapping, so this would be redundant (and wrong,
2271 * the code point would be doubly converted).
2272 * But leave this in just in case the pack U change
2273 * gets revoked, but the semantics is still
2274 * desireable for charnames. --jhi */
cddc7ef4 2275 {
cfd0369c 2276 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2277
2278 if (uv < 0x100) {
89ebb4a3 2279 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2280
2281 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2282 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2283 str = SvPV_const(res, len);
cddc7ef4
JH
2284 }
2285 }
2286#endif
89491803 2287 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2288 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2289 SvCUR_set(sv, d - ostart);
2290 SvPOK_on(sv);
e4f3eed8 2291 *d = '\0';
f08d6ad9 2292 sv_utf8_upgrade(sv);
d2f449dd 2293 /* this just broke our allocation above... */
eb160463 2294 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2295 d = SvPVX(sv) + SvCUR(sv);
89491803 2296 has_utf8 = TRUE;
f08d6ad9 2297 }
eb160463 2298 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2299 const char * const odest = SvPVX_const(sv);
423cee85 2300
8973db79 2301 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2302 d = SvPVX(sv) + (d - odest);
2303 }
e294cc5d
JH
2304#ifdef EBCDIC
2305 if (!dorange)
2306 native_range = FALSE; /* \N{} is guessed to be Unicode */
2307#endif
423cee85
JH
2308 Copy(str, d, len, char);
2309 d += len;
2310 SvREFCNT_dec(res);
2311 cont_scan:
2312 s = e + 1;
2313 }
2314 else
5777a3f7 2315 yyerror("Missing braces on \\N{}");
423cee85
JH
2316 continue;
2317
02aa26ce 2318 /* \c is a control character */
79072805
LW
2319 case 'c':
2320 s++;
961ce445 2321 if (s < send) {
ba210ebe 2322 U8 c = *s++;
c7f1f016
NIS
2323#ifdef EBCDIC
2324 if (isLOWER(c))
2325 c = toUPPER(c);
2326#endif
db42d148 2327 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2328 }
961ce445
RGS
2329 else {
2330 yyerror("Missing control char name in \\c");
2331 }
79072805 2332 continue;
02aa26ce
NT
2333
2334 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2335 case 'b':
db42d148 2336 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2337 break;
2338 case 'n':
db42d148 2339 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2340 break;
2341 case 'r':
db42d148 2342 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2343 break;
2344 case 'f':
db42d148 2345 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2346 break;
2347 case 't':
db42d148 2348 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2349 break;
34a3fe2a 2350 case 'e':
db42d148 2351 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2352 break;
2353 case 'a':
db42d148 2354 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2355 break;
02aa26ce
NT
2356 } /* end switch */
2357
79072805
LW
2358 s++;
2359 continue;
02aa26ce 2360 } /* end if (backslash) */
4c3a8340
TS
2361#ifdef EBCDIC
2362 else
2363 literal_endpoint++;
2364#endif
02aa26ce 2365
f9a63242 2366 default_action:
2b9d42f0
NIS
2367 /* If we started with encoded form, or already know we want it
2368 and then encode the next character */
2369 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2370 STRLEN len = 1;
5f66b61c
AL
2371 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2372 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2373 s += len;
2374 if (need > len) {
2375 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2376 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2377 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2378 }
5f66b61c 2379 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2380 has_utf8 = TRUE;
e294cc5d
JH
2381#ifdef EBCDIC
2382 if (uv > 255 && !dorange)
2383 native_range = FALSE;
2384#endif
2b9d42f0
NIS
2385 }
2386 else {
2387 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2388 }
02aa26ce
NT
2389 } /* while loop to process each character */
2390
2391 /* terminate the string and set up the sv */
79072805 2392 *d = '\0';
95a20fc0 2393 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2394 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2395 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2396
79072805 2397 SvPOK_on(sv);
9f4817db 2398 if (PL_encoding && !has_utf8) {
d0063567
DK
2399 sv_recode_to_utf8(sv, PL_encoding);
2400 if (SvUTF8(sv))
2401 has_utf8 = TRUE;
9f4817db 2402 }
2b9d42f0 2403 if (has_utf8) {
7e2040f0 2404 SvUTF8_on(sv);
2b9d42f0 2405 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2406 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2407 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2408 }
2409 }
79072805 2410
02aa26ce 2411 /* shrink the sv if we allocated more than we used */
79072805 2412 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2413 SvPV_shrink_to_cur(sv);
79072805 2414 }
02aa26ce 2415
9b599b2a 2416 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2417 if (s > PL_bufptr) {
2418 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2419 sv = new_constant(start, s - start,
2420 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2421 sv, NULL,
10edeb5d
JH
2422 (const char *)
2423 (( PL_lex_inwhat == OP_TRANS
2424 ? "tr"
2425 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2426 ? "s"
2427 : "qq"))));
79072805 2428 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2429 } else
8990e307 2430 SvREFCNT_dec(sv);
79072805
LW
2431 return s;
2432}
2433
ffb4593c
NT
2434/* S_intuit_more
2435 * Returns TRUE if there's more to the expression (e.g., a subscript),
2436 * FALSE otherwise.
ffb4593c
NT
2437 *
2438 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2439 *
2440 * ->[ and ->{ return TRUE
2441 * { and [ outside a pattern are always subscripts, so return TRUE
2442 * if we're outside a pattern and it's not { or [, then return FALSE
2443 * if we're in a pattern and the first char is a {
2444 * {4,5} (any digits around the comma) returns FALSE
2445 * if we're in a pattern and the first char is a [
2446 * [] returns FALSE
2447 * [SOMETHING] has a funky algorithm to decide whether it's a
2448 * character class or not. It has to deal with things like
2449 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2450 * anything else returns TRUE
2451 */
2452
9cbb5ea2
GS
2453/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2454
76e3520e 2455STATIC int
cea2e8a9 2456S_intuit_more(pTHX_ register char *s)
79072805 2457{
97aff369 2458 dVAR;
3280af22 2459 if (PL_lex_brackets)
79072805
LW
2460 return TRUE;
2461 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2462 return TRUE;
2463 if (*s != '{' && *s != '[')
2464 return FALSE;
3280af22 2465 if (!PL_lex_inpat)
79072805
LW
2466 return TRUE;
2467
2468 /* In a pattern, so maybe we have {n,m}. */
2469 if (*s == '{') {
2470 s++;
2471 if (!isDIGIT(*s))
2472 return TRUE;
2473 while (isDIGIT(*s))
2474 s++;
2475 if (*s == ',')
2476 s++;
2477 while (isDIGIT(*s))
2478 s++;
2479 if (*s == '}')
2480 return FALSE;
2481 return TRUE;
2482
2483 }
2484
2485 /* On the other hand, maybe we have a character class */
2486
2487 s++;
2488 if (*s == ']' || *s == '^')
2489 return FALSE;
2490 else {
ffb4593c 2491 /* this is terrifying, and it works */
79072805
LW
2492 int weight = 2; /* let's weigh the evidence */
2493 char seen[256];
f27ffc4a 2494 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2495 const char * const send = strchr(s,']');
3280af22 2496 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2497
2498 if (!send) /* has to be an expression */
2499 return TRUE;
2500
2501 Zero(seen,256,char);
2502 if (*s == '$')
2503 weight -= 3;
2504 else if (isDIGIT(*s)) {
2505 if (s[1] != ']') {
2506 if (isDIGIT(s[1]) && s[2] == ']')
2507 weight -= 10;
2508 }
2509 else
2510 weight -= 100;
2511 }
2512 for (; s < send; s++) {
2513 last_un_char = un_char;
2514 un_char = (unsigned char)*s;
2515 switch (*s) {
2516 case '@':
2517 case '&':
2518 case '$':
2519 weight -= seen[un_char] * 10;
7e2040f0 2520 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2521 int len;
8903cb82 2522 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2523 len = (int)strlen(tmpbuf);
2524 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2525 weight -= 100;
2526 else
2527 weight -= 10;
2528 }
2529 else if (*s == '$' && s[1] &&
93a17b20
LW
2530 strchr("[#!%*<>()-=",s[1])) {
2531 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2532 weight -= 10;
2533 else
2534 weight -= 1;
2535 }
2536 break;
2537 case '\\':
2538 un_char = 254;
2539 if (s[1]) {
93a17b20 2540 if (strchr("wds]",s[1]))
79072805 2541 weight += 100;
10edeb5d 2542 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2543 weight += 1;
93a17b20 2544 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2545 weight += 40;
2546 else if (isDIGIT(s[1])) {
2547 weight += 40;
2548 while (s[1] && isDIGIT(s[1]))
2549 s++;
2550 }
2551 }
2552 else
2553 weight += 100;
2554 break;
2555 case '-':
2556 if (s[1] == '\\')
2557 weight += 50;
93a17b20 2558 if (strchr("aA01! ",last_un_char))
79072805 2559 weight += 30;
93a17b20 2560 if (strchr("zZ79~",s[1]))
79072805 2561 weight += 30;
f27ffc4a
GS
2562 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2563 weight -= 5; /* cope with negative subscript */
79072805
LW
2564 break;
2565 default:
3792a11b
NC
2566 if (!isALNUM(last_un_char)
2567 && !(last_un_char == '$' || last_un_char == '@'
2568 || last_un_char == '&')
2569 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2570 char *d = tmpbuf;
2571 while (isALPHA(*s))
2572 *d++ = *s++;
2573 *d = '\0';
5458a98a 2574 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2575 weight -= 150;
2576 }
2577 if (un_char == last_un_char + 1)
2578 weight += 5;
2579 weight -= seen[un_char];
2580 break;
2581 }
2582 seen[un_char]++;
2583 }
2584 if (weight >= 0) /* probably a character class */
2585 return FALSE;
2586 }
2587
2588 return TRUE;
2589}
ffed7fef 2590
ffb4593c
NT
2591/*
2592 * S_intuit_method
2593 *
2594 * Does all the checking to disambiguate
2595 * foo bar
2596 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2597 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2598 *
2599 * First argument is the stuff after the first token, e.g. "bar".
2600 *
2601 * Not a method if bar is a filehandle.
2602 * Not a method if foo is a subroutine prototyped to take a filehandle.
2603 * Not a method if it's really "Foo $bar"
2604 * Method if it's "foo $bar"
2605 * Not a method if it's really "print foo $bar"
2606 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2607 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2608 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2609 * =>
2610 */
2611
76e3520e 2612STATIC int
62d55b22 2613S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2614{
97aff369 2615 dVAR;
a0d0e21e 2616 char *s = start + (*start == '$');
3280af22 2617 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2618 STRLEN len;
2619 GV* indirgv;
5db06880
NC
2620#ifdef PERL_MAD
2621 int soff;
2622#endif
a0d0e21e
LW
2623
2624 if (gv) {
62d55b22 2625 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2626 return 0;
62d55b22
NC
2627 if (cv) {
2628 if (SvPOK(cv)) {
2629 const char *proto = SvPVX_const(cv);
2630 if (proto) {
2631 if (*proto == ';')
2632 proto++;
2633 if (*proto == '*')
2634 return 0;
2635 }
b6c543e3
IZ
2636 }
2637 } else
c35e046a 2638 gv = NULL;
a0d0e21e 2639 }
8903cb82 2640 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2641 /* start is the beginning of the possible filehandle/object,
2642 * and s is the end of it
2643 * tmpbuf is a copy of it
2644 */
2645
a0d0e21e 2646 if (*start == '$') {
3280af22 2647 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2648 return 0;
5db06880
NC
2649#ifdef PERL_MAD
2650 len = start - SvPVX(PL_linestr);
2651#endif
29595ff2 2652 s = PEEKSPACE(s);
f0092767 2653#ifdef PERL_MAD
5db06880
NC
2654 start = SvPVX(PL_linestr) + len;
2655#endif
3280af22
NIS
2656 PL_bufptr = start;
2657 PL_expect = XREF;
a0d0e21e
LW
2658 return *s == '(' ? FUNCMETH : METHOD;
2659 }
5458a98a 2660 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2661 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2662 len -= 2;
2663 tmpbuf[len] = '\0';
5db06880
NC
2664#ifdef PERL_MAD
2665 soff = s - SvPVX(PL_linestr);
2666#endif
c3e0f903
GS
2667 goto bare_package;
2668 }
90e5519e 2669 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2670 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2671 return 0;
2672 /* filehandle or package name makes it a method */
89bfa8cd 2673 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2674#ifdef PERL_MAD
2675 soff = s - SvPVX(PL_linestr);
2676#endif
29595ff2 2677 s = PEEKSPACE(s);
3280af22 2678 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2679 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2680 bare_package:
cd81e915 2681 start_force(PL_curforce);
9ded7720 2682 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2683 newSVpvn(tmpbuf,len));
9ded7720 2684 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2685 if (PL_madskills)
2686 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2687 PL_expect = XTERM;
a0d0e21e 2688 force_next(WORD);
3280af22 2689 PL_bufptr = s;
5db06880
NC
2690#ifdef PERL_MAD
2691 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2692#endif
a0d0e21e
LW
2693 return *s == '(' ? FUNCMETH : METHOD;
2694 }
2695 }
2696 return 0;
2697}
2698
ffb4593c
NT
2699/*
2700 * S_incl_perldb
2701 * Return a string of Perl code to load the debugger. If PERL5DB
2702 * is set, it will return the contents of that, otherwise a
2703 * compile-time require of perl5db.pl.
2704 */
2705
bfed75c6 2706STATIC const char*
cea2e8a9 2707S_incl_perldb(pTHX)
a0d0e21e 2708{
97aff369 2709 dVAR;
3280af22 2710 if (PL_perldb) {
9d4ba2ae 2711 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2712
2713 if (pdb)
2714 return pdb;
93189314 2715 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2716 return "BEGIN { require 'perl5db.pl' }";
2717 }
2718 return "";
2719}
2720
2721
16d20bd9 2722/* Encoded script support. filter_add() effectively inserts a
4e553d73 2723 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2724 * Note that the filter function only applies to the current source file
2725 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2726 *
2727 * The datasv parameter (which may be NULL) can be used to pass
2728 * private data to this instance of the filter. The filter function
2729 * can recover the SV using the FILTER_DATA macro and use it to
2730 * store private buffers and state information.
2731 *
2732 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2733 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2734 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2735 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2736 * private use must be set using malloc'd pointers.
2737 */
16d20bd9
AD
2738
2739SV *
864dbfa3 2740Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2741{
97aff369 2742 dVAR;
f4c556ac 2743 if (!funcp)
a0714e2c 2744 return NULL;
f4c556ac 2745
3280af22
NIS
2746 if (!PL_rsfp_filters)
2747 PL_rsfp_filters = newAV();
16d20bd9 2748 if (!datasv)
561b68a9 2749 datasv = newSV(0);
862a34c6 2750 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2751 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2752 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2753 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2754 FPTR2DPTR(void *, IoANY(datasv)),
2755 SvPV_nolen(datasv)));
3280af22
NIS
2756 av_unshift(PL_rsfp_filters, 1);
2757 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2758 return(datasv);
2759}
4e553d73 2760
16d20bd9
AD
2761
2762/* Delete most recently added instance of this filter function. */
a0d0e21e 2763void
864dbfa3 2764Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2765{
97aff369 2766 dVAR;
e0c19803 2767 SV *datasv;
24801a4b 2768
33073adb 2769#ifdef DEBUGGING
55662e27
JH
2770 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2771 FPTR2DPTR(void*, funcp)));
33073adb 2772#endif
3280af22 2773 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2774 return;
2775 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2776 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2777 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2778 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2779 IoANY(datasv) = (void *)NULL;
3280af22 2780 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2781
16d20bd9
AD
2782 return;
2783 }
2784 /* we need to search for the correct entry and clear it */
cea2e8a9 2785 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2786}
2787
2788
1de9afcd
RGS
2789/* Invoke the idxth filter function for the current rsfp. */
2790/* maxlen 0 = read one text line */
16d20bd9 2791I32
864dbfa3 2792Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2793{
97aff369 2794 dVAR;
16d20bd9
AD
2795 filter_t funcp;
2796 SV *datasv = NULL;
f482118e
NC
2797 /* This API is bad. It should have been using unsigned int for maxlen.
2798 Not sure if we want to change the API, but if not we should sanity
2799 check the value here. */
39cd7a59
NC
2800 const unsigned int correct_length
2801 = maxlen < 0 ?
2802#ifdef PERL_MICRO
2803 0x7FFFFFFF
2804#else
2805 INT_MAX
2806#endif
2807 : maxlen;
e50aee73 2808
3280af22 2809 if (!PL_rsfp_filters)
16d20bd9 2810 return -1;
1de9afcd 2811 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2812 /* Provide a default input filter to make life easy. */
2813 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2814 DEBUG_P(PerlIO_printf(Perl_debug_log,
2815 "filter_read %d: from rsfp\n", idx));
f482118e 2816 if (correct_length) {
16d20bd9
AD
2817 /* Want a block */
2818 int len ;
f54cb97a 2819 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2820
2821 /* ensure buf_sv is large enough */
f482118e
NC
2822 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2823 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2824 correct_length)) <= 0) {
3280af22 2825 if (PerlIO_error(PL_rsfp))
37120919
AD
2826 return -1; /* error */
2827 else
2828 return 0 ; /* end of file */
2829 }
16d20bd9
AD
2830 SvCUR_set(buf_sv, old_len + len) ;
2831 } else {
2832 /* Want a line */
3280af22
NIS
2833 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2834 if (PerlIO_error(PL_rsfp))
37120919
AD
2835 return -1; /* error */
2836 else
2837 return 0 ; /* end of file */
2838 }
16d20bd9
AD
2839 }
2840 return SvCUR(buf_sv);
2841 }
2842 /* Skip this filter slot if filter has been deleted */
1de9afcd 2843 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2844 DEBUG_P(PerlIO_printf(Perl_debug_log,
2845 "filter_read %d: skipped (filter deleted)\n",
2846 idx));
f482118e 2847 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2848 }
2849 /* Get function pointer hidden within datasv */
8141890a 2850 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2851 DEBUG_P(PerlIO_printf(Perl_debug_log,
2852 "filter_read %d: via function %p (%s)\n",
ca0270c4 2853 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2854 /* Call function. The function is expected to */
2855 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2856 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2857 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2858}
2859
76e3520e 2860STATIC char *
cea2e8a9 2861S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2862{
97aff369 2863 dVAR;
c39cd008 2864#ifdef PERL_CR_FILTER
3280af22 2865 if (!PL_rsfp_filters) {
c39cd008 2866 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2867 }
2868#endif
3280af22 2869 if (PL_rsfp_filters) {
55497cff 2870 if (!append)
2871 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2872 if (FILTER_READ(0, sv, 0) > 0)
2873 return ( SvPVX(sv) ) ;
2874 else
bd61b366 2875 return NULL ;
16d20bd9 2876 }
9d116dd7 2877 else
fd049845 2878 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2879}
2880
01ec43d0 2881STATIC HV *
7fc63493 2882S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2883{
97aff369 2884 dVAR;
def3634b
GS
2885 GV *gv;
2886
01ec43d0 2887 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2888 return PL_curstash;
2889
2890 if (len > 2 &&
2891 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2892 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2893 {
2894 return GvHV(gv); /* Foo:: */
def3634b
GS
2895 }
2896
2897 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2898 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2899 if (gv && GvCV(gv)) {
2900 SV * const sv = cv_const_sv(GvCV(gv));
2901 if (sv)
83003860 2902 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2903 }
2904
2905 return gv_stashpv(pkgname, FALSE);
2906}
a0d0e21e 2907
e3f73d4e
RGS
2908/*
2909 * S_readpipe_override
2910 * Check whether readpipe() is overriden, and generates the appropriate
2911 * optree, provided sublex_start() is called afterwards.
2912 */
2913STATIC void
1d51329b 2914S_readpipe_override(pTHX)
e3f73d4e
RGS
2915{
2916 GV **gvp;
2917 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2918 yylval.ival = OP_BACKTICK;
2919 if ((gv_readpipe
2920 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2921 ||
2922 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2923 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2924 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2925 {
2926 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2927 append_elem(OP_LIST,
2928 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2929 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2930 }
2931 else {
2932 set_csh();
2933 }
2934}
2935
5db06880
NC
2936#ifdef PERL_MAD
2937 /*
2938 * Perl_madlex
2939 * The intent of this yylex wrapper is to minimize the changes to the
2940 * tokener when we aren't interested in collecting madprops. It remains
2941 * to be seen how successful this strategy will be...
2942 */
2943
2944int
2945Perl_madlex(pTHX)
2946{
2947 int optype;
2948 char *s = PL_bufptr;
2949
cd81e915
NC
2950 /* make sure PL_thiswhite is initialized */
2951 PL_thiswhite = 0;
2952 PL_thismad = 0;
5db06880 2953
cd81e915 2954 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2955 if (PL_pending_ident)
2956 return S_pending_ident(aTHX);
2957
2958 /* previous token ate up our whitespace? */
cd81e915
NC
2959 if (!PL_lasttoke && PL_nextwhite) {
2960 PL_thiswhite = PL_nextwhite;
2961 PL_nextwhite = 0;
5db06880
NC
2962 }
2963
2964 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2965 PL_realtokenstart = -1;
2966 PL_thistoken = 0;
5db06880
NC
2967 optype = yylex();
2968 s = PL_bufptr;
cd81e915 2969 assert(PL_curforce < 0);
5db06880 2970
cd81e915
NC
2971 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2972 if (!PL_thistoken) {
2973 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2974 PL_thistoken = newSVpvs("");
5db06880 2975 else {
c35e046a 2976 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2977 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2978 }
2979 }
cd81e915
NC
2980 if (PL_thismad) /* install head */
2981 CURMAD('X', PL_thistoken);
5db06880
NC
2982 }
2983
2984 /* last whitespace of a sublex? */
cd81e915
NC
2985 if (optype == ')' && PL_endwhite) {
2986 CURMAD('X', PL_endwhite);
5db06880
NC
2987 }
2988
cd81e915 2989 if (!PL_thismad) {
5db06880
NC
2990
2991 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2992 if (!PL_thiswhite && !PL_endwhite && !optype) {
2993 sv_free(PL_thistoken);
2994 PL_thistoken = 0;
5db06880
NC
2995 return 0;
2996 }
2997
2998 /* put off final whitespace till peg */
2999 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3000 PL_nextwhite = PL_thiswhite;
3001 PL_thiswhite = 0;
5db06880 3002 }
cd81e915
NC
3003 else if (PL_thisopen) {
3004 CURMAD('q', PL_thisopen);
3005 if (PL_thistoken)
3006 sv_free(PL_thistoken);
3007 PL_thistoken = 0;
5db06880
NC
3008 }
3009 else {
3010 /* Store actual token text as madprop X */
cd81e915 3011 CURMAD('X', PL_thistoken);
5db06880
NC
3012 }
3013
cd81e915 3014 if (PL_thiswhite) {
5db06880 3015 /* add preceding whitespace as madprop _ */
cd81e915 3016 CURMAD('_', PL_thiswhite);
5db06880
NC
3017 }
3018
cd81e915 3019 if (PL_thisstuff) {
5db06880 3020 /* add quoted material as madprop = */
cd81e915 3021 CURMAD('=', PL_thisstuff);
5db06880
NC
3022 }
3023
cd81e915 3024 if (PL_thisclose) {
5db06880 3025 /* add terminating quote as madprop Q */
cd81e915 3026 CURMAD('Q', PL_thisclose);
5db06880
NC
3027 }
3028 }
3029
3030 /* special processing based on optype */
3031
3032 switch (optype) {
3033
3034 /* opval doesn't need a TOKEN since it can already store mp */
3035 case WORD:
3036 case METHOD:
3037 case FUNCMETH:
3038 case THING:
3039 case PMFUNC:
3040 case PRIVATEREF:
3041 case FUNC0SUB:
3042 case UNIOPSUB:
3043 case LSTOPSUB:
3044 if (yylval.opval)
cd81e915
NC
3045 append_madprops(PL_thismad, yylval.opval, 0);
3046 PL_thismad = 0;
5db06880
NC
3047 return optype;
3048
3049 /* fake EOF */
3050 case 0:
3051 optype = PEG;
cd81e915
NC
3052 if (PL_endwhite) {
3053 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3054 PL_endwhite = 0;
5db06880
NC
3055 }
3056 break;
3057
3058 case ']':
3059 case '}':
cd81e915 3060 if (PL_faketokens)
5db06880
NC
3061 break;
3062 /* remember any fake bracket that lexer is about to discard */
3063 if (PL_lex_brackets == 1 &&
3064 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3065 {
3066 s = PL_bufptr;
3067 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3068 s++;
3069 if (*s == '}') {
cd81e915
NC
3070 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3071 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3072 PL_thiswhite = 0;
5db06880
NC
3073 PL_bufptr = s - 1;
3074 break; /* don't bother looking for trailing comment */
3075 }
3076 else
3077 s = PL_bufptr;
3078 }
3079 if (optype == ']')
3080 break;
3081 /* FALLTHROUGH */
3082
3083 /* attach a trailing comment to its statement instead of next token */
3084 case ';':
cd81e915 3085 if (PL_faketokens)
5db06880
NC
3086 break;
3087 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3088 s = PL_bufptr;
3089 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3090 s++;
3091 if (*s == '\n' || *s == '#') {
3092 while (s < PL_bufend && *s != '\n')
3093 s++;
3094 if (s < PL_bufend)
3095 s++;
cd81e915
NC
3096 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3097 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3098 PL_thiswhite = 0;
5db06880
NC
3099 PL_bufptr = s;
3100 }
3101 }
3102 break;
3103
3104 /* pval */
3105 case LABEL:
3106 break;
3107
3108 /* ival */
3109 default:
3110 break;
3111
3112 }
3113
3114 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3115 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3116 PL_thismad = 0;
5db06880
NC
3117 return optype;
3118}
3119#endif
3120
468aa647 3121STATIC char *
cc6ed77d 3122S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3123 dVAR;
468aa647
RGS
3124 if (PL_expect != XSTATE)
3125 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3126 is_use ? "use" : "no"));
29595ff2 3127 s = SKIPSPACE1(s);
468aa647
RGS
3128 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3129 s = force_version(s, TRUE);
29595ff2 3130 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3131 start_force(PL_curforce);
9ded7720 3132 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3133 force_next(WORD);
3134 }
3135 else if (*s == 'v') {
3136 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3137 s = force_version(s, FALSE);
3138 }
3139 }
3140 else {
3141 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3142 s = force_version(s, FALSE);
3143 }
3144 yylval.ival = is_use;
3145 return s;
3146}
748a9306 3147#ifdef DEBUGGING
27da23d5 3148 static const char* const exp_name[] =
09bef843 3149 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3150 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3151 };
748a9306 3152#endif
463ee0b2 3153
02aa26ce
NT
3154/*
3155 yylex
3156
3157 Works out what to call the token just pulled out of the input
3158 stream. The yacc parser takes care of taking the ops we return and
3159 stitching them into a tree.
3160
3161 Returns:
3162 PRIVATEREF
3163
3164 Structure:
3165 if read an identifier
3166 if we're in a my declaration
3167 croak if they tried to say my($foo::bar)
3168 build the ops for a my() declaration
3169 if it's an access to a my() variable
3170 are we in a sort block?
3171 croak if my($a); $a <=> $b
3172 build ops for access to a my() variable
3173 if in a dq string, and they've said @foo and we can't find @foo
3174 croak
3175 build ops for a bareword
3176 if we already built the token before, use it.
3177*/
3178
20141f0e 3179
dba4d153
JH
3180#ifdef __SC__
3181#pragma segment Perl_yylex
3182#endif
dba4d153 3183int
dba4d153 3184Perl_yylex(pTHX)
20141f0e 3185{
97aff369 3186 dVAR;
3afc138a 3187 register char *s = PL_bufptr;
378cc40b 3188 register char *d;
463ee0b2 3189 STRLEN len;
aa7440fb 3190 bool bof = FALSE;
a687059c 3191
10edeb5d
JH
3192 /* orig_keyword, gvp, and gv are initialized here because
3193 * jump to the label just_a_word_zero can bypass their
3194 * initialization later. */
3195 I32 orig_keyword = 0;
3196 GV *gv = NULL;
3197 GV **gvp = NULL;
3198
bbf60fe6 3199 DEBUG_T( {
396482e1 3200 SV* tmp = newSVpvs("");
b6007c36
DM
3201 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3202 (IV)CopLINE(PL_curcop),
3203 lex_state_names[PL_lex_state],
3204 exp_name[PL_expect],
3205 pv_display(tmp, s, strlen(s), 0, 60));
3206 SvREFCNT_dec(tmp);
bbf60fe6 3207 } );
02aa26ce 3208 /* check if there's an identifier for us to look at */
ba979b31 3209 if (PL_pending_ident)
bbf60fe6 3210 return REPORT(S_pending_ident(aTHX));
bbce6d69 3211
02aa26ce
NT
3212 /* no identifier pending identification */
3213
3280af22 3214 switch (PL_lex_state) {
79072805
LW
3215#ifdef COMMENTARY
3216 case LEX_NORMAL: /* Some compilers will produce faster */
3217 case LEX_INTERPNORMAL: /* code if we comment these out. */
3218 break;
3219#endif
3220
09bef843 3221 /* when we've already built the next token, just pull it out of the queue */
79072805 3222 case LEX_KNOWNEXT:
5db06880
NC
3223#ifdef PERL_MAD
3224 PL_lasttoke--;
3225 yylval = PL_nexttoke[PL_lasttoke].next_val;
3226 if (PL_madskills) {
cd81e915 3227 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3228 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3229 if (PL_thismad && PL_thismad->mad_key == '_') {
3230 PL_thiswhite = (SV*)PL_thismad->mad_val;
3231 PL_thismad->mad_val = 0;
3232 mad_free(PL_thismad);
3233 PL_thismad = 0;
5db06880
NC
3234 }
3235 }
3236 if (!PL_lasttoke) {
3237 PL_lex_state = PL_lex_defer;
3238 PL_expect = PL_lex_expect;
3239 PL_lex_defer = LEX_NORMAL;
3240 if (!PL_nexttoke[PL_lasttoke].next_type)
3241 return yylex();
3242 }
3243#else
3280af22 3244 PL_nexttoke--;
5db06880 3245 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3246 if (!PL_nexttoke) {
3247 PL_lex_state = PL_lex_defer;
3248 PL_expect = PL_lex_expect;
3249 PL_lex_defer = LEX_NORMAL;
463ee0b2 3250 }
5db06880
NC
3251#endif
3252#ifdef PERL_MAD
3253 /* FIXME - can these be merged? */
3254 return(PL_nexttoke[PL_lasttoke].next_type);
3255#else
bbf60fe6 3256 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3257#endif
79072805 3258
02aa26ce 3259 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3260 when we get here, PL_bufptr is at the \
02aa26ce 3261 */
79072805
LW
3262 case LEX_INTERPCASEMOD:
3263#ifdef DEBUGGING
3280af22 3264 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3265 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3266#endif
02aa26ce 3267 /* handle \E or end of string */
3280af22 3268 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3269 /* if at a \E */
3280af22 3270 if (PL_lex_casemods) {
f54cb97a 3271 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3272 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3273
3792a11b
NC
3274 if (PL_bufptr != PL_bufend
3275 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3276 PL_bufptr += 2;
3277 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3278#ifdef PERL_MAD
3279 if (PL_madskills)
6b29d1f5 3280 PL_thistoken = newSVpvs("\\E");
5db06880 3281#endif
a0d0e21e 3282 }
bbf60fe6 3283 return REPORT(')');
79072805 3284 }
5db06880
NC
3285#ifdef PERL_MAD
3286 while (PL_bufptr != PL_bufend &&
3287 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3288 if (!PL_thiswhite)
6b29d1f5 3289 PL_thiswhite = newSVpvs("");
cd81e915 3290 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3291 PL_bufptr += 2;
3292 }
3293#else
3280af22
NIS
3294 if (PL_bufptr != PL_bufend)
3295 PL_bufptr += 2;
5db06880 3296#endif
3280af22 3297 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3298 return yylex();
79072805
LW
3299 }
3300 else {
607df283 3301 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3302 "### Saw case modifier\n"); });
3280af22 3303 s = PL_bufptr + 1;
6e909404 3304 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3305#ifdef PERL_MAD
cd81e915 3306 if (!PL_thiswhite)
6b29d1f5 3307 PL_thiswhite = newSVpvs("");
cd81e915 3308 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3309#endif
89122651 3310 PL_bufptr = s + 3;
6e909404
JH
3311 PL_lex_state = LEX_INTERPCONCAT;
3312 return yylex();
a0d0e21e 3313 }
6e909404 3314 else {
90771dc0 3315 I32 tmp;
5db06880
NC
3316 if (!PL_madskills) /* when just compiling don't need correct */
3317 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3318 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3319 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3320 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3321 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3322 return REPORT(')');
6e909404
JH
3323 }
3324 if (PL_lex_casemods > 10)
3325 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3326 PL_lex_casestack[PL_lex_casemods++] = *s;
3327 PL_lex_casestack[PL_lex_casemods] = '\0';
3328 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3329 start_force(PL_curforce);
9ded7720 3330 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3331 force_next('(');
cd81e915 3332 start_force(PL_curforce);
6e909404 3333 if (*s == 'l')
9ded7720 3334 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3335 else if (*s == 'u')
9ded7720 3336 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3337 else if (*s == 'L')
9ded7720 3338 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3339 else if (*s == 'U')
9ded7720 3340 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3341 else if (*s == 'Q')
9ded7720 3342 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3343 else
3344 Perl_croak(aTHX_ "panic: yylex");
5db06880 3345 if (PL_madskills) {
6b29d1f5 3346 SV* const tmpsv = newSVpvs("");
5db06880
NC
3347 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3348 curmad('_', tmpsv);
3349 }
6e909404 3350 PL_bufptr = s + 1;
a0d0e21e 3351 }
79072805 3352 force_next(FUNC);
3280af22
NIS
3353 if (PL_lex_starts) {
3354 s = PL_bufptr;
3355 PL_lex_starts = 0;
5db06880
NC
3356#ifdef PERL_MAD
3357 if (PL_madskills) {
cd81e915
NC
3358 if (PL_thistoken)
3359 sv_free(PL_thistoken);
6b29d1f5 3360 PL_thistoken = newSVpvs("");
5db06880
NC
3361 }
3362#endif
131b3ad0
DM
3363 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3364 if (PL_lex_casemods == 1 && PL_lex_inpat)
3365 OPERATOR(',');
3366 else
3367 Aop(OP_CONCAT);
79072805
LW
3368 }
3369 else
cea2e8a9 3370 return yylex();
79072805
LW
3371 }
3372
55497cff 3373 case LEX_INTERPPUSH:
bbf60fe6 3374 return REPORT(sublex_push());
55497cff 3375
79072805 3376 case LEX_INTERPSTART:
3280af22 3377 if (PL_bufptr == PL_bufend)
bbf60fe6 3378 return REPORT(sublex_done());
607df283 3379 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3380 "### Interpolated variable\n"); });
3280af22
NIS
3381 PL_expect = XTERM;
3382 PL_lex_dojoin = (*PL_bufptr == '@');
3383 PL_lex_state = LEX_INTERPNORMAL;
3384 if (PL_lex_dojoin) {
cd81e915 3385 start_force(PL_curforce);
9ded7720 3386 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3387 force_next(',');
cd81e915 3388 start_force(PL_curforce);
a0d0e21e 3389 force_ident("\"", '$');
cd81e915 3390 start_force(PL_curforce);
9ded7720 3391 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3392 force_next('$');
cd81e915 3393 start_force(PL_curforce);
9ded7720 3394 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3395 force_next('(');
cd81e915 3396 start_force(PL_curforce);
9ded7720 3397 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3398 force_next(FUNC);
3399 }
3280af22
NIS
3400 if (PL_lex_starts++) {
3401 s = PL_bufptr;
5db06880
NC
3402#ifdef PERL_MAD
3403 if (PL_madskills) {
cd81e915
NC
3404 if (PL_thistoken)
3405 sv_free(PL_thistoken);
6b29d1f5 3406 PL_thistoken = newSVpvs("");
5db06880
NC
3407 }
3408#endif
131b3ad0
DM
3409 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3410 if (!PL_lex_casemods && PL_lex_inpat)
3411 OPERATOR(',');
3412 else
3413 Aop(OP_CONCAT);
79072805 3414 }
cea2e8a9 3415 return yylex();
79072805
LW
3416
3417 case LEX_INTERPENDMAYBE:
3280af22
NIS
3418 if (intuit_more(PL_bufptr)) {
3419 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3420 break;
3421 }
3422 /* FALL THROUGH */
3423
3424 case LEX_INTERPEND:
3280af22
NIS
3425 if (PL_lex_dojoin) {
3426 PL_lex_dojoin = FALSE;
3427 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3428#ifdef PERL_MAD
3429 if (PL_madskills) {
cd81e915
NC
3430 if (PL_thistoken)
3431 sv_free(PL_thistoken);
6b29d1f5 3432 PL_thistoken = newSVpvs("");
5db06880
NC
3433 }
3434#endif
bbf60fe6 3435 return REPORT(')');
79072805 3436 }
43a16006 3437 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3438 && SvEVALED(PL_lex_repl))
43a16006 3439 {
e9fa98b2 3440 if (PL_bufptr != PL_bufend)
cea2e8a9 3441 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3442 PL_lex_repl = NULL;
e9fa98b2 3443 }
79072805
LW
3444 /* FALLTHROUGH */
3445 case LEX_INTERPCONCAT:
3446#ifdef DEBUGGING
3280af22 3447 if (PL_lex_brackets)
cea2e8a9 3448 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3449#endif
3280af22 3450 if (PL_bufptr == PL_bufend)
bbf60fe6 3451 return REPORT(sublex_done());
79072805 3452
3280af22
NIS
3453 if (SvIVX(PL_linestr) == '\'') {
3454 SV *sv = newSVsv(PL_linestr);
3455 if (!PL_lex_inpat)
76e3520e 3456 sv = tokeq(sv);
3280af22 3457 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3458 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3459 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3460 s = PL_bufend;
79072805
LW
3461 }
3462 else {
3280af22 3463 s = scan_const(PL_bufptr);
79072805 3464 if (*s == '\\')
3280af22 3465 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3466 else
3280af22 3467 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3468 }
3469
3280af22 3470 if (s != PL_bufptr) {
cd81e915 3471 start_force(PL_curforce);
5db06880
NC
3472 if (PL_madskills) {
3473 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3474 }
9ded7720 3475 NEXTVAL_NEXTTOKE = yylval;
3280af22 3476 PL_expect = XTERM;
79072805 3477 force_next(THING);
131b3ad0 3478 if (PL_lex_starts++) {
5db06880
NC
3479#ifdef PERL_MAD
3480 if (PL_madskills) {
cd81e915
NC
3481 if (PL_thistoken)
3482 sv_free(PL_thistoken);
6b29d1f5 3483 PL_thistoken = newSVpvs("");
5db06880
NC
3484 }
3485#endif
131b3ad0
DM
3486 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3487 if (!PL_lex_casemods && PL_lex_inpat)
3488 OPERATOR(',');
3489 else
3490 Aop(OP_CONCAT);
3491 }
79072805 3492 else {
3280af22 3493 PL_bufptr = s;
cea2e8a9 3494 return yylex();
79072805
LW
3495 }
3496 }
3497
cea2e8a9 3498 return yylex();
a0d0e21e 3499 case LEX_FORMLINE:
3280af22
NIS
3500 PL_lex_state = LEX_NORMAL;
3501 s = scan_formline(PL_bufptr);
3502 if (!PL_lex_formbrack)
a0d0e21e
LW
3503 goto rightbracket;
3504 OPERATOR(';');
79072805
LW
3505 }
3506
3280af22
NIS
3507 s = PL_bufptr;
3508 PL_oldoldbufptr = PL_oldbufptr;
3509 PL_oldbufptr = s;
463ee0b2
LW
3510
3511 retry:
5db06880 3512#ifdef PERL_MAD
cd81e915
NC
3513 if (PL_thistoken) {
3514 sv_free(PL_thistoken);
3515 PL_thistoken = 0;
5db06880 3516 }
cd81e915 3517 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3518#endif
378cc40b
LW
3519 switch (*s) {
3520 default:
7e2040f0 3521 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3522 goto keylookup;
cea2e8a9 3523 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3524 case 4:
3525 case 26:
3526 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3527 case 0:
5db06880
NC
3528#ifdef PERL_MAD
3529 if (PL_madskills)
cd81e915 3530 PL_faketokens = 0;
5db06880 3531#endif
3280af22
NIS
3532 if (!PL_rsfp) {
3533 PL_last_uni = 0;
3534 PL_last_lop = 0;
c5ee2135 3535 if (PL_lex_brackets) {
10edeb5d
JH
3536 yyerror((const char *)
3537 (PL_lex_formbrack
3538 ? "Format not terminated"
3539 : "Missing right curly or square bracket"));
c5ee2135 3540 }
4e553d73 3541 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3542 "### Tokener got EOF\n");
5f80b19c 3543 } );
79072805 3544 TOKEN(0);
463ee0b2 3545 }
3280af22 3546 if (s++ < PL_bufend)
a687059c 3547 goto retry; /* ignore stray nulls */
3280af22
NIS
3548 PL_last_uni = 0;
3549 PL_last_lop = 0;
3550 if (!PL_in_eval && !PL_preambled) {
3551 PL_preambled = TRUE;
5db06880
NC
3552#ifdef PERL_MAD
3553 if (PL_madskills)
cd81e915 3554 PL_faketokens = 1;
5db06880 3555#endif
3280af22
NIS
3556 sv_setpv(PL_linestr,incl_perldb());
3557 if (SvCUR(PL_linestr))
396482e1 3558 sv_catpvs(PL_linestr,";");
3280af22
NIS
3559 if (PL_preambleav){
3560 while(AvFILLp(PL_preambleav) >= 0) {
3561 SV *tmpsv = av_shift(PL_preambleav);
3562 sv_catsv(PL_linestr, tmpsv);<