This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Rationalise the logic in lex_start for creating a new temporary if the
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
0bd48802 28static const char ident_too_long[] = "Identifier too long";
c445ea15 29static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 30
acfe0abc 31static void restore_rsfp(pTHX_ void *f);
6e3aabd6 32#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
33static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 35#endif
51371543 36
29595ff2 37#ifdef PERL_MAD
29595ff2 38# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 39# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 40#else
5db06880 41# define CURMAD(slot,sv)
9ded7720 42# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
43#endif
44
9059aa12
LW
45#define XFAKEBRACK 128
46#define XENUMMASK 127
47
39e02b42
JH
48#ifdef USE_UTF8_SCRIPTS
49# define UTF (!IN_BYTES)
2b9d42f0 50#else
746b446a 51# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 52#endif
a0ed51b3 53
61f0cdd9 54/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
55 * 1999-02-27 mjd-perl-patch@plover.com */
56#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
57
bf4acbe4
GS
58/* On MacOS, respect nonbreaking spaces */
59#ifdef MACOS_TRADITIONAL
60#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
61#else
62#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
63#endif
64
ffb4593c
NT
65/* LEX_* are values for PL_lex_state, the state of the lexer.
66 * They are arranged oddly so that the guard on the switch statement
79072805
LW
67 * can get by with a single comparison (if the compiler is smart enough).
68 */
69
fb73857a 70/* #define LEX_NOTPARSING 11 is done in perl.h. */
71
b6007c36
DM
72#define LEX_NORMAL 10 /* normal code (ie not within "...") */
73#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
74#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
75#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
76#define LEX_INTERPSTART 6 /* expecting the start of a $var */
77
78 /* at end of code, eg "$x" followed by: */
79#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
80#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
81
82#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
83 string or after \E, $foo, etc */
84#define LEX_INTERPCONST 2 /* NOT USED */
85#define LEX_FORMLINE 1 /* expecting a format line */
86#define LEX_KNOWNEXT 0 /* next token known; just return it */
87
79072805 88
bbf60fe6 89#ifdef DEBUGGING
27da23d5 90static const char* const lex_state_names[] = {
bbf60fe6
DM
91 "KNOWNEXT",
92 "FORMLINE",
93 "INTERPCONST",
94 "INTERPCONCAT",
95 "INTERPENDMAYBE",
96 "INTERPEND",
97 "INTERPSTART",
98 "INTERPPUSH",
99 "INTERPCASEMOD",
100 "INTERPNORMAL",
101 "NORMAL"
102};
103#endif
104
79072805
LW
105#ifdef ff_next
106#undef ff_next
d48672a2
LW
107#endif
108
79072805 109#include "keywords.h"
fe14fcc3 110
ffb4593c
NT
111/* CLINE is a macro that ensures PL_copline has a sane value */
112
ae986130
LW
113#ifdef CLINE
114#undef CLINE
115#endif
57843af0 116#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 117
5db06880 118#ifdef PERL_MAD
29595ff2
NC
119# define SKIPSPACE0(s) skipspace0(s)
120# define SKIPSPACE1(s) skipspace1(s)
121# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
122# define PEEKSPACE(s) skipspace2(s,0)
123#else
124# define SKIPSPACE0(s) skipspace(s)
125# define SKIPSPACE1(s) skipspace(s)
126# define SKIPSPACE2(s,tsv) skipspace(s)
127# define PEEKSPACE(s) skipspace(s)
128#endif
129
ffb4593c
NT
130/*
131 * Convenience functions to return different tokens and prime the
9cbb5ea2 132 * lexer for the next token. They all take an argument.
ffb4593c
NT
133 *
134 * TOKEN : generic token (used for '(', DOLSHARP, etc)
135 * OPERATOR : generic operator
136 * AOPERATOR : assignment operator
137 * PREBLOCK : beginning the block after an if, while, foreach, ...
138 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
139 * PREREF : *EXPR where EXPR is not a simple identifier
140 * TERM : expression term
141 * LOOPX : loop exiting command (goto, last, dump, etc)
142 * FTST : file test operator
143 * FUN0 : zero-argument function
2d2e263d 144 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
145 * BOop : bitwise or or xor
146 * BAop : bitwise and
147 * SHop : shift operator
148 * PWop : power operator
9cbb5ea2 149 * PMop : pattern-matching operator
ffb4593c
NT
150 * Aop : addition-level operator
151 * Mop : multiplication-level operator
152 * Eop : equality-testing operator
e5edeb50 153 * Rop : relational operator <= != gt
ffb4593c
NT
154 *
155 * Also see LOP and lop() below.
156 */
157
998054bd 158#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 159# define REPORT(retval) tokereport((I32)retval)
998054bd 160#else
bbf60fe6 161# define REPORT(retval) (retval)
998054bd
SC
162#endif
163
bbf60fe6
DM
164#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
165#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
166#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
167#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
168#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
169#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
170#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
171#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
172#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
173#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
174#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
175#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
176#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
177#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
178#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
179#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
180#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
181#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
182#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
183#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 184
a687059c
LW
185/* This bit of chicanery makes a unary function followed by
186 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
187 * The UNIDOR macro is for unary functions that can be followed by the //
188 * operator (such as C<shift // 0>).
a687059c 189 */
376fcdbf
AL
190#define UNI2(f,x) { \
191 yylval.ival = f; \
192 PL_expect = x; \
193 PL_bufptr = s; \
194 PL_last_uni = PL_oldbufptr; \
195 PL_last_lop_op = f; \
196 if (*s == '(') \
197 return REPORT( (int)FUNC1 ); \
29595ff2 198 s = PEEKSPACE(s); \
376fcdbf
AL
199 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
200 }
6f33ba73
RGS
201#define UNI(f) UNI2(f,XTERM)
202#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 203
376fcdbf
AL
204#define UNIBRACK(f) { \
205 yylval.ival = f; \
206 PL_bufptr = s; \
207 PL_last_uni = PL_oldbufptr; \
208 if (*s == '(') \
209 return REPORT( (int)FUNC1 ); \
29595ff2 210 s = PEEKSPACE(s); \
376fcdbf
AL
211 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
212 }
79072805 213
9f68db38 214/* grandfather return to old style */
3280af22 215#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 216
8fa7f367
JH
217#ifdef DEBUGGING
218
bbf60fe6
DM
219/* how to interpret the yylval associated with the token */
220enum token_type {
221 TOKENTYPE_NONE,
222 TOKENTYPE_IVAL,
223 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
224 TOKENTYPE_PVAL,
225 TOKENTYPE_OPVAL,
226 TOKENTYPE_GVVAL
227};
228
6d4a66ac
NC
229static struct debug_tokens {
230 const int token;
231 enum token_type type;
232 const char *name;
233} const debug_tokens[] =
9041c2e3 234{
bbf60fe6
DM
235 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
236 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
237 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
238 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
239 { ARROW, TOKENTYPE_NONE, "ARROW" },
240 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
241 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
242 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
243 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
244 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 245 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
246 { DO, TOKENTYPE_NONE, "DO" },
247 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
248 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
249 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
250 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
251 { ELSE, TOKENTYPE_NONE, "ELSE" },
252 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
253 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
254 { FOR, TOKENTYPE_IVAL, "FOR" },
255 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
256 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
257 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
258 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
259 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
260 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 261 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
262 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
263 { IF, TOKENTYPE_IVAL, "IF" },
264 { LABEL, TOKENTYPE_PVAL, "LABEL" },
265 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
266 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
267 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
268 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
269 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
270 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
271 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
272 { MY, TOKENTYPE_IVAL, "MY" },
273 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
274 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
275 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
276 { OROP, TOKENTYPE_IVAL, "OROP" },
277 { OROR, TOKENTYPE_NONE, "OROR" },
278 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
279 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
280 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
281 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
282 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
283 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
284 { PREINC, TOKENTYPE_NONE, "PREINC" },
285 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
286 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
287 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
288 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
289 { SUB, TOKENTYPE_NONE, "SUB" },
290 { THING, TOKENTYPE_OPVAL, "THING" },
291 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
292 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
293 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
294 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
295 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
296 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 297 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
298 { WHILE, TOKENTYPE_IVAL, "WHILE" },
299 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 300 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
301};
302
303/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 304
bbf60fe6 305STATIC int
f5bd084c 306S_tokereport(pTHX_ I32 rv)
bbf60fe6 307{
97aff369 308 dVAR;
bbf60fe6 309 if (DEBUG_T_TEST) {
bd61b366 310 const char *name = NULL;
bbf60fe6 311 enum token_type type = TOKENTYPE_NONE;
f54cb97a 312 const struct debug_tokens *p;
396482e1 313 SV* const report = newSVpvs("<== ");
bbf60fe6 314
f54cb97a 315 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
316 if (p->token == (int)rv) {
317 name = p->name;
318 type = p->type;
319 break;
320 }
321 }
322 if (name)
54667de8 323 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
324 else if ((char)rv > ' ' && (char)rv < '~')
325 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
326 else if (!rv)
396482e1 327 sv_catpvs(report, "EOF");
bbf60fe6
DM
328 else
329 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
330 switch (type) {
331 case TOKENTYPE_NONE:
332 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
333 break;
334 case TOKENTYPE_IVAL:
e4584336 335 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
336 break;
337 case TOKENTYPE_OPNUM:
338 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
339 PL_op_name[yylval.ival]);
340 break;
341 case TOKENTYPE_PVAL:
342 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
343 break;
344 case TOKENTYPE_OPVAL:
b6007c36 345 if (yylval.opval) {
401441c0 346 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 347 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
348 if (yylval.opval->op_type == OP_CONST) {
349 Perl_sv_catpvf(aTHX_ report, " %s",
350 SvPEEK(cSVOPx_sv(yylval.opval)));
351 }
352
353 }
401441c0 354 else
396482e1 355 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
356 break;
357 }
b6007c36 358 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
359 };
360 return (int)rv;
998054bd
SC
361}
362
b6007c36
DM
363
364/* print the buffer with suitable escapes */
365
366STATIC void
367S_printbuf(pTHX_ const char* fmt, const char* s)
368{
396482e1 369 SV* const tmp = newSVpvs("");
b6007c36
DM
370 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
371 SvREFCNT_dec(tmp);
372}
373
8fa7f367
JH
374#endif
375
ffb4593c
NT
376/*
377 * S_ao
378 *
c963b151
BD
379 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
380 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
381 */
382
76e3520e 383STATIC int
cea2e8a9 384S_ao(pTHX_ int toketype)
a0d0e21e 385{
97aff369 386 dVAR;
3280af22
NIS
387 if (*PL_bufptr == '=') {
388 PL_bufptr++;
a0d0e21e
LW
389 if (toketype == ANDAND)
390 yylval.ival = OP_ANDASSIGN;
391 else if (toketype == OROR)
392 yylval.ival = OP_ORASSIGN;
c963b151
BD
393 else if (toketype == DORDOR)
394 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
395 toketype = ASSIGNOP;
396 }
397 return toketype;
398}
399
ffb4593c
NT
400/*
401 * S_no_op
402 * When Perl expects an operator and finds something else, no_op
403 * prints the warning. It always prints "<something> found where
404 * operator expected. It prints "Missing semicolon on previous line?"
405 * if the surprise occurs at the start of the line. "do you need to
406 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
407 * where the compiler doesn't know if foo is a method call or a function.
408 * It prints "Missing operator before end of line" if there's nothing
409 * after the missing operator, or "... before <...>" if there is something
410 * after the missing operator.
411 */
412
76e3520e 413STATIC void
bfed75c6 414S_no_op(pTHX_ const char *what, char *s)
463ee0b2 415{
97aff369 416 dVAR;
9d4ba2ae
AL
417 char * const oldbp = PL_bufptr;
418 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 419
1189a94a
GS
420 if (!s)
421 s = oldbp;
07c798fb 422 else
1189a94a 423 PL_bufptr = s;
cea2e8a9 424 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
425 if (ckWARN_d(WARN_SYNTAX)) {
426 if (is_first)
427 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
428 "\t(Missing semicolon on previous line?)\n");
429 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 430 const char *t;
c35e046a
AL
431 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
432 NOOP;
56da5a46
RGS
433 if (t < PL_bufptr && isSPACE(*t))
434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
435 "\t(Do you need to predeclare %.*s?)\n",
551405c4 436 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
437 }
438 else {
439 assert(s >= oldbp);
440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 441 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 442 }
07c798fb 443 }
3280af22 444 PL_bufptr = oldbp;
8990e307
LW
445}
446
ffb4593c
NT
447/*
448 * S_missingterm
449 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 450 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
451 * If we're in a delimited string and the delimiter is a control
452 * character, it's reformatted into a two-char sequence like ^C.
453 * This is fatal.
454 */
455
76e3520e 456STATIC void
cea2e8a9 457S_missingterm(pTHX_ char *s)
8990e307 458{
97aff369 459 dVAR;
8990e307
LW
460 char tmpbuf[3];
461 char q;
462 if (s) {
9d4ba2ae 463 char * const nl = strrchr(s,'\n');
d2719217 464 if (nl)
8990e307
LW
465 *nl = '\0';
466 }
9d116dd7
JH
467 else if (
468#ifdef EBCDIC
469 iscntrl(PL_multi_close)
470#else
471 PL_multi_close < 32 || PL_multi_close == 127
472#endif
473 ) {
8990e307 474 *tmpbuf = '^';
585ec06d 475 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
476 tmpbuf[2] = '\0';
477 s = tmpbuf;
478 }
479 else {
eb160463 480 *tmpbuf = (char)PL_multi_close;
8990e307
LW
481 tmpbuf[1] = '\0';
482 s = tmpbuf;
483 }
484 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 485 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 486}
79072805 487
ef89dcc3 488#define FEATURE_IS_ENABLED(name) \
0d863452 489 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 490 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
491/*
492 * S_feature_is_enabled
493 * Check whether the named feature is enabled.
494 */
495STATIC bool
d4c19fe8 496S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 497{
97aff369 498 dVAR;
0d863452 499 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 500 char he_name[32] = "feature_";
6fca0082 501 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 502
7b9ef140 503 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
504}
505
ffb4593c
NT
506/*
507 * Perl_deprecate
ffb4593c
NT
508 */
509
79072805 510void
bfed75c6 511Perl_deprecate(pTHX_ const char *s)
a0d0e21e 512{
599cee73 513 if (ckWARN(WARN_DEPRECATED))
9014280d 514 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
515}
516
12bcd1a6 517void
bfed75c6 518Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
519{
520 /* This function should NOT be called for any new deprecated warnings */
521 /* Use Perl_deprecate instead */
522 /* */
523 /* It is here to maintain backward compatibility with the pre-5.8 */
524 /* warnings category hierarchy. The "deprecated" category used to */
525 /* live under the "syntax" category. It is now a top-level category */
526 /* in its own right. */
527
528 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 529 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
530 "Use of %s is deprecated", s);
531}
532
ffb4593c 533/*
9cbb5ea2
GS
534 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
535 * utf16-to-utf8-reversed.
ffb4593c
NT
536 */
537
c39cd008
GS
538#ifdef PERL_CR_FILTER
539static void
540strip_return(SV *sv)
541{
95a20fc0 542 register const char *s = SvPVX_const(sv);
9d4ba2ae 543 register const char * const e = s + SvCUR(sv);
c39cd008
GS
544 /* outer loop optimized to do nothing if there are no CR-LFs */
545 while (s < e) {
546 if (*s++ == '\r' && *s == '\n') {
547 /* hit a CR-LF, need to copy the rest */
548 register char *d = s - 1;
549 *d++ = *s++;
550 while (s < e) {
551 if (*s == '\r' && s[1] == '\n')
552 s++;
553 *d++ = *s++;
554 }
555 SvCUR(sv) -= s - d;
556 return;
557 }
558 }
559}
a868473f 560
76e3520e 561STATIC I32
c39cd008 562S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 563{
f54cb97a 564 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
565 if (count > 0 && !maxlen)
566 strip_return(sv);
567 return count;
a868473f
NIS
568}
569#endif
570
ffb4593c
NT
571/*
572 * Perl_lex_start
9cbb5ea2
GS
573 * Initialize variables. Uses the Perl save_stack to save its state (for
574 * recursive calls to the parser).
ffb4593c
NT
575 */
576
a0d0e21e 577void
864dbfa3 578Perl_lex_start(pTHX_ SV *line)
79072805 579{
97aff369 580 dVAR;
cfd0369c 581 const char *s;
8990e307
LW
582 STRLEN len;
583
3280af22
NIS
584 SAVEI32(PL_lex_dojoin);
585 SAVEI32(PL_lex_brackets);
3280af22
NIS
586 SAVEI32(PL_lex_casemods);
587 SAVEI32(PL_lex_starts);
588 SAVEI32(PL_lex_state);
7766f137 589 SAVEVPTR(PL_lex_inpat);
3280af22 590 SAVEI32(PL_lex_inwhat);
5db06880
NC
591#ifdef PERL_MAD
592 if (PL_lex_state == LEX_KNOWNEXT) {
593 I32 toke = PL_lasttoke;
594 while (--toke >= 0) {
595 SAVEI32(PL_nexttoke[toke].next_type);
596 SAVEVPTR(PL_nexttoke[toke].next_val);
597 if (PL_madskills)
598 SAVEVPTR(PL_nexttoke[toke].next_mad);
599 }
600 SAVEI32(PL_lasttoke);
601 }
602 if (PL_madskills) {
cd81e915
NC
603 SAVESPTR(PL_thistoken);
604 SAVESPTR(PL_thiswhite);
605 SAVESPTR(PL_nextwhite);
606 SAVESPTR(PL_thisopen);
607 SAVESPTR(PL_thisclose);
608 SAVESPTR(PL_thisstuff);
609 SAVEVPTR(PL_thismad);
610 SAVEI32(PL_realtokenstart);
611 SAVEI32(PL_faketokens);
612 }
613 SAVEI32(PL_curforce);
5db06880 614#else
18b09519
GS
615 if (PL_lex_state == LEX_KNOWNEXT) {
616 I32 toke = PL_nexttoke;
617 while (--toke >= 0) {
618 SAVEI32(PL_nexttype[toke]);
619 SAVEVPTR(PL_nextval[toke]);
620 }
621 SAVEI32(PL_nexttoke);
18b09519 622 }
5db06880 623#endif
57843af0 624 SAVECOPLINE(PL_curcop);
3280af22
NIS
625 SAVEPPTR(PL_bufptr);
626 SAVEPPTR(PL_bufend);
627 SAVEPPTR(PL_oldbufptr);
628 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
629 SAVEPPTR(PL_last_lop);
630 SAVEPPTR(PL_last_uni);
3280af22
NIS
631 SAVEPPTR(PL_linestart);
632 SAVESPTR(PL_linestr);
8edd5f42
RGS
633 SAVEGENERICPV(PL_lex_brackstack);
634 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 635 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
636 SAVESPTR(PL_lex_stuff);
637 SAVEI32(PL_lex_defer);
09bef843 638 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 639 SAVESPTR(PL_lex_repl);
bebdddfc
GS
640 SAVEINT(PL_expect);
641 SAVEINT(PL_lex_expect);
3280af22
NIS
642
643 PL_lex_state = LEX_NORMAL;
644 PL_lex_defer = 0;
645 PL_expect = XSTATE;
646 PL_lex_brackets = 0;
a02a5408
JC
647 Newx(PL_lex_brackstack, 120, char);
648 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
649 PL_lex_casemods = 0;
650 *PL_lex_casestack = '\0';
651 PL_lex_dojoin = 0;
652 PL_lex_starts = 0;
a0714e2c
SS
653 PL_lex_stuff = NULL;
654 PL_lex_repl = NULL;
3280af22 655 PL_lex_inpat = 0;
5db06880
NC
656#ifdef PERL_MAD
657 PL_lasttoke = 0;
658#else
76be56bc 659 PL_nexttoke = 0;
5db06880 660#endif
3280af22 661 PL_lex_inwhat = 0;
09bef843 662 PL_sublex_info.sub_inwhat = 0;
10efb74f
NC
663 if (line) {
664 s = SvPV_const(line, len);
665 } else {
666 len = 0;
667 }
668 if (!len) {
669 PL_linestr = newSVpvs("\n;");
670 } else if (SvREADONLY(line) || s[len-1] != ';') {
671 PL_linestr = newSVsv(line);
672 if (s[len-1] != ';')
0eb20fa2 673 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
674 } else {
675 SvTEMP_off(line);
676 SvREFCNT_inc_simple_void_NN(line);
677 PL_linestr = line;
8990e307 678 }
db4997f0
NC
679 /* PL_linestr needs to survive until end of scope, not just the next
680 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 681 SAVEFREESV(PL_linestr);
3280af22
NIS
682 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
683 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 684 PL_last_lop = PL_last_uni = NULL;
3280af22 685 PL_rsfp = 0;
79072805 686}
a687059c 687
ffb4593c
NT
688/*
689 * Perl_lex_end
9cbb5ea2
GS
690 * Finalizer for lexing operations. Must be called when the parser is
691 * done with the lexer.
ffb4593c
NT
692 */
693
463ee0b2 694void
864dbfa3 695Perl_lex_end(pTHX)
463ee0b2 696{
97aff369 697 dVAR;
3280af22 698 PL_doextract = FALSE;
463ee0b2
LW
699}
700
ffb4593c
NT
701/*
702 * S_incline
703 * This subroutine has nothing to do with tilting, whether at windmills
704 * or pinball tables. Its name is short for "increment line". It
57843af0 705 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 706 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
707 * # line 500 "foo.pm"
708 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
709 */
710
76e3520e 711STATIC void
cea2e8a9 712S_incline(pTHX_ char *s)
463ee0b2 713{
97aff369 714 dVAR;
463ee0b2
LW
715 char *t;
716 char *n;
73659bf1 717 char *e;
463ee0b2 718 char ch;
463ee0b2 719
57843af0 720 CopLINE_inc(PL_curcop);
463ee0b2
LW
721 if (*s++ != '#')
722 return;
d4c19fe8
AL
723 while (SPACE_OR_TAB(*s))
724 s++;
73659bf1
GS
725 if (strnEQ(s, "line", 4))
726 s += 4;
727 else
728 return;
084592ab 729 if (SPACE_OR_TAB(*s))
73659bf1 730 s++;
4e553d73 731 else
73659bf1 732 return;
d4c19fe8
AL
733 while (SPACE_OR_TAB(*s))
734 s++;
463ee0b2
LW
735 if (!isDIGIT(*s))
736 return;
d4c19fe8 737
463ee0b2
LW
738 n = s;
739 while (isDIGIT(*s))
740 s++;
bf4acbe4 741 while (SPACE_OR_TAB(*s))
463ee0b2 742 s++;
73659bf1 743 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 744 s++;
73659bf1
GS
745 e = t + 1;
746 }
463ee0b2 747 else {
c35e046a
AL
748 t = s;
749 while (!isSPACE(*t))
750 t++;
73659bf1 751 e = t;
463ee0b2 752 }
bf4acbe4 753 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
754 e++;
755 if (*e != '\n' && *e != '\0')
756 return; /* false alarm */
757
463ee0b2
LW
758 ch = *t;
759 *t = '\0';
f4dd75d9 760 if (t - s > 0) {
8a5ee598 761#ifndef USE_ITHREADS
c4420975 762 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
763 STRLEN tmplen = cf ? strlen(cf) : 0;
764 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
765 /* must copy *{"::_<(eval N)[oldfilename:L]"}
766 * to *{"::_<newfilename"} */
767 char smallbuf[256], smallbuf2[256];
768 char *tmpbuf, *tmpbuf2;
8a5ee598 769 GV **gvp, *gv2;
e66cf94c
RGS
770 STRLEN tmplen2 = strlen(s);
771 if (tmplen + 3 < sizeof smallbuf)
772 tmpbuf = smallbuf;
773 else
774 Newx(tmpbuf, tmplen + 3, char);
775 if (tmplen2 + 3 < sizeof smallbuf2)
776 tmpbuf2 = smallbuf2;
777 else
778 Newx(tmpbuf2, tmplen2 + 3, char);
779 tmpbuf[0] = tmpbuf2[0] = '_';
780 tmpbuf[1] = tmpbuf2[1] = '<';
781 memcpy(tmpbuf + 2, cf, ++tmplen);
782 memcpy(tmpbuf2 + 2, s, ++tmplen2);
783 ++tmplen; ++tmplen2;
8a5ee598
RGS
784 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
785 if (gvp) {
786 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 787 if (!isGV(gv2)) {
8a5ee598 788 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
789 /* adjust ${"::_<newfilename"} to store the new file name */
790 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
791 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
792 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
793 }
8a5ee598 794 }
e66cf94c
RGS
795 if (tmpbuf != smallbuf) Safefree(tmpbuf);
796 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
797 }
8a5ee598 798#endif
05ec9bb3 799 CopFILE_free(PL_curcop);
57843af0 800 CopFILE_set(PL_curcop, s);
f4dd75d9 801 }
463ee0b2 802 *t = ch;
57843af0 803 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
804}
805
29595ff2 806#ifdef PERL_MAD
cd81e915 807/* skip space before PL_thistoken */
29595ff2
NC
808
809STATIC char *
810S_skipspace0(pTHX_ register char *s)
811{
812 s = skipspace(s);
813 if (!PL_madskills)
814 return s;
cd81e915
NC
815 if (PL_skipwhite) {
816 if (!PL_thiswhite)
6b29d1f5 817 PL_thiswhite = newSVpvs("");
cd81e915
NC
818 sv_catsv(PL_thiswhite, PL_skipwhite);
819 sv_free(PL_skipwhite);
820 PL_skipwhite = 0;
821 }
822 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
823 return s;
824}
825
cd81e915 826/* skip space after PL_thistoken */
29595ff2
NC
827
828STATIC char *
829S_skipspace1(pTHX_ register char *s)
830{
d4c19fe8 831 const char *start = s;
29595ff2
NC
832 I32 startoff = start - SvPVX(PL_linestr);
833
834 s = skipspace(s);
835 if (!PL_madskills)
836 return s;
837 start = SvPVX(PL_linestr) + startoff;
cd81e915 838 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 839 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
840 PL_thistoken = newSVpvn(tstart, start - tstart);
841 }
842 PL_realtokenstart = -1;
843 if (PL_skipwhite) {
844 if (!PL_nextwhite)
6b29d1f5 845 PL_nextwhite = newSVpvs("");
cd81e915
NC
846 sv_catsv(PL_nextwhite, PL_skipwhite);
847 sv_free(PL_skipwhite);
848 PL_skipwhite = 0;
29595ff2
NC
849 }
850 return s;
851}
852
853STATIC char *
854S_skipspace2(pTHX_ register char *s, SV **svp)
855{
c35e046a
AL
856 char *start;
857 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
858 const I32 startoff = s - SvPVX(PL_linestr);
859
29595ff2
NC
860 s = skipspace(s);
861 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
862 if (!PL_madskills || !svp)
863 return s;
864 start = SvPVX(PL_linestr) + startoff;
cd81e915 865 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 866 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
867 PL_thistoken = newSVpvn(tstart, start - tstart);
868 PL_realtokenstart = -1;
29595ff2 869 }
cd81e915 870 if (PL_skipwhite) {
29595ff2 871 if (!*svp)
6b29d1f5 872 *svp = newSVpvs("");
cd81e915
NC
873 sv_setsv(*svp, PL_skipwhite);
874 sv_free(PL_skipwhite);
875 PL_skipwhite = 0;
29595ff2
NC
876 }
877
878 return s;
879}
880#endif
881
80a702cd
RGS
882STATIC void
883S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
884{
885 AV *av = CopFILEAVx(PL_curcop);
886 if (av) {
887 SV * const sv = newSV(0);
888 sv_upgrade(sv, SVt_PVMG);
889 sv_setpvn(sv, buf, len);
890 (void)SvIOK_on(sv);
891 SvIV_set(sv, 0);
892 av_store(av, (I32)CopLINE(PL_curcop), sv);
893 }
894}
895
896STATIC void
897S_update_debugger_info_sv(pTHX_ SV *orig_sv)
898{
899 AV *av = CopFILEAVx(PL_curcop);
900 if (av) {
901 SV * const sv = newSV(0);
902 sv_upgrade(sv, SVt_PVMG);
903 sv_setsv(sv, orig_sv);
904 (void)SvIOK_on(sv);
905 SvIV_set(sv, 0);
906 av_store(av, (I32)CopLINE(PL_curcop), sv);
907 }
908}
909
ffb4593c
NT
910/*
911 * S_skipspace
912 * Called to gobble the appropriate amount and type of whitespace.
913 * Skips comments as well.
914 */
915
76e3520e 916STATIC char *
cea2e8a9 917S_skipspace(pTHX_ register char *s)
a687059c 918{
97aff369 919 dVAR;
5db06880
NC
920#ifdef PERL_MAD
921 int curoff;
922 int startoff = s - SvPVX(PL_linestr);
923
cd81e915
NC
924 if (PL_skipwhite) {
925 sv_free(PL_skipwhite);
926 PL_skipwhite = 0;
5db06880
NC
927 }
928#endif
929
3280af22 930 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 931 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 932 s++;
5db06880
NC
933#ifdef PERL_MAD
934 goto done;
935#else
463ee0b2 936 return s;
5db06880 937#endif
463ee0b2
LW
938 }
939 for (;;) {
fd049845 940 STRLEN prevlen;
09bef843 941 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 942 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
943 while (s < PL_bufend && isSPACE(*s)) {
944 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
945 incline(s);
946 }
ffb4593c
NT
947
948 /* comment */
3280af22
NIS
949 if (s < PL_bufend && *s == '#') {
950 while (s < PL_bufend && *s != '\n')
463ee0b2 951 s++;
60e6418e 952 if (s < PL_bufend) {
463ee0b2 953 s++;
60e6418e
GS
954 if (PL_in_eval && !PL_rsfp) {
955 incline(s);
956 continue;
957 }
958 }
463ee0b2 959 }
ffb4593c
NT
960
961 /* only continue to recharge the buffer if we're at the end
962 * of the buffer, we're not reading from a source filter, and
963 * we're in normal lexing mode
964 */
09bef843
SB
965 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
966 PL_lex_state == LEX_FORMLINE)
5db06880
NC
967#ifdef PERL_MAD
968 goto done;
969#else
463ee0b2 970 return s;
5db06880 971#endif
ffb4593c
NT
972
973 /* try to recharge the buffer */
5db06880
NC
974#ifdef PERL_MAD
975 curoff = s - SvPVX(PL_linestr);
976#endif
977
9cbb5ea2 978 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 979 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 980 {
5db06880
NC
981#ifdef PERL_MAD
982 if (PL_madskills && curoff != startoff) {
cd81e915 983 if (!PL_skipwhite)
6b29d1f5 984 PL_skipwhite = newSVpvs("");
cd81e915 985 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
986 curoff - startoff);
987 }
988
989 /* mustn't throw out old stuff yet if madpropping */
990 SvCUR(PL_linestr) = curoff;
991 s = SvPVX(PL_linestr) + curoff;
992 *s = 0;
993 if (curoff && s[-1] == '\n')
994 s[-1] = ' ';
995#endif
996
9cbb5ea2 997 /* end of file. Add on the -p or -n magic */
cd81e915 998 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 999 if (PL_minus_p) {
5db06880
NC
1000#ifdef PERL_MAD
1001 sv_catpv(PL_linestr,
1002 ";}continue{print or die qq(-p destination: $!\\n);}");
1003#else
01a19ab0
NC
1004 sv_setpv(PL_linestr,
1005 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1006#endif
3280af22 1007 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1008 }
01a19ab0 1009 else if (PL_minus_n) {
5db06880
NC
1010#ifdef PERL_MAD
1011 sv_catpvn(PL_linestr, ";}", 2);
1012#else
01a19ab0 1013 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1014#endif
01a19ab0
NC
1015 PL_minus_n = 0;
1016 }
a0d0e21e 1017 else
5db06880
NC
1018#ifdef PERL_MAD
1019 sv_catpvn(PL_linestr,";", 1);
1020#else
4147a61b 1021 sv_setpvn(PL_linestr,";", 1);
5db06880 1022#endif
ffb4593c
NT
1023
1024 /* reset variables for next time we lex */
9cbb5ea2 1025 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1026 = SvPVX(PL_linestr)
1027#ifdef PERL_MAD
1028 + curoff
1029#endif
1030 ;
3280af22 1031 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1032 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1033
1034 /* Close the filehandle. Could be from -P preprocessor,
1035 * STDIN, or a regular file. If we were reading code from
1036 * STDIN (because the commandline held no -e or filename)
1037 * then we don't close it, we reset it so the code can
1038 * read from STDIN too.
1039 */
1040
3280af22
NIS
1041 if (PL_preprocess && !PL_in_eval)
1042 (void)PerlProc_pclose(PL_rsfp);
1043 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1044 PerlIO_clearerr(PL_rsfp);
8990e307 1045 else
3280af22 1046 (void)PerlIO_close(PL_rsfp);
4608196e 1047 PL_rsfp = NULL;
463ee0b2
LW
1048 return s;
1049 }
ffb4593c
NT
1050
1051 /* not at end of file, so we only read another line */
09bef843
SB
1052 /* make corresponding updates to old pointers, for yyerror() */
1053 oldprevlen = PL_oldbufptr - PL_bufend;
1054 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1055 if (PL_last_uni)
1056 oldunilen = PL_last_uni - PL_bufend;
1057 if (PL_last_lop)
1058 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1059 PL_linestart = PL_bufptr = s + prevlen;
1060 PL_bufend = s + SvCUR(PL_linestr);
1061 s = PL_bufptr;
09bef843
SB
1062 PL_oldbufptr = s + oldprevlen;
1063 PL_oldoldbufptr = s + oldoldprevlen;
1064 if (PL_last_uni)
1065 PL_last_uni = s + oldunilen;
1066 if (PL_last_lop)
1067 PL_last_lop = s + oldloplen;
a0d0e21e 1068 incline(s);
ffb4593c
NT
1069
1070 /* debugger active and we're not compiling the debugger code,
1071 * so store the line into the debugger's array of lines
1072 */
80a702cd
RGS
1073 if (PERLDB_LINE && PL_curstash != PL_debstash)
1074 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1075 }
5db06880
NC
1076
1077#ifdef PERL_MAD
1078 done:
1079 if (PL_madskills) {
cd81e915 1080 if (!PL_skipwhite)
6b29d1f5 1081 PL_skipwhite = newSVpvs("");
5db06880
NC
1082 curoff = s - SvPVX(PL_linestr);
1083 if (curoff - startoff)
cd81e915 1084 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1085 curoff - startoff);
1086 }
1087 return s;
1088#endif
a687059c 1089}
378cc40b 1090
ffb4593c
NT
1091/*
1092 * S_check_uni
1093 * Check the unary operators to ensure there's no ambiguity in how they're
1094 * used. An ambiguous piece of code would be:
1095 * rand + 5
1096 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1097 * the +5 is its argument.
1098 */
1099
76e3520e 1100STATIC void
cea2e8a9 1101S_check_uni(pTHX)
ba106d47 1102{
97aff369 1103 dVAR;
d4c19fe8
AL
1104 const char *s;
1105 const char *t;
2f3197b3 1106
3280af22 1107 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1108 return;
3280af22
NIS
1109 while (isSPACE(*PL_last_uni))
1110 PL_last_uni++;
c35e046a
AL
1111 s = PL_last_uni;
1112 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1113 s++;
3280af22 1114 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1115 return;
6136c704 1116
0453d815 1117 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1118 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1119 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1120 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1121 }
2f3197b3
LW
1122}
1123
ffb4593c
NT
1124/*
1125 * LOP : macro to build a list operator. Its behaviour has been replaced
1126 * with a subroutine, S_lop() for which LOP is just another name.
1127 */
1128
a0d0e21e
LW
1129#define LOP(f,x) return lop(f,x,s)
1130
ffb4593c
NT
1131/*
1132 * S_lop
1133 * Build a list operator (or something that might be one). The rules:
1134 * - if we have a next token, then it's a list operator [why?]
1135 * - if the next thing is an opening paren, then it's a function
1136 * - else it's a list operator
1137 */
1138
76e3520e 1139STATIC I32
a0be28da 1140S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1141{
97aff369 1142 dVAR;
79072805 1143 yylval.ival = f;
35c8bce7 1144 CLINE;
3280af22
NIS
1145 PL_expect = x;
1146 PL_bufptr = s;
1147 PL_last_lop = PL_oldbufptr;
eb160463 1148 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1149#ifdef PERL_MAD
1150 if (PL_lasttoke)
1151 return REPORT(LSTOP);
1152#else
3280af22 1153 if (PL_nexttoke)
bbf60fe6 1154 return REPORT(LSTOP);
5db06880 1155#endif
79072805 1156 if (*s == '(')
bbf60fe6 1157 return REPORT(FUNC);
29595ff2 1158 s = PEEKSPACE(s);
79072805 1159 if (*s == '(')
bbf60fe6 1160 return REPORT(FUNC);
79072805 1161 else
bbf60fe6 1162 return REPORT(LSTOP);
79072805
LW
1163}
1164
5db06880
NC
1165#ifdef PERL_MAD
1166 /*
1167 * S_start_force
1168 * Sets up for an eventual force_next(). start_force(0) basically does
1169 * an unshift, while start_force(-1) does a push. yylex removes items
1170 * on the "pop" end.
1171 */
1172
1173STATIC void
1174S_start_force(pTHX_ int where)
1175{
1176 int i;
1177
cd81e915 1178 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1179 where = PL_lasttoke;
cd81e915
NC
1180 assert(PL_curforce < 0 || PL_curforce == where);
1181 if (PL_curforce != where) {
5db06880
NC
1182 for (i = PL_lasttoke; i > where; --i) {
1183 PL_nexttoke[i] = PL_nexttoke[i-1];
1184 }
1185 PL_lasttoke++;
1186 }
cd81e915 1187 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1188 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1189 PL_curforce = where;
1190 if (PL_nextwhite) {
5db06880 1191 if (PL_madskills)
6b29d1f5 1192 curmad('^', newSVpvs(""));
cd81e915 1193 CURMAD('_', PL_nextwhite);
5db06880
NC
1194 }
1195}
1196
1197STATIC void
1198S_curmad(pTHX_ char slot, SV *sv)
1199{
1200 MADPROP **where;
1201
1202 if (!sv)
1203 return;
cd81e915
NC
1204 if (PL_curforce < 0)
1205 where = &PL_thismad;
5db06880 1206 else
cd81e915 1207 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1208
cd81e915 1209 if (PL_faketokens)
5db06880
NC
1210 sv_setpvn(sv, "", 0);
1211 else {
1212 if (!IN_BYTES) {
1213 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1214 SvUTF8_on(sv);
1215 else if (PL_encoding) {
1216 sv_recode_to_utf8(sv, PL_encoding);
1217 }
1218 }
1219 }
1220
1221 /* keep a slot open for the head of the list? */
1222 if (slot != '_' && *where && (*where)->mad_key == '^') {
1223 (*where)->mad_key = slot;
1224 sv_free((*where)->mad_val);
1225 (*where)->mad_val = (void*)sv;
1226 }
1227 else
1228 addmad(newMADsv(slot, sv), where, 0);
1229}
1230#else
b3f24c00
MHM
1231# define start_force(where) NOOP
1232# define curmad(slot, sv) NOOP
5db06880
NC
1233#endif
1234
ffb4593c
NT
1235/*
1236 * S_force_next
9cbb5ea2 1237 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1238 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1239 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1240 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1241 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1242 */
1243
4e553d73 1244STATIC void
cea2e8a9 1245S_force_next(pTHX_ I32 type)
79072805 1246{
97aff369 1247 dVAR;
5db06880 1248#ifdef PERL_MAD
cd81e915 1249 if (PL_curforce < 0)
5db06880 1250 start_force(PL_lasttoke);
cd81e915 1251 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1252 if (PL_lex_state != LEX_KNOWNEXT)
1253 PL_lex_defer = PL_lex_state;
1254 PL_lex_state = LEX_KNOWNEXT;
1255 PL_lex_expect = PL_expect;
cd81e915 1256 PL_curforce = -1;
5db06880 1257#else
3280af22
NIS
1258 PL_nexttype[PL_nexttoke] = type;
1259 PL_nexttoke++;
1260 if (PL_lex_state != LEX_KNOWNEXT) {
1261 PL_lex_defer = PL_lex_state;
1262 PL_lex_expect = PL_expect;
1263 PL_lex_state = LEX_KNOWNEXT;
79072805 1264 }
5db06880 1265#endif
79072805
LW
1266}
1267
d0a148a6
NC
1268STATIC SV *
1269S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1270{
97aff369 1271 dVAR;
9d4ba2ae 1272 SV * const sv = newSVpvn(start,len);
bfed75c6 1273 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1274 SvUTF8_on(sv);
1275 return sv;
1276}
1277
ffb4593c
NT
1278/*
1279 * S_force_word
1280 * When the lexer knows the next thing is a word (for instance, it has
1281 * just seen -> and it knows that the next char is a word char, then
1282 * it calls S_force_word to stick the next word into the PL_next lookahead.
1283 *
1284 * Arguments:
b1b65b59 1285 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1286 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1287 * int check_keyword : if true, Perl checks to make sure the word isn't
1288 * a keyword (do this if the word is a label, e.g. goto FOO)
1289 * int allow_pack : if true, : characters will also be allowed (require,
1290 * use, etc. do this)
9cbb5ea2 1291 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1292 */
1293
76e3520e 1294STATIC char *
cea2e8a9 1295S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1296{
97aff369 1297 dVAR;
463ee0b2
LW
1298 register char *s;
1299 STRLEN len;
4e553d73 1300
29595ff2 1301 start = SKIPSPACE1(start);
463ee0b2 1302 s = start;
7e2040f0 1303 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1304 (allow_pack && *s == ':') ||
15f0808c 1305 (allow_initial_tick && *s == '\'') )
a0d0e21e 1306 {
3280af22 1307 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1308 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1309 return start;
cd81e915 1310 start_force(PL_curforce);
5db06880
NC
1311 if (PL_madskills)
1312 curmad('X', newSVpvn(start,s-start));
463ee0b2 1313 if (token == METHOD) {
29595ff2 1314 s = SKIPSPACE1(s);
463ee0b2 1315 if (*s == '(')
3280af22 1316 PL_expect = XTERM;
463ee0b2 1317 else {
3280af22 1318 PL_expect = XOPERATOR;
463ee0b2 1319 }
79072805 1320 }
9ded7720 1321 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1322 = (OP*)newSVOP(OP_CONST,0,
1323 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1324 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1325 force_next(token);
1326 }
1327 return s;
1328}
1329
ffb4593c
NT
1330/*
1331 * S_force_ident
9cbb5ea2 1332 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1333 * text only contains the "foo" portion. The first argument is a pointer
1334 * to the "foo", and the second argument is the type symbol to prefix.
1335 * Forces the next token to be a "WORD".
9cbb5ea2 1336 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1337 */
1338
76e3520e 1339STATIC void
bfed75c6 1340S_force_ident(pTHX_ register const char *s, int kind)
79072805 1341{
97aff369 1342 dVAR;
c35e046a 1343 if (*s) {
90e5519e
NC
1344 const STRLEN len = strlen(s);
1345 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1346 start_force(PL_curforce);
9ded7720 1347 NEXTVAL_NEXTTOKE.opval = o;
79072805 1348 force_next(WORD);
748a9306 1349 if (kind) {
11343788 1350 o->op_private = OPpCONST_ENTERED;
55497cff 1351 /* XXX see note in pp_entereval() for why we forgo typo
1352 warnings if the symbol must be introduced in an eval.
1353 GSAR 96-10-12 */
90e5519e
NC
1354 gv_fetchpvn_flags(s, len,
1355 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1356 : GV_ADD,
1357 kind == '$' ? SVt_PV :
1358 kind == '@' ? SVt_PVAV :
1359 kind == '%' ? SVt_PVHV :
a0d0e21e 1360 SVt_PVGV
90e5519e 1361 );
748a9306 1362 }
79072805
LW
1363 }
1364}
1365
1571675a
GS
1366NV
1367Perl_str_to_version(pTHX_ SV *sv)
1368{
1369 NV retval = 0.0;
1370 NV nshift = 1.0;
1371 STRLEN len;
cfd0369c 1372 const char *start = SvPV_const(sv,len);
9d4ba2ae 1373 const char * const end = start + len;
504618e9 1374 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1375 while (start < end) {
ba210ebe 1376 STRLEN skip;
1571675a
GS
1377 UV n;
1378 if (utf)
9041c2e3 1379 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1380 else {
1381 n = *(U8*)start;
1382 skip = 1;
1383 }
1384 retval += ((NV)n)/nshift;
1385 start += skip;
1386 nshift *= 1000;
1387 }
1388 return retval;
1389}
1390
4e553d73 1391/*
ffb4593c
NT
1392 * S_force_version
1393 * Forces the next token to be a version number.
e759cc13
RGS
1394 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1395 * and if "guessing" is TRUE, then no new token is created (and the caller
1396 * must use an alternative parsing method).
ffb4593c
NT
1397 */
1398
76e3520e 1399STATIC char *
e759cc13 1400S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1401{
97aff369 1402 dVAR;
5f66b61c 1403 OP *version = NULL;
44dcb63b 1404 char *d;
5db06880
NC
1405#ifdef PERL_MAD
1406 I32 startoff = s - SvPVX(PL_linestr);
1407#endif
89bfa8cd 1408
29595ff2 1409 s = SKIPSPACE1(s);
89bfa8cd 1410
44dcb63b 1411 d = s;
dd629d5b 1412 if (*d == 'v')
44dcb63b 1413 d++;
44dcb63b 1414 if (isDIGIT(*d)) {
e759cc13
RGS
1415 while (isDIGIT(*d) || *d == '_' || *d == '.')
1416 d++;
5db06880
NC
1417#ifdef PERL_MAD
1418 if (PL_madskills) {
cd81e915 1419 start_force(PL_curforce);
5db06880
NC
1420 curmad('X', newSVpvn(s,d-s));
1421 }
1422#endif
9f3d182e 1423 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1424 SV *ver;
b73d6f50 1425 s = scan_num(s, &yylval);
89bfa8cd 1426 version = yylval.opval;
dd629d5b
GS
1427 ver = cSVOPx(version)->op_sv;
1428 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1429 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1430 SvNV_set(ver, str_to_version(ver));
1571675a 1431 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1432 }
89bfa8cd 1433 }
5db06880
NC
1434 else if (guessing) {
1435#ifdef PERL_MAD
1436 if (PL_madskills) {
cd81e915
NC
1437 sv_free(PL_nextwhite); /* let next token collect whitespace */
1438 PL_nextwhite = 0;
5db06880
NC
1439 s = SvPVX(PL_linestr) + startoff;
1440 }
1441#endif
e759cc13 1442 return s;
5db06880 1443 }
89bfa8cd 1444 }
1445
5db06880
NC
1446#ifdef PERL_MAD
1447 if (PL_madskills && !version) {
cd81e915
NC
1448 sv_free(PL_nextwhite); /* let next token collect whitespace */
1449 PL_nextwhite = 0;
5db06880
NC
1450 s = SvPVX(PL_linestr) + startoff;
1451 }
1452#endif
89bfa8cd 1453 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1454 start_force(PL_curforce);
9ded7720 1455 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1456 force_next(WORD);
89bfa8cd 1457
e759cc13 1458 return s;
89bfa8cd 1459}
1460
ffb4593c
NT
1461/*
1462 * S_tokeq
1463 * Tokenize a quoted string passed in as an SV. It finds the next
1464 * chunk, up to end of string or a backslash. It may make a new
1465 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1466 * turns \\ into \.
1467 */
1468
76e3520e 1469STATIC SV *
cea2e8a9 1470S_tokeq(pTHX_ SV *sv)
79072805 1471{
97aff369 1472 dVAR;
79072805
LW
1473 register char *s;
1474 register char *send;
1475 register char *d;
b3ac6de7
IZ
1476 STRLEN len = 0;
1477 SV *pv = sv;
79072805
LW
1478
1479 if (!SvLEN(sv))
b3ac6de7 1480 goto finish;
79072805 1481
a0d0e21e 1482 s = SvPV_force(sv, len);
21a311ee 1483 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1484 goto finish;
463ee0b2 1485 send = s + len;
79072805
LW
1486 while (s < send && *s != '\\')
1487 s++;
1488 if (s == send)
b3ac6de7 1489 goto finish;
79072805 1490 d = s;
be4731d2 1491 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1492 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1493 if (SvUTF8(sv))
1494 SvUTF8_on(pv);
1495 }
79072805
LW
1496 while (s < send) {
1497 if (*s == '\\') {
a0d0e21e 1498 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1499 s++; /* all that, just for this */
1500 }
1501 *d++ = *s++;
1502 }
1503 *d = '\0';
95a20fc0 1504 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1505 finish:
3280af22 1506 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1507 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1508 return sv;
1509}
1510
ffb4593c
NT
1511/*
1512 * Now come three functions related to double-quote context,
1513 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1514 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1515 * interact with PL_lex_state, and create fake ( ... ) argument lists
1516 * to handle functions and concatenation.
1517 * They assume that whoever calls them will be setting up a fake
1518 * join call, because each subthing puts a ',' after it. This lets
1519 * "lower \luPpEr"
1520 * become
1521 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1522 *
1523 * (I'm not sure whether the spurious commas at the end of lcfirst's
1524 * arguments and join's arguments are created or not).
1525 */
1526
1527/*
1528 * S_sublex_start
1529 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1530 *
1531 * Pattern matching will set PL_lex_op to the pattern-matching op to
1532 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1533 *
1534 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1535 *
1536 * Everything else becomes a FUNC.
1537 *
1538 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1539 * had an OP_CONST or OP_READLINE). This just sets us up for a
1540 * call to S_sublex_push().
1541 */
1542
76e3520e 1543STATIC I32
cea2e8a9 1544S_sublex_start(pTHX)
79072805 1545{
97aff369 1546 dVAR;
0d46e09a 1547 register const I32 op_type = yylval.ival;
79072805
LW
1548
1549 if (op_type == OP_NULL) {
3280af22 1550 yylval.opval = PL_lex_op;
5f66b61c 1551 PL_lex_op = NULL;
79072805
LW
1552 return THING;
1553 }
1554 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1555 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1556
1557 if (SvTYPE(sv) == SVt_PVIV) {
1558 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1559 STRLEN len;
96a5add6 1560 const char * const p = SvPV_const(sv, len);
f54cb97a 1561 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1562 if (SvUTF8(sv))
1563 SvUTF8_on(nsv);
b3ac6de7
IZ
1564 SvREFCNT_dec(sv);
1565 sv = nsv;
4e553d73 1566 }
b3ac6de7 1567 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1568 PL_lex_stuff = NULL;
6f33ba73
RGS
1569 /* Allow <FH> // "foo" */
1570 if (op_type == OP_READLINE)
1571 PL_expect = XTERMORDORDOR;
79072805
LW
1572 return THING;
1573 }
e3f73d4e
RGS
1574 else if (op_type == OP_BACKTICK && PL_lex_op) {
1575 /* readpipe() vas overriden */
1576 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1577 yylval.opval = PL_lex_op;
9b201d7d 1578 PL_lex_op = NULL;
e3f73d4e
RGS
1579 PL_lex_stuff = NULL;
1580 return THING;
1581 }
79072805 1582
3280af22
NIS
1583 PL_sublex_info.super_state = PL_lex_state;
1584 PL_sublex_info.sub_inwhat = op_type;
1585 PL_sublex_info.sub_op = PL_lex_op;
1586 PL_lex_state = LEX_INTERPPUSH;
55497cff 1587
3280af22
NIS
1588 PL_expect = XTERM;
1589 if (PL_lex_op) {
1590 yylval.opval = PL_lex_op;
5f66b61c 1591 PL_lex_op = NULL;
55497cff 1592 return PMFUNC;
1593 }
1594 else
1595 return FUNC;
1596}
1597
ffb4593c
NT
1598/*
1599 * S_sublex_push
1600 * Create a new scope to save the lexing state. The scope will be
1601 * ended in S_sublex_done. Returns a '(', starting the function arguments
1602 * to the uc, lc, etc. found before.
1603 * Sets PL_lex_state to LEX_INTERPCONCAT.
1604 */
1605
76e3520e 1606STATIC I32
cea2e8a9 1607S_sublex_push(pTHX)
55497cff 1608{
27da23d5 1609 dVAR;
f46d017c 1610 ENTER;
55497cff 1611
3280af22
NIS
1612 PL_lex_state = PL_sublex_info.super_state;
1613 SAVEI32(PL_lex_dojoin);
1614 SAVEI32(PL_lex_brackets);
3280af22
NIS
1615 SAVEI32(PL_lex_casemods);
1616 SAVEI32(PL_lex_starts);
1617 SAVEI32(PL_lex_state);
7766f137 1618 SAVEVPTR(PL_lex_inpat);
3280af22 1619 SAVEI32(PL_lex_inwhat);
57843af0 1620 SAVECOPLINE(PL_curcop);
3280af22 1621 SAVEPPTR(PL_bufptr);
8452ff4b 1622 SAVEPPTR(PL_bufend);
3280af22
NIS
1623 SAVEPPTR(PL_oldbufptr);
1624 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1625 SAVEPPTR(PL_last_lop);
1626 SAVEPPTR(PL_last_uni);
3280af22
NIS
1627 SAVEPPTR(PL_linestart);
1628 SAVESPTR(PL_linestr);
8edd5f42
RGS
1629 SAVEGENERICPV(PL_lex_brackstack);
1630 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1631
1632 PL_linestr = PL_lex_stuff;
a0714e2c 1633 PL_lex_stuff = NULL;
3280af22 1634
9cbb5ea2
GS
1635 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1636 = SvPVX(PL_linestr);
3280af22 1637 PL_bufend += SvCUR(PL_linestr);
bd61b366 1638 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1639 SAVEFREESV(PL_linestr);
1640
1641 PL_lex_dojoin = FALSE;
1642 PL_lex_brackets = 0;
a02a5408
JC
1643 Newx(PL_lex_brackstack, 120, char);
1644 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1645 PL_lex_casemods = 0;
1646 *PL_lex_casestack = '\0';
1647 PL_lex_starts = 0;
1648 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1649 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1650
1651 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1652 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1653 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1654 else
5f66b61c 1655 PL_lex_inpat = NULL;
79072805 1656
55497cff 1657 return '(';
79072805
LW
1658}
1659
ffb4593c
NT
1660/*
1661 * S_sublex_done
1662 * Restores lexer state after a S_sublex_push.
1663 */
1664
76e3520e 1665STATIC I32
cea2e8a9 1666S_sublex_done(pTHX)
79072805 1667{
27da23d5 1668 dVAR;
3280af22 1669 if (!PL_lex_starts++) {
396482e1 1670 SV * const sv = newSVpvs("");
9aa983d2
JH
1671 if (SvUTF8(PL_linestr))
1672 SvUTF8_on(sv);
3280af22 1673 PL_expect = XOPERATOR;
9aa983d2 1674 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1675 return THING;
1676 }
1677
3280af22
NIS
1678 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1679 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1680 return yylex();
79072805
LW
1681 }
1682
ffb4593c 1683 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1684 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1685 PL_linestr = PL_lex_repl;
1686 PL_lex_inpat = 0;
1687 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1688 PL_bufend += SvCUR(PL_linestr);
bd61b366 1689 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1690 SAVEFREESV(PL_linestr);
1691 PL_lex_dojoin = FALSE;
1692 PL_lex_brackets = 0;
3280af22
NIS
1693 PL_lex_casemods = 0;
1694 *PL_lex_casestack = '\0';
1695 PL_lex_starts = 0;
25da4f38 1696 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1697 PL_lex_state = LEX_INTERPNORMAL;
1698 PL_lex_starts++;
e9fa98b2
HS
1699 /* we don't clear PL_lex_repl here, so that we can check later
1700 whether this is an evalled subst; that means we rely on the
1701 logic to ensure sublex_done() is called again only via the
1702 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1703 }
e9fa98b2 1704 else {
3280af22 1705 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1706 PL_lex_repl = NULL;
e9fa98b2 1707 }
79072805 1708 return ',';
ffed7fef
LW
1709 }
1710 else {
5db06880
NC
1711#ifdef PERL_MAD
1712 if (PL_madskills) {
cd81e915
NC
1713 if (PL_thiswhite) {
1714 if (!PL_endwhite)
6b29d1f5 1715 PL_endwhite = newSVpvs("");
cd81e915
NC
1716 sv_catsv(PL_endwhite, PL_thiswhite);
1717 PL_thiswhite = 0;
1718 }
1719 if (PL_thistoken)
1720 sv_setpvn(PL_thistoken,"",0);
5db06880 1721 else
cd81e915 1722 PL_realtokenstart = -1;
5db06880
NC
1723 }
1724#endif
f46d017c 1725 LEAVE;
3280af22
NIS
1726 PL_bufend = SvPVX(PL_linestr);
1727 PL_bufend += SvCUR(PL_linestr);
1728 PL_expect = XOPERATOR;
09bef843 1729 PL_sublex_info.sub_inwhat = 0;
79072805 1730 return ')';
ffed7fef
LW
1731 }
1732}
1733
02aa26ce
NT
1734/*
1735 scan_const
1736
1737 Extracts a pattern, double-quoted string, or transliteration. This
1738 is terrifying code.
1739
94def140 1740 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1741 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1742 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1743
94def140
TS
1744 Returns a pointer to the character scanned up to. If this is
1745 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1746 successfully parsed), will leave an OP for the substring scanned
1747 in yylval. Caller must intuit reason for not parsing further
1748 by looking at the next characters herself.
1749
02aa26ce
NT
1750 In patterns:
1751 backslashes:
1752 double-quoted style: \r and \n
1753 regexp special ones: \D \s
94def140
TS
1754 constants: \x31
1755 backrefs: \1
02aa26ce
NT
1756 case and quoting: \U \Q \E
1757 stops on @ and $, but not for $ as tail anchor
1758
1759 In transliterations:
1760 characters are VERY literal, except for - not at the start or end
94def140
TS
1761 of the string, which indicates a range. If the range is in bytes,
1762 scan_const expands the range to the full set of intermediate
1763 characters. If the range is in utf8, the hyphen is replaced with
1764 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1765
1766 In double-quoted strings:
1767 backslashes:
1768 double-quoted style: \r and \n
94def140
TS
1769 constants: \x31
1770 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1771 case and quoting: \U \Q \E
1772 stops on @ and $
1773
1774 scan_const does *not* construct ops to handle interpolated strings.
1775 It stops processing as soon as it finds an embedded $ or @ variable
1776 and leaves it to the caller to work out what's going on.
1777
94def140
TS
1778 embedded arrays (whether in pattern or not) could be:
1779 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1780
1781 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1782
1783 $ in pattern could be $foo or could be tail anchor. Assumption:
1784 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1785 followed by one of "()| \r\n\t"
02aa26ce
NT
1786
1787 \1 (backreferences) are turned into $1
1788
1789 The structure of the code is
1790 while (there's a character to process) {
94def140
TS
1791 handle transliteration ranges
1792 skip regexp comments /(?#comment)/ and codes /(?{code})/
1793 skip #-initiated comments in //x patterns
1794 check for embedded arrays
02aa26ce
NT
1795 check for embedded scalars
1796 if (backslash) {
94def140
TS
1797 leave intact backslashes from leaveit (below)
1798 deprecate \1 in substitution replacements
02aa26ce
NT
1799 handle string-changing backslashes \l \U \Q \E, etc.
1800 switch (what was escaped) {
94def140
TS
1801 handle \- in a transliteration (becomes a literal -)
1802 handle \132 (octal characters)
1803 handle \x15 and \x{1234} (hex characters)
1804 handle \N{name} (named characters)
1805 handle \cV (control characters)
1806 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1807 } (end switch)
1808 } (end if backslash)
1809 } (end while character to read)
4e553d73 1810
02aa26ce
NT
1811*/
1812
76e3520e 1813STATIC char *
cea2e8a9 1814S_scan_const(pTHX_ char *start)
79072805 1815{
97aff369 1816 dVAR;
3280af22 1817 register char *send = PL_bufend; /* end of the constant */
561b68a9 1818 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1819 register char *s = start; /* start of the constant */
1820 register char *d = SvPVX(sv); /* destination for copies */
1821 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1822 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1823 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1824 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1825 UV uv;
4c3a8340
TS
1826#ifdef EBCDIC
1827 UV literal_endpoint = 0;
e294cc5d 1828 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1829#endif
012bcf8d 1830
2b9d42f0
NIS
1831 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1832 /* If we are doing a trans and we know we want UTF8 set expectation */
1833 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1834 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1835 }
1836
1837
79072805 1838 while (s < send || dorange) {
02aa26ce 1839 /* get transliterations out of the way (they're most literal) */
3280af22 1840 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1841 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1842 if (dorange) {
1ba5c669
JH
1843 I32 i; /* current expanded character */
1844 I32 min; /* first character in range */
1845 I32 max; /* last character in range */
02aa26ce 1846
e294cc5d
JH
1847#ifdef EBCDIC
1848 UV uvmax = 0;
1849#endif
1850
1851 if (has_utf8
1852#ifdef EBCDIC
1853 && !native_range
1854#endif
1855 ) {
9d4ba2ae 1856 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1857 char *e = d++;
1858 while (e-- > c)
1859 *(e + 1) = *e;
25716404 1860 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1861 /* mark the range as done, and continue */
1862 dorange = FALSE;
1863 didrange = TRUE;
1864 continue;
1865 }
2b9d42f0 1866
95a20fc0 1867 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1868#ifdef EBCDIC
1869 SvGROW(sv,
1870 SvLEN(sv) + (has_utf8 ?
1871 (512 - UTF_CONTINUATION_MARK +
1872 UNISKIP(0x100))
1873 : 256));
1874 /* How many two-byte within 0..255: 128 in UTF-8,
1875 * 96 in UTF-8-mod. */
1876#else
9cbb5ea2 1877 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1878#endif
9cbb5ea2 1879 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1880#ifdef EBCDIC
1881 if (has_utf8) {
1882 int j;
1883 for (j = 0; j <= 1; j++) {
1884 char * const c = (char*)utf8_hop((U8*)d, -1);
1885 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1886 if (j)
1887 min = (U8)uv;
1888 else if (uv < 256)
1889 max = (U8)uv;
1890 else {
1891 max = (U8)0xff; /* only to \xff */
1892 uvmax = uv; /* \x{100} to uvmax */
1893 }
1894 d = c; /* eat endpoint chars */
1895 }
1896 }
1897 else {
1898#endif
1899 d -= 2; /* eat the first char and the - */
1900 min = (U8)*d; /* first char in range */
1901 max = (U8)d[1]; /* last char in range */
1902#ifdef EBCDIC
1903 }
1904#endif
8ada0baa 1905
c2e66d9e 1906 if (min > max) {
01ec43d0 1907 Perl_croak(aTHX_
d1573ac7 1908 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1909 (char)min, (char)max);
c2e66d9e
GS
1910 }
1911
c7f1f016 1912#ifdef EBCDIC
4c3a8340
TS
1913 if (literal_endpoint == 2 &&
1914 ((isLOWER(min) && isLOWER(max)) ||
1915 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1916 if (isLOWER(min)) {
1917 for (i = min; i <= max; i++)
1918 if (isLOWER(i))
db42d148 1919 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1920 } else {
1921 for (i = min; i <= max; i++)
1922 if (isUPPER(i))
db42d148 1923 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1924 }
1925 }
1926 else
1927#endif
1928 for (i = min; i <= max; i++)
e294cc5d
JH
1929#ifdef EBCDIC
1930 if (has_utf8) {
1931 const U8 ch = (U8)NATIVE_TO_UTF(i);
1932 if (UNI_IS_INVARIANT(ch))
1933 *d++ = (U8)i;
1934 else {
1935 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1936 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1937 }
1938 }
1939 else
1940#endif
1941 *d++ = (char)i;
1942
1943#ifdef EBCDIC
1944 if (uvmax) {
1945 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1946 if (uvmax > 0x101)
1947 *d++ = (char)UTF_TO_NATIVE(0xff);
1948 if (uvmax > 0x100)
1949 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1950 }
1951#endif
02aa26ce
NT
1952
1953 /* mark the range as done, and continue */
79072805 1954 dorange = FALSE;
01ec43d0 1955 didrange = TRUE;
4c3a8340
TS
1956#ifdef EBCDIC
1957 literal_endpoint = 0;
1958#endif
79072805 1959 continue;
4e553d73 1960 }
02aa26ce
NT
1961
1962 /* range begins (ignore - as first or last char) */
79072805 1963 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1964 if (didrange) {
1fafa243 1965 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1966 }
e294cc5d
JH
1967 if (has_utf8
1968#ifdef EBCDIC
1969 && !native_range
1970#endif
1971 ) {
25716404 1972 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1973 s++;
1974 continue;
1975 }
79072805
LW
1976 dorange = TRUE;
1977 s++;
01ec43d0
GS
1978 }
1979 else {
1980 didrange = FALSE;
4c3a8340
TS
1981#ifdef EBCDIC
1982 literal_endpoint = 0;
e294cc5d 1983 native_range = TRUE;
4c3a8340 1984#endif
01ec43d0 1985 }
79072805 1986 }
02aa26ce
NT
1987
1988 /* if we get here, we're not doing a transliteration */
1989
0f5d15d6
IZ
1990 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1991 except for the last char, which will be done separately. */
3280af22 1992 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1993 if (s[2] == '#') {
e994fd66 1994 while (s+1 < send && *s != ')')
db42d148 1995 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1996 }
1997 else if (s[2] == '{' /* This should match regcomp.c */
1998 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1999 {
cc6b7395 2000 I32 count = 1;
0f5d15d6 2001 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2002 char c;
2003
d9f97599
GS
2004 while (count && (c = *regparse)) {
2005 if (c == '\\' && regparse[1])
2006 regparse++;
4e553d73 2007 else if (c == '{')
cc6b7395 2008 count++;
4e553d73 2009 else if (c == '}')
cc6b7395 2010 count--;
d9f97599 2011 regparse++;
cc6b7395 2012 }
e994fd66 2013 if (*regparse != ')')
5bdf89e7 2014 regparse--; /* Leave one char for continuation. */
0f5d15d6 2015 while (s < regparse)
db42d148 2016 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2017 }
748a9306 2018 }
02aa26ce
NT
2019
2020 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2021 else if (*s == '#' && PL_lex_inpat &&
2022 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2023 while (s+1 < send && *s != '\n')
db42d148 2024 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2025 }
02aa26ce 2026
5d1d4326 2027 /* check for embedded arrays
da6eedaa 2028 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2029 */
1749ea0d
TS
2030 else if (*s == '@' && s[1]) {
2031 if (isALNUM_lazy_if(s+1,UTF))
2032 break;
2033 if (strchr(":'{$", s[1]))
2034 break;
2035 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2036 break; /* in regexp, neither @+ nor @- are interpolated */
2037 }
02aa26ce
NT
2038
2039 /* check for embedded scalars. only stop if we're sure it's a
2040 variable.
2041 */
79072805 2042 else if (*s == '$') {
3280af22 2043 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2044 break;
6002328a 2045 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2046 break; /* in regexp, $ might be tail anchor */
2047 }
02aa26ce 2048
2b9d42f0
NIS
2049 /* End of else if chain - OP_TRANS rejoin rest */
2050
02aa26ce 2051 /* backslashes */
79072805
LW
2052 if (*s == '\\' && s+1 < send) {
2053 s++;
02aa26ce 2054
02aa26ce 2055 /* deprecate \1 in strings and substitution replacements */
3280af22 2056 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2057 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2058 {
599cee73 2059 if (ckWARN(WARN_SYNTAX))
9014280d 2060 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2061 *--s = '$';
2062 break;
2063 }
02aa26ce
NT
2064
2065 /* string-change backslash escapes */
3280af22 2066 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2067 --s;
2068 break;
2069 }
cc74c5bd
TS
2070 /* skip any other backslash escapes in a pattern */
2071 else if (PL_lex_inpat) {
2072 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2073 goto default_action;
2074 }
02aa26ce
NT
2075
2076 /* if we get here, it's either a quoted -, or a digit */
79072805 2077 switch (*s) {
02aa26ce
NT
2078
2079 /* quoted - in transliterations */
79072805 2080 case '-':
3280af22 2081 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2082 *d++ = *s++;
2083 continue;
2084 }
2085 /* FALL THROUGH */
2086 default:
11b8faa4 2087 {
86f97054 2088 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2089 ckWARN(WARN_MISC))
9014280d 2090 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2091 "Unrecognized escape \\%c passed through",
2092 *s);
11b8faa4 2093 /* default action is to copy the quoted character */
f9a63242 2094 goto default_action;
11b8faa4 2095 }
02aa26ce
NT
2096
2097 /* \132 indicates an octal constant */
79072805
LW
2098 case '0': case '1': case '2': case '3':
2099 case '4': case '5': case '6': case '7':
ba210ebe 2100 {
53305cf1
NC
2101 I32 flags = 0;
2102 STRLEN len = 3;
2103 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2104 s += len;
2105 }
012bcf8d 2106 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2107
2108 /* \x24 indicates a hex constant */
79072805 2109 case 'x':
a0ed51b3
LW
2110 ++s;
2111 if (*s == '{') {
9d4ba2ae 2112 char* const e = strchr(s, '}');
a4c04bdc
NC
2113 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2114 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2115 STRLEN len;
355860ce 2116
53305cf1 2117 ++s;
adaeee49 2118 if (!e) {
a0ed51b3 2119 yyerror("Missing right brace on \\x{}");
355860ce 2120 continue;
ba210ebe 2121 }
53305cf1
NC
2122 len = e - s;
2123 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2124 s = e + 1;
a0ed51b3
LW
2125 }
2126 else {
ba210ebe 2127 {
53305cf1 2128 STRLEN len = 2;
a4c04bdc 2129 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2130 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2131 s += len;
2132 }
012bcf8d
GS
2133 }
2134
2135 NUM_ESCAPE_INSERT:
2136 /* Insert oct or hex escaped character.
301d3d20 2137 * There will always enough room in sv since such
db42d148 2138 * escapes will be longer than any UTF-8 sequence
301d3d20 2139 * they can end up as. */
ba7cea30 2140
c7f1f016
NIS
2141 /* We need to map to chars to ASCII before doing the tests
2142 to cover EBCDIC
2143 */
c4d5f83a 2144 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2145 if (!has_utf8 && uv > 255) {
301d3d20
JH
2146 /* Might need to recode whatever we have
2147 * accumulated so far if it contains any
2148 * hibit chars.
2149 *
2150 * (Can't we keep track of that and avoid
2151 * this rescan? --jhi)
012bcf8d 2152 */
c7f1f016 2153 int hicount = 0;
63cd0674
NIS
2154 U8 *c;
2155 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2156 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2157 hicount++;
db42d148 2158 }
012bcf8d 2159 }
63cd0674 2160 if (hicount) {
9d4ba2ae 2161 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2162 U8 *src, *dst;
2163 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2164 src = (U8 *)d - 1;
2165 dst = src+hicount;
2166 d += hicount;
cfd0369c 2167 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2168 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2169 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2170 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2171 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2172 }
2173 else {
63cd0674 2174 *dst-- = *src;
012bcf8d 2175 }
c7f1f016 2176 src--;
012bcf8d
GS
2177 }
2178 }
2179 }
2180
9aa983d2 2181 if (has_utf8 || uv > 255) {
9041c2e3 2182 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2183 has_utf8 = TRUE;
f9a63242
JH
2184 if (PL_lex_inwhat == OP_TRANS &&
2185 PL_sublex_info.sub_op) {
2186 PL_sublex_info.sub_op->op_private |=
2187 (PL_lex_repl ? OPpTRANS_FROM_UTF
2188 : OPpTRANS_TO_UTF);
f9a63242 2189 }
e294cc5d
JH
2190#ifdef EBCDIC
2191 if (uv > 255 && !dorange)
2192 native_range = FALSE;
2193#endif
012bcf8d 2194 }
a0ed51b3 2195 else {
012bcf8d 2196 *d++ = (char)uv;
a0ed51b3 2197 }
012bcf8d
GS
2198 }
2199 else {
c4d5f83a 2200 *d++ = (char) uv;
a0ed51b3 2201 }
79072805 2202 continue;
02aa26ce 2203
b239daa5 2204 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2205 case 'N':
55eda711 2206 ++s;
423cee85
JH
2207 if (*s == '{') {
2208 char* e = strchr(s, '}');
155aba94 2209 SV *res;
423cee85 2210 STRLEN len;
cfd0369c 2211 const char *str;
fc8cd66c 2212 SV *type;
4e553d73 2213
423cee85 2214 if (!e) {
5777a3f7 2215 yyerror("Missing right brace on \\N{}");
423cee85
JH
2216 e = s - 1;
2217 goto cont_scan;
2218 }
dbc0d4f2
JH
2219 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2220 /* \N{U+...} */
2221 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2222 PERL_SCAN_DISALLOW_PREFIX;
2223 s += 3;
2224 len = e - s;
2225 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2226 if ( e > s && len != (STRLEN)(e - s) ) {
2227 uv = 0xFFFD;
fc8cd66c 2228 }
dbc0d4f2
JH
2229 s = e + 1;
2230 goto NUM_ESCAPE_INSERT;
2231 }
55eda711 2232 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2233 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2234 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2235 res, NULL, SvPVX(type) );
2236 SvREFCNT_dec(type);
f9a63242
JH
2237 if (has_utf8)
2238 sv_utf8_upgrade(res);
cfd0369c 2239 str = SvPV_const(res,len);
1c47067b
JH
2240#ifdef EBCDIC_NEVER_MIND
2241 /* charnames uses pack U and that has been
2242 * recently changed to do the below uni->native
2243 * mapping, so this would be redundant (and wrong,
2244 * the code point would be doubly converted).
2245 * But leave this in just in case the pack U change
2246 * gets revoked, but the semantics is still
2247 * desireable for charnames. --jhi */
cddc7ef4 2248 {
cfd0369c 2249 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2250
2251 if (uv < 0x100) {
89ebb4a3 2252 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2253
2254 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2255 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2256 str = SvPV_const(res, len);
cddc7ef4
JH
2257 }
2258 }
2259#endif
89491803 2260 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2261 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2262 SvCUR_set(sv, d - ostart);
2263 SvPOK_on(sv);
e4f3eed8 2264 *d = '\0';
f08d6ad9 2265 sv_utf8_upgrade(sv);
d2f449dd 2266 /* this just broke our allocation above... */
eb160463 2267 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2268 d = SvPVX(sv) + SvCUR(sv);
89491803 2269 has_utf8 = TRUE;
f08d6ad9 2270 }
eb160463 2271 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2272 const char * const odest = SvPVX_const(sv);
423cee85 2273
8973db79 2274 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2275 d = SvPVX(sv) + (d - odest);
2276 }
e294cc5d
JH
2277#ifdef EBCDIC
2278 if (!dorange)
2279 native_range = FALSE; /* \N{} is guessed to be Unicode */
2280#endif
423cee85
JH
2281 Copy(str, d, len, char);
2282 d += len;
2283 SvREFCNT_dec(res);
2284 cont_scan:
2285 s = e + 1;
2286 }
2287 else
5777a3f7 2288 yyerror("Missing braces on \\N{}");
423cee85
JH
2289 continue;
2290
02aa26ce 2291 /* \c is a control character */
79072805
LW
2292 case 'c':
2293 s++;
961ce445 2294 if (s < send) {
ba210ebe 2295 U8 c = *s++;
c7f1f016
NIS
2296#ifdef EBCDIC
2297 if (isLOWER(c))
2298 c = toUPPER(c);
2299#endif
db42d148 2300 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2301 }
961ce445
RGS
2302 else {
2303 yyerror("Missing control char name in \\c");
2304 }
79072805 2305 continue;
02aa26ce
NT
2306
2307 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2308 case 'b':
db42d148 2309 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2310 break;
2311 case 'n':
db42d148 2312 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2313 break;
2314 case 'r':
db42d148 2315 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2316 break;
2317 case 'f':
db42d148 2318 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2319 break;
2320 case 't':
db42d148 2321 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2322 break;
34a3fe2a 2323 case 'e':
db42d148 2324 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2325 break;
2326 case 'a':
db42d148 2327 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2328 break;
02aa26ce
NT
2329 } /* end switch */
2330
79072805
LW
2331 s++;
2332 continue;
02aa26ce 2333 } /* end if (backslash) */
4c3a8340
TS
2334#ifdef EBCDIC
2335 else
2336 literal_endpoint++;
2337#endif
02aa26ce 2338
f9a63242 2339 default_action:
2b9d42f0
NIS
2340 /* If we started with encoded form, or already know we want it
2341 and then encode the next character */
2342 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2343 STRLEN len = 1;
5f66b61c
AL
2344 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2345 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2346 s += len;
2347 if (need > len) {
2348 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2349 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2350 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2351 }
5f66b61c 2352 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2353 has_utf8 = TRUE;
e294cc5d
JH
2354#ifdef EBCDIC
2355 if (uv > 255 && !dorange)
2356 native_range = FALSE;
2357#endif
2b9d42f0
NIS
2358 }
2359 else {
2360 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2361 }
02aa26ce
NT
2362 } /* while loop to process each character */
2363
2364 /* terminate the string and set up the sv */
79072805 2365 *d = '\0';
95a20fc0 2366 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2367 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2368 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2369
79072805 2370 SvPOK_on(sv);
9f4817db 2371 if (PL_encoding && !has_utf8) {
d0063567
DK
2372 sv_recode_to_utf8(sv, PL_encoding);
2373 if (SvUTF8(sv))
2374 has_utf8 = TRUE;
9f4817db 2375 }
2b9d42f0 2376 if (has_utf8) {
7e2040f0 2377 SvUTF8_on(sv);
2b9d42f0 2378 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2379 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2380 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2381 }
2382 }
79072805 2383
02aa26ce 2384 /* shrink the sv if we allocated more than we used */
79072805 2385 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2386 SvPV_shrink_to_cur(sv);
79072805 2387 }
02aa26ce 2388
9b599b2a 2389 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2390 if (s > PL_bufptr) {
2391 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2392 sv = new_constant(start, s - start,
2393 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2394 sv, NULL,
10edeb5d
JH
2395 (const char *)
2396 (( PL_lex_inwhat == OP_TRANS
2397 ? "tr"
2398 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2399 ? "s"
2400 : "qq"))));
79072805 2401 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2402 } else
8990e307 2403 SvREFCNT_dec(sv);
79072805
LW
2404 return s;
2405}
2406
ffb4593c
NT
2407/* S_intuit_more
2408 * Returns TRUE if there's more to the expression (e.g., a subscript),
2409 * FALSE otherwise.
ffb4593c
NT
2410 *
2411 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2412 *
2413 * ->[ and ->{ return TRUE
2414 * { and [ outside a pattern are always subscripts, so return TRUE
2415 * if we're outside a pattern and it's not { or [, then return FALSE
2416 * if we're in a pattern and the first char is a {
2417 * {4,5} (any digits around the comma) returns FALSE
2418 * if we're in a pattern and the first char is a [
2419 * [] returns FALSE
2420 * [SOMETHING] has a funky algorithm to decide whether it's a
2421 * character class or not. It has to deal with things like
2422 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2423 * anything else returns TRUE
2424 */
2425
9cbb5ea2
GS
2426/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2427
76e3520e 2428STATIC int
cea2e8a9 2429S_intuit_more(pTHX_ register char *s)
79072805 2430{
97aff369 2431 dVAR;
3280af22 2432 if (PL_lex_brackets)
79072805
LW
2433 return TRUE;
2434 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2435 return TRUE;
2436 if (*s != '{' && *s != '[')
2437 return FALSE;
3280af22 2438 if (!PL_lex_inpat)
79072805
LW
2439 return TRUE;
2440
2441 /* In a pattern, so maybe we have {n,m}. */
2442 if (*s == '{') {
2443 s++;
2444 if (!isDIGIT(*s))
2445 return TRUE;
2446 while (isDIGIT(*s))
2447 s++;
2448 if (*s == ',')
2449 s++;
2450 while (isDIGIT(*s))
2451 s++;
2452 if (*s == '}')
2453 return FALSE;
2454 return TRUE;
2455
2456 }
2457
2458 /* On the other hand, maybe we have a character class */
2459
2460 s++;
2461 if (*s == ']' || *s == '^')
2462 return FALSE;
2463 else {
ffb4593c 2464 /* this is terrifying, and it works */
79072805
LW
2465 int weight = 2; /* let's weigh the evidence */
2466 char seen[256];
f27ffc4a 2467 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2468 const char * const send = strchr(s,']');
3280af22 2469 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2470
2471 if (!send) /* has to be an expression */
2472 return TRUE;
2473
2474 Zero(seen,256,char);
2475 if (*s == '$')
2476 weight -= 3;
2477 else if (isDIGIT(*s)) {
2478 if (s[1] != ']') {
2479 if (isDIGIT(s[1]) && s[2] == ']')
2480 weight -= 10;
2481 }
2482 else
2483 weight -= 100;
2484 }
2485 for (; s < send; s++) {
2486 last_un_char = un_char;
2487 un_char = (unsigned char)*s;
2488 switch (*s) {
2489 case '@':
2490 case '&':
2491 case '$':
2492 weight -= seen[un_char] * 10;
7e2040f0 2493 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2494 int len;
8903cb82 2495 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2496 len = (int)strlen(tmpbuf);
2497 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2498 weight -= 100;
2499 else
2500 weight -= 10;
2501 }
2502 else if (*s == '$' && s[1] &&
93a17b20
LW
2503 strchr("[#!%*<>()-=",s[1])) {
2504 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2505 weight -= 10;
2506 else
2507 weight -= 1;
2508 }
2509 break;
2510 case '\\':
2511 un_char = 254;
2512 if (s[1]) {
93a17b20 2513 if (strchr("wds]",s[1]))
79072805 2514 weight += 100;
10edeb5d 2515 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2516 weight += 1;
93a17b20 2517 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2518 weight += 40;
2519 else if (isDIGIT(s[1])) {
2520 weight += 40;
2521 while (s[1] && isDIGIT(s[1]))
2522 s++;
2523 }
2524 }
2525 else
2526 weight += 100;
2527 break;
2528 case '-':
2529 if (s[1] == '\\')
2530 weight += 50;
93a17b20 2531 if (strchr("aA01! ",last_un_char))
79072805 2532 weight += 30;
93a17b20 2533 if (strchr("zZ79~",s[1]))
79072805 2534 weight += 30;
f27ffc4a
GS
2535 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2536 weight -= 5; /* cope with negative subscript */
79072805
LW
2537 break;
2538 default:
3792a11b
NC
2539 if (!isALNUM(last_un_char)
2540 && !(last_un_char == '$' || last_un_char == '@'
2541 || last_un_char == '&')
2542 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2543 char *d = tmpbuf;
2544 while (isALPHA(*s))
2545 *d++ = *s++;
2546 *d = '\0';
5458a98a 2547 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2548 weight -= 150;
2549 }
2550 if (un_char == last_un_char + 1)
2551 weight += 5;
2552 weight -= seen[un_char];
2553 break;
2554 }
2555 seen[un_char]++;
2556 }
2557 if (weight >= 0) /* probably a character class */
2558 return FALSE;
2559 }
2560
2561 return TRUE;
2562}
ffed7fef 2563
ffb4593c
NT
2564/*
2565 * S_intuit_method
2566 *
2567 * Does all the checking to disambiguate
2568 * foo bar
2569 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2570 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2571 *
2572 * First argument is the stuff after the first token, e.g. "bar".
2573 *
2574 * Not a method if bar is a filehandle.
2575 * Not a method if foo is a subroutine prototyped to take a filehandle.
2576 * Not a method if it's really "Foo $bar"
2577 * Method if it's "foo $bar"
2578 * Not a method if it's really "print foo $bar"
2579 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2580 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2581 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2582 * =>
2583 */
2584
76e3520e 2585STATIC int
62d55b22 2586S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2587{
97aff369 2588 dVAR;
a0d0e21e 2589 char *s = start + (*start == '$');
3280af22 2590 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2591 STRLEN len;
2592 GV* indirgv;
5db06880
NC
2593#ifdef PERL_MAD
2594 int soff;
2595#endif
a0d0e21e
LW
2596
2597 if (gv) {
62d55b22 2598 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2599 return 0;
62d55b22
NC
2600 if (cv) {
2601 if (SvPOK(cv)) {
2602 const char *proto = SvPVX_const(cv);
2603 if (proto) {
2604 if (*proto == ';')
2605 proto++;
2606 if (*proto == '*')
2607 return 0;
2608 }
b6c543e3
IZ
2609 }
2610 } else
c35e046a 2611 gv = NULL;
a0d0e21e 2612 }
8903cb82 2613 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2614 /* start is the beginning of the possible filehandle/object,
2615 * and s is the end of it
2616 * tmpbuf is a copy of it
2617 */
2618
a0d0e21e 2619 if (*start == '$') {
3280af22 2620 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2621 return 0;
5db06880
NC
2622#ifdef PERL_MAD
2623 len = start - SvPVX(PL_linestr);
2624#endif
29595ff2 2625 s = PEEKSPACE(s);
f0092767 2626#ifdef PERL_MAD
5db06880
NC
2627 start = SvPVX(PL_linestr) + len;
2628#endif
3280af22
NIS
2629 PL_bufptr = start;
2630 PL_expect = XREF;
a0d0e21e
LW
2631 return *s == '(' ? FUNCMETH : METHOD;
2632 }
5458a98a 2633 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2634 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2635 len -= 2;
2636 tmpbuf[len] = '\0';
5db06880
NC
2637#ifdef PERL_MAD
2638 soff = s - SvPVX(PL_linestr);
2639#endif
c3e0f903
GS
2640 goto bare_package;
2641 }
90e5519e 2642 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2643 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2644 return 0;
2645 /* filehandle or package name makes it a method */
89bfa8cd 2646 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2647#ifdef PERL_MAD
2648 soff = s - SvPVX(PL_linestr);
2649#endif
29595ff2 2650 s = PEEKSPACE(s);
3280af22 2651 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2652 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2653 bare_package:
cd81e915 2654 start_force(PL_curforce);
9ded7720 2655 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2656 newSVpvn(tmpbuf,len));
9ded7720 2657 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2658 if (PL_madskills)
2659 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2660 PL_expect = XTERM;
a0d0e21e 2661 force_next(WORD);
3280af22 2662 PL_bufptr = s;
5db06880
NC
2663#ifdef PERL_MAD
2664 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2665#endif
a0d0e21e
LW
2666 return *s == '(' ? FUNCMETH : METHOD;
2667 }
2668 }
2669 return 0;
2670}
2671
ffb4593c
NT
2672/*
2673 * S_incl_perldb
2674 * Return a string of Perl code to load the debugger. If PERL5DB
2675 * is set, it will return the contents of that, otherwise a
2676 * compile-time require of perl5db.pl.
2677 */
2678
bfed75c6 2679STATIC const char*
cea2e8a9 2680S_incl_perldb(pTHX)
a0d0e21e 2681{
97aff369 2682 dVAR;
3280af22 2683 if (PL_perldb) {
9d4ba2ae 2684 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2685
2686 if (pdb)
2687 return pdb;
93189314 2688 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2689 return "BEGIN { require 'perl5db.pl' }";
2690 }
2691 return "";
2692}
2693
2694
16d20bd9 2695/* Encoded script support. filter_add() effectively inserts a
4e553d73 2696 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2697 * Note that the filter function only applies to the current source file
2698 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2699 *
2700 * The datasv parameter (which may be NULL) can be used to pass
2701 * private data to this instance of the filter. The filter function
2702 * can recover the SV using the FILTER_DATA macro and use it to
2703 * store private buffers and state information.
2704 *
2705 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2706 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2707 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2708 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2709 * private use must be set using malloc'd pointers.
2710 */
16d20bd9
AD
2711
2712SV *
864dbfa3 2713Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2714{
97aff369 2715 dVAR;
f4c556ac 2716 if (!funcp)
a0714e2c 2717 return NULL;
f4c556ac 2718
3280af22
NIS
2719 if (!PL_rsfp_filters)
2720 PL_rsfp_filters = newAV();
16d20bd9 2721 if (!datasv)
561b68a9 2722 datasv = newSV(0);
862a34c6 2723 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2724 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2725 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2726 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2727 FPTR2DPTR(void *, IoANY(datasv)),
2728 SvPV_nolen(datasv)));
3280af22
NIS
2729 av_unshift(PL_rsfp_filters, 1);
2730 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2731 return(datasv);
2732}
4e553d73 2733
16d20bd9
AD
2734
2735/* Delete most recently added instance of this filter function. */
a0d0e21e 2736void
864dbfa3 2737Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2738{
97aff369 2739 dVAR;
e0c19803 2740 SV *datasv;
24801a4b 2741
33073adb 2742#ifdef DEBUGGING
55662e27
JH
2743 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2744 FPTR2DPTR(void*, funcp)));
33073adb 2745#endif
3280af22 2746 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2747 return;
2748 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2749 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2750 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2751 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2752 IoANY(datasv) = (void *)NULL;
3280af22 2753 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2754
16d20bd9
AD
2755 return;
2756 }
2757 /* we need to search for the correct entry and clear it */
cea2e8a9 2758 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2759}
2760
2761
1de9afcd
RGS
2762/* Invoke the idxth filter function for the current rsfp. */
2763/* maxlen 0 = read one text line */
16d20bd9 2764I32
864dbfa3 2765Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2766{
97aff369 2767 dVAR;
16d20bd9
AD
2768 filter_t funcp;
2769 SV *datasv = NULL;
f482118e
NC
2770 /* This API is bad. It should have been using unsigned int for maxlen.
2771 Not sure if we want to change the API, but if not we should sanity
2772 check the value here. */
39cd7a59
NC
2773 const unsigned int correct_length
2774 = maxlen < 0 ?
2775#ifdef PERL_MICRO
2776 0x7FFFFFFF
2777#else
2778 INT_MAX
2779#endif
2780 : maxlen;
e50aee73 2781
3280af22 2782 if (!PL_rsfp_filters)
16d20bd9 2783 return -1;
1de9afcd 2784 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2785 /* Provide a default input filter to make life easy. */
2786 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2787 DEBUG_P(PerlIO_printf(Perl_debug_log,
2788 "filter_read %d: from rsfp\n", idx));
f482118e 2789 if (correct_length) {
16d20bd9
AD
2790 /* Want a block */
2791 int len ;
f54cb97a 2792 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2793
2794 /* ensure buf_sv is large enough */
f482118e
NC
2795 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2796 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2797 correct_length)) <= 0) {
3280af22 2798 if (PerlIO_error(PL_rsfp))
37120919
AD
2799 return -1; /* error */
2800 else
2801 return 0 ; /* end of file */
2802 }
16d20bd9
AD
2803 SvCUR_set(buf_sv, old_len + len) ;
2804 } else {
2805 /* Want a line */
3280af22
NIS
2806 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2807 if (PerlIO_error(PL_rsfp))
37120919
AD
2808 return -1; /* error */
2809 else
2810 return 0 ; /* end of file */
2811 }
16d20bd9
AD
2812 }
2813 return SvCUR(buf_sv);
2814 }
2815 /* Skip this filter slot if filter has been deleted */
1de9afcd 2816 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2817 DEBUG_P(PerlIO_printf(Perl_debug_log,
2818 "filter_read %d: skipped (filter deleted)\n",
2819 idx));
f482118e 2820 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2821 }
2822 /* Get function pointer hidden within datasv */
8141890a 2823 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2824 DEBUG_P(PerlIO_printf(Perl_debug_log,
2825 "filter_read %d: via function %p (%s)\n",
ca0270c4 2826 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2827 /* Call function. The function is expected to */
2828 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2829 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2830 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2831}
2832
76e3520e 2833STATIC char *
cea2e8a9 2834S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2835{
97aff369 2836 dVAR;
c39cd008 2837#ifdef PERL_CR_FILTER
3280af22 2838 if (!PL_rsfp_filters) {
c39cd008 2839 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2840 }
2841#endif
3280af22 2842 if (PL_rsfp_filters) {
55497cff 2843 if (!append)
2844 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2845 if (FILTER_READ(0, sv, 0) > 0)
2846 return ( SvPVX(sv) ) ;
2847 else
bd61b366 2848 return NULL ;
16d20bd9 2849 }
9d116dd7 2850 else
fd049845 2851 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2852}
2853
01ec43d0 2854STATIC HV *
7fc63493 2855S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2856{
97aff369 2857 dVAR;
def3634b
GS
2858 GV *gv;
2859
01ec43d0 2860 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2861 return PL_curstash;
2862
2863 if (len > 2 &&
2864 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2865 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2866 {
2867 return GvHV(gv); /* Foo:: */
def3634b
GS
2868 }
2869
2870 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2871 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2872 if (gv && GvCV(gv)) {
2873 SV * const sv = cv_const_sv(GvCV(gv));
2874 if (sv)
83003860 2875 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2876 }
2877
2878 return gv_stashpv(pkgname, FALSE);
2879}
a0d0e21e 2880
e3f73d4e
RGS
2881/*
2882 * S_readpipe_override
2883 * Check whether readpipe() is overriden, and generates the appropriate
2884 * optree, provided sublex_start() is called afterwards.
2885 */
2886STATIC void
1d51329b 2887S_readpipe_override(pTHX)
e3f73d4e
RGS
2888{
2889 GV **gvp;
2890 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2891 yylval.ival = OP_BACKTICK;
2892 if ((gv_readpipe
2893 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2894 ||
2895 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2896 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2897 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2898 {
2899 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2900 append_elem(OP_LIST,
2901 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2902 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2903 }
2904 else {
2905 set_csh();
2906 }
2907}
2908
5db06880
NC
2909#ifdef PERL_MAD
2910 /*
2911 * Perl_madlex
2912 * The intent of this yylex wrapper is to minimize the changes to the
2913 * tokener when we aren't interested in collecting madprops. It remains
2914 * to be seen how successful this strategy will be...
2915 */
2916
2917int
2918Perl_madlex(pTHX)
2919{
2920 int optype;
2921 char *s = PL_bufptr;
2922
cd81e915
NC
2923 /* make sure PL_thiswhite is initialized */
2924 PL_thiswhite = 0;
2925 PL_thismad = 0;
5db06880 2926
cd81e915 2927 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2928 if (PL_pending_ident)
2929 return S_pending_ident(aTHX);
2930
2931 /* previous token ate up our whitespace? */
cd81e915
NC
2932 if (!PL_lasttoke && PL_nextwhite) {
2933 PL_thiswhite = PL_nextwhite;
2934 PL_nextwhite = 0;
5db06880
NC
2935 }
2936
2937 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2938 PL_realtokenstart = -1;
2939 PL_thistoken = 0;
5db06880
NC
2940 optype = yylex();
2941 s = PL_bufptr;
cd81e915 2942 assert(PL_curforce < 0);
5db06880 2943
cd81e915
NC
2944 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2945 if (!PL_thistoken) {
2946 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2947 PL_thistoken = newSVpvs("");
5db06880 2948 else {
c35e046a 2949 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2950 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2951 }
2952 }
cd81e915
NC
2953 if (PL_thismad) /* install head */
2954 CURMAD('X', PL_thistoken);
5db06880
NC
2955 }
2956
2957 /* last whitespace of a sublex? */
cd81e915
NC
2958 if (optype == ')' && PL_endwhite) {
2959 CURMAD('X', PL_endwhite);
5db06880
NC
2960 }
2961
cd81e915 2962 if (!PL_thismad) {
5db06880
NC
2963
2964 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2965 if (!PL_thiswhite && !PL_endwhite && !optype) {
2966 sv_free(PL_thistoken);
2967 PL_thistoken = 0;
5db06880
NC
2968 return 0;
2969 }
2970
2971 /* put off final whitespace till peg */
2972 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2973 PL_nextwhite = PL_thiswhite;
2974 PL_thiswhite = 0;
5db06880 2975 }
cd81e915
NC
2976 else if (PL_thisopen) {
2977 CURMAD('q', PL_thisopen);
2978 if (PL_thistoken)
2979 sv_free(PL_thistoken);
2980 PL_thistoken = 0;
5db06880
NC
2981 }
2982 else {
2983 /* Store actual token text as madprop X */
cd81e915 2984 CURMAD('X', PL_thistoken);
5db06880
NC
2985 }
2986
cd81e915 2987 if (PL_thiswhite) {
5db06880 2988 /* add preceding whitespace as madprop _ */
cd81e915 2989 CURMAD('_', PL_thiswhite);
5db06880
NC
2990 }
2991
cd81e915 2992 if (PL_thisstuff) {
5db06880 2993 /* add quoted material as madprop = */
cd81e915 2994 CURMAD('=', PL_thisstuff);
5db06880
NC
2995 }
2996
cd81e915 2997 if (PL_thisclose) {
5db06880 2998 /* add terminating quote as madprop Q */
cd81e915 2999 CURMAD('Q', PL_thisclose);
5db06880
NC
3000 }
3001 }
3002
3003 /* special processing based on optype */
3004
3005 switch (optype) {
3006
3007 /* opval doesn't need a TOKEN since it can already store mp */
3008 case WORD:
3009 case METHOD:
3010 case FUNCMETH:
3011 case THING:
3012 case PMFUNC:
3013 case PRIVATEREF:
3014 case FUNC0SUB:
3015 case UNIOPSUB:
3016 case LSTOPSUB:
3017 if (yylval.opval)
cd81e915
NC
3018 append_madprops(PL_thismad, yylval.opval, 0);
3019 PL_thismad = 0;
5db06880
NC
3020 return optype;
3021
3022 /* fake EOF */
3023 case 0:
3024 optype = PEG;
cd81e915
NC
3025 if (PL_endwhite) {
3026 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3027 PL_endwhite = 0;
5db06880
NC
3028 }
3029 break;
3030
3031 case ']':
3032 case '}':
cd81e915 3033 if (PL_faketokens)
5db06880
NC
3034 break;
3035 /* remember any fake bracket that lexer is about to discard */
3036 if (PL_lex_brackets == 1 &&
3037 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3038 {
3039 s = PL_bufptr;
3040 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3041 s++;
3042 if (*s == '}') {
cd81e915
NC
3043 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3044 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3045 PL_thiswhite = 0;
5db06880
NC
3046 PL_bufptr = s - 1;
3047 break; /* don't bother looking for trailing comment */
3048 }
3049 else
3050 s = PL_bufptr;
3051 }
3052 if (optype == ']')
3053 break;
3054 /* FALLTHROUGH */
3055
3056 /* attach a trailing comment to its statement instead of next token */
3057 case ';':
cd81e915 3058 if (PL_faketokens)
5db06880
NC
3059 break;
3060 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3061 s = PL_bufptr;
3062 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3063 s++;
3064 if (*s == '\n' || *s == '#') {
3065 while (s < PL_bufend && *s != '\n')
3066 s++;
3067 if (s < PL_bufend)
3068 s++;
cd81e915
NC
3069 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3070 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3071 PL_thiswhite = 0;
5db06880
NC
3072 PL_bufptr = s;
3073 }
3074 }
3075 break;
3076
3077 /* pval */
3078 case LABEL:
3079 break;
3080
3081 /* ival */
3082 default:
3083 break;
3084
3085 }
3086
3087 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3088 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3089 PL_thismad = 0;
5db06880
NC
3090 return optype;
3091}
3092#endif
3093
468aa647 3094STATIC char *
cc6ed77d 3095S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3096 dVAR;
468aa647
RGS
3097 if (PL_expect != XSTATE)
3098 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3099 is_use ? "use" : "no"));
29595ff2 3100 s = SKIPSPACE1(s);
468aa647
RGS
3101 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3102 s = force_version(s, TRUE);
29595ff2 3103 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3104 start_force(PL_curforce);
9ded7720 3105 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3106 force_next(WORD);
3107 }
3108 else if (*s == 'v') {
3109 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3110 s = force_version(s, FALSE);
3111 }
3112 }
3113 else {
3114 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3115 s = force_version(s, FALSE);
3116 }
3117 yylval.ival = is_use;
3118 return s;
3119}
748a9306 3120#ifdef DEBUGGING
27da23d5 3121 static const char* const exp_name[] =
09bef843 3122 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3123 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3124 };
748a9306 3125#endif
463ee0b2 3126
02aa26ce
NT
3127/*
3128 yylex
3129
3130 Works out what to call the token just pulled out of the input
3131 stream. The yacc parser takes care of taking the ops we return and
3132 stitching them into a tree.
3133
3134 Returns:
3135 PRIVATEREF
3136
3137 Structure:
3138 if read an identifier
3139 if we're in a my declaration
3140 croak if they tried to say my($foo::bar)
3141 build the ops for a my() declaration
3142 if it's an access to a my() variable
3143 are we in a sort block?
3144 croak if my($a); $a <=> $b
3145 build ops for access to a my() variable
3146 if in a dq string, and they've said @foo and we can't find @foo
3147 croak
3148 build ops for a bareword
3149 if we already built the token before, use it.
3150*/
3151
20141f0e 3152
dba4d153
JH
3153#ifdef __SC__
3154#pragma segment Perl_yylex
3155#endif
dba4d153 3156int
dba4d153 3157Perl_yylex(pTHX)
20141f0e 3158{
97aff369 3159 dVAR;
3afc138a 3160 register char *s = PL_bufptr;
378cc40b 3161 register char *d;
463ee0b2 3162 STRLEN len;
aa7440fb 3163 bool bof = FALSE;
a687059c 3164
10edeb5d
JH
3165 /* orig_keyword, gvp, and gv are initialized here because
3166 * jump to the label just_a_word_zero can bypass their
3167 * initialization later. */
3168 I32 orig_keyword = 0;
3169 GV *gv = NULL;
3170 GV **gvp = NULL;
3171
bbf60fe6 3172 DEBUG_T( {
396482e1 3173 SV* tmp = newSVpvs("");
b6007c36
DM
3174 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3175 (IV)CopLINE(PL_curcop),
3176 lex_state_names[PL_lex_state],
3177 exp_name[PL_expect],
3178 pv_display(tmp, s, strlen(s), 0, 60));
3179 SvREFCNT_dec(tmp);
bbf60fe6 3180 } );
02aa26ce 3181 /* check if there's an identifier for us to look at */
ba979b31 3182 if (PL_pending_ident)
bbf60fe6 3183 return REPORT(S_pending_ident(aTHX));
bbce6d69 3184
02aa26ce
NT
3185 /* no identifier pending identification */
3186
3280af22 3187 switch (PL_lex_state) {
79072805
LW
3188#ifdef COMMENTARY
3189 case LEX_NORMAL: /* Some compilers will produce faster */
3190 case LEX_INTERPNORMAL: /* code if we comment these out. */
3191 break;
3192#endif
3193
09bef843 3194 /* when we've already built the next token, just pull it out of the queue */
79072805 3195 case LEX_KNOWNEXT:
5db06880
NC
3196#ifdef PERL_MAD
3197 PL_lasttoke--;
3198 yylval = PL_nexttoke[PL_lasttoke].next_val;
3199 if (PL_madskills) {
cd81e915 3200 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3201 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3202 if (PL_thismad && PL_thismad->mad_key == '_') {
3203 PL_thiswhite = (SV*)PL_thismad->mad_val;
3204 PL_thismad->mad_val = 0;
3205 mad_free(PL_thismad);
3206 PL_thismad = 0;
5db06880
NC
3207 }
3208 }
3209 if (!PL_lasttoke) {
3210 PL_lex_state = PL_lex_defer;
3211 PL_expect = PL_lex_expect;
3212 PL_lex_defer = LEX_NORMAL;
3213 if (!PL_nexttoke[PL_lasttoke].next_type)
3214 return yylex();
3215 }
3216#else
3280af22 3217 PL_nexttoke--;
5db06880 3218 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3219 if (!PL_nexttoke) {
3220 PL_lex_state = PL_lex_defer;
3221 PL_expect = PL_lex_expect;
3222 PL_lex_defer = LEX_NORMAL;
463ee0b2 3223 }
5db06880
NC
3224#endif
3225#ifdef PERL_MAD
3226 /* FIXME - can these be merged? */
3227 return(PL_nexttoke[PL_lasttoke].next_type);
3228#else
bbf60fe6 3229 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3230#endif
79072805 3231
02aa26ce 3232 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3233 when we get here, PL_bufptr is at the \
02aa26ce 3234 */
79072805
LW
3235 case LEX_INTERPCASEMOD:
3236#ifdef DEBUGGING
3280af22 3237 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3238 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3239#endif
02aa26ce 3240 /* handle \E or end of string */
3280af22 3241 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3242 /* if at a \E */
3280af22 3243 if (PL_lex_casemods) {
f54cb97a 3244 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3245 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3246
3792a11b
NC
3247 if (PL_bufptr != PL_bufend
3248 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3249 PL_bufptr += 2;
3250 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3251#ifdef PERL_MAD
3252 if (PL_madskills)
6b29d1f5 3253 PL_thistoken = newSVpvs("\\E");
5db06880 3254#endif
a0d0e21e 3255 }
bbf60fe6 3256 return REPORT(')');
79072805 3257 }
5db06880
NC
3258#ifdef PERL_MAD
3259 while (PL_bufptr != PL_bufend &&
3260 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3261 if (!PL_thiswhite)
6b29d1f5 3262 PL_thiswhite = newSVpvs("");
cd81e915 3263 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3264 PL_bufptr += 2;
3265 }
3266#else
3280af22
NIS
3267 if (PL_bufptr != PL_bufend)
3268 PL_bufptr += 2;
5db06880 3269#endif
3280af22 3270 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3271 return yylex();
79072805
LW
3272 }
3273 else {
607df283 3274 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3275 "### Saw case modifier\n"); });
3280af22 3276 s = PL_bufptr + 1;
6e909404 3277 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3278#ifdef PERL_MAD
cd81e915 3279 if (!PL_thiswhite)
6b29d1f5 3280 PL_thiswhite = newSVpvs("");
cd81e915 3281 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3282#endif
89122651 3283 PL_bufptr = s + 3;
6e909404
JH
3284 PL_lex_state = LEX_INTERPCONCAT;
3285 return yylex();
a0d0e21e 3286 }
6e909404 3287 else {
90771dc0 3288 I32 tmp;
5db06880
NC
3289 if (!PL_madskills) /* when just compiling don't need correct */
3290 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3291 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3292 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3293 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3294 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3295 return REPORT(')');
6e909404
JH
3296 }
3297 if (PL_lex_casemods > 10)
3298 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3299 PL_lex_casestack[PL_lex_casemods++] = *s;
3300 PL_lex_casestack[PL_lex_casemods] = '\0';
3301 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3302 start_force(PL_curforce);
9ded7720 3303 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3304 force_next('(');
cd81e915 3305 start_force(PL_curforce);
6e909404 3306 if (*s == 'l')
9ded7720 3307 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3308 else if (*s == 'u')
9ded7720 3309 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3310 else if (*s == 'L')
9ded7720 3311 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3312 else if (*s == 'U')
9ded7720 3313 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3314 else if (*s == 'Q')
9ded7720 3315 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3316 else
3317 Perl_croak(aTHX_ "panic: yylex");
5db06880 3318 if (PL_madskills) {
6b29d1f5 3319 SV* const tmpsv = newSVpvs("");
5db06880
NC
3320 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3321 curmad('_', tmpsv);
3322 }
6e909404 3323 PL_bufptr = s + 1;
a0d0e21e 3324 }
79072805 3325 force_next(FUNC);
3280af22
NIS
3326 if (PL_lex_starts) {
3327 s = PL_bufptr;
3328 PL_lex_starts = 0;
5db06880
NC
3329#ifdef PERL_MAD
3330 if (PL_madskills) {
cd81e915
NC
3331 if (PL_thistoken)
3332 sv_free(PL_thistoken);
6b29d1f5 3333 PL_thistoken = newSVpvs("");
5db06880
NC
3334 }
3335#endif
131b3ad0
DM
3336 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3337 if (PL_lex_casemods == 1 && PL_lex_inpat)
3338 OPERATOR(',');
3339 else
3340 Aop(OP_CONCAT);
79072805
LW
3341 }
3342 else
cea2e8a9 3343 return yylex();
79072805
LW
3344 }
3345
55497cff 3346 case LEX_INTERPPUSH:
bbf60fe6 3347 return REPORT(sublex_push());
55497cff 3348
79072805 3349 case LEX_INTERPSTART:
3280af22 3350 if (PL_bufptr == PL_bufend)
bbf60fe6 3351 return REPORT(sublex_done());
607df283 3352 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3353 "### Interpolated variable\n"); });
3280af22
NIS
3354 PL_expect = XTERM;
3355 PL_lex_dojoin = (*PL_bufptr == '@');
3356 PL_lex_state = LEX_INTERPNORMAL;
3357 if (PL_lex_dojoin) {
cd81e915 3358 start_force(PL_curforce);
9ded7720 3359 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3360 force_next(',');
cd81e915 3361 start_force(PL_curforce);
a0d0e21e 3362 force_ident("\"", '$');
cd81e915 3363 start_force(PL_curforce);
9ded7720 3364 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3365 force_next('$');
cd81e915 3366 start_force(PL_curforce);
9ded7720 3367 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3368 force_next('(');
cd81e915 3369 start_force(PL_curforce);
9ded7720 3370 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3371 force_next(FUNC);
3372 }
3280af22
NIS
3373 if (PL_lex_starts++) {
3374 s = PL_bufptr;
5db06880
NC
3375#ifdef PERL_MAD
3376 if (PL_madskills) {
cd81e915
NC
3377 if (PL_thistoken)
3378 sv_free(PL_thistoken);
6b29d1f5 3379 PL_thistoken = newSVpvs("");
5db06880
NC
3380 }
3381#endif
131b3ad0
DM
3382 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3383 if (!PL_lex_casemods && PL_lex_inpat)
3384 OPERATOR(',');
3385 else
3386 Aop(OP_CONCAT);
79072805 3387 }
cea2e8a9 3388 return yylex();
79072805
LW
3389
3390 case LEX_INTERPENDMAYBE:
3280af22
NIS
3391 if (intuit_more(PL_bufptr)) {
3392 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3393 break;
3394 }
3395 /* FALL THROUGH */
3396
3397 case LEX_INTERPEND:
3280af22
NIS
3398 if (PL_lex_dojoin) {
3399 PL_lex_dojoin = FALSE;
3400 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3401#ifdef PERL_MAD
3402 if (PL_madskills) {
cd81e915
NC
3403 if (PL_thistoken)
3404 sv_free(PL_thistoken);
6b29d1f5 3405 PL_thistoken = newSVpvs("");
5db06880
NC
3406 }
3407#endif
bbf60fe6 3408 return REPORT(')');
79072805 3409 }
43a16006 3410 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3411 && SvEVALED(PL_lex_repl))
43a16006 3412 {
e9fa98b2 3413 if (PL_bufptr != PL_bufend)
cea2e8a9 3414 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3415 PL_lex_repl = NULL;
e9fa98b2 3416 }
79072805
LW
3417 /* FALLTHROUGH */
3418 case LEX_INTERPCONCAT:
3419#ifdef DEBUGGING
3280af22 3420 if (PL_lex_brackets)
cea2e8a9 3421 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3422#endif
3280af22 3423 if (PL_bufptr == PL_bufend)
bbf60fe6 3424 return REPORT(sublex_done());
79072805 3425
3280af22
NIS
3426 if (SvIVX(PL_linestr) == '\'') {
3427 SV *sv = newSVsv(PL_linestr);
3428 if (!PL_lex_inpat)
76e3520e 3429 sv = tokeq(sv);
3280af22 3430 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3431 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3432 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3433 s = PL_bufend;
79072805
LW
3434 }
3435 else {
3280af22 3436 s = scan_const(PL_bufptr);
79072805 3437 if (*s == '\\')
3280af22 3438 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3439 else
3280af22 3440 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3441 }
3442
3280af22 3443 if (s != PL_bufptr) {
cd81e915 3444 start_force(PL_curforce);
5db06880
NC
3445 if (PL_madskills) {
3446 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3447 }
9ded7720 3448 NEXTVAL_NEXTTOKE = yylval;
3280af22 3449 PL_expect = XTERM;
79072805 3450 force_next(THING);
131b3ad0 3451 if (PL_lex_starts++) {
5db06880
NC
3452#ifdef PERL_MAD
3453 if (PL_madskills) {
cd81e915
NC
3454 if (PL_thistoken)
3455 sv_free(PL_thistoken);
6b29d1f5 3456 PL_thistoken = newSVpvs("");
5db06880
NC
3457 }
3458#endif
131b3ad0
DM
3459 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3460 if (!PL_lex_casemods && PL_lex_inpat)
3461 OPERATOR(',');
3462 else
3463 Aop(OP_CONCAT);
3464 }
79072805 3465 else {
3280af22 3466 PL_bufptr = s;
cea2e8a9 3467 return yylex();
79072805
LW
3468 }
3469 }
3470
cea2e8a9 3471 return yylex();
a0d0e21e 3472 case LEX_FORMLINE:
3280af22
NIS
3473 PL_lex_state = LEX_NORMAL;
3474 s = scan_formline(PL_bufptr);
3475 if (!PL_lex_formbrack)
a0d0e21e
LW
3476 goto rightbracket;
3477 OPERATOR(';');
79072805
LW
3478 }
3479
3280af22
NIS
3480 s = PL_bufptr;
3481 PL_oldoldbufptr = PL_oldbufptr;
3482 PL_oldbufptr = s;
463ee0b2
LW
3483
3484 retry:
5db06880 3485#ifdef PERL_MAD
cd81e915
NC
3486 if (PL_thistoken) {
3487 sv_free(PL_thistoken);
3488 PL_thistoken = 0;
5db06880 3489 }
cd81e915 3490 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3491#endif
378cc40b
LW
3492 switch (*s) {
3493 default:
7e2040f0 3494 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3495 goto keylookup;
cea2e8a9 3496 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3497 case 4:
3498 case 26:
3499 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3500 case 0:
5db06880
NC
3501#ifdef PERL_MAD
3502 if (PL_madskills)
cd81e915 3503 PL_faketokens = 0;
5db06880 3504#endif
3280af22
NIS
3505 if (!PL_rsfp) {
3506 PL_last_uni = 0;
3507 PL_last_lop = 0;
c5ee2135 3508 if (PL_lex_brackets) {
10edeb5d
JH
3509 yyerror((const char *)
3510 (PL_lex_formbrack
3511 ? "Format not terminated"
3512 : "Missing right curly or square bracket"));
c5ee2135 3513 }
4e553d73 3514 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3515 "### Tokener got EOF\n");
5f80b19c 3516 } );
79072805 3517 TOKEN(0);
463ee0b2 3518 }
3280af22 3519 if (s++ < PL_bufend)
a687059c 3520 goto retry; /* ignore stray nulls */
3280af22
NIS
3521 PL_last_uni = 0;
3522 PL_last_lop = 0;
3523 if (!PL_in_eval && !PL_preambled) {
3524 PL_preambled = TRUE;
5db06880
NC
3525#ifdef PERL_MAD
3526 if (PL_madskills)
cd81e915 3527 PL_faketokens = 1;
5db06880 3528#endif
3280af22
NIS
3529 sv_setpv(PL_linestr,incl_perldb());
3530 if (SvCUR(PL_linestr))
396482e1 3531 sv_catpvs(PL_linestr,";");
3280af22
NIS
3532 if (PL_preambleav){
3533 while(AvFILLp(PL_preambleav) >= 0) {
3534 SV *tmpsv = av_shift(PL_preambleav);
3535 sv_catsv(PL_linestr, tmpsv);
396482e1 3536 sv_catpvs(PL_linestr, ";");
91b7def8 3537 sv_free(tmpsv);
3538 }
3280af22
NIS
3539 sv_free((SV*)PL_preambleav);
3540 PL_preambleav = NULL;
91b7def8 3541 }
3280af22 3542 if (PL_minus_n || PL_minus_p) {
396482e1 3543 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3544 if (PL_minus_l)
396482e1 3545 sv_catpvs(PL_linestr,"chomp;");
3280af22 3546 if (PL_minus_a) {
3280af22 3547 if (PL_minus_F) {
3792a11b
NC
3548 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3549 || *PL_splitstr == '"')
3280af22 3550 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3551 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3552 else {
c8ef6a4b
NC
3553 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3554 bytes can be used as quoting characters. :-) */
dd374669