This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
in lex_start(), ensure that all lexer state vars are saved
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
0bd48802 28static const char ident_too_long[] = "Identifier too long";
c445ea15 29static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 30
acfe0abc 31static void restore_rsfp(pTHX_ void *f);
6e3aabd6 32#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
33static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 35#endif
51371543 36
29595ff2 37#ifdef PERL_MAD
29595ff2 38# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 39# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 40#else
5db06880 41# define CURMAD(slot,sv)
9ded7720 42# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
43#endif
44
9059aa12
LW
45#define XFAKEBRACK 128
46#define XENUMMASK 127
47
39e02b42
JH
48#ifdef USE_UTF8_SCRIPTS
49# define UTF (!IN_BYTES)
2b9d42f0 50#else
746b446a 51# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 52#endif
a0ed51b3 53
61f0cdd9 54/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
55 * 1999-02-27 mjd-perl-patch@plover.com */
56#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
57
bf4acbe4
GS
58/* On MacOS, respect nonbreaking spaces */
59#ifdef MACOS_TRADITIONAL
60#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
61#else
62#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
63#endif
64
ffb4593c
NT
65/* LEX_* are values for PL_lex_state, the state of the lexer.
66 * They are arranged oddly so that the guard on the switch statement
79072805
LW
67 * can get by with a single comparison (if the compiler is smart enough).
68 */
69
fb73857a 70/* #define LEX_NOTPARSING 11 is done in perl.h. */
71
b6007c36
DM
72#define LEX_NORMAL 10 /* normal code (ie not within "...") */
73#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
74#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
75#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
76#define LEX_INTERPSTART 6 /* expecting the start of a $var */
77
78 /* at end of code, eg "$x" followed by: */
79#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
80#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
81
82#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
83 string or after \E, $foo, etc */
84#define LEX_INTERPCONST 2 /* NOT USED */
85#define LEX_FORMLINE 1 /* expecting a format line */
86#define LEX_KNOWNEXT 0 /* next token known; just return it */
87
79072805 88
bbf60fe6 89#ifdef DEBUGGING
27da23d5 90static const char* const lex_state_names[] = {
bbf60fe6
DM
91 "KNOWNEXT",
92 "FORMLINE",
93 "INTERPCONST",
94 "INTERPCONCAT",
95 "INTERPENDMAYBE",
96 "INTERPEND",
97 "INTERPSTART",
98 "INTERPPUSH",
99 "INTERPCASEMOD",
100 "INTERPNORMAL",
101 "NORMAL"
102};
103#endif
104
79072805
LW
105#ifdef ff_next
106#undef ff_next
d48672a2
LW
107#endif
108
79072805 109#include "keywords.h"
fe14fcc3 110
ffb4593c
NT
111/* CLINE is a macro that ensures PL_copline has a sane value */
112
ae986130
LW
113#ifdef CLINE
114#undef CLINE
115#endif
57843af0 116#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 117
5db06880 118#ifdef PERL_MAD
29595ff2
NC
119# define SKIPSPACE0(s) skipspace0(s)
120# define SKIPSPACE1(s) skipspace1(s)
121# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
122# define PEEKSPACE(s) skipspace2(s,0)
123#else
124# define SKIPSPACE0(s) skipspace(s)
125# define SKIPSPACE1(s) skipspace(s)
126# define SKIPSPACE2(s,tsv) skipspace(s)
127# define PEEKSPACE(s) skipspace(s)
128#endif
129
ffb4593c
NT
130/*
131 * Convenience functions to return different tokens and prime the
9cbb5ea2 132 * lexer for the next token. They all take an argument.
ffb4593c
NT
133 *
134 * TOKEN : generic token (used for '(', DOLSHARP, etc)
135 * OPERATOR : generic operator
136 * AOPERATOR : assignment operator
137 * PREBLOCK : beginning the block after an if, while, foreach, ...
138 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
139 * PREREF : *EXPR where EXPR is not a simple identifier
140 * TERM : expression term
141 * LOOPX : loop exiting command (goto, last, dump, etc)
142 * FTST : file test operator
143 * FUN0 : zero-argument function
2d2e263d 144 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
145 * BOop : bitwise or or xor
146 * BAop : bitwise and
147 * SHop : shift operator
148 * PWop : power operator
9cbb5ea2 149 * PMop : pattern-matching operator
ffb4593c
NT
150 * Aop : addition-level operator
151 * Mop : multiplication-level operator
152 * Eop : equality-testing operator
e5edeb50 153 * Rop : relational operator <= != gt
ffb4593c
NT
154 *
155 * Also see LOP and lop() below.
156 */
157
998054bd 158#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 159# define REPORT(retval) tokereport((I32)retval)
998054bd 160#else
bbf60fe6 161# define REPORT(retval) (retval)
998054bd
SC
162#endif
163
bbf60fe6
DM
164#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
165#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
166#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
167#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
168#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
169#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
170#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
171#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
172#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
173#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
174#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
175#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
176#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
177#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
178#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
179#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
180#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
181#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
182#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
183#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 184
a687059c
LW
185/* This bit of chicanery makes a unary function followed by
186 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
187 * The UNIDOR macro is for unary functions that can be followed by the //
188 * operator (such as C<shift // 0>).
a687059c 189 */
376fcdbf
AL
190#define UNI2(f,x) { \
191 yylval.ival = f; \
192 PL_expect = x; \
193 PL_bufptr = s; \
194 PL_last_uni = PL_oldbufptr; \
195 PL_last_lop_op = f; \
196 if (*s == '(') \
197 return REPORT( (int)FUNC1 ); \
29595ff2 198 s = PEEKSPACE(s); \
376fcdbf
AL
199 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
200 }
6f33ba73
RGS
201#define UNI(f) UNI2(f,XTERM)
202#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 203
376fcdbf
AL
204#define UNIBRACK(f) { \
205 yylval.ival = f; \
206 PL_bufptr = s; \
207 PL_last_uni = PL_oldbufptr; \
208 if (*s == '(') \
209 return REPORT( (int)FUNC1 ); \
29595ff2 210 s = PEEKSPACE(s); \
376fcdbf
AL
211 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
212 }
79072805 213
9f68db38 214/* grandfather return to old style */
3280af22 215#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 216
8fa7f367
JH
217#ifdef DEBUGGING
218
bbf60fe6
DM
219/* how to interpret the yylval associated with the token */
220enum token_type {
221 TOKENTYPE_NONE,
222 TOKENTYPE_IVAL,
223 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
224 TOKENTYPE_PVAL,
225 TOKENTYPE_OPVAL,
226 TOKENTYPE_GVVAL
227};
228
6d4a66ac
NC
229static struct debug_tokens {
230 const int token;
231 enum token_type type;
232 const char *name;
233} const debug_tokens[] =
9041c2e3 234{
bbf60fe6
DM
235 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
236 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
237 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
238 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
239 { ARROW, TOKENTYPE_NONE, "ARROW" },
240 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
241 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
242 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
243 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
244 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 245 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
246 { DO, TOKENTYPE_NONE, "DO" },
247 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
248 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
249 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
250 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
251 { ELSE, TOKENTYPE_NONE, "ELSE" },
252 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
253 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
254 { FOR, TOKENTYPE_IVAL, "FOR" },
255 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
256 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
257 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
258 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
259 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
260 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 261 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
262 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
263 { IF, TOKENTYPE_IVAL, "IF" },
264 { LABEL, TOKENTYPE_PVAL, "LABEL" },
265 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
266 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
267 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
268 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
269 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
270 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
271 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
272 { MY, TOKENTYPE_IVAL, "MY" },
273 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
274 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
275 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
276 { OROP, TOKENTYPE_IVAL, "OROP" },
277 { OROR, TOKENTYPE_NONE, "OROR" },
278 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
279 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
280 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
281 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
282 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
283 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
284 { PREINC, TOKENTYPE_NONE, "PREINC" },
285 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
286 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
287 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
288 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
289 { SUB, TOKENTYPE_NONE, "SUB" },
290 { THING, TOKENTYPE_OPVAL, "THING" },
291 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
292 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
293 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
294 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
295 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
296 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 297 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
298 { WHILE, TOKENTYPE_IVAL, "WHILE" },
299 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 300 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
301};
302
303/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 304
bbf60fe6 305STATIC int
f5bd084c 306S_tokereport(pTHX_ I32 rv)
bbf60fe6 307{
97aff369 308 dVAR;
bbf60fe6 309 if (DEBUG_T_TEST) {
bd61b366 310 const char *name = NULL;
bbf60fe6 311 enum token_type type = TOKENTYPE_NONE;
f54cb97a 312 const struct debug_tokens *p;
396482e1 313 SV* const report = newSVpvs("<== ");
bbf60fe6 314
f54cb97a 315 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
316 if (p->token == (int)rv) {
317 name = p->name;
318 type = p->type;
319 break;
320 }
321 }
322 if (name)
54667de8 323 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
324 else if ((char)rv > ' ' && (char)rv < '~')
325 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
326 else if (!rv)
396482e1 327 sv_catpvs(report, "EOF");
bbf60fe6
DM
328 else
329 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
330 switch (type) {
331 case TOKENTYPE_NONE:
332 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
333 break;
334 case TOKENTYPE_IVAL:
e4584336 335 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
336 break;
337 case TOKENTYPE_OPNUM:
338 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
339 PL_op_name[yylval.ival]);
340 break;
341 case TOKENTYPE_PVAL:
342 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
343 break;
344 case TOKENTYPE_OPVAL:
b6007c36 345 if (yylval.opval) {
401441c0 346 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 347 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
348 if (yylval.opval->op_type == OP_CONST) {
349 Perl_sv_catpvf(aTHX_ report, " %s",
350 SvPEEK(cSVOPx_sv(yylval.opval)));
351 }
352
353 }
401441c0 354 else
396482e1 355 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
356 break;
357 }
b6007c36 358 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
359 };
360 return (int)rv;
998054bd
SC
361}
362
b6007c36
DM
363
364/* print the buffer with suitable escapes */
365
366STATIC void
367S_printbuf(pTHX_ const char* fmt, const char* s)
368{
396482e1 369 SV* const tmp = newSVpvs("");
b6007c36
DM
370 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
371 SvREFCNT_dec(tmp);
372}
373
8fa7f367
JH
374#endif
375
ffb4593c
NT
376/*
377 * S_ao
378 *
c963b151
BD
379 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
380 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
381 */
382
76e3520e 383STATIC int
cea2e8a9 384S_ao(pTHX_ int toketype)
a0d0e21e 385{
97aff369 386 dVAR;
3280af22
NIS
387 if (*PL_bufptr == '=') {
388 PL_bufptr++;
a0d0e21e
LW
389 if (toketype == ANDAND)
390 yylval.ival = OP_ANDASSIGN;
391 else if (toketype == OROR)
392 yylval.ival = OP_ORASSIGN;
c963b151
BD
393 else if (toketype == DORDOR)
394 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
395 toketype = ASSIGNOP;
396 }
397 return toketype;
398}
399
ffb4593c
NT
400/*
401 * S_no_op
402 * When Perl expects an operator and finds something else, no_op
403 * prints the warning. It always prints "<something> found where
404 * operator expected. It prints "Missing semicolon on previous line?"
405 * if the surprise occurs at the start of the line. "do you need to
406 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
407 * where the compiler doesn't know if foo is a method call or a function.
408 * It prints "Missing operator before end of line" if there's nothing
409 * after the missing operator, or "... before <...>" if there is something
410 * after the missing operator.
411 */
412
76e3520e 413STATIC void
bfed75c6 414S_no_op(pTHX_ const char *what, char *s)
463ee0b2 415{
97aff369 416 dVAR;
9d4ba2ae
AL
417 char * const oldbp = PL_bufptr;
418 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 419
1189a94a
GS
420 if (!s)
421 s = oldbp;
07c798fb 422 else
1189a94a 423 PL_bufptr = s;
cea2e8a9 424 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
425 if (ckWARN_d(WARN_SYNTAX)) {
426 if (is_first)
427 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
428 "\t(Missing semicolon on previous line?)\n");
429 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 430 const char *t;
c35e046a
AL
431 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
432 NOOP;
56da5a46
RGS
433 if (t < PL_bufptr && isSPACE(*t))
434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
435 "\t(Do you need to predeclare %.*s?)\n",
551405c4 436 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
437 }
438 else {
439 assert(s >= oldbp);
440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 441 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 442 }
07c798fb 443 }
3280af22 444 PL_bufptr = oldbp;
8990e307
LW
445}
446
ffb4593c
NT
447/*
448 * S_missingterm
449 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 450 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
451 * If we're in a delimited string and the delimiter is a control
452 * character, it's reformatted into a two-char sequence like ^C.
453 * This is fatal.
454 */
455
76e3520e 456STATIC void
cea2e8a9 457S_missingterm(pTHX_ char *s)
8990e307 458{
97aff369 459 dVAR;
8990e307
LW
460 char tmpbuf[3];
461 char q;
462 if (s) {
9d4ba2ae 463 char * const nl = strrchr(s,'\n');
d2719217 464 if (nl)
8990e307
LW
465 *nl = '\0';
466 }
9d116dd7
JH
467 else if (
468#ifdef EBCDIC
469 iscntrl(PL_multi_close)
470#else
471 PL_multi_close < 32 || PL_multi_close == 127
472#endif
473 ) {
8990e307 474 *tmpbuf = '^';
585ec06d 475 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
476 tmpbuf[2] = '\0';
477 s = tmpbuf;
478 }
479 else {
eb160463 480 *tmpbuf = (char)PL_multi_close;
8990e307
LW
481 tmpbuf[1] = '\0';
482 s = tmpbuf;
483 }
484 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 485 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 486}
79072805 487
ef89dcc3 488#define FEATURE_IS_ENABLED(name) \
0d863452 489 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 490 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
491/*
492 * S_feature_is_enabled
493 * Check whether the named feature is enabled.
494 */
495STATIC bool
d4c19fe8 496S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 497{
97aff369 498 dVAR;
0d863452 499 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 500 char he_name[32] = "feature_";
6fca0082 501 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 502
7b9ef140 503 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
504}
505
ffb4593c
NT
506/*
507 * Perl_deprecate
ffb4593c
NT
508 */
509
79072805 510void
bfed75c6 511Perl_deprecate(pTHX_ const char *s)
a0d0e21e 512{
599cee73 513 if (ckWARN(WARN_DEPRECATED))
9014280d 514 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
515}
516
12bcd1a6 517void
bfed75c6 518Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
519{
520 /* This function should NOT be called for any new deprecated warnings */
521 /* Use Perl_deprecate instead */
522 /* */
523 /* It is here to maintain backward compatibility with the pre-5.8 */
524 /* warnings category hierarchy. The "deprecated" category used to */
525 /* live under the "syntax" category. It is now a top-level category */
526 /* in its own right. */
527
528 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 529 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
530 "Use of %s is deprecated", s);
531}
532
ffb4593c 533/*
9cbb5ea2
GS
534 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
535 * utf16-to-utf8-reversed.
ffb4593c
NT
536 */
537
c39cd008
GS
538#ifdef PERL_CR_FILTER
539static void
540strip_return(SV *sv)
541{
95a20fc0 542 register const char *s = SvPVX_const(sv);
9d4ba2ae 543 register const char * const e = s + SvCUR(sv);
c39cd008
GS
544 /* outer loop optimized to do nothing if there are no CR-LFs */
545 while (s < e) {
546 if (*s++ == '\r' && *s == '\n') {
547 /* hit a CR-LF, need to copy the rest */
548 register char *d = s - 1;
549 *d++ = *s++;
550 while (s < e) {
551 if (*s == '\r' && s[1] == '\n')
552 s++;
553 *d++ = *s++;
554 }
555 SvCUR(sv) -= s - d;
556 return;
557 }
558 }
559}
a868473f 560
76e3520e 561STATIC I32
c39cd008 562S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 563{
f54cb97a 564 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
565 if (count > 0 && !maxlen)
566 strip_return(sv);
567 return count;
a868473f
NIS
568}
569#endif
570
ffb4593c
NT
571/*
572 * Perl_lex_start
9cbb5ea2
GS
573 * Initialize variables. Uses the Perl save_stack to save its state (for
574 * recursive calls to the parser).
ffb4593c
NT
575 */
576
a0d0e21e 577void
864dbfa3 578Perl_lex_start(pTHX_ SV *line)
79072805 579{
97aff369 580 dVAR;
cfd0369c 581 const char *s;
8990e307
LW
582 STRLEN len;
583
3280af22
NIS
584 SAVEI32(PL_lex_dojoin);
585 SAVEI32(PL_lex_brackets);
3280af22
NIS
586 SAVEI32(PL_lex_casemods);
587 SAVEI32(PL_lex_starts);
588 SAVEI32(PL_lex_state);
7766f137 589 SAVEVPTR(PL_lex_inpat);
3280af22 590 SAVEI32(PL_lex_inwhat);
5db06880
NC
591#ifdef PERL_MAD
592 if (PL_lex_state == LEX_KNOWNEXT) {
593 I32 toke = PL_lasttoke;
594 while (--toke >= 0) {
595 SAVEI32(PL_nexttoke[toke].next_type);
596 SAVEVPTR(PL_nexttoke[toke].next_val);
597 if (PL_madskills)
598 SAVEVPTR(PL_nexttoke[toke].next_mad);
599 }
600 SAVEI32(PL_lasttoke);
601 }
02b34bbe
DM
602 SAVESPTR(PL_endwhite);
603 SAVESPTR(PL_thistoken);
604 SAVESPTR(PL_thiswhite);
605 SAVESPTR(PL_nextwhite);
606 SAVESPTR(PL_thisopen);
607 SAVESPTR(PL_thisclose);
608 SAVESPTR(PL_thisstuff);
609 SAVEVPTR(PL_thismad);
610 SAVEI32(PL_realtokenstart);
611 SAVEI32(PL_faketokens);
612 SAVESPTR(PL_skipwhite);
cd81e915 613 SAVEI32(PL_curforce);
5db06880 614#else
18b09519
GS
615 if (PL_lex_state == LEX_KNOWNEXT) {
616 I32 toke = PL_nexttoke;
617 while (--toke >= 0) {
618 SAVEI32(PL_nexttype[toke]);
619 SAVEVPTR(PL_nextval[toke]);
620 }
621 SAVEI32(PL_nexttoke);
18b09519 622 }
5db06880 623#endif
57843af0 624 SAVECOPLINE(PL_curcop);
3280af22
NIS
625 SAVEPPTR(PL_bufptr);
626 SAVEPPTR(PL_bufend);
627 SAVEPPTR(PL_oldbufptr);
628 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
629 SAVEPPTR(PL_last_lop);
630 SAVEPPTR(PL_last_uni);
3280af22
NIS
631 SAVEPPTR(PL_linestart);
632 SAVESPTR(PL_linestr);
8edd5f42
RGS
633 SAVEGENERICPV(PL_lex_brackstack);
634 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 635 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
636 SAVESPTR(PL_lex_stuff);
637 SAVEI32(PL_lex_defer);
09bef843 638 SAVEI32(PL_sublex_info.sub_inwhat);
02b34bbe
DM
639 SAVEI32(PL_sublex_info.super_state);
640 SAVEVPTR(PL_sublex_info.sub_op);
641 SAVEPPTR(PL_sublex_info.super_bufptr);
642 SAVEPPTR(PL_sublex_info.super_bufend);
3280af22 643 SAVESPTR(PL_lex_repl);
bebdddfc
GS
644 SAVEINT(PL_expect);
645 SAVEINT(PL_lex_expect);
02b34bbe
DM
646 SAVEI32(PL_lex_formbrack);
647 SAVEVPTR(PL_lex_op);
648 SAVEI32(PL_multi_close);
649 SAVEI32(PL_multi_open);
650 SAVEI32(PL_multi_start);
651 SAVEI8(PL_pending_ident);
652 SAVEBOOL(PL_preambled);
3280af22
NIS
653
654 PL_lex_state = LEX_NORMAL;
655 PL_lex_defer = 0;
656 PL_expect = XSTATE;
657 PL_lex_brackets = 0;
a02a5408
JC
658 Newx(PL_lex_brackstack, 120, char);
659 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
660 PL_lex_casemods = 0;
661 *PL_lex_casestack = '\0';
662 PL_lex_dojoin = 0;
663 PL_lex_starts = 0;
a0714e2c
SS
664 PL_lex_stuff = NULL;
665 PL_lex_repl = NULL;
3280af22 666 PL_lex_inpat = 0;
5db06880
NC
667#ifdef PERL_MAD
668 PL_lasttoke = 0;
02b34bbe
DM
669 PL_endwhite = NULL;
670 PL_faketokens = 0;
671 PL_nextwhite = NULL;
672 PL_realtokenstart = 0;
673 PL_skipwhite = NULL;
674 PL_thisclose = NULL;
675 PL_thisopen = NULL;
676 PL_thisstuff = NULL;
677 PL_thistoken = NULL;
678 PL_thiswhite = NULL;
679 PL_thismad = NULL;
5db06880 680#else
76be56bc 681 PL_nexttoke = 0;
5db06880 682#endif
3280af22 683 PL_lex_inwhat = 0;
09bef843 684 PL_sublex_info.sub_inwhat = 0;
02b34bbe
DM
685 PL_sublex_info.super_state = 0;
686 PL_sublex_info.sub_op = NULL;
687 PL_sublex_info.super_bufptr = NULL;
688 PL_sublex_info.super_bufend = NULL;
689 PL_lex_expect = 0;
690 PL_lex_formbrack = 0;
691 PL_lex_op = NULL;
692 PL_multi_close = 0;
693 PL_multi_open = 0;
694 PL_multi_start = 0;
695 PL_pending_ident = '\0';
696 PL_preambled = FALSE;
697
10efb74f
NC
698 if (line) {
699 s = SvPV_const(line, len);
700 } else {
701 len = 0;
702 }
703 if (!len) {
704 PL_linestr = newSVpvs("\n;");
705 } else if (SvREADONLY(line) || s[len-1] != ';') {
706 PL_linestr = newSVsv(line);
707 if (s[len-1] != ';')
0eb20fa2 708 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
709 } else {
710 SvTEMP_off(line);
711 SvREFCNT_inc_simple_void_NN(line);
712 PL_linestr = line;
8990e307 713 }
db4997f0
NC
714 /* PL_linestr needs to survive until end of scope, not just the next
715 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 716 SAVEFREESV(PL_linestr);
3280af22
NIS
717 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
718 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 719 PL_last_lop = PL_last_uni = NULL;
3280af22 720 PL_rsfp = 0;
79072805 721}
a687059c 722
ffb4593c
NT
723/*
724 * Perl_lex_end
9cbb5ea2
GS
725 * Finalizer for lexing operations. Must be called when the parser is
726 * done with the lexer.
ffb4593c
NT
727 */
728
463ee0b2 729void
864dbfa3 730Perl_lex_end(pTHX)
463ee0b2 731{
97aff369 732 dVAR;
3280af22 733 PL_doextract = FALSE;
463ee0b2
LW
734}
735
ffb4593c
NT
736/*
737 * S_incline
738 * This subroutine has nothing to do with tilting, whether at windmills
739 * or pinball tables. Its name is short for "increment line". It
57843af0 740 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 741 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
742 * # line 500 "foo.pm"
743 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
744 */
745
76e3520e 746STATIC void
cea2e8a9 747S_incline(pTHX_ char *s)
463ee0b2 748{
97aff369 749 dVAR;
463ee0b2
LW
750 char *t;
751 char *n;
73659bf1 752 char *e;
463ee0b2 753 char ch;
463ee0b2 754
57843af0 755 CopLINE_inc(PL_curcop);
463ee0b2
LW
756 if (*s++ != '#')
757 return;
d4c19fe8
AL
758 while (SPACE_OR_TAB(*s))
759 s++;
73659bf1
GS
760 if (strnEQ(s, "line", 4))
761 s += 4;
762 else
763 return;
084592ab 764 if (SPACE_OR_TAB(*s))
73659bf1 765 s++;
4e553d73 766 else
73659bf1 767 return;
d4c19fe8
AL
768 while (SPACE_OR_TAB(*s))
769 s++;
463ee0b2
LW
770 if (!isDIGIT(*s))
771 return;
d4c19fe8 772
463ee0b2
LW
773 n = s;
774 while (isDIGIT(*s))
775 s++;
bf4acbe4 776 while (SPACE_OR_TAB(*s))
463ee0b2 777 s++;
73659bf1 778 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 779 s++;
73659bf1
GS
780 e = t + 1;
781 }
463ee0b2 782 else {
c35e046a
AL
783 t = s;
784 while (!isSPACE(*t))
785 t++;
73659bf1 786 e = t;
463ee0b2 787 }
bf4acbe4 788 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
789 e++;
790 if (*e != '\n' && *e != '\0')
791 return; /* false alarm */
792
463ee0b2
LW
793 ch = *t;
794 *t = '\0';
f4dd75d9 795 if (t - s > 0) {
8a5ee598 796#ifndef USE_ITHREADS
c4420975 797 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
798 STRLEN tmplen = cf ? strlen(cf) : 0;
799 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
800 /* must copy *{"::_<(eval N)[oldfilename:L]"}
801 * to *{"::_<newfilename"} */
802 char smallbuf[256], smallbuf2[256];
803 char *tmpbuf, *tmpbuf2;
8a5ee598 804 GV **gvp, *gv2;
e66cf94c
RGS
805 STRLEN tmplen2 = strlen(s);
806 if (tmplen + 3 < sizeof smallbuf)
807 tmpbuf = smallbuf;
808 else
809 Newx(tmpbuf, tmplen + 3, char);
810 if (tmplen2 + 3 < sizeof smallbuf2)
811 tmpbuf2 = smallbuf2;
812 else
813 Newx(tmpbuf2, tmplen2 + 3, char);
814 tmpbuf[0] = tmpbuf2[0] = '_';
815 tmpbuf[1] = tmpbuf2[1] = '<';
816 memcpy(tmpbuf + 2, cf, ++tmplen);
817 memcpy(tmpbuf2 + 2, s, ++tmplen2);
818 ++tmplen; ++tmplen2;
8a5ee598
RGS
819 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
820 if (gvp) {
821 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 822 if (!isGV(gv2)) {
8a5ee598 823 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
824 /* adjust ${"::_<newfilename"} to store the new file name */
825 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
826 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
827 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
828 }
8a5ee598 829 }
e66cf94c
RGS
830 if (tmpbuf != smallbuf) Safefree(tmpbuf);
831 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
832 }
8a5ee598 833#endif
05ec9bb3 834 CopFILE_free(PL_curcop);
57843af0 835 CopFILE_set(PL_curcop, s);
f4dd75d9 836 }
463ee0b2 837 *t = ch;
57843af0 838 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
839}
840
29595ff2 841#ifdef PERL_MAD
cd81e915 842/* skip space before PL_thistoken */
29595ff2
NC
843
844STATIC char *
845S_skipspace0(pTHX_ register char *s)
846{
847 s = skipspace(s);
848 if (!PL_madskills)
849 return s;
cd81e915
NC
850 if (PL_skipwhite) {
851 if (!PL_thiswhite)
6b29d1f5 852 PL_thiswhite = newSVpvs("");
cd81e915
NC
853 sv_catsv(PL_thiswhite, PL_skipwhite);
854 sv_free(PL_skipwhite);
855 PL_skipwhite = 0;
856 }
857 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
858 return s;
859}
860
cd81e915 861/* skip space after PL_thistoken */
29595ff2
NC
862
863STATIC char *
864S_skipspace1(pTHX_ register char *s)
865{
d4c19fe8 866 const char *start = s;
29595ff2
NC
867 I32 startoff = start - SvPVX(PL_linestr);
868
869 s = skipspace(s);
870 if (!PL_madskills)
871 return s;
872 start = SvPVX(PL_linestr) + startoff;
cd81e915 873 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 874 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
875 PL_thistoken = newSVpvn(tstart, start - tstart);
876 }
877 PL_realtokenstart = -1;
878 if (PL_skipwhite) {
879 if (!PL_nextwhite)
6b29d1f5 880 PL_nextwhite = newSVpvs("");
cd81e915
NC
881 sv_catsv(PL_nextwhite, PL_skipwhite);
882 sv_free(PL_skipwhite);
883 PL_skipwhite = 0;
29595ff2
NC
884 }
885 return s;
886}
887
888STATIC char *
889S_skipspace2(pTHX_ register char *s, SV **svp)
890{
c35e046a
AL
891 char *start;
892 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
893 const I32 startoff = s - SvPVX(PL_linestr);
894
29595ff2
NC
895 s = skipspace(s);
896 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
897 if (!PL_madskills || !svp)
898 return s;
899 start = SvPVX(PL_linestr) + startoff;
cd81e915 900 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 901 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
902 PL_thistoken = newSVpvn(tstart, start - tstart);
903 PL_realtokenstart = -1;
29595ff2 904 }
cd81e915 905 if (PL_skipwhite) {
29595ff2 906 if (!*svp)
6b29d1f5 907 *svp = newSVpvs("");
cd81e915
NC
908 sv_setsv(*svp, PL_skipwhite);
909 sv_free(PL_skipwhite);
910 PL_skipwhite = 0;
29595ff2
NC
911 }
912
913 return s;
914}
915#endif
916
80a702cd
RGS
917STATIC void
918S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
919{
920 AV *av = CopFILEAVx(PL_curcop);
921 if (av) {
922 SV * const sv = newSV(0);
923 sv_upgrade(sv, SVt_PVMG);
924 sv_setpvn(sv, buf, len);
925 (void)SvIOK_on(sv);
926 SvIV_set(sv, 0);
927 av_store(av, (I32)CopLINE(PL_curcop), sv);
928 }
929}
930
931STATIC void
932S_update_debugger_info_sv(pTHX_ SV *orig_sv)
933{
934 AV *av = CopFILEAVx(PL_curcop);
935 if (av) {
936 SV * const sv = newSV(0);
937 sv_upgrade(sv, SVt_PVMG);
938 sv_setsv(sv, orig_sv);
939 (void)SvIOK_on(sv);
940 SvIV_set(sv, 0);
941 av_store(av, (I32)CopLINE(PL_curcop), sv);
942 }
943}
944
ffb4593c
NT
945/*
946 * S_skipspace
947 * Called to gobble the appropriate amount and type of whitespace.
948 * Skips comments as well.
949 */
950
76e3520e 951STATIC char *
cea2e8a9 952S_skipspace(pTHX_ register char *s)
a687059c 953{
97aff369 954 dVAR;
5db06880
NC
955#ifdef PERL_MAD
956 int curoff;
957 int startoff = s - SvPVX(PL_linestr);
958
cd81e915
NC
959 if (PL_skipwhite) {
960 sv_free(PL_skipwhite);
961 PL_skipwhite = 0;
5db06880
NC
962 }
963#endif
964
3280af22 965 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 966 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 967 s++;
5db06880
NC
968#ifdef PERL_MAD
969 goto done;
970#else
463ee0b2 971 return s;
5db06880 972#endif
463ee0b2
LW
973 }
974 for (;;) {
fd049845 975 STRLEN prevlen;
09bef843 976 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 977 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
978 while (s < PL_bufend && isSPACE(*s)) {
979 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
980 incline(s);
981 }
ffb4593c
NT
982
983 /* comment */
3280af22
NIS
984 if (s < PL_bufend && *s == '#') {
985 while (s < PL_bufend && *s != '\n')
463ee0b2 986 s++;
60e6418e 987 if (s < PL_bufend) {
463ee0b2 988 s++;
60e6418e
GS
989 if (PL_in_eval && !PL_rsfp) {
990 incline(s);
991 continue;
992 }
993 }
463ee0b2 994 }
ffb4593c
NT
995
996 /* only continue to recharge the buffer if we're at the end
997 * of the buffer, we're not reading from a source filter, and
998 * we're in normal lexing mode
999 */
09bef843
SB
1000 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1001 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1002#ifdef PERL_MAD
1003 goto done;
1004#else
463ee0b2 1005 return s;
5db06880 1006#endif
ffb4593c
NT
1007
1008 /* try to recharge the buffer */
5db06880
NC
1009#ifdef PERL_MAD
1010 curoff = s - SvPVX(PL_linestr);
1011#endif
1012
9cbb5ea2 1013 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1014 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1015 {
5db06880
NC
1016#ifdef PERL_MAD
1017 if (PL_madskills && curoff != startoff) {
cd81e915 1018 if (!PL_skipwhite)
6b29d1f5 1019 PL_skipwhite = newSVpvs("");
cd81e915 1020 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1021 curoff - startoff);
1022 }
1023
1024 /* mustn't throw out old stuff yet if madpropping */
1025 SvCUR(PL_linestr) = curoff;
1026 s = SvPVX(PL_linestr) + curoff;
1027 *s = 0;
1028 if (curoff && s[-1] == '\n')
1029 s[-1] = ' ';
1030#endif
1031
9cbb5ea2 1032 /* end of file. Add on the -p or -n magic */
cd81e915 1033 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1034 if (PL_minus_p) {
5db06880
NC
1035#ifdef PERL_MAD
1036 sv_catpv(PL_linestr,
1037 ";}continue{print or die qq(-p destination: $!\\n);}");
1038#else
01a19ab0
NC
1039 sv_setpv(PL_linestr,
1040 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1041#endif
3280af22 1042 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1043 }
01a19ab0 1044 else if (PL_minus_n) {
5db06880
NC
1045#ifdef PERL_MAD
1046 sv_catpvn(PL_linestr, ";}", 2);
1047#else
01a19ab0 1048 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1049#endif
01a19ab0
NC
1050 PL_minus_n = 0;
1051 }
a0d0e21e 1052 else
5db06880
NC
1053#ifdef PERL_MAD
1054 sv_catpvn(PL_linestr,";", 1);
1055#else
4147a61b 1056 sv_setpvn(PL_linestr,";", 1);
5db06880 1057#endif
ffb4593c
NT
1058
1059 /* reset variables for next time we lex */
9cbb5ea2 1060 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1061 = SvPVX(PL_linestr)
1062#ifdef PERL_MAD
1063 + curoff
1064#endif
1065 ;
3280af22 1066 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1067 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1068
1069 /* Close the filehandle. Could be from -P preprocessor,
1070 * STDIN, or a regular file. If we were reading code from
1071 * STDIN (because the commandline held no -e or filename)
1072 * then we don't close it, we reset it so the code can
1073 * read from STDIN too.
1074 */
1075
3280af22
NIS
1076 if (PL_preprocess && !PL_in_eval)
1077 (void)PerlProc_pclose(PL_rsfp);
1078 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1079 PerlIO_clearerr(PL_rsfp);
8990e307 1080 else
3280af22 1081 (void)PerlIO_close(PL_rsfp);
4608196e 1082 PL_rsfp = NULL;
463ee0b2
LW
1083 return s;
1084 }
ffb4593c
NT
1085
1086 /* not at end of file, so we only read another line */
09bef843
SB
1087 /* make corresponding updates to old pointers, for yyerror() */
1088 oldprevlen = PL_oldbufptr - PL_bufend;
1089 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1090 if (PL_last_uni)
1091 oldunilen = PL_last_uni - PL_bufend;
1092 if (PL_last_lop)
1093 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1094 PL_linestart = PL_bufptr = s + prevlen;
1095 PL_bufend = s + SvCUR(PL_linestr);
1096 s = PL_bufptr;
09bef843
SB
1097 PL_oldbufptr = s + oldprevlen;
1098 PL_oldoldbufptr = s + oldoldprevlen;
1099 if (PL_last_uni)
1100 PL_last_uni = s + oldunilen;
1101 if (PL_last_lop)
1102 PL_last_lop = s + oldloplen;
a0d0e21e 1103 incline(s);
ffb4593c
NT
1104
1105 /* debugger active and we're not compiling the debugger code,
1106 * so store the line into the debugger's array of lines
1107 */
80a702cd
RGS
1108 if (PERLDB_LINE && PL_curstash != PL_debstash)
1109 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1110 }
5db06880
NC
1111
1112#ifdef PERL_MAD
1113 done:
1114 if (PL_madskills) {
cd81e915 1115 if (!PL_skipwhite)
6b29d1f5 1116 PL_skipwhite = newSVpvs("");
5db06880
NC
1117 curoff = s - SvPVX(PL_linestr);
1118 if (curoff - startoff)
cd81e915 1119 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1120 curoff - startoff);
1121 }
1122 return s;
1123#endif
a687059c 1124}
378cc40b 1125
ffb4593c
NT
1126/*
1127 * S_check_uni
1128 * Check the unary operators to ensure there's no ambiguity in how they're
1129 * used. An ambiguous piece of code would be:
1130 * rand + 5
1131 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1132 * the +5 is its argument.
1133 */
1134
76e3520e 1135STATIC void
cea2e8a9 1136S_check_uni(pTHX)
ba106d47 1137{
97aff369 1138 dVAR;
d4c19fe8
AL
1139 const char *s;
1140 const char *t;
2f3197b3 1141
3280af22 1142 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1143 return;
3280af22
NIS
1144 while (isSPACE(*PL_last_uni))
1145 PL_last_uni++;
c35e046a
AL
1146 s = PL_last_uni;
1147 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1148 s++;
3280af22 1149 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1150 return;
6136c704 1151
0453d815 1152 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1153 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1154 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1155 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1156 }
2f3197b3
LW
1157}
1158
ffb4593c
NT
1159/*
1160 * LOP : macro to build a list operator. Its behaviour has been replaced
1161 * with a subroutine, S_lop() for which LOP is just another name.
1162 */
1163
a0d0e21e
LW
1164#define LOP(f,x) return lop(f,x,s)
1165
ffb4593c
NT
1166/*
1167 * S_lop
1168 * Build a list operator (or something that might be one). The rules:
1169 * - if we have a next token, then it's a list operator [why?]
1170 * - if the next thing is an opening paren, then it's a function
1171 * - else it's a list operator
1172 */
1173
76e3520e 1174STATIC I32
a0be28da 1175S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1176{
97aff369 1177 dVAR;
79072805 1178 yylval.ival = f;
35c8bce7 1179 CLINE;
3280af22
NIS
1180 PL_expect = x;
1181 PL_bufptr = s;
1182 PL_last_lop = PL_oldbufptr;
eb160463 1183 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1184#ifdef PERL_MAD
1185 if (PL_lasttoke)
1186 return REPORT(LSTOP);
1187#else
3280af22 1188 if (PL_nexttoke)
bbf60fe6 1189 return REPORT(LSTOP);
5db06880 1190#endif
79072805 1191 if (*s == '(')
bbf60fe6 1192 return REPORT(FUNC);
29595ff2 1193 s = PEEKSPACE(s);
79072805 1194 if (*s == '(')
bbf60fe6 1195 return REPORT(FUNC);
79072805 1196 else
bbf60fe6 1197 return REPORT(LSTOP);
79072805
LW
1198}
1199
5db06880
NC
1200#ifdef PERL_MAD
1201 /*
1202 * S_start_force
1203 * Sets up for an eventual force_next(). start_force(0) basically does
1204 * an unshift, while start_force(-1) does a push. yylex removes items
1205 * on the "pop" end.
1206 */
1207
1208STATIC void
1209S_start_force(pTHX_ int where)
1210{
1211 int i;
1212
cd81e915 1213 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1214 where = PL_lasttoke;
cd81e915
NC
1215 assert(PL_curforce < 0 || PL_curforce == where);
1216 if (PL_curforce != where) {
5db06880
NC
1217 for (i = PL_lasttoke; i > where; --i) {
1218 PL_nexttoke[i] = PL_nexttoke[i-1];
1219 }
1220 PL_lasttoke++;
1221 }
cd81e915 1222 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1223 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1224 PL_curforce = where;
1225 if (PL_nextwhite) {
5db06880 1226 if (PL_madskills)
6b29d1f5 1227 curmad('^', newSVpvs(""));
cd81e915 1228 CURMAD('_', PL_nextwhite);
5db06880
NC
1229 }
1230}
1231
1232STATIC void
1233S_curmad(pTHX_ char slot, SV *sv)
1234{
1235 MADPROP **where;
1236
1237 if (!sv)
1238 return;
cd81e915
NC
1239 if (PL_curforce < 0)
1240 where = &PL_thismad;
5db06880 1241 else
cd81e915 1242 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1243
cd81e915 1244 if (PL_faketokens)
5db06880
NC
1245 sv_setpvn(sv, "", 0);
1246 else {
1247 if (!IN_BYTES) {
1248 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1249 SvUTF8_on(sv);
1250 else if (PL_encoding) {
1251 sv_recode_to_utf8(sv, PL_encoding);
1252 }
1253 }
1254 }
1255
1256 /* keep a slot open for the head of the list? */
1257 if (slot != '_' && *where && (*where)->mad_key == '^') {
1258 (*where)->mad_key = slot;
1259 sv_free((*where)->mad_val);
1260 (*where)->mad_val = (void*)sv;
1261 }
1262 else
1263 addmad(newMADsv(slot, sv), where, 0);
1264}
1265#else
b3f24c00
MHM
1266# define start_force(where) NOOP
1267# define curmad(slot, sv) NOOP
5db06880
NC
1268#endif
1269
ffb4593c
NT
1270/*
1271 * S_force_next
9cbb5ea2 1272 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1273 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1274 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1275 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1276 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1277 */
1278
4e553d73 1279STATIC void
cea2e8a9 1280S_force_next(pTHX_ I32 type)
79072805 1281{
97aff369 1282 dVAR;
5db06880 1283#ifdef PERL_MAD
cd81e915 1284 if (PL_curforce < 0)
5db06880 1285 start_force(PL_lasttoke);
cd81e915 1286 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1287 if (PL_lex_state != LEX_KNOWNEXT)
1288 PL_lex_defer = PL_lex_state;
1289 PL_lex_state = LEX_KNOWNEXT;
1290 PL_lex_expect = PL_expect;
cd81e915 1291 PL_curforce = -1;
5db06880 1292#else
3280af22
NIS
1293 PL_nexttype[PL_nexttoke] = type;
1294 PL_nexttoke++;
1295 if (PL_lex_state != LEX_KNOWNEXT) {
1296 PL_lex_defer = PL_lex_state;
1297 PL_lex_expect = PL_expect;
1298 PL_lex_state = LEX_KNOWNEXT;
79072805 1299 }
5db06880 1300#endif
79072805
LW
1301}
1302
d0a148a6
NC
1303STATIC SV *
1304S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1305{
97aff369 1306 dVAR;
9d4ba2ae 1307 SV * const sv = newSVpvn(start,len);
bfed75c6 1308 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1309 SvUTF8_on(sv);
1310 return sv;
1311}
1312
ffb4593c
NT
1313/*
1314 * S_force_word
1315 * When the lexer knows the next thing is a word (for instance, it has
1316 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1317 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1318 * lookahead.
ffb4593c
NT
1319 *
1320 * Arguments:
b1b65b59 1321 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1322 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1323 * int check_keyword : if true, Perl checks to make sure the word isn't
1324 * a keyword (do this if the word is a label, e.g. goto FOO)
1325 * int allow_pack : if true, : characters will also be allowed (require,
1326 * use, etc. do this)
9cbb5ea2 1327 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1328 */
1329
76e3520e 1330STATIC char *
cea2e8a9 1331S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1332{
97aff369 1333 dVAR;
463ee0b2
LW
1334 register char *s;
1335 STRLEN len;
4e553d73 1336
29595ff2 1337 start = SKIPSPACE1(start);
463ee0b2 1338 s = start;
7e2040f0 1339 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1340 (allow_pack && *s == ':') ||
15f0808c 1341 (allow_initial_tick && *s == '\'') )
a0d0e21e 1342 {
3280af22 1343 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1344 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1345 return start;
cd81e915 1346 start_force(PL_curforce);
5db06880
NC
1347 if (PL_madskills)
1348 curmad('X', newSVpvn(start,s-start));
463ee0b2 1349 if (token == METHOD) {
29595ff2 1350 s = SKIPSPACE1(s);
463ee0b2 1351 if (*s == '(')
3280af22 1352 PL_expect = XTERM;
463ee0b2 1353 else {
3280af22 1354 PL_expect = XOPERATOR;
463ee0b2 1355 }
79072805 1356 }
9ded7720 1357 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1358 = (OP*)newSVOP(OP_CONST,0,
1359 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1360 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1361 force_next(token);
1362 }
1363 return s;
1364}
1365
ffb4593c
NT
1366/*
1367 * S_force_ident
9cbb5ea2 1368 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1369 * text only contains the "foo" portion. The first argument is a pointer
1370 * to the "foo", and the second argument is the type symbol to prefix.
1371 * Forces the next token to be a "WORD".
9cbb5ea2 1372 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1373 */
1374
76e3520e 1375STATIC void
bfed75c6 1376S_force_ident(pTHX_ register const char *s, int kind)
79072805 1377{
97aff369 1378 dVAR;
c35e046a 1379 if (*s) {
90e5519e
NC
1380 const STRLEN len = strlen(s);
1381 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1382 start_force(PL_curforce);
9ded7720 1383 NEXTVAL_NEXTTOKE.opval = o;
79072805 1384 force_next(WORD);
748a9306 1385 if (kind) {
11343788 1386 o->op_private = OPpCONST_ENTERED;
55497cff 1387 /* XXX see note in pp_entereval() for why we forgo typo
1388 warnings if the symbol must be introduced in an eval.
1389 GSAR 96-10-12 */
90e5519e
NC
1390 gv_fetchpvn_flags(s, len,
1391 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1392 : GV_ADD,
1393 kind == '$' ? SVt_PV :
1394 kind == '@' ? SVt_PVAV :
1395 kind == '%' ? SVt_PVHV :
a0d0e21e 1396 SVt_PVGV
90e5519e 1397 );
748a9306 1398 }
79072805
LW
1399 }
1400}
1401
1571675a
GS
1402NV
1403Perl_str_to_version(pTHX_ SV *sv)
1404{
1405 NV retval = 0.0;
1406 NV nshift = 1.0;
1407 STRLEN len;
cfd0369c 1408 const char *start = SvPV_const(sv,len);
9d4ba2ae 1409 const char * const end = start + len;
504618e9 1410 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1411 while (start < end) {
ba210ebe 1412 STRLEN skip;
1571675a
GS
1413 UV n;
1414 if (utf)
9041c2e3 1415 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1416 else {
1417 n = *(U8*)start;
1418 skip = 1;
1419 }
1420 retval += ((NV)n)/nshift;
1421 start += skip;
1422 nshift *= 1000;
1423 }
1424 return retval;
1425}
1426
4e553d73 1427/*
ffb4593c
NT
1428 * S_force_version
1429 * Forces the next token to be a version number.
e759cc13
RGS
1430 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1431 * and if "guessing" is TRUE, then no new token is created (and the caller
1432 * must use an alternative parsing method).
ffb4593c
NT
1433 */
1434
76e3520e 1435STATIC char *
e759cc13 1436S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1437{
97aff369 1438 dVAR;
5f66b61c 1439 OP *version = NULL;
44dcb63b 1440 char *d;
5db06880
NC
1441#ifdef PERL_MAD
1442 I32 startoff = s - SvPVX(PL_linestr);
1443#endif
89bfa8cd 1444
29595ff2 1445 s = SKIPSPACE1(s);
89bfa8cd 1446
44dcb63b 1447 d = s;
dd629d5b 1448 if (*d == 'v')
44dcb63b 1449 d++;
44dcb63b 1450 if (isDIGIT(*d)) {
e759cc13
RGS
1451 while (isDIGIT(*d) || *d == '_' || *d == '.')
1452 d++;
5db06880
NC
1453#ifdef PERL_MAD
1454 if (PL_madskills) {
cd81e915 1455 start_force(PL_curforce);
5db06880
NC
1456 curmad('X', newSVpvn(s,d-s));
1457 }
1458#endif
9f3d182e 1459 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1460 SV *ver;
b73d6f50 1461 s = scan_num(s, &yylval);
89bfa8cd 1462 version = yylval.opval;
dd629d5b
GS
1463 ver = cSVOPx(version)->op_sv;
1464 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1465 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1466 SvNV_set(ver, str_to_version(ver));
1571675a 1467 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1468 }
89bfa8cd 1469 }
5db06880
NC
1470 else if (guessing) {
1471#ifdef PERL_MAD
1472 if (PL_madskills) {
cd81e915
NC
1473 sv_free(PL_nextwhite); /* let next token collect whitespace */
1474 PL_nextwhite = 0;
5db06880
NC
1475 s = SvPVX(PL_linestr) + startoff;
1476 }
1477#endif
e759cc13 1478 return s;
5db06880 1479 }
89bfa8cd 1480 }
1481
5db06880
NC
1482#ifdef PERL_MAD
1483 if (PL_madskills && !version) {
cd81e915
NC
1484 sv_free(PL_nextwhite); /* let next token collect whitespace */
1485 PL_nextwhite = 0;
5db06880
NC
1486 s = SvPVX(PL_linestr) + startoff;
1487 }
1488#endif
89bfa8cd 1489 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1490 start_force(PL_curforce);
9ded7720 1491 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1492 force_next(WORD);
89bfa8cd 1493
e759cc13 1494 return s;
89bfa8cd 1495}
1496
ffb4593c
NT
1497/*
1498 * S_tokeq
1499 * Tokenize a quoted string passed in as an SV. It finds the next
1500 * chunk, up to end of string or a backslash. It may make a new
1501 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1502 * turns \\ into \.
1503 */
1504
76e3520e 1505STATIC SV *
cea2e8a9 1506S_tokeq(pTHX_ SV *sv)
79072805 1507{
97aff369 1508 dVAR;
79072805
LW
1509 register char *s;
1510 register char *send;
1511 register char *d;
b3ac6de7
IZ
1512 STRLEN len = 0;
1513 SV *pv = sv;
79072805
LW
1514
1515 if (!SvLEN(sv))
b3ac6de7 1516 goto finish;
79072805 1517
a0d0e21e 1518 s = SvPV_force(sv, len);
21a311ee 1519 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1520 goto finish;
463ee0b2 1521 send = s + len;
79072805
LW
1522 while (s < send && *s != '\\')
1523 s++;
1524 if (s == send)
b3ac6de7 1525 goto finish;
79072805 1526 d = s;
be4731d2 1527 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1528 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1529 if (SvUTF8(sv))
1530 SvUTF8_on(pv);
1531 }
79072805
LW
1532 while (s < send) {
1533 if (*s == '\\') {
a0d0e21e 1534 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1535 s++; /* all that, just for this */
1536 }
1537 *d++ = *s++;
1538 }
1539 *d = '\0';
95a20fc0 1540 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1541 finish:
3280af22 1542 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1543 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1544 return sv;
1545}
1546
ffb4593c
NT
1547/*
1548 * Now come three functions related to double-quote context,
1549 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1550 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1551 * interact with PL_lex_state, and create fake ( ... ) argument lists
1552 * to handle functions and concatenation.
1553 * They assume that whoever calls them will be setting up a fake
1554 * join call, because each subthing puts a ',' after it. This lets
1555 * "lower \luPpEr"
1556 * become
1557 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1558 *
1559 * (I'm not sure whether the spurious commas at the end of lcfirst's
1560 * arguments and join's arguments are created or not).
1561 */
1562
1563/*
1564 * S_sublex_start
1565 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1566 *
1567 * Pattern matching will set PL_lex_op to the pattern-matching op to
1568 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1569 *
1570 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1571 *
1572 * Everything else becomes a FUNC.
1573 *
1574 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1575 * had an OP_CONST or OP_READLINE). This just sets us up for a
1576 * call to S_sublex_push().
1577 */
1578
76e3520e 1579STATIC I32
cea2e8a9 1580S_sublex_start(pTHX)
79072805 1581{
97aff369 1582 dVAR;
0d46e09a 1583 register const I32 op_type = yylval.ival;
79072805
LW
1584
1585 if (op_type == OP_NULL) {
3280af22 1586 yylval.opval = PL_lex_op;
5f66b61c 1587 PL_lex_op = NULL;
79072805
LW
1588 return THING;
1589 }
1590 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1591 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1592
1593 if (SvTYPE(sv) == SVt_PVIV) {
1594 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1595 STRLEN len;
96a5add6 1596 const char * const p = SvPV_const(sv, len);
f54cb97a 1597 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1598 if (SvUTF8(sv))
1599 SvUTF8_on(nsv);
b3ac6de7
IZ
1600 SvREFCNT_dec(sv);
1601 sv = nsv;
4e553d73 1602 }
b3ac6de7 1603 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1604 PL_lex_stuff = NULL;
6f33ba73
RGS
1605 /* Allow <FH> // "foo" */
1606 if (op_type == OP_READLINE)
1607 PL_expect = XTERMORDORDOR;
79072805
LW
1608 return THING;
1609 }
e3f73d4e
RGS
1610 else if (op_type == OP_BACKTICK && PL_lex_op) {
1611 /* readpipe() vas overriden */
1612 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1613 yylval.opval = PL_lex_op;
9b201d7d 1614 PL_lex_op = NULL;
e3f73d4e
RGS
1615 PL_lex_stuff = NULL;
1616 return THING;
1617 }
79072805 1618
3280af22
NIS
1619 PL_sublex_info.super_state = PL_lex_state;
1620 PL_sublex_info.sub_inwhat = op_type;
1621 PL_sublex_info.sub_op = PL_lex_op;
1622 PL_lex_state = LEX_INTERPPUSH;
55497cff 1623
3280af22
NIS
1624 PL_expect = XTERM;
1625 if (PL_lex_op) {
1626 yylval.opval = PL_lex_op;
5f66b61c 1627 PL_lex_op = NULL;
55497cff 1628 return PMFUNC;
1629 }
1630 else
1631 return FUNC;
1632}
1633
ffb4593c
NT
1634/*
1635 * S_sublex_push
1636 * Create a new scope to save the lexing state. The scope will be
1637 * ended in S_sublex_done. Returns a '(', starting the function arguments
1638 * to the uc, lc, etc. found before.
1639 * Sets PL_lex_state to LEX_INTERPCONCAT.
1640 */
1641
76e3520e 1642STATIC I32
cea2e8a9 1643S_sublex_push(pTHX)
55497cff 1644{
27da23d5 1645 dVAR;
f46d017c 1646 ENTER;
55497cff 1647
3280af22
NIS
1648 PL_lex_state = PL_sublex_info.super_state;
1649 SAVEI32(PL_lex_dojoin);
1650 SAVEI32(PL_lex_brackets);
3280af22
NIS
1651 SAVEI32(PL_lex_casemods);
1652 SAVEI32(PL_lex_starts);
1653 SAVEI32(PL_lex_state);
7766f137 1654 SAVEVPTR(PL_lex_inpat);
3280af22 1655 SAVEI32(PL_lex_inwhat);
57843af0 1656 SAVECOPLINE(PL_curcop);
3280af22 1657 SAVEPPTR(PL_bufptr);
8452ff4b 1658 SAVEPPTR(PL_bufend);
3280af22
NIS
1659 SAVEPPTR(PL_oldbufptr);
1660 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1661 SAVEPPTR(PL_last_lop);
1662 SAVEPPTR(PL_last_uni);
3280af22
NIS
1663 SAVEPPTR(PL_linestart);
1664 SAVESPTR(PL_linestr);
8edd5f42
RGS
1665 SAVEGENERICPV(PL_lex_brackstack);
1666 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1667
1668 PL_linestr = PL_lex_stuff;
a0714e2c 1669 PL_lex_stuff = NULL;
3280af22 1670
9cbb5ea2
GS
1671 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1672 = SvPVX(PL_linestr);
3280af22 1673 PL_bufend += SvCUR(PL_linestr);
bd61b366 1674 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1675 SAVEFREESV(PL_linestr);
1676
1677 PL_lex_dojoin = FALSE;
1678 PL_lex_brackets = 0;
a02a5408
JC
1679 Newx(PL_lex_brackstack, 120, char);
1680 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1681 PL_lex_casemods = 0;
1682 *PL_lex_casestack = '\0';
1683 PL_lex_starts = 0;
1684 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1685 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1686
1687 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1688 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1689 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1690 else
5f66b61c 1691 PL_lex_inpat = NULL;
79072805 1692
55497cff 1693 return '(';
79072805
LW
1694}
1695
ffb4593c
NT
1696/*
1697 * S_sublex_done
1698 * Restores lexer state after a S_sublex_push.
1699 */
1700
76e3520e 1701STATIC I32
cea2e8a9 1702S_sublex_done(pTHX)
79072805 1703{
27da23d5 1704 dVAR;
3280af22 1705 if (!PL_lex_starts++) {
396482e1 1706 SV * const sv = newSVpvs("");
9aa983d2
JH
1707 if (SvUTF8(PL_linestr))
1708 SvUTF8_on(sv);
3280af22 1709 PL_expect = XOPERATOR;
9aa983d2 1710 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1711 return THING;
1712 }
1713
3280af22
NIS
1714 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1715 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1716 return yylex();
79072805
LW
1717 }
1718
ffb4593c 1719 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1720 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1721 PL_linestr = PL_lex_repl;
1722 PL_lex_inpat = 0;
1723 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1724 PL_bufend += SvCUR(PL_linestr);
bd61b366 1725 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1726 SAVEFREESV(PL_linestr);
1727 PL_lex_dojoin = FALSE;
1728 PL_lex_brackets = 0;
3280af22
NIS
1729 PL_lex_casemods = 0;
1730 *PL_lex_casestack = '\0';
1731 PL_lex_starts = 0;
25da4f38 1732 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1733 PL_lex_state = LEX_INTERPNORMAL;
1734 PL_lex_starts++;
e9fa98b2
HS
1735 /* we don't clear PL_lex_repl here, so that we can check later
1736 whether this is an evalled subst; that means we rely on the
1737 logic to ensure sublex_done() is called again only via the
1738 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1739 }
e9fa98b2 1740 else {
3280af22 1741 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1742 PL_lex_repl = NULL;
e9fa98b2 1743 }
79072805 1744 return ',';
ffed7fef
LW
1745 }
1746 else {
5db06880
NC
1747#ifdef PERL_MAD
1748 if (PL_madskills) {
cd81e915
NC
1749 if (PL_thiswhite) {
1750 if (!PL_endwhite)
6b29d1f5 1751 PL_endwhite = newSVpvs("");
cd81e915
NC
1752 sv_catsv(PL_endwhite, PL_thiswhite);
1753 PL_thiswhite = 0;
1754 }
1755 if (PL_thistoken)
1756 sv_setpvn(PL_thistoken,"",0);
5db06880 1757 else
cd81e915 1758 PL_realtokenstart = -1;
5db06880
NC
1759 }
1760#endif
f46d017c 1761 LEAVE;
3280af22
NIS
1762 PL_bufend = SvPVX(PL_linestr);
1763 PL_bufend += SvCUR(PL_linestr);
1764 PL_expect = XOPERATOR;
09bef843 1765 PL_sublex_info.sub_inwhat = 0;
79072805 1766 return ')';
ffed7fef
LW
1767 }
1768}
1769
02aa26ce
NT
1770/*
1771 scan_const
1772
1773 Extracts a pattern, double-quoted string, or transliteration. This
1774 is terrifying code.
1775
94def140 1776 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1777 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1778 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1779
94def140
TS
1780 Returns a pointer to the character scanned up to. If this is
1781 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1782 successfully parsed), will leave an OP for the substring scanned
1783 in yylval. Caller must intuit reason for not parsing further
1784 by looking at the next characters herself.
1785
02aa26ce
NT
1786 In patterns:
1787 backslashes:
1788 double-quoted style: \r and \n
1789 regexp special ones: \D \s
94def140
TS
1790 constants: \x31
1791 backrefs: \1
02aa26ce
NT
1792 case and quoting: \U \Q \E
1793 stops on @ and $, but not for $ as tail anchor
1794
1795 In transliterations:
1796 characters are VERY literal, except for - not at the start or end
94def140
TS
1797 of the string, which indicates a range. If the range is in bytes,
1798 scan_const expands the range to the full set of intermediate
1799 characters. If the range is in utf8, the hyphen is replaced with
1800 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1801
1802 In double-quoted strings:
1803 backslashes:
1804 double-quoted style: \r and \n
94def140
TS
1805 constants: \x31
1806 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1807 case and quoting: \U \Q \E
1808 stops on @ and $
1809
1810 scan_const does *not* construct ops to handle interpolated strings.
1811 It stops processing as soon as it finds an embedded $ or @ variable
1812 and leaves it to the caller to work out what's going on.
1813
94def140
TS
1814 embedded arrays (whether in pattern or not) could be:
1815 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1816
1817 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1818
1819 $ in pattern could be $foo or could be tail anchor. Assumption:
1820 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1821 followed by one of "()| \r\n\t"
02aa26ce
NT
1822
1823 \1 (backreferences) are turned into $1
1824
1825 The structure of the code is
1826 while (there's a character to process) {
94def140
TS
1827 handle transliteration ranges
1828 skip regexp comments /(?#comment)/ and codes /(?{code})/
1829 skip #-initiated comments in //x patterns
1830 check for embedded arrays
02aa26ce
NT
1831 check for embedded scalars
1832 if (backslash) {
94def140
TS
1833 leave intact backslashes from leaveit (below)
1834 deprecate \1 in substitution replacements
02aa26ce
NT
1835 handle string-changing backslashes \l \U \Q \E, etc.
1836 switch (what was escaped) {
94def140
TS
1837 handle \- in a transliteration (becomes a literal -)
1838 handle \132 (octal characters)
1839 handle \x15 and \x{1234} (hex characters)
1840 handle \N{name} (named characters)
1841 handle \cV (control characters)
1842 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1843 } (end switch)
1844 } (end if backslash)
1845 } (end while character to read)
4e553d73 1846
02aa26ce
NT
1847*/
1848
76e3520e 1849STATIC char *
cea2e8a9 1850S_scan_const(pTHX_ char *start)
79072805 1851{
97aff369 1852 dVAR;
3280af22 1853 register char *send = PL_bufend; /* end of the constant */
561b68a9 1854 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1855 register char *s = start; /* start of the constant */
1856 register char *d = SvPVX(sv); /* destination for copies */
1857 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1858 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1859 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1860 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1861 UV uv;
4c3a8340
TS
1862#ifdef EBCDIC
1863 UV literal_endpoint = 0;
e294cc5d 1864 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1865#endif
012bcf8d 1866
2b9d42f0
NIS
1867 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1868 /* If we are doing a trans and we know we want UTF8 set expectation */
1869 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1870 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1871 }
1872
1873
79072805 1874 while (s < send || dorange) {
02aa26ce 1875 /* get transliterations out of the way (they're most literal) */
3280af22 1876 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1877 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1878 if (dorange) {
1ba5c669
JH
1879 I32 i; /* current expanded character */
1880 I32 min; /* first character in range */
1881 I32 max; /* last character in range */
02aa26ce 1882
e294cc5d
JH
1883#ifdef EBCDIC
1884 UV uvmax = 0;
1885#endif
1886
1887 if (has_utf8
1888#ifdef EBCDIC
1889 && !native_range
1890#endif
1891 ) {
9d4ba2ae 1892 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1893 char *e = d++;
1894 while (e-- > c)
1895 *(e + 1) = *e;
25716404 1896 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1897 /* mark the range as done, and continue */
1898 dorange = FALSE;
1899 didrange = TRUE;
1900 continue;
1901 }
2b9d42f0 1902
95a20fc0 1903 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1904#ifdef EBCDIC
1905 SvGROW(sv,
1906 SvLEN(sv) + (has_utf8 ?
1907 (512 - UTF_CONTINUATION_MARK +
1908 UNISKIP(0x100))
1909 : 256));
1910 /* How many two-byte within 0..255: 128 in UTF-8,
1911 * 96 in UTF-8-mod. */
1912#else
9cbb5ea2 1913 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1914#endif
9cbb5ea2 1915 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1916#ifdef EBCDIC
1917 if (has_utf8) {
1918 int j;
1919 for (j = 0; j <= 1; j++) {
1920 char * const c = (char*)utf8_hop((U8*)d, -1);
1921 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1922 if (j)
1923 min = (U8)uv;
1924 else if (uv < 256)
1925 max = (U8)uv;
1926 else {
1927 max = (U8)0xff; /* only to \xff */
1928 uvmax = uv; /* \x{100} to uvmax */
1929 }
1930 d = c; /* eat endpoint chars */
1931 }
1932 }
1933 else {
1934#endif
1935 d -= 2; /* eat the first char and the - */
1936 min = (U8)*d; /* first char in range */
1937 max = (U8)d[1]; /* last char in range */
1938#ifdef EBCDIC
1939 }
1940#endif
8ada0baa 1941
c2e66d9e 1942 if (min > max) {
01ec43d0 1943 Perl_croak(aTHX_
d1573ac7 1944 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1945 (char)min, (char)max);
c2e66d9e
GS
1946 }
1947
c7f1f016 1948#ifdef EBCDIC
4c3a8340
TS
1949 if (literal_endpoint == 2 &&
1950 ((isLOWER(min) && isLOWER(max)) ||
1951 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1952 if (isLOWER(min)) {
1953 for (i = min; i <= max; i++)
1954 if (isLOWER(i))
db42d148 1955 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1956 } else {
1957 for (i = min; i <= max; i++)
1958 if (isUPPER(i))
db42d148 1959 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1960 }
1961 }
1962 else
1963#endif
1964 for (i = min; i <= max; i++)
e294cc5d
JH
1965#ifdef EBCDIC
1966 if (has_utf8) {
1967 const U8 ch = (U8)NATIVE_TO_UTF(i);
1968 if (UNI_IS_INVARIANT(ch))
1969 *d++ = (U8)i;
1970 else {
1971 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1972 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1973 }
1974 }
1975 else
1976#endif
1977 *d++ = (char)i;
1978
1979#ifdef EBCDIC
1980 if (uvmax) {
1981 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1982 if (uvmax > 0x101)
1983 *d++ = (char)UTF_TO_NATIVE(0xff);
1984 if (uvmax > 0x100)
1985 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1986 }
1987#endif
02aa26ce
NT
1988
1989 /* mark the range as done, and continue */
79072805 1990 dorange = FALSE;
01ec43d0 1991 didrange = TRUE;
4c3a8340
TS
1992#ifdef EBCDIC
1993 literal_endpoint = 0;
1994#endif
79072805 1995 continue;
4e553d73 1996 }
02aa26ce
NT
1997
1998 /* range begins (ignore - as first or last char) */
79072805 1999 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2000 if (didrange) {
1fafa243 2001 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2002 }
e294cc5d
JH
2003 if (has_utf8
2004#ifdef EBCDIC
2005 && !native_range
2006#endif
2007 ) {
25716404 2008 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2009 s++;
2010 continue;
2011 }
79072805
LW
2012 dorange = TRUE;
2013 s++;
01ec43d0
GS
2014 }
2015 else {
2016 didrange = FALSE;
4c3a8340
TS
2017#ifdef EBCDIC
2018 literal_endpoint = 0;
e294cc5d 2019 native_range = TRUE;
4c3a8340 2020#endif
01ec43d0 2021 }
79072805 2022 }
02aa26ce
NT
2023
2024 /* if we get here, we're not doing a transliteration */
2025
0f5d15d6
IZ
2026 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2027 except for the last char, which will be done separately. */
3280af22 2028 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2029 if (s[2] == '#') {
e994fd66 2030 while (s+1 < send && *s != ')')
db42d148 2031 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2032 }
2033 else if (s[2] == '{' /* This should match regcomp.c */
2034 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2035 {
cc6b7395 2036 I32 count = 1;
0f5d15d6 2037 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2038 char c;
2039
d9f97599
GS
2040 while (count && (c = *regparse)) {
2041 if (c == '\\' && regparse[1])
2042 regparse++;
4e553d73 2043 else if (c == '{')
cc6b7395 2044 count++;
4e553d73 2045 else if (c == '}')
cc6b7395 2046 count--;
d9f97599 2047 regparse++;
cc6b7395 2048 }
e994fd66 2049 if (*regparse != ')')
5bdf89e7 2050 regparse--; /* Leave one char for continuation. */
0f5d15d6 2051 while (s < regparse)
db42d148 2052 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2053 }
748a9306 2054 }
02aa26ce
NT
2055
2056 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2057 else if (*s == '#' && PL_lex_inpat &&
2058 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2059 while (s+1 < send && *s != '\n')
db42d148 2060 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2061 }
02aa26ce 2062
5d1d4326 2063 /* check for embedded arrays
da6eedaa 2064 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2065 */
1749ea0d
TS
2066 else if (*s == '@' && s[1]) {
2067 if (isALNUM_lazy_if(s+1,UTF))
2068 break;
2069 if (strchr(":'{$", s[1]))
2070 break;
2071 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2072 break; /* in regexp, neither @+ nor @- are interpolated */
2073 }
02aa26ce
NT
2074
2075 /* check for embedded scalars. only stop if we're sure it's a
2076 variable.
2077 */
79072805 2078 else if (*s == '$') {
3280af22 2079 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2080 break;
6002328a 2081 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2082 break; /* in regexp, $ might be tail anchor */
2083 }
02aa26ce 2084
2b9d42f0
NIS
2085 /* End of else if chain - OP_TRANS rejoin rest */
2086
02aa26ce 2087 /* backslashes */
79072805
LW
2088 if (*s == '\\' && s+1 < send) {
2089 s++;
02aa26ce 2090
02aa26ce 2091 /* deprecate \1 in strings and substitution replacements */
3280af22 2092 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2093 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2094 {
599cee73 2095 if (ckWARN(WARN_SYNTAX))
9014280d 2096 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2097 *--s = '$';
2098 break;
2099 }
02aa26ce
NT
2100
2101 /* string-change backslash escapes */
3280af22 2102 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2103 --s;
2104 break;
2105 }
cc74c5bd
TS
2106 /* skip any other backslash escapes in a pattern */
2107 else if (PL_lex_inpat) {
2108 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2109 goto default_action;
2110 }
02aa26ce
NT
2111
2112 /* if we get here, it's either a quoted -, or a digit */
79072805 2113 switch (*s) {
02aa26ce
NT
2114
2115 /* quoted - in transliterations */
79072805 2116 case '-':
3280af22 2117 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2118 *d++ = *s++;
2119 continue;
2120 }
2121 /* FALL THROUGH */
2122 default:
11b8faa4 2123 {
86f97054 2124 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2125 ckWARN(WARN_MISC))
9014280d 2126 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2127 "Unrecognized escape \\%c passed through",
2128 *s);
11b8faa4 2129 /* default action is to copy the quoted character */
f9a63242 2130 goto default_action;
11b8faa4 2131 }
02aa26ce
NT
2132
2133 /* \132 indicates an octal constant */
79072805
LW
2134 case '0': case '1': case '2': case '3':
2135 case '4': case '5': case '6': case '7':
ba210ebe 2136 {
53305cf1
NC
2137 I32 flags = 0;
2138 STRLEN len = 3;
2139 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2140 s += len;
2141 }
012bcf8d 2142 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2143
2144 /* \x24 indicates a hex constant */
79072805 2145 case 'x':
a0ed51b3
LW
2146 ++s;
2147 if (*s == '{') {
9d4ba2ae 2148 char* const e = strchr(s, '}');
a4c04bdc
NC
2149 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2150 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2151 STRLEN len;
355860ce 2152
53305cf1 2153 ++s;
adaeee49 2154 if (!e) {
a0ed51b3 2155 yyerror("Missing right brace on \\x{}");
355860ce 2156 continue;
ba210ebe 2157 }
53305cf1
NC
2158 len = e - s;
2159 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2160 s = e + 1;
a0ed51b3
LW
2161 }
2162 else {
ba210ebe 2163 {
53305cf1 2164 STRLEN len = 2;
a4c04bdc 2165 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2166 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2167 s += len;
2168 }
012bcf8d
GS
2169 }
2170
2171 NUM_ESCAPE_INSERT:
2172 /* Insert oct or hex escaped character.
301d3d20 2173 * There will always enough room in sv since such
db42d148 2174 * escapes will be longer than any UTF-8 sequence
301d3d20 2175 * they can end up as. */
ba7cea30 2176
c7f1f016
NIS
2177 /* We need to map to chars to ASCII before doing the tests
2178 to cover EBCDIC
2179 */
c4d5f83a 2180 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2181 if (!has_utf8 && uv > 255) {
301d3d20
JH
2182 /* Might need to recode whatever we have
2183 * accumulated so far if it contains any
2184 * hibit chars.
2185 *
2186 * (Can't we keep track of that and avoid
2187 * this rescan? --jhi)
012bcf8d 2188 */
c7f1f016 2189 int hicount = 0;
63cd0674
NIS
2190 U8 *c;
2191 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2192 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2193 hicount++;
db42d148 2194 }
012bcf8d 2195 }
63cd0674 2196 if (hicount) {
9d4ba2ae 2197 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2198 U8 *src, *dst;
2199 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2200 src = (U8 *)d - 1;
2201 dst = src+hicount;
2202 d += hicount;
cfd0369c 2203 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2204 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2205 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2206 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2207 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2208 }
2209 else {
63cd0674 2210 *dst-- = *src;
012bcf8d 2211 }
c7f1f016 2212 src--;
012bcf8d
GS
2213 }
2214 }
2215 }
2216
9aa983d2 2217 if (has_utf8 || uv > 255) {
9041c2e3 2218 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2219 has_utf8 = TRUE;
f9a63242
JH
2220 if (PL_lex_inwhat == OP_TRANS &&
2221 PL_sublex_info.sub_op) {
2222 PL_sublex_info.sub_op->op_private |=
2223 (PL_lex_repl ? OPpTRANS_FROM_UTF
2224 : OPpTRANS_TO_UTF);
f9a63242 2225 }
e294cc5d
JH
2226#ifdef EBCDIC
2227 if (uv > 255 && !dorange)
2228 native_range = FALSE;
2229#endif
012bcf8d 2230 }
a0ed51b3 2231 else {
012bcf8d 2232 *d++ = (char)uv;
a0ed51b3 2233 }
012bcf8d
GS
2234 }
2235 else {
c4d5f83a 2236 *d++ = (char) uv;
a0ed51b3 2237 }
79072805 2238 continue;
02aa26ce 2239
b239daa5 2240 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2241 case 'N':
55eda711 2242 ++s;
423cee85
JH
2243 if (*s == '{') {
2244 char* e = strchr(s, '}');
155aba94 2245 SV *res;
423cee85 2246 STRLEN len;
cfd0369c 2247 const char *str;
fc8cd66c 2248 SV *type;
4e553d73 2249
423cee85 2250 if (!e) {
5777a3f7 2251 yyerror("Missing right brace on \\N{}");
423cee85
JH
2252 e = s - 1;
2253 goto cont_scan;
2254 }
dbc0d4f2
JH
2255 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2256 /* \N{U+...} */
2257 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2258 PERL_SCAN_DISALLOW_PREFIX;
2259 s += 3;
2260 len = e - s;
2261 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2262 if ( e > s && len != (STRLEN)(e - s) ) {
2263 uv = 0xFFFD;
fc8cd66c 2264 }
dbc0d4f2
JH
2265 s = e + 1;
2266 goto NUM_ESCAPE_INSERT;
2267 }
55eda711 2268 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2269 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2270 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2271 res, NULL, SvPVX(type) );
2272 SvREFCNT_dec(type);
f9a63242
JH
2273 if (has_utf8)
2274 sv_utf8_upgrade(res);
cfd0369c 2275 str = SvPV_const(res,len);
1c47067b
JH
2276#ifdef EBCDIC_NEVER_MIND
2277 /* charnames uses pack U and that has been
2278 * recently changed to do the below uni->native
2279 * mapping, so this would be redundant (and wrong,
2280 * the code point would be doubly converted).
2281 * But leave this in just in case the pack U change
2282 * gets revoked, but the semantics is still
2283 * desireable for charnames. --jhi */
cddc7ef4 2284 {
cfd0369c 2285 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2286
2287 if (uv < 0x100) {
89ebb4a3 2288 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2289
2290 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2291 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2292 str = SvPV_const(res, len);
cddc7ef4
JH
2293 }
2294 }
2295#endif
89491803 2296 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2297 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2298 SvCUR_set(sv, d - ostart);
2299 SvPOK_on(sv);
e4f3eed8 2300 *d = '\0';
f08d6ad9 2301 sv_utf8_upgrade(sv);
d2f449dd 2302 /* this just broke our allocation above... */
eb160463 2303 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2304 d = SvPVX(sv) + SvCUR(sv);
89491803 2305 has_utf8 = TRUE;
f08d6ad9 2306 }
eb160463 2307 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2308 const char * const odest = SvPVX_const(sv);
423cee85 2309
8973db79 2310 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2311 d = SvPVX(sv) + (d - odest);
2312 }
e294cc5d
JH
2313#ifdef EBCDIC
2314 if (!dorange)
2315 native_range = FALSE; /* \N{} is guessed to be Unicode */
2316#endif
423cee85
JH
2317 Copy(str, d, len, char);
2318 d += len;
2319 SvREFCNT_dec(res);
2320 cont_scan:
2321 s = e + 1;
2322 }
2323 else
5777a3f7 2324 yyerror("Missing braces on \\N{}");
423cee85
JH
2325 continue;
2326
02aa26ce 2327 /* \c is a control character */
79072805
LW
2328 case 'c':
2329 s++;
961ce445 2330 if (s < send) {
ba210ebe 2331 U8 c = *s++;
c7f1f016
NIS
2332#ifdef EBCDIC
2333 if (isLOWER(c))
2334 c = toUPPER(c);
2335#endif
db42d148 2336 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2337 }
961ce445
RGS
2338 else {
2339 yyerror("Missing control char name in \\c");
2340 }
79072805 2341 continue;
02aa26ce
NT
2342
2343 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2344 case 'b':
db42d148 2345 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2346 break;
2347 case 'n':
db42d148 2348 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2349 break;
2350 case 'r':
db42d148 2351 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2352 break;
2353 case 'f':
db42d148 2354 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2355 break;
2356 case 't':
db42d148 2357 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2358 break;
34a3fe2a 2359 case 'e':
db42d148 2360 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2361 break;
2362 case 'a':
db42d148 2363 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2364 break;
02aa26ce
NT
2365 } /* end switch */
2366
79072805
LW
2367 s++;
2368 continue;
02aa26ce 2369 } /* end if (backslash) */
4c3a8340
TS
2370#ifdef EBCDIC
2371 else
2372 literal_endpoint++;
2373#endif
02aa26ce 2374
f9a63242 2375 default_action:
2b9d42f0
NIS
2376 /* If we started with encoded form, or already know we want it
2377 and then encode the next character */
2378 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2379 STRLEN len = 1;
5f66b61c
AL
2380 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2381 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2382 s += len;
2383 if (need > len) {
2384 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2385 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2386 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2387 }
5f66b61c 2388 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2389 has_utf8 = TRUE;
e294cc5d
JH
2390#ifdef EBCDIC
2391 if (uv > 255 && !dorange)
2392 native_range = FALSE;
2393#endif
2b9d42f0
NIS
2394 }
2395 else {
2396 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2397 }
02aa26ce
NT
2398 } /* while loop to process each character */
2399
2400 /* terminate the string and set up the sv */
79072805 2401 *d = '\0';
95a20fc0 2402 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2403 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2404 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2405
79072805 2406 SvPOK_on(sv);
9f4817db 2407 if (PL_encoding && !has_utf8) {
d0063567
DK
2408 sv_recode_to_utf8(sv, PL_encoding);
2409 if (SvUTF8(sv))
2410 has_utf8 = TRUE;
9f4817db 2411 }
2b9d42f0 2412 if (has_utf8) {
7e2040f0 2413 SvUTF8_on(sv);
2b9d42f0 2414 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2415 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2416 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2417 }
2418 }
79072805 2419
02aa26ce 2420 /* shrink the sv if we allocated more than we used */
79072805 2421 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2422 SvPV_shrink_to_cur(sv);
79072805 2423 }
02aa26ce 2424
9b599b2a 2425 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2426 if (s > PL_bufptr) {
2427 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2428 sv = new_constant(start, s - start,
2429 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2430 sv, NULL,
10edeb5d
JH
2431 (const char *)
2432 (( PL_lex_inwhat == OP_TRANS
2433 ? "tr"
2434 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2435 ? "s"
2436 : "qq"))));
79072805 2437 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2438 } else
8990e307 2439 SvREFCNT_dec(sv);
79072805
LW
2440 return s;
2441}
2442
ffb4593c
NT
2443/* S_intuit_more
2444 * Returns TRUE if there's more to the expression (e.g., a subscript),
2445 * FALSE otherwise.
ffb4593c
NT
2446 *
2447 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2448 *
2449 * ->[ and ->{ return TRUE
2450 * { and [ outside a pattern are always subscripts, so return TRUE
2451 * if we're outside a pattern and it's not { or [, then return FALSE
2452 * if we're in a pattern and the first char is a {
2453 * {4,5} (any digits around the comma) returns FALSE
2454 * if we're in a pattern and the first char is a [
2455 * [] returns FALSE
2456 * [SOMETHING] has a funky algorithm to decide whether it's a
2457 * character class or not. It has to deal with things like
2458 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2459 * anything else returns TRUE
2460 */
2461
9cbb5ea2
GS
2462/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2463
76e3520e 2464STATIC int
cea2e8a9 2465S_intuit_more(pTHX_ register char *s)
79072805 2466{
97aff369 2467 dVAR;
3280af22 2468 if (PL_lex_brackets)
79072805
LW
2469 return TRUE;
2470 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2471 return TRUE;
2472 if (*s != '{' && *s != '[')
2473 return FALSE;
3280af22 2474 if (!PL_lex_inpat)
79072805
LW
2475 return TRUE;
2476
2477 /* In a pattern, so maybe we have {n,m}. */
2478 if (*s == '{') {
2479 s++;
2480 if (!isDIGIT(*s))
2481 return TRUE;
2482 while (isDIGIT(*s))
2483 s++;
2484 if (*s == ',')
2485 s++;
2486 while (isDIGIT(*s))
2487 s++;
2488 if (*s == '}')
2489 return FALSE;
2490 return TRUE;
2491
2492 }
2493
2494 /* On the other hand, maybe we have a character class */
2495
2496 s++;
2497 if (*s == ']' || *s == '^')
2498 return FALSE;
2499 else {
ffb4593c 2500 /* this is terrifying, and it works */
79072805
LW
2501 int weight = 2; /* let's weigh the evidence */
2502 char seen[256];
f27ffc4a 2503 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2504 const char * const send = strchr(s,']');
3280af22 2505 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2506
2507 if (!send) /* has to be an expression */
2508 return TRUE;
2509
2510 Zero(seen,256,char);
2511 if (*s == '$')
2512 weight -= 3;
2513 else if (isDIGIT(*s)) {
2514 if (s[1] != ']') {
2515 if (isDIGIT(s[1]) && s[2] == ']')
2516 weight -= 10;
2517 }
2518 else
2519 weight -= 100;
2520 }
2521 for (; s < send; s++) {
2522 last_un_char = un_char;
2523 un_char = (unsigned char)*s;
2524 switch (*s) {
2525 case '@':
2526 case '&':
2527 case '$':
2528 weight -= seen[un_char] * 10;
7e2040f0 2529 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2530 int len;
8903cb82 2531 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2532 len = (int)strlen(tmpbuf);
2533 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2534 weight -= 100;
2535 else
2536 weight -= 10;
2537 }
2538 else if (*s == '$' && s[1] &&
93a17b20
LW
2539 strchr("[#!%*<>()-=",s[1])) {
2540 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2541 weight -= 10;
2542 else
2543 weight -= 1;
2544 }
2545 break;
2546 case '\\':
2547 un_char = 254;
2548 if (s[1]) {
93a17b20 2549 if (strchr("wds]",s[1]))
79072805 2550 weight += 100;
10edeb5d 2551 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2552 weight += 1;
93a17b20 2553 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2554 weight += 40;
2555 else if (isDIGIT(s[1])) {
2556 weight += 40;
2557 while (s[1] && isDIGIT(s[1]))
2558 s++;
2559 }
2560 }
2561 else
2562 weight += 100;
2563 break;
2564 case '-':
2565 if (s[1] == '\\')
2566 weight += 50;
93a17b20 2567 if (strchr("aA01! ",last_un_char))
79072805 2568 weight += 30;
93a17b20 2569 if (strchr("zZ79~",s[1]))
79072805 2570 weight += 30;
f27ffc4a
GS
2571 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2572 weight -= 5; /* cope with negative subscript */
79072805
LW
2573 break;
2574 default:
3792a11b
NC
2575 if (!isALNUM(last_un_char)
2576 && !(last_un_char == '$' || last_un_char == '@'
2577 || last_un_char == '&')
2578 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2579 char *d = tmpbuf;
2580 while (isALPHA(*s))
2581 *d++ = *s++;
2582 *d = '\0';
5458a98a 2583 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2584 weight -= 150;
2585 }
2586 if (un_char == last_un_char + 1)
2587 weight += 5;
2588 weight -= seen[un_char];
2589 break;
2590 }
2591 seen[un_char]++;
2592 }
2593 if (weight >= 0) /* probably a character class */
2594 return FALSE;
2595 }
2596
2597 return TRUE;
2598}
ffed7fef 2599
ffb4593c
NT
2600/*
2601 * S_intuit_method
2602 *
2603 * Does all the checking to disambiguate
2604 * foo bar
2605 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2606 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2607 *
2608 * First argument is the stuff after the first token, e.g. "bar".
2609 *
2610 * Not a method if bar is a filehandle.
2611 * Not a method if foo is a subroutine prototyped to take a filehandle.
2612 * Not a method if it's really "Foo $bar"
2613 * Method if it's "foo $bar"
2614 * Not a method if it's really "print foo $bar"
2615 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2616 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2617 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2618 * =>
2619 */
2620
76e3520e 2621STATIC int
62d55b22 2622S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2623{
97aff369 2624 dVAR;
a0d0e21e 2625 char *s = start + (*start == '$');
3280af22 2626 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2627 STRLEN len;
2628 GV* indirgv;
5db06880
NC
2629#ifdef PERL_MAD
2630 int soff;
2631#endif
a0d0e21e
LW
2632
2633 if (gv) {
62d55b22 2634 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2635 return 0;
62d55b22
NC
2636 if (cv) {
2637 if (SvPOK(cv)) {
2638 const char *proto = SvPVX_const(cv);
2639 if (proto) {
2640 if (*proto == ';')
2641 proto++;
2642 if (*proto == '*')
2643 return 0;
2644 }
b6c543e3
IZ
2645 }
2646 } else
c35e046a 2647 gv = NULL;
a0d0e21e 2648 }
8903cb82 2649 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2650 /* start is the beginning of the possible filehandle/object,
2651 * and s is the end of it
2652 * tmpbuf is a copy of it
2653 */
2654
a0d0e21e 2655 if (*start == '$') {
3280af22 2656 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2657 return 0;
5db06880
NC
2658#ifdef PERL_MAD
2659 len = start - SvPVX(PL_linestr);
2660#endif
29595ff2 2661 s = PEEKSPACE(s);
f0092767 2662#ifdef PERL_MAD
5db06880
NC
2663 start = SvPVX(PL_linestr) + len;
2664#endif
3280af22
NIS
2665 PL_bufptr = start;
2666 PL_expect = XREF;
a0d0e21e
LW
2667 return *s == '(' ? FUNCMETH : METHOD;
2668 }
5458a98a 2669 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2670 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2671 len -= 2;
2672 tmpbuf[len] = '\0';
5db06880
NC
2673#ifdef PERL_MAD
2674 soff = s - SvPVX(PL_linestr);
2675#endif
c3e0f903
GS
2676 goto bare_package;
2677 }
90e5519e 2678 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2679 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2680 return 0;
2681 /* filehandle or package name makes it a method */
89bfa8cd 2682 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2683#ifdef PERL_MAD
2684 soff = s - SvPVX(PL_linestr);
2685#endif
29595ff2 2686 s = PEEKSPACE(s);
3280af22 2687 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2688 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2689 bare_package:
cd81e915 2690 start_force(PL_curforce);
9ded7720 2691 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2692 newSVpvn(tmpbuf,len));
9ded7720 2693 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2694 if (PL_madskills)
2695 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2696 PL_expect = XTERM;
a0d0e21e 2697 force_next(WORD);
3280af22 2698 PL_bufptr = s;
5db06880
NC
2699#ifdef PERL_MAD
2700 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2701#endif
a0d0e21e
LW
2702 return *s == '(' ? FUNCMETH : METHOD;
2703 }
2704 }
2705 return 0;
2706}
2707
ffb4593c
NT
2708/*
2709 * S_incl_perldb
2710 * Return a string of Perl code to load the debugger. If PERL5DB
2711 * is set, it will return the contents of that, otherwise a
2712 * compile-time require of perl5db.pl.
2713 */
2714
bfed75c6 2715STATIC const char*
cea2e8a9 2716S_incl_perldb(pTHX)
a0d0e21e 2717{
97aff369 2718 dVAR;
3280af22 2719 if (PL_perldb) {
9d4ba2ae 2720 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2721
2722 if (pdb)
2723 return pdb;
93189314 2724 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2725 return "BEGIN { require 'perl5db.pl' }";
2726 }
2727 return "";
2728}
2729
2730
16d20bd9 2731/* Encoded script support. filter_add() effectively inserts a
4e553d73 2732 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2733 * Note that the filter function only applies to the current source file
2734 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2735 *
2736 * The datasv parameter (which may be NULL) can be used to pass
2737 * private data to this instance of the filter. The filter function
2738 * can recover the SV using the FILTER_DATA macro and use it to
2739 * store private buffers and state information.
2740 *
2741 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2742 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2743 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2744 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2745 * private use must be set using malloc'd pointers.
2746 */
16d20bd9
AD
2747
2748SV *
864dbfa3 2749Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2750{
97aff369 2751 dVAR;
f4c556ac 2752 if (!funcp)
a0714e2c 2753 return NULL;
f4c556ac 2754
3280af22
NIS
2755 if (!PL_rsfp_filters)
2756 PL_rsfp_filters = newAV();
16d20bd9 2757 if (!datasv)
561b68a9 2758 datasv = newSV(0);
862a34c6 2759 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2760 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2761 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2762 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2763 FPTR2DPTR(void *, IoANY(datasv)),
2764 SvPV_nolen(datasv)));
3280af22
NIS
2765 av_unshift(PL_rsfp_filters, 1);
2766 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2767 return(datasv);
2768}
4e553d73 2769
16d20bd9
AD
2770
2771/* Delete most recently added instance of this filter function. */
a0d0e21e 2772void
864dbfa3 2773Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2774{
97aff369 2775 dVAR;
e0c19803 2776 SV *datasv;
24801a4b 2777
33073adb 2778#ifdef DEBUGGING
55662e27
JH
2779 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2780 FPTR2DPTR(void*, funcp)));
33073adb 2781#endif
3280af22 2782 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2783 return;
2784 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2785 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2786 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2787 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2788 IoANY(datasv) = (void *)NULL;
3280af22 2789 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2790
16d20bd9
AD
2791 return;
2792 }
2793 /* we need to search for the correct entry and clear it */
cea2e8a9 2794 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2795}
2796
2797
1de9afcd
RGS
2798/* Invoke the idxth filter function for the current rsfp. */
2799/* maxlen 0 = read one text line */
16d20bd9 2800I32
864dbfa3 2801Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2802{
97aff369 2803 dVAR;
16d20bd9
AD
2804 filter_t funcp;
2805 SV *datasv = NULL;
f482118e
NC
2806 /* This API is bad. It should have been using unsigned int for maxlen.
2807 Not sure if we want to change the API, but if not we should sanity
2808 check the value here. */
39cd7a59
NC
2809 const unsigned int correct_length
2810 = maxlen < 0 ?
2811#ifdef PERL_MICRO
2812 0x7FFFFFFF
2813#else
2814 INT_MAX
2815#endif
2816 : maxlen;
e50aee73 2817
3280af22 2818 if (!PL_rsfp_filters)
16d20bd9 2819 return -1;
1de9afcd 2820 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2821 /* Provide a default input filter to make life easy. */
2822 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2823 DEBUG_P(PerlIO_printf(Perl_debug_log,
2824 "filter_read %d: from rsfp\n", idx));
f482118e 2825 if (correct_length) {
16d20bd9
AD
2826 /* Want a block */
2827 int len ;
f54cb97a 2828 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2829
2830 /* ensure buf_sv is large enough */
f482118e
NC
2831 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2832 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2833 correct_length)) <= 0) {
3280af22 2834 if (PerlIO_error(PL_rsfp))
37120919
AD
2835 return -1; /* error */
2836 else
2837 return 0 ; /* end of file */
2838 }
16d20bd9
AD
2839 SvCUR_set(buf_sv, old_len + len) ;
2840 } else {
2841 /* Want a line */
3280af22
NIS
2842 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2843 if (PerlIO_error(PL_rsfp))
37120919
AD
2844 return -1; /* error */
2845 else
2846 return 0 ; /* end of file */
2847 }
16d20bd9
AD
2848 }
2849 return SvCUR(buf_sv);
2850 }
2851 /* Skip this filter slot if filter has been deleted */
1de9afcd 2852 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2853 DEBUG_P(PerlIO_printf(Perl_debug_log,
2854 "filter_read %d: skipped (filter deleted)\n",
2855 idx));
f482118e 2856 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2857 }
2858 /* Get function pointer hidden within datasv */
8141890a 2859 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2860 DEBUG_P(PerlIO_printf(Perl_debug_log,
2861 "filter_read %d: via function %p (%s)\n",
ca0270c4 2862 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2863 /* Call function. The function is expected to */
2864 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2865 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2866 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2867}
2868
76e3520e 2869STATIC char *
cea2e8a9 2870S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2871{
97aff369 2872 dVAR;
c39cd008 2873#ifdef PERL_CR_FILTER
3280af22 2874 if (!PL_rsfp_filters) {
c39cd008 2875 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2876 }
2877#endif
3280af22 2878 if (PL_rsfp_filters) {
55497cff 2879 if (!append)
2880 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2881 if (FILTER_READ(0, sv, 0) > 0)
2882 return ( SvPVX(sv) ) ;
2883 else
bd61b366 2884 return NULL ;
16d20bd9 2885 }
9d116dd7 2886 else
fd049845 2887 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2888}
2889
01ec43d0 2890STATIC HV *
7fc63493 2891S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2892{
97aff369 2893 dVAR;
def3634b
GS
2894 GV *gv;
2895
01ec43d0 2896 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2897 return PL_curstash;
2898
2899 if (len > 2 &&
2900 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2901 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2902 {
2903 return GvHV(gv); /* Foo:: */
def3634b
GS
2904 }
2905
2906 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2907 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2908 if (gv && GvCV(gv)) {
2909 SV * const sv = cv_const_sv(GvCV(gv));
2910 if (sv)
83003860 2911 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2912 }
2913
2914 return gv_stashpv(pkgname, FALSE);
2915}
a0d0e21e 2916
e3f73d4e
RGS
2917/*
2918 * S_readpipe_override
2919 * Check whether readpipe() is overriden, and generates the appropriate
2920 * optree, provided sublex_start() is called afterwards.
2921 */
2922STATIC void
1d51329b 2923S_readpipe_override(pTHX)
e3f73d4e
RGS
2924{
2925 GV **gvp;
2926 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2927 yylval.ival = OP_BACKTICK;
2928 if ((gv_readpipe
2929 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2930 ||
2931 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2932 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2933 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2934 {
2935 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2936 append_elem(OP_LIST,
2937 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2938 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2939 }
2940 else {
2941 set_csh();
2942 }
2943}
2944
5db06880
NC
2945#ifdef PERL_MAD
2946 /*
2947 * Perl_madlex
2948 * The intent of this yylex wrapper is to minimize the changes to the
2949 * tokener when we aren't interested in collecting madprops. It remains
2950 * to be seen how successful this strategy will be...
2951 */
2952
2953int
2954Perl_madlex(pTHX)
2955{
2956 int optype;
2957 char *s = PL_bufptr;
2958
cd81e915
NC
2959 /* make sure PL_thiswhite is initialized */
2960 PL_thiswhite = 0;
2961 PL_thismad = 0;
5db06880 2962
cd81e915 2963 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2964 if (PL_pending_ident)
2965 return S_pending_ident(aTHX);
2966
2967 /* previous token ate up our whitespace? */
cd81e915
NC
2968 if (!PL_lasttoke && PL_nextwhite) {
2969 PL_thiswhite = PL_nextwhite;
2970 PL_nextwhite = 0;
5db06880
NC
2971 }
2972
2973 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2974 PL_realtokenstart = -1;
2975 PL_thistoken = 0;
5db06880
NC
2976 optype = yylex();
2977 s = PL_bufptr;
cd81e915 2978 assert(PL_curforce < 0);
5db06880 2979
cd81e915
NC
2980 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2981 if (!PL_thistoken) {
2982 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2983 PL_thistoken = newSVpvs("");
5db06880 2984 else {
c35e046a 2985 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2986 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2987 }
2988 }
cd81e915
NC
2989 if (PL_thismad) /* install head */
2990 CURMAD('X', PL_thistoken);
5db06880
NC
2991 }
2992
2993 /* last whitespace of a sublex? */
cd81e915
NC
2994 if (optype == ')' && PL_endwhite) {
2995 CURMAD('X', PL_endwhite);
5db06880
NC
2996 }
2997
cd81e915 2998 if (!PL_thismad) {
5db06880
NC
2999
3000 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3001 if (!PL_thiswhite && !PL_endwhite && !optype) {
3002 sv_free(PL_thistoken);
3003 PL_thistoken = 0;
5db06880
NC
3004 return 0;
3005 }
3006
3007 /* put off final whitespace till peg */
3008 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3009 PL_nextwhite = PL_thiswhite;
3010 PL_thiswhite = 0;
5db06880 3011 }
cd81e915
NC
3012 else if (PL_thisopen) {
3013 CURMAD('q', PL_thisopen);
3014 if (PL_thistoken)
3015 sv_free(PL_thistoken);
3016 PL_thistoken = 0;
5db06880
NC
3017 }
3018 else {
3019 /* Store actual token text as madprop X */
cd81e915 3020 CURMAD('X', PL_thistoken);
5db06880
NC
3021 }
3022
cd81e915 3023 if (PL_thiswhite) {
5db06880 3024 /* add preceding whitespace as madprop _ */
cd81e915 3025 CURMAD('_', PL_thiswhite);
5db06880
NC
3026 }
3027
cd81e915 3028 if (PL_thisstuff) {
5db06880 3029 /* add quoted material as madprop = */
cd81e915 3030 CURMAD('=', PL_thisstuff);
5db06880
NC
3031 }
3032
cd81e915 3033 if (PL_thisclose) {
5db06880 3034 /* add terminating quote as madprop Q */
cd81e915 3035 CURMAD('Q', PL_thisclose);
5db06880
NC
3036 }
3037 }
3038
3039 /* special processing based on optype */
3040
3041 switch (optype) {
3042
3043 /* opval doesn't need a TOKEN since it can already store mp */
3044 case WORD:
3045 case METHOD:
3046 case FUNCMETH:
3047 case THING:
3048 case PMFUNC:
3049 case PRIVATEREF:
3050 case FUNC0SUB:
3051 case UNIOPSUB:
3052 case LSTOPSUB:
3053 if (yylval.opval)
cd81e915
NC
3054 append_madprops(PL_thismad, yylval.opval, 0);
3055 PL_thismad = 0;
5db06880
NC
3056 return optype;
3057
3058 /* fake EOF */
3059 case 0:
3060 optype = PEG;
cd81e915
NC
3061 if (PL_endwhite) {
3062 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3063 PL_endwhite = 0;
5db06880
NC
3064 }
3065 break;
3066
3067 case ']':
3068 case '}':
cd81e915 3069 if (PL_faketokens)
5db06880
NC
3070 break;
3071 /* remember any fake bracket that lexer is about to discard */
3072 if (PL_lex_brackets == 1 &&
3073 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3074 {
3075 s = PL_bufptr;
3076 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3077 s++;
3078 if (*s == '}') {
cd81e915
NC
3079 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3080 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3081 PL_thiswhite = 0;
5db06880
NC
3082 PL_bufptr = s - 1;
3083 break; /* don't bother looking for trailing comment */
3084 }
3085 else
3086 s = PL_bufptr;
3087 }
3088 if (optype == ']')
3089 break;
3090 /* FALLTHROUGH */
3091
3092 /* attach a trailing comment to its statement instead of next token */
3093 case ';':
cd81e915 3094 if (PL_faketokens)
5db06880
NC
3095 break;
3096 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3097 s = PL_bufptr;
3098 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3099 s++;
3100 if (*s == '\n' || *s == '#') {
3101 while (s < PL_bufend && *s != '\n')
3102 s++;
3103 if (s < PL_bufend)
3104 s++;
cd81e915
NC
3105 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3106 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3107 PL_thiswhite = 0;
5db06880
NC
3108 PL_bufptr = s;
3109 }
3110 }
3111 break;
3112
3113 /* pval */
3114 case LABEL:
3115 break;
3116
3117 /* ival */
3118 default:
3119 break;
3120
3121 }
3122
3123 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3124 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3125 PL_thismad = 0;
5db06880
NC
3126 return optype;
3127}
3128#endif
3129
468aa647 3130STATIC char *
cc6ed77d 3131S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3132 dVAR;
468aa647
RGS
3133 if (PL_expect != XSTATE)
3134 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3135 is_use ? "use" : "no"));
29595ff2 3136 s = SKIPSPACE1(s);
468aa647
RGS
3137 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3138 s = force_version(s, TRUE);
29595ff2 3139 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3140 start_force(PL_curforce);
9ded7720 3141 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3142 force_next(WORD);
3143 }
3144 else if (*s == 'v') {
3145 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3146 s = force_version(s, FALSE);
3147 }
3148 }
3149 else {
3150 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3151 s = force_version(s, FALSE);
3152 }
3153 yylval.ival = is_use;
3154 return s;
3155}
748a9306 3156#ifdef DEBUGGING
27da23d5 3157 static const char* const exp_name[] =
09bef843 3158 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3159 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3160 };
748a9306 3161#endif
463ee0b2 3162
02aa26ce
NT
3163/*
3164 yylex
3165
3166 Works out what to call the token just pulled out of the input
3167 stream. The yacc parser takes care of taking the ops we return and
3168 stitching them into a tree.
3169
3170 Returns:
3171 PRIVATEREF
3172
3173 Structure:
3174 if read an identifier
3175 if we're in a my declaration
3176 croak if they tried to say my($foo::bar)
3177 build the ops for a my() declaration
3178 if it's an access to a my() variable
3179 are we in a sort block?
3180 croak if my($a); $a <=> $b
3181 build ops for access to a my() variable
3182 if in a dq string, and they've said @foo and we can't find @foo
3183 croak
3184 build ops for a bareword
3185 if we already built the token before, use it.
3186*/
3187
20141f0e 3188
dba4d153
JH
3189#ifdef __SC__
3190#pragma segment Perl_yylex
3191#endif
dba4d153 3192int
dba4d153 3193Perl_yylex(pTHX)
20141f0e 3194{
97aff369 3195 dVAR;
3afc138a 3196 register char *s = PL_bufptr;
378cc40b 3197 register char *d;
463ee0b2 3198 STRLEN len;
aa7440fb 3199 bool bof = FALSE;
a687059c 3200
10edeb5d
JH
3201 /* orig_keyword, gvp, and gv are initialized here because
3202 * jump to the label just_a_word_zero can bypass their
3203 * initialization later. */
3204 I32 orig_keyword = 0;
3205 GV *gv = NULL;
3206 GV **gvp = NULL;
3207
bbf60fe6 3208 DEBUG_T( {
396482e1 3209 SV* tmp = newSVpvs("");
b6007c36
DM
3210 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3211 (IV)CopLINE(PL_curcop),
3212 lex_state_names[PL_lex_state],
3213 exp_name[PL_expect],
3214 pv_display(tmp, s, strlen(s), 0, 60));
3215 SvREFCNT_dec(tmp);
bbf60fe6 3216 } );
02aa26ce 3217 /* check if there's an identifier for us to look at */
ba979b31 3218 if (PL_pending_ident)
bbf60fe6 3219 return REPORT(S_pending_ident(aTHX));
bbce6d69 3220
02aa26ce
NT
3221 /* no identifier pending identification */
3222
3280af22 3223 switch (PL_lex_state) {
79072805
LW
3224#ifdef COMMENTARY
3225 case LEX_NORMAL: /* Some compilers will produce faster */
3226 case LEX_INTERPNORMAL: /* code if we comment these out. */
3227 break;
3228#endif
3229
09bef843 3230 /* when we've already built the next token, just pull it out of the queue */
79072805 3231 case LEX_KNOWNEXT:
5db06880
NC
3232#ifdef PERL_MAD
3233 PL_lasttoke--;
3234 yylval = PL_nexttoke[PL_lasttoke].next_val;
3235 if (PL_madskills) {
cd81e915 3236 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3237 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3238 if (PL_thismad && PL_thismad->mad_key == '_') {
3239 PL_thiswhite = (SV*)PL_thismad->mad_val;
3240 PL_thismad->mad_val = 0;
3241 mad_free(PL_thismad);
3242 PL_thismad = 0;
5db06880
NC
3243 }
3244 }
3245 if (!PL_lasttoke) {
3246 PL_lex_state = PL_lex_defer;
3247 PL_expect = PL_lex_expect;
3248 PL_lex_defer = LEX_NORMAL;
3249 if (!PL_nexttoke[PL_lasttoke].next_type)
3250 return yylex();
3251 }
3252#else
3280af22 3253 PL_nexttoke--;
5db06880 3254 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3255 if (!PL_nexttoke) {
3256 PL_lex_state = PL_lex_defer;
3257 PL_expect = PL_lex_expect;
3258 PL_lex_defer = LEX_NORMAL;
463ee0b2 3259 }
5db06880
NC
3260#endif
3261#ifdef PERL_MAD
3262 /* FIXME - can these be merged? */
3263 return(PL_nexttoke[PL_lasttoke].next_type);
3264#else
bbf60fe6 3265 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3266#endif
79072805 3267
02aa26ce 3268 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3269 when we get here, PL_bufptr is at the \
02aa26ce 3270 */
79072805
LW
3271 case LEX_INTERPCASEMOD:
3272#ifdef DEBUGGING
3280af22 3273 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3274 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3275#endif
02aa26ce 3276 /* handle \E or end of string */
3280af22 3277 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3278 /* if at a \E */
3280af22 3279 if (PL_lex_casemods) {
f54cb97a 3280 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3281 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3282
3792a11b
NC
3283 if (PL_bufptr != PL_bufend
3284 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3285 PL_bufptr += 2;
3286 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3287#ifdef PERL_MAD
3288 if (PL_madskills)
6b29d1f5 3289 PL_thistoken = newSVpvs("\\E");
5db06880 3290#endif
a0d0e21e 3291 }
bbf60fe6 3292 return REPORT(')');
79072805 3293 }
5db06880
NC
3294#ifdef PERL_MAD
3295 while (PL_bufptr != PL_bufend &&
3296 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3297 if (!PL_thiswhite)
6b29d1f5 3298 PL_thiswhite = newSVpvs("");
cd81e915 3299 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3300 PL_bufptr += 2;
3301 }
3302#else
3280af22
NIS
3303 if (PL_bufptr != PL_bufend)
3304 PL_bufptr += 2;
5db06880 3305#endif
3280af22 3306 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3307 return yylex();
79072805
LW
3308 }
3309 else {
607df283 3310 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3311 "### Saw case modifier\n"); });
3280af22 3312 s = PL_bufptr + 1;
6e909404 3313 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3314#ifdef PERL_MAD
cd81e915 3315 if (!PL_thiswhite)
6b29d1f5 3316 PL_thiswhite = newSVpvs("");
cd81e915 3317 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3318#endif
89122651 3319 PL_bufptr = s + 3;
6e909404
JH
3320 PL_lex_state = LEX_INTERPCONCAT;
3321 return yylex();
a0d0e21e 3322 }
6e909404 3323 else {
90771dc0 3324 I32 tmp;
5db06880
NC
3325 if (!PL_madskills) /* when just compiling don't need correct */
3326 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3327 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3328 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3329 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3330 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3331 return REPORT(')');
6e909404
JH
3332 }
3333 if (PL_lex_casemods > 10)
3334 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3335 PL_lex_casestack[PL_lex_casemods++] = *s;
3336 PL_lex_casestack[PL_lex_casemods] = '\0';
3337 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3338 start_force(PL_curforce);
9ded7720 3339 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3340 force_next('(');
cd81e915 3341 start_force(PL_curforce);
6e909404 3342 if (*s == 'l')
9ded7720 3343 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3344 else if (*s == 'u')
9ded7720 3345 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3346 else if (*s == 'L')
9ded7720 3347 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3348 else if (*s == 'U')
9ded7720 3349 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3350 else if (*s == 'Q')
9ded7720 3351 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3352 else
3353 Perl_croak(aTHX_ "panic: yylex");
5db06880 3354 if (PL_madskills) {
6b29d1f5 3355 SV* const tmpsv = newSVpvs("");
5db06880
NC
3356 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3357 curmad('_', tmpsv);
3358 }
6e909404 3359 PL_bufptr = s + 1;
a0d0e21e 3360 }
79072805 3361 force_next(FUNC);
3280af22
NIS
3362 if (PL_lex_starts) {
3363 s = PL_bufptr;
3364 PL_lex_starts = 0;
5db06880
NC
3365#ifdef PERL_MAD
3366 if (PL_madskills) {
cd81e915
NC
3367 if (PL_thistoken)
3368 sv_free(PL_thistoken);
6b29d1f5 3369 PL_thistoken = newSVpvs("");
5db06880
NC
3370 }
3371#endif
131b3ad0
DM
3372 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3373 if (PL_lex_casemods == 1 && PL_lex_inpat)
3374 OPERATOR(',');
3375 else
3376 Aop(OP_CONCAT);
79072805
LW
3377 }
3378 else
cea2e8a9 3379 return yylex();
79072805
LW
3380 }
3381
55497cff 3382 case LEX_INTERPPUSH:
bbf60fe6 3383 return REPORT(sublex_push());
55497cff 3384
79072805 3385 case LEX_INTERPSTART:
3280af22 3386 if (PL_bufptr == PL_bufend)
bbf60fe6 3387 return REPORT(sublex_done());
607df283 3388 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3389 "### Interpolated variable\n"); });
3280af22
NIS
3390 PL_expect = XTERM;
3391 PL_lex_dojoin = (*PL_bufptr == '@');
3392 PL_lex_state = LEX_INTERPNORMAL;
3393 if (PL_lex_dojoin) {
cd81e915 3394 start_force(PL_curforce);
9ded7720 3395 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3396 force_next(',');
cd81e915 3397 start_force(PL_curforce);
a0d0e21e 3398 force_ident("\"", '$');
cd81e915 3399 start_force(PL_curforce);
9ded7720 3400 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3401 force_next('$');
cd81e915 3402 start_force(PL_curforce);
9ded7720 3403 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3404 force_next('(');
cd81e915 3405 start_force(PL_curforce);
9ded7720 3406 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3407 force_next(FUNC);
3408 }
3280af22
NIS
3409 if (PL_lex_starts++) {
3410 s = PL_bufptr;
5db06880
NC
3411#ifdef PERL_MAD
3412 if (PL_madskills) {
cd81e915
NC
3413 if (PL_thistoken)
3414 sv_free(PL_thistoken);
6b29d1f5 3415 PL_thistoken = newSVpvs("");
5db06880
NC
3416 }
3417#endif
131b3ad0
DM
3418 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3419 if (!PL_lex_casemods && PL_lex_inpat)
3420 OPERATOR(',');
3421 else
3422 Aop(OP_CONCAT);
79072805 3423 }
cea2e8a9 3424 return yylex();
79072805
LW
3425
3426 case LEX_INTERPENDMAYBE:
3280af22
NIS
3427 if (intuit_more(PL_bufptr)) {
3428 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3429 break;
3430 }
3431 /* FALL THROUGH */
3432
3433 case LEX_INTERPEND:
3280af22
NIS
3434 if (PL_lex_dojoin) {
3435 PL_lex_dojoin = FALSE;
3436 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3437#ifdef PERL_MAD
3438 if (PL_madskills) {
cd81e915
NC
3439 if (PL_thistoken)
3440 sv_free(PL_thistoken);
6b29d1f5 3441 PL_thistoken = newSVpvs("");
5db06880
NC
3442 }
3443#endif
bbf60fe6 3444 return REPORT(')');
79072805 3445 }
43a16006 3446 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3447 && SvEVALED(PL_lex_repl))
43a16006 3448 {
e9fa98b2 3449 if (PL_bufptr != PL_bufend)
cea2e8a9 3450 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3451 PL_lex_repl = NULL;
e9fa98b2 3452 }
79072805
LW
3453 /* FALLTHROUGH */
3454 case LEX_INTERPCONCAT:
3455#ifdef DEBUGGING
3280af22 3456 if (PL_lex_brackets)
cea2e8a9 3457 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3458#endif
3280af22 3459 if (PL_bufptr == PL_bufend)
bbf60fe6 3460 return REPORT(sublex_done());
79072805 3461
3280af22
NIS
3462 if (SvIVX(PL_linestr) == '\'') {
3463 SV *sv = newSVsv(PL_linestr);
3464 if (!PL_lex_inpat)
76e3520e 3465 sv = tokeq(sv);
3280af22 3466 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3467 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3468 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3469 s = PL_bufend;
79072805
LW
3470 }
3471 else {
3280af22 3472 s = scan_const(PL_bufptr);
79072805 3473 if (*s == '\\')
3280af22 3474 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3475 else
3280af22 3476 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3477 }
3478
3280af22 3479 if (s != PL_bufptr) {
cd81e915 3480 start_force(PL_curforce);
5db06880
NC
3481 if (PL_madskills) {
3482 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3483 }
9ded7720 3484 NEXTVAL_NEXTTOKE = yylval;
3280af22 3485 PL_expect = XTERM;
79072805 3486 force_next(THING);
131b3ad0 3487 if (PL_lex_starts++) {
5db06880
NC
3488#ifdef PERL_MAD
3489 if (PL_madskills) {
cd81e915
NC
3490 if (PL_thistoken)
3491 sv_free(PL_thistoken);
6b29d1f5 3492 PL_thistoken = newSVpvs("");
5db06880
NC
3493 }
3494#endif
131b3ad0
DM
3495 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3496 if (!PL_lex_casemods && PL_lex_inpat)
3497 OPERATOR(',');
3498 else
3499 Aop(OP_CONCAT);
3500 }
79072805 3501 else {
3280af22 3502 PL_bufptr = s;
cea2e8a9 3503 return yylex();
79072805
LW
3504 }
3505 }
3506
cea2e8a9 3507 return yylex();
a0d0e21e 3508 case LEX_FORMLINE:
3280af22
NIS
3509 PL_lex_state = LEX_NORMAL;
3510 s = scan_formline(PL_bufptr);
3511 if (!PL_lex_formbrack)
a0d0e21e
LW
3512 goto rightbracket;
3513 OPERATOR(';');
79072805
LW
3514 }
3515
3280af22
NIS
3516 s = PL_bufptr;
3517 PL_oldoldbufptr = PL_oldbufptr;
3518 PL_oldbufptr = s;
463ee0b2
LW
3519
3520 retry:
5db06880 3521#ifdef PERL_MAD
cd81e915
NC
3522 if (PL_thistoken) {
3523 sv_free(PL_thistoken);
3524 PL_thistoken = 0;
5db06880 3525 }
cd81e915 3526 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3527#endif
378cc40b
LW
3528 switch (*s) {
3529 default:
7e2040f0 3530 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3531 goto keylookup;
cea2e8a9 3532 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3533 case 4:
3534 case 26:
3535 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3536 case 0:
5db06880
NC
3537#ifdef PERL_MAD
3538 if (PL_madskills)
cd81e915 3539 PL_faketokens = 0;
5db06880 3540#endif
3280af22
NIS
3541 if (!PL_rsfp) {
3542 PL_last_uni = 0;
3543 PL_last_lop = 0;
c5ee2135 3544 if (PL_lex_brackets) {
10edeb5d
JH
3545 yyerror((const char *)
3546 (PL_lex_formbrack
3547 ? "Format not terminated"
3548 : "Missing right curly or square bracket"));
c5ee2135 3549 }
4e553d73 3550 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3551 "### Tokener got EOF\n");
5f80b19c 3552 } );
79072805 3553 TOKEN(0);
463ee0b2 3554 }
3280af22 3555 if (s++ < PL_bufend)
a687059c 3556 goto retry; /* ignore stray nulls */
3280af22
NIS
3557 PL_last_uni = 0;
3558 PL_last_lop = 0;
3559 if (!PL_in_eval && !PL_preambled) {
3560 PL_preambled = TRUE;
5db06880
NC
3561#ifdef PERL_MAD
3562 if (PL_madskills)
cd81e915 3563 PL_faketokens = 1;
5db06880