This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Coverity is a persistent beast. Hot on the tails of fixing one leak,
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a
PP
71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
301 { 0, TOKENTYPE_NONE, 0 }
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
d4c19fe8
AL
432 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 /**/;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
501 char he_name[32] = "feature_";
502 (void) strncpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
664 PL_linestr = line;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 667 s = SvPV_const(PL_linestr, len);
6f27f9a7 668 if (!len || s[len-1] != ';') {
3280af22
NIS
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 671 sv_catpvs(PL_linestr, "\n;");
8990e307 672 }
3280af22
NIS
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 676 PL_last_lop = PL_last_uni = NULL;
3280af22 677 PL_rsfp = 0;
79072805 678}
a687059c 679
ffb4593c
NT
680/*
681 * Perl_lex_end
9cbb5ea2
GS
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
ffb4593c
NT
684 */
685
463ee0b2 686void
864dbfa3 687Perl_lex_end(pTHX)
463ee0b2 688{
97aff369 689 dVAR;
3280af22 690 PL_doextract = FALSE;
463ee0b2
LW
691}
692
ffb4593c
NT
693/*
694 * S_incline
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
57843af0 697 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 698 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_incline(pTHX_ char *s)
463ee0b2 705{
97aff369 706 dVAR;
463ee0b2
LW
707 char *t;
708 char *n;
73659bf1 709 char *e;
463ee0b2 710 char ch;
463ee0b2 711
57843af0 712 CopLINE_inc(PL_curcop);
463ee0b2
LW
713 if (*s++ != '#')
714 return;
d4c19fe8
AL
715 while (SPACE_OR_TAB(*s))
716 s++;
73659bf1
GS
717 if (strnEQ(s, "line", 4))
718 s += 4;
719 else
720 return;
084592ab 721 if (SPACE_OR_TAB(*s))
73659bf1 722 s++;
4e553d73 723 else
73659bf1 724 return;
d4c19fe8
AL
725 while (SPACE_OR_TAB(*s))
726 s++;
463ee0b2
LW
727 if (!isDIGIT(*s))
728 return;
d4c19fe8 729
463ee0b2
LW
730 n = s;
731 while (isDIGIT(*s))
732 s++;
bf4acbe4 733 while (SPACE_OR_TAB(*s))
463ee0b2 734 s++;
73659bf1 735 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 736 s++;
73659bf1
GS
737 e = t + 1;
738 }
463ee0b2 739 else {
463ee0b2 740 for (t = s; !isSPACE(*t); t++) ;
73659bf1 741 e = t;
463ee0b2 742 }
bf4acbe4 743 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
744 e++;
745 if (*e != '\n' && *e != '\0')
746 return; /* false alarm */
747
463ee0b2
LW
748 ch = *t;
749 *t = '\0';
f4dd75d9 750 if (t - s > 0) {
8a5ee598 751#ifndef USE_ITHREADS
c4420975 752 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
753 STRLEN tmplen = cf ? strlen(cf) : 0;
754 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
755 /* must copy *{"::_<(eval N)[oldfilename:L]"}
756 * to *{"::_<newfilename"} */
757 char smallbuf[256], smallbuf2[256];
758 char *tmpbuf, *tmpbuf2;
8a5ee598 759 GV **gvp, *gv2;
e66cf94c
RGS
760 STRLEN tmplen2 = strlen(s);
761 if (tmplen + 3 < sizeof smallbuf)
762 tmpbuf = smallbuf;
763 else
764 Newx(tmpbuf, tmplen + 3, char);
765 if (tmplen2 + 3 < sizeof smallbuf2)
766 tmpbuf2 = smallbuf2;
767 else
768 Newx(tmpbuf2, tmplen2 + 3, char);
769 tmpbuf[0] = tmpbuf2[0] = '_';
770 tmpbuf[1] = tmpbuf2[1] = '<';
771 memcpy(tmpbuf + 2, cf, ++tmplen);
772 memcpy(tmpbuf2 + 2, s, ++tmplen2);
773 ++tmplen; ++tmplen2;
8a5ee598
RGS
774 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
775 if (gvp) {
776 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
777 if (!isGV(gv2))
778 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
779 /* adjust ${"::_<newfilename"} to store the new file name */
780 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
781 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
782 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
783 }
e66cf94c
RGS
784 if (tmpbuf != smallbuf) Safefree(tmpbuf);
785 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
786 }
8a5ee598 787#endif
05ec9bb3 788 CopFILE_free(PL_curcop);
57843af0 789 CopFILE_set(PL_curcop, s);
f4dd75d9 790 }
463ee0b2 791 *t = ch;
57843af0 792 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
793}
794
29595ff2 795#ifdef PERL_MAD
cd81e915 796/* skip space before PL_thistoken */
29595ff2
NC
797
798STATIC char *
799S_skipspace0(pTHX_ register char *s)
800{
801 s = skipspace(s);
802 if (!PL_madskills)
803 return s;
cd81e915
NC
804 if (PL_skipwhite) {
805 if (!PL_thiswhite)
806 PL_thiswhite = newSVpvn("",0);
807 sv_catsv(PL_thiswhite, PL_skipwhite);
808 sv_free(PL_skipwhite);
809 PL_skipwhite = 0;
810 }
811 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
812 return s;
813}
814
cd81e915 815/* skip space after PL_thistoken */
29595ff2
NC
816
817STATIC char *
818S_skipspace1(pTHX_ register char *s)
819{
d4c19fe8 820 const char *start = s;
29595ff2
NC
821 I32 startoff = start - SvPVX(PL_linestr);
822
823 s = skipspace(s);
824 if (!PL_madskills)
825 return s;
826 start = SvPVX(PL_linestr) + startoff;
cd81e915 827 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 828 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
829 PL_thistoken = newSVpvn(tstart, start - tstart);
830 }
831 PL_realtokenstart = -1;
832 if (PL_skipwhite) {
833 if (!PL_nextwhite)
834 PL_nextwhite = newSVpvn("",0);
835 sv_catsv(PL_nextwhite, PL_skipwhite);
836 sv_free(PL_skipwhite);
837 PL_skipwhite = 0;
29595ff2
NC
838 }
839 return s;
840}
841
842STATIC char *
843S_skipspace2(pTHX_ register char *s, SV **svp)
844{
845 char *start = s;
846 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
847 I32 startoff = start - SvPVX(PL_linestr);
848 s = skipspace(s);
849 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
850 if (!PL_madskills || !svp)
851 return s;
852 start = SvPVX(PL_linestr) + startoff;
cd81e915 853 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 854 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
855 PL_thistoken = newSVpvn(tstart, start - tstart);
856 PL_realtokenstart = -1;
29595ff2 857 }
cd81e915 858 if (PL_skipwhite) {
29595ff2
NC
859 if (!*svp)
860 *svp = newSVpvn("",0);
cd81e915
NC
861 sv_setsv(*svp, PL_skipwhite);
862 sv_free(PL_skipwhite);
863 PL_skipwhite = 0;
29595ff2
NC
864 }
865
866 return s;
867}
868#endif
869
ffb4593c
NT
870/*
871 * S_skipspace
872 * Called to gobble the appropriate amount and type of whitespace.
873 * Skips comments as well.
874 */
875
76e3520e 876STATIC char *
cea2e8a9 877S_skipspace(pTHX_ register char *s)
a687059c 878{
97aff369 879 dVAR;
5db06880
NC
880#ifdef PERL_MAD
881 int curoff;
882 int startoff = s - SvPVX(PL_linestr);
883
cd81e915
NC
884 if (PL_skipwhite) {
885 sv_free(PL_skipwhite);
886 PL_skipwhite = 0;
5db06880
NC
887 }
888#endif
889
3280af22 890 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 891 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 892 s++;
5db06880
NC
893#ifdef PERL_MAD
894 goto done;
895#else
463ee0b2 896 return s;
5db06880 897#endif
463ee0b2
LW
898 }
899 for (;;) {
fd049845 900 STRLEN prevlen;
09bef843 901 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 902 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
903 while (s < PL_bufend && isSPACE(*s)) {
904 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
905 incline(s);
906 }
ffb4593c
NT
907
908 /* comment */
3280af22
NIS
909 if (s < PL_bufend && *s == '#') {
910 while (s < PL_bufend && *s != '\n')
463ee0b2 911 s++;
60e6418e 912 if (s < PL_bufend) {
463ee0b2 913 s++;
60e6418e
GS
914 if (PL_in_eval && !PL_rsfp) {
915 incline(s);
916 continue;
917 }
918 }
463ee0b2 919 }
ffb4593c
NT
920
921 /* only continue to recharge the buffer if we're at the end
922 * of the buffer, we're not reading from a source filter, and
923 * we're in normal lexing mode
924 */
09bef843
SB
925 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
926 PL_lex_state == LEX_FORMLINE)
5db06880
NC
927#ifdef PERL_MAD
928 goto done;
929#else
463ee0b2 930 return s;
5db06880 931#endif
ffb4593c
NT
932
933 /* try to recharge the buffer */
5db06880
NC
934#ifdef PERL_MAD
935 curoff = s - SvPVX(PL_linestr);
936#endif
937
9cbb5ea2 938 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 939 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 940 {
5db06880
NC
941#ifdef PERL_MAD
942 if (PL_madskills && curoff != startoff) {
cd81e915
NC
943 if (!PL_skipwhite)
944 PL_skipwhite = newSVpvn("",0);
945 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
946 curoff - startoff);
947 }
948
949 /* mustn't throw out old stuff yet if madpropping */
950 SvCUR(PL_linestr) = curoff;
951 s = SvPVX(PL_linestr) + curoff;
952 *s = 0;
953 if (curoff && s[-1] == '\n')
954 s[-1] = ' ';
955#endif
956
9cbb5ea2 957 /* end of file. Add on the -p or -n magic */
cd81e915 958 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 959 if (PL_minus_p) {
5db06880
NC
960#ifdef PERL_MAD
961 sv_catpv(PL_linestr,
962 ";}continue{print or die qq(-p destination: $!\\n);}");
963#else
01a19ab0
NC
964 sv_setpv(PL_linestr,
965 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 966#endif
3280af22 967 PL_minus_n = PL_minus_p = 0;
a0d0e21e 968 }
01a19ab0 969 else if (PL_minus_n) {
5db06880
NC
970#ifdef PERL_MAD
971 sv_catpvn(PL_linestr, ";}", 2);
972#else
01a19ab0 973 sv_setpvn(PL_linestr, ";}", 2);
5db06880 974#endif
01a19ab0
NC
975 PL_minus_n = 0;
976 }
a0d0e21e 977 else
5db06880
NC
978#ifdef PERL_MAD
979 sv_catpvn(PL_linestr,";", 1);
980#else
4147a61b 981 sv_setpvn(PL_linestr,";", 1);
5db06880 982#endif
ffb4593c
NT
983
984 /* reset variables for next time we lex */
9cbb5ea2 985 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
986 = SvPVX(PL_linestr)
987#ifdef PERL_MAD
988 + curoff
989#endif
990 ;
3280af22 991 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 992 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
993
994 /* Close the filehandle. Could be from -P preprocessor,
995 * STDIN, or a regular file. If we were reading code from
996 * STDIN (because the commandline held no -e or filename)
997 * then we don't close it, we reset it so the code can
998 * read from STDIN too.
999 */
1000
3280af22
NIS
1001 if (PL_preprocess && !PL_in_eval)
1002 (void)PerlProc_pclose(PL_rsfp);
1003 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1004 PerlIO_clearerr(PL_rsfp);
8990e307 1005 else
3280af22 1006 (void)PerlIO_close(PL_rsfp);
4608196e 1007 PL_rsfp = NULL;
463ee0b2
LW
1008 return s;
1009 }
ffb4593c
NT
1010
1011 /* not at end of file, so we only read another line */
09bef843
SB
1012 /* make corresponding updates to old pointers, for yyerror() */
1013 oldprevlen = PL_oldbufptr - PL_bufend;
1014 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1015 if (PL_last_uni)
1016 oldunilen = PL_last_uni - PL_bufend;
1017 if (PL_last_lop)
1018 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1019 PL_linestart = PL_bufptr = s + prevlen;
1020 PL_bufend = s + SvCUR(PL_linestr);
1021 s = PL_bufptr;
09bef843
SB
1022 PL_oldbufptr = s + oldprevlen;
1023 PL_oldoldbufptr = s + oldoldprevlen;
1024 if (PL_last_uni)
1025 PL_last_uni = s + oldunilen;
1026 if (PL_last_lop)
1027 PL_last_lop = s + oldloplen;
a0d0e21e 1028 incline(s);
ffb4593c
NT
1029
1030 /* debugger active and we're not compiling the debugger code,
1031 * so store the line into the debugger's array of lines
1032 */
3280af22 1033 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1034 SV * const sv = newSV(0);
8990e307
LW
1035
1036 sv_upgrade(sv, SVt_PVMG);
3280af22 1037 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1038 (void)SvIOK_on(sv);
45977657 1039 SvIV_set(sv, 0);
36c7798d 1040 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1041 }
463ee0b2 1042 }
5db06880
NC
1043
1044#ifdef PERL_MAD
1045 done:
1046 if (PL_madskills) {
cd81e915
NC
1047 if (!PL_skipwhite)
1048 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1049 curoff = s - SvPVX(PL_linestr);
1050 if (curoff - startoff)
cd81e915 1051 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1052 curoff - startoff);
1053 }
1054 return s;
1055#endif
a687059c 1056}
378cc40b 1057
ffb4593c
NT
1058/*
1059 * S_check_uni
1060 * Check the unary operators to ensure there's no ambiguity in how they're
1061 * used. An ambiguous piece of code would be:
1062 * rand + 5
1063 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1064 * the +5 is its argument.
1065 */
1066
76e3520e 1067STATIC void
cea2e8a9 1068S_check_uni(pTHX)
ba106d47 1069{
97aff369 1070 dVAR;
d4c19fe8
AL
1071 const char *s;
1072 const char *t;
2f3197b3 1073
3280af22 1074 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1075 return;
3280af22
NIS
1076 while (isSPACE(*PL_last_uni))
1077 PL_last_uni++;
d4c19fe8
AL
1078 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++)
1079 /**/;
3280af22 1080 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1081 return;
6136c704 1082
0453d815 1083 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1084 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1085 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1086 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1087 }
2f3197b3
LW
1088}
1089
ffb4593c
NT
1090/*
1091 * LOP : macro to build a list operator. Its behaviour has been replaced
1092 * with a subroutine, S_lop() for which LOP is just another name.
1093 */
1094
a0d0e21e
LW
1095#define LOP(f,x) return lop(f,x,s)
1096
ffb4593c
NT
1097/*
1098 * S_lop
1099 * Build a list operator (or something that might be one). The rules:
1100 * - if we have a next token, then it's a list operator [why?]
1101 * - if the next thing is an opening paren, then it's a function
1102 * - else it's a list operator
1103 */
1104
76e3520e 1105STATIC I32
a0be28da 1106S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1107{
97aff369 1108 dVAR;
79072805 1109 yylval.ival = f;
35c8bce7 1110 CLINE;
3280af22
NIS
1111 PL_expect = x;
1112 PL_bufptr = s;
1113 PL_last_lop = PL_oldbufptr;
eb160463 1114 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1115#ifdef PERL_MAD
1116 if (PL_lasttoke)
1117 return REPORT(LSTOP);
1118#else
3280af22 1119 if (PL_nexttoke)
bbf60fe6 1120 return REPORT(LSTOP);
5db06880 1121#endif
79072805 1122 if (*s == '(')
bbf60fe6 1123 return REPORT(FUNC);
29595ff2 1124 s = PEEKSPACE(s);
79072805 1125 if (*s == '(')
bbf60fe6 1126 return REPORT(FUNC);
79072805 1127 else
bbf60fe6 1128 return REPORT(LSTOP);
79072805
LW
1129}
1130
5db06880
NC
1131#ifdef PERL_MAD
1132 /*
1133 * S_start_force
1134 * Sets up for an eventual force_next(). start_force(0) basically does
1135 * an unshift, while start_force(-1) does a push. yylex removes items
1136 * on the "pop" end.
1137 */
1138
1139STATIC void
1140S_start_force(pTHX_ int where)
1141{
1142 int i;
1143
cd81e915 1144 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1145 where = PL_lasttoke;
cd81e915
NC
1146 assert(PL_curforce < 0 || PL_curforce == where);
1147 if (PL_curforce != where) {
5db06880
NC
1148 for (i = PL_lasttoke; i > where; --i) {
1149 PL_nexttoke[i] = PL_nexttoke[i-1];
1150 }
1151 PL_lasttoke++;
1152 }
cd81e915 1153 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1154 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1155 PL_curforce = where;
1156 if (PL_nextwhite) {
5db06880
NC
1157 if (PL_madskills)
1158 curmad('^', newSVpvn("",0));
cd81e915 1159 CURMAD('_', PL_nextwhite);
5db06880
NC
1160 }
1161}
1162
1163STATIC void
1164S_curmad(pTHX_ char slot, SV *sv)
1165{
1166 MADPROP **where;
1167
1168 if (!sv)
1169 return;
cd81e915
NC
1170 if (PL_curforce < 0)
1171 where = &PL_thismad;
5db06880 1172 else
cd81e915 1173 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1174
cd81e915 1175 if (PL_faketokens)
5db06880
NC
1176 sv_setpvn(sv, "", 0);
1177 else {
1178 if (!IN_BYTES) {
1179 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1180 SvUTF8_on(sv);
1181 else if (PL_encoding) {
1182 sv_recode_to_utf8(sv, PL_encoding);
1183 }
1184 }
1185 }
1186
1187 /* keep a slot open for the head of the list? */
1188 if (slot != '_' && *where && (*where)->mad_key == '^') {
1189 (*where)->mad_key = slot;
1190 sv_free((*where)->mad_val);
1191 (*where)->mad_val = (void*)sv;
1192 }
1193 else
1194 addmad(newMADsv(slot, sv), where, 0);
1195}
1196#else
d4c19fe8
AL
1197# define start_force(where) /*EMPTY*/
1198# define curmad(slot, sv) /*EMPTY*/
5db06880
NC
1199#endif
1200
ffb4593c
NT
1201/*
1202 * S_force_next
9cbb5ea2 1203 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1204 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1205 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1206 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1207 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1208 */
1209
4e553d73 1210STATIC void
cea2e8a9 1211S_force_next(pTHX_ I32 type)
79072805 1212{
97aff369 1213 dVAR;
5db06880 1214#ifdef PERL_MAD
cd81e915 1215 if (PL_curforce < 0)
5db06880 1216 start_force(PL_lasttoke);
cd81e915 1217 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1218 if (PL_lex_state != LEX_KNOWNEXT)
1219 PL_lex_defer = PL_lex_state;
1220 PL_lex_state = LEX_KNOWNEXT;
1221 PL_lex_expect = PL_expect;
cd81e915 1222 PL_curforce = -1;
5db06880 1223#else
3280af22
NIS
1224 PL_nexttype[PL_nexttoke] = type;
1225 PL_nexttoke++;
1226 if (PL_lex_state != LEX_KNOWNEXT) {
1227 PL_lex_defer = PL_lex_state;
1228 PL_lex_expect = PL_expect;
1229 PL_lex_state = LEX_KNOWNEXT;
79072805 1230 }
5db06880 1231#endif
79072805
LW
1232}
1233
d0a148a6
NC
1234STATIC SV *
1235S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1236{
97aff369 1237 dVAR;
9d4ba2ae 1238 SV * const sv = newSVpvn(start,len);
bfed75c6 1239 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1240 SvUTF8_on(sv);
1241 return sv;
1242}
1243
ffb4593c
NT
1244/*
1245 * S_force_word
1246 * When the lexer knows the next thing is a word (for instance, it has
1247 * just seen -> and it knows that the next char is a word char, then
1248 * it calls S_force_word to stick the next word into the PL_next lookahead.
1249 *
1250 * Arguments:
b1b65b59 1251 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1252 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1253 * int check_keyword : if true, Perl checks to make sure the word isn't
1254 * a keyword (do this if the word is a label, e.g. goto FOO)
1255 * int allow_pack : if true, : characters will also be allowed (require,
1256 * use, etc. do this)
9cbb5ea2 1257 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1258 */
1259
76e3520e 1260STATIC char *
cea2e8a9 1261S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1262{
97aff369 1263 dVAR;
463ee0b2
LW
1264 register char *s;
1265 STRLEN len;
4e553d73 1266
29595ff2 1267 start = SKIPSPACE1(start);
463ee0b2 1268 s = start;
7e2040f0 1269 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1270 (allow_pack && *s == ':') ||
15f0808c 1271 (allow_initial_tick && *s == '\'') )
a0d0e21e 1272 {
3280af22
NIS
1273 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1274 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1275 return start;
cd81e915 1276 start_force(PL_curforce);
5db06880
NC
1277 if (PL_madskills)
1278 curmad('X', newSVpvn(start,s-start));
463ee0b2 1279 if (token == METHOD) {
29595ff2 1280 s = SKIPSPACE1(s);
463ee0b2 1281 if (*s == '(')
3280af22 1282 PL_expect = XTERM;
463ee0b2 1283 else {
3280af22 1284 PL_expect = XOPERATOR;
463ee0b2 1285 }
79072805 1286 }
9ded7720 1287 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1288 = (OP*)newSVOP(OP_CONST,0,
1289 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1290 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1291 force_next(token);
1292 }
1293 return s;
1294}
1295
ffb4593c
NT
1296/*
1297 * S_force_ident
9cbb5ea2 1298 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1299 * text only contains the "foo" portion. The first argument is a pointer
1300 * to the "foo", and the second argument is the type symbol to prefix.
1301 * Forces the next token to be a "WORD".
9cbb5ea2 1302 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1303 */
1304
76e3520e 1305STATIC void
bfed75c6 1306S_force_ident(pTHX_ register const char *s, int kind)
79072805 1307{
97aff369 1308 dVAR;
79072805 1309 if (s && *s) {
90e5519e
NC
1310 const STRLEN len = strlen(s);
1311 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1312 start_force(PL_curforce);
9ded7720 1313 NEXTVAL_NEXTTOKE.opval = o;
79072805 1314 force_next(WORD);
748a9306 1315 if (kind) {
11343788 1316 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1317 /* XXX see note in pp_entereval() for why we forgo typo
1318 warnings if the symbol must be introduced in an eval.
1319 GSAR 96-10-12 */
90e5519e
NC
1320 gv_fetchpvn_flags(s, len,
1321 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1322 : GV_ADD,
1323 kind == '$' ? SVt_PV :
1324 kind == '@' ? SVt_PVAV :
1325 kind == '%' ? SVt_PVHV :
a0d0e21e 1326 SVt_PVGV
90e5519e 1327 );
748a9306 1328 }
79072805
LW
1329 }
1330}
1331
1571675a
GS
1332NV
1333Perl_str_to_version(pTHX_ SV *sv)
1334{
1335 NV retval = 0.0;
1336 NV nshift = 1.0;
1337 STRLEN len;
cfd0369c 1338 const char *start = SvPV_const(sv,len);
9d4ba2ae 1339 const char * const end = start + len;
504618e9 1340 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1341 while (start < end) {
ba210ebe 1342 STRLEN skip;
1571675a
GS
1343 UV n;
1344 if (utf)
9041c2e3 1345 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1346 else {
1347 n = *(U8*)start;
1348 skip = 1;
1349 }
1350 retval += ((NV)n)/nshift;
1351 start += skip;
1352 nshift *= 1000;
1353 }
1354 return retval;
1355}
1356
4e553d73 1357/*
ffb4593c
NT
1358 * S_force_version
1359 * Forces the next token to be a version number.
e759cc13
RGS
1360 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1361 * and if "guessing" is TRUE, then no new token is created (and the caller
1362 * must use an alternative parsing method).
ffb4593c
NT
1363 */
1364
76e3520e 1365STATIC char *
e759cc13 1366S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1367{
97aff369 1368 dVAR;
5f66b61c 1369 OP *version = NULL;
44dcb63b 1370 char *d;
5db06880
NC
1371#ifdef PERL_MAD
1372 I32 startoff = s - SvPVX(PL_linestr);
1373#endif
89bfa8cd 1374
29595ff2 1375 s = SKIPSPACE1(s);
89bfa8cd 1376
44dcb63b 1377 d = s;
dd629d5b 1378 if (*d == 'v')
44dcb63b 1379 d++;
44dcb63b 1380 if (isDIGIT(*d)) {
e759cc13
RGS
1381 while (isDIGIT(*d) || *d == '_' || *d == '.')
1382 d++;
5db06880
NC
1383#ifdef PERL_MAD
1384 if (PL_madskills) {
cd81e915 1385 start_force(PL_curforce);
5db06880
NC
1386 curmad('X', newSVpvn(s,d-s));
1387 }
1388#endif
9f3d182e 1389 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1390 SV *ver;
b73d6f50 1391 s = scan_num(s, &yylval);
89bfa8cd 1392 version = yylval.opval;
dd629d5b
GS
1393 ver = cSVOPx(version)->op_sv;
1394 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1395 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1396 SvNV_set(ver, str_to_version(ver));
1571675a 1397 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1398 }
89bfa8cd 1399 }
5db06880
NC
1400 else if (guessing) {
1401#ifdef PERL_MAD
1402 if (PL_madskills) {
cd81e915
NC
1403 sv_free(PL_nextwhite); /* let next token collect whitespace */
1404 PL_nextwhite = 0;
5db06880
NC
1405 s = SvPVX(PL_linestr) + startoff;
1406 }
1407#endif
e759cc13 1408 return s;
5db06880 1409 }
89bfa8cd
PP
1410 }
1411
5db06880
NC
1412#ifdef PERL_MAD
1413 if (PL_madskills && !version) {
cd81e915
NC
1414 sv_free(PL_nextwhite); /* let next token collect whitespace */
1415 PL_nextwhite = 0;
5db06880
NC
1416 s = SvPVX(PL_linestr) + startoff;
1417 }
1418#endif
89bfa8cd 1419 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1420 start_force(PL_curforce);
9ded7720 1421 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1422 force_next(WORD);
89bfa8cd 1423
e759cc13 1424 return s;
89bfa8cd
PP
1425}
1426
ffb4593c
NT
1427/*
1428 * S_tokeq
1429 * Tokenize a quoted string passed in as an SV. It finds the next
1430 * chunk, up to end of string or a backslash. It may make a new
1431 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1432 * turns \\ into \.
1433 */
1434
76e3520e 1435STATIC SV *
cea2e8a9 1436S_tokeq(pTHX_ SV *sv)
79072805 1437{
97aff369 1438 dVAR;
79072805
LW
1439 register char *s;
1440 register char *send;
1441 register char *d;
b3ac6de7
IZ
1442 STRLEN len = 0;
1443 SV *pv = sv;
79072805
LW
1444
1445 if (!SvLEN(sv))
b3ac6de7 1446 goto finish;
79072805 1447
a0d0e21e 1448 s = SvPV_force(sv, len);
21a311ee 1449 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1450 goto finish;
463ee0b2 1451 send = s + len;
79072805
LW
1452 while (s < send && *s != '\\')
1453 s++;
1454 if (s == send)
b3ac6de7 1455 goto finish;
79072805 1456 d = s;
be4731d2 1457 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1458 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1459 if (SvUTF8(sv))
1460 SvUTF8_on(pv);
1461 }
79072805
LW
1462 while (s < send) {
1463 if (*s == '\\') {
a0d0e21e 1464 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1465 s++; /* all that, just for this */
1466 }
1467 *d++ = *s++;
1468 }
1469 *d = '\0';
95a20fc0 1470 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1471 finish:
3280af22 1472 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1473 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1474 return sv;
1475}
1476
ffb4593c
NT
1477/*
1478 * Now come three functions related to double-quote context,
1479 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1480 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1481 * interact with PL_lex_state, and create fake ( ... ) argument lists
1482 * to handle functions and concatenation.
1483 * They assume that whoever calls them will be setting up a fake
1484 * join call, because each subthing puts a ',' after it. This lets
1485 * "lower \luPpEr"
1486 * become
1487 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1488 *
1489 * (I'm not sure whether the spurious commas at the end of lcfirst's
1490 * arguments and join's arguments are created or not).
1491 */
1492
1493/*
1494 * S_sublex_start
1495 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1496 *
1497 * Pattern matching will set PL_lex_op to the pattern-matching op to
1498 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1499 *
1500 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1501 *
1502 * Everything else becomes a FUNC.
1503 *
1504 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1505 * had an OP_CONST or OP_READLINE). This just sets us up for a
1506 * call to S_sublex_push().
1507 */
1508
76e3520e 1509STATIC I32
cea2e8a9 1510S_sublex_start(pTHX)
79072805 1511{
97aff369 1512 dVAR;
0d46e09a 1513 register const I32 op_type = yylval.ival;
79072805
LW
1514
1515 if (op_type == OP_NULL) {
3280af22 1516 yylval.opval = PL_lex_op;
5f66b61c 1517 PL_lex_op = NULL;
79072805
LW
1518 return THING;
1519 }
1520 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1521 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1522
1523 if (SvTYPE(sv) == SVt_PVIV) {
1524 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1525 STRLEN len;
96a5add6 1526 const char * const p = SvPV_const(sv, len);
f54cb97a 1527 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1528 if (SvUTF8(sv))
1529 SvUTF8_on(nsv);
b3ac6de7
IZ
1530 SvREFCNT_dec(sv);
1531 sv = nsv;
4e553d73 1532 }
b3ac6de7 1533 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1534 PL_lex_stuff = NULL;
6f33ba73
RGS
1535 /* Allow <FH> // "foo" */
1536 if (op_type == OP_READLINE)
1537 PL_expect = XTERMORDORDOR;
79072805
LW
1538 return THING;
1539 }
1540
3280af22
NIS
1541 PL_sublex_info.super_state = PL_lex_state;
1542 PL_sublex_info.sub_inwhat = op_type;
1543 PL_sublex_info.sub_op = PL_lex_op;
1544 PL_lex_state = LEX_INTERPPUSH;
55497cff 1545
3280af22
NIS
1546 PL_expect = XTERM;
1547 if (PL_lex_op) {
1548 yylval.opval = PL_lex_op;
5f66b61c 1549 PL_lex_op = NULL;
55497cff
PP
1550 return PMFUNC;
1551 }
1552 else
1553 return FUNC;
1554}
1555
ffb4593c
NT
1556/*
1557 * S_sublex_push
1558 * Create a new scope to save the lexing state. The scope will be
1559 * ended in S_sublex_done. Returns a '(', starting the function arguments
1560 * to the uc, lc, etc. found before.
1561 * Sets PL_lex_state to LEX_INTERPCONCAT.
1562 */
1563
76e3520e 1564STATIC I32
cea2e8a9 1565S_sublex_push(pTHX)
55497cff 1566{
27da23d5 1567 dVAR;
f46d017c 1568 ENTER;
55497cff 1569
3280af22
NIS
1570 PL_lex_state = PL_sublex_info.super_state;
1571 SAVEI32(PL_lex_dojoin);
1572 SAVEI32(PL_lex_brackets);
3280af22
NIS
1573 SAVEI32(PL_lex_casemods);
1574 SAVEI32(PL_lex_starts);
1575 SAVEI32(PL_lex_state);
7766f137 1576 SAVEVPTR(PL_lex_inpat);
3280af22 1577 SAVEI32(PL_lex_inwhat);
57843af0 1578 SAVECOPLINE(PL_curcop);
3280af22 1579 SAVEPPTR(PL_bufptr);
8452ff4b 1580 SAVEPPTR(PL_bufend);
3280af22
NIS
1581 SAVEPPTR(PL_oldbufptr);
1582 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1583 SAVEPPTR(PL_last_lop);
1584 SAVEPPTR(PL_last_uni);
3280af22
NIS
1585 SAVEPPTR(PL_linestart);
1586 SAVESPTR(PL_linestr);
8edd5f42
RGS
1587 SAVEGENERICPV(PL_lex_brackstack);
1588 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1589
1590 PL_linestr = PL_lex_stuff;
a0714e2c 1591 PL_lex_stuff = NULL;
3280af22 1592
9cbb5ea2
GS
1593 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1594 = SvPVX(PL_linestr);
3280af22 1595 PL_bufend += SvCUR(PL_linestr);
bd61b366 1596 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1597 SAVEFREESV(PL_linestr);
1598
1599 PL_lex_dojoin = FALSE;
1600 PL_lex_brackets = 0;
a02a5408
JC
1601 Newx(PL_lex_brackstack, 120, char);
1602 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1603 PL_lex_casemods = 0;
1604 *PL_lex_casestack = '\0';
1605 PL_lex_starts = 0;
1606 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1607 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1608
1609 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1610 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1611 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1612 else
5f66b61c 1613 PL_lex_inpat = NULL;
79072805 1614
55497cff 1615 return '(';
79072805
LW
1616}
1617
ffb4593c
NT
1618/*
1619 * S_sublex_done
1620 * Restores lexer state after a S_sublex_push.
1621 */
1622
76e3520e 1623STATIC I32
cea2e8a9 1624S_sublex_done(pTHX)
79072805 1625{
27da23d5 1626 dVAR;
3280af22 1627 if (!PL_lex_starts++) {
396482e1 1628 SV * const sv = newSVpvs("");
9aa983d2
JH
1629 if (SvUTF8(PL_linestr))
1630 SvUTF8_on(sv);
3280af22 1631 PL_expect = XOPERATOR;
9aa983d2 1632 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1633 return THING;
1634 }
1635
3280af22
NIS
1636 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1637 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1638 return yylex();
79072805
LW
1639 }
1640
ffb4593c 1641 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1642 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1643 PL_linestr = PL_lex_repl;
1644 PL_lex_inpat = 0;
1645 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1646 PL_bufend += SvCUR(PL_linestr);
bd61b366 1647 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1648 SAVEFREESV(PL_linestr);
1649 PL_lex_dojoin = FALSE;
1650 PL_lex_brackets = 0;
3280af22
NIS
1651 PL_lex_casemods = 0;
1652 *PL_lex_casestack = '\0';
1653 PL_lex_starts = 0;
25da4f38 1654 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1655 PL_lex_state = LEX_INTERPNORMAL;
1656 PL_lex_starts++;
e9fa98b2
HS
1657 /* we don't clear PL_lex_repl here, so that we can check later
1658 whether this is an evalled subst; that means we rely on the
1659 logic to ensure sublex_done() is called again only via the
1660 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1661 }
e9fa98b2 1662 else {
3280af22 1663 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1664 PL_lex_repl = NULL;
e9fa98b2 1665 }
79072805 1666 return ',';
ffed7fef
LW
1667 }
1668 else {
5db06880
NC
1669#ifdef PERL_MAD
1670 if (PL_madskills) {
cd81e915
NC
1671 if (PL_thiswhite) {
1672 if (!PL_endwhite)
1673 PL_endwhite = newSVpvn("",0);
1674 sv_catsv(PL_endwhite, PL_thiswhite);
1675 PL_thiswhite = 0;
1676 }
1677 if (PL_thistoken)
1678 sv_setpvn(PL_thistoken,"",0);
5db06880 1679 else
cd81e915 1680 PL_realtokenstart = -1;
5db06880
NC
1681 }
1682#endif
f46d017c 1683 LEAVE;
3280af22
NIS
1684 PL_bufend = SvPVX(PL_linestr);
1685 PL_bufend += SvCUR(PL_linestr);
1686 PL_expect = XOPERATOR;
09bef843 1687 PL_sublex_info.sub_inwhat = 0;
79072805 1688 return ')';
ffed7fef
LW
1689 }
1690}
1691
02aa26ce
NT
1692/*
1693 scan_const
1694
1695 Extracts a pattern, double-quoted string, or transliteration. This
1696 is terrifying code.
1697
3280af22
NIS
1698 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1699 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1700 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1701
9b599b2a
GS
1702 Returns a pointer to the character scanned up to. Iff this is
1703 advanced from the start pointer supplied (ie if anything was
1704 successfully parsed), will leave an OP for the substring scanned
1705 in yylval. Caller must intuit reason for not parsing further
1706 by looking at the next characters herself.
1707
02aa26ce
NT
1708 In patterns:
1709 backslashes:
1710 double-quoted style: \r and \n
1711 regexp special ones: \D \s
1712 constants: \x3
1713 backrefs: \1 (deprecated in substitution replacements)
1714 case and quoting: \U \Q \E
1715 stops on @ and $, but not for $ as tail anchor
1716
1717 In transliterations:
1718 characters are VERY literal, except for - not at the start or end
1719 of the string, which indicates a range. scan_const expands the
1720 range to the full set of intermediate characters.
1721
1722 In double-quoted strings:
1723 backslashes:
1724 double-quoted style: \r and \n
1725 constants: \x3
1726 backrefs: \1 (deprecated)
1727 case and quoting: \U \Q \E
1728 stops on @ and $
1729
1730 scan_const does *not* construct ops to handle interpolated strings.
1731 It stops processing as soon as it finds an embedded $ or @ variable
1732 and leaves it to the caller to work out what's going on.
1733
da6eedaa 1734 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1735
1736 $ in pattern could be $foo or could be tail anchor. Assumption:
1737 it's a tail anchor if $ is the last thing in the string, or if it's
1738 followed by one of ")| \n\t"
1739
1740 \1 (backreferences) are turned into $1
1741
1742 The structure of the code is
1743 while (there's a character to process) {
1744 handle transliteration ranges
1745 skip regexp comments
1746 skip # initiated comments in //x patterns
1747 check for embedded @foo
1748 check for embedded scalars
1749 if (backslash) {
1750 leave intact backslashes from leave (below)
1751 deprecate \1 in strings and sub replacements
1752 handle string-changing backslashes \l \U \Q \E, etc.
1753 switch (what was escaped) {
1754 handle - in a transliteration (becomes a literal -)
1755 handle \132 octal characters
1756 handle 0x15 hex characters
1757 handle \cV (control V)
1758 handle printf backslashes (\f, \r, \n, etc)
1759 } (end switch)
1760 } (end if backslash)
1761 } (end while character to read)
4e553d73 1762
02aa26ce
NT
1763*/
1764
76e3520e 1765STATIC char *
cea2e8a9 1766S_scan_const(pTHX_ char *start)
79072805 1767{
97aff369 1768 dVAR;
3280af22 1769 register char *send = PL_bufend; /* end of the constant */
561b68a9 1770 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1771 register char *s = start; /* start of the constant */
1772 register char *d = SvPVX(sv); /* destination for copies */
1773 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1774 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1775 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1776 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1777 UV uv;
4c3a8340
ST
1778#ifdef EBCDIC
1779 UV literal_endpoint = 0;
1780#endif
012bcf8d 1781
d4c19fe8 1782 const char * const leaveit = /* set of acceptably-backslashed characters */
3280af22 1783 PL_lex_inpat
b6d5fef8 1784 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1785 : "";
79072805 1786
2b9d42f0
NIS
1787 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1788 /* If we are doing a trans and we know we want UTF8 set expectation */
1789 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1790 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1791 }
1792
1793
79072805 1794 while (s < send || dorange) {
02aa26ce 1795 /* get transliterations out of the way (they're most literal) */
3280af22 1796 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1797 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1798 if (dorange) {
1ba5c669
JH
1799 I32 i; /* current expanded character */
1800 I32 min; /* first character in range */
1801 I32 max; /* last character in range */
02aa26ce 1802
2b9d42f0 1803 if (has_utf8) {
9d4ba2ae 1804 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1805 char *e = d++;
1806 while (e-- > c)
1807 *(e + 1) = *e;
25716404 1808 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1809 /* mark the range as done, and continue */
1810 dorange = FALSE;
1811 didrange = TRUE;
1812 continue;
1813 }
2b9d42f0 1814
95a20fc0 1815 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1816 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1817 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1818 d -= 2; /* eat the first char and the - */
1819
8ada0baa
JH
1820 min = (U8)*d; /* first char in range */
1821 max = (U8)d[1]; /* last char in range */
1822
c2e66d9e 1823 if (min > max) {
01ec43d0 1824 Perl_croak(aTHX_
d1573ac7 1825 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1826 (char)min, (char)max);
c2e66d9e
GS
1827 }
1828
c7f1f016 1829#ifdef EBCDIC
4c3a8340
ST
1830 if (literal_endpoint == 2 &&
1831 ((isLOWER(min) && isLOWER(max)) ||
1832 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1833 if (isLOWER(min)) {
1834 for (i = min; i <= max; i++)
1835 if (isLOWER(i))
db42d148 1836 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1837 } else {
1838 for (i = min; i <= max; i++)
1839 if (isUPPER(i))
db42d148 1840 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1841 }
1842 }
1843 else
1844#endif
1845 for (i = min; i <= max; i++)
eb160463 1846 *d++ = (char)i;
02aa26ce
NT
1847
1848 /* mark the range as done, and continue */
79072805 1849 dorange = FALSE;
01ec43d0 1850 didrange = TRUE;
4c3a8340
ST
1851#ifdef EBCDIC
1852 literal_endpoint = 0;
1853#endif
79072805 1854 continue;
4e553d73 1855 }
02aa26ce
NT
1856
1857 /* range begins (ignore - as first or last char) */
79072805 1858 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1859 if (didrange) {
1fafa243 1860 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1861 }
2b9d42f0 1862 if (has_utf8) {
25716404 1863 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1864 s++;
1865 continue;
1866 }
79072805
LW
1867 dorange = TRUE;
1868 s++;
01ec43d0
GS
1869 }
1870 else {
1871 didrange = FALSE;
4c3a8340
ST
1872#ifdef EBCDIC
1873 literal_endpoint = 0;
1874#endif
01ec43d0 1875 }
79072805 1876 }
02aa26ce
NT
1877
1878 /* if we get here, we're not doing a transliteration */
1879
0f5d15d6
IZ
1880 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1881 except for the last char, which will be done separately. */
3280af22 1882 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1883 if (s[2] == '#') {
e994fd66 1884 while (s+1 < send && *s != ')')
db42d148 1885 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1886 }
1887 else if (s[2] == '{' /* This should match regcomp.c */
1888 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1889 {
cc6b7395 1890 I32 count = 1;
0f5d15d6 1891 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1892 char c;
1893
d9f97599
GS
1894 while (count && (c = *regparse)) {
1895 if (c == '\\' && regparse[1])
1896 regparse++;
4e553d73 1897 else if (c == '{')
cc6b7395 1898 count++;
4e553d73 1899 else if (c == '}')
cc6b7395 1900 count--;
d9f97599 1901 regparse++;
cc6b7395 1902 }
e994fd66 1903 if (*regparse != ')')
5bdf89e7 1904 regparse--; /* Leave one char for continuation. */
0f5d15d6 1905 while (s < regparse)
db42d148 1906 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1907 }
748a9306 1908 }
02aa26ce
NT
1909
1910 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1911 else if (*s == '#' && PL_lex_inpat &&
1912 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1913 while (s+1 < send && *s != '\n')
db42d148 1914 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1915 }
02aa26ce 1916
5d1d4326 1917 /* check for embedded arrays
da6eedaa 1918 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1919 */
7e2040f0 1920 else if (*s == '@' && s[1]
5d1d4326 1921 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1922 break;
02aa26ce
NT
1923
1924 /* check for embedded scalars. only stop if we're sure it's a
1925 variable.
1926 */
79072805 1927 else if (*s == '$') {
3280af22 1928 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1929 break;
6002328a 1930 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1931 break; /* in regexp, $ might be tail anchor */
1932 }
02aa26ce 1933
2b9d42f0
NIS
1934 /* End of else if chain - OP_TRANS rejoin rest */
1935
02aa26ce 1936 /* backslashes */
79072805
LW
1937 if (*s == '\\' && s+1 < send) {
1938 s++;
02aa26ce
NT
1939
1940 /* some backslashes we leave behind */
c9f97d15 1941 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1942 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1943 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1944 continue;
1945 }
02aa26ce
NT
1946
1947 /* deprecate \1 in strings and substitution replacements */
3280af22 1948 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1949 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1950 {
599cee73 1951 if (ckWARN(WARN_SYNTAX))
9014280d 1952 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1953 *--s = '$';
1954 break;
1955 }
02aa26ce
NT
1956
1957 /* string-change backslash escapes */
3280af22 1958 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1959 --s;
1960 break;
1961 }
02aa26ce
NT
1962
1963 /* if we get here, it's either a quoted -, or a digit */
79072805 1964 switch (*s) {
02aa26ce
NT
1965
1966 /* quoted - in transliterations */
79072805 1967 case '-':
3280af22 1968 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1969 *d++ = *s++;
1970 continue;
1971 }
1972 /* FALL THROUGH */
1973 default:
11b8faa4 1974 {
041457d9
DM
1975 if (isALNUM(*s) &&
1976 *s != '_' &&
1977 ckWARN(WARN_MISC))
9014280d 1978 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1979 "Unrecognized escape \\%c passed through",
1980 *s);
1981 /* default action is to copy the quoted character */
f9a63242 1982 goto default_action;
11b8faa4 1983 }
02aa26ce
NT
1984
1985 /* \132 indicates an octal constant */
79072805
LW
1986 case '0': case '1': case '2': case '3':
1987 case '4': case '5': case '6': case '7':
ba210ebe 1988 {
53305cf1
NC
1989 I32 flags = 0;
1990 STRLEN len = 3;
1991 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1992 s += len;
1993 }
012bcf8d 1994 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1995
1996 /* \x24 indicates a hex constant */
79072805 1997 case 'x':
a0ed51b3
LW
1998 ++s;
1999 if (*s == '{') {
9d4ba2ae 2000 char* const e = strchr(s, '}');
a4c04bdc
NC
2001 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2002 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2003 STRLEN len;
355860ce 2004
53305cf1 2005 ++s;
adaeee49 2006 if (!e) {
a0ed51b3 2007 yyerror("Missing right brace on \\x{}");
355860ce 2008 continue;
ba210ebe 2009 }
53305cf1
NC
2010 len = e - s;
2011 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2012 s = e + 1;
a0ed51b3
LW
2013 }
2014 else {
ba210ebe 2015 {
53305cf1 2016 STRLEN len = 2;
a4c04bdc 2017 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2018 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2019 s += len;
2020 }
012bcf8d
GS
2021 }
2022
2023 NUM_ESCAPE_INSERT:
2024 /* Insert oct or hex escaped character.
301d3d20 2025 * There will always enough room in sv since such
db42d148 2026 * escapes will be longer than any UTF-8 sequence
301d3d20 2027 * they can end up as. */
ba7cea30 2028
c7f1f016
NIS
2029 /* We need to map to chars to ASCII before doing the tests
2030 to cover EBCDIC
2031 */
c4d5f83a 2032 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2033 if (!has_utf8 && uv > 255) {
301d3d20
JH
2034 /* Might need to recode whatever we have
2035 * accumulated so far if it contains any
2036 * hibit chars.
2037 *
2038 * (Can't we keep track of that and avoid
2039 * this rescan? --jhi)
012bcf8d 2040 */
c7f1f016 2041 int hicount = 0;
63cd0674
NIS
2042 U8 *c;
2043 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2044 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2045 hicount++;
db42d148 2046 }
012bcf8d 2047 }
63cd0674 2048 if (hicount) {
9d4ba2ae 2049 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2050 U8 *src, *dst;
2051 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2052 src = (U8 *)d - 1;
2053 dst = src+hicount;
2054 d += hicount;
cfd0369c 2055 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2056 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2057 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2058 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2059 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2060 }
2061 else {
63cd0674 2062 *dst-- = *src;
012bcf8d 2063 }
c7f1f016 2064 src--;
012bcf8d
GS
2065 }
2066 }
2067 }
2068
9aa983d2 2069 if (has_utf8 || uv > 255) {
9041c2e3 2070 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2071 has_utf8 = TRUE;
f9a63242
JH
2072 if (PL_lex_inwhat == OP_TRANS &&
2073 PL_sublex_info.sub_op) {
2074 PL_sublex_info.sub_op->op_private |=
2075 (PL_lex_repl ? OPpTRANS_FROM_UTF
2076 : OPpTRANS_TO_UTF);
f9a63242 2077 }
012bcf8d 2078 }
a0ed51b3 2079 else {
012bcf8d 2080 *d++ = (char)uv;
a0ed51b3 2081 }
012bcf8d
GS
2082 }
2083 else {
c4d5f83a 2084 *d++ = (char) uv;
a0ed51b3 2085 }
79072805 2086 continue;
02aa26ce 2087
b239daa5 2088 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2089 case 'N':
55eda711 2090 ++s;
423cee85
JH
2091 if (*s == '{') {
2092 char* e = strchr(s, '}');
155aba94 2093 SV *res;
423cee85 2094 STRLEN len;
cfd0369c 2095 const char *str;
4e553d73 2096
423cee85 2097 if (!e) {
5777a3f7 2098 yyerror("Missing right brace on \\N{}");
423cee85
JH
2099 e = s - 1;
2100 goto cont_scan;
2101 }
dbc0d4f2
JH
2102 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2103 /* \N{U+...} */
2104 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2105 PERL_SCAN_DISALLOW_PREFIX;
2106 s += 3;
2107 len = e - s;
2108 uv = grok_hex(s, &len, &flags, NULL);
2109 s = e + 1;
2110 goto NUM_ESCAPE_INSERT;
2111 }
55eda711 2112 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2113 res = new_constant( NULL, 0, "charnames",
a0714e2c 2114 res, NULL, "\\N{...}" );
f9a63242
JH
2115 if (has_utf8)
2116 sv_utf8_upgrade(res);
cfd0369c 2117 str = SvPV_const(res,len);
1c47067b
JH
2118#ifdef EBCDIC_NEVER_MIND
2119 /* charnames uses pack U and that has been
2120 * recently changed to do the below uni->native
2121 * mapping, so this would be redundant (and wrong,
2122 * the code point would be doubly converted).
2123 * But leave this in just in case the pack U change
2124 * gets revoked, but the semantics is still
2125 * desireable for charnames. --jhi */
cddc7ef4 2126 {
cfd0369c 2127 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2128
2129 if (uv < 0x100) {
89ebb4a3 2130 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2131
2132 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2133 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2134 str = SvPV_const(res, len);
cddc7ef4
JH
2135 }
2136 }
2137#endif
89491803 2138 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2139 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2140 SvCUR_set(sv, d - ostart);
2141 SvPOK_on(sv);
e4f3eed8 2142 *d = '\0';
f08d6ad9 2143 sv_utf8_upgrade(sv);
d2f449dd 2144 /* this just broke our allocation above... */
eb160463 2145 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2146 d = SvPVX(sv) + SvCUR(sv);
89491803 2147 has_utf8 = TRUE;
f08d6ad9 2148 }
eb160463 2149 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2150 const char * const odest = SvPVX_const(sv);
423cee85 2151
8973db79 2152 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2153 d = SvPVX(sv) + (d - odest);
2154 }
2155 Copy(str, d, len, char);
2156 d += len;
2157 SvREFCNT_dec(res);
2158 cont_scan:
2159 s = e + 1;
2160 }
2161 else
5777a3f7 2162 yyerror("Missing braces on \\N{}");
423cee85
JH
2163 continue;
2164
02aa26ce 2165 /* \c is a control character */
79072805
LW
2166 case 'c':
2167 s++;
961ce445 2168 if (s < send) {
ba210ebe 2169 U8 c = *s++;
c7f1f016
NIS
2170#ifdef EBCDIC
2171 if (isLOWER(c))
2172 c = toUPPER(c);
2173#endif
db42d148 2174 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2175 }
961ce445
RGS
2176 else {
2177 yyerror("Missing control char name in \\c");
2178 }
79072805 2179 continue;
02aa26ce
NT
2180
2181 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2182 case 'b':
db42d148 2183 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2184 break;
2185 case 'n':
db42d148 2186 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2187 break;
2188 case 'r':
db42d148 2189 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2190 break;
2191 case 'f':
db42d148 2192 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2193 break;
2194 case 't':
db42d148 2195 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2196 break;
34a3fe2a 2197 case 'e':
db42d148 2198 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2199 break;
2200 case 'a':
db42d148 2201 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2202 break;
02aa26ce
NT
2203 } /* end switch */
2204
79072805
LW
2205 s++;
2206 continue;
02aa26ce 2207 } /* end if (backslash) */
4c3a8340
ST
2208#ifdef EBCDIC
2209 else
2210 literal_endpoint++;
2211#endif
02aa26ce 2212
f9a63242 2213 default_action:
2b9d42f0
NIS
2214 /* If we started with encoded form, or already know we want it
2215 and then encode the next character */
2216 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2217 STRLEN len = 1;
5f66b61c
AL
2218 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2219 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2220 s += len;
2221 if (need > len) {
2222 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2223 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2224 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2225 }
5f66b61c 2226 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2227 has_utf8 = TRUE;
2228 }
2229 else {
2230 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2231 }
02aa26ce
NT
2232 } /* while loop to process each character */
2233
2234 /* terminate the string and set up the sv */
79072805 2235 *d = '\0';
95a20fc0 2236 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2237 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2238 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2239
79072805 2240 SvPOK_on(sv);
9f4817db 2241 if (PL_encoding && !has_utf8) {
d0063567
DK
2242 sv_recode_to_utf8(sv, PL_encoding);
2243 if (SvUTF8(sv))
2244 has_utf8 = TRUE;
9f4817db 2245 }
2b9d42f0 2246 if (has_utf8) {
7e2040f0 2247 SvUTF8_on(sv);
2b9d42f0 2248 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2249 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2250 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2251 }
2252 }
79072805 2253
02aa26ce 2254 /* shrink the sv if we allocated more than we used */
79072805 2255 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2256 SvPV_shrink_to_cur(sv);
79072805 2257 }
02aa26ce 2258
9b599b2a 2259 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2260 if (s > PL_bufptr) {
2261 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2262 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2263 sv, NULL,
4e553d73 2264 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2265 ? "tr"
3280af22 2266 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2267 ? "s"
2268 : "qq")));
79072805 2269 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2270 } else
8990e307 2271 SvREFCNT_dec(sv);
79072805
LW
2272 return s;
2273}
2274
ffb4593c
NT
2275/* S_intuit_more
2276 * Returns TRUE if there's more to the expression (e.g., a subscript),
2277 * FALSE otherwise.
ffb4593c
NT
2278 *
2279 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2280 *
2281 * ->[ and ->{ return TRUE
2282 * { and [ outside a pattern are always subscripts, so return TRUE
2283 * if we're outside a pattern and it's not { or [, then return FALSE
2284 * if we're in a pattern and the first char is a {
2285 * {4,5} (any digits around the comma) returns FALSE
2286 * if we're in a pattern and the first char is a [
2287 * [] returns FALSE
2288 * [SOMETHING] has a funky algorithm to decide whether it's a
2289 * character class or not. It has to deal with things like
2290 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2291 * anything else returns TRUE
2292 */
2293
9cbb5ea2
GS
2294/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2295
76e3520e 2296STATIC int
cea2e8a9 2297S_intuit_more(pTHX_ register char *s)
79072805 2298{
97aff369 2299 dVAR;
3280af22 2300 if (PL_lex_brackets)
79072805
LW
2301 return TRUE;
2302 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2303 return TRUE;
2304 if (*s != '{' && *s != '[')
2305 return FALSE;
3280af22 2306 if (!PL_lex_inpat)
79072805
LW
2307 return TRUE;
2308
2309 /* In a pattern, so maybe we have {n,m}. */
2310 if (*s == '{') {
2311 s++;
2312 if (!isDIGIT(*s))
2313 return TRUE;
2314 while (isDIGIT(*s))
2315 s++;
2316 if (*s == ',')
2317 s++;
2318 while (isDIGIT(*s))
2319 s++;
2320 if (*s == '}')
2321 return FALSE;
2322 return TRUE;
2323
2324 }
2325
2326 /* On the other hand, maybe we have a character class */
2327
2328 s++;
2329 if (*s == ']' || *s == '^')
2330 return FALSE;
2331 else {
ffb4593c 2332 /* this is terrifying, and it works */
79072805
LW
2333 int weight = 2; /* let's weigh the evidence */
2334 char seen[256];
f27ffc4a 2335 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2336 const char * const send = strchr(s,']');
3280af22 2337 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2338
2339 if (!send) /* has to be an expression */
2340 return TRUE;
2341
2342 Zero(seen,256,char);
2343 if (*s == '$')
2344 weight -= 3;
2345 else if (isDIGIT(*s)) {
2346 if (s[1] != ']') {
2347 if (isDIGIT(s[1]) && s[2] == ']')
2348 weight -= 10;
2349 }
2350 else
2351 weight -= 100;
2352 }
2353 for (; s < send; s++) {
2354 last_un_char = un_char;
2355 un_char = (unsigned char)*s;
2356 switch (*s) {
2357 case '@':
2358 case '&':
2359 case '$':
2360 weight -= seen[un_char] * 10;
7e2040f0 2361 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2362 int len;
8903cb82 2363 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2364 len = (int)strlen(tmpbuf);
2365 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2366 weight -= 100;
2367 else
2368 weight -= 10;
2369 }
2370 else if (*s == '$' && s[1] &&
93a17b20
LW
2371 strchr("[#!%*<>()-=",s[1])) {
2372 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2373 weight -= 10;
2374 else
2375 weight -= 1;
2376 }
2377 break;
2378 case '\\':
2379 un_char = 254;
2380 if (s[1]) {
93a17b20 2381 if (strchr("wds]",s[1]))
79072805
LW
2382 weight += 100;
2383 else if (seen['\''] || seen['"'])
2384 weight += 1;
93a17b20 2385 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2386 weight += 40;
2387 else if (isDIGIT(s[1])) {
2388 weight += 40;
2389 while (s[1] && isDIGIT(s[1]))
2390 s++;
2391 }
2392 }
2393 else
2394 weight += 100;
2395 break;
2396 case '-':
2397 if (s[1] == '\\')
2398 weight += 50;
93a17b20 2399 if (strchr("aA01! ",last_un_char))
79072805 2400 weight += 30;
93a17b20 2401 if (strchr("zZ79~",s[1]))
79072805 2402 weight += 30;
f27ffc4a
GS
2403 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2404 weight -= 5; /* cope with negative subscript */
79072805
LW
2405 break;
2406 default:
3792a11b
NC
2407 if (!isALNUM(last_un_char)
2408 && !(last_un_char == '$' || last_un_char == '@'
2409 || last_un_char == '&')
2410 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2411 char *d = tmpbuf;
2412 while (isALPHA(*s))
2413 *d++ = *s++;
2414 *d = '\0';
2415 if (keyword(tmpbuf, d - tmpbuf))
2416 weight -= 150;
2417 }
2418 if (un_char == last_un_char + 1)
2419 weight += 5;
2420 weight -= seen[un_char];
2421 break;
2422 }
2423 seen[un_char]++;
2424 }
2425 if (weight >= 0) /* probably a character class */
2426 return FALSE;
2427 }
2428
2429 return TRUE;
2430}
ffed7fef 2431
ffb4593c
NT
2432/*
2433 * S_intuit_method
2434 *
2435 * Does all the checking to disambiguate
2436 * foo bar
2437 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2438 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2439 *
2440 * First argument is the stuff after the first token, e.g. "bar".
2441 *
2442 * Not a method if bar is a filehandle.
2443 * Not a method if foo is a subroutine prototyped to take a filehandle.
2444 * Not a method if it's really "Foo $bar"
2445 * Method if it's "foo $bar"
2446 * Not a method if it's really "print foo $bar"
2447 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2448 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2449 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2450 * =>
2451 */
2452
76e3520e 2453STATIC int
62d55b22 2454S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2455{
97aff369 2456 dVAR;
a0d0e21e 2457 char *s = start + (*start == '$');
3280af22 2458 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2459 STRLEN len;
2460 GV* indirgv;
5db06880
NC
2461#ifdef PERL_MAD
2462 int soff;
2463#endif
a0d0e21e
LW
2464
2465 if (gv) {
62d55b22 2466 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2467 return 0;
62d55b22
NC
2468 if (cv) {
2469 if (SvPOK(cv)) {
2470 const char *proto = SvPVX_const(cv);
2471 if (proto) {
2472 if (*proto == ';')
2473 proto++;
2474 if (*proto == '*')
2475 return 0;
2476 }
b6c543e3
IZ
2477 }
2478 } else
a0d0e21e
LW
2479 gv = 0;
2480 }
8903cb82 2481 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2482 /* start is the beginning of the possible filehandle/object,
2483 * and s is the end of it
2484 * tmpbuf is a copy of it
2485 */
2486
a0d0e21e 2487 if (*start == '$') {
3280af22 2488 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2489 return 0;
5db06880
NC
2490#ifdef PERL_MAD
2491 len = start - SvPVX(PL_linestr);
2492#endif
29595ff2 2493 s = PEEKSPACE(s);
5db06880
NC
2494#ifdef PERLMAD
2495 start = SvPVX(PL_linestr) + len;
2496#endif
3280af22
NIS
2497 PL_bufptr = start;
2498 PL_expect = XREF;
a0d0e21e
LW
2499 return *s == '(' ? FUNCMETH : METHOD;
2500 }
2501 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2502 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2503 len -= 2;
2504 tmpbuf[len] = '\0';
5db06880
NC
2505#ifdef PERL_MAD
2506 soff = s - SvPVX(PL_linestr);
2507#endif
c3e0f903
GS
2508 goto bare_package;
2509 }
90e5519e 2510 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2511 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2512 return 0;
2513 /* filehandle or package name makes it a method */
89bfa8cd 2514 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2515#ifdef PERL_MAD
2516 soff = s - SvPVX(PL_linestr);
2517#endif
29595ff2 2518 s = PEEKSPACE(s);
3280af22 2519 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2520 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2521 bare_package:
cd81e915 2522 start_force(PL_curforce);
9ded7720 2523 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2524 newSVpvn(tmpbuf,len));
9ded7720 2525 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2526 if (PL_madskills)
2527 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2528 PL_expect = XTERM;
a0d0e21e 2529 force_next(WORD);
3280af22 2530 PL_bufptr = s;
5db06880
NC
2531#ifdef PERL_MAD
2532 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2533#endif
a0d0e21e
LW
2534 return *s == '(' ? FUNCMETH : METHOD;
2535 }
2536 }
2537 return 0;
2538}
2539
ffb4593c
NT
2540/*
2541 * S_incl_perldb
2542 * Return a string of Perl code to load the debugger. If PERL5DB
2543 * is set, it will return the contents of that, otherwise a
2544 * compile-time require of perl5db.pl.
2545 */
2546
bfed75c6 2547STATIC const char*
cea2e8a9 2548S_incl_perldb(pTHX)
a0d0e21e 2549{
97aff369 2550 dVAR;
3280af22 2551 if (PL_perldb) {
9d4ba2ae 2552 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2553
2554 if (pdb)
2555 return pdb;
93189314 2556 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2557 return "BEGIN { require 'perl5db.pl' }";
2558 }
2559 return "";
2560}
2561
2562
16d20bd9 2563/* Encoded script support. filter_add() effectively inserts a
4e553d73 2564 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2565 * Note that the filter function only applies to the current source file
2566 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2567 *
2568 * The datasv parameter (which may be NULL) can be used to pass
2569 * private data to this instance of the filter. The filter function
2570 * can recover the SV using the FILTER_DATA macro and use it to
2571 * store private buffers and state information.
2572 *
2573 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2574 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2575 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2576 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2577 * private use must be set using malloc'd pointers.
2578 */
16d20bd9
AD
2579
2580SV *
864dbfa3 2581Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2582{
97aff369 2583 dVAR;
f4c556ac 2584 if (!funcp)
a0714e2c 2585 return NULL;
f4c556ac 2586
3280af22
NIS
2587 if (!PL_rsfp_filters)
2588 PL_rsfp_filters = newAV();
16d20bd9 2589 if (!datasv)
561b68a9 2590 datasv = newSV(0);
862a34c6 2591 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2592 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2593 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2594 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2595 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2596 av_unshift(PL_rsfp_filters, 1);
2597 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2598 return(datasv);
2599}
4e553d73 2600
16d20bd9
AD
2601
2602/* Delete most recently added instance of this filter function. */
a0d0e21e 2603void
864dbfa3 2604Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2605{
97aff369 2606 dVAR;
e0c19803 2607 SV *datasv;
24801a4b 2608
33073adb 2609#ifdef DEBUGGING
8141890a 2610 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2611#endif
3280af22 2612 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2613 return;
2614 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2615 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2616 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2617 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2618 IoANY(datasv) = (void *)NULL;
3280af22 2619 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2620
16d20bd9
AD
2621 return;
2622 }
2623 /* we need to search for the correct entry and clear it */
cea2e8a9 2624 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2625}
2626
2627
1de9afcd
RGS
2628/* Invoke the idxth filter function for the current rsfp. */
2629/* maxlen 0 = read one text line */
16d20bd9 2630I32
864dbfa3 2631Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2632{
97aff369 2633 dVAR;
16d20bd9
AD
2634 filter_t funcp;
2635 SV *datasv = NULL;
e50aee73 2636
3280af22 2637 if (!PL_rsfp_filters)
16d20bd9 2638 return -1;
1de9afcd 2639 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2640 /* Provide a default input filter to make life easy. */
2641 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2642 DEBUG_P(PerlIO_printf(Perl_debug_log,
2643 "filter_read %d: from rsfp\n", idx));
4e553d73 2644 if (maxlen) {
16d20bd9
AD
2645 /* Want a block */
2646 int len ;
f54cb97a 2647 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2648
2649 /* ensure buf_sv is large enough */
eb160463 2650 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2651 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2652 if (PerlIO_error(PL_rsfp))
37120919
AD
2653 return -1; /* error */
2654 else
2655 return 0 ; /* end of file */
2656 }
16d20bd9
AD
2657 SvCUR_set(buf_sv, old_len + len) ;
2658 } else {
2659 /* Want a line */
3280af22
NIS
2660 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2661 if (PerlIO_error(PL_rsfp))
37120919
AD
2662 return -1; /* error */
2663 else
2664 return 0 ; /* end of file */
2665 }
16d20bd9
AD
2666 }
2667 return SvCUR(buf_sv);
2668 }
2669 /* Skip this filter slot if filter has been deleted */
1de9afcd 2670 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2671 DEBUG_P(PerlIO_printf(Perl_debug_log,
2672 "filter_read %d: skipped (filter deleted)\n",
2673 idx));
16d20bd9
AD
2674 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2675 }
2676 /* Get function pointer hidden within datasv */
8141890a 2677 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2678 DEBUG_P(PerlIO_printf(Perl_debug_log,
2679 "filter_read %d: via function %p (%s)\n",
cfd0369c 2680 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2681 /* Call function. The function is expected to */
2682 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2683 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2684 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2685}
2686
76e3520e 2687STATIC char *
cea2e8a9 2688S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2689{
97aff369 2690 dVAR;
c39cd008 2691#ifdef PERL_CR_FILTER
3280af22 2692 if (!PL_rsfp_filters) {
c39cd008 2693 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2694 }
2695#endif
3280af22 2696 if (PL_rsfp_filters) {
55497cff
PP
2697 if (!append)
2698 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2699 if (FILTER_READ(0, sv, 0) > 0)
2700 return ( SvPVX(sv) ) ;
2701 else
bd61b366 2702 return NULL ;
16d20bd9 2703 }
9d116dd7 2704 else
fd049845 2705 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2706}
2707
01ec43d0 2708STATIC HV *
7fc63493 2709S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2710{
97aff369 2711 dVAR;
def3634b
GS
2712 GV *gv;
2713
01ec43d0 2714 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2715 return PL_curstash;
2716
2717 if (len > 2 &&
2718 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2719 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2720 {
2721 return GvHV(gv); /* Foo:: */
def3634b
GS
2722 }
2723
2724 /* use constant CLASS => 'MyClass' */
90e5519e 2725 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2726 SV *sv;
2727 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2728 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2729 }
2730 }
2731
2732 return gv_stashpv(pkgname, FALSE);
2733}
a0d0e21e 2734
5db06880
NC
2735#ifdef PERL_MAD
2736 /*
2737 * Perl_madlex
2738 * The intent of this yylex wrapper is to minimize the changes to the
2739 * tokener when we aren't interested in collecting madprops. It remains
2740 * to be seen how successful this strategy will be...
2741 */
2742
2743int
2744Perl_madlex(pTHX)
2745{
2746 int optype;
2747 char *s = PL_bufptr;
2748
cd81e915
NC
2749 /* make sure PL_thiswhite is initialized */
2750 PL_thiswhite = 0;
2751 PL_thismad = 0;
5db06880 2752
cd81e915 2753 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2754 if (PL_pending_ident)
2755 return S_pending_ident(aTHX);
2756
2757 /* previous token ate up our whitespace? */
cd81e915
NC
2758 if (!PL_lasttoke && PL_nextwhite) {
2759 PL_thiswhite = PL_nextwhite;
2760 PL_nextwhite = 0;
5db06880
NC
2761 }
2762
2763 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2764 PL_realtokenstart = -1;
2765 PL_thistoken = 0;
5db06880
NC
2766 optype = yylex();
2767 s = PL_bufptr;
cd81e915 2768 assert(PL_curforce < 0);
5db06880 2769
cd81e915
NC
2770 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2771 if (!PL_thistoken) {
2772 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2773 PL_thistoken = newSVpvn("",0);
5db06880 2774 else {
cd81e915
NC
2775 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2776 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2777 }
2778 }
cd81e915
NC
2779 if (PL_thismad) /* install head */
2780 CURMAD('X', PL_thistoken);
5db06880
NC
2781 }
2782
2783 /* last whitespace of a sublex? */
cd81e915
NC
2784 if (optype == ')' && PL_endwhite) {
2785 CURMAD('X', PL_endwhite);
5db06880
NC
2786 }
2787
cd81e915 2788 if (!PL_thismad) {
5db06880
NC
2789
2790 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2791 if (!PL_thiswhite && !PL_endwhite && !optype) {
2792 sv_free(PL_thistoken);
2793 PL_thistoken = 0;
5db06880
NC
2794 return 0;
2795 }
2796
2797 /* put off final whitespace till peg */
2798 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2799 PL_nextwhite = PL_thiswhite;
2800 PL_thiswhite = 0;
5db06880 2801 }
cd81e915
NC
2802 else if (PL_thisopen) {
2803 CURMAD('q', PL_thisopen);
2804 if (PL_thistoken)
2805 sv_free(PL_thistoken);
2806 PL_thistoken = 0;
5db06880
NC
2807 }
2808 else {
2809 /* Store actual token text as madprop X */
cd81e915 2810 CURMAD('X', PL_thistoken);
5db06880
NC
2811 }
2812
cd81e915 2813 if (PL_thiswhite) {
5db06880 2814 /* add preceding whitespace as madprop _ */
cd81e915 2815 CURMAD('_', PL_thiswhite);
5db06880
NC
2816 }
2817
cd81e915 2818 if (PL_thisstuff) {
5db06880 2819 /* add quoted material as madprop = */
cd81e915 2820 CURMAD('=', PL_thisstuff);
5db06880
NC
2821 }
2822
cd81e915 2823 if (PL_thisclose) {
5db06880 2824 /* add terminating quote as madprop Q */
cd81e915 2825 CURMAD('Q', PL_thisclose);
5db06880
NC
2826 }
2827 }
2828
2829 /* special processing based on optype */
2830
2831 switch (optype) {
2832
2833 /* opval doesn't need a TOKEN since it can already store mp */
2834 case WORD:
2835 case METHOD:
2836 case FUNCMETH:
2837 case THING:
2838 case PMFUNC:
2839 case PRIVATEREF:
2840 case FUNC0SUB:
2841 case UNIOPSUB:
2842 case LSTOPSUB:
2843 if (yylval.opval)
cd81e915
NC
2844 append_madprops(PL_thismad, yylval.opval, 0);
2845 PL_thismad = 0;
5db06880
NC
2846 return optype;
2847
2848 /* fake EOF */
2849 case 0:
2850 optype = PEG;
cd81e915
NC
2851 if (PL_endwhite) {
2852 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2853 PL_endwhite = 0;
5db06880
NC
2854 }
2855 break;
2856
2857 case ']':
2858 case '}':
cd81e915 2859 if (PL_faketokens)
5db06880
NC
2860 break;
2861 /* remember any fake bracket that lexer is about to discard */
2862 if (PL_lex_brackets == 1 &&
2863 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2864 {
2865 s = PL_bufptr;
2866 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2867 s++;
2868 if (*s == '}') {
cd81e915
NC
2869 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2870 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2871 PL_thiswhite = 0;
5db06880
NC
2872 PL_bufptr = s - 1;
2873 break; /* don't bother looking for trailing comment */
2874 }
2875 else
2876 s = PL_bufptr;
2877 }
2878 if (optype == ']')
2879 break;
2880 /* FALLTHROUGH */
2881
2882 /* attach a trailing comment to its statement instead of next token */
2883 case ';':
cd81e915 2884 if (PL_faketokens)
5db06880
NC
2885 break;
2886 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2887 s = PL_bufptr;
2888 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2889 s++;
2890 if (*s == '\n' || *s == '#') {
2891 while (s < PL_bufend && *s != '\n')
2892 s++;
2893 if (s < PL_bufend)
2894 s++;
cd81e915
NC
2895 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2896 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2897 PL_thiswhite = 0;
5db06880
NC
2898 PL_bufptr = s;
2899 }
2900 }
2901 break;
2902
2903 /* pval */
2904 case LABEL:
2905 break;
2906
2907 /* ival */
2908 default:
2909 break;
2910
2911 }
2912
2913 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
2914 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
2915 PL_thismad = 0;
5db06880
NC
2916 return optype;
2917}
2918#endif
2919
468aa647 2920STATIC char *
cc6ed77d 2921S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2922 dVAR;
468aa647
RGS
2923 if (PL_expect != XSTATE)
2924 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2925 is_use ? "use" : "no"));
29595ff2 2926 s = SKIPSPACE1(s);
468aa647
RGS
2927 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2928 s = force_version(s, TRUE);
29595ff2 2929 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 2930 start_force(PL_curforce);
9ded7720 2931 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2932 force_next(WORD);
2933 }
2934 else if (*s == 'v') {
2935 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2936 s = force_version(s, FALSE);
2937 }
2938 }
2939 else {
2940 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2941 s = force_version(s, FALSE);
2942 }
2943 yylval.ival = is_use;
2944 return s;
2945}
748a9306 2946#ifdef DEBUGGING
27da23d5 2947 static const char* const exp_name[] =
09bef843 2948 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2949 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2950 };
748a9306 2951#endif
463ee0b2 2952
02aa26ce
NT
2953/*
2954 yylex
2955
2956 Works out what to call the token just pulled out of the input
2957 stream. The yacc parser takes care of taking the ops we return and
2958 stitching them into a tree.
2959
2960 Returns:
2961 PRIVATEREF
2962
2963 Structure:
2964 if read an identifier
2965 if we're in a my declaration
2966 croak if they tried to say my($foo::bar)
2967 build the ops for a my() declaration
2968 if it's an access to a my() variable
2969 are we in a sort block?
2970 croak if my($a); $a <=> $b
2971 build ops for access to a my() variable
2972 if in a dq string, and they've said @foo and we can't find @foo
2973 croak
2974 build ops for a bareword
2975 if we already built the token before, use it.
2976*/
2977
20141f0e 2978
dba4d153
JH
2979#ifdef __SC__
2980#pragma segment Perl_yylex
2981#endif
dba4d153 2982int
dba4d153 2983Perl_yylex(pTHX)
20141f0e 2984{
97aff369 2985 dVAR;
3afc138a 2986 register char *s = PL_bufptr;
378cc40b 2987 register char *d;
463ee0b2 2988 STRLEN len;
aa7440fb 2989 bool bof = FALSE;
a687059c 2990
bbf60fe6 2991 DEBUG_T( {
396482e1 2992 SV* tmp = newSVpvs("");
b6007c36
DM
2993 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2994 (IV)CopLINE(PL_curcop),
2995 lex_state_names[PL_lex_state],
2996 exp_name[PL_expect],
2997 pv_display(tmp, s, strlen(s), 0, 60));
2998 SvREFCNT_dec(tmp);
bbf60fe6 2999 } );
02aa26ce 3000 /* check if there's an identifier for us to look at */
ba979b31 3001 if (PL_pending_ident)
bbf60fe6 3002 return REPORT(S_pending_ident(aTHX));
bbce6d69 3003
02aa26ce
NT
3004 /* no identifier pending identification */
3005
3280af22 3006 switch (PL_lex_state) {
79072805
LW
3007#ifdef COMMENTARY
3008 case LEX_NORMAL: /* Some compilers will produce faster */
3009 case LEX_INTERPNORMAL: /* code if we comment these out. */
3010 break;
3011#endif
3012
09bef843 3013 /* when we've already built the next token, just pull it out of the queue */
79072805 3014 case LEX_KNOWNEXT:
5db06880
NC
3015#ifdef PERL_MAD
3016 PL_lasttoke--;
3017 yylval = PL_nexttoke[PL_lasttoke].next_val;
3018 if (PL_madskills) {
cd81e915 3019 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3020 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3021 if (PL_thismad && PL_thismad->mad_key == '_') {
3022 PL_thiswhite = (SV*)PL_thismad->mad_val;
3023 PL_thismad->mad_val = 0;
3024 mad_free(PL_thismad);
3025 PL_thismad = 0;
5db06880
NC
3026 }
3027 }
3028 if (!PL_lasttoke) {
3029 PL_lex_state = PL_lex_defer;
3030 PL_expect = PL_lex_expect;
3031 PL_lex_defer = LEX_NORMAL;
3032 if (!PL_nexttoke[PL_lasttoke].next_type)
3033 return yylex();
3034 }
3035#else
3280af22 3036 PL_nexttoke--;
5db06880 3037 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3038 if (!PL_nexttoke) {
3039 PL_lex_state = PL_lex_defer;
3040 PL_expect = PL_lex_expect;
3041 PL_lex_defer = LEX_NORMAL;
463ee0b2 3042 }
5db06880
NC
3043#endif
3044#ifdef PERL_MAD
3045 /* FIXME - can these be merged? */
3046 return(PL_nexttoke[PL_lasttoke].next_type);
3047#else
bbf60fe6 3048 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3049#endif
79072805 3050
02aa26ce 3051 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3052 when we get here, PL_bufptr is at the \
02aa26ce 3053 */
79072805
LW
3054 case LEX_INTERPCASEMOD:
3055#ifdef DEBUGGING
3280af22 3056 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3057 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3058#endif
02aa26ce 3059 /* handle \E or end of string */
3280af22 3060 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3061 /* if at a \E */
3280af22 3062 if (PL_lex_casemods) {
f54cb97a 3063 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3064 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3065
3792a11b
NC
3066 if (PL_bufptr != PL_bufend
3067 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3068 PL_bufptr += 2;
3069 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3070#ifdef PERL_MAD
3071 if (PL_madskills)
cd81e915 3072 PL_thistoken = newSVpvn("\\E",2);
5db06880 3073#endif
a0d0e21e 3074 }
bbf60fe6 3075 return REPORT(')');
79072805 3076 }
5db06880
NC
3077#ifdef PERL_MAD
3078 while (PL_bufptr != PL_bufend &&
3079 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3080 if (!PL_thiswhite)
3081 PL_thiswhite = newSVpvn("",0);
3082 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3083 PL_bufptr += 2;
3084 }
3085#else
3280af22
NIS
3086 if (PL_bufptr != PL_bufend)
3087 PL_bufptr += 2;
5db06880 3088#endif
3280af22 3089 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3090 return yylex();
79072805
LW
3091 }
3092 else {
607df283 3093 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3094 "### Saw case modifier\n"); });
3280af22 3095 s = PL_bufptr + 1;
6e909404 3096 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3097#ifdef PERL_MAD
cd81e915
NC
3098 if (!PL_thiswhite)
3099 PL_thiswhite = newSVpvn("",0);
3100 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3101#endif
89122651 3102 PL_bufptr = s + 3;
6e909404
JH
3103 PL_lex_state = LEX_INTERPCONCAT;
3104 return yylex();
a0d0e21e 3105 }
6e909404 3106 else {
90771dc0 3107 I32 tmp;
5db06880
NC
3108 if (!PL_madskills) /* when just compiling don't need correct */
3109 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3110 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3111 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3112 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3113 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3114 return REPORT(')');
6e909404
JH
3115 }
3116 if (PL_lex_casemods > 10)
3117 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3118 PL_lex_casestack[PL_lex_casemods++] = *s;
3119 PL_lex_casestack[PL_lex_casemods] = '\0';
3120 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3121 start_force(PL_curforce);
9ded7720 3122 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3123 force_next('(');
cd81e915 3124 start_force(PL_curforce);
6e909404 3125 if (*s == 'l')
9ded7720 3126 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3127 else if (*s == 'u')
9ded7720 3128 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3129 else if (*s == 'L')
9ded7720 3130 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3131 else if (*s == 'U')
9ded7720 3132 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3133 else if (*s == 'Q')
9ded7720 3134 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3135 else
3136 Perl_croak(aTHX_ "panic: yylex");
5db06880 3137 if (PL_madskills) {
d4c19fe8 3138 SV* const tmpsv = newSVpvn("",0);
5db06880
NC
3139 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3140 curmad('_', tmpsv);
3141 }
6e909404 3142 PL_bufptr = s + 1;
a0d0e21e 3143 }
79072805 3144 force_next(FUNC);
3280af22
NIS
3145 if (PL_lex_starts) {
3146 s = PL_bufptr;
3147 PL_lex_starts = 0;
5db06880
NC
3148#ifdef PERL_MAD
3149 if (PL_madskills) {
cd81e915
NC
3150 if (PL_thistoken)
3151 sv_free(PL_thistoken);
3152 PL_thistoken = newSVpvn("",0);
5db06880
NC
3153 }
3154#endif
131b3ad0
DM
3155 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3156 if (PL_lex_casemods == 1 && PL_lex_inpat)
3157 OPERATOR(',');
3158 else
3159 Aop(OP_CONCAT);
79072805
LW
3160 }
3161 else
cea2e8a9 3162 return yylex();
79072805
LW
3163 }
3164
55497cff 3165 case LEX_INTERPPUSH:
bbf60fe6 3166 return REPORT(sublex_push());
55497cff 3167
79072805 3168 case LEX_INTERPSTART:
3280af22 3169 if (PL_bufptr == PL_bufend)
bbf60fe6 3170 return REPORT(sublex_done());
607df283 3171 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3172 "### Interpolated variable\n"); });
3280af22
NIS
3173 PL_expect = XTERM;
3174 PL_lex_dojoin = (*PL_bufptr == '@');
3175 PL_lex_state = LEX_INTERPNORMAL;
3176 if (PL_lex_dojoin) {
cd81e915 3177 start_force(PL_curforce);
9ded7720 3178 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3179 force_next(',');
cd81e915 3180 start_force(PL_curforce);
a0d0e21e 3181 force_ident("\"", '$');
cd81e915 3182 start_force(PL_curforce);
9ded7720 3183 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3184 force_next('$');
cd81e915 3185 start_force(PL_curforce);
9ded7720 3186 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3187 force_next('(');
cd81e915 3188 start_force(PL_curforce);
9ded7720 3189 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3190 force_next(FUNC);
3191 }
3280af22
NIS
3192 if (PL_lex_starts++) {
3193 s = PL_bufptr;
5db06880
NC
3194#ifdef PERL_MAD
3195 if (PL_madskills) {
cd81e915
NC
3196 if (PL_thistoken)
3197 sv_free(PL_thistoken);
3198 PL_thistoken = newSVpvn("",0);
5db06880
NC
3199 }
3200#endif
131b3ad0
DM
3201 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3202 if (!PL_lex_casemods && PL_lex_inpat)
3203 OPERATOR(',');
3204 else
3205 Aop(OP_CONCAT);
79072805 3206 }
cea2e8a9 3207 return yylex();
79072805
LW
3208
3209 case LEX_INTERPENDMAYBE:
3280af22
NIS
3210 if (intuit_more(PL_bufptr)) {
3211 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3212 break;
3213 }
3214 /* FALL THROUGH */
3215
3216 case LEX_INTERPEND:
3280af22
NIS
3217 if (PL_lex_dojoin) {
3218 PL_lex_dojoin = FALSE;
3219 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3220#ifdef PERL_MAD
3221 if (PL_madskills) {
cd81e915
NC
3222 if (PL_thistoken)
3223 sv_free(PL_thistoken);
3224 PL_thistoken = newSVpvn("",0);
5db06880
NC
3225 }
3226#endif
bbf60fe6 3227 return REPORT(')');
79072805 3228 }
43a16006 3229 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3230 && SvEVALED(PL_lex_repl))
43a16006 3231 {
e9fa98b2 3232 if (PL_bufptr != PL_bufend)
cea2e8a9 3233 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3234 PL_lex_repl = NULL;
e9fa98b2 3235 }
79072805
LW
3236 /* FALLTHROUGH */
3237 case LEX_INTERPCONCAT:
3238#ifdef DEBUGGING
3280af22 3239 if (PL_lex_brackets)
cea2e8a9 3240 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3241#endif
3280af22 3242 if (PL_bufptr == PL_bufend)
bbf60fe6 3243 return REPORT(sublex_done());
79072805 3244
3280af22
NIS
3245 if (SvIVX(PL_linestr) == '\'') {
3246 SV *sv = newSVsv(PL_linestr);
3247 if (!PL_lex_inpat)
76e3520e 3248 sv = tokeq(sv);
3280af22 3249 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3250 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3251 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3252 s = PL_bufend;
79072805
LW
3253 }
3254 else {
3280af22 3255 s = scan_const(PL_bufptr);
79072805 3256 if (*s == '\\')
3280af22 3257 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3258 else
3280af22 3259 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3260 }
3261
3280af22 3262 if (s != PL_bufptr) {
cd81e915 3263 start_force(PL_curforce);
5db06880
NC
3264 if (PL_madskills) {
3265 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3266 }
9ded7720 3267 NEXTVAL_NEXTTOKE = yylval;
3280af22 3268 PL_expect = XTERM;
79072805 3269 force_next(THING);
131b3ad0 3270 if (PL_lex_starts++) {
5db06880
NC
3271#ifdef PERL_MAD
3272 if (PL_madskills) {
cd81e915
NC
3273 if (PL_thistoken)
3274 sv_free(PL_thistoken);
3275 PL_thistoken = newSVpvn("",0);
5db06880
NC
3276 }
3277#endif
131b3ad0
DM
3278 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3279 if (!PL_lex_casemods && PL_lex_inpat)
3280 OPERATOR(',');
3281 else
3282 Aop(OP_CONCAT);
3283 }
79072805 3284 else {
3280af22 3285 PL_bufptr = s;
cea2e8a9 3286 return yylex();
79072805
LW
3287 }
3288 }
3289
cea2e8a9 3290 return yylex();
a0d0e21e 3291 case LEX_FORMLINE:
3280af22
NIS
3292 PL_lex_state = LEX_NORMAL;
3293 s = scan_formline(PL_bufptr);
3294 if (!PL_lex_formbrack)
a0d0e21e
LW
3295 goto rightbracket;
3296 OPERATOR(';');
79072805
LW
3297 }
3298
3280af22
NIS
3299 s = PL_bufptr;
3300 PL_oldoldbufptr = PL_oldbufptr;
3301 PL_oldbufptr = s;
463ee0b2
LW
3302
3303 retry:
5db06880 3304#ifdef PERL_MAD
cd81e915
NC
3305 if (PL_thistoken) {
3306 sv_free(PL_thistoken);
3307 PL_thistoken = 0;
5db06880 3308 }
cd81e915 3309 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3310#endif
378cc40b
LW
3311 switch (*s) {
3312 default:
7e2040f0 3313 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3314 goto keylookup;
cea2e8a9 3315 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3316 case 4:
3317 case 26:
3318 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3319 case 0:
5db06880
NC
3320#ifdef PERL_MAD
3321 if (PL_madskills)
cd81e915 3322 PL_faketokens = 0;
5db06880 3323#endif
3280af22
NIS
3324 if (!PL_rsfp) {
3325 PL_last_uni = 0;
3326 PL_last_lop = 0;
c5ee2135 3327 if (PL_lex_brackets) {
0bd48802
AL
3328 yyerror(PL_lex_formbrack
3329 ? "Format not terminated"
3330 : "Missing right curly or square bracket");
c5ee2135 3331 }
4e553d73 3332 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3333 "### Tokener got EOF\n");
5f80b19c 3334 } );
79072805 3335 TOKEN(0);
463ee0b2 3336 }
3280af22 3337 if (s++ < PL_bufend)
a687059c 3338 goto retry; /* ignore stray nulls */
3280af22
NIS
3339 PL_last_uni = 0;
3340 PL_last_lop = 0;
3341 if (!PL_in_eval && !PL_preambled) {
3342 PL_preambled = TRUE;
5db06880
NC
3343#ifdef PERL_MAD
3344 if (PL_madskills)
cd81e915 3345 PL_faketokens = 1;
5db06880 3346#endif
3280af22
NIS
3347 sv_setpv(PL_linestr,incl_perldb());
3348 if (SvCUR(PL_linestr))
396482e1 3349 sv_catpvs(PL_linestr,";");
3280af22
NIS
3350 if (PL_preambleav){
3351 while(AvFILLp(PL_preambleav) >= 0) {
3352 SV *tmpsv = av_shift(PL_preambleav);
3353 sv_catsv(PL_linestr, tmpsv);
396482e1 3354 sv_catpvs(PL_linestr, ";");
91b7def8
PP
3355 sv_free(tmpsv);
3356 }
3280af22
NIS
3357 sv_free((SV*)PL_preambleav);
3358 PL_preambleav = NULL;
91b7def8 3359 }
3280af22 3360 if (PL_minus_n || PL_minus_p) {
396482e1 3361 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3362 if (PL_minus_l)
396482e1 3363 sv_catpvs(PL_linestr,"chomp;");
3280af22 3364 if (PL_minus_a) {
3280af22 3365 if (PL_minus_F) {
3792a11b
NC
3366 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3367 || *PL_splitstr == '"')
3280af22 3368 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3369 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3370 else {
c8ef6a4b
NC
3371 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3372 bytes can be used as quoting characters. :-) */
dd374669 3373 const char *splits = PL_splitstr;
91d456ae 3374 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3375 do {
3376 /* Need to \ \s */
dd374669
AL
3377 if (*splits == '\\')
3378 sv_catpvn(PL_linestr, splits, 1);
3379 sv_catpvn(PL_linestr, splits, 1);
3380 } while (*splits++);
48c4c863
NC
3381 /* This loop will embed the trailing NUL of
3382 PL_linestr as the last thing it does before
3383 terminating. */
396482e1 3384 sv_catpvs(PL_linestr, ");");
54310121 3385 }
2304df62
AD
3386 }
3387 else
396482e1 3388 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3389 }
79072805 3390 }
bc9b29db 3391 if (PL_minus_E)
396482e1
GA
3392 sv_catpvs(PL_linestr,"use feature ':5.10';");
3393 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3394 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3395 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3396 PL_last_lop = PL_last_uni = NULL;
3280af22 3397 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3398 SV * const sv = newSV(0);
a0d0e21e
LW
3399
3400 sv_upgrade(sv, SVt_PVMG);
3280af22 3401 sv_setsv(sv,PL_linestr);
0ac0412a 3402 (void)SvIOK_on(sv);
45977657 3403 SvIV_set(sv, 0);
36c7798d 3404 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3405 }
79072805 3406 goto retry;
a687059c 3407 }
e929a76b 3408 do {
aa7440fb 3409 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3410 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3411 fake_eof:
5db06880 3412#ifdef PERL_MAD
cd81e915 3413 PL_realtokenstart = -1;
5db06880 3414#endif
7e28d3af
JH
3415 if (PL_rsfp) {
3416 if (PL_preprocess && !PL_in_eval)
3417 (void)PerlProc_pclose(PL_rsfp);
3418 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3419 PerlIO_clearerr(PL_rsfp);
3420 else
3421 (void)PerlIO_close(PL_rsfp);
4608196e 3422 PL_rsfp = NULL;
7e28d3af
JH
3423 PL_doextract = FALSE;
3424 }
3425 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3426#ifdef PERL_MAD
3427 if (PL_madskills)
cd81e915 3428 PL_faketokens = 1;
5db06880 3429#endif
a23c4656
NC
3430 sv_setpv(PL_linestr,PL_minus_p
3431 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3432 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3433 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3434 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3435 PL_minus_n = PL_minus_p = 0;
3436 goto retry;
3437 }
3438 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3439 PL_last_lop = PL_last_uni = NULL;
c69006e4 3440 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3441 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3442 }
7aa207d6
JH
3443 /* If it looks like the start of a BOM or raw UTF-16,
3444 * check if it in fact is. */
3445 else if (bof &&
3446 (*s == 0 ||
3447 *(U8*)s == 0xEF ||
3448 *(U8*)s >= 0xFE ||
3449 s[1] == 0)) {
226017aa 3450#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3451# ifdef __GNU_LIBRARY__
3452# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3453# define FTELL_FOR_PIPE_IS_BROKEN
3454# endif
e3f494f1
JH
3455# else
3456# ifdef __GLIBC__
3457# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3458# define FTELL_FOR_PIPE_IS_BROKEN
3459# endif
3460# endif
226017aa
DD
3461# endif
3462#endif
3463#ifdef FTELL_FOR_PIPE_IS_BROKEN
3464 /* This loses the possibility to detect the bof
3465 * situation on perl -P when the libc5 is being used.
3466 * Workaround? Maybe attach some extra state to PL_rsfp?
3467 */
3468 if (!PL_preprocess)
7e28d3af 3469 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3470#else
eb160463 3471 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3472#endif
7e28d3af 3473 if (bof) {
3280af22 3474 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3475 s = swallow_bom((U8*)s);
e929a76b 3476 }
378cc40b 3477 }
3280af22 3478 if (PL_doextract) {
a0d0e21e 3479 /* Incest with pod. */
5db06880
NC
3480#ifdef PERL_MAD
3481 if (PL_madskills)
cd81e915 3482 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 3483#endif
a0d0e21e 3484 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3485 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3486 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3487 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3488 PL_last_lop = PL_last_uni = NULL;
3280af22 3489 PL_doextract = FALSE;
a0d0e21e 3490 }
4e553d73 3491 }
463ee0b2 3492 incline(s);
3280af22
NIS
3493 } while (PL_doextract);
3494 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3495 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3496 SV * const sv = newSV(0);
a687059c 3497
93a17b20 3498 sv_upgrade(sv, SVt_PVMG);
3280af22 3499 sv_setsv(sv,PL_linestr);
0ac0412a 3500 (void)SvIOK_on(sv);
45977657 3501 SvIV_set(sv, 0);
36c7798d 3502 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 3503 }
3280af22 3504 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3505 PL_last_lop = PL_last_uni = NULL;
57843af0 3506 if (CopLINE(PL_curcop) == 1) {
3280af22 3507 while (s < PL_bufend && isSPACE(*s))
79072805 3508 s++;
a0d0e21e 3509 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 3510 s++;
5db06880
NC
3511#ifdef PERL_MAD
3512 if (PL_madskills)
cd81e915 3513 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 3514#endif
bd61b366 3515 d = NULL;
3280af22 3516 if (!PL_in_eval) {
44a8e56a
PP
3517 if (*s == '#' && *(s+1) == '!')
3518 d = s + 2;
3519#ifdef ALTERNATE_SHEBANG
3520 else {
bfed75c6 3521 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
3522 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3523 d = s + (sizeof(as) - 1);
3524 }
3525#endif /* ALTERNATE_SHEBANG */
3526 }
3527 if (d) {
b8378b72 3528 char *ipath;
774d564b