This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
IO::Compress::Zip
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a 71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
301 { 0, TOKENTYPE_NONE, 0 }
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
56da5a46
RGS
432 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
433 if (t < PL_bufptr && isSPACE(*t))
434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
435 "\t(Do you need to predeclare %.*s?)\n",
551405c4 436 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
437 }
438 else {
439 assert(s >= oldbp);
440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 441 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 442 }
07c798fb 443 }
3280af22 444 PL_bufptr = oldbp;
8990e307
LW
445}
446
ffb4593c
NT
447/*
448 * S_missingterm
449 * Complain about missing quote/regexp/heredoc terminator.
450 * If it's called with (char *)NULL then it cauterizes the line buffer.
451 * If we're in a delimited string and the delimiter is a control
452 * character, it's reformatted into a two-char sequence like ^C.
453 * This is fatal.
454 */
455
76e3520e 456STATIC void
cea2e8a9 457S_missingterm(pTHX_ char *s)
8990e307 458{
97aff369 459 dVAR;
8990e307
LW
460 char tmpbuf[3];
461 char q;
462 if (s) {
9d4ba2ae 463 char * const nl = strrchr(s,'\n');
d2719217 464 if (nl)
8990e307
LW
465 *nl = '\0';
466 }
9d116dd7
JH
467 else if (
468#ifdef EBCDIC
469 iscntrl(PL_multi_close)
470#else
471 PL_multi_close < 32 || PL_multi_close == 127
472#endif
473 ) {
8990e307 474 *tmpbuf = '^';
585ec06d 475 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
476 tmpbuf[2] = '\0';
477 s = tmpbuf;
478 }
479 else {
eb160463 480 *tmpbuf = (char)PL_multi_close;
8990e307
LW
481 tmpbuf[1] = '\0';
482 s = tmpbuf;
483 }
484 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 485 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 486}
79072805 487
ef89dcc3 488#define FEATURE_IS_ENABLED(name) \
0d863452 489 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 490 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
491/*
492 * S_feature_is_enabled
493 * Check whether the named feature is enabled.
494 */
495STATIC bool
496S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
497{
97aff369 498 dVAR;
0d863452 499 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
500 char he_name[32] = "feature_";
501 (void) strncpy(&he_name[8], name, 24);
502
503 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
504}
505
ffb4593c
NT
506/*
507 * Perl_deprecate
ffb4593c
NT
508 */
509
79072805 510void
bfed75c6 511Perl_deprecate(pTHX_ const char *s)
a0d0e21e 512{
599cee73 513 if (ckWARN(WARN_DEPRECATED))
9014280d 514 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
515}
516
12bcd1a6 517void
bfed75c6 518Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
519{
520 /* This function should NOT be called for any new deprecated warnings */
521 /* Use Perl_deprecate instead */
522 /* */
523 /* It is here to maintain backward compatibility with the pre-5.8 */
524 /* warnings category hierarchy. The "deprecated" category used to */
525 /* live under the "syntax" category. It is now a top-level category */
526 /* in its own right. */
527
528 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 529 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
530 "Use of %s is deprecated", s);
531}
532
ffb4593c 533/*
9cbb5ea2
GS
534 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
535 * utf16-to-utf8-reversed.
ffb4593c
NT
536 */
537
c39cd008
GS
538#ifdef PERL_CR_FILTER
539static void
540strip_return(SV *sv)
541{
95a20fc0 542 register const char *s = SvPVX_const(sv);
9d4ba2ae 543 register const char * const e = s + SvCUR(sv);
c39cd008
GS
544 /* outer loop optimized to do nothing if there are no CR-LFs */
545 while (s < e) {
546 if (*s++ == '\r' && *s == '\n') {
547 /* hit a CR-LF, need to copy the rest */
548 register char *d = s - 1;
549 *d++ = *s++;
550 while (s < e) {
551 if (*s == '\r' && s[1] == '\n')
552 s++;
553 *d++ = *s++;
554 }
555 SvCUR(sv) -= s - d;
556 return;
557 }
558 }
559}
a868473f 560
76e3520e 561STATIC I32
c39cd008 562S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 563{
f54cb97a 564 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
565 if (count > 0 && !maxlen)
566 strip_return(sv);
567 return count;
a868473f
NIS
568}
569#endif
570
ffb4593c
NT
571/*
572 * Perl_lex_start
9cbb5ea2
GS
573 * Initialize variables. Uses the Perl save_stack to save its state (for
574 * recursive calls to the parser).
ffb4593c
NT
575 */
576
a0d0e21e 577void
864dbfa3 578Perl_lex_start(pTHX_ SV *line)
79072805 579{
97aff369 580 dVAR;
cfd0369c 581 const char *s;
8990e307
LW
582 STRLEN len;
583
3280af22
NIS
584 SAVEI32(PL_lex_dojoin);
585 SAVEI32(PL_lex_brackets);
3280af22
NIS
586 SAVEI32(PL_lex_casemods);
587 SAVEI32(PL_lex_starts);
588 SAVEI32(PL_lex_state);
7766f137 589 SAVEVPTR(PL_lex_inpat);
3280af22 590 SAVEI32(PL_lex_inwhat);
5db06880
NC
591#ifdef PERL_MAD
592 if (PL_lex_state == LEX_KNOWNEXT) {
593 I32 toke = PL_lasttoke;
594 while (--toke >= 0) {
595 SAVEI32(PL_nexttoke[toke].next_type);
596 SAVEVPTR(PL_nexttoke[toke].next_val);
597 if (PL_madskills)
598 SAVEVPTR(PL_nexttoke[toke].next_mad);
599 }
600 SAVEI32(PL_lasttoke);
601 }
602 if (PL_madskills) {
cd81e915
NC
603 SAVESPTR(PL_thistoken);
604 SAVESPTR(PL_thiswhite);
605 SAVESPTR(PL_nextwhite);
606 SAVESPTR(PL_thisopen);
607 SAVESPTR(PL_thisclose);
608 SAVESPTR(PL_thisstuff);
609 SAVEVPTR(PL_thismad);
610 SAVEI32(PL_realtokenstart);
611 SAVEI32(PL_faketokens);
612 }
613 SAVEI32(PL_curforce);
5db06880 614#else
18b09519
GS
615 if (PL_lex_state == LEX_KNOWNEXT) {
616 I32 toke = PL_nexttoke;
617 while (--toke >= 0) {
618 SAVEI32(PL_nexttype[toke]);
619 SAVEVPTR(PL_nextval[toke]);
620 }
621 SAVEI32(PL_nexttoke);
18b09519 622 }
5db06880 623#endif
57843af0 624 SAVECOPLINE(PL_curcop);
3280af22
NIS
625 SAVEPPTR(PL_bufptr);
626 SAVEPPTR(PL_bufend);
627 SAVEPPTR(PL_oldbufptr);
628 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
629 SAVEPPTR(PL_last_lop);
630 SAVEPPTR(PL_last_uni);
3280af22
NIS
631 SAVEPPTR(PL_linestart);
632 SAVESPTR(PL_linestr);
8edd5f42
RGS
633 SAVEGENERICPV(PL_lex_brackstack);
634 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 635 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
636 SAVESPTR(PL_lex_stuff);
637 SAVEI32(PL_lex_defer);
09bef843 638 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 639 SAVESPTR(PL_lex_repl);
bebdddfc
GS
640 SAVEINT(PL_expect);
641 SAVEINT(PL_lex_expect);
3280af22
NIS
642
643 PL_lex_state = LEX_NORMAL;
644 PL_lex_defer = 0;
645 PL_expect = XSTATE;
646 PL_lex_brackets = 0;
a02a5408
JC
647 Newx(PL_lex_brackstack, 120, char);
648 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
649 PL_lex_casemods = 0;
650 *PL_lex_casestack = '\0';
651 PL_lex_dojoin = 0;
652 PL_lex_starts = 0;
a0714e2c
SS
653 PL_lex_stuff = NULL;
654 PL_lex_repl = NULL;
3280af22 655 PL_lex_inpat = 0;
5db06880
NC
656#ifdef PERL_MAD
657 PL_lasttoke = 0;
658#else
76be56bc 659 PL_nexttoke = 0;
5db06880 660#endif
3280af22 661 PL_lex_inwhat = 0;
09bef843 662 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
663 PL_linestr = line;
664 if (SvREADONLY(PL_linestr))
665 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 666 s = SvPV_const(PL_linestr, len);
6f27f9a7 667 if (!len || s[len-1] != ';') {
3280af22
NIS
668 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
669 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 670 sv_catpvs(PL_linestr, "\n;");
8990e307 671 }
3280af22
NIS
672 SvTEMP_off(PL_linestr);
673 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
674 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 675 PL_last_lop = PL_last_uni = NULL;
3280af22 676 PL_rsfp = 0;
79072805 677}
a687059c 678
ffb4593c
NT
679/*
680 * Perl_lex_end
9cbb5ea2
GS
681 * Finalizer for lexing operations. Must be called when the parser is
682 * done with the lexer.
ffb4593c
NT
683 */
684
463ee0b2 685void
864dbfa3 686Perl_lex_end(pTHX)
463ee0b2 687{
97aff369 688 dVAR;
3280af22 689 PL_doextract = FALSE;
463ee0b2
LW
690}
691
ffb4593c
NT
692/*
693 * S_incline
694 * This subroutine has nothing to do with tilting, whether at windmills
695 * or pinball tables. Its name is short for "increment line". It
57843af0 696 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 697 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
698 * # line 500 "foo.pm"
699 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
700 */
701
76e3520e 702STATIC void
cea2e8a9 703S_incline(pTHX_ char *s)
463ee0b2 704{
97aff369 705 dVAR;
463ee0b2
LW
706 char *t;
707 char *n;
73659bf1 708 char *e;
463ee0b2 709 char ch;
463ee0b2 710
57843af0 711 CopLINE_inc(PL_curcop);
463ee0b2
LW
712 if (*s++ != '#')
713 return;
bf4acbe4 714 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
715 if (strnEQ(s, "line", 4))
716 s += 4;
717 else
718 return;
084592ab 719 if (SPACE_OR_TAB(*s))
73659bf1 720 s++;
4e553d73 721 else
73659bf1 722 return;
bf4acbe4 723 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
724 if (!isDIGIT(*s))
725 return;
726 n = s;
727 while (isDIGIT(*s))
728 s++;
bf4acbe4 729 while (SPACE_OR_TAB(*s))
463ee0b2 730 s++;
73659bf1 731 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 732 s++;
73659bf1
GS
733 e = t + 1;
734 }
463ee0b2 735 else {
463ee0b2 736 for (t = s; !isSPACE(*t); t++) ;
73659bf1 737 e = t;
463ee0b2 738 }
bf4acbe4 739 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
740 e++;
741 if (*e != '\n' && *e != '\0')
742 return; /* false alarm */
743
463ee0b2
LW
744 ch = *t;
745 *t = '\0';
f4dd75d9 746 if (t - s > 0) {
8a5ee598 747#ifndef USE_ITHREADS
c4420975 748 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
749 STRLEN tmplen = cf ? strlen(cf) : 0;
750 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
751 /* must copy *{"::_<(eval N)[oldfilename:L]"}
752 * to *{"::_<newfilename"} */
753 char smallbuf[256], smallbuf2[256];
754 char *tmpbuf, *tmpbuf2;
8a5ee598 755 GV **gvp, *gv2;
e66cf94c
RGS
756 STRLEN tmplen2 = strlen(s);
757 if (tmplen + 3 < sizeof smallbuf)
758 tmpbuf = smallbuf;
759 else
760 Newx(tmpbuf, tmplen + 3, char);
761 if (tmplen2 + 3 < sizeof smallbuf2)
762 tmpbuf2 = smallbuf2;
763 else
764 Newx(tmpbuf2, tmplen2 + 3, char);
765 tmpbuf[0] = tmpbuf2[0] = '_';
766 tmpbuf[1] = tmpbuf2[1] = '<';
767 memcpy(tmpbuf + 2, cf, ++tmplen);
768 memcpy(tmpbuf2 + 2, s, ++tmplen2);
769 ++tmplen; ++tmplen2;
8a5ee598
RGS
770 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
771 if (gvp) {
772 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
773 if (!isGV(gv2))
774 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
775 /* adjust ${"::_<newfilename"} to store the new file name */
776 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
777 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
778 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
779 }
e66cf94c
RGS
780 if (tmpbuf != smallbuf) Safefree(tmpbuf);
781 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
782 }
8a5ee598 783#endif
05ec9bb3 784 CopFILE_free(PL_curcop);
57843af0 785 CopFILE_set(PL_curcop, s);
f4dd75d9 786 }
463ee0b2 787 *t = ch;
57843af0 788 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
789}
790
29595ff2 791#ifdef PERL_MAD
cd81e915 792/* skip space before PL_thistoken */
29595ff2
NC
793
794STATIC char *
795S_skipspace0(pTHX_ register char *s)
796{
797 s = skipspace(s);
798 if (!PL_madskills)
799 return s;
cd81e915
NC
800 if (PL_skipwhite) {
801 if (!PL_thiswhite)
802 PL_thiswhite = newSVpvn("",0);
803 sv_catsv(PL_thiswhite, PL_skipwhite);
804 sv_free(PL_skipwhite);
805 PL_skipwhite = 0;
806 }
807 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
808 return s;
809}
810
cd81e915 811/* skip space after PL_thistoken */
29595ff2
NC
812
813STATIC char *
814S_skipspace1(pTHX_ register char *s)
815{
816 char *start = s;
817 I32 startoff = start - SvPVX(PL_linestr);
818
819 s = skipspace(s);
820 if (!PL_madskills)
821 return s;
822 start = SvPVX(PL_linestr) + startoff;
cd81e915
NC
823 if (!PL_thistoken && PL_realtokenstart >= 0) {
824 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
825 PL_thistoken = newSVpvn(tstart, start - tstart);
826 }
827 PL_realtokenstart = -1;
828 if (PL_skipwhite) {
829 if (!PL_nextwhite)
830 PL_nextwhite = newSVpvn("",0);
831 sv_catsv(PL_nextwhite, PL_skipwhite);
832 sv_free(PL_skipwhite);
833 PL_skipwhite = 0;
29595ff2
NC
834 }
835 return s;
836}
837
838STATIC char *
839S_skipspace2(pTHX_ register char *s, SV **svp)
840{
841 char *start = s;
842 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
843 I32 startoff = start - SvPVX(PL_linestr);
844 s = skipspace(s);
845 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
846 if (!PL_madskills || !svp)
847 return s;
848 start = SvPVX(PL_linestr) + startoff;
cd81e915
NC
849 if (!PL_thistoken && PL_realtokenstart >= 0) {
850 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
851 PL_thistoken = newSVpvn(tstart, start - tstart);
852 PL_realtokenstart = -1;
29595ff2 853 }
cd81e915 854 if (PL_skipwhite) {
29595ff2
NC
855 if (!*svp)
856 *svp = newSVpvn("",0);
cd81e915
NC
857 sv_setsv(*svp, PL_skipwhite);
858 sv_free(PL_skipwhite);
859 PL_skipwhite = 0;
29595ff2
NC
860 }
861
862 return s;
863}
864#endif
865
ffb4593c
NT
866/*
867 * S_skipspace
868 * Called to gobble the appropriate amount and type of whitespace.
869 * Skips comments as well.
870 */
871
76e3520e 872STATIC char *
cea2e8a9 873S_skipspace(pTHX_ register char *s)
a687059c 874{
97aff369 875 dVAR;
5db06880
NC
876#ifdef PERL_MAD
877 int curoff;
878 int startoff = s - SvPVX(PL_linestr);
879
cd81e915
NC
880 if (PL_skipwhite) {
881 sv_free(PL_skipwhite);
882 PL_skipwhite = 0;
5db06880
NC
883 }
884#endif
885
3280af22 886 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 887 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 888 s++;
5db06880
NC
889#ifdef PERL_MAD
890 goto done;
891#else
463ee0b2 892 return s;
5db06880 893#endif
463ee0b2
LW
894 }
895 for (;;) {
fd049845 896 STRLEN prevlen;
09bef843 897 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 898 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
899 while (s < PL_bufend && isSPACE(*s)) {
900 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
901 incline(s);
902 }
ffb4593c
NT
903
904 /* comment */
3280af22
NIS
905 if (s < PL_bufend && *s == '#') {
906 while (s < PL_bufend && *s != '\n')
463ee0b2 907 s++;
60e6418e 908 if (s < PL_bufend) {
463ee0b2 909 s++;
60e6418e
GS
910 if (PL_in_eval && !PL_rsfp) {
911 incline(s);
912 continue;
913 }
914 }
463ee0b2 915 }
ffb4593c
NT
916
917 /* only continue to recharge the buffer if we're at the end
918 * of the buffer, we're not reading from a source filter, and
919 * we're in normal lexing mode
920 */
09bef843
SB
921 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
922 PL_lex_state == LEX_FORMLINE)
5db06880
NC
923#ifdef PERL_MAD
924 goto done;
925#else
463ee0b2 926 return s;
5db06880 927#endif
ffb4593c
NT
928
929 /* try to recharge the buffer */
5db06880
NC
930#ifdef PERL_MAD
931 curoff = s - SvPVX(PL_linestr);
932#endif
933
9cbb5ea2 934 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 935 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 936 {
5db06880
NC
937#ifdef PERL_MAD
938 if (PL_madskills && curoff != startoff) {
cd81e915
NC
939 if (!PL_skipwhite)
940 PL_skipwhite = newSVpvn("",0);
941 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
942 curoff - startoff);
943 }
944
945 /* mustn't throw out old stuff yet if madpropping */
946 SvCUR(PL_linestr) = curoff;
947 s = SvPVX(PL_linestr) + curoff;
948 *s = 0;
949 if (curoff && s[-1] == '\n')
950 s[-1] = ' ';
951#endif
952
9cbb5ea2 953 /* end of file. Add on the -p or -n magic */
cd81e915 954 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 955 if (PL_minus_p) {
5db06880
NC
956#ifdef PERL_MAD
957 sv_catpv(PL_linestr,
958 ";}continue{print or die qq(-p destination: $!\\n);}");
959#else
01a19ab0
NC
960 sv_setpv(PL_linestr,
961 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 962#endif
3280af22 963 PL_minus_n = PL_minus_p = 0;
a0d0e21e 964 }
01a19ab0 965 else if (PL_minus_n) {
5db06880
NC
966#ifdef PERL_MAD
967 sv_catpvn(PL_linestr, ";}", 2);
968#else
01a19ab0 969 sv_setpvn(PL_linestr, ";}", 2);
5db06880 970#endif
01a19ab0
NC
971 PL_minus_n = 0;
972 }
a0d0e21e 973 else
5db06880
NC
974#ifdef PERL_MAD
975 sv_catpvn(PL_linestr,";", 1);
976#else
4147a61b 977 sv_setpvn(PL_linestr,";", 1);
5db06880 978#endif
ffb4593c
NT
979
980 /* reset variables for next time we lex */
9cbb5ea2 981 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
982 = SvPVX(PL_linestr)
983#ifdef PERL_MAD
984 + curoff
985#endif
986 ;
3280af22 987 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 988 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
989
990 /* Close the filehandle. Could be from -P preprocessor,
991 * STDIN, or a regular file. If we were reading code from
992 * STDIN (because the commandline held no -e or filename)
993 * then we don't close it, we reset it so the code can
994 * read from STDIN too.
995 */
996
3280af22
NIS
997 if (PL_preprocess && !PL_in_eval)
998 (void)PerlProc_pclose(PL_rsfp);
999 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1000 PerlIO_clearerr(PL_rsfp);
8990e307 1001 else
3280af22 1002 (void)PerlIO_close(PL_rsfp);
4608196e 1003 PL_rsfp = NULL;
463ee0b2
LW
1004 return s;
1005 }
ffb4593c
NT
1006
1007 /* not at end of file, so we only read another line */
09bef843
SB
1008 /* make corresponding updates to old pointers, for yyerror() */
1009 oldprevlen = PL_oldbufptr - PL_bufend;
1010 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1011 if (PL_last_uni)
1012 oldunilen = PL_last_uni - PL_bufend;
1013 if (PL_last_lop)
1014 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1015 PL_linestart = PL_bufptr = s + prevlen;
1016 PL_bufend = s + SvCUR(PL_linestr);
1017 s = PL_bufptr;
09bef843
SB
1018 PL_oldbufptr = s + oldprevlen;
1019 PL_oldoldbufptr = s + oldoldprevlen;
1020 if (PL_last_uni)
1021 PL_last_uni = s + oldunilen;
1022 if (PL_last_lop)
1023 PL_last_lop = s + oldloplen;
a0d0e21e 1024 incline(s);
ffb4593c
NT
1025
1026 /* debugger active and we're not compiling the debugger code,
1027 * so store the line into the debugger's array of lines
1028 */
3280af22 1029 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1030 SV * const sv = newSV(0);
8990e307
LW
1031
1032 sv_upgrade(sv, SVt_PVMG);
3280af22 1033 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1034 (void)SvIOK_on(sv);
45977657 1035 SvIV_set(sv, 0);
36c7798d 1036 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1037 }
463ee0b2 1038 }
5db06880
NC
1039
1040#ifdef PERL_MAD
1041 done:
1042 if (PL_madskills) {
cd81e915
NC
1043 if (!PL_skipwhite)
1044 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1045 curoff = s - SvPVX(PL_linestr);
1046 if (curoff - startoff)
cd81e915 1047 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1048 curoff - startoff);
1049 }
1050 return s;
1051#endif
a687059c 1052}
378cc40b 1053
ffb4593c
NT
1054/*
1055 * S_check_uni
1056 * Check the unary operators to ensure there's no ambiguity in how they're
1057 * used. An ambiguous piece of code would be:
1058 * rand + 5
1059 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1060 * the +5 is its argument.
1061 */
1062
76e3520e 1063STATIC void
cea2e8a9 1064S_check_uni(pTHX)
ba106d47 1065{
97aff369 1066 dVAR;
2f3197b3 1067 char *s;
a0d0e21e 1068 char *t;
2f3197b3 1069
3280af22 1070 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1071 return;
3280af22
NIS
1072 while (isSPACE(*PL_last_uni))
1073 PL_last_uni++;
7e2040f0 1074 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 1075 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1076 return;
6136c704 1077
0453d815 1078 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1079 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1080 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1081 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1082 }
2f3197b3
LW
1083}
1084
ffb4593c
NT
1085/*
1086 * LOP : macro to build a list operator. Its behaviour has been replaced
1087 * with a subroutine, S_lop() for which LOP is just another name.
1088 */
1089
a0d0e21e
LW
1090#define LOP(f,x) return lop(f,x,s)
1091
ffb4593c
NT
1092/*
1093 * S_lop
1094 * Build a list operator (or something that might be one). The rules:
1095 * - if we have a next token, then it's a list operator [why?]
1096 * - if the next thing is an opening paren, then it's a function
1097 * - else it's a list operator
1098 */
1099
76e3520e 1100STATIC I32
a0be28da 1101S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1102{
97aff369 1103 dVAR;
79072805 1104 yylval.ival = f;
35c8bce7 1105 CLINE;
3280af22
NIS
1106 PL_expect = x;
1107 PL_bufptr = s;
1108 PL_last_lop = PL_oldbufptr;
eb160463 1109 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1110#ifdef PERL_MAD
1111 if (PL_lasttoke)
1112 return REPORT(LSTOP);
1113#else
3280af22 1114 if (PL_nexttoke)
bbf60fe6 1115 return REPORT(LSTOP);
5db06880 1116#endif
79072805 1117 if (*s == '(')
bbf60fe6 1118 return REPORT(FUNC);
29595ff2 1119 s = PEEKSPACE(s);
79072805 1120 if (*s == '(')
bbf60fe6 1121 return REPORT(FUNC);
79072805 1122 else
bbf60fe6 1123 return REPORT(LSTOP);
79072805
LW
1124}
1125
5db06880
NC
1126#ifdef PERL_MAD
1127 /*
1128 * S_start_force
1129 * Sets up for an eventual force_next(). start_force(0) basically does
1130 * an unshift, while start_force(-1) does a push. yylex removes items
1131 * on the "pop" end.
1132 */
1133
1134STATIC void
1135S_start_force(pTHX_ int where)
1136{
1137 int i;
1138
cd81e915 1139 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1140 where = PL_lasttoke;
cd81e915
NC
1141 assert(PL_curforce < 0 || PL_curforce == where);
1142 if (PL_curforce != where) {
5db06880
NC
1143 for (i = PL_lasttoke; i > where; --i) {
1144 PL_nexttoke[i] = PL_nexttoke[i-1];
1145 }
1146 PL_lasttoke++;
1147 }
cd81e915 1148 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1149 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1150 PL_curforce = where;
1151 if (PL_nextwhite) {
5db06880
NC
1152 if (PL_madskills)
1153 curmad('^', newSVpvn("",0));
cd81e915 1154 CURMAD('_', PL_nextwhite);
5db06880
NC
1155 }
1156}
1157
1158STATIC void
1159S_curmad(pTHX_ char slot, SV *sv)
1160{
1161 MADPROP **where;
1162
1163 if (!sv)
1164 return;
cd81e915
NC
1165 if (PL_curforce < 0)
1166 where = &PL_thismad;
5db06880 1167 else
cd81e915 1168 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1169
cd81e915 1170 if (PL_faketokens)
5db06880
NC
1171 sv_setpvn(sv, "", 0);
1172 else {
1173 if (!IN_BYTES) {
1174 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1175 SvUTF8_on(sv);
1176 else if (PL_encoding) {
1177 sv_recode_to_utf8(sv, PL_encoding);
1178 }
1179 }
1180 }
1181
1182 /* keep a slot open for the head of the list? */
1183 if (slot != '_' && *where && (*where)->mad_key == '^') {
1184 (*where)->mad_key = slot;
1185 sv_free((*where)->mad_val);
1186 (*where)->mad_val = (void*)sv;
1187 }
1188 else
1189 addmad(newMADsv(slot, sv), where, 0);
1190}
1191#else
1192# define start_force(where)
1193# define curmad(slot, sv)
1194#endif
1195
ffb4593c
NT
1196/*
1197 * S_force_next
9cbb5ea2 1198 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1199 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1200 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1201 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1202 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1203 */
1204
4e553d73 1205STATIC void
cea2e8a9 1206S_force_next(pTHX_ I32 type)
79072805 1207{
97aff369 1208 dVAR;
5db06880 1209#ifdef PERL_MAD
cd81e915 1210 if (PL_curforce < 0)
5db06880 1211 start_force(PL_lasttoke);
cd81e915 1212 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1213 if (PL_lex_state != LEX_KNOWNEXT)
1214 PL_lex_defer = PL_lex_state;
1215 PL_lex_state = LEX_KNOWNEXT;
1216 PL_lex_expect = PL_expect;
cd81e915 1217 PL_curforce = -1;
5db06880 1218#else
3280af22
NIS
1219 PL_nexttype[PL_nexttoke] = type;
1220 PL_nexttoke++;
1221 if (PL_lex_state != LEX_KNOWNEXT) {
1222 PL_lex_defer = PL_lex_state;
1223 PL_lex_expect = PL_expect;
1224 PL_lex_state = LEX_KNOWNEXT;
79072805 1225 }
5db06880 1226#endif
79072805
LW
1227}
1228
d0a148a6
NC
1229STATIC SV *
1230S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1231{
97aff369 1232 dVAR;
9d4ba2ae 1233 SV * const sv = newSVpvn(start,len);
bfed75c6 1234 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1235 SvUTF8_on(sv);
1236 return sv;
1237}
1238
ffb4593c
NT
1239/*
1240 * S_force_word
1241 * When the lexer knows the next thing is a word (for instance, it has
1242 * just seen -> and it knows that the next char is a word char, then
1243 * it calls S_force_word to stick the next word into the PL_next lookahead.
1244 *
1245 * Arguments:
b1b65b59 1246 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1247 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1248 * int check_keyword : if true, Perl checks to make sure the word isn't
1249 * a keyword (do this if the word is a label, e.g. goto FOO)
1250 * int allow_pack : if true, : characters will also be allowed (require,
1251 * use, etc. do this)
9cbb5ea2 1252 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1253 */
1254
76e3520e 1255STATIC char *
cea2e8a9 1256S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1257{
97aff369 1258 dVAR;
463ee0b2
LW
1259 register char *s;
1260 STRLEN len;
4e553d73 1261
29595ff2 1262 start = SKIPSPACE1(start);
463ee0b2 1263 s = start;
7e2040f0 1264 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1265 (allow_pack && *s == ':') ||
15f0808c 1266 (allow_initial_tick && *s == '\'') )
a0d0e21e 1267 {
3280af22
NIS
1268 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1269 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1270 return start;
cd81e915 1271 start_force(PL_curforce);
5db06880
NC
1272 if (PL_madskills)
1273 curmad('X', newSVpvn(start,s-start));
463ee0b2 1274 if (token == METHOD) {
29595ff2 1275 s = SKIPSPACE1(s);
463ee0b2 1276 if (*s == '(')
3280af22 1277 PL_expect = XTERM;
463ee0b2 1278 else {
3280af22 1279 PL_expect = XOPERATOR;
463ee0b2 1280 }
79072805 1281 }
9ded7720 1282 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1283 = (OP*)newSVOP(OP_CONST,0,
1284 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1285 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1286 force_next(token);
1287 }
1288 return s;
1289}
1290
ffb4593c
NT
1291/*
1292 * S_force_ident
9cbb5ea2 1293 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1294 * text only contains the "foo" portion. The first argument is a pointer
1295 * to the "foo", and the second argument is the type symbol to prefix.
1296 * Forces the next token to be a "WORD".
9cbb5ea2 1297 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1298 */
1299
76e3520e 1300STATIC void
bfed75c6 1301S_force_ident(pTHX_ register const char *s, int kind)
79072805 1302{
97aff369 1303 dVAR;
79072805 1304 if (s && *s) {
90e5519e
NC
1305 const STRLEN len = strlen(s);
1306 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1307 start_force(PL_curforce);
9ded7720 1308 NEXTVAL_NEXTTOKE.opval = o;
79072805 1309 force_next(WORD);
748a9306 1310 if (kind) {
11343788 1311 o->op_private = OPpCONST_ENTERED;
55497cff 1312 /* XXX see note in pp_entereval() for why we forgo typo
1313 warnings if the symbol must be introduced in an eval.
1314 GSAR 96-10-12 */
90e5519e
NC
1315 gv_fetchpvn_flags(s, len,
1316 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1317 : GV_ADD,
1318 kind == '$' ? SVt_PV :
1319 kind == '@' ? SVt_PVAV :
1320 kind == '%' ? SVt_PVHV :
a0d0e21e 1321 SVt_PVGV
90e5519e 1322 );
748a9306 1323 }
79072805
LW
1324 }
1325}
1326
1571675a
GS
1327NV
1328Perl_str_to_version(pTHX_ SV *sv)
1329{
1330 NV retval = 0.0;
1331 NV nshift = 1.0;
1332 STRLEN len;
cfd0369c 1333 const char *start = SvPV_const(sv,len);
9d4ba2ae 1334 const char * const end = start + len;
504618e9 1335 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1336 while (start < end) {
ba210ebe 1337 STRLEN skip;
1571675a
GS
1338 UV n;
1339 if (utf)
9041c2e3 1340 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1341 else {
1342 n = *(U8*)start;
1343 skip = 1;
1344 }
1345 retval += ((NV)n)/nshift;
1346 start += skip;
1347 nshift *= 1000;
1348 }
1349 return retval;
1350}
1351
4e553d73 1352/*
ffb4593c
NT
1353 * S_force_version
1354 * Forces the next token to be a version number.
e759cc13
RGS
1355 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1356 * and if "guessing" is TRUE, then no new token is created (and the caller
1357 * must use an alternative parsing method).
ffb4593c
NT
1358 */
1359
76e3520e 1360STATIC char *
e759cc13 1361S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1362{
97aff369 1363 dVAR;
5f66b61c 1364 OP *version = NULL;
44dcb63b 1365 char *d;
5db06880
NC
1366#ifdef PERL_MAD
1367 I32 startoff = s - SvPVX(PL_linestr);
1368#endif
89bfa8cd 1369
29595ff2 1370 s = SKIPSPACE1(s);
89bfa8cd 1371
44dcb63b 1372 d = s;
dd629d5b 1373 if (*d == 'v')
44dcb63b 1374 d++;
44dcb63b 1375 if (isDIGIT(*d)) {
e759cc13
RGS
1376 while (isDIGIT(*d) || *d == '_' || *d == '.')
1377 d++;
5db06880
NC
1378#ifdef PERL_MAD
1379 if (PL_madskills) {
cd81e915 1380 start_force(PL_curforce);
5db06880
NC
1381 curmad('X', newSVpvn(s,d-s));
1382 }
1383#endif
9f3d182e 1384 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1385 SV *ver;
b73d6f50 1386 s = scan_num(s, &yylval);
89bfa8cd 1387 version = yylval.opval;
dd629d5b
GS
1388 ver = cSVOPx(version)->op_sv;
1389 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1390 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1391 SvNV_set(ver, str_to_version(ver));
1571675a 1392 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1393 }
89bfa8cd 1394 }
5db06880
NC
1395 else if (guessing) {
1396#ifdef PERL_MAD
1397 if (PL_madskills) {
cd81e915
NC
1398 sv_free(PL_nextwhite); /* let next token collect whitespace */
1399 PL_nextwhite = 0;
5db06880
NC
1400 s = SvPVX(PL_linestr) + startoff;
1401 }
1402#endif
e759cc13 1403 return s;
5db06880 1404 }
89bfa8cd 1405 }
1406
5db06880
NC
1407#ifdef PERL_MAD
1408 if (PL_madskills && !version) {
cd81e915
NC
1409 sv_free(PL_nextwhite); /* let next token collect whitespace */
1410 PL_nextwhite = 0;
5db06880
NC
1411 s = SvPVX(PL_linestr) + startoff;
1412 }
1413#endif
89bfa8cd 1414 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1415 start_force(PL_curforce);
9ded7720 1416 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1417 force_next(WORD);
89bfa8cd 1418
e759cc13 1419 return s;
89bfa8cd 1420}
1421
ffb4593c
NT
1422/*
1423 * S_tokeq
1424 * Tokenize a quoted string passed in as an SV. It finds the next
1425 * chunk, up to end of string or a backslash. It may make a new
1426 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1427 * turns \\ into \.
1428 */
1429
76e3520e 1430STATIC SV *
cea2e8a9 1431S_tokeq(pTHX_ SV *sv)
79072805 1432{
97aff369 1433 dVAR;
79072805
LW
1434 register char *s;
1435 register char *send;
1436 register char *d;
b3ac6de7
IZ
1437 STRLEN len = 0;
1438 SV *pv = sv;
79072805
LW
1439
1440 if (!SvLEN(sv))
b3ac6de7 1441 goto finish;
79072805 1442
a0d0e21e 1443 s = SvPV_force(sv, len);
21a311ee 1444 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1445 goto finish;
463ee0b2 1446 send = s + len;
79072805
LW
1447 while (s < send && *s != '\\')
1448 s++;
1449 if (s == send)
b3ac6de7 1450 goto finish;
79072805 1451 d = s;
be4731d2 1452 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1453 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1454 if (SvUTF8(sv))
1455 SvUTF8_on(pv);
1456 }
79072805
LW
1457 while (s < send) {
1458 if (*s == '\\') {
a0d0e21e 1459 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1460 s++; /* all that, just for this */
1461 }
1462 *d++ = *s++;
1463 }
1464 *d = '\0';
95a20fc0 1465 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1466 finish:
3280af22 1467 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1468 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1469 return sv;
1470}
1471
ffb4593c
NT
1472/*
1473 * Now come three functions related to double-quote context,
1474 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1475 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1476 * interact with PL_lex_state, and create fake ( ... ) argument lists
1477 * to handle functions and concatenation.
1478 * They assume that whoever calls them will be setting up a fake
1479 * join call, because each subthing puts a ',' after it. This lets
1480 * "lower \luPpEr"
1481 * become
1482 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1483 *
1484 * (I'm not sure whether the spurious commas at the end of lcfirst's
1485 * arguments and join's arguments are created or not).
1486 */
1487
1488/*
1489 * S_sublex_start
1490 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1491 *
1492 * Pattern matching will set PL_lex_op to the pattern-matching op to
1493 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1494 *
1495 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1496 *
1497 * Everything else becomes a FUNC.
1498 *
1499 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1500 * had an OP_CONST or OP_READLINE). This just sets us up for a
1501 * call to S_sublex_push().
1502 */
1503
76e3520e 1504STATIC I32
cea2e8a9 1505S_sublex_start(pTHX)
79072805 1506{
97aff369 1507 dVAR;
0d46e09a 1508 register const I32 op_type = yylval.ival;
79072805
LW
1509
1510 if (op_type == OP_NULL) {
3280af22 1511 yylval.opval = PL_lex_op;
5f66b61c 1512 PL_lex_op = NULL;
79072805
LW
1513 return THING;
1514 }
1515 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1516 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1517
1518 if (SvTYPE(sv) == SVt_PVIV) {
1519 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1520 STRLEN len;
96a5add6 1521 const char * const p = SvPV_const(sv, len);
f54cb97a 1522 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1523 if (SvUTF8(sv))
1524 SvUTF8_on(nsv);
b3ac6de7
IZ
1525 SvREFCNT_dec(sv);
1526 sv = nsv;
4e553d73 1527 }
b3ac6de7 1528 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1529 PL_lex_stuff = NULL;
6f33ba73
RGS
1530 /* Allow <FH> // "foo" */
1531 if (op_type == OP_READLINE)
1532 PL_expect = XTERMORDORDOR;
79072805
LW
1533 return THING;
1534 }
1535
3280af22
NIS
1536 PL_sublex_info.super_state = PL_lex_state;
1537 PL_sublex_info.sub_inwhat = op_type;
1538 PL_sublex_info.sub_op = PL_lex_op;
1539 PL_lex_state = LEX_INTERPPUSH;
55497cff 1540
3280af22
NIS
1541 PL_expect = XTERM;
1542 if (PL_lex_op) {
1543 yylval.opval = PL_lex_op;
5f66b61c 1544 PL_lex_op = NULL;
55497cff 1545 return PMFUNC;
1546 }
1547 else
1548 return FUNC;
1549}
1550
ffb4593c
NT
1551/*
1552 * S_sublex_push
1553 * Create a new scope to save the lexing state. The scope will be
1554 * ended in S_sublex_done. Returns a '(', starting the function arguments
1555 * to the uc, lc, etc. found before.
1556 * Sets PL_lex_state to LEX_INTERPCONCAT.
1557 */
1558
76e3520e 1559STATIC I32
cea2e8a9 1560S_sublex_push(pTHX)
55497cff 1561{
27da23d5 1562 dVAR;
f46d017c 1563 ENTER;
55497cff 1564
3280af22
NIS
1565 PL_lex_state = PL_sublex_info.super_state;
1566 SAVEI32(PL_lex_dojoin);
1567 SAVEI32(PL_lex_brackets);
3280af22
NIS
1568 SAVEI32(PL_lex_casemods);
1569 SAVEI32(PL_lex_starts);
1570 SAVEI32(PL_lex_state);
7766f137 1571 SAVEVPTR(PL_lex_inpat);
3280af22 1572 SAVEI32(PL_lex_inwhat);
57843af0 1573 SAVECOPLINE(PL_curcop);
3280af22 1574 SAVEPPTR(PL_bufptr);
8452ff4b 1575 SAVEPPTR(PL_bufend);
3280af22
NIS
1576 SAVEPPTR(PL_oldbufptr);
1577 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1578 SAVEPPTR(PL_last_lop);
1579 SAVEPPTR(PL_last_uni);
3280af22
NIS
1580 SAVEPPTR(PL_linestart);
1581 SAVESPTR(PL_linestr);
8edd5f42
RGS
1582 SAVEGENERICPV(PL_lex_brackstack);
1583 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1584
1585 PL_linestr = PL_lex_stuff;
a0714e2c 1586 PL_lex_stuff = NULL;
3280af22 1587
9cbb5ea2
GS
1588 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1589 = SvPVX(PL_linestr);
3280af22 1590 PL_bufend += SvCUR(PL_linestr);
bd61b366 1591 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1592 SAVEFREESV(PL_linestr);
1593
1594 PL_lex_dojoin = FALSE;
1595 PL_lex_brackets = 0;
a02a5408
JC
1596 Newx(PL_lex_brackstack, 120, char);
1597 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1598 PL_lex_casemods = 0;
1599 *PL_lex_casestack = '\0';
1600 PL_lex_starts = 0;
1601 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1602 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1603
1604 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1605 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1606 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1607 else
5f66b61c 1608 PL_lex_inpat = NULL;
79072805 1609
55497cff 1610 return '(';
79072805
LW
1611}
1612
ffb4593c
NT
1613/*
1614 * S_sublex_done
1615 * Restores lexer state after a S_sublex_push.
1616 */
1617
76e3520e 1618STATIC I32
cea2e8a9 1619S_sublex_done(pTHX)
79072805 1620{
27da23d5 1621 dVAR;
3280af22 1622 if (!PL_lex_starts++) {
396482e1 1623 SV * const sv = newSVpvs("");
9aa983d2
JH
1624 if (SvUTF8(PL_linestr))
1625 SvUTF8_on(sv);
3280af22 1626 PL_expect = XOPERATOR;
9aa983d2 1627 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1628 return THING;
1629 }
1630
3280af22
NIS
1631 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1632 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1633 return yylex();
79072805
LW
1634 }
1635
ffb4593c 1636 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1637 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1638 PL_linestr = PL_lex_repl;
1639 PL_lex_inpat = 0;
1640 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1641 PL_bufend += SvCUR(PL_linestr);
bd61b366 1642 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1643 SAVEFREESV(PL_linestr);
1644 PL_lex_dojoin = FALSE;
1645 PL_lex_brackets = 0;
3280af22
NIS
1646 PL_lex_casemods = 0;
1647 *PL_lex_casestack = '\0';
1648 PL_lex_starts = 0;
25da4f38 1649 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1650 PL_lex_state = LEX_INTERPNORMAL;
1651 PL_lex_starts++;
e9fa98b2
HS
1652 /* we don't clear PL_lex_repl here, so that we can check later
1653 whether this is an evalled subst; that means we rely on the
1654 logic to ensure sublex_done() is called again only via the
1655 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1656 }
e9fa98b2 1657 else {
3280af22 1658 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1659 PL_lex_repl = NULL;
e9fa98b2 1660 }
79072805 1661 return ',';
ffed7fef
LW
1662 }
1663 else {
5db06880
NC
1664#ifdef PERL_MAD
1665 if (PL_madskills) {
cd81e915
NC
1666 if (PL_thiswhite) {
1667 if (!PL_endwhite)
1668 PL_endwhite = newSVpvn("",0);
1669 sv_catsv(PL_endwhite, PL_thiswhite);
1670 PL_thiswhite = 0;
1671 }
1672 if (PL_thistoken)
1673 sv_setpvn(PL_thistoken,"",0);
5db06880 1674 else
cd81e915 1675 PL_realtokenstart = -1;
5db06880
NC
1676 }
1677#endif
f46d017c 1678 LEAVE;
3280af22
NIS
1679 PL_bufend = SvPVX(PL_linestr);
1680 PL_bufend += SvCUR(PL_linestr);
1681 PL_expect = XOPERATOR;
09bef843 1682 PL_sublex_info.sub_inwhat = 0;
79072805 1683 return ')';
ffed7fef
LW
1684 }
1685}
1686
02aa26ce
NT
1687/*
1688 scan_const
1689
1690 Extracts a pattern, double-quoted string, or transliteration. This
1691 is terrifying code.
1692
3280af22
NIS
1693 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1694 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1695 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1696
9b599b2a
GS
1697 Returns a pointer to the character scanned up to. Iff this is
1698 advanced from the start pointer supplied (ie if anything was
1699 successfully parsed), will leave an OP for the substring scanned
1700 in yylval. Caller must intuit reason for not parsing further
1701 by looking at the next characters herself.
1702
02aa26ce
NT
1703 In patterns:
1704 backslashes:
1705 double-quoted style: \r and \n
1706 regexp special ones: \D \s
1707 constants: \x3
1708 backrefs: \1 (deprecated in substitution replacements)
1709 case and quoting: \U \Q \E
1710 stops on @ and $, but not for $ as tail anchor
1711
1712 In transliterations:
1713 characters are VERY literal, except for - not at the start or end
1714 of the string, which indicates a range. scan_const expands the
1715 range to the full set of intermediate characters.
1716
1717 In double-quoted strings:
1718 backslashes:
1719 double-quoted style: \r and \n
1720 constants: \x3
1721 backrefs: \1 (deprecated)
1722 case and quoting: \U \Q \E
1723 stops on @ and $
1724
1725 scan_const does *not* construct ops to handle interpolated strings.
1726 It stops processing as soon as it finds an embedded $ or @ variable
1727 and leaves it to the caller to work out what's going on.
1728
da6eedaa 1729 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1730
1731 $ in pattern could be $foo or could be tail anchor. Assumption:
1732 it's a tail anchor if $ is the last thing in the string, or if it's
1733 followed by one of ")| \n\t"
1734
1735 \1 (backreferences) are turned into $1
1736
1737 The structure of the code is
1738 while (there's a character to process) {
1739 handle transliteration ranges
1740 skip regexp comments
1741 skip # initiated comments in //x patterns
1742 check for embedded @foo
1743 check for embedded scalars
1744 if (backslash) {
1745 leave intact backslashes from leave (below)
1746 deprecate \1 in strings and sub replacements
1747 handle string-changing backslashes \l \U \Q \E, etc.
1748 switch (what was escaped) {
1749 handle - in a transliteration (becomes a literal -)
1750 handle \132 octal characters
1751 handle 0x15 hex characters
1752 handle \cV (control V)
1753 handle printf backslashes (\f, \r, \n, etc)
1754 } (end switch)
1755 } (end if backslash)
1756 } (end while character to read)
4e553d73 1757
02aa26ce
NT
1758*/
1759
76e3520e 1760STATIC char *
cea2e8a9 1761S_scan_const(pTHX_ char *start)
79072805 1762{
97aff369 1763 dVAR;
3280af22 1764 register char *send = PL_bufend; /* end of the constant */
561b68a9 1765 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1766 register char *s = start; /* start of the constant */
1767 register char *d = SvPVX(sv); /* destination for copies */
1768 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1769 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1770 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1771 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1772 UV uv;
4c3a8340
TS
1773#ifdef EBCDIC
1774 UV literal_endpoint = 0;
1775#endif
012bcf8d 1776
dff6d3cd 1777 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1778 PL_lex_inpat
b6d5fef8 1779 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1780 : "";
79072805 1781
2b9d42f0
NIS
1782 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1783 /* If we are doing a trans and we know we want UTF8 set expectation */
1784 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1785 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1786 }
1787
1788
79072805 1789 while (s < send || dorange) {
02aa26ce 1790 /* get transliterations out of the way (they're most literal) */
3280af22 1791 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1792 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1793 if (dorange) {
1ba5c669
JH
1794 I32 i; /* current expanded character */
1795 I32 min; /* first character in range */
1796 I32 max; /* last character in range */
02aa26ce 1797
2b9d42f0 1798 if (has_utf8) {
9d4ba2ae 1799 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1800 char *e = d++;
1801 while (e-- > c)
1802 *(e + 1) = *e;
25716404 1803 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1804 /* mark the range as done, and continue */
1805 dorange = FALSE;
1806 didrange = TRUE;
1807 continue;
1808 }
2b9d42f0 1809
95a20fc0 1810 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1811 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1812 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1813 d -= 2; /* eat the first char and the - */
1814
8ada0baa
JH
1815 min = (U8)*d; /* first char in range */
1816 max = (U8)d[1]; /* last char in range */
1817
c2e66d9e 1818 if (min > max) {
01ec43d0 1819 Perl_croak(aTHX_
d1573ac7 1820 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1821 (char)min, (char)max);
c2e66d9e
GS
1822 }
1823
c7f1f016 1824#ifdef EBCDIC
4c3a8340
TS
1825 if (literal_endpoint == 2 &&
1826 ((isLOWER(min) && isLOWER(max)) ||
1827 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1828 if (isLOWER(min)) {
1829 for (i = min; i <= max; i++)
1830 if (isLOWER(i))
db42d148 1831 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1832 } else {
1833 for (i = min; i <= max; i++)
1834 if (isUPPER(i))
db42d148 1835 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1836 }
1837 }
1838 else
1839#endif
1840 for (i = min; i <= max; i++)
eb160463 1841 *d++ = (char)i;
02aa26ce
NT
1842
1843 /* mark the range as done, and continue */
79072805 1844 dorange = FALSE;
01ec43d0 1845 didrange = TRUE;
4c3a8340
TS
1846#ifdef EBCDIC
1847 literal_endpoint = 0;
1848#endif
79072805 1849 continue;
4e553d73 1850 }
02aa26ce
NT
1851
1852 /* range begins (ignore - as first or last char) */
79072805 1853 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1854 if (didrange) {
1fafa243 1855 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1856 }
2b9d42f0 1857 if (has_utf8) {
25716404 1858 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1859 s++;
1860 continue;
1861 }
79072805
LW
1862 dorange = TRUE;
1863 s++;
01ec43d0
GS
1864 }
1865 else {
1866 didrange = FALSE;
4c3a8340
TS
1867#ifdef EBCDIC
1868 literal_endpoint = 0;
1869#endif
01ec43d0 1870 }
79072805 1871 }
02aa26ce
NT
1872
1873 /* if we get here, we're not doing a transliteration */
1874
0f5d15d6
IZ
1875 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1876 except for the last char, which will be done separately. */
3280af22 1877 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1878 if (s[2] == '#') {
e994fd66 1879 while (s+1 < send && *s != ')')
db42d148 1880 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1881 }
1882 else if (s[2] == '{' /* This should match regcomp.c */
1883 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1884 {
cc6b7395 1885 I32 count = 1;
0f5d15d6 1886 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1887 char c;
1888
d9f97599
GS
1889 while (count && (c = *regparse)) {
1890 if (c == '\\' && regparse[1])
1891 regparse++;
4e553d73 1892 else if (c == '{')
cc6b7395 1893 count++;
4e553d73 1894 else if (c == '}')
cc6b7395 1895 count--;
d9f97599 1896 regparse++;
cc6b7395 1897 }
e994fd66 1898 if (*regparse != ')')
5bdf89e7 1899 regparse--; /* Leave one char for continuation. */
0f5d15d6 1900 while (s < regparse)
db42d148 1901 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1902 }
748a9306 1903 }
02aa26ce
NT
1904
1905 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1906 else if (*s == '#' && PL_lex_inpat &&
1907 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1908 while (s+1 < send && *s != '\n')
db42d148 1909 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1910 }
02aa26ce 1911
5d1d4326 1912 /* check for embedded arrays
da6eedaa 1913 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1914 */
7e2040f0 1915 else if (*s == '@' && s[1]
5d1d4326 1916 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1917 break;
02aa26ce
NT
1918
1919 /* check for embedded scalars. only stop if we're sure it's a
1920 variable.
1921 */
79072805 1922 else if (*s == '$') {
3280af22 1923 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1924 break;
6002328a 1925 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1926 break; /* in regexp, $ might be tail anchor */
1927 }
02aa26ce 1928
2b9d42f0
NIS
1929 /* End of else if chain - OP_TRANS rejoin rest */
1930
02aa26ce 1931 /* backslashes */
79072805
LW
1932 if (*s == '\\' && s+1 < send) {
1933 s++;
02aa26ce
NT
1934
1935 /* some backslashes we leave behind */
c9f97d15 1936 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1937 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1938 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1939 continue;
1940 }
02aa26ce
NT
1941
1942 /* deprecate \1 in strings and substitution replacements */
3280af22 1943 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1944 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1945 {
599cee73 1946 if (ckWARN(WARN_SYNTAX))
9014280d 1947 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1948 *--s = '$';
1949 break;
1950 }
02aa26ce
NT
1951
1952 /* string-change backslash escapes */
3280af22 1953 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1954 --s;
1955 break;
1956 }
02aa26ce
NT
1957
1958 /* if we get here, it's either a quoted -, or a digit */
79072805 1959 switch (*s) {
02aa26ce
NT
1960
1961 /* quoted - in transliterations */
79072805 1962 case '-':
3280af22 1963 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1964 *d++ = *s++;
1965 continue;
1966 }
1967 /* FALL THROUGH */
1968 default:
11b8faa4 1969 {
041457d9
DM
1970 if (isALNUM(*s) &&
1971 *s != '_' &&
1972 ckWARN(WARN_MISC))
9014280d 1973 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1974 "Unrecognized escape \\%c passed through",
1975 *s);
1976 /* default action is to copy the quoted character */
f9a63242 1977 goto default_action;
11b8faa4 1978 }
02aa26ce
NT
1979
1980 /* \132 indicates an octal constant */
79072805
LW
1981 case '0': case '1': case '2': case '3':
1982 case '4': case '5': case '6': case '7':
ba210ebe 1983 {
53305cf1
NC
1984 I32 flags = 0;
1985 STRLEN len = 3;
1986 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1987 s += len;
1988 }
012bcf8d 1989 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1990
1991 /* \x24 indicates a hex constant */
79072805 1992 case 'x':
a0ed51b3
LW
1993 ++s;
1994 if (*s == '{') {
9d4ba2ae 1995 char* const e = strchr(s, '}');
a4c04bdc
NC
1996 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1997 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1998 STRLEN len;
355860ce 1999
53305cf1 2000 ++s;
adaeee49 2001 if (!e) {
a0ed51b3 2002 yyerror("Missing right brace on \\x{}");
355860ce 2003 continue;
ba210ebe 2004 }
53305cf1
NC
2005 len = e - s;
2006 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2007 s = e + 1;
a0ed51b3
LW
2008 }
2009 else {
ba210ebe 2010 {
53305cf1 2011 STRLEN len = 2;
a4c04bdc 2012 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2013 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2014 s += len;
2015 }
012bcf8d
GS
2016 }
2017
2018 NUM_ESCAPE_INSERT:
2019 /* Insert oct or hex escaped character.
301d3d20 2020 * There will always enough room in sv since such
db42d148 2021 * escapes will be longer than any UTF-8 sequence
301d3d20 2022 * they can end up as. */
ba7cea30 2023
c7f1f016
NIS
2024 /* We need to map to chars to ASCII before doing the tests
2025 to cover EBCDIC
2026 */
c4d5f83a 2027 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2028 if (!has_utf8 && uv > 255) {
301d3d20
JH
2029 /* Might need to recode whatever we have
2030 * accumulated so far if it contains any
2031 * hibit chars.
2032 *
2033 * (Can't we keep track of that and avoid
2034 * this rescan? --jhi)
012bcf8d 2035 */
c7f1f016 2036 int hicount = 0;
63cd0674
NIS
2037 U8 *c;
2038 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2039 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2040 hicount++;
db42d148 2041 }
012bcf8d 2042 }
63cd0674 2043 if (hicount) {
9d4ba2ae 2044 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2045 U8 *src, *dst;
2046 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2047 src = (U8 *)d - 1;
2048 dst = src+hicount;
2049 d += hicount;
cfd0369c 2050 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2051 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2052 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2053 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2054 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2055 }
2056 else {
63cd0674 2057 *dst-- = *src;
012bcf8d 2058 }
c7f1f016 2059 src--;
012bcf8d
GS
2060 }
2061 }
2062 }
2063
9aa983d2 2064 if (has_utf8 || uv > 255) {
9041c2e3 2065 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2066 has_utf8 = TRUE;
f9a63242
JH
2067 if (PL_lex_inwhat == OP_TRANS &&
2068 PL_sublex_info.sub_op) {
2069 PL_sublex_info.sub_op->op_private |=
2070 (PL_lex_repl ? OPpTRANS_FROM_UTF
2071 : OPpTRANS_TO_UTF);
f9a63242 2072 }
012bcf8d 2073 }
a0ed51b3 2074 else {
012bcf8d 2075 *d++ = (char)uv;
a0ed51b3 2076 }
012bcf8d
GS
2077 }
2078 else {
c4d5f83a 2079 *d++ = (char) uv;
a0ed51b3 2080 }
79072805 2081 continue;
02aa26ce 2082
b239daa5 2083 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2084 case 'N':
55eda711 2085 ++s;
423cee85
JH
2086 if (*s == '{') {
2087 char* e = strchr(s, '}');
155aba94 2088 SV *res;
423cee85 2089 STRLEN len;
cfd0369c 2090 const char *str;
4e553d73 2091
423cee85 2092 if (!e) {
5777a3f7 2093 yyerror("Missing right brace on \\N{}");
423cee85
JH
2094 e = s - 1;
2095 goto cont_scan;
2096 }
dbc0d4f2
JH
2097 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2098 /* \N{U+...} */
2099 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2100 PERL_SCAN_DISALLOW_PREFIX;
2101 s += 3;
2102 len = e - s;
2103 uv = grok_hex(s, &len, &flags, NULL);
2104 s = e + 1;
2105 goto NUM_ESCAPE_INSERT;
2106 }
55eda711 2107 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2108 res = new_constant( NULL, 0, "charnames",
a0714e2c 2109 res, NULL, "\\N{...}" );
f9a63242
JH
2110 if (has_utf8)
2111 sv_utf8_upgrade(res);
cfd0369c 2112 str = SvPV_const(res,len);
1c47067b
JH
2113#ifdef EBCDIC_NEVER_MIND
2114 /* charnames uses pack U and that has been
2115 * recently changed to do the below uni->native
2116 * mapping, so this would be redundant (and wrong,
2117 * the code point would be doubly converted).
2118 * But leave this in just in case the pack U change
2119 * gets revoked, but the semantics is still
2120 * desireable for charnames. --jhi */
cddc7ef4 2121 {
cfd0369c 2122 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2123
2124 if (uv < 0x100) {
89ebb4a3 2125 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2126
2127 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2128 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2129 str = SvPV_const(res, len);
cddc7ef4
JH
2130 }
2131 }
2132#endif
89491803 2133 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2134 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2135 SvCUR_set(sv, d - ostart);
2136 SvPOK_on(sv);
e4f3eed8 2137 *d = '\0';
f08d6ad9 2138 sv_utf8_upgrade(sv);
d2f449dd 2139 /* this just broke our allocation above... */
eb160463 2140 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2141 d = SvPVX(sv) + SvCUR(sv);
89491803 2142 has_utf8 = TRUE;
f08d6ad9 2143 }
eb160463 2144 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2145 const char * const odest = SvPVX_const(sv);
423cee85 2146
8973db79 2147 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2148 d = SvPVX(sv) + (d - odest);
2149 }
2150 Copy(str, d, len, char);
2151 d += len;
2152 SvREFCNT_dec(res);
2153 cont_scan:
2154 s = e + 1;
2155 }
2156 else
5777a3f7 2157 yyerror("Missing braces on \\N{}");
423cee85
JH
2158 continue;
2159
02aa26ce 2160 /* \c is a control character */
79072805
LW
2161 case 'c':
2162 s++;
961ce445 2163 if (s < send) {
ba210ebe 2164 U8 c = *s++;
c7f1f016
NIS
2165#ifdef EBCDIC
2166 if (isLOWER(c))
2167 c = toUPPER(c);
2168#endif
db42d148 2169 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2170 }
961ce445
RGS
2171 else {
2172 yyerror("Missing control char name in \\c");
2173 }
79072805 2174 continue;
02aa26ce
NT
2175
2176 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2177 case 'b':
db42d148 2178 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2179 break;
2180 case 'n':
db42d148 2181 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2182 break;
2183 case 'r':
db42d148 2184 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2185 break;
2186 case 'f':
db42d148 2187 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2188 break;
2189 case 't':
db42d148 2190 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2191 break;
34a3fe2a 2192 case 'e':
db42d148 2193 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2194 break;
2195 case 'a':
db42d148 2196 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2197 break;
02aa26ce
NT
2198 } /* end switch */
2199
79072805
LW
2200 s++;
2201 continue;
02aa26ce 2202 } /* end if (backslash) */
4c3a8340
TS
2203#ifdef EBCDIC
2204 else
2205 literal_endpoint++;
2206#endif
02aa26ce 2207
f9a63242 2208 default_action:
2b9d42f0
NIS
2209 /* If we started with encoded form, or already know we want it
2210 and then encode the next character */
2211 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2212 STRLEN len = 1;
5f66b61c
AL
2213 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2214 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2215 s += len;
2216 if (need > len) {
2217 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2218 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2219 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2220 }
5f66b61c 2221 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2222 has_utf8 = TRUE;
2223 }
2224 else {
2225 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2226 }
02aa26ce
NT
2227 } /* while loop to process each character */
2228
2229 /* terminate the string and set up the sv */
79072805 2230 *d = '\0';
95a20fc0 2231 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2232 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2233 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2234
79072805 2235 SvPOK_on(sv);
9f4817db 2236 if (PL_encoding && !has_utf8) {
d0063567
DK
2237 sv_recode_to_utf8(sv, PL_encoding);
2238 if (SvUTF8(sv))
2239 has_utf8 = TRUE;
9f4817db 2240 }
2b9d42f0 2241 if (has_utf8) {
7e2040f0 2242 SvUTF8_on(sv);
2b9d42f0 2243 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2244 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2245 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2246 }
2247 }
79072805 2248
02aa26ce 2249 /* shrink the sv if we allocated more than we used */
79072805 2250 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2251 SvPV_shrink_to_cur(sv);
79072805 2252 }
02aa26ce 2253
9b599b2a 2254 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2255 if (s > PL_bufptr) {
2256 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2257 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2258 sv, NULL,
4e553d73 2259 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2260 ? "tr"
3280af22 2261 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2262 ? "s"
2263 : "qq")));
79072805 2264 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2265 } else
8990e307 2266 SvREFCNT_dec(sv);
79072805
LW
2267 return s;
2268}
2269
ffb4593c
NT
2270/* S_intuit_more
2271 * Returns TRUE if there's more to the expression (e.g., a subscript),
2272 * FALSE otherwise.
ffb4593c
NT
2273 *
2274 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2275 *
2276 * ->[ and ->{ return TRUE
2277 * { and [ outside a pattern are always subscripts, so return TRUE
2278 * if we're outside a pattern and it's not { or [, then return FALSE
2279 * if we're in a pattern and the first char is a {
2280 * {4,5} (any digits around the comma) returns FALSE
2281 * if we're in a pattern and the first char is a [
2282 * [] returns FALSE
2283 * [SOMETHING] has a funky algorithm to decide whether it's a
2284 * character class or not. It has to deal with things like
2285 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2286 * anything else returns TRUE
2287 */
2288
9cbb5ea2
GS
2289/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2290
76e3520e 2291STATIC int
cea2e8a9 2292S_intuit_more(pTHX_ register char *s)
79072805 2293{
97aff369 2294 dVAR;
3280af22 2295 if (PL_lex_brackets)
79072805
LW
2296 return TRUE;
2297 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2298 return TRUE;
2299 if (*s != '{' && *s != '[')
2300 return FALSE;
3280af22 2301 if (!PL_lex_inpat)
79072805
LW
2302 return TRUE;
2303
2304 /* In a pattern, so maybe we have {n,m}. */
2305 if (*s == '{') {
2306 s++;
2307 if (!isDIGIT(*s))
2308 return TRUE;
2309 while (isDIGIT(*s))
2310 s++;
2311 if (*s == ',')
2312 s++;
2313 while (isDIGIT(*s))
2314 s++;
2315 if (*s == '}')
2316 return FALSE;
2317 return TRUE;
2318
2319 }
2320
2321 /* On the other hand, maybe we have a character class */
2322
2323 s++;
2324 if (*s == ']' || *s == '^')
2325 return FALSE;
2326 else {
ffb4593c 2327 /* this is terrifying, and it works */
79072805
LW
2328 int weight = 2; /* let's weigh the evidence */
2329 char seen[256];
f27ffc4a 2330 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2331 const char * const send = strchr(s,']');
3280af22 2332 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2333
2334 if (!send) /* has to be an expression */
2335 return TRUE;
2336
2337 Zero(seen,256,char);
2338 if (*s == '$')
2339 weight -= 3;
2340 else if (isDIGIT(*s)) {
2341 if (s[1] != ']') {
2342 if (isDIGIT(s[1]) && s[2] == ']')
2343 weight -= 10;
2344 }
2345 else
2346 weight -= 100;
2347 }
2348 for (; s < send; s++) {
2349 last_un_char = un_char;
2350 un_char = (unsigned char)*s;
2351 switch (*s) {
2352 case '@':
2353 case '&':
2354 case '$':
2355 weight -= seen[un_char] * 10;
7e2040f0 2356 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2357 int len;
8903cb82 2358 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2359 len = (int)strlen(tmpbuf);
2360 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2361 weight -= 100;
2362 else
2363 weight -= 10;
2364 }
2365 else if (*s == '$' && s[1] &&
93a17b20
LW
2366 strchr("[#!%*<>()-=",s[1])) {
2367 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2368 weight -= 10;
2369 else
2370 weight -= 1;
2371 }
2372 break;
2373 case '\\':
2374 un_char = 254;
2375 if (s[1]) {
93a17b20 2376 if (strchr("wds]",s[1]))
79072805
LW
2377 weight += 100;
2378 else if (seen['\''] || seen['"'])
2379 weight += 1;
93a17b20 2380 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2381 weight += 40;
2382 else if (isDIGIT(s[1])) {
2383 weight += 40;
2384 while (s[1] && isDIGIT(s[1]))
2385 s++;
2386 }
2387 }
2388 else
2389 weight += 100;
2390 break;
2391 case '-':
2392 if (s[1] == '\\')
2393 weight += 50;
93a17b20 2394 if (strchr("aA01! ",last_un_char))
79072805 2395 weight += 30;
93a17b20 2396 if (strchr("zZ79~",s[1]))
79072805 2397 weight += 30;
f27ffc4a
GS
2398 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2399 weight -= 5; /* cope with negative subscript */
79072805
LW
2400 break;
2401 default:
3792a11b
NC
2402 if (!isALNUM(last_un_char)
2403 && !(last_un_char == '$' || last_un_char == '@'
2404 || last_un_char == '&')
2405 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2406 char *d = tmpbuf;
2407 while (isALPHA(*s))
2408 *d++ = *s++;
2409 *d = '\0';
2410 if (keyword(tmpbuf, d - tmpbuf))
2411 weight -= 150;
2412 }
2413 if (un_char == last_un_char + 1)
2414 weight += 5;
2415 weight -= seen[un_char];
2416 break;
2417 }
2418 seen[un_char]++;
2419 }
2420 if (weight >= 0) /* probably a character class */
2421 return FALSE;
2422 }
2423
2424 return TRUE;
2425}
ffed7fef 2426
ffb4593c
NT
2427/*
2428 * S_intuit_method
2429 *
2430 * Does all the checking to disambiguate
2431 * foo bar
2432 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2433 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2434 *
2435 * First argument is the stuff after the first token, e.g. "bar".
2436 *
2437 * Not a method if bar is a filehandle.
2438 * Not a method if foo is a subroutine prototyped to take a filehandle.
2439 * Not a method if it's really "Foo $bar"
2440 * Method if it's "foo $bar"
2441 * Not a method if it's really "print foo $bar"
2442 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2443 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2444 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2445 * =>
2446 */
2447
76e3520e 2448STATIC int
62d55b22 2449S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2450{
97aff369 2451 dVAR;
a0d0e21e 2452 char *s = start + (*start == '$');
3280af22 2453 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2454 STRLEN len;
2455 GV* indirgv;
5db06880
NC
2456#ifdef PERL_MAD
2457 int soff;
2458#endif
a0d0e21e
LW
2459
2460 if (gv) {
62d55b22 2461 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2462 return 0;
62d55b22
NC
2463 if (cv) {
2464 if (SvPOK(cv)) {
2465 const char *proto = SvPVX_const(cv);
2466 if (proto) {
2467 if (*proto == ';')
2468 proto++;
2469 if (*proto == '*')
2470 return 0;
2471 }
b6c543e3
IZ
2472 }
2473 } else
a0d0e21e
LW
2474 gv = 0;
2475 }
8903cb82 2476 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2477 /* start is the beginning of the possible filehandle/object,
2478 * and s is the end of it
2479 * tmpbuf is a copy of it
2480 */
2481
a0d0e21e 2482 if (*start == '$') {
3280af22 2483 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2484 return 0;
5db06880
NC
2485#ifdef PERL_MAD
2486 len = start - SvPVX(PL_linestr);
2487#endif
29595ff2 2488 s = PEEKSPACE(s);
5db06880
NC
2489#ifdef PERLMAD
2490 start = SvPVX(PL_linestr) + len;
2491#endif
3280af22
NIS
2492 PL_bufptr = start;
2493 PL_expect = XREF;
a0d0e21e
LW
2494 return *s == '(' ? FUNCMETH : METHOD;
2495 }
2496 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2497 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2498 len -= 2;
2499 tmpbuf[len] = '\0';
5db06880
NC
2500#ifdef PERL_MAD
2501 soff = s - SvPVX(PL_linestr);
2502#endif
c3e0f903
GS
2503 goto bare_package;
2504 }
90e5519e 2505 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2506 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2507 return 0;
2508 /* filehandle or package name makes it a method */
89bfa8cd 2509 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2510#ifdef PERL_MAD
2511 soff = s - SvPVX(PL_linestr);
2512#endif
29595ff2 2513 s = PEEKSPACE(s);
3280af22 2514 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2515 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2516 bare_package:
cd81e915 2517 start_force(PL_curforce);
9ded7720 2518 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2519 newSVpvn(tmpbuf,len));
9ded7720 2520 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2521 if (PL_madskills)
2522 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2523 PL_expect = XTERM;
a0d0e21e 2524 force_next(WORD);
3280af22 2525 PL_bufptr = s;
5db06880
NC
2526#ifdef PERL_MAD
2527 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2528#endif
a0d0e21e
LW
2529 return *s == '(' ? FUNCMETH : METHOD;
2530 }
2531 }
2532 return 0;
2533}
2534
ffb4593c
NT
2535/*
2536 * S_incl_perldb
2537 * Return a string of Perl code to load the debugger. If PERL5DB
2538 * is set, it will return the contents of that, otherwise a
2539 * compile-time require of perl5db.pl.
2540 */
2541
bfed75c6 2542STATIC const char*
cea2e8a9 2543S_incl_perldb(pTHX)
a0d0e21e 2544{
97aff369 2545 dVAR;
3280af22 2546 if (PL_perldb) {
9d4ba2ae 2547 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2548
2549 if (pdb)
2550 return pdb;
93189314 2551 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2552 return "BEGIN { require 'perl5db.pl' }";
2553 }
2554 return "";
2555}
2556
2557
16d20bd9 2558/* Encoded script support. filter_add() effectively inserts a
4e553d73 2559 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2560 * Note that the filter function only applies to the current source file
2561 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2562 *
2563 * The datasv parameter (which may be NULL) can be used to pass
2564 * private data to this instance of the filter. The filter function
2565 * can recover the SV using the FILTER_DATA macro and use it to
2566 * store private buffers and state information.
2567 *
2568 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2569 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2570 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2571 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2572 * private use must be set using malloc'd pointers.
2573 */
16d20bd9
AD
2574
2575SV *
864dbfa3 2576Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2577{
97aff369 2578 dVAR;
f4c556ac 2579 if (!funcp)
a0714e2c 2580 return NULL;
f4c556ac 2581
3280af22
NIS
2582 if (!PL_rsfp_filters)
2583 PL_rsfp_filters = newAV();
16d20bd9 2584 if (!datasv)
561b68a9 2585 datasv = newSV(0);
862a34c6 2586 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2587 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2588 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2589 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2590 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2591 av_unshift(PL_rsfp_filters, 1);
2592 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2593 return(datasv);
2594}
4e553d73 2595
16d20bd9
AD
2596
2597/* Delete most recently added instance of this filter function. */
a0d0e21e 2598void
864dbfa3 2599Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2600{
97aff369 2601 dVAR;
e0c19803 2602 SV *datasv;
24801a4b 2603
33073adb 2604#ifdef DEBUGGING
8141890a 2605 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2606#endif
3280af22 2607 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2608 return;
2609 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2610 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2611 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2612 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2613 IoANY(datasv) = (void *)NULL;
3280af22 2614 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2615
16d20bd9
AD
2616 return;
2617 }
2618 /* we need to search for the correct entry and clear it */
cea2e8a9 2619 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2620}
2621
2622
1de9afcd
RGS
2623/* Invoke the idxth filter function for the current rsfp. */
2624/* maxlen 0 = read one text line */
16d20bd9 2625I32
864dbfa3 2626Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2627{
97aff369 2628 dVAR;
16d20bd9
AD
2629 filter_t funcp;
2630 SV *datasv = NULL;
e50aee73 2631
3280af22 2632 if (!PL_rsfp_filters)
16d20bd9 2633 return -1;
1de9afcd 2634 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2635 /* Provide a default input filter to make life easy. */
2636 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2637 DEBUG_P(PerlIO_printf(Perl_debug_log,
2638 "filter_read %d: from rsfp\n", idx));
4e553d73 2639 if (maxlen) {
16d20bd9
AD
2640 /* Want a block */
2641 int len ;
f54cb97a 2642 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2643
2644 /* ensure buf_sv is large enough */
eb160463 2645 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2646 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2647 if (PerlIO_error(PL_rsfp))
37120919
AD
2648 return -1; /* error */
2649 else
2650 return 0 ; /* end of file */
2651 }
16d20bd9
AD
2652 SvCUR_set(buf_sv, old_len + len) ;
2653 } else {
2654 /* Want a line */
3280af22
NIS
2655 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2656 if (PerlIO_error(PL_rsfp))
37120919
AD
2657 return -1; /* error */
2658 else
2659 return 0 ; /* end of file */
2660 }
16d20bd9
AD
2661 }
2662 return SvCUR(buf_sv);
2663 }
2664 /* Skip this filter slot if filter has been deleted */
1de9afcd 2665 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2666 DEBUG_P(PerlIO_printf(Perl_debug_log,
2667 "filter_read %d: skipped (filter deleted)\n",
2668 idx));
16d20bd9
AD
2669 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2670 }
2671 /* Get function pointer hidden within datasv */
8141890a 2672 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2673 DEBUG_P(PerlIO_printf(Perl_debug_log,
2674 "filter_read %d: via function %p (%s)\n",
cfd0369c 2675 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2676 /* Call function. The function is expected to */
2677 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2678 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2679 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2680}
2681
76e3520e 2682STATIC char *
cea2e8a9 2683S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2684{
97aff369 2685 dVAR;
c39cd008 2686#ifdef PERL_CR_FILTER
3280af22 2687 if (!PL_rsfp_filters) {
c39cd008 2688 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2689 }
2690#endif
3280af22 2691 if (PL_rsfp_filters) {
55497cff 2692 if (!append)
2693 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2694 if (FILTER_READ(0, sv, 0) > 0)
2695 return ( SvPVX(sv) ) ;
2696 else
bd61b366 2697 return NULL ;
16d20bd9 2698 }
9d116dd7 2699 else
fd049845 2700 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2701}
2702
01ec43d0 2703STATIC HV *
7fc63493 2704S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2705{
97aff369 2706 dVAR;
def3634b
GS
2707 GV *gv;
2708
01ec43d0 2709 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2710 return PL_curstash;
2711
2712 if (len > 2 &&
2713 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2714 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2715 {
2716 return GvHV(gv); /* Foo:: */
def3634b
GS
2717 }
2718
2719 /* use constant CLASS => 'MyClass' */
90e5519e 2720 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2721 SV *sv;
2722 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2723 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2724 }
2725 }
2726
2727 return gv_stashpv(pkgname, FALSE);
2728}
a0d0e21e 2729
5db06880
NC
2730#ifdef PERL_MAD
2731 /*
2732 * Perl_madlex
2733 * The intent of this yylex wrapper is to minimize the changes to the
2734 * tokener when we aren't interested in collecting madprops. It remains
2735 * to be seen how successful this strategy will be...
2736 */
2737
2738int
2739Perl_madlex(pTHX)
2740{
2741 int optype;
2742 char *s = PL_bufptr;
2743
cd81e915
NC
2744 /* make sure PL_thiswhite is initialized */
2745 PL_thiswhite = 0;
2746 PL_thismad = 0;
5db06880 2747
cd81e915 2748 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2749 if (PL_pending_ident)
2750 return S_pending_ident(aTHX);
2751
2752 /* previous token ate up our whitespace? */
cd81e915
NC
2753 if (!PL_lasttoke && PL_nextwhite) {
2754 PL_thiswhite = PL_nextwhite;
2755 PL_nextwhite = 0;
5db06880
NC
2756 }
2757
2758 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2759 PL_realtokenstart = -1;
2760 PL_thistoken = 0;
5db06880
NC
2761 optype = yylex();
2762 s = PL_bufptr;
cd81e915 2763 assert(PL_curforce < 0);
5db06880 2764
cd81e915
NC
2765 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2766 if (!PL_thistoken) {
2767 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2768 PL_thistoken = newSVpvn("",0);
5db06880 2769 else {
cd81e915
NC
2770 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2771 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2772 }
2773 }
cd81e915
NC
2774 if (PL_thismad) /* install head */
2775 CURMAD('X', PL_thistoken);
5db06880
NC
2776 }
2777
2778 /* last whitespace of a sublex? */
cd81e915
NC
2779 if (optype == ')' && PL_endwhite) {
2780 CURMAD('X', PL_endwhite);
5db06880
NC
2781 }
2782
cd81e915 2783 if (!PL_thismad) {
5db06880
NC
2784
2785 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2786 if (!PL_thiswhite && !PL_endwhite && !optype) {
2787 sv_free(PL_thistoken);
2788 PL_thistoken = 0;
5db06880
NC
2789 return 0;
2790 }
2791
2792 /* put off final whitespace till peg */
2793 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2794 PL_nextwhite = PL_thiswhite;
2795 PL_thiswhite = 0;
5db06880 2796 }
cd81e915
NC
2797 else if (PL_thisopen) {
2798 CURMAD('q', PL_thisopen);
2799 if (PL_thistoken)
2800 sv_free(PL_thistoken);
2801 PL_thistoken = 0;
5db06880
NC
2802 }
2803 else {
2804 /* Store actual token text as madprop X */
cd81e915 2805 CURMAD('X', PL_thistoken);
5db06880
NC
2806 }
2807
cd81e915 2808 if (PL_thiswhite) {
5db06880 2809 /* add preceding whitespace as madprop _ */
cd81e915 2810 CURMAD('_', PL_thiswhite);
5db06880
NC
2811 }
2812
cd81e915 2813 if (PL_thisstuff) {
5db06880 2814 /* add quoted material as madprop = */
cd81e915 2815 CURMAD('=', PL_thisstuff);
5db06880
NC
2816 }
2817
cd81e915 2818 if (PL_thisclose) {
5db06880 2819 /* add terminating quote as madprop Q */
cd81e915 2820 CURMAD('Q', PL_thisclose);
5db06880
NC
2821 }
2822 }
2823
2824 /* special processing based on optype */
2825
2826 switch (optype) {
2827
2828 /* opval doesn't need a TOKEN since it can already store mp */
2829 case WORD:
2830 case METHOD:
2831 case FUNCMETH:
2832 case THING:
2833 case PMFUNC:
2834 case PRIVATEREF:
2835 case FUNC0SUB:
2836 case UNIOPSUB:
2837 case LSTOPSUB:
2838 if (yylval.opval)
cd81e915
NC
2839 append_madprops(PL_thismad, yylval.opval, 0);
2840 PL_thismad = 0;
5db06880
NC
2841 return optype;
2842
2843 /* fake EOF */
2844 case 0:
2845 optype = PEG;
cd81e915
NC
2846 if (PL_endwhite) {
2847 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2848 PL_endwhite = 0;
5db06880
NC
2849 }
2850 break;
2851
2852 case ']':
2853 case '}':
cd81e915 2854 if (PL_faketokens)
5db06880
NC
2855 break;
2856 /* remember any fake bracket that lexer is about to discard */
2857 if (PL_lex_brackets == 1 &&
2858 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2859 {
2860 s = PL_bufptr;
2861 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2862 s++;
2863 if (*s == '}') {
cd81e915
NC
2864 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2865 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2866 PL_thiswhite = 0;
5db06880
NC
2867 PL_bufptr = s - 1;
2868 break; /* don't bother looking for trailing comment */
2869 }
2870 else
2871 s = PL_bufptr;
2872 }
2873 if (optype == ']')
2874 break;
2875 /* FALLTHROUGH */
2876
2877 /* attach a trailing comment to its statement instead of next token */
2878 case ';':
cd81e915 2879 if (PL_faketokens)
5db06880
NC
2880 break;
2881 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2882 s = PL_bufptr;
2883 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2884 s++;
2885 if (*s == '\n' || *s == '#') {
2886 while (s < PL_bufend && *s != '\n')
2887 s++;
2888 if (s < PL_bufend)
2889 s++;
cd81e915
NC
2890 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2891 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2892 PL_thiswhite = 0;
5db06880
NC
2893 PL_bufptr = s;
2894 }
2895 }
2896 break;
2897
2898 /* pval */
2899 case LABEL:
2900 break;
2901
2902 /* ival */
2903 default:
2904 break;
2905
2906 }
2907
2908 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
2909 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
2910 PL_thismad = 0;
5db06880
NC
2911 return optype;
2912}
2913#endif
2914
468aa647 2915STATIC char *
cc6ed77d 2916S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2917 dVAR;
468aa647
RGS
2918 if (PL_expect != XSTATE)
2919 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2920 is_use ? "use" : "no"));
29595ff2 2921 s = SKIPSPACE1(s);
468aa647
RGS
2922 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2923 s = force_version(s, TRUE);
29595ff2 2924 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 2925 start_force(PL_curforce);
9ded7720 2926 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2927 force_next(WORD);
2928 }
2929 else if (*s == 'v') {
2930 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2931 s = force_version(s, FALSE);
2932 }
2933 }
2934 else {
2935 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2936 s = force_version(s, FALSE);
2937 }
2938 yylval.ival = is_use;
2939 return s;
2940}
748a9306 2941#ifdef DEBUGGING
27da23d5 2942 static const char* const exp_name[] =
09bef843 2943 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2944 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2945 };
748a9306 2946#endif
463ee0b2 2947
02aa26ce
NT
2948/*
2949 yylex
2950
2951 Works out what to call the token just pulled out of the input
2952 stream. The yacc parser takes care of taking the ops we return and
2953 stitching them into a tree.
2954
2955 Returns:
2956 PRIVATEREF
2957
2958 Structure:
2959 if read an identifier
2960 if we're in a my declaration
2961 croak if they tried to say my($foo::bar)
2962 build the ops for a my() declaration
2963 if it's an access to a my() variable
2964 are we in a sort block?
2965 croak if my($a); $a <=> $b
2966 build ops for access to a my() variable
2967 if in a dq string, and they've said @foo and we can't find @foo
2968 croak
2969 build ops for a bareword
2970 if we already built the token before, use it.
2971*/
2972
20141f0e 2973
dba4d153
JH
2974#ifdef __SC__
2975#pragma segment Perl_yylex
2976#endif
dba4d153 2977int
dba4d153 2978Perl_yylex(pTHX)
20141f0e 2979{
97aff369 2980 dVAR;
3afc138a 2981 register char *s = PL_bufptr;
378cc40b 2982 register char *d;
463ee0b2 2983 STRLEN len;
aa7440fb 2984 bool bof = FALSE;
a687059c 2985
bbf60fe6 2986 DEBUG_T( {
396482e1 2987 SV* tmp = newSVpvs("");
b6007c36
DM
2988 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2989 (IV)CopLINE(PL_curcop),
2990 lex_state_names[PL_lex_state],
2991 exp_name[PL_expect],
2992 pv_display(tmp, s, strlen(s), 0, 60));
2993 SvREFCNT_dec(tmp);
bbf60fe6 2994 } );
02aa26ce 2995 /* check if there's an identifier for us to look at */
ba979b31 2996 if (PL_pending_ident)
bbf60fe6 2997 return REPORT(S_pending_ident(aTHX));
bbce6d69 2998
02aa26ce
NT
2999 /* no identifier pending identification */
3000
3280af22 3001 switch (PL_lex_state) {
79072805
LW
3002#ifdef COMMENTARY
3003 case LEX_NORMAL: /* Some compilers will produce faster */
3004 case LEX_INTERPNORMAL: /* code if we comment these out. */
3005 break;
3006#endif
3007
09bef843 3008 /* when we've already built the next token, just pull it out of the queue */
79072805 3009 case LEX_KNOWNEXT:
5db06880
NC
3010#ifdef PERL_MAD
3011 PL_lasttoke--;
3012 yylval = PL_nexttoke[PL_lasttoke].next_val;
3013 if (PL_madskills) {
cd81e915 3014 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3015 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3016 if (PL_thismad && PL_thismad->mad_key == '_') {
3017 PL_thiswhite = (SV*)PL_thismad->mad_val;
3018 PL_thismad->mad_val = 0;
3019 mad_free(PL_thismad);
3020 PL_thismad = 0;
5db06880
NC
3021 }
3022 }
3023 if (!PL_lasttoke) {
3024 PL_lex_state = PL_lex_defer;
3025 PL_expect = PL_lex_expect;
3026 PL_lex_defer = LEX_NORMAL;
3027 if (!PL_nexttoke[PL_lasttoke].next_type)
3028 return yylex();
3029 }
3030#else
3280af22 3031 PL_nexttoke--;
5db06880 3032 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3033 if (!PL_nexttoke) {
3034 PL_lex_state = PL_lex_defer;
3035 PL_expect = PL_lex_expect;
3036 PL_lex_defer = LEX_NORMAL;
463ee0b2 3037 }
5db06880
NC
3038#endif
3039#ifdef PERL_MAD
3040 /* FIXME - can these be merged? */
3041 return(PL_nexttoke[PL_lasttoke].next_type);
3042#else
bbf60fe6 3043 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3044#endif
79072805 3045
02aa26ce 3046 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3047 when we get here, PL_bufptr is at the \
02aa26ce 3048 */
79072805
LW
3049 case LEX_INTERPCASEMOD:
3050#ifdef DEBUGGING
3280af22 3051 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3052 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3053#endif
02aa26ce 3054 /* handle \E or end of string */
3280af22 3055 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3056 /* if at a \E */
3280af22 3057 if (PL_lex_casemods) {
f54cb97a 3058 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3059 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3060
3792a11b
NC
3061 if (PL_bufptr != PL_bufend
3062 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3063 PL_bufptr += 2;
3064 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3065#ifdef PERL_MAD
3066 if (PL_madskills)
cd81e915 3067 PL_thistoken = newSVpvn("\\E",2);
5db06880 3068#endif
a0d0e21e 3069 }
bbf60fe6 3070 return REPORT(')');
79072805 3071 }
5db06880
NC
3072#ifdef PERL_MAD
3073 while (PL_bufptr != PL_bufend &&
3074 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3075 if (!PL_thiswhite)
3076 PL_thiswhite = newSVpvn("",0);
3077 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3078 PL_bufptr += 2;
3079 }
3080#else
3280af22
NIS
3081 if (PL_bufptr != PL_bufend)
3082 PL_bufptr += 2;
5db06880 3083#endif
3280af22 3084 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3085 return yylex();
79072805
LW
3086 }
3087 else {
607df283 3088 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3089 "### Saw case modifier\n"); });
3280af22 3090 s = PL_bufptr + 1;
6e909404 3091 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3092#ifdef PERL_MAD
cd81e915
NC
3093 if (!PL_thiswhite)
3094 PL_thiswhite = newSVpvn("",0);
3095 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3096#endif
89122651 3097 PL_bufptr = s + 3;
6e909404
JH
3098 PL_lex_state = LEX_INTERPCONCAT;
3099 return yylex();
a0d0e21e 3100 }
6e909404 3101 else {
90771dc0 3102 I32 tmp;
5db06880
NC
3103 if (!PL_madskills) /* when just compiling don't need correct */
3104 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3105 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3106 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3107 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3108 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3109 return REPORT(')');
6e909404
JH
3110 }
3111 if (PL_lex_casemods > 10)
3112 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3113 PL_lex_casestack[PL_lex_casemods++] = *s;
3114 PL_lex_casestack[PL_lex_casemods] = '\0';
3115 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3116 start_force(PL_curforce);
9ded7720 3117 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3118 force_next('(');
cd81e915 3119 start_force(PL_curforce);
6e909404 3120 if (*s == 'l')
9ded7720 3121 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3122 else if (*s == 'u')
9ded7720 3123 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3124 else if (*s == 'L')
9ded7720 3125 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3126 else if (*s == 'U')
9ded7720 3127 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3128 else if (*s == 'Q')
9ded7720 3129 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3130 else
3131 Perl_croak(aTHX_ "panic: yylex");
5db06880
NC
3132 if (PL_madskills) {
3133 SV* tmpsv = newSVpvn("",0);
3134 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3135 curmad('_', tmpsv);
3136 }
6e909404 3137 PL_bufptr = s + 1;
a0d0e21e 3138 }
79072805 3139 force_next(FUNC);
3280af22
NIS
3140 if (PL_lex_starts) {
3141 s = PL_bufptr;
3142 PL_lex_starts = 0;
5db06880
NC
3143#ifdef PERL_MAD
3144 if (PL_madskills) {
cd81e915
NC
3145 if (PL_thistoken)
3146 sv_free(PL_thistoken);
3147 PL_thistoken = newSVpvn("",0);
5db06880
NC
3148 }
3149#endif
131b3ad0
DM
3150 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3151 if (PL_lex_casemods == 1 && PL_lex_inpat)
3152 OPERATOR(',');
3153 else
3154 Aop(OP_CONCAT);
79072805
LW
3155 }
3156 else
cea2e8a9 3157 return yylex();
79072805
LW
3158 }
3159
55497cff 3160 case LEX_INTERPPUSH:
bbf60fe6 3161 return REPORT(sublex_push());
55497cff 3162
79072805 3163 case LEX_INTERPSTART:
3280af22 3164 if (PL_bufptr == PL_bufend)
bbf60fe6 3165 return REPORT(sublex_done());
607df283 3166 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3167 "### Interpolated variable\n"); });
3280af22
NIS
3168 PL_expect = XTERM;
3169 PL_lex_dojoin = (*PL_bufptr == '@');
3170 PL_lex_state = LEX_INTERPNORMAL;
3171 if (PL_lex_dojoin) {
cd81e915 3172 start_force(PL_curforce);
9ded7720 3173 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3174 force_next(',');
cd81e915 3175 start_force(PL_curforce);
a0d0e21e 3176 force_ident("\"", '$');
cd81e915 3177 start_force(PL_curforce);
9ded7720 3178 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3179 force_next('$');
cd81e915 3180 start_force(PL_curforce);
9ded7720 3181 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3182 force_next('(');
cd81e915 3183 start_force(PL_curforce);
9ded7720 3184 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3185 force_next(FUNC);
3186 }
3280af22
NIS
3187 if (PL_lex_starts++) {
3188 s = PL_bufptr;
5db06880
NC
3189#ifdef PERL_MAD
3190 if (PL_madskills) {
cd81e915
NC
3191 if (PL_thistoken)
3192 sv_free(PL_thistoken);
3193 PL_thistoken = newSVpvn("",0);
5db06880
NC
3194 }
3195#endif
131b3ad0
DM
3196 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3197 if (!PL_lex_casemods && PL_lex_inpat)
3198 OPERATOR(',');
3199 else
3200 Aop(OP_CONCAT);
79072805 3201 }
cea2e8a9 3202 return yylex();
79072805
LW
3203
3204 case LEX_INTERPENDMAYBE:
3280af22
NIS
3205 if (intuit_more(PL_bufptr)) {
3206 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3207 break;
3208 }
3209 /* FALL THROUGH */
3210
3211 case LEX_INTERPEND:
3280af22
NIS
3212 if (PL_lex_dojoin) {
3213 PL_lex_dojoin = FALSE;
3214 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3215#ifdef PERL_MAD
3216 if (PL_madskills) {
cd81e915
NC
3217 if (PL_thistoken)
3218 sv_free(PL_thistoken);
3219 PL_thistoken = newSVpvn("",0);
5db06880
NC
3220 }
3221#endif
bbf60fe6 3222 return REPORT(')');
79072805 3223 }
43a16006 3224 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3225 && SvEVALED(PL_lex_repl))
43a16006 3226 {
e9fa98b2 3227 if (PL_bufptr != PL_bufend)
cea2e8a9 3228 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3229 PL_lex_repl = NULL;
e9fa98b2 3230 }
79072805
LW
3231 /* FALLTHROUGH */
3232 case LEX_INTERPCONCAT:
3233#ifdef DEBUGGING
3280af22 3234 if (PL_lex_brackets)
cea2e8a9 3235 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3236#endif
3280af22 3237 if (PL_bufptr == PL_bufend)
bbf60fe6 3238 return REPORT(sublex_done());
79072805 3239
3280af22
NIS
3240 if (SvIVX(PL_linestr) == '\'') {
3241 SV *sv = newSVsv(PL_linestr);
3242 if (!PL_lex_inpat)
76e3520e 3243 sv = tokeq(sv);
3280af22 3244 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3245 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3246 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3247 s = PL_bufend;
79072805
LW
3248 }
3249 else {
3280af22 3250 s = scan_const(PL_bufptr);
79072805 3251 if (*s == '\\')
3280af22 3252 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3253 else
3280af22 3254 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3255 }
3256
3280af22 3257 if (s != PL_bufptr) {
cd81e915 3258 start_force(PL_curforce);
5db06880
NC
3259 if (PL_madskills) {
3260 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3261 }
9ded7720 3262 NEXTVAL_NEXTTOKE = yylval;
3280af22 3263 PL_expect = XTERM;
79072805 3264 force_next(THING);
131b3ad0 3265 if (PL_lex_starts++) {
5db06880
NC
3266#ifdef PERL_MAD
3267 if (PL_madskills) {
cd81e915
NC
3268 if (PL_thistoken)
3269 sv_free(PL_thistoken);
3270 PL_thistoken = newSVpvn("",0);
5db06880
NC
3271 }
3272#endif
131b3ad0
DM
3273 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3274 if (!PL_lex_casemods && PL_lex_inpat)
3275 OPERATOR(',');
3276 else
3277 Aop(OP_CONCAT);
3278 }
79072805 3279 else {
3280af22 3280 PL_bufptr = s;
cea2e8a9 3281 return yylex();
79072805
LW
3282 }
3283 }
3284
cea2e8a9 3285 return yylex();
a0d0e21e 3286 case LEX_FORMLINE:
3280af22
NIS
3287 PL_lex_state = LEX_NORMAL;
3288 s = scan_formline(PL_bufptr);
3289 if (!PL_lex_formbrack)
a0d0e21e
LW
3290 goto rightbracket;
3291 OPERATOR(';');
79072805
LW
3292 }
3293
3280af22
NIS
3294 s = PL_bufptr;
3295 PL_oldoldbufptr = PL_oldbufptr;
3296 PL_oldbufptr = s;
463ee0b2
LW
3297
3298 retry:
5db06880 3299#ifdef PERL_MAD
cd81e915
NC
3300 if (PL_thistoken) {
3301 sv_free(PL_thistoken);
3302 PL_thistoken = 0;
5db06880 3303 }
cd81e915 3304 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3305#endif
378cc40b
LW
3306 switch (*s) {
3307 default:
7e2040f0 3308 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3309 goto keylookup;
cea2e8a9 3310 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3311 case 4:
3312 case 26:
3313 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3314 case 0:
5db06880
NC
3315#ifdef PERL_MAD
3316 if (PL_madskills)
cd81e915 3317 PL_faketokens = 0;
5db06880 3318#endif
3280af22
NIS
3319 if (!PL_rsfp) {
3320 PL_last_uni = 0;
3321 PL_last_lop = 0;
c5ee2135 3322 if (PL_lex_brackets) {
0bd48802
AL
3323 yyerror(PL_lex_formbrack
3324 ? "Format not terminated"
3325 : "Missing right curly or square bracket");
c5ee2135 3326 }
4e553d73 3327 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3328 "### Tokener got EOF\n");
5f80b19c 3329 } );
79072805 3330 TOKEN(0);
463ee0b2 3331 }
3280af22 3332 if (s++ < PL_bufend)
a687059c 3333 goto retry; /* ignore stray nulls */
3280af22
NIS
3334 PL_last_uni = 0;
3335 PL_last_lop = 0;
3336 if (!PL_in_eval && !PL_preambled) {
3337 PL_preambled = TRUE;
5db06880
NC
3338#ifdef PERL_MAD
3339 if (PL_madskills)
cd81e915 3340 PL_faketokens = 1;
5db06880 3341#endif
3280af22
NIS
3342 sv_setpv(PL_linestr,incl_perldb());
3343 if (SvCUR(PL_linestr))
396482e1 3344 sv_catpvs(PL_linestr,";");
3280af22
NIS
3345 if (PL_preambleav){
3346 while(AvFILLp(PL_preambleav) >= 0) {
3347 SV *tmpsv = av_shift(PL_preambleav);
3348 sv_catsv(PL_linestr, tmpsv);
396482e1 3349 sv_catpvs(PL_linestr, ";");
91b7def8 3350 sv_free(tmpsv);
3351 }
3280af22
NIS
3352 sv_free((SV*)PL_preambleav);
3353 PL_preambleav = NULL;
91b7def8 3354 }
3280af22 3355 if (PL_minus_n || PL_minus_p) {
396482e1 3356 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3357 if (PL_minus_l)
396482e1 3358 sv_catpvs(PL_linestr,"chomp;");
3280af22 3359 if (PL_minus_a) {
3280af22 3360 if (PL_minus_F) {
3792a11b
NC
3361 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3362 || *PL_splitstr == '"')
3280af22 3363 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3364 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3365 else {
c8ef6a4b
NC
3366 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3367 bytes can be used as quoting characters. :-) */
dd374669 3368 const char *splits = PL_splitstr;
91d456ae 3369 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3370 do {
3371 /* Need to \ \s */
dd374669
AL
3372 if (*splits == '\\')
3373 sv_catpvn(PL_linestr, splits, 1);
3374 sv_catpvn(PL_linestr, splits, 1);
3375 } while (*splits++);
48c4c863
NC
3376 /* This loop will embed the trailing NUL of
3377 PL_linestr as the last thing it does before
3378 terminating. */
396482e1 3379 sv_catpvs(PL_linestr, ");");
54310121 3380 }
2304df62
AD
3381 }
3382 else
396482e1 3383 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3384 }
79072805 3385 }
bc9b29db 3386 if (PL_minus_E)
396482e1
GA
3387 sv_catpvs(PL_linestr,"use feature ':5.10';");
3388 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3389 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3390 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3391 PL_last_lop = PL_last_uni = NULL;
3280af22 3392 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3393 SV * const sv = newSV(0);
a0d0e21e
LW
3394
3395 sv_upgrade(sv, SVt_PVMG);
3280af22 3396 sv_setsv(sv,PL_linestr);
0ac0412a 3397 (void)SvIOK_on(sv);
45977657 3398 SvIV_set(sv, 0);
36c7798d 3399 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3400 }
79072805 3401 goto retry;
a687059c 3402 }
e929a76b 3403 do {
aa7440fb 3404 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3405 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3406 fake_eof:
5db06880 3407#ifdef PERL_MAD
cd81e915 3408 PL_realtokenstart = -1;
5db06880 3409#endif
7e28d3af
JH
3410 if (PL_rsfp) {
3411 if (PL_preprocess && !PL_in_eval)
3412 (void)PerlProc_pclose(PL_rsfp);
3413 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3414 PerlIO_clearerr(PL_rsfp);
3415 else
3416 (void)PerlIO_close(PL_rsfp);
4608196e 3417 PL_rsfp = NULL;
7e28d3af
JH
3418 PL_doextract = FALSE;
3419 }
3420 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3421#ifdef PERL_MAD
3422 if (PL_madskills)
cd81e915 3423 PL_faketokens = 1;
5db06880 3424#endif
a23c4656
NC
3425 sv_setpv(PL_linestr,PL_minus_p
3426 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3427 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3428 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3429 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3430 PL_minus_n = PL_minus_p = 0;
3431 goto retry;
3432 }
3433 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3434 PL_last_lop = PL_last_uni = NULL;
c69006e4 3435 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3436 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3437 }
7aa207d6
JH
3438 /* If it looks like the start of a BOM or raw UTF-16,
3439 * check if it in fact is. */
3440 else if (bof &&
3441 (*s == 0 ||
3442 *(U8*)s == 0xEF ||
3443 *(U8*)s >= 0xFE ||
3444 s[1] == 0)) {
226017aa 3445#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3446# ifdef __GNU_LIBRARY__
3447# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3448# define FTELL_FOR_PIPE_IS_BROKEN
3449# endif
e3f494f1
JH
3450# else
3451# ifdef __GLIBC__
3452# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3453# define FTELL_FOR_PIPE_IS_BROKEN
3454# endif
3455# endif
226017aa
DD
3456# endif
3457#endif
3458#ifdef FTELL_FOR_PIPE_IS_BROKEN
3459 /* This loses the possibility to detect the bof
3460 * situation on perl -P when the libc5 is being used.
3461 * Workaround? Maybe attach some extra state to PL_rsfp?
3462 */
3463 if (!PL_preprocess)
7e28d3af 3464 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3465#else
eb160463 3466 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3467#endif
7e28d3af 3468 if (bof) {
3280af22 3469 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3470 s = swallow_bom((U8*)s);
e929a76b 3471 }
378cc40b 3472 }
3280af22 3473 if (PL_doextract) {
a0d0e21e 3474 /* Incest with pod. */
5db06880
NC
3475#ifdef PERL_MAD
3476 if (PL_madskills)
cd81e915 3477 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 3478#endif
a0d0e21e 3479 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3480 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3481 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3482 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3483 PL_last_lop = PL_last_uni = NULL;
3280af22 3484 PL_doextract = FALSE;
a0d0e21e 3485 }
4e553d73 3486 }
463ee0b2 3487 incline(s);
3280af22
NIS
3488 } while (PL_doextract);
3489 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3490 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3491 SV * const sv = newSV(0);
a687059c 3492
93a17b20 3493 sv_upgrade(sv, SVt_PVMG);
3280af22 3494 sv_setsv(sv,PL_linestr);
0ac0412a 3495 (void)SvIOK_on(sv);
45977657 3496 SvIV_set(sv, 0);
36c7798d 3497 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 3498 }
3280af22 3499 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3500 PL_last_lop = PL_last_uni = NULL;
57843af0 3501 if (CopLINE(PL_curcop) == 1) {
3280af22 3502 while (s < PL_bufend && isSPACE(*s))
79072805 3503 s++;
a0d0e21e 3504 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 3505 s++;
5db06880
NC
3506#ifdef PERL_MAD
3507 if (PL_madskills)
cd81e915 3508 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 3509#endif
bd61b366 3510 d = NULL;
3280af22 3511 if (!PL_in_eval) {
44a8e56a 3512 if (*s == '#' && *(s+1) == '!')
3513 d = s + 2;
3514#ifdef ALTERNATE_SHEBANG
3515 else {
bfed75c6 3516 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 3517 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3518 d = s + (sizeof(as) - 1);
3519 }
3520#endif /* ALTERNATE_SHEBANG */
3521 }
3522 if (d) {
b8378b72 3523 char *ipath;
774d564b 3524 char *ipathend;
b8378b72 3525
774d564b 3526 while (isSPACE(*d))
b8378b72
CS
3527 d++;
3528 ipath = d;
774d564b 3529 while (*d && !isSPACE(*d))
3530 d++;
3531 ipathend = d;
3532
3533#ifdef ARG_ZERO_IS_SCRIPT
3534 if (ipathend > ipath) {
3535 /*
3536 * HP-UX (at least) sets argv[0] to the script name,
3537 * which makes $^X incorrect. And Digital UNIX and Linux,
3538 * at least, set argv[0] to the basename of the Perl
3539 * interpreter. So, having found "#!", we'll set it right.
3540 */
fafc274c
NC