This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
random thread test failure
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a 71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
301 { 0, TOKENTYPE_NONE, 0 }
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
d4c19fe8
AL
432 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 /**/;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
501 char he_name[32] = "feature_";
502 (void) strncpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
664 PL_linestr = line;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 667 s = SvPV_const(PL_linestr, len);
6f27f9a7 668 if (!len || s[len-1] != ';') {
3280af22
NIS
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 671 sv_catpvs(PL_linestr, "\n;");
8990e307 672 }
3280af22
NIS
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 676 PL_last_lop = PL_last_uni = NULL;
3280af22 677 PL_rsfp = 0;
79072805 678}
a687059c 679
ffb4593c
NT
680/*
681 * Perl_lex_end
9cbb5ea2
GS
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
ffb4593c
NT
684 */
685
463ee0b2 686void
864dbfa3 687Perl_lex_end(pTHX)
463ee0b2 688{
97aff369 689 dVAR;
3280af22 690 PL_doextract = FALSE;
463ee0b2
LW
691}
692
ffb4593c
NT
693/*
694 * S_incline
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
57843af0 697 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 698 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_incline(pTHX_ char *s)
463ee0b2 705{
97aff369 706 dVAR;
463ee0b2
LW
707 char *t;
708 char *n;
73659bf1 709 char *e;
463ee0b2 710 char ch;
463ee0b2 711
57843af0 712 CopLINE_inc(PL_curcop);
463ee0b2
LW
713 if (*s++ != '#')
714 return;
d4c19fe8
AL
715 while (SPACE_OR_TAB(*s))
716 s++;
73659bf1
GS
717 if (strnEQ(s, "line", 4))
718 s += 4;
719 else
720 return;
084592ab 721 if (SPACE_OR_TAB(*s))
73659bf1 722 s++;
4e553d73 723 else
73659bf1 724 return;
d4c19fe8
AL
725 while (SPACE_OR_TAB(*s))
726 s++;
463ee0b2
LW
727 if (!isDIGIT(*s))
728 return;
d4c19fe8 729
463ee0b2
LW
730 n = s;
731 while (isDIGIT(*s))
732 s++;
bf4acbe4 733 while (SPACE_OR_TAB(*s))
463ee0b2 734 s++;
73659bf1 735 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 736 s++;
73659bf1
GS
737 e = t + 1;
738 }
463ee0b2 739 else {
463ee0b2 740 for (t = s; !isSPACE(*t); t++) ;
73659bf1 741 e = t;
463ee0b2 742 }
bf4acbe4 743 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
744 e++;
745 if (*e != '\n' && *e != '\0')
746 return; /* false alarm */
747
463ee0b2
LW
748 ch = *t;
749 *t = '\0';
f4dd75d9 750 if (t - s > 0) {
8a5ee598 751#ifndef USE_ITHREADS
c4420975 752 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
753 STRLEN tmplen = cf ? strlen(cf) : 0;
754 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
755 /* must copy *{"::_<(eval N)[oldfilename:L]"}
756 * to *{"::_<newfilename"} */
757 char smallbuf[256], smallbuf2[256];
758 char *tmpbuf, *tmpbuf2;
8a5ee598 759 GV **gvp, *gv2;
e66cf94c
RGS
760 STRLEN tmplen2 = strlen(s);
761 if (tmplen + 3 < sizeof smallbuf)
762 tmpbuf = smallbuf;
763 else
764 Newx(tmpbuf, tmplen + 3, char);
765 if (tmplen2 + 3 < sizeof smallbuf2)
766 tmpbuf2 = smallbuf2;
767 else
768 Newx(tmpbuf2, tmplen2 + 3, char);
769 tmpbuf[0] = tmpbuf2[0] = '_';
770 tmpbuf[1] = tmpbuf2[1] = '<';
771 memcpy(tmpbuf + 2, cf, ++tmplen);
772 memcpy(tmpbuf2 + 2, s, ++tmplen2);
773 ++tmplen; ++tmplen2;
8a5ee598
RGS
774 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
775 if (gvp) {
776 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
777 if (!isGV(gv2))
778 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
779 /* adjust ${"::_<newfilename"} to store the new file name */
780 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
781 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
782 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
783 }
e66cf94c
RGS
784 if (tmpbuf != smallbuf) Safefree(tmpbuf);
785 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
786 }
8a5ee598 787#endif
05ec9bb3 788 CopFILE_free(PL_curcop);
57843af0 789 CopFILE_set(PL_curcop, s);
f4dd75d9 790 }
463ee0b2 791 *t = ch;
57843af0 792 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
793}
794
29595ff2 795#ifdef PERL_MAD
cd81e915 796/* skip space before PL_thistoken */
29595ff2
NC
797
798STATIC char *
799S_skipspace0(pTHX_ register char *s)
800{
801 s = skipspace(s);
802 if (!PL_madskills)
803 return s;
cd81e915
NC
804 if (PL_skipwhite) {
805 if (!PL_thiswhite)
806 PL_thiswhite = newSVpvn("",0);
807 sv_catsv(PL_thiswhite, PL_skipwhite);
808 sv_free(PL_skipwhite);
809 PL_skipwhite = 0;
810 }
811 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
812 return s;
813}
814
cd81e915 815/* skip space after PL_thistoken */
29595ff2
NC
816
817STATIC char *
818S_skipspace1(pTHX_ register char *s)
819{
d4c19fe8 820 const char *start = s;
29595ff2
NC
821 I32 startoff = start - SvPVX(PL_linestr);
822
823 s = skipspace(s);
824 if (!PL_madskills)
825 return s;
826 start = SvPVX(PL_linestr) + startoff;
cd81e915 827 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 828 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
829 PL_thistoken = newSVpvn(tstart, start - tstart);
830 }
831 PL_realtokenstart = -1;
832 if (PL_skipwhite) {
833 if (!PL_nextwhite)
834 PL_nextwhite = newSVpvn("",0);
835 sv_catsv(PL_nextwhite, PL_skipwhite);
836 sv_free(PL_skipwhite);
837 PL_skipwhite = 0;
29595ff2
NC
838 }
839 return s;
840}
841
842STATIC char *
843S_skipspace2(pTHX_ register char *s, SV **svp)
844{
845 char *start = s;
846 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
847 I32 startoff = start - SvPVX(PL_linestr);
848 s = skipspace(s);
849 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
850 if (!PL_madskills || !svp)
851 return s;
852 start = SvPVX(PL_linestr) + startoff;
cd81e915 853 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 854 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
855 PL_thistoken = newSVpvn(tstart, start - tstart);
856 PL_realtokenstart = -1;
29595ff2 857 }
cd81e915 858 if (PL_skipwhite) {
29595ff2
NC
859 if (!*svp)
860 *svp = newSVpvn("",0);
cd81e915
NC
861 sv_setsv(*svp, PL_skipwhite);
862 sv_free(PL_skipwhite);
863 PL_skipwhite = 0;
29595ff2
NC
864 }
865
866 return s;
867}
868#endif
869
ffb4593c
NT
870/*
871 * S_skipspace
872 * Called to gobble the appropriate amount and type of whitespace.
873 * Skips comments as well.
874 */
875
76e3520e 876STATIC char *
cea2e8a9 877S_skipspace(pTHX_ register char *s)
a687059c 878{
97aff369 879 dVAR;
5db06880
NC
880#ifdef PERL_MAD
881 int curoff;
882 int startoff = s - SvPVX(PL_linestr);
883
cd81e915
NC
884 if (PL_skipwhite) {
885 sv_free(PL_skipwhite);
886 PL_skipwhite = 0;
5db06880
NC
887 }
888#endif
889
3280af22 890 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 891 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 892 s++;
5db06880
NC
893#ifdef PERL_MAD
894 goto done;
895#else
463ee0b2 896 return s;
5db06880 897#endif
463ee0b2
LW
898 }
899 for (;;) {
fd049845 900 STRLEN prevlen;
09bef843 901 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 902 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
903 while (s < PL_bufend && isSPACE(*s)) {
904 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
905 incline(s);
906 }
ffb4593c
NT
907
908 /* comment */
3280af22
NIS
909 if (s < PL_bufend && *s == '#') {
910 while (s < PL_bufend && *s != '\n')
463ee0b2 911 s++;
60e6418e 912 if (s < PL_bufend) {
463ee0b2 913 s++;
60e6418e
GS
914 if (PL_in_eval && !PL_rsfp) {
915 incline(s);
916 continue;
917 }
918 }
463ee0b2 919 }
ffb4593c
NT
920
921 /* only continue to recharge the buffer if we're at the end
922 * of the buffer, we're not reading from a source filter, and
923 * we're in normal lexing mode
924 */
09bef843
SB
925 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
926 PL_lex_state == LEX_FORMLINE)
5db06880
NC
927#ifdef PERL_MAD
928 goto done;
929#else
463ee0b2 930 return s;
5db06880 931#endif
ffb4593c
NT
932
933 /* try to recharge the buffer */
5db06880
NC
934#ifdef PERL_MAD
935 curoff = s - SvPVX(PL_linestr);
936#endif
937
9cbb5ea2 938 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 939 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 940 {
5db06880
NC
941#ifdef PERL_MAD
942 if (PL_madskills && curoff != startoff) {
cd81e915
NC
943 if (!PL_skipwhite)
944 PL_skipwhite = newSVpvn("",0);
945 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
946 curoff - startoff);
947 }
948
949 /* mustn't throw out old stuff yet if madpropping */
950 SvCUR(PL_linestr) = curoff;
951 s = SvPVX(PL_linestr) + curoff;
952 *s = 0;
953 if (curoff && s[-1] == '\n')
954 s[-1] = ' ';
955#endif
956
9cbb5ea2 957 /* end of file. Add on the -p or -n magic */
cd81e915 958 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 959 if (PL_minus_p) {
5db06880
NC
960#ifdef PERL_MAD
961 sv_catpv(PL_linestr,
962 ";}continue{print or die qq(-p destination: $!\\n);}");
963#else
01a19ab0
NC
964 sv_setpv(PL_linestr,
965 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 966#endif
3280af22 967 PL_minus_n = PL_minus_p = 0;
a0d0e21e 968 }
01a19ab0 969 else if (PL_minus_n) {
5db06880
NC
970#ifdef PERL_MAD
971 sv_catpvn(PL_linestr, ";}", 2);
972#else
01a19ab0 973 sv_setpvn(PL_linestr, ";}", 2);
5db06880 974#endif
01a19ab0
NC
975 PL_minus_n = 0;
976 }
a0d0e21e 977 else
5db06880
NC
978#ifdef PERL_MAD
979 sv_catpvn(PL_linestr,";", 1);
980#else
4147a61b 981 sv_setpvn(PL_linestr,";", 1);
5db06880 982#endif
ffb4593c
NT
983
984 /* reset variables for next time we lex */
9cbb5ea2 985 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
986 = SvPVX(PL_linestr)
987#ifdef PERL_MAD
988 + curoff
989#endif
990 ;
3280af22 991 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 992 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
993
994 /* Close the filehandle. Could be from -P preprocessor,
995 * STDIN, or a regular file. If we were reading code from
996 * STDIN (because the commandline held no -e or filename)
997 * then we don't close it, we reset it so the code can
998 * read from STDIN too.
999 */
1000
3280af22
NIS
1001 if (PL_preprocess && !PL_in_eval)
1002 (void)PerlProc_pclose(PL_rsfp);
1003 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1004 PerlIO_clearerr(PL_rsfp);
8990e307 1005 else
3280af22 1006 (void)PerlIO_close(PL_rsfp);
4608196e 1007 PL_rsfp = NULL;
463ee0b2
LW
1008 return s;
1009 }
ffb4593c
NT
1010
1011 /* not at end of file, so we only read another line */
09bef843
SB
1012 /* make corresponding updates to old pointers, for yyerror() */
1013 oldprevlen = PL_oldbufptr - PL_bufend;
1014 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1015 if (PL_last_uni)
1016 oldunilen = PL_last_uni - PL_bufend;
1017 if (PL_last_lop)
1018 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1019 PL_linestart = PL_bufptr = s + prevlen;
1020 PL_bufend = s + SvCUR(PL_linestr);
1021 s = PL_bufptr;
09bef843
SB
1022 PL_oldbufptr = s + oldprevlen;
1023 PL_oldoldbufptr = s + oldoldprevlen;
1024 if (PL_last_uni)
1025 PL_last_uni = s + oldunilen;
1026 if (PL_last_lop)
1027 PL_last_lop = s + oldloplen;
a0d0e21e 1028 incline(s);
ffb4593c
NT
1029
1030 /* debugger active and we're not compiling the debugger code,
1031 * so store the line into the debugger's array of lines
1032 */
3280af22 1033 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1034 SV * const sv = newSV(0);
8990e307
LW
1035
1036 sv_upgrade(sv, SVt_PVMG);
3280af22 1037 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1038 (void)SvIOK_on(sv);
45977657 1039 SvIV_set(sv, 0);
36c7798d 1040 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1041 }
463ee0b2 1042 }
5db06880
NC
1043
1044#ifdef PERL_MAD
1045 done:
1046 if (PL_madskills) {
cd81e915
NC
1047 if (!PL_skipwhite)
1048 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1049 curoff = s - SvPVX(PL_linestr);
1050 if (curoff - startoff)
cd81e915 1051 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1052 curoff - startoff);
1053 }
1054 return s;
1055#endif
a687059c 1056}
378cc40b 1057
ffb4593c
NT
1058/*
1059 * S_check_uni
1060 * Check the unary operators to ensure there's no ambiguity in how they're
1061 * used. An ambiguous piece of code would be:
1062 * rand + 5
1063 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1064 * the +5 is its argument.
1065 */
1066
76e3520e 1067STATIC void
cea2e8a9 1068S_check_uni(pTHX)
ba106d47 1069{
97aff369 1070 dVAR;
d4c19fe8
AL
1071 const char *s;
1072 const char *t;
2f3197b3 1073
3280af22 1074 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1075 return;
3280af22
NIS
1076 while (isSPACE(*PL_last_uni))
1077 PL_last_uni++;
d4c19fe8
AL
1078 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++)
1079 /**/;
3280af22 1080 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1081 return;
6136c704 1082
0453d815 1083 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1084 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1085 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1086 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1087 }
2f3197b3
LW
1088}
1089
ffb4593c
NT
1090/*
1091 * LOP : macro to build a list operator. Its behaviour has been replaced
1092 * with a subroutine, S_lop() for which LOP is just another name.
1093 */
1094
a0d0e21e
LW
1095#define LOP(f,x) return lop(f,x,s)
1096
ffb4593c
NT
1097/*
1098 * S_lop
1099 * Build a list operator (or something that might be one). The rules:
1100 * - if we have a next token, then it's a list operator [why?]
1101 * - if the next thing is an opening paren, then it's a function
1102 * - else it's a list operator
1103 */
1104
76e3520e 1105STATIC I32
a0be28da 1106S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1107{
97aff369 1108 dVAR;
79072805 1109 yylval.ival = f;
35c8bce7 1110 CLINE;
3280af22
NIS
1111 PL_expect = x;
1112 PL_bufptr = s;
1113 PL_last_lop = PL_oldbufptr;
eb160463 1114 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1115#ifdef PERL_MAD
1116 if (PL_lasttoke)
1117 return REPORT(LSTOP);
1118#else
3280af22 1119 if (PL_nexttoke)
bbf60fe6 1120 return REPORT(LSTOP);
5db06880 1121#endif
79072805 1122 if (*s == '(')
bbf60fe6 1123 return REPORT(FUNC);
29595ff2 1124 s = PEEKSPACE(s);
79072805 1125 if (*s == '(')
bbf60fe6 1126 return REPORT(FUNC);
79072805 1127 else
bbf60fe6 1128 return REPORT(LSTOP);
79072805
LW
1129}
1130
5db06880
NC
1131#ifdef PERL_MAD
1132 /*
1133 * S_start_force
1134 * Sets up for an eventual force_next(). start_force(0) basically does
1135 * an unshift, while start_force(-1) does a push. yylex removes items
1136 * on the "pop" end.
1137 */
1138
1139STATIC void
1140S_start_force(pTHX_ int where)
1141{
1142 int i;
1143
cd81e915 1144 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1145 where = PL_lasttoke;
cd81e915
NC
1146 assert(PL_curforce < 0 || PL_curforce == where);
1147 if (PL_curforce != where) {
5db06880
NC
1148 for (i = PL_lasttoke; i > where; --i) {
1149 PL_nexttoke[i] = PL_nexttoke[i-1];
1150 }
1151 PL_lasttoke++;
1152 }
cd81e915 1153 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1154 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1155 PL_curforce = where;
1156 if (PL_nextwhite) {
5db06880
NC
1157 if (PL_madskills)
1158 curmad('^', newSVpvn("",0));
cd81e915 1159 CURMAD('_', PL_nextwhite);
5db06880
NC
1160 }
1161}
1162
1163STATIC void
1164S_curmad(pTHX_ char slot, SV *sv)
1165{
1166 MADPROP **where;
1167
1168 if (!sv)
1169 return;
cd81e915
NC
1170 if (PL_curforce < 0)
1171 where = &PL_thismad;
5db06880 1172 else
cd81e915 1173 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1174
cd81e915 1175 if (PL_faketokens)
5db06880
NC
1176 sv_setpvn(sv, "", 0);
1177 else {
1178 if (!IN_BYTES) {
1179 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1180 SvUTF8_on(sv);
1181 else if (PL_encoding) {
1182 sv_recode_to_utf8(sv, PL_encoding);
1183 }
1184 }
1185 }
1186
1187 /* keep a slot open for the head of the list? */
1188 if (slot != '_' && *where && (*where)->mad_key == '^') {
1189 (*where)->mad_key = slot;
1190 sv_free((*where)->mad_val);
1191 (*where)->mad_val = (void*)sv;
1192 }
1193 else
1194 addmad(newMADsv(slot, sv), where, 0);
1195}
1196#else
b3f24c00
MHM
1197# define start_force(where) NOOP
1198# define curmad(slot, sv) NOOP
5db06880
NC
1199#endif
1200
ffb4593c
NT
1201/*
1202 * S_force_next
9cbb5ea2 1203 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1204 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1205 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1206 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1207 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1208 */
1209
4e553d73 1210STATIC void
cea2e8a9 1211S_force_next(pTHX_ I32 type)
79072805 1212{
97aff369 1213 dVAR;
5db06880 1214#ifdef PERL_MAD
cd81e915 1215 if (PL_curforce < 0)
5db06880 1216 start_force(PL_lasttoke);
cd81e915 1217 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1218 if (PL_lex_state != LEX_KNOWNEXT)
1219 PL_lex_defer = PL_lex_state;
1220 PL_lex_state = LEX_KNOWNEXT;
1221 PL_lex_expect = PL_expect;
cd81e915 1222 PL_curforce = -1;
5db06880 1223#else
3280af22
NIS
1224 PL_nexttype[PL_nexttoke] = type;
1225 PL_nexttoke++;
1226 if (PL_lex_state != LEX_KNOWNEXT) {
1227 PL_lex_defer = PL_lex_state;
1228 PL_lex_expect = PL_expect;
1229 PL_lex_state = LEX_KNOWNEXT;
79072805 1230 }
5db06880 1231#endif
79072805
LW
1232}
1233
d0a148a6
NC
1234STATIC SV *
1235S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1236{
97aff369 1237 dVAR;
9d4ba2ae 1238 SV * const sv = newSVpvn(start,len);
bfed75c6 1239 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1240 SvUTF8_on(sv);
1241 return sv;
1242}
1243
ffb4593c
NT
1244/*
1245 * S_force_word
1246 * When the lexer knows the next thing is a word (for instance, it has
1247 * just seen -> and it knows that the next char is a word char, then
1248 * it calls S_force_word to stick the next word into the PL_next lookahead.
1249 *
1250 * Arguments:
b1b65b59 1251 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1252 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1253 * int check_keyword : if true, Perl checks to make sure the word isn't
1254 * a keyword (do this if the word is a label, e.g. goto FOO)
1255 * int allow_pack : if true, : characters will also be allowed (require,
1256 * use, etc. do this)
9cbb5ea2 1257 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1258 */
1259
76e3520e 1260STATIC char *
cea2e8a9 1261S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1262{
97aff369 1263 dVAR;
463ee0b2
LW
1264 register char *s;
1265 STRLEN len;
4e553d73 1266
29595ff2 1267 start = SKIPSPACE1(start);
463ee0b2 1268 s = start;
7e2040f0 1269 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1270 (allow_pack && *s == ':') ||
15f0808c 1271 (allow_initial_tick && *s == '\'') )
a0d0e21e 1272 {
3280af22
NIS
1273 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1274 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1275 return start;
cd81e915 1276 start_force(PL_curforce);
5db06880
NC
1277 if (PL_madskills)
1278 curmad('X', newSVpvn(start,s-start));
463ee0b2 1279 if (token == METHOD) {
29595ff2 1280 s = SKIPSPACE1(s);
463ee0b2 1281 if (*s == '(')
3280af22 1282 PL_expect = XTERM;
463ee0b2 1283 else {
3280af22 1284 PL_expect = XOPERATOR;
463ee0b2 1285 }
79072805 1286 }
9ded7720 1287 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1288 = (OP*)newSVOP(OP_CONST,0,
1289 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1290 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1291 force_next(token);
1292 }
1293 return s;
1294}
1295
ffb4593c
NT
1296/*
1297 * S_force_ident
9cbb5ea2 1298 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1299 * text only contains the "foo" portion. The first argument is a pointer
1300 * to the "foo", and the second argument is the type symbol to prefix.
1301 * Forces the next token to be a "WORD".
9cbb5ea2 1302 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1303 */
1304
76e3520e 1305STATIC void
bfed75c6 1306S_force_ident(pTHX_ register const char *s, int kind)
79072805 1307{
97aff369 1308 dVAR;
79072805 1309 if (s && *s) {
90e5519e
NC
1310 const STRLEN len = strlen(s);
1311 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1312 start_force(PL_curforce);
9ded7720 1313 NEXTVAL_NEXTTOKE.opval = o;
79072805 1314 force_next(WORD);
748a9306 1315 if (kind) {
11343788 1316 o->op_private = OPpCONST_ENTERED;
55497cff 1317 /* XXX see note in pp_entereval() for why we forgo typo
1318 warnings if the symbol must be introduced in an eval.
1319 GSAR 96-10-12 */
90e5519e
NC
1320 gv_fetchpvn_flags(s, len,
1321 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1322 : GV_ADD,
1323 kind == '$' ? SVt_PV :
1324 kind == '@' ? SVt_PVAV :
1325 kind == '%' ? SVt_PVHV :
a0d0e21e 1326 SVt_PVGV
90e5519e 1327 );
748a9306 1328 }
79072805
LW
1329 }
1330}
1331
1571675a
GS
1332NV
1333Perl_str_to_version(pTHX_ SV *sv)
1334{
1335 NV retval = 0.0;
1336 NV nshift = 1.0;
1337 STRLEN len;
cfd0369c 1338 const char *start = SvPV_const(sv,len);
9d4ba2ae 1339 const char * const end = start + len;
504618e9 1340 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1341 while (start < end) {
ba210ebe 1342 STRLEN skip;
1571675a
GS
1343 UV n;
1344 if (utf)
9041c2e3 1345 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1346 else {
1347 n = *(U8*)start;
1348 skip = 1;
1349 }
1350 retval += ((NV)n)/nshift;
1351 start += skip;
1352 nshift *= 1000;
1353 }
1354 return retval;
1355}
1356
4e553d73 1357/*
ffb4593c
NT
1358 * S_force_version
1359 * Forces the next token to be a version number.
e759cc13
RGS
1360 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1361 * and if "guessing" is TRUE, then no new token is created (and the caller
1362 * must use an alternative parsing method).
ffb4593c
NT
1363 */
1364
76e3520e 1365STATIC char *
e759cc13 1366S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1367{
97aff369 1368 dVAR;
5f66b61c 1369 OP *version = NULL;
44dcb63b 1370 char *d;
5db06880
NC
1371#ifdef PERL_MAD
1372 I32 startoff = s - SvPVX(PL_linestr);
1373#endif
89bfa8cd 1374
29595ff2 1375 s = SKIPSPACE1(s);
89bfa8cd 1376
44dcb63b 1377 d = s;
dd629d5b 1378 if (*d == 'v')
44dcb63b 1379 d++;
44dcb63b 1380 if (isDIGIT(*d)) {
e759cc13
RGS
1381 while (isDIGIT(*d) || *d == '_' || *d == '.')
1382 d++;
5db06880
NC
1383#ifdef PERL_MAD
1384 if (PL_madskills) {
cd81e915 1385 start_force(PL_curforce);
5db06880
NC
1386 curmad('X', newSVpvn(s,d-s));
1387 }
1388#endif
9f3d182e 1389 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1390 SV *ver;
b73d6f50 1391 s = scan_num(s, &yylval);
89bfa8cd 1392 version = yylval.opval;
dd629d5b
GS
1393 ver = cSVOPx(version)->op_sv;
1394 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1395 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1396 SvNV_set(ver, str_to_version(ver));
1571675a 1397 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1398 }
89bfa8cd 1399 }
5db06880
NC
1400 else if (guessing) {
1401#ifdef PERL_MAD
1402 if (PL_madskills) {
cd81e915
NC
1403 sv_free(PL_nextwhite); /* let next token collect whitespace */
1404 PL_nextwhite = 0;
5db06880
NC
1405 s = SvPVX(PL_linestr) + startoff;
1406 }
1407#endif
e759cc13 1408 return s;
5db06880 1409 }
89bfa8cd 1410 }
1411
5db06880
NC
1412#ifdef PERL_MAD
1413 if (PL_madskills && !version) {
cd81e915
NC
1414 sv_free(PL_nextwhite); /* let next token collect whitespace */
1415 PL_nextwhite = 0;
5db06880
NC
1416 s = SvPVX(PL_linestr) + startoff;
1417 }
1418#endif
89bfa8cd 1419 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1420 start_force(PL_curforce);
9ded7720 1421 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1422 force_next(WORD);
89bfa8cd 1423
e759cc13 1424 return s;
89bfa8cd 1425}
1426
ffb4593c
NT
1427/*
1428 * S_tokeq
1429 * Tokenize a quoted string passed in as an SV. It finds the next
1430 * chunk, up to end of string or a backslash. It may make a new
1431 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1432 * turns \\ into \.
1433 */
1434
76e3520e 1435STATIC SV *
cea2e8a9 1436S_tokeq(pTHX_ SV *sv)
79072805 1437{
97aff369 1438 dVAR;
79072805
LW
1439 register char *s;
1440 register char *send;
1441 register char *d;
b3ac6de7
IZ
1442 STRLEN len = 0;
1443 SV *pv = sv;
79072805
LW
1444
1445 if (!SvLEN(sv))
b3ac6de7 1446 goto finish;
79072805 1447
a0d0e21e 1448 s = SvPV_force(sv, len);
21a311ee 1449 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1450 goto finish;
463ee0b2 1451 send = s + len;
79072805
LW
1452 while (s < send && *s != '\\')
1453 s++;
1454 if (s == send)
b3ac6de7 1455 goto finish;
79072805 1456 d = s;
be4731d2 1457 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1458 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1459 if (SvUTF8(sv))
1460 SvUTF8_on(pv);
1461 }
79072805
LW
1462 while (s < send) {
1463 if (*s == '\\') {
a0d0e21e 1464 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1465 s++; /* all that, just for this */
1466 }
1467 *d++ = *s++;
1468 }
1469 *d = '\0';
95a20fc0 1470 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1471 finish:
3280af22 1472 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1473 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1474 return sv;
1475}
1476
ffb4593c
NT
1477/*
1478 * Now come three functions related to double-quote context,
1479 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1480 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1481 * interact with PL_lex_state, and create fake ( ... ) argument lists
1482 * to handle functions and concatenation.
1483 * They assume that whoever calls them will be setting up a fake
1484 * join call, because each subthing puts a ',' after it. This lets
1485 * "lower \luPpEr"
1486 * become
1487 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1488 *
1489 * (I'm not sure whether the spurious commas at the end of lcfirst's
1490 * arguments and join's arguments are created or not).
1491 */
1492
1493/*
1494 * S_sublex_start
1495 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1496 *
1497 * Pattern matching will set PL_lex_op to the pattern-matching op to
1498 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1499 *
1500 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1501 *
1502 * Everything else becomes a FUNC.
1503 *
1504 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1505 * had an OP_CONST or OP_READLINE). This just sets us up for a
1506 * call to S_sublex_push().
1507 */
1508
76e3520e 1509STATIC I32
cea2e8a9 1510S_sublex_start(pTHX)
79072805 1511{
97aff369 1512 dVAR;
0d46e09a 1513 register const I32 op_type = yylval.ival;
79072805
LW
1514
1515 if (op_type == OP_NULL) {
3280af22 1516 yylval.opval = PL_lex_op;
5f66b61c 1517 PL_lex_op = NULL;
79072805
LW
1518 return THING;
1519 }
1520 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1521 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1522
1523 if (SvTYPE(sv) == SVt_PVIV) {
1524 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1525 STRLEN len;
96a5add6 1526 const char * const p = SvPV_const(sv, len);
f54cb97a 1527 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1528 if (SvUTF8(sv))
1529 SvUTF8_on(nsv);
b3ac6de7
IZ
1530 SvREFCNT_dec(sv);
1531 sv = nsv;
4e553d73 1532 }
b3ac6de7 1533 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1534 PL_lex_stuff = NULL;
6f33ba73
RGS
1535 /* Allow <FH> // "foo" */
1536 if (op_type == OP_READLINE)
1537 PL_expect = XTERMORDORDOR;
79072805
LW
1538 return THING;
1539 }
1540
3280af22
NIS
1541 PL_sublex_info.super_state = PL_lex_state;
1542 PL_sublex_info.sub_inwhat = op_type;
1543 PL_sublex_info.sub_op = PL_lex_op;
1544 PL_lex_state = LEX_INTERPPUSH;
55497cff 1545
3280af22
NIS
1546 PL_expect = XTERM;
1547 if (PL_lex_op) {
1548 yylval.opval = PL_lex_op;
5f66b61c 1549 PL_lex_op = NULL;
55497cff 1550 return PMFUNC;
1551 }
1552 else
1553 return FUNC;
1554}
1555
ffb4593c
NT
1556/*
1557 * S_sublex_push
1558 * Create a new scope to save the lexing state. The scope will be
1559 * ended in S_sublex_done. Returns a '(', starting the function arguments
1560 * to the uc, lc, etc. found before.
1561 * Sets PL_lex_state to LEX_INTERPCONCAT.
1562 */
1563
76e3520e 1564STATIC I32
cea2e8a9 1565S_sublex_push(pTHX)
55497cff 1566{
27da23d5 1567 dVAR;
f46d017c 1568 ENTER;
55497cff 1569
3280af22
NIS
1570 PL_lex_state = PL_sublex_info.super_state;
1571 SAVEI32(PL_lex_dojoin);
1572 SAVEI32(PL_lex_brackets);
3280af22
NIS
1573 SAVEI32(PL_lex_casemods);
1574 SAVEI32(PL_lex_starts);
1575 SAVEI32(PL_lex_state);
7766f137 1576 SAVEVPTR(PL_lex_inpat);
3280af22 1577 SAVEI32(PL_lex_inwhat);
57843af0 1578 SAVECOPLINE(PL_curcop);
3280af22 1579 SAVEPPTR(PL_bufptr);
8452ff4b 1580 SAVEPPTR(PL_bufend);
3280af22
NIS
1581 SAVEPPTR(PL_oldbufptr);
1582 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1583 SAVEPPTR(PL_last_lop);
1584 SAVEPPTR(PL_last_uni);
3280af22
NIS
1585 SAVEPPTR(PL_linestart);
1586 SAVESPTR(PL_linestr);
8edd5f42
RGS
1587 SAVEGENERICPV(PL_lex_brackstack);
1588 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1589
1590 PL_linestr = PL_lex_stuff;
a0714e2c 1591 PL_lex_stuff = NULL;
3280af22 1592
9cbb5ea2
GS
1593 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1594 = SvPVX(PL_linestr);
3280af22 1595 PL_bufend += SvCUR(PL_linestr);
bd61b366 1596 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1597 SAVEFREESV(PL_linestr);
1598
1599 PL_lex_dojoin = FALSE;
1600 PL_lex_brackets = 0;
a02a5408
JC
1601 Newx(PL_lex_brackstack, 120, char);
1602 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1603 PL_lex_casemods = 0;
1604 *PL_lex_casestack = '\0';
1605 PL_lex_starts = 0;
1606 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1607 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1608
1609 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1610 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1611 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1612 else
5f66b61c 1613 PL_lex_inpat = NULL;
79072805 1614
55497cff 1615 return '(';
79072805
LW
1616}
1617
ffb4593c
NT
1618/*
1619 * S_sublex_done
1620 * Restores lexer state after a S_sublex_push.
1621 */
1622
76e3520e 1623STATIC I32
cea2e8a9 1624S_sublex_done(pTHX)
79072805 1625{
27da23d5 1626 dVAR;
3280af22 1627 if (!PL_lex_starts++) {
396482e1 1628 SV * const sv = newSVpvs("");
9aa983d2
JH
1629 if (SvUTF8(PL_linestr))
1630 SvUTF8_on(sv);
3280af22 1631 PL_expect = XOPERATOR;
9aa983d2 1632 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1633 return THING;
1634 }
1635
3280af22
NIS
1636 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1637 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1638 return yylex();
79072805
LW
1639 }
1640
ffb4593c 1641 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1642 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1643 PL_linestr = PL_lex_repl;
1644 PL_lex_inpat = 0;
1645 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1646 PL_bufend += SvCUR(PL_linestr);
bd61b366 1647 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1648 SAVEFREESV(PL_linestr);
1649 PL_lex_dojoin = FALSE;
1650 PL_lex_brackets = 0;
3280af22
NIS
1651 PL_lex_casemods = 0;
1652 *PL_lex_casestack = '\0';
1653 PL_lex_starts = 0;
25da4f38 1654 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1655 PL_lex_state = LEX_INTERPNORMAL;
1656 PL_lex_starts++;
e9fa98b2
HS
1657 /* we don't clear PL_lex_repl here, so that we can check later
1658 whether this is an evalled subst; that means we rely on the
1659 logic to ensure sublex_done() is called again only via the
1660 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1661 }
e9fa98b2 1662 else {
3280af22 1663 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1664 PL_lex_repl = NULL;
e9fa98b2 1665 }
79072805 1666 return ',';
ffed7fef
LW
1667 }
1668 else {
5db06880
NC
1669#ifdef PERL_MAD
1670 if (PL_madskills) {
cd81e915
NC
1671 if (PL_thiswhite) {
1672 if (!PL_endwhite)
1673 PL_endwhite = newSVpvn("",0);
1674 sv_catsv(PL_endwhite, PL_thiswhite);
1675 PL_thiswhite = 0;
1676 }
1677 if (PL_thistoken)
1678 sv_setpvn(PL_thistoken,"",0);
5db06880 1679 else
cd81e915 1680 PL_realtokenstart = -1;
5db06880
NC
1681 }
1682#endif
f46d017c 1683 LEAVE;
3280af22
NIS
1684 PL_bufend = SvPVX(PL_linestr);
1685 PL_bufend += SvCUR(PL_linestr);
1686 PL_expect = XOPERATOR;
09bef843 1687 PL_sublex_info.sub_inwhat = 0;
79072805 1688 return ')';
ffed7fef
LW
1689 }
1690}
1691
02aa26ce
NT
1692/*
1693 scan_const
1694
1695 Extracts a pattern, double-quoted string, or transliteration. This
1696 is terrifying code.
1697
3280af22
NIS
1698 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1699 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1700 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1701
9b599b2a
GS
1702 Returns a pointer to the character scanned up to. Iff this is
1703 advanced from the start pointer supplied (ie if anything was
1704 successfully parsed), will leave an OP for the substring scanned
1705 in yylval. Caller must intuit reason for not parsing further
1706 by looking at the next characters herself.
1707
02aa26ce
NT
1708 In patterns:
1709 backslashes:
1710 double-quoted style: \r and \n
1711 regexp special ones: \D \s
1712 constants: \x3
1713 backrefs: \1 (deprecated in substitution replacements)
1714 case and quoting: \U \Q \E
1715 stops on @ and $, but not for $ as tail anchor
1716
1717 In transliterations:
1718 characters are VERY literal, except for - not at the start or end
1719 of the string, which indicates a range. scan_const expands the
1720 range to the full set of intermediate characters.
1721
1722 In double-quoted strings:
1723 backslashes:
1724 double-quoted style: \r and \n
1725 constants: \x3
1726 backrefs: \1 (deprecated)
1727 case and quoting: \U \Q \E
1728 stops on @ and $
1729
1730 scan_const does *not* construct ops to handle interpolated strings.
1731 It stops processing as soon as it finds an embedded $ or @ variable
1732 and leaves it to the caller to work out what's going on.
1733
da6eedaa 1734 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1735
1736 $ in pattern could be $foo or could be tail anchor. Assumption:
1737 it's a tail anchor if $ is the last thing in the string, or if it's
1738 followed by one of ")| \n\t"
1739
1740 \1 (backreferences) are turned into $1
1741
1742 The structure of the code is
1743 while (there's a character to process) {
1744 handle transliteration ranges
1745 skip regexp comments
1746 skip # initiated comments in //x patterns
1747 check for embedded @foo
1748 check for embedded scalars
1749 if (backslash) {
1750 leave intact backslashes from leave (below)
1751 deprecate \1 in strings and sub replacements
1752 handle string-changing backslashes \l \U \Q \E, etc.
1753 switch (what was escaped) {
1754 handle - in a transliteration (becomes a literal -)
1755 handle \132 octal characters
1756 handle 0x15 hex characters
1757 handle \cV (control V)
1758 handle printf backslashes (\f, \r, \n, etc)
1759 } (end switch)
1760 } (end if backslash)
1761 } (end while character to read)
4e553d73 1762
02aa26ce
NT
1763*/
1764
76e3520e 1765STATIC char *
cea2e8a9 1766S_scan_const(pTHX_ char *start)
79072805 1767{
97aff369 1768 dVAR;
3280af22 1769 register char *send = PL_bufend; /* end of the constant */
561b68a9 1770 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1771 register char *s = start; /* start of the constant */
1772 register char *d = SvPVX(sv); /* destination for copies */
1773 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1774 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1775 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1776 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1777 UV uv;
4c3a8340
TS
1778#ifdef EBCDIC
1779 UV literal_endpoint = 0;
1780#endif
012bcf8d 1781
d4c19fe8 1782 const char * const leaveit = /* set of acceptably-backslashed characters */
3280af22 1783 PL_lex_inpat
b6d5fef8 1784 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1785 : "";
79072805 1786
2b9d42f0
NIS
1787 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1788 /* If we are doing a trans and we know we want UTF8 set expectation */
1789 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1790 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1791 }
1792
1793
79072805 1794 while (s < send || dorange) {
02aa26ce 1795 /* get transliterations out of the way (they're most literal) */
3280af22 1796 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1797 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1798 if (dorange) {
1ba5c669
JH
1799 I32 i; /* current expanded character */
1800 I32 min; /* first character in range */
1801 I32 max; /* last character in range */
02aa26ce 1802
2b9d42f0 1803 if (has_utf8) {
9d4ba2ae 1804 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1805 char *e = d++;
1806 while (e-- > c)
1807 *(e + 1) = *e;
25716404 1808 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1809 /* mark the range as done, and continue */
1810 dorange = FALSE;
1811 didrange = TRUE;
1812 continue;
1813 }
2b9d42f0 1814
95a20fc0 1815 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1816 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1817 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1818 d -= 2; /* eat the first char and the - */
1819
8ada0baa
JH
1820 min = (U8)*d; /* first char in range */
1821 max = (U8)d[1]; /* last char in range */
1822
c2e66d9e 1823 if (min > max) {
01ec43d0 1824 Perl_croak(aTHX_
d1573ac7 1825 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1826 (char)min, (char)max);
c2e66d9e
GS
1827 }
1828
c7f1f016 1829#ifdef EBCDIC
4c3a8340
TS
1830 if (literal_endpoint == 2 &&
1831 ((isLOWER(min) && isLOWER(max)) ||
1832 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1833 if (isLOWER(min)) {
1834 for (i = min; i <= max; i++)
1835 if (isLOWER(i))
db42d148 1836 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1837 } else {
1838 for (i = min; i <= max; i++)
1839 if (isUPPER(i))
db42d148 1840 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1841 }
1842 }
1843 else
1844#endif
1845 for (i = min; i <= max; i++)
eb160463 1846 *d++ = (char)i;
02aa26ce
NT
1847
1848 /* mark the range as done, and continue */
79072805 1849 dorange = FALSE;
01ec43d0 1850 didrange = TRUE;
4c3a8340
TS
1851#ifdef EBCDIC
1852 literal_endpoint = 0;
1853#endif
79072805 1854 continue;
4e553d73 1855 }
02aa26ce
NT
1856
1857 /* range begins (ignore - as first or last char) */
79072805 1858 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1859 if (didrange) {
1fafa243 1860 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1861 }
2b9d42f0 1862 if (has_utf8) {
25716404 1863 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1864 s++;
1865 continue;
1866 }
79072805
LW
1867 dorange = TRUE;
1868 s++;
01ec43d0
GS
1869 }
1870 else {
1871 didrange = FALSE;
4c3a8340
TS
1872#ifdef EBCDIC
1873 literal_endpoint = 0;
1874#endif
01ec43d0 1875 }
79072805 1876 }
02aa26ce
NT
1877
1878 /* if we get here, we're not doing a transliteration */
1879
0f5d15d6
IZ
1880 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1881 except for the last char, which will be done separately. */
3280af22 1882 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1883 if (s[2] == '#') {
e994fd66 1884 while (s+1 < send && *s != ')')
db42d148 1885 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1886 }
1887 else if (s[2] == '{' /* This should match regcomp.c */
1888 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1889 {
cc6b7395 1890 I32 count = 1;
0f5d15d6 1891 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1892 char c;
1893
d9f97599
GS
1894 while (count && (c = *regparse)) {
1895 if (c == '\\' && regparse[1])
1896 regparse++;
4e553d73 1897 else if (c == '{')
cc6b7395 1898 count++;
4e553d73 1899 else if (c == '}')
cc6b7395 1900 count--;
d9f97599 1901 regparse++;
cc6b7395 1902 }
e994fd66 1903 if (*regparse != ')')
5bdf89e7 1904 regparse--; /* Leave one char for continuation. */
0f5d15d6 1905 while (s < regparse)
db42d148 1906 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1907 }
748a9306 1908 }
02aa26ce
NT
1909
1910 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1911 else if (*s == '#' && PL_lex_inpat &&
1912 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1913 while (s+1 < send && *s != '\n')
db42d148 1914 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1915 }
02aa26ce 1916
5d1d4326 1917 /* check for embedded arrays
da6eedaa 1918 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1919 */
7e2040f0 1920 else if (*s == '@' && s[1]
5d1d4326 1921 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1922 break;
02aa26ce
NT
1923
1924 /* check for embedded scalars. only stop if we're sure it's a
1925 variable.
1926 */
79072805 1927 else if (*s == '$') {
3280af22 1928 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1929 break;
6002328a 1930 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1931 break; /* in regexp, $ might be tail anchor */
1932 }
02aa26ce 1933
2b9d42f0
NIS
1934 /* End of else if chain - OP_TRANS rejoin rest */
1935
02aa26ce 1936 /* backslashes */
79072805
LW
1937 if (*s == '\\' && s+1 < send) {
1938 s++;
02aa26ce
NT
1939
1940 /* some backslashes we leave behind */
c9f97d15 1941 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1942 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1943 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1944 continue;
1945 }
02aa26ce
NT
1946
1947 /* deprecate \1 in strings and substitution replacements */
3280af22 1948 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1949 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1950 {
599cee73 1951 if (ckWARN(WARN_SYNTAX))
9014280d 1952 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1953 *--s = '$';
1954 break;
1955 }
02aa26ce
NT
1956
1957 /* string-change backslash escapes */
3280af22 1958 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1959 --s;
1960 break;
1961 }
02aa26ce
NT
1962
1963 /* if we get here, it's either a quoted -, or a digit */
79072805 1964 switch (*s) {
02aa26ce
NT
1965
1966 /* quoted - in transliterations */
79072805 1967 case '-':
3280af22 1968 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1969 *d++ = *s++;
1970 continue;
1971 }
1972 /* FALL THROUGH */
1973 default:
11b8faa4 1974 {
041457d9
DM
1975 if (isALNUM(*s) &&
1976 *s != '_' &&
1977 ckWARN(WARN_MISC))
9014280d 1978 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1979 "Unrecognized escape \\%c passed through",
1980 *s);
1981 /* default action is to copy the quoted character */
f9a63242 1982 goto default_action;
11b8faa4 1983 }
02aa26ce
NT
1984
1985 /* \132 indicates an octal constant */
79072805
LW
1986 case '0': case '1': case '2': case '3':
1987 case '4': case '5': case '6': case '7':
ba210ebe 1988 {
53305cf1
NC
1989 I32 flags = 0;
1990 STRLEN len = 3;
1991 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1992 s += len;
1993 }
012bcf8d 1994 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1995
1996 /* \x24 indicates a hex constant */
79072805 1997 case 'x':
a0ed51b3
LW
1998 ++s;
1999 if (*s == '{') {
9d4ba2ae 2000 char* const e = strchr(s, '}');
a4c04bdc
NC
2001 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2002 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2003 STRLEN len;
355860ce 2004
53305cf1 2005 ++s;
adaeee49 2006 if (!e) {
a0ed51b3 2007 yyerror("Missing right brace on \\x{}");
355860ce 2008 continue;
ba210ebe 2009 }
53305cf1
NC
2010 len = e - s;
2011 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2012 s = e + 1;
a0ed51b3
LW
2013 }
2014 else {
ba210ebe 2015 {
53305cf1 2016 STRLEN len = 2;
a4c04bdc 2017 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2018 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2019 s += len;
2020 }
012bcf8d
GS
2021 }
2022
2023 NUM_ESCAPE_INSERT:
2024 /* Insert oct or hex escaped character.
301d3d20 2025 * There will always enough room in sv since such
db42d148 2026 * escapes will be longer than any UTF-8 sequence
301d3d20 2027 * they can end up as. */
ba7cea30 2028
c7f1f016
NIS
2029 /* We need to map to chars to ASCII before doing the tests
2030 to cover EBCDIC
2031 */
c4d5f83a 2032 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2033 if (!has_utf8 && uv > 255) {
301d3d20
JH
2034 /* Might need to recode whatever we have
2035 * accumulated so far if it contains any
2036 * hibit chars.
2037 *
2038 * (Can't we keep track of that and avoid
2039 * this rescan? --jhi)
012bcf8d 2040 */
c7f1f016 2041 int hicount = 0;
63cd0674
NIS
2042 U8 *c;
2043 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2044 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2045 hicount++;
db42d148 2046 }
012bcf8d 2047 }
63cd0674 2048 if (hicount) {
9d4ba2ae 2049 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2050 U8 *src, *dst;
2051 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2052 src = (U8 *)d - 1;
2053 dst = src+hicount;
2054 d += hicount;
cfd0369c 2055 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2056 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2057 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2058 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2059 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2060 }
2061 else {
63cd0674 2062 *dst-- = *src;
012bcf8d 2063 }
c7f1f016 2064 src--;
012bcf8d
GS
2065 }
2066 }
2067 }
2068
9aa983d2 2069 if (has_utf8 || uv > 255) {
9041c2e3 2070 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2071 has_utf8 = TRUE;
f9a63242
JH
2072 if (PL_lex_inwhat == OP_TRANS &&
2073 PL_sublex_info.sub_op) {
2074 PL_sublex_info.sub_op->op_private |=
2075 (PL_lex_repl ? OPpTRANS_FROM_UTF
2076 : OPpTRANS_TO_UTF);
f9a63242 2077 }
012bcf8d 2078 }
a0ed51b3 2079 else {
012bcf8d 2080 *d++ = (char)uv;
a0ed51b3 2081 }
012bcf8d
GS
2082 }
2083 else {
c4d5f83a 2084 *d++ = (char) uv;
a0ed51b3 2085 }
79072805 2086 continue;
02aa26ce 2087
b239daa5 2088 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2089 case 'N':
55eda711 2090 ++s;
423cee85
JH
2091 if (*s == '{') {
2092 char* e = strchr(s, '}');
155aba94 2093 SV *res;
423cee85 2094 STRLEN len;
cfd0369c 2095 const char *str;
4e553d73 2096
423cee85 2097 if (!e) {
5777a3f7 2098 yyerror("Missing right brace on \\N{}");
423cee85
JH
2099 e = s - 1;
2100 goto cont_scan;
2101 }
dbc0d4f2
JH
2102 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2103 /* \N{U+...} */
2104 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2105 PERL_SCAN_DISALLOW_PREFIX;
2106 s += 3;
2107 len = e - s;
2108 uv = grok_hex(s, &len, &flags, NULL);
2109 s = e + 1;
2110 goto NUM_ESCAPE_INSERT;
2111 }
55eda711 2112 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2113 res = new_constant( NULL, 0, "charnames",
a0714e2c 2114 res, NULL, "\\N{...}" );
f9a63242
JH
2115 if (has_utf8)
2116 sv_utf8_upgrade(res);
cfd0369c 2117 str = SvPV_const(res,len);
1c47067b
JH
2118#ifdef EBCDIC_NEVER_MIND
2119 /* charnames uses pack U and that has been
2120 * recently changed to do the below uni->native
2121 * mapping, so this would be redundant (and wrong,
2122 * the code point would be doubly converted).
2123 * But leave this in just in case the pack U change
2124 * gets revoked, but the semantics is still
2125 * desireable for charnames. --jhi */
cddc7ef4 2126 {
cfd0369c 2127 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2128
2129 if (uv < 0x100) {
89ebb4a3 2130 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2131
2132 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2133 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2134 str = SvPV_const(res, len);
cddc7ef4
JH
2135 }
2136 }
2137#endif
89491803 2138 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2139 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2140 SvCUR_set(sv, d - ostart);
2141 SvPOK_on(sv);
e4f3eed8 2142 *d = '\0';
f08d6ad9 2143 sv_utf8_upgrade(sv);
d2f449dd 2144 /* this just broke our allocation above... */
eb160463 2145 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2146 d = SvPVX(sv) + SvCUR(sv);
89491803 2147 has_utf8 = TRUE;
f08d6ad9 2148 }
eb160463 2149 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2150 const char * const odest = SvPVX_const(sv);
423cee85 2151
8973db79 2152 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2153 d = SvPVX(sv) + (d - odest);
2154 }
2155 Copy(str, d, len, char);
2156 d += len;
2157 SvREFCNT_dec(res);
2158 cont_scan:
2159 s = e + 1;
2160 }
2161 else
5777a3f7 2162 yyerror("Missing braces on \\N{}");
423cee85
JH
2163 continue;
2164
02aa26ce 2165 /* \c is a control character */
79072805
LW
2166 case 'c':
2167 s++;
961ce445 2168 if (s < send) {
ba210ebe 2169 U8 c = *s++;
c7f1f016
NIS
2170#ifdef EBCDIC
2171 if (isLOWER(c))
2172 c = toUPPER(c);
2173#endif
db42d148 2174 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2175 }
961ce445
RGS
2176 else {
2177 yyerror("Missing control char name in \\c");
2178 }
79072805 2179 continue;
02aa26ce
NT
2180
2181 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2182 case 'b':
db42d148 2183 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2184 break;
2185 case 'n':
db42d148 2186 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2187 break;
2188 case 'r':
db42d148 2189 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2190 break;
2191 case 'f':
db42d148 2192 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2193 break;
2194 case 't':
db42d148 2195 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2196 break;
34a3fe2a 2197 case 'e':
db42d148 2198 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2199 break;
2200 case 'a':
db42d148 2201 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2202 break;
02aa26ce
NT
2203 } /* end switch */
2204
79072805
LW
2205 s++;
2206 continue;
02aa26ce 2207 } /* end if (backslash) */
4c3a8340
TS
2208#ifdef EBCDIC
2209 else
2210 literal_endpoint++;
2211#endif
02aa26ce 2212
f9a63242 2213 default_action:
2b9d42f0
NIS
2214 /* If we started with encoded form, or already know we want it
2215 and then encode the next character */
2216 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2217 STRLEN len = 1;
5f66b61c
AL
2218 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2219 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2220 s += len;
2221 if (need > len) {
2222 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2223 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2224 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2225 }
5f66b61c 2226 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2227 has_utf8 = TRUE;
2228 }
2229 else {
2230 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2231 }
02aa26ce
NT
2232 } /* while loop to process each character */
2233
2234 /* terminate the string and set up the sv */
79072805 2235 *d = '\0';
95a20fc0 2236 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2237 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2238 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2239
79072805 2240 SvPOK_on(sv);
9f4817db 2241 if (PL_encoding && !has_utf8) {
d0063567
DK
2242 sv_recode_to_utf8(sv, PL_encoding);
2243 if (SvUTF8(sv))
2244 has_utf8 = TRUE;
9f4817db 2245 }
2b9d42f0 2246 if (has_utf8) {
7e2040f0 2247 SvUTF8_on(sv);
2b9d42f0 2248 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2249 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2250 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2251 }
2252 }
79072805 2253
02aa26ce 2254 /* shrink the sv if we allocated more than we used */
79072805 2255 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2256 SvPV_shrink_to_cur(sv);
79072805 2257 }
02aa26ce 2258
9b599b2a 2259 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2260 if (s > PL_bufptr) {
2261 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2262 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2263 sv, NULL,
4e553d73 2264 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2265 ? "tr"
3280af22 2266 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2267 ? "s"
2268 : "qq")));
79072805 2269 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2270 } else
8990e307 2271 SvREFCNT_dec(sv);
79072805
LW
2272 return s;
2273}
2274
ffb4593c
NT
2275/* S_intuit_more
2276 * Returns TRUE if there's more to the expression (e.g., a subscript),
2277 * FALSE otherwise.
ffb4593c
NT
2278 *
2279 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2280 *
2281 * ->[ and ->{ return TRUE
2282 * { and [ outside a pattern are always subscripts, so return TRUE
2283 * if we're outside a pattern and it's not { or [, then return FALSE
2284 * if we're in a pattern and the first char is a {
2285 * {4,5} (any digits around the comma) returns FALSE
2286 * if we're in a pattern and the first char is a [
2287 * [] returns FALSE
2288 * [SOMETHING] has a funky algorithm to decide whether it's a
2289 * character class or not. It has to deal with things like
2290 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2291 * anything else returns TRUE
2292 */
2293
9cbb5ea2
GS
2294/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2295
76e3520e 2296STATIC int
cea2e8a9 2297S_intuit_more(pTHX_ register char *s)
79072805 2298{
97aff369 2299 dVAR;
3280af22 2300 if (PL_lex_brackets)
79072805
LW
2301 return TRUE;
2302 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2303 return TRUE;
2304 if (*s != '{' && *s != '[')
2305 return FALSE;
3280af22 2306 if (!PL_lex_inpat)
79072805
LW
2307 return TRUE;
2308
2309 /* In a pattern, so maybe we have {n,m}. */
2310 if (*s == '{') {
2311 s++;
2312 if (!isDIGIT(*s))
2313 return TRUE;
2314 while (isDIGIT(*s))
2315 s++;
2316 if (*s == ',')
2317 s++;
2318 while (isDIGIT(*s))
2319 s++;
2320 if (*s == '}')
2321 return FALSE;
2322 return TRUE;
2323
2324 }
2325
2326 /* On the other hand, maybe we have a character class */
2327
2328 s++;
2329 if (*s == ']' || *s == '^')
2330 return FALSE;
2331 else {
ffb4593c 2332 /* this is terrifying, and it works */
79072805
LW
2333 int weight = 2; /* let's weigh the evidence */
2334 char seen[256];
f27ffc4a 2335 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2336 const char * const send = strchr(s,']');
3280af22 2337 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2338
2339 if (!send) /* has to be an expression */
2340 return TRUE;
2341
2342 Zero(seen,256,char);
2343 if (*s == '$')
2344 weight -= 3;
2345 else if (isDIGIT(*s)) {
2346 if (s[1] != ']') {
2347 if (isDIGIT(s[1]) && s[2] == ']')
2348 weight -= 10;
2349 }
2350 else
2351 weight -= 100;
2352 }
2353 for (; s < send; s++) {
2354 last_un_char = un_char;
2355 un_char = (unsigned char)*s;
2356 switch (*s) {
2357 case '@':
2358 case '&':
2359 case '$':
2360 weight -= seen[un_char] * 10;
7e2040f0 2361 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2362 int len;
8903cb82 2363 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2364 len = (int)strlen(tmpbuf);
2365 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2366 weight -= 100;
2367 else
2368 weight -= 10;
2369 }
2370 else if (*s == '$' && s[1] &&
93a17b20
LW
2371 strchr("[#!%*<>()-=",s[1])) {
2372 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2373 weight -= 10;
2374 else
2375 weight -= 1;
2376 }
2377 break;
2378 case '\\':
2379 un_char = 254;
2380 if (s[1]) {
93a17b20 2381 if (strchr("wds]",s[1]))
79072805
LW
2382 weight += 100;
2383 else if (seen['\''] || seen['"'])
2384 weight += 1;
93a17b20 2385 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2386 weight += 40;
2387 else if (isDIGIT(s[1])) {
2388 weight += 40;
2389 while (s[1] && isDIGIT(s[1]))
2390 s++;
2391 }
2392 }
2393 else
2394 weight += 100;
2395 break;
2396 case '-':
2397 if (s[1] == '\\')
2398 weight += 50;
93a17b20 2399 if (strchr("aA01! ",last_un_char))
79072805 2400 weight += 30;
93a17b20 2401 if (strchr("zZ79~",s[1]))
79072805 2402 weight += 30;
f27ffc4a
GS
2403 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2404 weight -= 5; /* cope with negative subscript */
79072805
LW
2405 break;
2406 default:
3792a11b
NC
2407 if (!isALNUM(last_un_char)
2408 && !(last_un_char == '$' || last_un_char == '@'
2409 || last_un_char == '&')
2410 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2411 char *d = tmpbuf;
2412 while (isALPHA(*s))
2413 *d++ = *s++;
2414 *d = '\0';
2415 if (keyword(tmpbuf, d - tmpbuf))
2416 weight -= 150;
2417 }
2418 if (un_char == last_un_char + 1)
2419 weight += 5;
2420 weight -= seen[un_char];
2421 break;
2422 }
2423 seen[un_char]++;
2424 }
2425 if (weight >= 0) /* probably a character class */
2426 return FALSE;
2427 }
2428
2429 return TRUE;
2430}
ffed7fef 2431
ffb4593c
NT
2432/*
2433 * S_intuit_method
2434 *
2435 * Does all the checking to disambiguate
2436 * foo bar
2437 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2438 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2439 *
2440 * First argument is the stuff after the first token, e.g. "bar".
2441 *
2442 * Not a method if bar is a filehandle.
2443 * Not a method if foo is a subroutine prototyped to take a filehandle.
2444 * Not a method if it's really "Foo $bar"
2445 * Method if it's "foo $bar"
2446 * Not a method if it's really "print foo $bar"
2447 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2448 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2449 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2450 * =>
2451 */
2452
76e3520e 2453STATIC int
62d55b22 2454S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2455{
97aff369 2456 dVAR;
a0d0e21e 2457 char *s = start + (*start == '$');
3280af22 2458 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2459 STRLEN len;
2460 GV* indirgv;
5db06880
NC
2461#ifdef PERL_MAD
2462 int soff;
2463#endif
a0d0e21e
LW
2464
2465 if (gv) {
62d55b22 2466 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2467 return 0;
62d55b22
NC
2468 if (cv) {
2469 if (SvPOK(cv)) {
2470 const char *proto = SvPVX_const(cv);
2471 if (proto) {
2472 if (*proto == ';')
2473 proto++;
2474 if (*proto == '*')
2475 return 0;
2476 }
b6c543e3
IZ
2477 }
2478 } else
a0d0e21e
LW
2479 gv = 0;
2480 }
8903cb82 2481 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2482 /* start is the beginning of the possible filehandle/object,
2483 * and s is the end of it
2484 * tmpbuf is a copy of it
2485 */
2486
a0d0e21e 2487 if (*start == '$') {
3280af22 2488 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2489 return 0;
5db06880
NC
2490#ifdef PERL_MAD
2491 len = start - SvPVX(PL_linestr);
2492#endif
29595ff2 2493 s = PEEKSPACE(s);
5db06880
NC
2494#ifdef PERLMAD
2495 start = SvPVX(PL_linestr) + len;
2496#endif
3280af22
NIS
2497 PL_bufptr = start;
2498 PL_expect = XREF;
a0d0e21e
LW
2499 return *s == '(' ? FUNCMETH : METHOD;
2500 }
2501 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2502 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2503 len -= 2;
2504 tmpbuf[len] = '\0';
5db06880
NC
2505#ifdef PERL_MAD
2506 soff = s - SvPVX(PL_linestr);
2507#endif
c3e0f903
GS
2508 goto bare_package;
2509 }
90e5519e 2510 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2511 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2512 return 0;
2513 /* filehandle or package name makes it a method */
89bfa8cd 2514 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2515#ifdef PERL_MAD
2516 soff = s - SvPVX(PL_linestr);
2517#endif
29595ff2 2518 s = PEEKSPACE(s);
3280af22 2519 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2520 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2521 bare_package:
cd81e915 2522 start_force(PL_curforce);
9ded7720 2523 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2524 newSVpvn(tmpbuf,len));
9ded7720 2525 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2526 if (PL_madskills)
2527 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2528 PL_expect = XTERM;
a0d0e21e 2529 force_next(WORD);
3280af22 2530 PL_bufptr = s;
5db06880
NC
2531#ifdef PERL_MAD
2532 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2533#endif
a0d0e21e
LW
2534 return *s == '(' ? FUNCMETH : METHOD;
2535 }
2536 }
2537 return 0;
2538}
2539
ffb4593c
NT
2540/*
2541 * S_incl_perldb
2542 * Return a string of Perl code to load the debugger. If PERL5DB
2543 * is set, it will return the contents of that, otherwise a
2544 * compile-time require of perl5db.pl.
2545 */
2546
bfed75c6 2547STATIC const char*
cea2e8a9 2548S_incl_perldb(pTHX)
a0d0e21e 2549{
97aff369 2550 dVAR;
3280af22 2551 if (PL_perldb) {
9d4ba2ae 2552 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2553
2554 if (pdb)
2555 return pdb;
93189314 2556 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2557 return "BEGIN { require 'perl5db.pl' }";
2558 }
2559 return "";
2560}
2561
2562
16d20bd9 2563/* Encoded script support. filter_add() effectively inserts a
4e553d73 2564 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2565 * Note that the filter function only applies to the current source file
2566 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2567 *
2568 * The datasv parameter (which may be NULL) can be used to pass
2569 * private data to this instance of the filter. The filter function
2570 * can recover the SV using the FILTER_DATA macro and use it to
2571 * store private buffers and state information.
2572 *
2573 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2574 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2575 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2576 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2577 * private use must be set using malloc'd pointers.
2578 */
16d20bd9
AD
2579
2580SV *
864dbfa3 2581Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2582{
97aff369 2583 dVAR;
f4c556ac 2584 if (!funcp)
a0714e2c 2585 return NULL;
f4c556ac 2586
3280af22
NIS
2587 if (!PL_rsfp_filters)
2588 PL_rsfp_filters = newAV();
16d20bd9 2589 if (!datasv)
561b68a9 2590 datasv = newSV(0);
862a34c6 2591 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2592 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2593 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2594 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2595 FPTR2DPTR(void *, IoANY(datasv)),
2596 SvPV_nolen(datasv)));
3280af22
NIS
2597 av_unshift(PL_rsfp_filters, 1);
2598 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2599 return(datasv);
2600}
4e553d73 2601
16d20bd9
AD
2602
2603/* Delete most recently added instance of this filter function. */
a0d0e21e 2604void
864dbfa3 2605Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2606{
97aff369 2607 dVAR;
e0c19803 2608 SV *datasv;
24801a4b 2609
33073adb 2610#ifdef DEBUGGING
55662e27
JH
2611 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2612 FPTR2DPTR(void*, funcp)));
33073adb 2613#endif
3280af22 2614 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2615 return;
2616 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2617 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2618 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2619 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2620 IoANY(datasv) = (void *)NULL;
3280af22 2621 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2622
16d20bd9
AD
2623 return;
2624 }
2625 /* we need to search for the correct entry and clear it */
cea2e8a9 2626 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2627}
2628
2629
1de9afcd
RGS
2630/* Invoke the idxth filter function for the current rsfp. */
2631/* maxlen 0 = read one text line */
16d20bd9 2632I32
864dbfa3 2633Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2634{
97aff369 2635 dVAR;
16d20bd9
AD
2636 filter_t funcp;
2637 SV *datasv = NULL;
f482118e
NC
2638 /* This API is bad. It should have been using unsigned int for maxlen.
2639 Not sure if we want to change the API, but if not we should sanity
2640 check the value here. */
39cd7a59
NC
2641 const unsigned int correct_length
2642 = maxlen < 0 ?
2643#ifdef PERL_MICRO
2644 0x7FFFFFFF
2645#else
2646 INT_MAX
2647#endif
2648 : maxlen;
e50aee73 2649
3280af22 2650 if (!PL_rsfp_filters)
16d20bd9 2651 return -1;
1de9afcd 2652 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2653 /* Provide a default input filter to make life easy. */
2654 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2655 DEBUG_P(PerlIO_printf(Perl_debug_log,
2656 "filter_read %d: from rsfp\n", idx));
f482118e 2657 if (correct_length) {
16d20bd9
AD
2658 /* Want a block */
2659 int len ;
f54cb97a 2660 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2661
2662 /* ensure buf_sv is large enough */
f482118e
NC
2663 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2664 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2665 correct_length)) <= 0) {
3280af22 2666 if (PerlIO_error(PL_rsfp))
37120919
AD
2667 return -1; /* error */
2668 else
2669 return 0 ; /* end of file */
2670 }
16d20bd9
AD
2671 SvCUR_set(buf_sv, old_len + len) ;
2672 } else {
2673 /* Want a line */
3280af22
NIS
2674 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2675 if (PerlIO_error(PL_rsfp))
37120919
AD
2676 return -1; /* error */
2677 else
2678 return 0 ; /* end of file */
2679 }
16d20bd9
AD
2680 }
2681 return SvCUR(buf_sv);
2682 }
2683 /* Skip this filter slot if filter has been deleted */
1de9afcd 2684 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2685 DEBUG_P(PerlIO_printf(Perl_debug_log,
2686 "filter_read %d: skipped (filter deleted)\n",
2687 idx));
f482118e 2688 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2689 }
2690 /* Get function pointer hidden within datasv */
8141890a 2691 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2692 DEBUG_P(PerlIO_printf(Perl_debug_log,
2693 "filter_read %d: via function %p (%s)\n",
ca0270c4 2694 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2695 /* Call function. The function is expected to */
2696 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2697 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2698 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2699}
2700
76e3520e 2701STATIC char *
cea2e8a9 2702S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2703{
97aff369 2704 dVAR;
c39cd008 2705#ifdef PERL_CR_FILTER
3280af22 2706 if (!PL_rsfp_filters) {
c39cd008 2707 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2708 }
2709#endif
3280af22 2710 if (PL_rsfp_filters) {
55497cff 2711 if (!append)
2712 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2713 if (FILTER_READ(0, sv, 0) > 0)
2714 return ( SvPVX(sv) ) ;
2715 else
bd61b366 2716 return NULL ;
16d20bd9 2717 }
9d116dd7 2718 else
fd049845 2719 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2720}
2721
01ec43d0 2722STATIC HV *
7fc63493 2723S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2724{
97aff369 2725 dVAR;
def3634b
GS
2726 GV *gv;
2727
01ec43d0 2728 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2729 return PL_curstash;
2730
2731 if (len > 2 &&
2732 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2733 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2734 {
2735 return GvHV(gv); /* Foo:: */
def3634b
GS
2736 }
2737
2738 /* use constant CLASS => 'MyClass' */
90e5519e 2739 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2740 SV *sv;
2741 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2742 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2743 }
2744 }
2745
2746 return gv_stashpv(pkgname, FALSE);
2747}
a0d0e21e 2748
5db06880
NC
2749#ifdef PERL_MAD
2750 /*
2751 * Perl_madlex
2752 * The intent of this yylex wrapper is to minimize the changes to the
2753 * tokener when we aren't interested in collecting madprops. It remains
2754 * to be seen how successful this strategy will be...
2755 */
2756
2757int
2758Perl_madlex(pTHX)
2759{
2760 int optype;
2761 char *s = PL_bufptr;
2762
cd81e915
NC
2763 /* make sure PL_thiswhite is initialized */
2764 PL_thiswhite = 0;
2765 PL_thismad = 0;
5db06880 2766
cd81e915 2767 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2768 if (PL_pending_ident)
2769 return S_pending_ident(aTHX);
2770
2771 /* previous token ate up our whitespace? */
cd81e915
NC
2772 if (!PL_lasttoke && PL_nextwhite) {
2773 PL_thiswhite = PL_nextwhite;
2774 PL_nextwhite = 0;
5db06880
NC
2775 }
2776
2777 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2778 PL_realtokenstart = -1;
2779 PL_thistoken = 0;
5db06880
NC
2780 optype = yylex();
2781 s = PL_bufptr;
cd81e915 2782 assert(PL_curforce < 0);
5db06880 2783
cd81e915
NC
2784 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2785 if (!PL_thistoken) {
2786 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2787 PL_thistoken = newSVpvn("",0);
5db06880 2788 else {
cd81e915
NC
2789 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2790 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2791 }
2792 }
cd81e915
NC
2793 if (PL_thismad) /* install head */
2794 CURMAD('X', PL_thistoken);
5db06880
NC
2795 }
2796
2797 /* last whitespace of a sublex? */
cd81e915
NC
2798 if (optype == ')' && PL_endwhite) {
2799 CURMAD('X', PL_endwhite);
5db06880
NC
2800 }
2801
cd81e915 2802 if (!PL_thismad) {
5db06880
NC
2803
2804 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2805 if (!PL_thiswhite && !PL_endwhite && !optype) {
2806 sv_free(PL_thistoken);
2807 PL_thistoken = 0;
5db06880
NC
2808 return 0;
2809 }
2810
2811 /* put off final whitespace till peg */
2812 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2813 PL_nextwhite = PL_thiswhite;
2814 PL_thiswhite = 0;
5db06880 2815 }
cd81e915
NC
2816 else if (PL_thisopen) {
2817 CURMAD('q', PL_thisopen);
2818 if (PL_thistoken)
2819 sv_free(PL_thistoken);
2820 PL_thistoken = 0;
5db06880
NC
2821 }
2822 else {
2823 /* Store actual token text as madprop X */
cd81e915 2824 CURMAD('X', PL_thistoken);
5db06880
NC
2825 }
2826
cd81e915 2827 if (PL_thiswhite) {
5db06880 2828 /* add preceding whitespace as madprop _ */
cd81e915 2829 CURMAD('_', PL_thiswhite);
5db06880
NC
2830 }
2831
cd81e915 2832 if (PL_thisstuff) {
5db06880 2833 /* add quoted material as madprop = */
cd81e915 2834 CURMAD('=', PL_thisstuff);
5db06880
NC
2835 }
2836
cd81e915 2837 if (PL_thisclose) {
5db06880 2838 /* add terminating quote as madprop Q */
cd81e915 2839 CURMAD('Q', PL_thisclose);
5db06880
NC
2840 }
2841 }
2842
2843 /* special processing based on optype */
2844
2845 switch (optype) {
2846
2847 /* opval doesn't need a TOKEN since it can already store mp */
2848 case WORD:
2849 case METHOD:
2850 case FUNCMETH:
2851 case THING:
2852 case PMFUNC:
2853 case PRIVATEREF:
2854 case FUNC0SUB:
2855 case UNIOPSUB:
2856 case LSTOPSUB:
2857 if (yylval.opval)
cd81e915
NC
2858 append_madprops(PL_thismad, yylval.opval, 0);
2859 PL_thismad = 0;
5db06880
NC
2860 return optype;
2861
2862 /* fake EOF */
2863 case 0:
2864 optype = PEG;
cd81e915
NC
2865 if (PL_endwhite) {
2866 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2867 PL_endwhite = 0;
5db06880
NC
2868 }
2869 break;
2870
2871 case ']':
2872 case '}':
cd81e915 2873 if (PL_faketokens)
5db06880
NC
2874 break;
2875 /* remember any fake bracket that lexer is about to discard */
2876 if (PL_lex_brackets == 1 &&
2877 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2878 {
2879 s = PL_bufptr;
2880 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2881 s++;
2882 if (*s == '}') {
cd81e915
NC
2883 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2884 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2885 PL_thiswhite = 0;
5db06880
NC
2886 PL_bufptr = s - 1;
2887 break; /* don't bother looking for trailing comment */
2888 }
2889 else
2890 s = PL_bufptr;
2891 }
2892 if (optype == ']')
2893 break;
2894 /* FALLTHROUGH */
2895
2896 /* attach a trailing comment to its statement instead of next token */
2897 case ';':
cd81e915 2898 if (PL_faketokens)
5db06880
NC
2899 break;
2900 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2901 s = PL_bufptr;
2902 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2903 s++;
2904 if (*s == '\n' || *s == '#') {
2905 while (s < PL_bufend && *s != '\n')
2906 s++;
2907 if (s < PL_bufend)
2908 s++;
cd81e915
NC
2909 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2910 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2911 PL_thiswhite = 0;
5db06880
NC
2912 PL_bufptr = s;
2913 }
2914 }
2915 break;
2916
2917 /* pval */
2918 case LABEL:
2919 break;
2920
2921 /* ival */
2922 default:
2923 break;
2924
2925 }
2926
2927 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
2928 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
2929 PL_thismad = 0;
5db06880
NC
2930 return optype;
2931}
2932#endif
2933
468aa647 2934STATIC char *
cc6ed77d 2935S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2936 dVAR;
468aa647
RGS
2937 if (PL_expect != XSTATE)
2938 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2939 is_use ? "use" : "no"));
29595ff2 2940 s = SKIPSPACE1(s);
468aa647
RGS
2941 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2942 s = force_version(s, TRUE);
29595ff2 2943 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 2944 start_force(PL_curforce);
9ded7720 2945 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2946 force_next(WORD);
2947 }
2948 else if (*s == 'v') {
2949 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2950 s = force_version(s, FALSE);
2951 }
2952 }
2953 else {
2954 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2955 s = force_version(s, FALSE);
2956 }
2957 yylval.ival = is_use;
2958 return s;
2959}
748a9306 2960#ifdef DEBUGGING
27da23d5 2961 static const char* const exp_name[] =
09bef843 2962 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2963 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2964 };
748a9306 2965#endif
463ee0b2 2966
02aa26ce
NT
2967/*
2968 yylex
2969
2970 Works out what to call the token just pulled out of the input
2971 stream. The yacc parser takes care of taking the ops we return and
2972 stitching them into a tree.
2973
2974 Returns:
2975 PRIVATEREF
2976
2977 Structure:
2978 if read an identifier
2979 if we're in a my declaration
2980 croak if they tried to say my($foo::bar)
2981 build the ops for a my() declaration
2982 if it's an access to a my() variable
2983 are we in a sort block?
2984 croak if my($a); $a <=> $b
2985 build ops for access to a my() variable
2986 if in a dq string, and they've said @foo and we can't find @foo
2987 croak
2988 build ops for a bareword
2989 if we already built the token before, use it.
2990*/
2991
20141f0e 2992
dba4d153
JH
2993#ifdef __SC__
2994#pragma segment Perl_yylex
2995#endif
dba4d153 2996int
dba4d153 2997Perl_yylex(pTHX)
20141f0e 2998{
97aff369 2999 dVAR;
3afc138a 3000 register char *s = PL_bufptr;
378cc40b 3001 register char *d;
463ee0b2 3002 STRLEN len;
aa7440fb 3003 bool bof = FALSE;
a687059c 3004
bbf60fe6 3005 DEBUG_T( {
396482e1 3006 SV* tmp = newSVpvs("");
b6007c36
DM
3007 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3008 (IV)CopLINE(PL_curcop),
3009 lex_state_names[PL_lex_state],
3010 exp_name[PL_expect],
3011 pv_display(tmp, s, strlen(s), 0, 60));
3012 SvREFCNT_dec(tmp);
bbf60fe6 3013 } );
02aa26ce 3014 /* check if there's an identifier for us to look at */
ba979b31 3015 if (PL_pending_ident)
bbf60fe6 3016 return REPORT(S_pending_ident(aTHX));
bbce6d69 3017
02aa26ce
NT
3018 /* no identifier pending identification */
3019
3280af22 3020 switch (PL_lex_state) {
79072805
LW
3021#ifdef COMMENTARY
3022 case LEX_NORMAL: /* Some compilers will produce faster */
3023 case LEX_INTERPNORMAL: /* code if we comment these out. */
3024 break;
3025#endif
3026
09bef843 3027 /* when we've already built the next token, just pull it out of the queue */
79072805 3028 case LEX_KNOWNEXT:
5db06880
NC
3029#ifdef PERL_MAD
3030 PL_lasttoke--;
3031 yylval = PL_nexttoke[PL_lasttoke].next_val;
3032 if (PL_madskills) {
cd81e915 3033 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3034 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3035 if (PL_thismad && PL_thismad->mad_key == '_') {
3036 PL_thiswhite = (SV*)PL_thismad->mad_val;
3037 PL_thismad->mad_val = 0;
3038 mad_free(PL_thismad);
3039 PL_thismad = 0;
5db06880
NC
3040 }
3041 }
3042 if (!PL_lasttoke) {
3043 PL_lex_state = PL_lex_defer;
3044 PL_expect = PL_lex_expect;
3045 PL_lex_defer = LEX_NORMAL;
3046 if (!PL_nexttoke[PL_lasttoke].next_type)
3047 return yylex();
3048 }
3049#else
3280af22 3050 PL_nexttoke--;
5db06880 3051 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3052 if (!PL_nexttoke) {
3053 PL_lex_state = PL_lex_defer;
3054 PL_expect = PL_lex_expect;
3055 PL_lex_defer = LEX_NORMAL;
463ee0b2 3056 }
5db06880
NC
3057#endif
3058#ifdef PERL_MAD
3059 /* FIXME - can these be merged? */
3060 return(PL_nexttoke[PL_lasttoke].next_type);
3061#else
bbf60fe6 3062 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3063#endif
79072805 3064
02aa26ce 3065 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3066 when we get here, PL_bufptr is at the \
02aa26ce 3067 */
79072805
LW
3068 case LEX_INTERPCASEMOD:
3069#ifdef DEBUGGING
3280af22 3070 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3071 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3072#endif
02aa26ce 3073 /* handle \E or end of string */
3280af22 3074 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3075 /* if at a \E */
3280af22 3076 if (PL_lex_casemods) {
f54cb97a 3077 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3078 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3079
3792a11b
NC
3080 if (PL_bufptr != PL_bufend
3081 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3082 PL_bufptr += 2;
3083 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3084#ifdef PERL_MAD
3085 if (PL_madskills)
cd81e915 3086 PL_thistoken = newSVpvn("\\E",2);
5db06880 3087#endif
a0d0e21e 3088 }
bbf60fe6 3089 return REPORT(')');
79072805 3090 }
5db06880
NC
3091#ifdef PERL_MAD
3092 while (PL_bufptr != PL_bufend &&
3093 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3094 if (!PL_thiswhite)
3095 PL_thiswhite = newSVpvn("",0);
3096 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3097 PL_bufptr += 2;
3098 }
3099#else
3280af22
NIS
3100 if (PL_bufptr != PL_bufend)
3101 PL_bufptr += 2;
5db06880 3102#endif
3280af22 3103 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3104 return yylex();
79072805
LW
3105 }
3106 else {
607df283 3107 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3108 "### Saw case modifier\n"); });
3280af22 3109 s = PL_bufptr + 1;
6e909404 3110 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3111#ifdef PERL_MAD
cd81e915
NC
3112 if (!PL_thiswhite)
3113 PL_thiswhite = newSVpvn("",0);
3114 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3115#endif
89122651 3116 PL_bufptr = s + 3;
6e909404
JH
3117 PL_lex_state = LEX_INTERPCONCAT;
3118 return yylex();
a0d0e21e 3119 }
6e909404 3120 else {
90771dc0 3121 I32 tmp;
5db06880
NC
3122 if (!PL_madskills) /* when just compiling don't need correct */
3123 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3124 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3125 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3126 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3127 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3128 return REPORT(')');
6e909404
JH
3129 }
3130 if (PL_lex_casemods > 10)
3131 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3132 PL_lex_casestack[PL_lex_casemods++] = *s;
3133 PL_lex_casestack[PL_lex_casemods] = '\0';
3134 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3135 start_force(PL_curforce);
9ded7720 3136 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3137 force_next('(');
cd81e915 3138 start_force(PL_curforce);
6e909404 3139 if (*s == 'l')
9ded7720 3140 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3141 else if (*s == 'u')
9ded7720 3142 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3143 else if (*s == 'L')
9ded7720 3144 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3145 else if (*s == 'U')
9ded7720 3146 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3147 else if (*s == 'Q')
9ded7720 3148 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3149 else
3150 Perl_croak(aTHX_ "panic: yylex");
5db06880 3151 if (PL_madskills) {
d4c19fe8 3152 SV* const tmpsv = newSVpvn("",0);
5db06880
NC
3153 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3154 curmad('_', tmpsv);
3155 }
6e909404 3156 PL_bufptr = s + 1;
a0d0e21e 3157 }
79072805 3158 force_next(FUNC);
3280af22
NIS
3159 if (PL_lex_starts) {
3160 s = PL_bufptr;
3161 PL_lex_starts = 0;
5db06880
NC
3162#ifdef PERL_MAD
3163 if (PL_madskills) {
cd81e915
NC
3164 if (PL_thistoken)
3165 sv_free(PL_thistoken);
3166 PL_thistoken = newSVpvn("",0);
5db06880
NC
3167 }
3168#endif
131b3ad0
DM
3169 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3170 if (PL_lex_casemods == 1 && PL_lex_inpat)
3171 OPERATOR(',');
3172 else
3173 Aop(OP_CONCAT);
79072805
LW
3174 }
3175 else
cea2e8a9 3176 return yylex();
79072805
LW
3177 }
3178
55497cff 3179 case LEX_INTERPPUSH:
bbf60fe6 3180 return REPORT(sublex_push());
55497cff 3181
79072805 3182 case LEX_INTERPSTART:
3280af22 3183 if (PL_bufptr == PL_bufend)
bbf60fe6 3184 return REPORT(sublex_done());
607df283 3185 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3186 "### Interpolated variable\n"); });
3280af22
NIS
3187 PL_expect = XTERM;
3188 PL_lex_dojoin = (*PL_bufptr == '@');
3189 PL_lex_state = LEX_INTERPNORMAL;
3190 if (PL_lex_dojoin) {
cd81e915 3191 start_force(PL_curforce);
9ded7720 3192 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3193 force_next(',');
cd81e915 3194 start_force(PL_curforce);
a0d0e21e 3195 force_ident("\"", '$');
cd81e915 3196 start_force(PL_curforce);
9ded7720 3197 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3198 force_next('$');
cd81e915 3199 start_force(PL_curforce);
9ded7720 3200 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3201 force_next('(');
cd81e915 3202 start_force(PL_curforce);
9ded7720 3203 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3204 force_next(FUNC);
3205 }
3280af22
NIS
3206 if (PL_lex_starts++) {
3207 s = PL_bufptr;
5db06880
NC
3208#ifdef PERL_MAD
3209 if (PL_madskills) {
cd81e915
NC
3210 if (PL_thistoken)
3211 sv_free(PL_thistoken);
3212 PL_thistoken = newSVpvn("",0);
5db06880
NC
3213 }
3214#endif
131b3ad0
DM
3215 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3216 if (!PL_lex_casemods && PL_lex_inpat)
3217 OPERATOR(',');
3218 else
3219 Aop(OP_CONCAT);
79072805 3220 }
cea2e8a9 3221 return yylex();
79072805
LW
3222
3223 case LEX_INTERPENDMAYBE:
3280af22
NIS
3224 if (intuit_more(PL_bufptr)) {
3225 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3226 break;
3227 }
3228 /* FALL THROUGH */
3229
3230 case LEX_INTERPEND:
3280af22
NIS
3231 if (PL_lex_dojoin) {
3232 PL_lex_dojoin = FALSE;
3233 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3234#ifdef PERL_MAD
3235 if (PL_madskills) {
cd81e915
NC
3236 if (PL_thistoken)
3237 sv_free(PL_thistoken);
3238 PL_thistoken = newSVpvn("",0);
5db06880
NC
3239 }
3240#endif
bbf60fe6 3241 return REPORT(')');
79072805 3242 }
43a16006 3243 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3244 && SvEVALED(PL_lex_repl))
43a16006 3245 {
e9fa98b2 3246 if (PL_bufptr != PL_bufend)
cea2e8a9 3247 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3248 PL_lex_repl = NULL;
e9fa98b2 3249 }
79072805
LW
3250 /* FALLTHROUGH */
3251 case LEX_INTERPCONCAT:
3252#ifdef DEBUGGING
3280af22 3253 if (PL_lex_brackets)
cea2e8a9 3254 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3255#endif
3280af22 3256 if (PL_bufptr == PL_bufend)
bbf60fe6 3257 return REPORT(sublex_done());
79072805 3258
3280af22
NIS
3259 if (SvIVX(PL_linestr) == '\'') {
3260 SV *sv = newSVsv(PL_linestr);
3261 if (!PL_lex_inpat)
76e3520e 3262 sv = tokeq(sv);
3280af22 3263 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3264 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3265 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3266 s = PL_bufend;
79072805
LW
3267 }
3268 else {
3280af22 3269 s = scan_const(PL_bufptr);
79072805 3270 if (*s == '\\')
3280af22 3271 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3272 else
3280af22 3273 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3274 }
3275
3280af22 3276 if (s != PL_bufptr) {
cd81e915 3277 start_force(PL_curforce);
5db06880
NC
3278 if (PL_madskills) {
3279 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3280 }
9ded7720 3281 NEXTVAL_NEXTTOKE = yylval;
3280af22 3282 PL_expect = XTERM;
79072805 3283 force_next(THING);
131b3ad0 3284 if (PL_lex_starts++) {
5db06880
NC
3285#ifdef PERL_MAD
3286 if (PL_madskills) {
cd81e915
NC
3287 if (PL_thistoken)
3288 sv_free(PL_thistoken);
3289 PL_thistoken = newSVpvn("",0);
5db06880
NC
3290 }
3291#endif
131b3ad0
DM
3292 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3293 if (!PL_lex_casemods && PL_lex_inpat)
3294 OPERATOR(',');
3295 else
3296 Aop(OP_CONCAT);
3297 }
79072805 3298 else {
3280af22 3299 PL_bufptr = s;
cea2e8a9 3300 return yylex();
79072805
LW
3301 }
3302 }
3303
cea2e8a9 3304 return yylex();
a0d0e21e 3305 case LEX_FORMLINE:
3280af22
NIS
3306 PL_lex_state = LEX_NORMAL;
3307 s = scan_formline(PL_bufptr);
3308 if (!PL_lex_formbrack)
a0d0e21e
LW
3309 goto rightbracket;
3310 OPERATOR(';');
79072805
LW
3311 }
3312
3280af22
NIS
3313 s = PL_bufptr;
3314 PL_oldoldbufptr = PL_oldbufptr;
3315 PL_oldbufptr = s;
463ee0b2
LW
3316
3317 retry:
5db06880 3318#ifdef PERL_MAD
cd81e915
NC
3319 if (PL_thistoken) {
3320 sv_free(PL_thistoken);
3321 PL_thistoken = 0;
5db06880 3322 }
cd81e915 3323 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3324#endif
378cc40b
LW
3325 switch (*s) {
3326 default:
7e2040f0 3327 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3328 goto keylookup;
cea2e8a9 3329 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3330 case 4:
3331 case 26:
3332 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3333 case 0:
5db06880
NC
3334#ifdef PERL_MAD
3335 if (PL_madskills)
cd81e915 3336 PL_faketokens = 0;
5db06880 3337#endif
3280af22
NIS
3338 if (!PL_rsfp) {
3339 PL_last_uni = 0;
3340 PL_last_lop = 0;
c5ee2135 3341 if (PL_lex_brackets) {
0bd48802
AL
3342 yyerror(PL_lex_formbrack
3343 ? "Format not terminated"
3344 : "Missing right curly or square bracket");
c5ee2135 3345 }
4e553d73 3346 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3347 "### Tokener got EOF\n");
5f80b19c 3348 } );
79072805 3349 TOKEN(0);
463ee0b2 3350 }
3280af22 3351 if (s++ < PL_bufend)
a687059c 3352 goto retry; /* ignore stray nulls */
3280af22
NIS
3353 PL_last_uni = 0;
3354 PL_last_lop = 0;
3355 if (!PL_in_eval && !PL_preambled) {
3356 PL_preambled = TRUE;
5db06880
NC
3357#ifdef PERL_MAD
3358 if (PL_madskills)
cd81e915 3359 PL_faketokens = 1;
5db06880 3360#endif
3280af22
NIS
3361 sv_setpv(PL_linestr,incl_perldb());
3362 if (SvCUR(PL_linestr))
396482e1 3363 sv_catpvs(PL_linestr,";");
3280af22
NIS
3364 if (PL_preambleav){
3365 while(AvFILLp(PL_preambleav) >= 0) {
3366 SV *tmpsv = av_shift(PL_preambleav);
3367 sv_catsv(PL_linestr, tmpsv);
396482e1 3368 sv_catpvs(PL_linestr, ";");
91b7def8 3369 sv_free(tmpsv);
3370 }
3280af22
NIS
3371 sv_free((SV*)PL_preambleav);
3372 PL_preambleav = NULL;
91b7def8 3373 }
3280af22 3374 if (PL_minus_n || PL_minus_p) {
396482e1 3375 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3376 if (PL_minus_l)
396482e1 3377 sv_catpvs(PL_linestr,"chomp;");
3280af22 3378 if (PL_minus_a) {
3280af22 3379 if (PL_minus_F) {
3792a11b
NC
3380 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3381 || *PL_splitstr == '"')
3280af22 3382 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3383 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3384 else {
c8ef6a4b
NC
3385 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3386 bytes can be used as quoting characters. :-) */
dd374669 3387 const char *splits = PL_splitstr;
91d456ae 3388 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3389 do {
3390 /* Need to \ \s */
dd374669
AL
3391 if (*splits == '\\')
3392 sv_catpvn(PL_linestr, splits, 1);
3393 sv_catpvn(PL_linestr, splits, 1);
3394 } while (*splits++);
48c4c863
NC
3395 /* This loop will embed the trailing NUL of
3396 PL_linestr as the last thing it does before
3397 terminating. */
396482e1 3398 sv_catpvs(PL_linestr, ");");
54310121 3399 }
2304df62
AD
3400 }
3401 else
396482e1 3402 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3403 }
79072805 3404 }
bc9b29db 3405 if (PL_minus_E)
396482e1
GA
3406 sv_catpvs(PL_linestr,"use feature ':5.10';");
3407 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3408 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3409 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3410 PL_last_lop = PL_last_uni = NULL;
3280af22 3411 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3412 SV * const sv = newSV(0);
a0d0e21e
LW
3413
3414 sv_upgrade(sv, SVt_PVMG);
3280af22 3415 sv_setsv(sv,PL_linestr);
0ac0412a 3416 (void)SvIOK_on(sv);
45977657 3417 SvIV_set(sv, 0);
36c7798d 3418 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3419 }
79072805 3420 goto retry;
a687059c 3421 }
e929a76b 3422 do {
aa7440fb 3423 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3424 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3425 fake_eof:
5db06880 3426#ifdef PERL_MAD
cd81e915 3427 PL_realtokenstart = -1;
5db06880 3428#endif
7e28d3af
JH
3429 if (PL_rsfp) {
3430 if (PL_preprocess && !PL_in_eval)
3431 (void)PerlProc_pclose(PL_rsfp);
3432 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3433 PerlIO_clearerr(PL_rsfp);
3434 else
3435 (void)PerlIO_close(PL_rsfp);
4608196e 3436 PL_rsfp = NULL;
7e28d3af
JH
3437 PL_doextract = FALSE;
3438 }
3439 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3440#ifdef PERL_MAD
3441 if (PL_madskills)
cd81e915 3442 PL_faketokens = 1;
5db06880 3443#endif
a23c4656
NC
3444 sv_setpv(PL_linestr,PL_minus_p
3445 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3446 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3447 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3448 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3449 PL_minus_n = PL_minus_p = 0;
3450 goto retry;
3451 }
3452 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3453 PL_last_lop = PL_last_uni = NULL;
c69006e4 3454 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3455 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3456 }
7aa207d6
JH
3457 /* If it looks like the start of a BOM or raw UTF-16,
3458 * check if it in fact is. */
3459 else if (bof &&
3460 (*s == 0 ||
3461 *(U8*)s == 0xEF ||
3462 *(U8*)s >= 0xFE ||
3463 s[1] == 0)) {
226017aa 3464#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3465# ifdef __GNU_LIBRARY__
3466# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3467# define FTELL_FOR_PIPE_IS_BROKEN
3468# endif
e3f494f1
JH
3469# else
3470# ifdef __GLIBC__
3471# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3472# define FTELL_FOR_PIPE_IS_BROKEN
3473# endif
3474# endif
226017aa
DD
3475# endif
3476#endif
3477#ifdef FTELL_FOR_PIPE_IS_BROKEN
3478 /* This loses the possibility to detect the bof
3479 * situation on perl -P when the libc5 is being used.
3480 * Workaround? Maybe attach some extra state to PL_rsfp?
3481 */
3482 if (!PL_preprocess)
7e28d3af 3483 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3484#else
eb160463 3485 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3486#endif
7e28d3af 3487 if (bof) {
3280af22 3488 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3489 s = swallow_bom((U8*)s);
e929a76b 3490 }
378cc40b 3491 }
3280af22 3492 if (PL_doextract) {
a0d0e21e 3493 /* Incest with pod. */
5db06880
NC
3494#ifdef PERL_MAD
3495 if (PL_madskills)
cd81e915 3496 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 3497#endif
a0d0e21e 3498 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3499 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3500 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3501 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3502 PL_last_lop = PL_last_uni = NULL;
3280af22 3503 PL_doextract = FALSE;
a0d0e21e 3504 }
4e553d73 3505 }
463ee0b2 3506 incline(s);
3280af22
NIS
3507 } while (PL_doextract);
3508 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3509 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3510 SV * const sv = newSV(0);
a687059c 3511
93a17b20 3512 sv_upgrade(sv, SVt_PVMG);
3280af22 3513 sv_setsv(sv,PL_linestr);
0ac0412a 3514 (void)SvIOK_on(sv);
45977657 3515 SvIV_set(sv, 0);
36c7798d 3516 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 3517 }
3280af22 3518 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3519 PL_last_lop = PL_last_uni = NULL;
57843af0 3520 if (CopLINE(PL_curcop) == 1) {
3280af22 3521 while (s < PL_bufend && isSPACE(*s))
79072805 3522 s++;
a0d0e21e 3523 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 3524 s++;
5db06880
NC
3525#ifdef PERL_MAD
3526 if (PL_madskills)
cd81e915 3527 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 3528#endif
bd61b366 3529 d = NULL;
3280af22 3530 if (!PL_in_eval) {