This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Updated escaping code. utf8 regex debug output improvements
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a 71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 301 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
c35e046a
AL
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 NOOP;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 501 char he_name[32] = "feature_";
6fca0082 502 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
664 PL_linestr = line;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 667 s = SvPV_const(PL_linestr, len);
6f27f9a7 668 if (!len || s[len-1] != ';') {
3280af22
NIS
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 671 sv_catpvs(PL_linestr, "\n;");
8990e307 672 }
3280af22
NIS
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 676 PL_last_lop = PL_last_uni = NULL;
3280af22 677 PL_rsfp = 0;
79072805 678}
a687059c 679
ffb4593c
NT
680/*
681 * Perl_lex_end
9cbb5ea2
GS
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
ffb4593c
NT
684 */
685
463ee0b2 686void
864dbfa3 687Perl_lex_end(pTHX)
463ee0b2 688{
97aff369 689 dVAR;
3280af22 690 PL_doextract = FALSE;
463ee0b2
LW
691}
692
ffb4593c
NT
693/*
694 * S_incline
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
57843af0 697 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 698 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_incline(pTHX_ char *s)
463ee0b2 705{
97aff369 706 dVAR;
463ee0b2
LW
707 char *t;
708 char *n;
73659bf1 709 char *e;
463ee0b2 710 char ch;
463ee0b2 711
57843af0 712 CopLINE_inc(PL_curcop);
463ee0b2
LW
713 if (*s++ != '#')
714 return;
d4c19fe8
AL
715 while (SPACE_OR_TAB(*s))
716 s++;
73659bf1
GS
717 if (strnEQ(s, "line", 4))
718 s += 4;
719 else
720 return;
084592ab 721 if (SPACE_OR_TAB(*s))
73659bf1 722 s++;
4e553d73 723 else
73659bf1 724 return;
d4c19fe8
AL
725 while (SPACE_OR_TAB(*s))
726 s++;
463ee0b2
LW
727 if (!isDIGIT(*s))
728 return;
d4c19fe8 729
463ee0b2
LW
730 n = s;
731 while (isDIGIT(*s))
732 s++;
bf4acbe4 733 while (SPACE_OR_TAB(*s))
463ee0b2 734 s++;
73659bf1 735 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 736 s++;
73659bf1
GS
737 e = t + 1;
738 }
463ee0b2 739 else {
c35e046a
AL
740 t = s;
741 while (!isSPACE(*t))
742 t++;
73659bf1 743 e = t;
463ee0b2 744 }
bf4acbe4 745 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
746 e++;
747 if (*e != '\n' && *e != '\0')
748 return; /* false alarm */
749
463ee0b2
LW
750 ch = *t;
751 *t = '\0';
f4dd75d9 752 if (t - s > 0) {
8a5ee598 753#ifndef USE_ITHREADS
c4420975 754 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
755 STRLEN tmplen = cf ? strlen(cf) : 0;
756 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
757 /* must copy *{"::_<(eval N)[oldfilename:L]"}
758 * to *{"::_<newfilename"} */
759 char smallbuf[256], smallbuf2[256];
760 char *tmpbuf, *tmpbuf2;
8a5ee598 761 GV **gvp, *gv2;
e66cf94c
RGS
762 STRLEN tmplen2 = strlen(s);
763 if (tmplen + 3 < sizeof smallbuf)
764 tmpbuf = smallbuf;
765 else
766 Newx(tmpbuf, tmplen + 3, char);
767 if (tmplen2 + 3 < sizeof smallbuf2)
768 tmpbuf2 = smallbuf2;
769 else
770 Newx(tmpbuf2, tmplen2 + 3, char);
771 tmpbuf[0] = tmpbuf2[0] = '_';
772 tmpbuf[1] = tmpbuf2[1] = '<';
773 memcpy(tmpbuf + 2, cf, ++tmplen);
774 memcpy(tmpbuf2 + 2, s, ++tmplen2);
775 ++tmplen; ++tmplen2;
8a5ee598
RGS
776 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
777 if (gvp) {
778 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
779 if (!isGV(gv2))
780 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
781 /* adjust ${"::_<newfilename"} to store the new file name */
782 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
783 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
784 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
785 }
e66cf94c
RGS
786 if (tmpbuf != smallbuf) Safefree(tmpbuf);
787 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
788 }
8a5ee598 789#endif
05ec9bb3 790 CopFILE_free(PL_curcop);
57843af0 791 CopFILE_set(PL_curcop, s);
f4dd75d9 792 }
463ee0b2 793 *t = ch;
57843af0 794 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
795}
796
29595ff2 797#ifdef PERL_MAD
cd81e915 798/* skip space before PL_thistoken */
29595ff2
NC
799
800STATIC char *
801S_skipspace0(pTHX_ register char *s)
802{
803 s = skipspace(s);
804 if (!PL_madskills)
805 return s;
cd81e915
NC
806 if (PL_skipwhite) {
807 if (!PL_thiswhite)
808 PL_thiswhite = newSVpvn("",0);
809 sv_catsv(PL_thiswhite, PL_skipwhite);
810 sv_free(PL_skipwhite);
811 PL_skipwhite = 0;
812 }
813 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
814 return s;
815}
816
cd81e915 817/* skip space after PL_thistoken */
29595ff2
NC
818
819STATIC char *
820S_skipspace1(pTHX_ register char *s)
821{
d4c19fe8 822 const char *start = s;
29595ff2
NC
823 I32 startoff = start - SvPVX(PL_linestr);
824
825 s = skipspace(s);
826 if (!PL_madskills)
827 return s;
828 start = SvPVX(PL_linestr) + startoff;
cd81e915 829 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 830 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
831 PL_thistoken = newSVpvn(tstart, start - tstart);
832 }
833 PL_realtokenstart = -1;
834 if (PL_skipwhite) {
835 if (!PL_nextwhite)
836 PL_nextwhite = newSVpvn("",0);
837 sv_catsv(PL_nextwhite, PL_skipwhite);
838 sv_free(PL_skipwhite);
839 PL_skipwhite = 0;
29595ff2
NC
840 }
841 return s;
842}
843
844STATIC char *
845S_skipspace2(pTHX_ register char *s, SV **svp)
846{
c35e046a
AL
847 char *start;
848 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
849 const I32 startoff = s - SvPVX(PL_linestr);
850
29595ff2
NC
851 s = skipspace(s);
852 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
853 if (!PL_madskills || !svp)
854 return s;
855 start = SvPVX(PL_linestr) + startoff;
cd81e915 856 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 857 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
858 PL_thistoken = newSVpvn(tstart, start - tstart);
859 PL_realtokenstart = -1;
29595ff2 860 }
cd81e915 861 if (PL_skipwhite) {
29595ff2
NC
862 if (!*svp)
863 *svp = newSVpvn("",0);
cd81e915
NC
864 sv_setsv(*svp, PL_skipwhite);
865 sv_free(PL_skipwhite);
866 PL_skipwhite = 0;
29595ff2
NC
867 }
868
869 return s;
870}
871#endif
872
ffb4593c
NT
873/*
874 * S_skipspace
875 * Called to gobble the appropriate amount and type of whitespace.
876 * Skips comments as well.
877 */
878
76e3520e 879STATIC char *
cea2e8a9 880S_skipspace(pTHX_ register char *s)
a687059c 881{
97aff369 882 dVAR;
5db06880
NC
883#ifdef PERL_MAD
884 int curoff;
885 int startoff = s - SvPVX(PL_linestr);
886
cd81e915
NC
887 if (PL_skipwhite) {
888 sv_free(PL_skipwhite);
889 PL_skipwhite = 0;
5db06880
NC
890 }
891#endif
892
3280af22 893 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 894 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 895 s++;
5db06880
NC
896#ifdef PERL_MAD
897 goto done;
898#else
463ee0b2 899 return s;
5db06880 900#endif
463ee0b2
LW
901 }
902 for (;;) {
fd049845 903 STRLEN prevlen;
09bef843 904 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 905 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
906 while (s < PL_bufend && isSPACE(*s)) {
907 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
908 incline(s);
909 }
ffb4593c
NT
910
911 /* comment */
3280af22
NIS
912 if (s < PL_bufend && *s == '#') {
913 while (s < PL_bufend && *s != '\n')
463ee0b2 914 s++;
60e6418e 915 if (s < PL_bufend) {
463ee0b2 916 s++;
60e6418e
GS
917 if (PL_in_eval && !PL_rsfp) {
918 incline(s);
919 continue;
920 }
921 }
463ee0b2 922 }
ffb4593c
NT
923
924 /* only continue to recharge the buffer if we're at the end
925 * of the buffer, we're not reading from a source filter, and
926 * we're in normal lexing mode
927 */
09bef843
SB
928 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
929 PL_lex_state == LEX_FORMLINE)
5db06880
NC
930#ifdef PERL_MAD
931 goto done;
932#else
463ee0b2 933 return s;
5db06880 934#endif
ffb4593c
NT
935
936 /* try to recharge the buffer */
5db06880
NC
937#ifdef PERL_MAD
938 curoff = s - SvPVX(PL_linestr);
939#endif
940
9cbb5ea2 941 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 942 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 943 {
5db06880
NC
944#ifdef PERL_MAD
945 if (PL_madskills && curoff != startoff) {
cd81e915
NC
946 if (!PL_skipwhite)
947 PL_skipwhite = newSVpvn("",0);
948 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
949 curoff - startoff);
950 }
951
952 /* mustn't throw out old stuff yet if madpropping */
953 SvCUR(PL_linestr) = curoff;
954 s = SvPVX(PL_linestr) + curoff;
955 *s = 0;
956 if (curoff && s[-1] == '\n')
957 s[-1] = ' ';
958#endif
959
9cbb5ea2 960 /* end of file. Add on the -p or -n magic */
cd81e915 961 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 962 if (PL_minus_p) {
5db06880
NC
963#ifdef PERL_MAD
964 sv_catpv(PL_linestr,
965 ";}continue{print or die qq(-p destination: $!\\n);}");
966#else
01a19ab0
NC
967 sv_setpv(PL_linestr,
968 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 969#endif
3280af22 970 PL_minus_n = PL_minus_p = 0;
a0d0e21e 971 }
01a19ab0 972 else if (PL_minus_n) {
5db06880
NC
973#ifdef PERL_MAD
974 sv_catpvn(PL_linestr, ";}", 2);
975#else
01a19ab0 976 sv_setpvn(PL_linestr, ";}", 2);
5db06880 977#endif
01a19ab0
NC
978 PL_minus_n = 0;
979 }
a0d0e21e 980 else
5db06880
NC
981#ifdef PERL_MAD
982 sv_catpvn(PL_linestr,";", 1);
983#else
4147a61b 984 sv_setpvn(PL_linestr,";", 1);
5db06880 985#endif
ffb4593c
NT
986
987 /* reset variables for next time we lex */
9cbb5ea2 988 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
989 = SvPVX(PL_linestr)
990#ifdef PERL_MAD
991 + curoff
992#endif
993 ;
3280af22 994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 995 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
996
997 /* Close the filehandle. Could be from -P preprocessor,
998 * STDIN, or a regular file. If we were reading code from
999 * STDIN (because the commandline held no -e or filename)
1000 * then we don't close it, we reset it so the code can
1001 * read from STDIN too.
1002 */
1003
3280af22
NIS
1004 if (PL_preprocess && !PL_in_eval)
1005 (void)PerlProc_pclose(PL_rsfp);
1006 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1007 PerlIO_clearerr(PL_rsfp);
8990e307 1008 else
3280af22 1009 (void)PerlIO_close(PL_rsfp);
4608196e 1010 PL_rsfp = NULL;
463ee0b2
LW
1011 return s;
1012 }
ffb4593c
NT
1013
1014 /* not at end of file, so we only read another line */
09bef843
SB
1015 /* make corresponding updates to old pointers, for yyerror() */
1016 oldprevlen = PL_oldbufptr - PL_bufend;
1017 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1018 if (PL_last_uni)
1019 oldunilen = PL_last_uni - PL_bufend;
1020 if (PL_last_lop)
1021 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1022 PL_linestart = PL_bufptr = s + prevlen;
1023 PL_bufend = s + SvCUR(PL_linestr);
1024 s = PL_bufptr;
09bef843
SB
1025 PL_oldbufptr = s + oldprevlen;
1026 PL_oldoldbufptr = s + oldoldprevlen;
1027 if (PL_last_uni)
1028 PL_last_uni = s + oldunilen;
1029 if (PL_last_lop)
1030 PL_last_lop = s + oldloplen;
a0d0e21e 1031 incline(s);
ffb4593c
NT
1032
1033 /* debugger active and we're not compiling the debugger code,
1034 * so store the line into the debugger's array of lines
1035 */
3280af22 1036 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1037 SV * const sv = newSV(0);
8990e307
LW
1038
1039 sv_upgrade(sv, SVt_PVMG);
3280af22 1040 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1041 (void)SvIOK_on(sv);
45977657 1042 SvIV_set(sv, 0);
36c7798d 1043 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1044 }
463ee0b2 1045 }
5db06880
NC
1046
1047#ifdef PERL_MAD
1048 done:
1049 if (PL_madskills) {
cd81e915
NC
1050 if (!PL_skipwhite)
1051 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1052 curoff = s - SvPVX(PL_linestr);
1053 if (curoff - startoff)
cd81e915 1054 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1055 curoff - startoff);
1056 }
1057 return s;
1058#endif
a687059c 1059}
378cc40b 1060
ffb4593c
NT
1061/*
1062 * S_check_uni
1063 * Check the unary operators to ensure there's no ambiguity in how they're
1064 * used. An ambiguous piece of code would be:
1065 * rand + 5
1066 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1067 * the +5 is its argument.
1068 */
1069
76e3520e 1070STATIC void
cea2e8a9 1071S_check_uni(pTHX)
ba106d47 1072{
97aff369 1073 dVAR;
d4c19fe8
AL
1074 const char *s;
1075 const char *t;
2f3197b3 1076
3280af22 1077 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1078 return;
3280af22
NIS
1079 while (isSPACE(*PL_last_uni))
1080 PL_last_uni++;
c35e046a
AL
1081 s = PL_last_uni;
1082 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1083 s++;
3280af22 1084 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1085 return;
6136c704 1086
0453d815 1087 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1088 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1089 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1090 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1091 }
2f3197b3
LW
1092}
1093
ffb4593c
NT
1094/*
1095 * LOP : macro to build a list operator. Its behaviour has been replaced
1096 * with a subroutine, S_lop() for which LOP is just another name.
1097 */
1098
a0d0e21e
LW
1099#define LOP(f,x) return lop(f,x,s)
1100
ffb4593c
NT
1101/*
1102 * S_lop
1103 * Build a list operator (or something that might be one). The rules:
1104 * - if we have a next token, then it's a list operator [why?]
1105 * - if the next thing is an opening paren, then it's a function
1106 * - else it's a list operator
1107 */
1108
76e3520e 1109STATIC I32
a0be28da 1110S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1111{
97aff369 1112 dVAR;
79072805 1113 yylval.ival = f;
35c8bce7 1114 CLINE;
3280af22
NIS
1115 PL_expect = x;
1116 PL_bufptr = s;
1117 PL_last_lop = PL_oldbufptr;
eb160463 1118 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1119#ifdef PERL_MAD
1120 if (PL_lasttoke)
1121 return REPORT(LSTOP);
1122#else
3280af22 1123 if (PL_nexttoke)
bbf60fe6 1124 return REPORT(LSTOP);
5db06880 1125#endif
79072805 1126 if (*s == '(')
bbf60fe6 1127 return REPORT(FUNC);
29595ff2 1128 s = PEEKSPACE(s);
79072805 1129 if (*s == '(')
bbf60fe6 1130 return REPORT(FUNC);
79072805 1131 else
bbf60fe6 1132 return REPORT(LSTOP);
79072805
LW
1133}
1134
5db06880
NC
1135#ifdef PERL_MAD
1136 /*
1137 * S_start_force
1138 * Sets up for an eventual force_next(). start_force(0) basically does
1139 * an unshift, while start_force(-1) does a push. yylex removes items
1140 * on the "pop" end.
1141 */
1142
1143STATIC void
1144S_start_force(pTHX_ int where)
1145{
1146 int i;
1147
cd81e915 1148 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1149 where = PL_lasttoke;
cd81e915
NC
1150 assert(PL_curforce < 0 || PL_curforce == where);
1151 if (PL_curforce != where) {
5db06880
NC
1152 for (i = PL_lasttoke; i > where; --i) {
1153 PL_nexttoke[i] = PL_nexttoke[i-1];
1154 }
1155 PL_lasttoke++;
1156 }
cd81e915 1157 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1158 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1159 PL_curforce = where;
1160 if (PL_nextwhite) {
5db06880
NC
1161 if (PL_madskills)
1162 curmad('^', newSVpvn("",0));
cd81e915 1163 CURMAD('_', PL_nextwhite);
5db06880
NC
1164 }
1165}
1166
1167STATIC void
1168S_curmad(pTHX_ char slot, SV *sv)
1169{
1170 MADPROP **where;
1171
1172 if (!sv)
1173 return;
cd81e915
NC
1174 if (PL_curforce < 0)
1175 where = &PL_thismad;
5db06880 1176 else
cd81e915 1177 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1178
cd81e915 1179 if (PL_faketokens)
5db06880
NC
1180 sv_setpvn(sv, "", 0);
1181 else {
1182 if (!IN_BYTES) {
1183 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1184 SvUTF8_on(sv);
1185 else if (PL_encoding) {
1186 sv_recode_to_utf8(sv, PL_encoding);
1187 }
1188 }
1189 }
1190
1191 /* keep a slot open for the head of the list? */
1192 if (slot != '_' && *where && (*where)->mad_key == '^') {
1193 (*where)->mad_key = slot;
1194 sv_free((*where)->mad_val);
1195 (*where)->mad_val = (void*)sv;
1196 }
1197 else
1198 addmad(newMADsv(slot, sv), where, 0);
1199}
1200#else
b3f24c00
MHM
1201# define start_force(where) NOOP
1202# define curmad(slot, sv) NOOP
5db06880
NC
1203#endif
1204
ffb4593c
NT
1205/*
1206 * S_force_next
9cbb5ea2 1207 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1208 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1209 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1210 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1211 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1212 */
1213
4e553d73 1214STATIC void
cea2e8a9 1215S_force_next(pTHX_ I32 type)
79072805 1216{
97aff369 1217 dVAR;
5db06880 1218#ifdef PERL_MAD
cd81e915 1219 if (PL_curforce < 0)
5db06880 1220 start_force(PL_lasttoke);
cd81e915 1221 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1222 if (PL_lex_state != LEX_KNOWNEXT)
1223 PL_lex_defer = PL_lex_state;
1224 PL_lex_state = LEX_KNOWNEXT;
1225 PL_lex_expect = PL_expect;
cd81e915 1226 PL_curforce = -1;
5db06880 1227#else
3280af22
NIS
1228 PL_nexttype[PL_nexttoke] = type;
1229 PL_nexttoke++;
1230 if (PL_lex_state != LEX_KNOWNEXT) {
1231 PL_lex_defer = PL_lex_state;
1232 PL_lex_expect = PL_expect;
1233 PL_lex_state = LEX_KNOWNEXT;
79072805 1234 }
5db06880 1235#endif
79072805
LW
1236}
1237
d0a148a6
NC
1238STATIC SV *
1239S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1240{
97aff369 1241 dVAR;
9d4ba2ae 1242 SV * const sv = newSVpvn(start,len);
bfed75c6 1243 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1244 SvUTF8_on(sv);
1245 return sv;
1246}
1247
ffb4593c
NT
1248/*
1249 * S_force_word
1250 * When the lexer knows the next thing is a word (for instance, it has
1251 * just seen -> and it knows that the next char is a word char, then
1252 * it calls S_force_word to stick the next word into the PL_next lookahead.
1253 *
1254 * Arguments:
b1b65b59 1255 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1256 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1257 * int check_keyword : if true, Perl checks to make sure the word isn't
1258 * a keyword (do this if the word is a label, e.g. goto FOO)
1259 * int allow_pack : if true, : characters will also be allowed (require,
1260 * use, etc. do this)
9cbb5ea2 1261 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1262 */
1263
76e3520e 1264STATIC char *
cea2e8a9 1265S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1266{
97aff369 1267 dVAR;
463ee0b2
LW
1268 register char *s;
1269 STRLEN len;
4e553d73 1270
29595ff2 1271 start = SKIPSPACE1(start);
463ee0b2 1272 s = start;
7e2040f0 1273 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1274 (allow_pack && *s == ':') ||
15f0808c 1275 (allow_initial_tick && *s == '\'') )
a0d0e21e 1276 {
3280af22
NIS
1277 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1278 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1279 return start;
cd81e915 1280 start_force(PL_curforce);
5db06880
NC
1281 if (PL_madskills)
1282 curmad('X', newSVpvn(start,s-start));
463ee0b2 1283 if (token == METHOD) {
29595ff2 1284 s = SKIPSPACE1(s);
463ee0b2 1285 if (*s == '(')
3280af22 1286 PL_expect = XTERM;
463ee0b2 1287 else {
3280af22 1288 PL_expect = XOPERATOR;
463ee0b2 1289 }
79072805 1290 }
9ded7720 1291 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1292 = (OP*)newSVOP(OP_CONST,0,
1293 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1294 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1295 force_next(token);
1296 }
1297 return s;
1298}
1299
ffb4593c
NT
1300/*
1301 * S_force_ident
9cbb5ea2 1302 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1303 * text only contains the "foo" portion. The first argument is a pointer
1304 * to the "foo", and the second argument is the type symbol to prefix.
1305 * Forces the next token to be a "WORD".
9cbb5ea2 1306 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1307 */
1308
76e3520e 1309STATIC void
bfed75c6 1310S_force_ident(pTHX_ register const char *s, int kind)
79072805 1311{
97aff369 1312 dVAR;
c35e046a 1313 if (*s) {
90e5519e
NC
1314 const STRLEN len = strlen(s);
1315 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1316 start_force(PL_curforce);
9ded7720 1317 NEXTVAL_NEXTTOKE.opval = o;
79072805 1318 force_next(WORD);
748a9306 1319 if (kind) {
11343788 1320 o->op_private = OPpCONST_ENTERED;
55497cff 1321 /* XXX see note in pp_entereval() for why we forgo typo
1322 warnings if the symbol must be introduced in an eval.
1323 GSAR 96-10-12 */
90e5519e
NC
1324 gv_fetchpvn_flags(s, len,
1325 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1326 : GV_ADD,
1327 kind == '$' ? SVt_PV :
1328 kind == '@' ? SVt_PVAV :
1329 kind == '%' ? SVt_PVHV :
a0d0e21e 1330 SVt_PVGV
90e5519e 1331 );
748a9306 1332 }
79072805
LW
1333 }
1334}
1335
1571675a
GS
1336NV
1337Perl_str_to_version(pTHX_ SV *sv)
1338{
1339 NV retval = 0.0;
1340 NV nshift = 1.0;
1341 STRLEN len;
cfd0369c 1342 const char *start = SvPV_const(sv,len);
9d4ba2ae 1343 const char * const end = start + len;
504618e9 1344 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1345 while (start < end) {
ba210ebe 1346 STRLEN skip;
1571675a
GS
1347 UV n;
1348 if (utf)
9041c2e3 1349 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1350 else {
1351 n = *(U8*)start;
1352 skip = 1;
1353 }
1354 retval += ((NV)n)/nshift;
1355 start += skip;
1356 nshift *= 1000;
1357 }
1358 return retval;
1359}
1360
4e553d73 1361/*
ffb4593c
NT
1362 * S_force_version
1363 * Forces the next token to be a version number.
e759cc13
RGS
1364 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1365 * and if "guessing" is TRUE, then no new token is created (and the caller
1366 * must use an alternative parsing method).
ffb4593c
NT
1367 */
1368
76e3520e 1369STATIC char *
e759cc13 1370S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1371{
97aff369 1372 dVAR;
5f66b61c 1373 OP *version = NULL;
44dcb63b 1374 char *d;
5db06880
NC
1375#ifdef PERL_MAD
1376 I32 startoff = s - SvPVX(PL_linestr);
1377#endif
89bfa8cd 1378
29595ff2 1379 s = SKIPSPACE1(s);
89bfa8cd 1380
44dcb63b 1381 d = s;
dd629d5b 1382 if (*d == 'v')
44dcb63b 1383 d++;
44dcb63b 1384 if (isDIGIT(*d)) {
e759cc13
RGS
1385 while (isDIGIT(*d) || *d == '_' || *d == '.')
1386 d++;
5db06880
NC
1387#ifdef PERL_MAD
1388 if (PL_madskills) {
cd81e915 1389 start_force(PL_curforce);
5db06880
NC
1390 curmad('X', newSVpvn(s,d-s));
1391 }
1392#endif
9f3d182e 1393 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1394 SV *ver;
b73d6f50 1395 s = scan_num(s, &yylval);
89bfa8cd 1396 version = yylval.opval;
dd629d5b
GS
1397 ver = cSVOPx(version)->op_sv;
1398 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1399 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1400 SvNV_set(ver, str_to_version(ver));
1571675a 1401 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1402 }
89bfa8cd 1403 }
5db06880
NC
1404 else if (guessing) {
1405#ifdef PERL_MAD
1406 if (PL_madskills) {
cd81e915
NC
1407 sv_free(PL_nextwhite); /* let next token collect whitespace */
1408 PL_nextwhite = 0;
5db06880
NC
1409 s = SvPVX(PL_linestr) + startoff;
1410 }
1411#endif
e759cc13 1412 return s;
5db06880 1413 }
89bfa8cd 1414 }
1415
5db06880
NC
1416#ifdef PERL_MAD
1417 if (PL_madskills && !version) {
cd81e915
NC
1418 sv_free(PL_nextwhite); /* let next token collect whitespace */
1419 PL_nextwhite = 0;
5db06880
NC
1420 s = SvPVX(PL_linestr) + startoff;
1421 }
1422#endif
89bfa8cd 1423 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1424 start_force(PL_curforce);
9ded7720 1425 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1426 force_next(WORD);
89bfa8cd 1427
e759cc13 1428 return s;
89bfa8cd 1429}
1430
ffb4593c
NT
1431/*
1432 * S_tokeq
1433 * Tokenize a quoted string passed in as an SV. It finds the next
1434 * chunk, up to end of string or a backslash. It may make a new
1435 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1436 * turns \\ into \.
1437 */
1438
76e3520e 1439STATIC SV *
cea2e8a9 1440S_tokeq(pTHX_ SV *sv)
79072805 1441{
97aff369 1442 dVAR;
79072805
LW
1443 register char *s;
1444 register char *send;
1445 register char *d;
b3ac6de7
IZ
1446 STRLEN len = 0;
1447 SV *pv = sv;
79072805
LW
1448
1449 if (!SvLEN(sv))
b3ac6de7 1450 goto finish;
79072805 1451
a0d0e21e 1452 s = SvPV_force(sv, len);
21a311ee 1453 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1454 goto finish;
463ee0b2 1455 send = s + len;
79072805
LW
1456 while (s < send && *s != '\\')
1457 s++;
1458 if (s == send)
b3ac6de7 1459 goto finish;
79072805 1460 d = s;
be4731d2 1461 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1462 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1463 if (SvUTF8(sv))
1464 SvUTF8_on(pv);
1465 }
79072805
LW
1466 while (s < send) {
1467 if (*s == '\\') {
a0d0e21e 1468 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1469 s++; /* all that, just for this */
1470 }
1471 *d++ = *s++;
1472 }
1473 *d = '\0';
95a20fc0 1474 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1475 finish:
3280af22 1476 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1477 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1478 return sv;
1479}
1480
ffb4593c
NT
1481/*
1482 * Now come three functions related to double-quote context,
1483 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1484 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1485 * interact with PL_lex_state, and create fake ( ... ) argument lists
1486 * to handle functions and concatenation.
1487 * They assume that whoever calls them will be setting up a fake
1488 * join call, because each subthing puts a ',' after it. This lets
1489 * "lower \luPpEr"
1490 * become
1491 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1492 *
1493 * (I'm not sure whether the spurious commas at the end of lcfirst's
1494 * arguments and join's arguments are created or not).
1495 */
1496
1497/*
1498 * S_sublex_start
1499 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1500 *
1501 * Pattern matching will set PL_lex_op to the pattern-matching op to
1502 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1503 *
1504 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1505 *
1506 * Everything else becomes a FUNC.
1507 *
1508 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1509 * had an OP_CONST or OP_READLINE). This just sets us up for a
1510 * call to S_sublex_push().
1511 */
1512
76e3520e 1513STATIC I32
cea2e8a9 1514S_sublex_start(pTHX)
79072805 1515{
97aff369 1516 dVAR;
0d46e09a 1517 register const I32 op_type = yylval.ival;
79072805
LW
1518
1519 if (op_type == OP_NULL) {
3280af22 1520 yylval.opval = PL_lex_op;
5f66b61c 1521 PL_lex_op = NULL;
79072805
LW
1522 return THING;
1523 }
1524 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1525 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1526
1527 if (SvTYPE(sv) == SVt_PVIV) {
1528 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1529 STRLEN len;
96a5add6 1530 const char * const p = SvPV_const(sv, len);
f54cb97a 1531 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1532 if (SvUTF8(sv))
1533 SvUTF8_on(nsv);
b3ac6de7
IZ
1534 SvREFCNT_dec(sv);
1535 sv = nsv;
4e553d73 1536 }
b3ac6de7 1537 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1538 PL_lex_stuff = NULL;
6f33ba73
RGS
1539 /* Allow <FH> // "foo" */
1540 if (op_type == OP_READLINE)
1541 PL_expect = XTERMORDORDOR;
79072805
LW
1542 return THING;
1543 }
1544
3280af22
NIS
1545 PL_sublex_info.super_state = PL_lex_state;
1546 PL_sublex_info.sub_inwhat = op_type;
1547 PL_sublex_info.sub_op = PL_lex_op;
1548 PL_lex_state = LEX_INTERPPUSH;
55497cff 1549
3280af22
NIS
1550 PL_expect = XTERM;
1551 if (PL_lex_op) {
1552 yylval.opval = PL_lex_op;
5f66b61c 1553 PL_lex_op = NULL;
55497cff 1554 return PMFUNC;
1555 }
1556 else
1557 return FUNC;
1558}
1559
ffb4593c
NT
1560/*
1561 * S_sublex_push
1562 * Create a new scope to save the lexing state. The scope will be
1563 * ended in S_sublex_done. Returns a '(', starting the function arguments
1564 * to the uc, lc, etc. found before.
1565 * Sets PL_lex_state to LEX_INTERPCONCAT.
1566 */
1567
76e3520e 1568STATIC I32
cea2e8a9 1569S_sublex_push(pTHX)
55497cff 1570{
27da23d5 1571 dVAR;
f46d017c 1572 ENTER;
55497cff 1573
3280af22
NIS
1574 PL_lex_state = PL_sublex_info.super_state;
1575 SAVEI32(PL_lex_dojoin);
1576 SAVEI32(PL_lex_brackets);
3280af22
NIS
1577 SAVEI32(PL_lex_casemods);
1578 SAVEI32(PL_lex_starts);
1579 SAVEI32(PL_lex_state);
7766f137 1580 SAVEVPTR(PL_lex_inpat);
3280af22 1581 SAVEI32(PL_lex_inwhat);
57843af0 1582 SAVECOPLINE(PL_curcop);
3280af22 1583 SAVEPPTR(PL_bufptr);
8452ff4b 1584 SAVEPPTR(PL_bufend);
3280af22
NIS
1585 SAVEPPTR(PL_oldbufptr);
1586 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1587 SAVEPPTR(PL_last_lop);
1588 SAVEPPTR(PL_last_uni);
3280af22
NIS
1589 SAVEPPTR(PL_linestart);
1590 SAVESPTR(PL_linestr);
8edd5f42
RGS
1591 SAVEGENERICPV(PL_lex_brackstack);
1592 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1593
1594 PL_linestr = PL_lex_stuff;
a0714e2c 1595 PL_lex_stuff = NULL;
3280af22 1596
9cbb5ea2
GS
1597 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1598 = SvPVX(PL_linestr);
3280af22 1599 PL_bufend += SvCUR(PL_linestr);
bd61b366 1600 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1601 SAVEFREESV(PL_linestr);
1602
1603 PL_lex_dojoin = FALSE;
1604 PL_lex_brackets = 0;
a02a5408
JC
1605 Newx(PL_lex_brackstack, 120, char);
1606 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1607 PL_lex_casemods = 0;
1608 *PL_lex_casestack = '\0';
1609 PL_lex_starts = 0;
1610 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1611 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1612
1613 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1614 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1615 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1616 else
5f66b61c 1617 PL_lex_inpat = NULL;
79072805 1618
55497cff 1619 return '(';
79072805
LW
1620}
1621
ffb4593c
NT
1622/*
1623 * S_sublex_done
1624 * Restores lexer state after a S_sublex_push.
1625 */
1626
76e3520e 1627STATIC I32
cea2e8a9 1628S_sublex_done(pTHX)
79072805 1629{
27da23d5 1630 dVAR;
3280af22 1631 if (!PL_lex_starts++) {
396482e1 1632 SV * const sv = newSVpvs("");
9aa983d2
JH
1633 if (SvUTF8(PL_linestr))
1634 SvUTF8_on(sv);
3280af22 1635 PL_expect = XOPERATOR;
9aa983d2 1636 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1637 return THING;
1638 }
1639
3280af22
NIS
1640 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1641 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1642 return yylex();
79072805
LW
1643 }
1644
ffb4593c 1645 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1646 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1647 PL_linestr = PL_lex_repl;
1648 PL_lex_inpat = 0;
1649 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1650 PL_bufend += SvCUR(PL_linestr);
bd61b366 1651 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1652 SAVEFREESV(PL_linestr);
1653 PL_lex_dojoin = FALSE;
1654 PL_lex_brackets = 0;
3280af22
NIS
1655 PL_lex_casemods = 0;
1656 *PL_lex_casestack = '\0';
1657 PL_lex_starts = 0;
25da4f38 1658 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1659 PL_lex_state = LEX_INTERPNORMAL;
1660 PL_lex_starts++;
e9fa98b2
HS
1661 /* we don't clear PL_lex_repl here, so that we can check later
1662 whether this is an evalled subst; that means we rely on the
1663 logic to ensure sublex_done() is called again only via the
1664 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1665 }
e9fa98b2 1666 else {
3280af22 1667 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1668 PL_lex_repl = NULL;
e9fa98b2 1669 }
79072805 1670 return ',';
ffed7fef
LW
1671 }
1672 else {
5db06880
NC
1673#ifdef PERL_MAD
1674 if (PL_madskills) {
cd81e915
NC
1675 if (PL_thiswhite) {
1676 if (!PL_endwhite)
1677 PL_endwhite = newSVpvn("",0);
1678 sv_catsv(PL_endwhite, PL_thiswhite);
1679 PL_thiswhite = 0;
1680 }
1681 if (PL_thistoken)
1682 sv_setpvn(PL_thistoken,"",0);
5db06880 1683 else
cd81e915 1684 PL_realtokenstart = -1;
5db06880
NC
1685 }
1686#endif
f46d017c 1687 LEAVE;
3280af22
NIS
1688 PL_bufend = SvPVX(PL_linestr);
1689 PL_bufend += SvCUR(PL_linestr);
1690 PL_expect = XOPERATOR;
09bef843 1691 PL_sublex_info.sub_inwhat = 0;
79072805 1692 return ')';
ffed7fef
LW
1693 }
1694}
1695
02aa26ce
NT
1696/*
1697 scan_const
1698
1699 Extracts a pattern, double-quoted string, or transliteration. This
1700 is terrifying code.
1701
3280af22
NIS
1702 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1703 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1704 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1705
9b599b2a
GS
1706 Returns a pointer to the character scanned up to. Iff this is
1707 advanced from the start pointer supplied (ie if anything was
1708 successfully parsed), will leave an OP for the substring scanned
1709 in yylval. Caller must intuit reason for not parsing further
1710 by looking at the next characters herself.
1711
02aa26ce
NT
1712 In patterns:
1713 backslashes:
1714 double-quoted style: \r and \n
1715 regexp special ones: \D \s
1716 constants: \x3
1717 backrefs: \1 (deprecated in substitution replacements)
1718 case and quoting: \U \Q \E
1719 stops on @ and $, but not for $ as tail anchor
1720
1721 In transliterations:
1722 characters are VERY literal, except for - not at the start or end
1723 of the string, which indicates a range. scan_const expands the
1724 range to the full set of intermediate characters.
1725
1726 In double-quoted strings:
1727 backslashes:
1728 double-quoted style: \r and \n
1729 constants: \x3
1730 backrefs: \1 (deprecated)
1731 case and quoting: \U \Q \E
1732 stops on @ and $
1733
1734 scan_const does *not* construct ops to handle interpolated strings.
1735 It stops processing as soon as it finds an embedded $ or @ variable
1736 and leaves it to the caller to work out what's going on.
1737
da6eedaa 1738 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1739
1740 $ in pattern could be $foo or could be tail anchor. Assumption:
1741 it's a tail anchor if $ is the last thing in the string, or if it's
1742 followed by one of ")| \n\t"
1743
1744 \1 (backreferences) are turned into $1
1745
1746 The structure of the code is
1747 while (there's a character to process) {
1748 handle transliteration ranges
1749 skip regexp comments
1750 skip # initiated comments in //x patterns
1751 check for embedded @foo
1752 check for embedded scalars
1753 if (backslash) {
1754 leave intact backslashes from leave (below)
1755 deprecate \1 in strings and sub replacements
1756 handle string-changing backslashes \l \U \Q \E, etc.
1757 switch (what was escaped) {
1758 handle - in a transliteration (becomes a literal -)
1759 handle \132 octal characters
1760 handle 0x15 hex characters
1761 handle \cV (control V)
1762 handle printf backslashes (\f, \r, \n, etc)
1763 } (end switch)
1764 } (end if backslash)
1765 } (end while character to read)
4e553d73 1766
02aa26ce
NT
1767*/
1768
76e3520e 1769STATIC char *
cea2e8a9 1770S_scan_const(pTHX_ char *start)
79072805 1771{
97aff369 1772 dVAR;
3280af22 1773 register char *send = PL_bufend; /* end of the constant */
561b68a9 1774 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1775 register char *s = start; /* start of the constant */
1776 register char *d = SvPVX(sv); /* destination for copies */
1777 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1778 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1779 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1780 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1781 UV uv;
4c3a8340
TS
1782#ifdef EBCDIC
1783 UV literal_endpoint = 0;
1784#endif
012bcf8d 1785
d4c19fe8 1786 const char * const leaveit = /* set of acceptably-backslashed characters */
3280af22 1787 PL_lex_inpat
25f684f7 1788 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1789 : "";
79072805 1790
2b9d42f0
NIS
1791 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1792 /* If we are doing a trans and we know we want UTF8 set expectation */
1793 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1794 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1795 }
1796
1797
79072805 1798 while (s < send || dorange) {
02aa26ce 1799 /* get transliterations out of the way (they're most literal) */
3280af22 1800 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1801 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1802 if (dorange) {
1ba5c669
JH
1803 I32 i; /* current expanded character */
1804 I32 min; /* first character in range */
1805 I32 max; /* last character in range */
02aa26ce 1806
2b9d42f0 1807 if (has_utf8) {
9d4ba2ae 1808 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1809 char *e = d++;
1810 while (e-- > c)
1811 *(e + 1) = *e;
25716404 1812 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1813 /* mark the range as done, and continue */
1814 dorange = FALSE;
1815 didrange = TRUE;
1816 continue;
1817 }
2b9d42f0 1818
95a20fc0 1819 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1820 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1821 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1822 d -= 2; /* eat the first char and the - */
1823
8ada0baa
JH
1824 min = (U8)*d; /* first char in range */
1825 max = (U8)d[1]; /* last char in range */
1826
c2e66d9e 1827 if (min > max) {
01ec43d0 1828 Perl_croak(aTHX_
d1573ac7 1829 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1830 (char)min, (char)max);
c2e66d9e
GS
1831 }
1832
c7f1f016 1833#ifdef EBCDIC
4c3a8340
TS
1834 if (literal_endpoint == 2 &&
1835 ((isLOWER(min) && isLOWER(max)) ||
1836 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1837 if (isLOWER(min)) {
1838 for (i = min; i <= max; i++)
1839 if (isLOWER(i))
db42d148 1840 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1841 } else {
1842 for (i = min; i <= max; i++)
1843 if (isUPPER(i))
db42d148 1844 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1845 }
1846 }
1847 else
1848#endif
1849 for (i = min; i <= max; i++)
eb160463 1850 *d++ = (char)i;
02aa26ce
NT
1851
1852 /* mark the range as done, and continue */
79072805 1853 dorange = FALSE;
01ec43d0 1854 didrange = TRUE;
4c3a8340
TS
1855#ifdef EBCDIC
1856 literal_endpoint = 0;
1857#endif
79072805 1858 continue;
4e553d73 1859 }
02aa26ce
NT
1860
1861 /* range begins (ignore - as first or last char) */
79072805 1862 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1863 if (didrange) {
1fafa243 1864 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1865 }
2b9d42f0 1866 if (has_utf8) {
25716404 1867 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1868 s++;
1869 continue;
1870 }
79072805
LW
1871 dorange = TRUE;
1872 s++;
01ec43d0
GS
1873 }
1874 else {
1875 didrange = FALSE;
4c3a8340
TS
1876#ifdef EBCDIC
1877 literal_endpoint = 0;
1878#endif
01ec43d0 1879 }
79072805 1880 }
02aa26ce
NT
1881
1882 /* if we get here, we're not doing a transliteration */
1883
0f5d15d6
IZ
1884 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1885 except for the last char, which will be done separately. */
3280af22 1886 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1887 if (s[2] == '#') {
e994fd66 1888 while (s+1 < send && *s != ')')
db42d148 1889 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1890 }
1891 else if (s[2] == '{' /* This should match regcomp.c */
1892 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1893 {
cc6b7395 1894 I32 count = 1;
0f5d15d6 1895 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1896 char c;
1897
d9f97599
GS
1898 while (count && (c = *regparse)) {
1899 if (c == '\\' && regparse[1])
1900 regparse++;
4e553d73 1901 else if (c == '{')
cc6b7395 1902 count++;
4e553d73 1903 else if (c == '}')
cc6b7395 1904 count--;
d9f97599 1905 regparse++;
cc6b7395 1906 }
e994fd66 1907 if (*regparse != ')')
5bdf89e7 1908 regparse--; /* Leave one char for continuation. */
0f5d15d6 1909 while (s < regparse)
db42d148 1910 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1911 }
748a9306 1912 }
02aa26ce
NT
1913
1914 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1915 else if (*s == '#' && PL_lex_inpat &&
1916 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1917 while (s+1 < send && *s != '\n')
db42d148 1918 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1919 }
02aa26ce 1920
5d1d4326 1921 /* check for embedded arrays
da6eedaa 1922 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1923 */
7e2040f0 1924 else if (*s == '@' && s[1]
5d1d4326 1925 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1926 break;
02aa26ce
NT
1927
1928 /* check for embedded scalars. only stop if we're sure it's a
1929 variable.
1930 */
79072805 1931 else if (*s == '$') {
3280af22 1932 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1933 break;
6002328a 1934 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1935 break; /* in regexp, $ might be tail anchor */
1936 }
02aa26ce 1937
2b9d42f0
NIS
1938 /* End of else if chain - OP_TRANS rejoin rest */
1939
02aa26ce 1940 /* backslashes */
79072805
LW
1941 if (*s == '\\' && s+1 < send) {
1942 s++;
02aa26ce
NT
1943
1944 /* some backslashes we leave behind */
c9f97d15 1945 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1946 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1947 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1948 continue;
1949 }
02aa26ce
NT
1950
1951 /* deprecate \1 in strings and substitution replacements */
3280af22 1952 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1953 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1954 {
599cee73 1955 if (ckWARN(WARN_SYNTAX))
9014280d 1956 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1957 *--s = '$';
1958 break;
1959 }
02aa26ce
NT
1960
1961 /* string-change backslash escapes */
3280af22 1962 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1963 --s;
1964 break;
1965 }
02aa26ce
NT
1966
1967 /* if we get here, it's either a quoted -, or a digit */
79072805 1968 switch (*s) {
02aa26ce
NT
1969
1970 /* quoted - in transliterations */
79072805 1971 case '-':
3280af22 1972 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1973 *d++ = *s++;
1974 continue;
1975 }
1976 /* FALL THROUGH */
1977 default:
11b8faa4 1978 {
86f97054 1979 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 1980 ckWARN(WARN_MISC))
9014280d 1981 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1982 "Unrecognized escape \\%c passed through",
1983 *s);
1984 /* default action is to copy the quoted character */
f9a63242 1985 goto default_action;
11b8faa4 1986 }
02aa26ce
NT
1987
1988 /* \132 indicates an octal constant */
79072805
LW
1989 case '0': case '1': case '2': case '3':
1990 case '4': case '5': case '6': case '7':
ba210ebe 1991 {
53305cf1
NC
1992 I32 flags = 0;
1993 STRLEN len = 3;
1994 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1995 s += len;
1996 }
012bcf8d 1997 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1998
1999 /* \x24 indicates a hex constant */
79072805 2000 case 'x':
a0ed51b3
LW
2001 ++s;
2002 if (*s == '{') {
9d4ba2ae 2003 char* const e = strchr(s, '}');
a4c04bdc
NC
2004 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2005 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2006 STRLEN len;
355860ce 2007
53305cf1 2008 ++s;
adaeee49 2009 if (!e) {
a0ed51b3 2010 yyerror("Missing right brace on \\x{}");
355860ce 2011 continue;
ba210ebe 2012 }
53305cf1
NC
2013 len = e - s;
2014 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2015 s = e + 1;
a0ed51b3
LW
2016 }
2017 else {
ba210ebe 2018 {
53305cf1 2019 STRLEN len = 2;
a4c04bdc 2020 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2021 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2022 s += len;
2023 }
012bcf8d
GS
2024 }
2025
2026 NUM_ESCAPE_INSERT:
2027 /* Insert oct or hex escaped character.
301d3d20 2028 * There will always enough room in sv since such
db42d148 2029 * escapes will be longer than any UTF-8 sequence
301d3d20 2030 * they can end up as. */
ba7cea30 2031
c7f1f016
NIS
2032 /* We need to map to chars to ASCII before doing the tests
2033 to cover EBCDIC
2034 */
c4d5f83a 2035 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2036 if (!has_utf8 && uv > 255) {
301d3d20
JH
2037 /* Might need to recode whatever we have
2038 * accumulated so far if it contains any
2039 * hibit chars.
2040 *
2041 * (Can't we keep track of that and avoid
2042 * this rescan? --jhi)
012bcf8d 2043 */
c7f1f016 2044 int hicount = 0;
63cd0674
NIS
2045 U8 *c;
2046 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2047 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2048 hicount++;
db42d148 2049 }
012bcf8d 2050 }
63cd0674 2051 if (hicount) {
9d4ba2ae 2052 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2053 U8 *src, *dst;
2054 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2055 src = (U8 *)d - 1;
2056 dst = src+hicount;
2057 d += hicount;
cfd0369c 2058 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2059 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2060 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2061 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2062 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2063 }
2064 else {
63cd0674 2065 *dst-- = *src;
012bcf8d 2066 }
c7f1f016 2067 src--;
012bcf8d
GS
2068 }
2069 }
2070 }
2071
9aa983d2 2072 if (has_utf8 || uv > 255) {
9041c2e3 2073 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2074 has_utf8 = TRUE;
f9a63242
JH
2075 if (PL_lex_inwhat == OP_TRANS &&
2076 PL_sublex_info.sub_op) {
2077 PL_sublex_info.sub_op->op_private |=
2078 (PL_lex_repl ? OPpTRANS_FROM_UTF
2079 : OPpTRANS_TO_UTF);
f9a63242 2080 }
012bcf8d 2081 }
a0ed51b3 2082 else {
012bcf8d 2083 *d++ = (char)uv;
a0ed51b3 2084 }
012bcf8d
GS
2085 }
2086 else {
c4d5f83a 2087 *d++ = (char) uv;
a0ed51b3 2088 }
79072805 2089 continue;
02aa26ce 2090
b239daa5 2091 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2092 case 'N':
55eda711 2093 ++s;
423cee85
JH
2094 if (*s == '{') {
2095 char* e = strchr(s, '}');
155aba94 2096 SV *res;
423cee85 2097 STRLEN len;
cfd0369c 2098 const char *str;
4e553d73 2099
423cee85 2100 if (!e) {
5777a3f7 2101 yyerror("Missing right brace on \\N{}");
423cee85
JH
2102 e = s - 1;
2103 goto cont_scan;
2104 }
dbc0d4f2
JH
2105 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2106 /* \N{U+...} */
2107 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2108 PERL_SCAN_DISALLOW_PREFIX;
2109 s += 3;
2110 len = e - s;
2111 uv = grok_hex(s, &len, &flags, NULL);
2112 s = e + 1;
2113 goto NUM_ESCAPE_INSERT;
2114 }
55eda711 2115 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2116 res = new_constant( NULL, 0, "charnames",
a0714e2c 2117 res, NULL, "\\N{...}" );
f9a63242
JH
2118 if (has_utf8)
2119 sv_utf8_upgrade(res);
cfd0369c 2120 str = SvPV_const(res,len);
1c47067b
JH
2121#ifdef EBCDIC_NEVER_MIND
2122 /* charnames uses pack U and that has been
2123 * recently changed to do the below uni->native
2124 * mapping, so this would be redundant (and wrong,
2125 * the code point would be doubly converted).
2126 * But leave this in just in case the pack U change
2127 * gets revoked, but the semantics is still
2128 * desireable for charnames. --jhi */
cddc7ef4 2129 {
cfd0369c 2130 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2131
2132 if (uv < 0x100) {
89ebb4a3 2133 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2134
2135 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2136 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2137 str = SvPV_const(res, len);
cddc7ef4
JH
2138 }
2139 }
2140#endif
89491803 2141 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2142 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2143 SvCUR_set(sv, d - ostart);
2144 SvPOK_on(sv);
e4f3eed8 2145 *d = '\0';
f08d6ad9 2146 sv_utf8_upgrade(sv);
d2f449dd 2147 /* this just broke our allocation above... */
eb160463 2148 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2149 d = SvPVX(sv) + SvCUR(sv);
89491803 2150 has_utf8 = TRUE;
f08d6ad9 2151 }
eb160463 2152 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2153 const char * const odest = SvPVX_const(sv);
423cee85 2154
8973db79 2155 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2156 d = SvPVX(sv) + (d - odest);
2157 }
2158 Copy(str, d, len, char);
2159 d += len;
2160 SvREFCNT_dec(res);
2161 cont_scan:
2162 s = e + 1;
2163 }
2164 else
5777a3f7 2165 yyerror("Missing braces on \\N{}");
423cee85
JH
2166 continue;
2167
02aa26ce 2168 /* \c is a control character */
79072805
LW
2169 case 'c':
2170 s++;
961ce445 2171 if (s < send) {
ba210ebe 2172 U8 c = *s++;
c7f1f016
NIS
2173#ifdef EBCDIC
2174 if (isLOWER(c))
2175 c = toUPPER(c);
2176#endif
db42d148 2177 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2178 }
961ce445
RGS
2179 else {
2180 yyerror("Missing control char name in \\c");
2181 }
79072805 2182 continue;
02aa26ce
NT
2183
2184 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2185 case 'b':
db42d148 2186 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2187 break;
2188 case 'n':
db42d148 2189 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2190 break;
2191 case 'r':
db42d148 2192 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2193 break;
2194 case 'f':
db42d148 2195 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2196 break;
2197 case 't':
db42d148 2198 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2199 break;
34a3fe2a 2200 case 'e':
db42d148 2201 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2202 break;
2203 case 'a':
db42d148 2204 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2205 break;
02aa26ce
NT
2206 } /* end switch */
2207
79072805
LW
2208 s++;
2209 continue;
02aa26ce 2210 } /* end if (backslash) */
4c3a8340
TS
2211#ifdef EBCDIC
2212 else
2213 literal_endpoint++;
2214#endif
02aa26ce 2215
f9a63242 2216 default_action:
2b9d42f0
NIS
2217 /* If we started with encoded form, or already know we want it
2218 and then encode the next character */
2219 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2220 STRLEN len = 1;
5f66b61c
AL
2221 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2222 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2223 s += len;
2224 if (need > len) {
2225 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2226 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2227 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2228 }
5f66b61c 2229 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2230 has_utf8 = TRUE;
2231 }
2232 else {
2233 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2234 }
02aa26ce
NT
2235 } /* while loop to process each character */
2236
2237 /* terminate the string and set up the sv */
79072805 2238 *d = '\0';
95a20fc0 2239 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2240 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2241 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2242
79072805 2243 SvPOK_on(sv);
9f4817db 2244 if (PL_encoding && !has_utf8) {
d0063567
DK
2245 sv_recode_to_utf8(sv, PL_encoding);
2246 if (SvUTF8(sv))
2247 has_utf8 = TRUE;
9f4817db 2248 }
2b9d42f0 2249 if (has_utf8) {
7e2040f0 2250 SvUTF8_on(sv);
2b9d42f0 2251 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2252 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2253 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2254 }
2255 }
79072805 2256
02aa26ce 2257 /* shrink the sv if we allocated more than we used */
79072805 2258 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2259 SvPV_shrink_to_cur(sv);
79072805 2260 }
02aa26ce 2261
9b599b2a 2262 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2263 if (s > PL_bufptr) {
2264 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2265 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2266 sv, NULL,
4e553d73 2267 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2268 ? "tr"
3280af22 2269 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2270 ? "s"
2271 : "qq")));
79072805 2272 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2273 } else
8990e307 2274 SvREFCNT_dec(sv);
79072805
LW
2275 return s;
2276}
2277
ffb4593c
NT
2278/* S_intuit_more
2279 * Returns TRUE if there's more to the expression (e.g., a subscript),
2280 * FALSE otherwise.
ffb4593c
NT
2281 *
2282 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2283 *
2284 * ->[ and ->{ return TRUE
2285 * { and [ outside a pattern are always subscripts, so return TRUE
2286 * if we're outside a pattern and it's not { or [, then return FALSE
2287 * if we're in a pattern and the first char is a {
2288 * {4,5} (any digits around the comma) returns FALSE
2289 * if we're in a pattern and the first char is a [
2290 * [] returns FALSE
2291 * [SOMETHING] has a funky algorithm to decide whether it's a
2292 * character class or not. It has to deal with things like
2293 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2294 * anything else returns TRUE
2295 */
2296
9cbb5ea2
GS
2297/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2298
76e3520e 2299STATIC int
cea2e8a9 2300S_intuit_more(pTHX_ register char *s)
79072805 2301{
97aff369 2302 dVAR;
3280af22 2303 if (PL_lex_brackets)
79072805
LW
2304 return TRUE;
2305 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2306 return TRUE;
2307 if (*s != '{' && *s != '[')
2308 return FALSE;
3280af22 2309 if (!PL_lex_inpat)
79072805
LW
2310 return TRUE;
2311
2312 /* In a pattern, so maybe we have {n,m}. */
2313 if (*s == '{') {
2314 s++;
2315 if (!isDIGIT(*s))
2316 return TRUE;
2317 while (isDIGIT(*s))
2318 s++;
2319 if (*s == ',')
2320 s++;
2321 while (isDIGIT(*s))
2322 s++;
2323 if (*s == '}')
2324 return FALSE;
2325 return TRUE;
2326
2327 }
2328
2329 /* On the other hand, maybe we have a character class */
2330
2331 s++;
2332 if (*s == ']' || *s == '^')
2333 return FALSE;
2334 else {
ffb4593c 2335 /* this is terrifying, and it works */
79072805
LW
2336 int weight = 2; /* let's weigh the evidence */
2337 char seen[256];
f27ffc4a 2338 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2339 const char * const send = strchr(s,']');
3280af22 2340 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2341
2342 if (!send) /* has to be an expression */
2343 return TRUE;
2344
2345 Zero(seen,256,char);
2346 if (*s == '$')
2347 weight -= 3;
2348 else if (isDIGIT(*s)) {
2349 if (s[1] != ']') {
2350 if (isDIGIT(s[1]) && s[2] == ']')
2351 weight -= 10;
2352 }
2353 else
2354 weight -= 100;
2355 }
2356 for (; s < send; s++) {
2357 last_un_char = un_char;
2358 un_char = (unsigned char)*s;
2359 switch (*s) {
2360 case '@':
2361 case '&':
2362 case '$':
2363 weight -= seen[un_char] * 10;
7e2040f0 2364 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2365 int len;
8903cb82 2366 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2367 len = (int)strlen(tmpbuf);
2368 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2369 weight -= 100;
2370 else
2371 weight -= 10;
2372 }
2373 else if (*s == '$' && s[1] &&
93a17b20
LW
2374 strchr("[#!%*<>()-=",s[1])) {
2375 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2376 weight -= 10;
2377 else
2378 weight -= 1;
2379 }
2380 break;
2381 case '\\':
2382 un_char = 254;
2383 if (s[1]) {
93a17b20 2384 if (strchr("wds]",s[1]))
79072805
LW
2385 weight += 100;
2386 else if (seen['\''] || seen['"'])
2387 weight += 1;
93a17b20 2388 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2389 weight += 40;
2390 else if (isDIGIT(s[1])) {
2391 weight += 40;
2392 while (s[1] && isDIGIT(s[1]))
2393 s++;
2394 }
2395 }
2396 else
2397 weight += 100;
2398 break;
2399 case '-':
2400 if (s[1] == '\\')
2401 weight += 50;
93a17b20 2402 if (strchr("aA01! ",last_un_char))
79072805 2403 weight += 30;
93a17b20 2404 if (strchr("zZ79~",s[1]))
79072805 2405 weight += 30;
f27ffc4a
GS
2406 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2407 weight -= 5; /* cope with negative subscript */
79072805
LW
2408 break;
2409 default:
3792a11b
NC
2410 if (!isALNUM(last_un_char)
2411 && !(last_un_char == '$' || last_un_char == '@'
2412 || last_un_char == '&')
2413 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2414 char *d = tmpbuf;
2415 while (isALPHA(*s))
2416 *d++ = *s++;
2417 *d = '\0';
2418 if (keyword(tmpbuf, d - tmpbuf))
2419 weight -= 150;
2420 }
2421 if (un_char == last_un_char + 1)
2422 weight += 5;
2423 weight -= seen[un_char];
2424 break;
2425 }
2426 seen[un_char]++;
2427 }
2428 if (weight >= 0) /* probably a character class */
2429 return FALSE;
2430 }
2431
2432 return TRUE;
2433}
ffed7fef 2434
ffb4593c
NT
2435/*
2436 * S_intuit_method
2437 *
2438 * Does all the checking to disambiguate
2439 * foo bar
2440 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2441 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2442 *
2443 * First argument is the stuff after the first token, e.g. "bar".
2444 *
2445 * Not a method if bar is a filehandle.
2446 * Not a method if foo is a subroutine prototyped to take a filehandle.
2447 * Not a method if it's really "Foo $bar"
2448 * Method if it's "foo $bar"
2449 * Not a method if it's really "print foo $bar"
2450 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2451 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2452 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2453 * =>
2454 */
2455
76e3520e 2456STATIC int
62d55b22 2457S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2458{
97aff369 2459 dVAR;
a0d0e21e 2460 char *s = start + (*start == '$');
3280af22 2461 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2462 STRLEN len;
2463 GV* indirgv;
5db06880
NC
2464#ifdef PERL_MAD
2465 int soff;
2466#endif
a0d0e21e
LW
2467
2468 if (gv) {
62d55b22 2469 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2470 return 0;
62d55b22
NC
2471 if (cv) {
2472 if (SvPOK(cv)) {
2473 const char *proto = SvPVX_const(cv);
2474 if (proto) {
2475 if (*proto == ';')
2476 proto++;
2477 if (*proto == '*')
2478 return 0;
2479 }
b6c543e3
IZ
2480 }
2481 } else
c35e046a 2482 gv = NULL;
a0d0e21e 2483 }
8903cb82 2484 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2485 /* start is the beginning of the possible filehandle/object,
2486 * and s is the end of it
2487 * tmpbuf is a copy of it
2488 */
2489
a0d0e21e 2490 if (*start == '$') {
3280af22 2491 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2492 return 0;
5db06880
NC
2493#ifdef PERL_MAD
2494 len = start - SvPVX(PL_linestr);
2495#endif
29595ff2 2496 s = PEEKSPACE(s);
5db06880
NC
2497#ifdef PERLMAD
2498 start = SvPVX(PL_linestr) + len;
2499#endif
3280af22
NIS
2500 PL_bufptr = start;
2501 PL_expect = XREF;
a0d0e21e
LW
2502 return *s == '(' ? FUNCMETH : METHOD;
2503 }
2504 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2505 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2506 len -= 2;
2507 tmpbuf[len] = '\0';
5db06880
NC
2508#ifdef PERL_MAD
2509 soff = s - SvPVX(PL_linestr);
2510#endif
c3e0f903
GS
2511 goto bare_package;
2512 }
90e5519e 2513 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2514 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2515 return 0;
2516 /* filehandle or package name makes it a method */
89bfa8cd 2517 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2518#ifdef PERL_MAD
2519 soff = s - SvPVX(PL_linestr);
2520#endif
29595ff2 2521 s = PEEKSPACE(s);
3280af22 2522 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2523 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2524 bare_package:
cd81e915 2525 start_force(PL_curforce);
9ded7720 2526 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2527 newSVpvn(tmpbuf,len));
9ded7720 2528 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2529 if (PL_madskills)
2530 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2531 PL_expect = XTERM;
a0d0e21e 2532 force_next(WORD);
3280af22 2533 PL_bufptr = s;
5db06880
NC
2534#ifdef PERL_MAD
2535 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2536#endif
a0d0e21e
LW
2537 return *s == '(' ? FUNCMETH : METHOD;
2538 }
2539 }
2540 return 0;
2541}
2542
ffb4593c
NT
2543/*
2544 * S_incl_perldb
2545 * Return a string of Perl code to load the debugger. If PERL5DB
2546 * is set, it will return the contents of that, otherwise a
2547 * compile-time require of perl5db.pl.
2548 */
2549
bfed75c6 2550STATIC const char*
cea2e8a9 2551S_incl_perldb(pTHX)
a0d0e21e 2552{
97aff369 2553 dVAR;
3280af22 2554 if (PL_perldb) {
9d4ba2ae 2555 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2556
2557 if (pdb)
2558 return pdb;
93189314 2559 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2560 return "BEGIN { require 'perl5db.pl' }";
2561 }
2562 return "";
2563}
2564
2565
16d20bd9 2566/* Encoded script support. filter_add() effectively inserts a
4e553d73 2567 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2568 * Note that the filter function only applies to the current source file
2569 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2570 *
2571 * The datasv parameter (which may be NULL) can be used to pass
2572 * private data to this instance of the filter. The filter function
2573 * can recover the SV using the FILTER_DATA macro and use it to
2574 * store private buffers and state information.
2575 *
2576 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2577 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2578 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2579 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2580 * private use must be set using malloc'd pointers.
2581 */
16d20bd9
AD
2582
2583SV *
864dbfa3 2584Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2585{
97aff369 2586 dVAR;
f4c556ac 2587 if (!funcp)
a0714e2c 2588 return NULL;
f4c556ac 2589
3280af22
NIS
2590 if (!PL_rsfp_filters)
2591 PL_rsfp_filters = newAV();
16d20bd9 2592 if (!datasv)
561b68a9 2593 datasv = newSV(0);
862a34c6 2594 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2595 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2596 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2597 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2598 FPTR2DPTR(void *, IoANY(datasv)),
2599 SvPV_nolen(datasv)));
3280af22
NIS
2600 av_unshift(PL_rsfp_filters, 1);
2601 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2602 return(datasv);
2603}
4e553d73 2604
16d20bd9
AD
2605
2606/* Delete most recently added instance of this filter function. */
a0d0e21e 2607void
864dbfa3 2608Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2609{
97aff369 2610 dVAR;
e0c19803 2611 SV *datasv;
24801a4b 2612
33073adb 2613#ifdef DEBUGGING
55662e27
JH
2614 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2615 FPTR2DPTR(void*, funcp)));
33073adb 2616#endif
3280af22 2617 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2618 return;
2619 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2620 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2621 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2622 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2623 IoANY(datasv) = (void *)NULL;
3280af22 2624 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2625
16d20bd9
AD
2626 return;
2627 }
2628 /* we need to search for the correct entry and clear it */
cea2e8a9 2629 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2630}
2631
2632
1de9afcd
RGS
2633/* Invoke the idxth filter function for the current rsfp. */
2634/* maxlen 0 = read one text line */
16d20bd9 2635I32
864dbfa3 2636Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2637{
97aff369 2638 dVAR;
16d20bd9
AD
2639 filter_t funcp;
2640 SV *datasv = NULL;
f482118e
NC
2641 /* This API is bad. It should have been using unsigned int for maxlen.
2642 Not sure if we want to change the API, but if not we should sanity
2643 check the value here. */
39cd7a59
NC
2644 const unsigned int correct_length
2645 = maxlen < 0 ?
2646#ifdef PERL_MICRO
2647 0x7FFFFFFF
2648#else
2649 INT_MAX
2650#endif
2651 : maxlen;
e50aee73 2652
3280af22 2653 if (!PL_rsfp_filters)
16d20bd9 2654 return -1;
1de9afcd 2655 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2656 /* Provide a default input filter to make life easy. */
2657 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2658 DEBUG_P(PerlIO_printf(Perl_debug_log,
2659 "filter_read %d: from rsfp\n", idx));
f482118e 2660 if (correct_length) {
16d20bd9
AD
2661 /* Want a block */
2662 int len ;
f54cb97a 2663 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2664
2665 /* ensure buf_sv is large enough */
f482118e
NC
2666 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2667 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2668 correct_length)) <= 0) {
3280af22 2669 if (PerlIO_error(PL_rsfp))
37120919
AD
2670 return -1; /* error */
2671 else
2672 return 0 ; /* end of file */
2673 }
16d20bd9
AD
2674 SvCUR_set(buf_sv, old_len + len) ;
2675 } else {
2676 /* Want a line */
3280af22
NIS
2677 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2678 if (PerlIO_error(PL_rsfp))
37120919
AD
2679 return -1; /* error */
2680 else
2681 return 0 ; /* end of file */
2682 }
16d20bd9
AD
2683 }
2684 return SvCUR(buf_sv);
2685 }
2686 /* Skip this filter slot if filter has been deleted */
1de9afcd 2687 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2688 DEBUG_P(PerlIO_printf(Perl_debug_log,
2689 "filter_read %d: skipped (filter deleted)\n",
2690 idx));
f482118e 2691 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2692 }
2693 /* Get function pointer hidden within datasv */
8141890a 2694 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2695 DEBUG_P(PerlIO_printf(Perl_debug_log,
2696 "filter_read %d: via function %p (%s)\n",
ca0270c4 2697 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2698 /* Call function. The function is expected to */
2699 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2700 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2701 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2702}
2703
76e3520e 2704STATIC char *
cea2e8a9 2705S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2706{
97aff369 2707 dVAR;
c39cd008 2708#ifdef PERL_CR_FILTER
3280af22 2709 if (!PL_rsfp_filters) {
c39cd008 2710 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2711 }
2712#endif
3280af22 2713 if (PL_rsfp_filters) {
55497cff 2714 if (!append)
2715 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2716 if (FILTER_READ(0, sv, 0) > 0)
2717 return ( SvPVX(sv) ) ;
2718 else
bd61b366 2719 return NULL ;
16d20bd9 2720 }
9d116dd7 2721 else
fd049845 2722 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2723}
2724
01ec43d0 2725STATIC HV *
7fc63493 2726S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2727{
97aff369 2728 dVAR;
def3634b
GS
2729 GV *gv;
2730
01ec43d0 2731 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2732 return PL_curstash;
2733
2734 if (len > 2 &&
2735 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2736 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2737 {
2738 return GvHV(gv); /* Foo:: */
def3634b
GS
2739 }
2740
2741 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2742 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2743 if (gv && GvCV(gv)) {
2744 SV * const sv = cv_const_sv(GvCV(gv));
2745 if (sv)
83003860 2746 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2747 }
2748
2749 return gv_stashpv(pkgname, FALSE);
2750}
a0d0e21e 2751
5db06880
NC
2752#ifdef PERL_MAD
2753 /*
2754 * Perl_madlex
2755 * The intent of this yylex wrapper is to minimize the changes to the
2756 * tokener when we aren't interested in collecting madprops. It remains
2757 * to be seen how successful this strategy will be...
2758 */
2759
2760int
2761Perl_madlex(pTHX)
2762{
2763 int optype;
2764 char *s = PL_bufptr;
2765
cd81e915
NC
2766 /* make sure PL_thiswhite is initialized */
2767 PL_thiswhite = 0;
2768 PL_thismad = 0;
5db06880 2769
cd81e915 2770 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2771 if (PL_pending_ident)
2772 return S_pending_ident(aTHX);
2773
2774 /* previous token ate up our whitespace? */
cd81e915
NC
2775 if (!PL_lasttoke && PL_nextwhite) {
2776 PL_thiswhite = PL_nextwhite;
2777 PL_nextwhite = 0;
5db06880
NC
2778 }
2779
2780 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2781 PL_realtokenstart = -1;
2782 PL_thistoken = 0;
5db06880
NC
2783 optype = yylex();
2784 s = PL_bufptr;
cd81e915 2785 assert(PL_curforce < 0);
5db06880 2786
cd81e915
NC
2787 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2788 if (!PL_thistoken) {
2789 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2790 PL_thistoken = newSVpvn("",0);
5db06880 2791 else {
c35e046a 2792 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2793 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2794 }
2795 }
cd81e915
NC
2796 if (PL_thismad) /* install head */
2797 CURMAD('X', PL_thistoken);
5db06880
NC
2798 }
2799
2800 /* last whitespace of a sublex? */
cd81e915
NC
2801 if (optype == ')' && PL_endwhite) {
2802 CURMAD('X', PL_endwhite);
5db06880
NC
2803 }
2804
cd81e915 2805 if (!PL_thismad) {
5db06880
NC
2806
2807 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2808 if (!PL_thiswhite && !PL_endwhite && !optype) {
2809 sv_free(PL_thistoken);
2810 PL_thistoken = 0;
5db06880
NC
2811 return 0;
2812 }
2813
2814 /* put off final whitespace till peg */
2815 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2816 PL_nextwhite = PL_thiswhite;
2817 PL_thiswhite = 0;
5db06880 2818 }
cd81e915
NC
2819 else if (PL_thisopen) {
2820 CURMAD('q', PL_thisopen);
2821 if (PL_thistoken)
2822 sv_free(PL_thistoken);
2823 PL_thistoken = 0;
5db06880
NC
2824 }
2825 else {
2826 /* Store actual token text as madprop X */
cd81e915 2827 CURMAD('X', PL_thistoken);
5db06880
NC
2828 }
2829
cd81e915 2830 if (PL_thiswhite) {
5db06880 2831 /* add preceding whitespace as madprop _ */
cd81e915 2832 CURMAD('_', PL_thiswhite);
5db06880
NC
2833 }
2834
cd81e915 2835 if (PL_thisstuff) {
5db06880 2836 /* add quoted material as madprop = */
cd81e915 2837 CURMAD('=', PL_thisstuff);
5db06880
NC
2838 }
2839
cd81e915 2840 if (PL_thisclose) {
5db06880 2841 /* add terminating quote as madprop Q */
cd81e915 2842 CURMAD('Q', PL_thisclose);
5db06880
NC
2843 }
2844 }
2845
2846 /* special processing based on optype */
2847
2848 switch (optype) {
2849
2850 /* opval doesn't need a TOKEN since it can already store mp */
2851 case WORD:
2852 case METHOD:
2853 case FUNCMETH:
2854 case THING:
2855 case PMFUNC:
2856 case PRIVATEREF:
2857 case FUNC0SUB:
2858 case UNIOPSUB:
2859 case LSTOPSUB:
2860 if (yylval.opval)
cd81e915
NC
2861 append_madprops(PL_thismad, yylval.opval, 0);
2862 PL_thismad = 0;
5db06880
NC
2863 return optype;
2864
2865 /* fake EOF */
2866 case 0:
2867 optype = PEG;
cd81e915
NC
2868 if (PL_endwhite) {
2869 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2870 PL_endwhite = 0;
5db06880
NC
2871 }
2872 break;
2873
2874 case ']':
2875 case '}':
cd81e915 2876 if (PL_faketokens)
5db06880
NC
2877 break;
2878 /* remember any fake bracket that lexer is about to discard */
2879 if (PL_lex_brackets == 1 &&
2880 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2881 {
2882 s = PL_bufptr;
2883 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2884 s++;
2885 if (*s == '}') {
cd81e915
NC
2886 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2887 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2888 PL_thiswhite = 0;
5db06880
NC
2889 PL_bufptr = s - 1;
2890 break; /* don't bother looking for trailing comment */
2891 }
2892 else
2893 s = PL_bufptr;
2894 }
2895 if (optype == ']')
2896 break;
2897 /* FALLTHROUGH */
2898
2899 /* attach a trailing comment to its statement instead of next token */
2900 case ';':
cd81e915 2901 if (PL_faketokens)
5db06880
NC
2902 break;
2903 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2904 s = PL_bufptr;
2905 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2906 s++;
2907 if (*s == '\n' || *s == '#') {
2908 while (s < PL_bufend && *s != '\n')
2909 s++;
2910 if (s < PL_bufend)
2911 s++;
cd81e915
NC
2912 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2913 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2914 PL_thiswhite = 0;
5db06880
NC
2915 PL_bufptr = s;
2916 }
2917 }
2918 break;
2919
2920 /* pval */
2921 case LABEL:
2922 break;
2923
2924 /* ival */
2925 default:
2926 break;
2927
2928 }
2929
2930 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
2931 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
2932 PL_thismad = 0;
5db06880
NC
2933 return optype;
2934}
2935#endif
2936
468aa647 2937STATIC char *
cc6ed77d 2938S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2939 dVAR;
468aa647
RGS
2940 if (PL_expect != XSTATE)
2941 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2942 is_use ? "use" : "no"));
29595ff2 2943 s = SKIPSPACE1(s);
468aa647
RGS
2944 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2945 s = force_version(s, TRUE);
29595ff2 2946 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 2947 start_force(PL_curforce);
9ded7720 2948 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2949 force_next(WORD);
2950 }
2951 else if (*s == 'v') {
2952 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2953 s = force_version(s, FALSE);
2954 }
2955 }
2956 else {
2957 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2958 s = force_version(s, FALSE);
2959 }
2960 yylval.ival = is_use;
2961 return s;
2962}
748a9306 2963#ifdef DEBUGGING
27da23d5 2964 static const char* const exp_name[] =
09bef843 2965 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2966 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2967 };
748a9306 2968#endif
463ee0b2 2969
02aa26ce
NT
2970/*
2971 yylex
2972
2973 Works out what to call the token just pulled out of the input
2974 stream. The yacc parser takes care of taking the ops we return and
2975 stitching them into a tree.
2976
2977 Returns:
2978 PRIVATEREF
2979
2980 Structure:
2981 if read an identifier
2982 if we're in a my declaration
2983 croak if they tried to say my($foo::bar)
2984 build the ops for a my() declaration
2985 if it's an access to a my() variable
2986 are we in a sort block?
2987 croak if my($a); $a <=> $b
2988 build ops for access to a my() variable
2989 if in a dq string, and they've said @foo and we can't find @foo
2990 croak
2991 build ops for a bareword
2992 if we already built the token before, use it.
2993*/
2994
20141f0e 2995
dba4d153
JH
2996#ifdef __SC__
2997#pragma segment Perl_yylex
2998#endif
dba4d153 2999int
dba4d153 3000Perl_yylex(pTHX)
20141f0e 3001{
97aff369 3002 dVAR;
3afc138a 3003 register char *s = PL_bufptr;
378cc40b 3004 register char *d;
463ee0b2 3005 STRLEN len;
aa7440fb 3006 bool bof = FALSE;
a687059c 3007
bbf60fe6 3008 DEBUG_T( {
396482e1 3009 SV* tmp = newSVpvs("");
b6007c36
DM
3010 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3011 (IV)CopLINE(PL_curcop),
3012 lex_state_names[PL_lex_state],
3013 exp_name[PL_expect],
3014 pv_display(tmp, s, strlen(s), 0, 60));
3015 SvREFCNT_dec(tmp);
bbf60fe6 3016 } );
02aa26ce 3017 /* check if there's an identifier for us to look at */
ba979b31 3018 if (PL_pending_ident)
bbf60fe6 3019 return REPORT(S_pending_ident(aTHX));
bbce6d69 3020
02aa26ce
NT
3021 /* no identifier pending identification */
3022
3280af22 3023 switch (PL_lex_state) {
79072805
LW
3024#ifdef COMMENTARY
3025 case LEX_NORMAL: /* Some compilers will produce faster */
3026 case LEX_INTERPNORMAL: /* code if we comment these out. */
3027 break;
3028#endif
3029
09bef843 3030 /* when we've already built the next token, just pull it out of the queue */
79072805 3031 case LEX_KNOWNEXT:
5db06880
NC
3032#ifdef PERL_MAD
3033 PL_lasttoke--;
3034 yylval = PL_nexttoke[PL_lasttoke].next_val;
3035 if (PL_madskills) {
cd81e915 3036 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3037 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3038 if (PL_thismad && PL_thismad->mad_key == '_') {
3039 PL_thiswhite = (SV*)PL_thismad->mad_val;
3040 PL_thismad->mad_val = 0;
3041 mad_free(PL_thismad);
3042 PL_thismad = 0;
5db06880
NC
3043 }
3044 }
3045 if (!PL_lasttoke) {
3046 PL_lex_state = PL_lex_defer;
3047 PL_expect = PL_lex_expect;
3048 PL_lex_defer = LEX_NORMAL;
3049 if (!PL_nexttoke[PL_lasttoke].next_type)
3050 return yylex();
3051 }
3052#else
3280af22 3053 PL_nexttoke--;
5db06880 3054 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3055 if (!PL_nexttoke) {
3056 PL_lex_state = PL_lex_defer;
3057 PL_expect = PL_lex_expect;
3058 PL_lex_defer = LEX_NORMAL;
463ee0b2 3059 }
5db06880
NC
3060#endif
3061#ifdef PERL_MAD
3062 /* FIXME - can these be merged? */
3063 return(PL_nexttoke[PL_lasttoke].next_type);
3064#else
bbf60fe6 3065 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3066#endif
79072805 3067
02aa26ce 3068 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3069 when we get here, PL_bufptr is at the \
02aa26ce 3070 */
79072805
LW
3071 case LEX_INTERPCASEMOD:
3072#ifdef DEBUGGING
3280af22 3073 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3074 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3075#endif
02aa26ce 3076 /* handle \E or end of string */
3280af22 3077 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3078 /* if at a \E */
3280af22 3079 if (PL_lex_casemods) {
f54cb97a 3080 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3081 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3082
3792a11b
NC
3083 if (PL_bufptr != PL_bufend
3084 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3085 PL_bufptr += 2;
3086 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3087#ifdef PERL_MAD
3088 if (PL_madskills)
cd81e915 3089 PL_thistoken = newSVpvn("\\E",2);
5db06880 3090#endif
a0d0e21e 3091 }
bbf60fe6 3092 return REPORT(')');
79072805 3093 }
5db06880
NC
3094#ifdef PERL_MAD
3095 while (PL_bufptr != PL_bufend &&
3096 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3097 if (!PL_thiswhite)
3098 PL_thiswhite = newSVpvn("",0);
3099 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3100 PL_bufptr += 2;
3101 }
3102#else
3280af22
NIS
3103 if (PL_bufptr != PL_bufend)
3104 PL_bufptr += 2;
5db06880 3105#endif
3280af22 3106 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3107 return yylex();
79072805
LW
3108 }
3109 else {
607df283 3110 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3111 "### Saw case modifier\n"); });
3280af22 3112 s = PL_bufptr + 1;
6e909404 3113 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3114#ifdef PERL_MAD
cd81e915
NC
3115 if (!PL_thiswhite)
3116 PL_thiswhite = newSVpvn("",0);
3117 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3118#endif
89122651 3119 PL_bufptr = s + 3;
6e909404
JH
3120 PL_lex_state = LEX_INTERPCONCAT;
3121 return yylex();
a0d0e21e 3122 }
6e909404 3123 else {
90771dc0 3124 I32 tmp;
5db06880
NC
3125 if (!PL_madskills) /* when just compiling don't need correct */
3126 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3127 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3128 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3129 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3130 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3131 return REPORT(')');
6e909404
JH
3132 }
3133 if (PL_lex_casemods > 10)
3134 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3135 PL_lex_casestack[PL_lex_casemods++] = *s;
3136 PL_lex_casestack[PL_lex_casemods] = '\0';
3137 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3138 start_force(PL_curforce);
9ded7720 3139 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3140 force_next('(');
cd81e915 3141 start_force(PL_curforce);
6e909404 3142 if (*s == 'l')
9ded7720 3143 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3144 else if (*s == 'u')
9ded7720 3145 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3146 else if (*s == 'L')
9ded7720 3147 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3148 else if (*s == 'U')
9ded7720 3149 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3150 else if (*s == 'Q')
9ded7720 3151 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3152 else
3153 Perl_croak(aTHX_ "panic: yylex");
5db06880 3154 if (PL_madskills) {
d4c19fe8 3155 SV* const tmpsv = newSVpvn("",0);
5db06880
NC
3156 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3157 curmad('_', tmpsv);
3158 }
6e909404 3159 PL_bufptr = s + 1;
a0d0e21e 3160 }
79072805 3161 force_next(FUNC);
3280af22
NIS
3162 if (PL_lex_starts) {
3163 s = PL_bufptr;
3164 PL_lex_starts = 0;
5db06880
NC
3165#ifdef PERL_MAD
3166 if (PL_madskills) {
cd81e915
NC
3167 if (PL_thistoken)
3168 sv_free(PL_thistoken);
3169 PL_thistoken = newSVpvn("",0);
5db06880
NC
3170 }
3171#endif
131b3ad0
DM
3172 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3173 if (PL_lex_casemods == 1 && PL_lex_inpat)
3174 OPERATOR(',');
3175 else
3176 Aop(OP_CONCAT);
79072805
LW
3177 }
3178 else
cea2e8a9 3179 return yylex();
79072805
LW
3180 }
3181
55497cff 3182 case LEX_INTERPPUSH:
bbf60fe6 3183 return REPORT(sublex_push());
55497cff 3184
79072805 3185 case LEX_INTERPSTART:
3280af22 3186 if (PL_bufptr == PL_bufend)
bbf60fe6 3187 return REPORT(sublex_done());
607df283 3188 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3189 "### Interpolated variable\n"); });
3280af22
NIS
3190 PL_expect = XTERM;
3191 PL_lex_dojoin = (*PL_bufptr == '@');
3192 PL_lex_state = LEX_INTERPNORMAL;
3193 if (PL_lex_dojoin) {
cd81e915 3194 start_force(PL_curforce);
9ded7720 3195 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3196 force_next(',');
cd81e915 3197 start_force(PL_curforce);
a0d0e21e 3198 force_ident("\"", '$');
cd81e915 3199 start_force(PL_curforce);
9ded7720 3200 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3201 force_next('$');
cd81e915 3202 start_force(PL_curforce);
9ded7720 3203 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3204 force_next('(');
cd81e915 3205 start_force(PL_curforce);
9ded7720 3206 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3207 force_next(FUNC);
3208 }
3280af22
NIS
3209 if (PL_lex_starts++) {
3210 s = PL_bufptr;
5db06880
NC
3211#ifdef PERL_MAD
3212 if (PL_madskills) {
cd81e915
NC
3213 if (PL_thistoken)
3214 sv_free(PL_thistoken);
3215 PL_thistoken = newSVpvn("",0);
5db06880
NC
3216 }
3217#endif
131b3ad0
DM
3218 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3219 if (!PL_lex_casemods && PL_lex_inpat)
3220 OPERATOR(',');
3221 else
3222 Aop(OP_CONCAT);
79072805 3223 }
cea2e8a9 3224 return yylex();
79072805
LW
3225
3226 case LEX_INTERPENDMAYBE:
3280af22
NIS
3227 if (intuit_more(PL_bufptr)) {
3228 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3229 break;
3230 }
3231 /* FALL THROUGH */
3232
3233 case LEX_INTERPEND:
3280af22
NIS
3234 if (PL_lex_dojoin) {
3235 PL_lex_dojoin = FALSE;
3236 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3237#ifdef PERL_MAD
3238 if (PL_madskills) {
cd81e915
NC
3239 if (PL_thistoken)
3240 sv_free(PL_thistoken);
3241 PL_thistoken = newSVpvn("",0);
5db06880
NC
3242 }
3243#endif
bbf60fe6 3244 return REPORT(')');
79072805 3245 }
43a16006 3246 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3247 && SvEVALED(PL_lex_repl))
43a16006 3248 {
e9fa98b2 3249 if (PL_bufptr != PL_bufend)
cea2e8a9 3250 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3251 PL_lex_repl = NULL;
e9fa98b2 3252 }
79072805
LW
3253 /* FALLTHROUGH */
3254 case LEX_INTERPCONCAT:
3255#ifdef DEBUGGING
3280af22 3256 if (PL_lex_brackets)
cea2e8a9 3257 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3258#endif
3280af22 3259 if (PL_bufptr == PL_bufend)
bbf60fe6 3260 return REPORT(sublex_done());
79072805 3261
3280af22
NIS
3262 if (SvIVX(PL_linestr) == '\'') {
3263 SV *sv = newSVsv(PL_linestr);
3264 if (!PL_lex_inpat)
76e3520e 3265 sv = tokeq(sv);
3280af22 3266 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3267 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3268 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3269 s = PL_bufend;
79072805
LW
3270 }
3271 else {
3280af22 3272 s = scan_const(PL_bufptr);
79072805 3273 if (*s == '\\')
3280af22 3274 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3275 else
3280af22 3276 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3277 }
3278
3280af22 3279 if (s != PL_bufptr) {
cd81e915 3280 start_force(PL_curforce);
5db06880
NC
3281 if (PL_madskills) {
3282 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3283 }
9ded7720 3284 NEXTVAL_NEXTTOKE = yylval;
3280af22 3285 PL_expect = XTERM;
79072805 3286 force_next(THING);
131b3ad0 3287 if (PL_lex_starts++) {
5db06880
NC
3288#ifdef PERL_MAD
3289 if (PL_madskills) {
cd81e915
NC
3290 if (PL_thistoken)
3291 sv_free(PL_thistoken);
3292 PL_thistoken = newSVpvn("",0);
5db06880
NC
3293 }
3294#endif
131b3ad0
DM
3295 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3296 if (!PL_lex_casemods && PL_lex_inpat)
3297 OPERATOR(',');
3298 else
3299 Aop(OP_CONCAT);
3300 }
79072805 3301 else {
3280af22 3302 PL_bufptr = s;
cea2e8a9 3303 return yylex();
79072805
LW
3304 }
3305 }
3306
cea2e8a9 3307 return yylex();
a0d0e21e 3308 case LEX_FORMLINE:
3280af22
NIS
3309 PL_lex_state = LEX_NORMAL;
3310 s = scan_formline(PL_bufptr);
3311 if (!PL_lex_formbrack)
a0d0e21e
LW
3312 goto rightbracket;
3313 OPERATOR(';');
79072805
LW
3314 }
3315
3280af22
NIS
3316 s = PL_bufptr;
3317 PL_oldoldbufptr = PL_oldbufptr;
3318 PL_oldbufptr = s;
463ee0b2
LW
3319
3320 retry:
5db06880 3321#ifdef PERL_MAD
cd81e915
NC
3322 if (PL_thistoken) {
3323 sv_free(PL_thistoken);
3324 PL_thistoken = 0;
5db06880 3325 }
cd81e915 3326 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3327#endif
378cc40b
LW
3328 switch (*s) {
3329 default:
7e2040f0 3330 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3331 goto keylookup;
cea2e8a9 3332 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3333 case 4:
3334 case 26:
3335 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3336 case 0:
5db06880
NC
3337#ifdef PERL_MAD
3338 if (PL_madskills)
cd81e915 3339 PL_faketokens = 0;
5db06880 3340#endif
3280af22
NIS
3341 if (!PL_rsfp) {
3342 PL_last_uni = 0;
3343 PL_last_lop = 0;
c5ee2135 3344 if (PL_lex_brackets) {
0bd48802
AL
3345 yyerror(PL_lex_formbrack
3346 ? "Format not terminated"
3347 : "Missing right curly or square bracket");
c5ee2135 3348 }
4e553d73 3349 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3350 "### Tokener got EOF\n");
5f80b19c 3351 } );
79072805 3352 TOKEN(0);
463ee0b2 3353 }
3280af22 3354 if (s++ < PL_bufend)
a687059c 3355 goto retry; /* ignore stray nulls */
3280af22
NIS
3356 PL_last_uni = 0;
3357 PL_last_lop = 0;
3358 if (!PL_in_eval && !PL_preambled) {
3359 PL_preambled = TRUE;
5db06880
NC
3360#ifdef PERL_MAD
3361 if (PL_madskills)
cd81e915 3362 PL_faketokens = 1;
5db06880 3363#endif
3280af22
NIS
3364 sv_setpv(PL_linestr,incl_perldb());
3365 if (SvCUR(PL_linestr))
396482e1 3366 sv_catpvs(PL_linestr,";");
3280af22
NIS
3367 if (PL_preambleav){
3368 while(AvFILLp(PL_preambleav) >= 0) {
3369 SV *tmpsv = av_shift(PL_preambleav);
3370 sv_catsv(PL_linestr, tmpsv);
396482e1 3371 sv_catpvs(PL_linestr, ";");
91b7def8 3372 sv_free(tmpsv);
3373 }
3280af22
NIS
3374 sv_free((SV*)PL_preambleav);
3375 PL_preambleav = NULL;
91b7def8 3376 }
3280af22 3377 if (PL_minus_n || PL_minus_p) {
396482e1 3378 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3379 if (PL_minus_l)
396482e1 3380 sv_catpvs(PL_linestr,"chomp;");
3280af22 3381 if (PL_minus_a) {
3280af22 3382 if (PL_minus_F) {
3792a11b
NC
3383 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3384 || *PL_splitstr == '"')
3280af22 3385 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3386 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3387 else {
c8ef6a4b
NC
3388 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3389 bytes can be used as quoting characters. :-) */
dd374669 3390 const char *splits = PL_splitstr;
91d456ae 3391 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3392 do {
3393 /* Need to \ \s */
dd374669
AL
3394 if (*splits == '\\')
3395 sv_catpvn(PL_linestr, splits, 1);
3396 sv_catpvn(PL_linestr, splits, 1);
3397 } while (*splits++);
48c4c863
NC
3398 /* This loop will embed the trailing NUL of
3399 PL_linestr as the last thing it does before
3400 terminating. */
396482e1 3401 sv_catpvs(PL_linestr, ");");
54310121 3402 }
2304df62
AD
3403 }
3404 else
396482e1 3405 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3406 }
79072805 3407 }
bc9b29db 3408 if (PL_minus_E)
396482e1
GA
3409 sv_catpvs(PL_linestr,"use feature ':5.10';");
3410 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3411 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3412 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3413 PL_last_lop = PL_last_uni = NULL;
3280af22 3414 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3415 SV * const sv = newSV(0);
a0d0e21e
LW
3416
3417 sv_upgrade(sv, SVt_PVMG);
3280af22 3418 sv_setsv(sv,PL_linestr);
0ac0412a 3419 (void)SvIOK_on(sv);
45977657 3420 SvIV_set(sv, 0);
36c7798d 3421 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3422 }
79072805 3423 goto retry;
a687059c 3424 }
e929a76b 3425 do {
aa7440fb 3426 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3427 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3428 fake_eof:
5db06880 3429#ifdef PERL_MAD
cd81e915 3430 PL_realtokenstart = -1;
5db06880 3431#endif
7e28d3af
JH
3432 if (PL_rsfp) {
3433 if (PL_preprocess && !PL_in_eval)
3434 (void)PerlProc_pclose(PL_rsfp);
3435 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3436 PerlIO_clearerr(PL_rsfp);
3437 else
3438 (void)PerlIO_close(PL_rsfp);
4608196e 3439 PL_rsfp = NULL;
7e28d3af
JH
3440 PL_doextract = FALSE;
3441 }
3442 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3443#ifdef PERL_MAD
3444 if (PL_madskills)
cd81e915 3445 PL_faketokens = 1;
5db06880 3446#endif
a23c4656
NC
3447 sv_setpv(PL_linestr,PL_minus_p
3448 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3449 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3450 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3451 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3452 PL_minus_n = PL_minus_p = 0;
3453 goto retry;
3454 }
3455 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3456 PL_last_lop = PL_last_uni = NULL;
c69006e4 3457 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3458 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3459 }
7aa207d6
JH
3460 /* If it looks like the start of a BOM or raw UTF-16,
3461 * check if it in fact is. */
3462 else if (bof &&
3463 (*s == 0 ||
3464 *(U8*)s == 0xEF ||
3465 *(U8*)s >= 0xFE ||
3466 s[1] == 0)) {
226017aa 3467#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3468# ifdef __GNU_LIBRARY__
3469# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3470# define FTELL_FOR_PIPE_IS_BROKEN
3471# endif
e3f494f1
JH
3472# else
3473# ifdef __GLIBC__
3474# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3475# define FTELL_FOR_PIPE_IS_BROKEN
3476# endif
3477# endif
226017aa
DD
3478# endif
3479#endif
3480#ifdef FTELL_FOR_PIPE_IS_BROKEN
3481 /* This loses the possibility to detect the bof
3482 * situation on perl -P when the libc5 is being used.
3483 * Workaround? Maybe attach some extra state to PL_rsfp?
3484 */
3485 if (!PL_preprocess)
7e28d3af 3486 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3487#else
eb160463 3488 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3489#endif
7e28d3af 3490 if (bof) {
3280af22 3491 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3492 s = swallow_bom((U8*)s);
e929a76b 3493 }
378cc40b 3494 }
3280af22 3495 if (PL_doextract) {
a0d0e21e 3496 /* Incest with pod. */
5db06880
NC
3497#ifdef PERL_MAD
3498 if (PL_madskills)
cd81e915 3499 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 3500#endif
a0d0e21e 3501 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3502 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3503 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3504 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3505 PL_last_lop = PL_last_uni = NULL;
3280af22 3506 PL_doextract = FALSE;
a0d0e21e 3507 }
4e553d73 3508 }
463ee0b2 3509 incline(s);
3280af22
NIS
3510 } while (PL_doextract);
3511 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3512 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3513 SV * const sv = newSV(0);
a687059c 3514
93a17b20 3515 sv_upgrade(sv, SVt_PVMG);
3280af22 3516 sv_setsv(sv,PL_linestr);
0ac0412a 3517 (void)SvIOK_on(sv);
45977657 3518 SvIV_set(sv, 0);
36c7798d 3519 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 3520 }
3280af22 3521 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3522 PL_last_lop = PL_last_uni = NULL;
57843af0 3523 if (CopLINE(PL_curcop) == 1) {
3280af22 3524 while (s < PL_bufend && isSPACE(*s))
79072805 3525 s++;
a0d0e21e 3526 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 3527 s++;
5db06880
NC
3528#ifdef PERL_MAD
3529 if (PL_madskills)