This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
gozer took over from gsar (some time ago).
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2
NC
38#ifdef PERL_MAD
39/* XXX these probably need to be made into PL vars */
40static I32 realtokenstart;
41static I32 faketokens = 0;
42static MADPROP *thismad;
43static SV *thistoken;
44static SV *thisopen;
45static SV *thisstuff;
46static SV *thisclose;
47static SV *thiswhite;
48static SV *nextwhite;
49static SV *skipwhite;
50static SV *endwhite;
51static I32 curforce = -1;
52
53# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
5db06880 54# define NEXTVAL_NEXTTOKE PL_nexttoke[curforce].next_val
9ded7720 55#else
5db06880 56# define CURMAD(slot,sv)
9ded7720 57# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
58#endif
59
9059aa12
LW
60#define XFAKEBRACK 128
61#define XENUMMASK 127
62
39e02b42
JH
63#ifdef USE_UTF8_SCRIPTS
64# define UTF (!IN_BYTES)
2b9d42f0 65#else
746b446a 66# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 67#endif
a0ed51b3 68
61f0cdd9 69/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
70 * 1999-02-27 mjd-perl-patch@plover.com */
71#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
72
bf4acbe4
GS
73/* On MacOS, respect nonbreaking spaces */
74#ifdef MACOS_TRADITIONAL
75#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
76#else
77#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
78#endif
79
ffb4593c
NT
80/* LEX_* are values for PL_lex_state, the state of the lexer.
81 * They are arranged oddly so that the guard on the switch statement
79072805
LW
82 * can get by with a single comparison (if the compiler is smart enough).
83 */
84
fb73857a 85/* #define LEX_NOTPARSING 11 is done in perl.h. */
86
b6007c36
DM
87#define LEX_NORMAL 10 /* normal code (ie not within "...") */
88#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
89#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
90#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
91#define LEX_INTERPSTART 6 /* expecting the start of a $var */
92
93 /* at end of code, eg "$x" followed by: */
94#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
95#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
96
97#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
98 string or after \E, $foo, etc */
99#define LEX_INTERPCONST 2 /* NOT USED */
100#define LEX_FORMLINE 1 /* expecting a format line */
101#define LEX_KNOWNEXT 0 /* next token known; just return it */
102
79072805 103
bbf60fe6 104#ifdef DEBUGGING
27da23d5 105static const char* const lex_state_names[] = {
bbf60fe6
DM
106 "KNOWNEXT",
107 "FORMLINE",
108 "INTERPCONST",
109 "INTERPCONCAT",
110 "INTERPENDMAYBE",
111 "INTERPEND",
112 "INTERPSTART",
113 "INTERPPUSH",
114 "INTERPCASEMOD",
115 "INTERPNORMAL",
116 "NORMAL"
117};
118#endif
119
79072805
LW
120#ifdef ff_next
121#undef ff_next
d48672a2
LW
122#endif
123
79072805 124#include "keywords.h"
fe14fcc3 125
ffb4593c
NT
126/* CLINE is a macro that ensures PL_copline has a sane value */
127
ae986130
LW
128#ifdef CLINE
129#undef CLINE
130#endif
57843af0 131#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 132
5db06880 133#ifdef PERL_MAD
29595ff2
NC
134# define SKIPSPACE0(s) skipspace0(s)
135# define SKIPSPACE1(s) skipspace1(s)
136# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
137# define PEEKSPACE(s) skipspace2(s,0)
138#else
139# define SKIPSPACE0(s) skipspace(s)
140# define SKIPSPACE1(s) skipspace(s)
141# define SKIPSPACE2(s,tsv) skipspace(s)
142# define PEEKSPACE(s) skipspace(s)
143#endif
144
ffb4593c
NT
145/*
146 * Convenience functions to return different tokens and prime the
9cbb5ea2 147 * lexer for the next token. They all take an argument.
ffb4593c
NT
148 *
149 * TOKEN : generic token (used for '(', DOLSHARP, etc)
150 * OPERATOR : generic operator
151 * AOPERATOR : assignment operator
152 * PREBLOCK : beginning the block after an if, while, foreach, ...
153 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
154 * PREREF : *EXPR where EXPR is not a simple identifier
155 * TERM : expression term
156 * LOOPX : loop exiting command (goto, last, dump, etc)
157 * FTST : file test operator
158 * FUN0 : zero-argument function
2d2e263d 159 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
160 * BOop : bitwise or or xor
161 * BAop : bitwise and
162 * SHop : shift operator
163 * PWop : power operator
9cbb5ea2 164 * PMop : pattern-matching operator
ffb4593c
NT
165 * Aop : addition-level operator
166 * Mop : multiplication-level operator
167 * Eop : equality-testing operator
e5edeb50 168 * Rop : relational operator <= != gt
ffb4593c
NT
169 *
170 * Also see LOP and lop() below.
171 */
172
998054bd 173#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 174# define REPORT(retval) tokereport((I32)retval)
998054bd 175#else
bbf60fe6 176# define REPORT(retval) (retval)
998054bd
SC
177#endif
178
bbf60fe6
DM
179#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
180#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
181#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
182#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
183#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
184#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
185#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
186#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
187#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
188#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
189#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
190#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
191#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
192#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
193#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
194#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
195#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
196#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
197#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
198#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 199
a687059c
LW
200/* This bit of chicanery makes a unary function followed by
201 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
202 * The UNIDOR macro is for unary functions that can be followed by the //
203 * operator (such as C<shift // 0>).
a687059c 204 */
376fcdbf
AL
205#define UNI2(f,x) { \
206 yylval.ival = f; \
207 PL_expect = x; \
208 PL_bufptr = s; \
209 PL_last_uni = PL_oldbufptr; \
210 PL_last_lop_op = f; \
211 if (*s == '(') \
212 return REPORT( (int)FUNC1 ); \
29595ff2 213 s = PEEKSPACE(s); \
376fcdbf
AL
214 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
215 }
6f33ba73
RGS
216#define UNI(f) UNI2(f,XTERM)
217#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 218
376fcdbf
AL
219#define UNIBRACK(f) { \
220 yylval.ival = f; \
221 PL_bufptr = s; \
222 PL_last_uni = PL_oldbufptr; \
223 if (*s == '(') \
224 return REPORT( (int)FUNC1 ); \
29595ff2 225 s = PEEKSPACE(s); \
376fcdbf
AL
226 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
227 }
79072805 228
9f68db38 229/* grandfather return to old style */
3280af22 230#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 231
8fa7f367
JH
232#ifdef DEBUGGING
233
bbf60fe6
DM
234/* how to interpret the yylval associated with the token */
235enum token_type {
236 TOKENTYPE_NONE,
237 TOKENTYPE_IVAL,
238 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
239 TOKENTYPE_PVAL,
240 TOKENTYPE_OPVAL,
241 TOKENTYPE_GVVAL
242};
243
6d4a66ac
NC
244static struct debug_tokens {
245 const int token;
246 enum token_type type;
247 const char *name;
248} const debug_tokens[] =
9041c2e3 249{
bbf60fe6
DM
250 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
251 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
252 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
253 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
254 { ARROW, TOKENTYPE_NONE, "ARROW" },
255 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
256 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
257 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
258 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
259 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 260 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
261 { DO, TOKENTYPE_NONE, "DO" },
262 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
263 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
264 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
265 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
266 { ELSE, TOKENTYPE_NONE, "ELSE" },
267 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
268 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
269 { FOR, TOKENTYPE_IVAL, "FOR" },
270 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
271 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
272 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
273 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
274 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
275 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 276 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
277 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
278 { IF, TOKENTYPE_IVAL, "IF" },
279 { LABEL, TOKENTYPE_PVAL, "LABEL" },
280 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
281 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
282 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
283 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
284 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
285 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
286 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
287 { MY, TOKENTYPE_IVAL, "MY" },
288 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
289 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
290 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
291 { OROP, TOKENTYPE_IVAL, "OROP" },
292 { OROR, TOKENTYPE_NONE, "OROR" },
293 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
294 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
295 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
296 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
297 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
298 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
299 { PREINC, TOKENTYPE_NONE, "PREINC" },
300 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
301 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
302 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
303 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
304 { SUB, TOKENTYPE_NONE, "SUB" },
305 { THING, TOKENTYPE_OPVAL, "THING" },
306 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
307 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
308 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
309 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
310 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
311 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 312 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
313 { WHILE, TOKENTYPE_IVAL, "WHILE" },
314 { WORD, TOKENTYPE_OPVAL, "WORD" },
315 { 0, TOKENTYPE_NONE, 0 }
316};
317
318/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 319
bbf60fe6 320STATIC int
f5bd084c 321S_tokereport(pTHX_ I32 rv)
bbf60fe6 322{
97aff369 323 dVAR;
bbf60fe6 324 if (DEBUG_T_TEST) {
bd61b366 325 const char *name = NULL;
bbf60fe6 326 enum token_type type = TOKENTYPE_NONE;
f54cb97a 327 const struct debug_tokens *p;
396482e1 328 SV* const report = newSVpvs("<== ");
bbf60fe6 329
f54cb97a 330 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
331 if (p->token == (int)rv) {
332 name = p->name;
333 type = p->type;
334 break;
335 }
336 }
337 if (name)
54667de8 338 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
339 else if ((char)rv > ' ' && (char)rv < '~')
340 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
341 else if (!rv)
396482e1 342 sv_catpvs(report, "EOF");
bbf60fe6
DM
343 else
344 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
345 switch (type) {
346 case TOKENTYPE_NONE:
347 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
348 break;
349 case TOKENTYPE_IVAL:
e4584336 350 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
351 break;
352 case TOKENTYPE_OPNUM:
353 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
354 PL_op_name[yylval.ival]);
355 break;
356 case TOKENTYPE_PVAL:
357 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
358 break;
359 case TOKENTYPE_OPVAL:
b6007c36 360 if (yylval.opval) {
401441c0 361 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 362 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
363 if (yylval.opval->op_type == OP_CONST) {
364 Perl_sv_catpvf(aTHX_ report, " %s",
365 SvPEEK(cSVOPx_sv(yylval.opval)));
366 }
367
368 }
401441c0 369 else
396482e1 370 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
371 break;
372 }
b6007c36 373 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
374 };
375 return (int)rv;
998054bd
SC
376}
377
b6007c36
DM
378
379/* print the buffer with suitable escapes */
380
381STATIC void
382S_printbuf(pTHX_ const char* fmt, const char* s)
383{
396482e1 384 SV* const tmp = newSVpvs("");
b6007c36
DM
385 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
386 SvREFCNT_dec(tmp);
387}
388
8fa7f367
JH
389#endif
390
ffb4593c
NT
391/*
392 * S_ao
393 *
c963b151
BD
394 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
395 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
396 */
397
76e3520e 398STATIC int
cea2e8a9 399S_ao(pTHX_ int toketype)
a0d0e21e 400{
97aff369 401 dVAR;
3280af22
NIS
402 if (*PL_bufptr == '=') {
403 PL_bufptr++;
a0d0e21e
LW
404 if (toketype == ANDAND)
405 yylval.ival = OP_ANDASSIGN;
406 else if (toketype == OROR)
407 yylval.ival = OP_ORASSIGN;
c963b151
BD
408 else if (toketype == DORDOR)
409 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
410 toketype = ASSIGNOP;
411 }
412 return toketype;
413}
414
ffb4593c
NT
415/*
416 * S_no_op
417 * When Perl expects an operator and finds something else, no_op
418 * prints the warning. It always prints "<something> found where
419 * operator expected. It prints "Missing semicolon on previous line?"
420 * if the surprise occurs at the start of the line. "do you need to
421 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
422 * where the compiler doesn't know if foo is a method call or a function.
423 * It prints "Missing operator before end of line" if there's nothing
424 * after the missing operator, or "... before <...>" if there is something
425 * after the missing operator.
426 */
427
76e3520e 428STATIC void
bfed75c6 429S_no_op(pTHX_ const char *what, char *s)
463ee0b2 430{
97aff369 431 dVAR;
9d4ba2ae
AL
432 char * const oldbp = PL_bufptr;
433 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 434
1189a94a
GS
435 if (!s)
436 s = oldbp;
07c798fb 437 else
1189a94a 438 PL_bufptr = s;
cea2e8a9 439 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
440 if (ckWARN_d(WARN_SYNTAX)) {
441 if (is_first)
442 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
443 "\t(Missing semicolon on previous line?)\n");
444 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 445 const char *t;
56da5a46
RGS
446 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
447 if (t < PL_bufptr && isSPACE(*t))
448 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
449 "\t(Do you need to predeclare %.*s?)\n",
551405c4 450 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
451 }
452 else {
453 assert(s >= oldbp);
454 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 455 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 456 }
07c798fb 457 }
3280af22 458 PL_bufptr = oldbp;
8990e307
LW
459}
460
ffb4593c
NT
461/*
462 * S_missingterm
463 * Complain about missing quote/regexp/heredoc terminator.
464 * If it's called with (char *)NULL then it cauterizes the line buffer.
465 * If we're in a delimited string and the delimiter is a control
466 * character, it's reformatted into a two-char sequence like ^C.
467 * This is fatal.
468 */
469
76e3520e 470STATIC void
cea2e8a9 471S_missingterm(pTHX_ char *s)
8990e307 472{
97aff369 473 dVAR;
8990e307
LW
474 char tmpbuf[3];
475 char q;
476 if (s) {
9d4ba2ae 477 char * const nl = strrchr(s,'\n');
d2719217 478 if (nl)
8990e307
LW
479 *nl = '\0';
480 }
9d116dd7
JH
481 else if (
482#ifdef EBCDIC
483 iscntrl(PL_multi_close)
484#else
485 PL_multi_close < 32 || PL_multi_close == 127
486#endif
487 ) {
8990e307 488 *tmpbuf = '^';
585ec06d 489 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
490 tmpbuf[2] = '\0';
491 s = tmpbuf;
492 }
493 else {
eb160463 494 *tmpbuf = (char)PL_multi_close;
8990e307
LW
495 tmpbuf[1] = '\0';
496 s = tmpbuf;
497 }
498 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 499 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 500}
79072805 501
ef89dcc3 502#define FEATURE_IS_ENABLED(name) \
0d863452 503 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 504 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
505/*
506 * S_feature_is_enabled
507 * Check whether the named feature is enabled.
508 */
509STATIC bool
510S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
511{
97aff369 512 dVAR;
0d863452 513 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
514 char he_name[32] = "feature_";
515 (void) strncpy(&he_name[8], name, 24);
516
517 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
518}
519
ffb4593c
NT
520/*
521 * Perl_deprecate
ffb4593c
NT
522 */
523
79072805 524void
bfed75c6 525Perl_deprecate(pTHX_ const char *s)
a0d0e21e 526{
599cee73 527 if (ckWARN(WARN_DEPRECATED))
9014280d 528 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
529}
530
12bcd1a6 531void
bfed75c6 532Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
533{
534 /* This function should NOT be called for any new deprecated warnings */
535 /* Use Perl_deprecate instead */
536 /* */
537 /* It is here to maintain backward compatibility with the pre-5.8 */
538 /* warnings category hierarchy. The "deprecated" category used to */
539 /* live under the "syntax" category. It is now a top-level category */
540 /* in its own right. */
541
542 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 543 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
544 "Use of %s is deprecated", s);
545}
546
ffb4593c 547/*
9cbb5ea2
GS
548 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
549 * utf16-to-utf8-reversed.
ffb4593c
NT
550 */
551
c39cd008
GS
552#ifdef PERL_CR_FILTER
553static void
554strip_return(SV *sv)
555{
95a20fc0 556 register const char *s = SvPVX_const(sv);
9d4ba2ae 557 register const char * const e = s + SvCUR(sv);
c39cd008
GS
558 /* outer loop optimized to do nothing if there are no CR-LFs */
559 while (s < e) {
560 if (*s++ == '\r' && *s == '\n') {
561 /* hit a CR-LF, need to copy the rest */
562 register char *d = s - 1;
563 *d++ = *s++;
564 while (s < e) {
565 if (*s == '\r' && s[1] == '\n')
566 s++;
567 *d++ = *s++;
568 }
569 SvCUR(sv) -= s - d;
570 return;
571 }
572 }
573}
a868473f 574
76e3520e 575STATIC I32
c39cd008 576S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 577{
f54cb97a 578 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
579 if (count > 0 && !maxlen)
580 strip_return(sv);
581 return count;
a868473f
NIS
582}
583#endif
584
ffb4593c
NT
585/*
586 * Perl_lex_start
9cbb5ea2
GS
587 * Initialize variables. Uses the Perl save_stack to save its state (for
588 * recursive calls to the parser).
ffb4593c
NT
589 */
590
a0d0e21e 591void
864dbfa3 592Perl_lex_start(pTHX_ SV *line)
79072805 593{
97aff369 594 dVAR;
cfd0369c 595 const char *s;
8990e307
LW
596 STRLEN len;
597
3280af22
NIS
598 SAVEI32(PL_lex_dojoin);
599 SAVEI32(PL_lex_brackets);
3280af22
NIS
600 SAVEI32(PL_lex_casemods);
601 SAVEI32(PL_lex_starts);
602 SAVEI32(PL_lex_state);
7766f137 603 SAVEVPTR(PL_lex_inpat);
3280af22 604 SAVEI32(PL_lex_inwhat);
5db06880
NC
605#ifdef PERL_MAD
606 if (PL_lex_state == LEX_KNOWNEXT) {
607 I32 toke = PL_lasttoke;
608 while (--toke >= 0) {
609 SAVEI32(PL_nexttoke[toke].next_type);
610 SAVEVPTR(PL_nexttoke[toke].next_val);
611 if (PL_madskills)
612 SAVEVPTR(PL_nexttoke[toke].next_mad);
613 }
614 SAVEI32(PL_lasttoke);
615 }
616 if (PL_madskills) {
617 SAVESPTR(thistoken);
618 SAVESPTR(thiswhite);
619 SAVESPTR(nextwhite);
620 SAVESPTR(thisopen);
621 SAVESPTR(thisclose);
622 SAVESPTR(thisstuff);
623 SAVEVPTR(thismad);
624 SAVEI32(realtokenstart);
625 SAVEI32(faketokens);
626 }
627 SAVEI32(curforce);
628#else
18b09519
GS
629 if (PL_lex_state == LEX_KNOWNEXT) {
630 I32 toke = PL_nexttoke;
631 while (--toke >= 0) {
632 SAVEI32(PL_nexttype[toke]);
633 SAVEVPTR(PL_nextval[toke]);
634 }
635 SAVEI32(PL_nexttoke);
18b09519 636 }
5db06880 637#endif
57843af0 638 SAVECOPLINE(PL_curcop);
3280af22
NIS
639 SAVEPPTR(PL_bufptr);
640 SAVEPPTR(PL_bufend);
641 SAVEPPTR(PL_oldbufptr);
642 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
643 SAVEPPTR(PL_last_lop);
644 SAVEPPTR(PL_last_uni);
3280af22
NIS
645 SAVEPPTR(PL_linestart);
646 SAVESPTR(PL_linestr);
8edd5f42
RGS
647 SAVEGENERICPV(PL_lex_brackstack);
648 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 649 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
650 SAVESPTR(PL_lex_stuff);
651 SAVEI32(PL_lex_defer);
09bef843 652 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 653 SAVESPTR(PL_lex_repl);
bebdddfc
GS
654 SAVEINT(PL_expect);
655 SAVEINT(PL_lex_expect);
3280af22
NIS
656
657 PL_lex_state = LEX_NORMAL;
658 PL_lex_defer = 0;
659 PL_expect = XSTATE;
660 PL_lex_brackets = 0;
a02a5408
JC
661 Newx(PL_lex_brackstack, 120, char);
662 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
663 PL_lex_casemods = 0;
664 *PL_lex_casestack = '\0';
665 PL_lex_dojoin = 0;
666 PL_lex_starts = 0;
a0714e2c
SS
667 PL_lex_stuff = NULL;
668 PL_lex_repl = NULL;
3280af22 669 PL_lex_inpat = 0;
5db06880
NC
670#ifdef PERL_MAD
671 PL_lasttoke = 0;
672#else
76be56bc 673 PL_nexttoke = 0;
5db06880 674#endif
3280af22 675 PL_lex_inwhat = 0;
09bef843 676 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
677 PL_linestr = line;
678 if (SvREADONLY(PL_linestr))
679 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 680 s = SvPV_const(PL_linestr, len);
6f27f9a7 681 if (!len || s[len-1] != ';') {
3280af22
NIS
682 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
683 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 684 sv_catpvs(PL_linestr, "\n;");
8990e307 685 }
3280af22
NIS
686 SvTEMP_off(PL_linestr);
687 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
688 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 689 PL_last_lop = PL_last_uni = NULL;
3280af22 690 PL_rsfp = 0;
79072805 691}
a687059c 692
ffb4593c
NT
693/*
694 * Perl_lex_end
9cbb5ea2
GS
695 * Finalizer for lexing operations. Must be called when the parser is
696 * done with the lexer.
ffb4593c
NT
697 */
698
463ee0b2 699void
864dbfa3 700Perl_lex_end(pTHX)
463ee0b2 701{
97aff369 702 dVAR;
3280af22 703 PL_doextract = FALSE;
463ee0b2
LW
704}
705
ffb4593c
NT
706/*
707 * S_incline
708 * This subroutine has nothing to do with tilting, whether at windmills
709 * or pinball tables. Its name is short for "increment line". It
57843af0 710 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 711 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
712 * # line 500 "foo.pm"
713 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_incline(pTHX_ char *s)
463ee0b2 718{
97aff369 719 dVAR;
463ee0b2
LW
720 char *t;
721 char *n;
73659bf1 722 char *e;
463ee0b2 723 char ch;
463ee0b2 724
57843af0 725 CopLINE_inc(PL_curcop);
463ee0b2
LW
726 if (*s++ != '#')
727 return;
bf4acbe4 728 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
729 if (strnEQ(s, "line", 4))
730 s += 4;
731 else
732 return;
084592ab 733 if (SPACE_OR_TAB(*s))
73659bf1 734 s++;
4e553d73 735 else
73659bf1 736 return;
bf4acbe4 737 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
738 if (!isDIGIT(*s))
739 return;
740 n = s;
741 while (isDIGIT(*s))
742 s++;
bf4acbe4 743 while (SPACE_OR_TAB(*s))
463ee0b2 744 s++;
73659bf1 745 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 746 s++;
73659bf1
GS
747 e = t + 1;
748 }
463ee0b2 749 else {
463ee0b2 750 for (t = s; !isSPACE(*t); t++) ;
73659bf1 751 e = t;
463ee0b2 752 }
bf4acbe4 753 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
754 e++;
755 if (*e != '\n' && *e != '\0')
756 return; /* false alarm */
757
463ee0b2
LW
758 ch = *t;
759 *t = '\0';
f4dd75d9 760 if (t - s > 0) {
8a5ee598 761#ifndef USE_ITHREADS
c4420975 762 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
763 STRLEN tmplen = cf ? strlen(cf) : 0;
764 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
765 /* must copy *{"::_<(eval N)[oldfilename:L]"}
766 * to *{"::_<newfilename"} */
767 char smallbuf[256], smallbuf2[256];
768 char *tmpbuf, *tmpbuf2;
8a5ee598 769 GV **gvp, *gv2;
e66cf94c
RGS
770 STRLEN tmplen2 = strlen(s);
771 if (tmplen + 3 < sizeof smallbuf)
772 tmpbuf = smallbuf;
773 else
774 Newx(tmpbuf, tmplen + 3, char);
775 if (tmplen2 + 3 < sizeof smallbuf2)
776 tmpbuf2 = smallbuf2;
777 else
778 Newx(tmpbuf2, tmplen2 + 3, char);
779 tmpbuf[0] = tmpbuf2[0] = '_';
780 tmpbuf[1] = tmpbuf2[1] = '<';
781 memcpy(tmpbuf + 2, cf, ++tmplen);
782 memcpy(tmpbuf2 + 2, s, ++tmplen2);
783 ++tmplen; ++tmplen2;
8a5ee598
RGS
784 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
785 if (gvp) {
786 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
787 if (!isGV(gv2))
788 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
789 /* adjust ${"::_<newfilename"} to store the new file name */
790 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
791 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
792 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
793 }
e66cf94c
RGS
794 if (tmpbuf != smallbuf) Safefree(tmpbuf);
795 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
796 }
8a5ee598 797#endif
05ec9bb3 798 CopFILE_free(PL_curcop);
57843af0 799 CopFILE_set(PL_curcop, s);
f4dd75d9 800 }
463ee0b2 801 *t = ch;
57843af0 802 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
803}
804
29595ff2
NC
805#ifdef PERL_MAD
806/* skip space before thistoken */
807
808STATIC char *
809S_skipspace0(pTHX_ register char *s)
810{
811 s = skipspace(s);
812 if (!PL_madskills)
813 return s;
814 if (skipwhite) {
815 if (!thiswhite)
816 thiswhite = newSVpvn("",0);
817 sv_catsv(thiswhite, skipwhite);
818 sv_free(skipwhite);
819 skipwhite = 0;
820 }
821 realtokenstart = s - SvPVX(PL_linestr);
822 return s;
823}
824
825/* skip space after thistoken */
826
827STATIC char *
828S_skipspace1(pTHX_ register char *s)
829{
830 char *start = s;
831 I32 startoff = start - SvPVX(PL_linestr);
832
833 s = skipspace(s);
834 if (!PL_madskills)
835 return s;
836 start = SvPVX(PL_linestr) + startoff;
837 if (!thistoken && realtokenstart >= 0) {
838 char *tstart = SvPVX(PL_linestr) + realtokenstart;
839 thistoken = newSVpvn(tstart, start - tstart);
840 }
841 realtokenstart = -1;
842 if (skipwhite) {
843 if (!nextwhite)
844 nextwhite = newSVpvn("",0);
845 sv_catsv(nextwhite, skipwhite);
846 sv_free(skipwhite);
847 skipwhite = 0;
848 }
849 return s;
850}
851
852STATIC char *
853S_skipspace2(pTHX_ register char *s, SV **svp)
854{
855 char *start = s;
856 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
857 I32 startoff = start - SvPVX(PL_linestr);
858 s = skipspace(s);
859 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
860 if (!PL_madskills || !svp)
861 return s;
862 start = SvPVX(PL_linestr) + startoff;
863 if (!thistoken && realtokenstart >= 0) {
864 char *tstart = SvPVX(PL_linestr) + realtokenstart;
865 thistoken = newSVpvn(tstart, start - tstart);
866 realtokenstart = -1;
867 }
868 if (skipwhite) {
869 if (!*svp)
870 *svp = newSVpvn("",0);
871 sv_setsv(*svp, skipwhite);
872 sv_free(skipwhite);
873 skipwhite = 0;
874 }
875
876 return s;
877}
878#endif
879
ffb4593c
NT
880/*
881 * S_skipspace
882 * Called to gobble the appropriate amount and type of whitespace.
883 * Skips comments as well.
884 */
885
76e3520e 886STATIC char *
cea2e8a9 887S_skipspace(pTHX_ register char *s)
a687059c 888{
97aff369 889 dVAR;
5db06880
NC
890#ifdef PERL_MAD
891 int curoff;
892 int startoff = s - SvPVX(PL_linestr);
893
894 if (skipwhite) {
895 sv_free(skipwhite);
896 skipwhite = 0;
897 }
898#endif
899
3280af22 900 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 901 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 902 s++;
5db06880
NC
903#ifdef PERL_MAD
904 goto done;
905#else
463ee0b2 906 return s;
5db06880 907#endif
463ee0b2
LW
908 }
909 for (;;) {
fd049845 910 STRLEN prevlen;
09bef843 911 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 912 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
913 while (s < PL_bufend && isSPACE(*s)) {
914 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
915 incline(s);
916 }
ffb4593c
NT
917
918 /* comment */
3280af22
NIS
919 if (s < PL_bufend && *s == '#') {
920 while (s < PL_bufend && *s != '\n')
463ee0b2 921 s++;
60e6418e 922 if (s < PL_bufend) {
463ee0b2 923 s++;
60e6418e
GS
924 if (PL_in_eval && !PL_rsfp) {
925 incline(s);
926 continue;
927 }
928 }
463ee0b2 929 }
ffb4593c
NT
930
931 /* only continue to recharge the buffer if we're at the end
932 * of the buffer, we're not reading from a source filter, and
933 * we're in normal lexing mode
934 */
09bef843
SB
935 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
936 PL_lex_state == LEX_FORMLINE)
5db06880
NC
937#ifdef PERL_MAD
938 goto done;
939#else
463ee0b2 940 return s;
5db06880 941#endif
ffb4593c
NT
942
943 /* try to recharge the buffer */
5db06880
NC
944#ifdef PERL_MAD
945 curoff = s - SvPVX(PL_linestr);
946#endif
947
9cbb5ea2 948 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 949 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 950 {
5db06880
NC
951#ifdef PERL_MAD
952 if (PL_madskills && curoff != startoff) {
953 if (!skipwhite)
954 skipwhite = newSVpvn("",0);
955 sv_catpvn(skipwhite, SvPVX(PL_linestr) + startoff,
956 curoff - startoff);
957 }
958
959 /* mustn't throw out old stuff yet if madpropping */
960 SvCUR(PL_linestr) = curoff;
961 s = SvPVX(PL_linestr) + curoff;
962 *s = 0;
963 if (curoff && s[-1] == '\n')
964 s[-1] = ' ';
965#endif
966
9cbb5ea2 967 /* end of file. Add on the -p or -n magic */
5db06880 968 /* XXX these shouldn't really be added here, can't set faketokens */
01a19ab0 969 if (PL_minus_p) {
5db06880
NC
970#ifdef PERL_MAD
971 sv_catpv(PL_linestr,
972 ";}continue{print or die qq(-p destination: $!\\n);}");
973#else
01a19ab0
NC
974 sv_setpv(PL_linestr,
975 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 976#endif
3280af22 977 PL_minus_n = PL_minus_p = 0;
a0d0e21e 978 }
01a19ab0 979 else if (PL_minus_n) {
5db06880
NC
980#ifdef PERL_MAD
981 sv_catpvn(PL_linestr, ";}", 2);
982#else
01a19ab0 983 sv_setpvn(PL_linestr, ";}", 2);
5db06880 984#endif
01a19ab0
NC
985 PL_minus_n = 0;
986 }
a0d0e21e 987 else
5db06880
NC
988#ifdef PERL_MAD
989 sv_catpvn(PL_linestr,";", 1);
990#else
4147a61b 991 sv_setpvn(PL_linestr,";", 1);
5db06880 992#endif
ffb4593c
NT
993
994 /* reset variables for next time we lex */
9cbb5ea2 995 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
996 = SvPVX(PL_linestr)
997#ifdef PERL_MAD
998 + curoff
999#endif
1000 ;
3280af22 1001 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1002 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1003
1004 /* Close the filehandle. Could be from -P preprocessor,
1005 * STDIN, or a regular file. If we were reading code from
1006 * STDIN (because the commandline held no -e or filename)
1007 * then we don't close it, we reset it so the code can
1008 * read from STDIN too.
1009 */
1010
3280af22
NIS
1011 if (PL_preprocess && !PL_in_eval)
1012 (void)PerlProc_pclose(PL_rsfp);
1013 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1014 PerlIO_clearerr(PL_rsfp);
8990e307 1015 else
3280af22 1016 (void)PerlIO_close(PL_rsfp);
4608196e 1017 PL_rsfp = NULL;
463ee0b2
LW
1018 return s;
1019 }
ffb4593c
NT
1020
1021 /* not at end of file, so we only read another line */
09bef843
SB
1022 /* make corresponding updates to old pointers, for yyerror() */
1023 oldprevlen = PL_oldbufptr - PL_bufend;
1024 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1025 if (PL_last_uni)
1026 oldunilen = PL_last_uni - PL_bufend;
1027 if (PL_last_lop)
1028 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1029 PL_linestart = PL_bufptr = s + prevlen;
1030 PL_bufend = s + SvCUR(PL_linestr);
1031 s = PL_bufptr;
09bef843
SB
1032 PL_oldbufptr = s + oldprevlen;
1033 PL_oldoldbufptr = s + oldoldprevlen;
1034 if (PL_last_uni)
1035 PL_last_uni = s + oldunilen;
1036 if (PL_last_lop)
1037 PL_last_lop = s + oldloplen;
a0d0e21e 1038 incline(s);
ffb4593c
NT
1039
1040 /* debugger active and we're not compiling the debugger code,
1041 * so store the line into the debugger's array of lines
1042 */
3280af22 1043 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1044 SV * const sv = newSV(0);
8990e307
LW
1045
1046 sv_upgrade(sv, SVt_PVMG);
3280af22 1047 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1048 (void)SvIOK_on(sv);
45977657 1049 SvIV_set(sv, 0);
36c7798d 1050 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1051 }
463ee0b2 1052 }
5db06880
NC
1053
1054#ifdef PERL_MAD
1055 done:
1056 if (PL_madskills) {
1057 if (!skipwhite)
1058 skipwhite = newSVpvn("",0);
1059 curoff = s - SvPVX(PL_linestr);
1060 if (curoff - startoff)
1061 sv_catpvn(skipwhite, SvPVX(PL_linestr) + startoff,
1062 curoff - startoff);
1063 }
1064 return s;
1065#endif
a687059c 1066}
378cc40b 1067
ffb4593c
NT
1068/*
1069 * S_check_uni
1070 * Check the unary operators to ensure there's no ambiguity in how they're
1071 * used. An ambiguous piece of code would be:
1072 * rand + 5
1073 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1074 * the +5 is its argument.
1075 */
1076
76e3520e 1077STATIC void
cea2e8a9 1078S_check_uni(pTHX)
ba106d47 1079{
97aff369 1080 dVAR;
2f3197b3 1081 char *s;
a0d0e21e 1082 char *t;
2f3197b3 1083
3280af22 1084 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1085 return;
3280af22
NIS
1086 while (isSPACE(*PL_last_uni))
1087 PL_last_uni++;
7e2040f0 1088 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 1089 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1090 return;
6136c704 1091
0453d815 1092 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1093 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1094 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1095 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1096 }
2f3197b3
LW
1097}
1098
ffb4593c
NT
1099/*
1100 * LOP : macro to build a list operator. Its behaviour has been replaced
1101 * with a subroutine, S_lop() for which LOP is just another name.
1102 */
1103
a0d0e21e
LW
1104#define LOP(f,x) return lop(f,x,s)
1105
ffb4593c
NT
1106/*
1107 * S_lop
1108 * Build a list operator (or something that might be one). The rules:
1109 * - if we have a next token, then it's a list operator [why?]
1110 * - if the next thing is an opening paren, then it's a function
1111 * - else it's a list operator
1112 */
1113
76e3520e 1114STATIC I32
a0be28da 1115S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1116{
97aff369 1117 dVAR;
79072805 1118 yylval.ival = f;
35c8bce7 1119 CLINE;
3280af22
NIS
1120 PL_expect = x;
1121 PL_bufptr = s;
1122 PL_last_lop = PL_oldbufptr;
eb160463 1123 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1124#ifdef PERL_MAD
1125 if (PL_lasttoke)
1126 return REPORT(LSTOP);
1127#else
3280af22 1128 if (PL_nexttoke)
bbf60fe6 1129 return REPORT(LSTOP);
5db06880 1130#endif
79072805 1131 if (*s == '(')
bbf60fe6 1132 return REPORT(FUNC);
29595ff2 1133 s = PEEKSPACE(s);
79072805 1134 if (*s == '(')
bbf60fe6 1135 return REPORT(FUNC);
79072805 1136 else
bbf60fe6 1137 return REPORT(LSTOP);
79072805
LW
1138}
1139
5db06880
NC
1140#ifdef PERL_MAD
1141 /*
1142 * S_start_force
1143 * Sets up for an eventual force_next(). start_force(0) basically does
1144 * an unshift, while start_force(-1) does a push. yylex removes items
1145 * on the "pop" end.
1146 */
1147
1148STATIC void
1149S_start_force(pTHX_ int where)
1150{
1151 int i;
1152
1153 if (where < 0) /* so people can duplicate start_force(curforce) */
1154 where = PL_lasttoke;
1155 assert(curforce < 0 || curforce == where);
1156 if (curforce != where) {
1157 for (i = PL_lasttoke; i > where; --i) {
1158 PL_nexttoke[i] = PL_nexttoke[i-1];
1159 }
1160 PL_lasttoke++;
1161 }
1162 if (curforce < 0) /* in case of duplicate start_force() */
1163 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1164 curforce = where;
1165 if (nextwhite) {
1166 if (PL_madskills)
1167 curmad('^', newSVpvn("",0));
1168 CURMAD('_', nextwhite);
1169 }
1170}
1171
1172STATIC void
1173S_curmad(pTHX_ char slot, SV *sv)
1174{
1175 MADPROP **where;
1176
1177 if (!sv)
1178 return;
1179 if (curforce < 0)
1180 where = &thismad;
1181 else
1182 where = &PL_nexttoke[curforce].next_mad;
1183
1184 if (faketokens)
1185 sv_setpvn(sv, "", 0);
1186 else {
1187 if (!IN_BYTES) {
1188 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1189 SvUTF8_on(sv);
1190 else if (PL_encoding) {
1191 sv_recode_to_utf8(sv, PL_encoding);
1192 }
1193 }
1194 }
1195
1196 /* keep a slot open for the head of the list? */
1197 if (slot != '_' && *where && (*where)->mad_key == '^') {
1198 (*where)->mad_key = slot;
1199 sv_free((*where)->mad_val);
1200 (*where)->mad_val = (void*)sv;
1201 }
1202 else
1203 addmad(newMADsv(slot, sv), where, 0);
1204}
1205#else
1206# define start_force(where)
1207# define curmad(slot, sv)
1208#endif
1209
ffb4593c
NT
1210/*
1211 * S_force_next
9cbb5ea2 1212 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1213 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1214 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1215 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1216 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1217 */
1218
4e553d73 1219STATIC void
cea2e8a9 1220S_force_next(pTHX_ I32 type)
79072805 1221{
97aff369 1222 dVAR;
5db06880
NC
1223#ifdef PERL_MAD
1224 if (curforce < 0)
1225 start_force(PL_lasttoke);
1226 PL_nexttoke[curforce].next_type = type;
1227 if (PL_lex_state != LEX_KNOWNEXT)
1228 PL_lex_defer = PL_lex_state;
1229 PL_lex_state = LEX_KNOWNEXT;
1230 PL_lex_expect = PL_expect;
1231 curforce = -1;
1232#else
3280af22
NIS
1233 PL_nexttype[PL_nexttoke] = type;
1234 PL_nexttoke++;
1235 if (PL_lex_state != LEX_KNOWNEXT) {
1236 PL_lex_defer = PL_lex_state;
1237 PL_lex_expect = PL_expect;
1238 PL_lex_state = LEX_KNOWNEXT;
79072805 1239 }
5db06880 1240#endif
79072805
LW
1241}
1242
d0a148a6
NC
1243STATIC SV *
1244S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1245{
97aff369 1246 dVAR;
9d4ba2ae 1247 SV * const sv = newSVpvn(start,len);
bfed75c6 1248 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1249 SvUTF8_on(sv);
1250 return sv;
1251}
1252
ffb4593c
NT
1253/*
1254 * S_force_word
1255 * When the lexer knows the next thing is a word (for instance, it has
1256 * just seen -> and it knows that the next char is a word char, then
1257 * it calls S_force_word to stick the next word into the PL_next lookahead.
1258 *
1259 * Arguments:
b1b65b59 1260 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1261 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1262 * int check_keyword : if true, Perl checks to make sure the word isn't
1263 * a keyword (do this if the word is a label, e.g. goto FOO)
1264 * int allow_pack : if true, : characters will also be allowed (require,
1265 * use, etc. do this)
9cbb5ea2 1266 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1267 */
1268
76e3520e 1269STATIC char *
cea2e8a9 1270S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1271{
97aff369 1272 dVAR;
463ee0b2
LW
1273 register char *s;
1274 STRLEN len;
4e553d73 1275
29595ff2 1276 start = SKIPSPACE1(start);
463ee0b2 1277 s = start;
7e2040f0 1278 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1279 (allow_pack && *s == ':') ||
15f0808c 1280 (allow_initial_tick && *s == '\'') )
a0d0e21e 1281 {
3280af22
NIS
1282 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1283 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1284 return start;
5db06880
NC
1285 start_force(curforce);
1286 if (PL_madskills)
1287 curmad('X', newSVpvn(start,s-start));
463ee0b2 1288 if (token == METHOD) {
29595ff2 1289 s = SKIPSPACE1(s);
463ee0b2 1290 if (*s == '(')
3280af22 1291 PL_expect = XTERM;
463ee0b2 1292 else {
3280af22 1293 PL_expect = XOPERATOR;
463ee0b2 1294 }
79072805 1295 }
9ded7720 1296 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1297 = (OP*)newSVOP(OP_CONST,0,
1298 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1299 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1300 force_next(token);
1301 }
1302 return s;
1303}
1304
ffb4593c
NT
1305/*
1306 * S_force_ident
9cbb5ea2 1307 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1308 * text only contains the "foo" portion. The first argument is a pointer
1309 * to the "foo", and the second argument is the type symbol to prefix.
1310 * Forces the next token to be a "WORD".
9cbb5ea2 1311 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1312 */
1313
76e3520e 1314STATIC void
bfed75c6 1315S_force_ident(pTHX_ register const char *s, int kind)
79072805 1316{
97aff369 1317 dVAR;
79072805 1318 if (s && *s) {
90e5519e
NC
1319 const STRLEN len = strlen(s);
1320 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
5db06880 1321 start_force(curforce);
9ded7720 1322 NEXTVAL_NEXTTOKE.opval = o;
79072805 1323 force_next(WORD);
748a9306 1324 if (kind) {
11343788 1325 o->op_private = OPpCONST_ENTERED;
55497cff 1326 /* XXX see note in pp_entereval() for why we forgo typo
1327 warnings if the symbol must be introduced in an eval.
1328 GSAR 96-10-12 */
90e5519e
NC
1329 gv_fetchpvn_flags(s, len,
1330 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1331 : GV_ADD,
1332 kind == '$' ? SVt_PV :
1333 kind == '@' ? SVt_PVAV :
1334 kind == '%' ? SVt_PVHV :
a0d0e21e 1335 SVt_PVGV
90e5519e 1336 );
748a9306 1337 }
79072805
LW
1338 }
1339}
1340
1571675a
GS
1341NV
1342Perl_str_to_version(pTHX_ SV *sv)
1343{
1344 NV retval = 0.0;
1345 NV nshift = 1.0;
1346 STRLEN len;
cfd0369c 1347 const char *start = SvPV_const(sv,len);
9d4ba2ae 1348 const char * const end = start + len;
504618e9 1349 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1350 while (start < end) {
ba210ebe 1351 STRLEN skip;
1571675a
GS
1352 UV n;
1353 if (utf)
9041c2e3 1354 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1355 else {
1356 n = *(U8*)start;
1357 skip = 1;
1358 }
1359 retval += ((NV)n)/nshift;
1360 start += skip;
1361 nshift *= 1000;
1362 }
1363 return retval;
1364}
1365
4e553d73 1366/*
ffb4593c
NT
1367 * S_force_version
1368 * Forces the next token to be a version number.
e759cc13
RGS
1369 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1370 * and if "guessing" is TRUE, then no new token is created (and the caller
1371 * must use an alternative parsing method).
ffb4593c
NT
1372 */
1373
76e3520e 1374STATIC char *
e759cc13 1375S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1376{
97aff369 1377 dVAR;
5f66b61c 1378 OP *version = NULL;
44dcb63b 1379 char *d;
5db06880
NC
1380#ifdef PERL_MAD
1381 I32 startoff = s - SvPVX(PL_linestr);
1382#endif
89bfa8cd 1383
29595ff2 1384 s = SKIPSPACE1(s);
89bfa8cd 1385
44dcb63b 1386 d = s;
dd629d5b 1387 if (*d == 'v')
44dcb63b 1388 d++;
44dcb63b 1389 if (isDIGIT(*d)) {
e759cc13
RGS
1390 while (isDIGIT(*d) || *d == '_' || *d == '.')
1391 d++;
5db06880
NC
1392#ifdef PERL_MAD
1393 if (PL_madskills) {
1394 start_force(curforce);
1395 curmad('X', newSVpvn(s,d-s));
1396 }
1397#endif
9f3d182e 1398 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1399 SV *ver;
b73d6f50 1400 s = scan_num(s, &yylval);
89bfa8cd 1401 version = yylval.opval;
dd629d5b
GS
1402 ver = cSVOPx(version)->op_sv;
1403 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1404 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1405 SvNV_set(ver, str_to_version(ver));
1571675a 1406 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1407 }
89bfa8cd 1408 }
5db06880
NC
1409 else if (guessing) {
1410#ifdef PERL_MAD
1411 if (PL_madskills) {
1412 sv_free(nextwhite); /* let next token collect whitespace */
1413 nextwhite = 0;
1414 s = SvPVX(PL_linestr) + startoff;
1415 }
1416#endif
e759cc13 1417 return s;
5db06880 1418 }
89bfa8cd 1419 }
1420
5db06880
NC
1421#ifdef PERL_MAD
1422 if (PL_madskills && !version) {
1423 sv_free(nextwhite); /* let next token collect whitespace */
1424 nextwhite = 0;
1425 s = SvPVX(PL_linestr) + startoff;
1426 }
1427#endif
89bfa8cd 1428 /* NOTE: The parser sees the package name and the VERSION swapped */
5db06880 1429 start_force(curforce);
9ded7720 1430 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1431 force_next(WORD);
89bfa8cd 1432
e759cc13 1433 return s;
89bfa8cd 1434}
1435
ffb4593c
NT
1436/*
1437 * S_tokeq
1438 * Tokenize a quoted string passed in as an SV. It finds the next
1439 * chunk, up to end of string or a backslash. It may make a new
1440 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1441 * turns \\ into \.
1442 */
1443
76e3520e 1444STATIC SV *
cea2e8a9 1445S_tokeq(pTHX_ SV *sv)
79072805 1446{
97aff369 1447 dVAR;
79072805
LW
1448 register char *s;
1449 register char *send;
1450 register char *d;
b3ac6de7
IZ
1451 STRLEN len = 0;
1452 SV *pv = sv;
79072805
LW
1453
1454 if (!SvLEN(sv))
b3ac6de7 1455 goto finish;
79072805 1456
a0d0e21e 1457 s = SvPV_force(sv, len);
21a311ee 1458 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1459 goto finish;
463ee0b2 1460 send = s + len;
79072805
LW
1461 while (s < send && *s != '\\')
1462 s++;
1463 if (s == send)
b3ac6de7 1464 goto finish;
79072805 1465 d = s;
be4731d2 1466 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1467 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1468 if (SvUTF8(sv))
1469 SvUTF8_on(pv);
1470 }
79072805
LW
1471 while (s < send) {
1472 if (*s == '\\') {
a0d0e21e 1473 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1474 s++; /* all that, just for this */
1475 }
1476 *d++ = *s++;
1477 }
1478 *d = '\0';
95a20fc0 1479 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1480 finish:
3280af22 1481 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1482 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1483 return sv;
1484}
1485
ffb4593c
NT
1486/*
1487 * Now come three functions related to double-quote context,
1488 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1489 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1490 * interact with PL_lex_state, and create fake ( ... ) argument lists
1491 * to handle functions and concatenation.
1492 * They assume that whoever calls them will be setting up a fake
1493 * join call, because each subthing puts a ',' after it. This lets
1494 * "lower \luPpEr"
1495 * become
1496 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1497 *
1498 * (I'm not sure whether the spurious commas at the end of lcfirst's
1499 * arguments and join's arguments are created or not).
1500 */
1501
1502/*
1503 * S_sublex_start
1504 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1505 *
1506 * Pattern matching will set PL_lex_op to the pattern-matching op to
1507 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1508 *
1509 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1510 *
1511 * Everything else becomes a FUNC.
1512 *
1513 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1514 * had an OP_CONST or OP_READLINE). This just sets us up for a
1515 * call to S_sublex_push().
1516 */
1517
76e3520e 1518STATIC I32
cea2e8a9 1519S_sublex_start(pTHX)
79072805 1520{
97aff369 1521 dVAR;
0d46e09a 1522 register const I32 op_type = yylval.ival;
79072805
LW
1523
1524 if (op_type == OP_NULL) {
3280af22 1525 yylval.opval = PL_lex_op;
5f66b61c 1526 PL_lex_op = NULL;
79072805
LW
1527 return THING;
1528 }
1529 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1530 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1531
1532 if (SvTYPE(sv) == SVt_PVIV) {
1533 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1534 STRLEN len;
96a5add6 1535 const char * const p = SvPV_const(sv, len);
f54cb97a 1536 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1537 if (SvUTF8(sv))
1538 SvUTF8_on(nsv);
b3ac6de7
IZ
1539 SvREFCNT_dec(sv);
1540 sv = nsv;
4e553d73 1541 }
b3ac6de7 1542 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1543 PL_lex_stuff = NULL;
6f33ba73
RGS
1544 /* Allow <FH> // "foo" */
1545 if (op_type == OP_READLINE)
1546 PL_expect = XTERMORDORDOR;
79072805
LW
1547 return THING;
1548 }
1549
3280af22
NIS
1550 PL_sublex_info.super_state = PL_lex_state;
1551 PL_sublex_info.sub_inwhat = op_type;
1552 PL_sublex_info.sub_op = PL_lex_op;
1553 PL_lex_state = LEX_INTERPPUSH;
55497cff 1554
3280af22
NIS
1555 PL_expect = XTERM;
1556 if (PL_lex_op) {
1557 yylval.opval = PL_lex_op;
5f66b61c 1558 PL_lex_op = NULL;
55497cff 1559 return PMFUNC;
1560 }
1561 else
1562 return FUNC;
1563}
1564
ffb4593c
NT
1565/*
1566 * S_sublex_push
1567 * Create a new scope to save the lexing state. The scope will be
1568 * ended in S_sublex_done. Returns a '(', starting the function arguments
1569 * to the uc, lc, etc. found before.
1570 * Sets PL_lex_state to LEX_INTERPCONCAT.
1571 */
1572
76e3520e 1573STATIC I32
cea2e8a9 1574S_sublex_push(pTHX)
55497cff 1575{
27da23d5 1576 dVAR;
f46d017c 1577 ENTER;
55497cff 1578
3280af22
NIS
1579 PL_lex_state = PL_sublex_info.super_state;
1580 SAVEI32(PL_lex_dojoin);
1581 SAVEI32(PL_lex_brackets);
3280af22
NIS
1582 SAVEI32(PL_lex_casemods);
1583 SAVEI32(PL_lex_starts);
1584 SAVEI32(PL_lex_state);
7766f137 1585 SAVEVPTR(PL_lex_inpat);
3280af22 1586 SAVEI32(PL_lex_inwhat);
57843af0 1587 SAVECOPLINE(PL_curcop);
3280af22 1588 SAVEPPTR(PL_bufptr);
8452ff4b 1589 SAVEPPTR(PL_bufend);
3280af22
NIS
1590 SAVEPPTR(PL_oldbufptr);
1591 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1592 SAVEPPTR(PL_last_lop);
1593 SAVEPPTR(PL_last_uni);
3280af22
NIS
1594 SAVEPPTR(PL_linestart);
1595 SAVESPTR(PL_linestr);
8edd5f42
RGS
1596 SAVEGENERICPV(PL_lex_brackstack);
1597 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1598
1599 PL_linestr = PL_lex_stuff;
a0714e2c 1600 PL_lex_stuff = NULL;
3280af22 1601
9cbb5ea2
GS
1602 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1603 = SvPVX(PL_linestr);
3280af22 1604 PL_bufend += SvCUR(PL_linestr);
bd61b366 1605 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1606 SAVEFREESV(PL_linestr);
1607
1608 PL_lex_dojoin = FALSE;
1609 PL_lex_brackets = 0;
a02a5408
JC
1610 Newx(PL_lex_brackstack, 120, char);
1611 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1612 PL_lex_casemods = 0;
1613 *PL_lex_casestack = '\0';
1614 PL_lex_starts = 0;
1615 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1616 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1617
1618 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1619 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1620 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1621 else
5f66b61c 1622 PL_lex_inpat = NULL;
79072805 1623
55497cff 1624 return '(';
79072805
LW
1625}
1626
ffb4593c
NT
1627/*
1628 * S_sublex_done
1629 * Restores lexer state after a S_sublex_push.
1630 */
1631
76e3520e 1632STATIC I32
cea2e8a9 1633S_sublex_done(pTHX)
79072805 1634{
27da23d5 1635 dVAR;
3280af22 1636 if (!PL_lex_starts++) {
396482e1 1637 SV * const sv = newSVpvs("");
9aa983d2
JH
1638 if (SvUTF8(PL_linestr))
1639 SvUTF8_on(sv);
3280af22 1640 PL_expect = XOPERATOR;
9aa983d2 1641 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1642 return THING;
1643 }
1644
3280af22
NIS
1645 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1646 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1647 return yylex();
79072805
LW
1648 }
1649
ffb4593c 1650 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1651 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1652 PL_linestr = PL_lex_repl;
1653 PL_lex_inpat = 0;
1654 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1655 PL_bufend += SvCUR(PL_linestr);
bd61b366 1656 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1657 SAVEFREESV(PL_linestr);
1658 PL_lex_dojoin = FALSE;
1659 PL_lex_brackets = 0;
3280af22
NIS
1660 PL_lex_casemods = 0;
1661 *PL_lex_casestack = '\0';
1662 PL_lex_starts = 0;
25da4f38 1663 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1664 PL_lex_state = LEX_INTERPNORMAL;
1665 PL_lex_starts++;
e9fa98b2
HS
1666 /* we don't clear PL_lex_repl here, so that we can check later
1667 whether this is an evalled subst; that means we rely on the
1668 logic to ensure sublex_done() is called again only via the
1669 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1670 }
e9fa98b2 1671 else {
3280af22 1672 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1673 PL_lex_repl = NULL;
e9fa98b2 1674 }
79072805 1675 return ',';
ffed7fef
LW
1676 }
1677 else {
5db06880
NC
1678#ifdef PERL_MAD
1679 if (PL_madskills) {
1680 if (thiswhite) {
1681 if (!endwhite)
1682 endwhite = newSVpvn("",0);
1683 sv_catsv(endwhite, thiswhite);
1684 thiswhite = 0;
1685 }
1686 if (thistoken)
1687 sv_setpvn(thistoken,"",0);
1688 else
1689 realtokenstart = -1;
1690 }
1691#endif
f46d017c 1692 LEAVE;
3280af22
NIS
1693 PL_bufend = SvPVX(PL_linestr);
1694 PL_bufend += SvCUR(PL_linestr);
1695 PL_expect = XOPERATOR;
09bef843 1696 PL_sublex_info.sub_inwhat = 0;
79072805 1697 return ')';
ffed7fef
LW
1698 }
1699}
1700
02aa26ce
NT
1701/*
1702 scan_const
1703
1704 Extracts a pattern, double-quoted string, or transliteration. This
1705 is terrifying code.
1706
3280af22
NIS
1707 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1708 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1709 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1710
9b599b2a
GS
1711 Returns a pointer to the character scanned up to. Iff this is
1712 advanced from the start pointer supplied (ie if anything was
1713 successfully parsed), will leave an OP for the substring scanned
1714 in yylval. Caller must intuit reason for not parsing further
1715 by looking at the next characters herself.
1716
02aa26ce
NT
1717 In patterns:
1718 backslashes:
1719 double-quoted style: \r and \n
1720 regexp special ones: \D \s
1721 constants: \x3
1722 backrefs: \1 (deprecated in substitution replacements)
1723 case and quoting: \U \Q \E
1724 stops on @ and $, but not for $ as tail anchor
1725
1726 In transliterations:
1727 characters are VERY literal, except for - not at the start or end
1728 of the string, which indicates a range. scan_const expands the
1729 range to the full set of intermediate characters.
1730
1731 In double-quoted strings:
1732 backslashes:
1733 double-quoted style: \r and \n
1734 constants: \x3
1735 backrefs: \1 (deprecated)
1736 case and quoting: \U \Q \E
1737 stops on @ and $
1738
1739 scan_const does *not* construct ops to handle interpolated strings.
1740 It stops processing as soon as it finds an embedded $ or @ variable
1741 and leaves it to the caller to work out what's going on.
1742
da6eedaa 1743 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1744
1745 $ in pattern could be $foo or could be tail anchor. Assumption:
1746 it's a tail anchor if $ is the last thing in the string, or if it's
1747 followed by one of ")| \n\t"
1748
1749 \1 (backreferences) are turned into $1
1750
1751 The structure of the code is
1752 while (there's a character to process) {
1753 handle transliteration ranges
1754 skip regexp comments
1755 skip # initiated comments in //x patterns
1756 check for embedded @foo
1757 check for embedded scalars
1758 if (backslash) {
1759 leave intact backslashes from leave (below)
1760 deprecate \1 in strings and sub replacements
1761 handle string-changing backslashes \l \U \Q \E, etc.
1762 switch (what was escaped) {
1763 handle - in a transliteration (becomes a literal -)
1764 handle \132 octal characters
1765 handle 0x15 hex characters
1766 handle \cV (control V)
1767 handle printf backslashes (\f, \r, \n, etc)
1768 } (end switch)
1769 } (end if backslash)
1770 } (end while character to read)
4e553d73 1771
02aa26ce
NT
1772*/
1773
76e3520e 1774STATIC char *
cea2e8a9 1775S_scan_const(pTHX_ char *start)
79072805 1776{
97aff369 1777 dVAR;
3280af22 1778 register char *send = PL_bufend; /* end of the constant */
561b68a9 1779 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1780 register char *s = start; /* start of the constant */
1781 register char *d = SvPVX(sv); /* destination for copies */
1782 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1783 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1784 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1785 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1786 UV uv;
4c3a8340
TS
1787#ifdef EBCDIC
1788 UV literal_endpoint = 0;
1789#endif
012bcf8d 1790
dff6d3cd 1791 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1792 PL_lex_inpat
b6d5fef8 1793 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1794 : "";
79072805 1795
2b9d42f0
NIS
1796 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1797 /* If we are doing a trans and we know we want UTF8 set expectation */
1798 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1799 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1800 }
1801
1802
79072805 1803 while (s < send || dorange) {
02aa26ce 1804 /* get transliterations out of the way (they're most literal) */
3280af22 1805 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1806 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1807 if (dorange) {
1ba5c669
JH
1808 I32 i; /* current expanded character */
1809 I32 min; /* first character in range */
1810 I32 max; /* last character in range */
02aa26ce 1811
2b9d42f0 1812 if (has_utf8) {
9d4ba2ae 1813 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1814 char *e = d++;
1815 while (e-- > c)
1816 *(e + 1) = *e;
25716404 1817 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1818 /* mark the range as done, and continue */
1819 dorange = FALSE;
1820 didrange = TRUE;
1821 continue;
1822 }
2b9d42f0 1823
95a20fc0 1824 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1825 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1826 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1827 d -= 2; /* eat the first char and the - */
1828
8ada0baa
JH
1829 min = (U8)*d; /* first char in range */
1830 max = (U8)d[1]; /* last char in range */
1831
c2e66d9e 1832 if (min > max) {
01ec43d0 1833 Perl_croak(aTHX_
d1573ac7 1834 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1835 (char)min, (char)max);
c2e66d9e
GS
1836 }
1837
c7f1f016 1838#ifdef EBCDIC
4c3a8340
TS
1839 if (literal_endpoint == 2 &&
1840 ((isLOWER(min) && isLOWER(max)) ||
1841 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1842 if (isLOWER(min)) {
1843 for (i = min; i <= max; i++)
1844 if (isLOWER(i))
db42d148 1845 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1846 } else {
1847 for (i = min; i <= max; i++)
1848 if (isUPPER(i))
db42d148 1849 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1850 }
1851 }
1852 else
1853#endif
1854 for (i = min; i <= max; i++)
eb160463 1855 *d++ = (char)i;
02aa26ce
NT
1856
1857 /* mark the range as done, and continue */
79072805 1858 dorange = FALSE;
01ec43d0 1859 didrange = TRUE;
4c3a8340
TS
1860#ifdef EBCDIC
1861 literal_endpoint = 0;
1862#endif
79072805 1863 continue;
4e553d73 1864 }
02aa26ce
NT
1865
1866 /* range begins (ignore - as first or last char) */
79072805 1867 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1868 if (didrange) {
1fafa243 1869 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1870 }
2b9d42f0 1871 if (has_utf8) {
25716404 1872 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1873 s++;
1874 continue;
1875 }
79072805
LW
1876 dorange = TRUE;
1877 s++;
01ec43d0
GS
1878 }
1879 else {
1880 didrange = FALSE;
4c3a8340
TS
1881#ifdef EBCDIC
1882 literal_endpoint = 0;
1883#endif
01ec43d0 1884 }
79072805 1885 }
02aa26ce
NT
1886
1887 /* if we get here, we're not doing a transliteration */
1888
0f5d15d6
IZ
1889 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1890 except for the last char, which will be done separately. */
3280af22 1891 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1892 if (s[2] == '#') {
e994fd66 1893 while (s+1 < send && *s != ')')
db42d148 1894 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1895 }
1896 else if (s[2] == '{' /* This should match regcomp.c */
1897 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1898 {
cc6b7395 1899 I32 count = 1;
0f5d15d6 1900 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1901 char c;
1902
d9f97599
GS
1903 while (count && (c = *regparse)) {
1904 if (c == '\\' && regparse[1])
1905 regparse++;
4e553d73 1906 else if (c == '{')
cc6b7395 1907 count++;
4e553d73 1908 else if (c == '}')
cc6b7395 1909 count--;
d9f97599 1910 regparse++;
cc6b7395 1911 }
e994fd66 1912 if (*regparse != ')')
5bdf89e7 1913 regparse--; /* Leave one char for continuation. */
0f5d15d6 1914 while (s < regparse)
db42d148 1915 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1916 }
748a9306 1917 }
02aa26ce
NT
1918
1919 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1920 else if (*s == '#' && PL_lex_inpat &&
1921 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1922 while (s+1 < send && *s != '\n')
db42d148 1923 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1924 }
02aa26ce 1925
5d1d4326 1926 /* check for embedded arrays
da6eedaa 1927 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1928 */
7e2040f0 1929 else if (*s == '@' && s[1]
5d1d4326 1930 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1931 break;
02aa26ce
NT
1932
1933 /* check for embedded scalars. only stop if we're sure it's a
1934 variable.
1935 */
79072805 1936 else if (*s == '$') {
3280af22 1937 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1938 break;
6002328a 1939 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1940 break; /* in regexp, $ might be tail anchor */
1941 }
02aa26ce 1942
2b9d42f0
NIS
1943 /* End of else if chain - OP_TRANS rejoin rest */
1944
02aa26ce 1945 /* backslashes */
79072805
LW
1946 if (*s == '\\' && s+1 < send) {
1947 s++;
02aa26ce
NT
1948
1949 /* some backslashes we leave behind */
c9f97d15 1950 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1951 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1952 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1953 continue;
1954 }
02aa26ce
NT
1955
1956 /* deprecate \1 in strings and substitution replacements */
3280af22 1957 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1958 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1959 {
599cee73 1960 if (ckWARN(WARN_SYNTAX))
9014280d 1961 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1962 *--s = '$';
1963 break;
1964 }
02aa26ce
NT
1965
1966 /* string-change backslash escapes */
3280af22 1967 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1968 --s;
1969 break;
1970 }
02aa26ce
NT
1971
1972 /* if we get here, it's either a quoted -, or a digit */
79072805 1973 switch (*s) {
02aa26ce
NT
1974
1975 /* quoted - in transliterations */
79072805 1976 case '-':
3280af22 1977 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1978 *d++ = *s++;
1979 continue;
1980 }
1981 /* FALL THROUGH */
1982 default:
11b8faa4 1983 {
041457d9
DM
1984 if (isALNUM(*s) &&
1985 *s != '_' &&
1986 ckWARN(WARN_MISC))
9014280d 1987 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1988 "Unrecognized escape \\%c passed through",
1989 *s);
1990 /* default action is to copy the quoted character */
f9a63242 1991 goto default_action;
11b8faa4 1992 }
02aa26ce
NT
1993
1994 /* \132 indicates an octal constant */
79072805
LW
1995 case '0': case '1': case '2': case '3':
1996 case '4': case '5': case '6': case '7':
ba210ebe 1997 {
53305cf1
NC
1998 I32 flags = 0;
1999 STRLEN len = 3;
2000 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2001 s += len;
2002 }
012bcf8d 2003 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2004
2005 /* \x24 indicates a hex constant */
79072805 2006 case 'x':
a0ed51b3
LW
2007 ++s;
2008 if (*s == '{') {
9d4ba2ae 2009 char* const e = strchr(s, '}');
a4c04bdc
NC
2010 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2011 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2012 STRLEN len;
355860ce 2013
53305cf1 2014 ++s;
adaeee49 2015 if (!e) {
a0ed51b3 2016 yyerror("Missing right brace on \\x{}");
355860ce 2017 continue;
ba210ebe 2018 }
53305cf1
NC
2019 len = e - s;
2020 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2021 s = e + 1;
a0ed51b3
LW
2022 }
2023 else {
ba210ebe 2024 {
53305cf1 2025 STRLEN len = 2;
a4c04bdc 2026 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2027 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2028 s += len;
2029 }
012bcf8d
GS
2030 }
2031
2032 NUM_ESCAPE_INSERT:
2033 /* Insert oct or hex escaped character.
301d3d20 2034 * There will always enough room in sv since such
db42d148 2035 * escapes will be longer than any UTF-8 sequence
301d3d20 2036 * they can end up as. */
ba7cea30 2037
c7f1f016
NIS
2038 /* We need to map to chars to ASCII before doing the tests
2039 to cover EBCDIC
2040 */
c4d5f83a 2041 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2042 if (!has_utf8 && uv > 255) {
301d3d20
JH
2043 /* Might need to recode whatever we have
2044 * accumulated so far if it contains any
2045 * hibit chars.
2046 *
2047 * (Can't we keep track of that and avoid
2048 * this rescan? --jhi)
012bcf8d 2049 */
c7f1f016 2050 int hicount = 0;
63cd0674
NIS
2051 U8 *c;
2052 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2053 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2054 hicount++;
db42d148 2055 }
012bcf8d 2056 }
63cd0674 2057 if (hicount) {
9d4ba2ae 2058 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2059 U8 *src, *dst;
2060 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2061 src = (U8 *)d - 1;
2062 dst = src+hicount;
2063 d += hicount;
cfd0369c 2064 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2065 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2066 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2067 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2068 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2069 }
2070 else {
63cd0674 2071 *dst-- = *src;
012bcf8d 2072 }
c7f1f016 2073 src--;
012bcf8d
GS
2074 }
2075 }
2076 }
2077
9aa983d2 2078 if (has_utf8 || uv > 255) {
9041c2e3 2079 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2080 has_utf8 = TRUE;
f9a63242
JH
2081 if (PL_lex_inwhat == OP_TRANS &&
2082 PL_sublex_info.sub_op) {
2083 PL_sublex_info.sub_op->op_private |=
2084 (PL_lex_repl ? OPpTRANS_FROM_UTF
2085 : OPpTRANS_TO_UTF);
f9a63242 2086 }
012bcf8d 2087 }
a0ed51b3 2088 else {
012bcf8d 2089 *d++ = (char)uv;
a0ed51b3 2090 }
012bcf8d
GS
2091 }
2092 else {
c4d5f83a 2093 *d++ = (char) uv;
a0ed51b3 2094 }
79072805 2095 continue;
02aa26ce 2096
b239daa5 2097 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2098 case 'N':
55eda711 2099 ++s;
423cee85
JH
2100 if (*s == '{') {
2101 char* e = strchr(s, '}');
155aba94 2102 SV *res;
423cee85 2103 STRLEN len;
cfd0369c 2104 const char *str;
4e553d73 2105
423cee85 2106 if (!e) {
5777a3f7 2107 yyerror("Missing right brace on \\N{}");
423cee85
JH
2108 e = s - 1;
2109 goto cont_scan;
2110 }
dbc0d4f2
JH
2111 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2112 /* \N{U+...} */
2113 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2114 PERL_SCAN_DISALLOW_PREFIX;
2115 s += 3;
2116 len = e - s;
2117 uv = grok_hex(s, &len, &flags, NULL);
2118 s = e + 1;
2119 goto NUM_ESCAPE_INSERT;
2120 }
55eda711 2121 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2122 res = new_constant( NULL, 0, "charnames",
a0714e2c 2123 res, NULL, "\\N{...}" );
f9a63242
JH
2124 if (has_utf8)
2125 sv_utf8_upgrade(res);
cfd0369c 2126 str = SvPV_const(res,len);
1c47067b
JH
2127#ifdef EBCDIC_NEVER_MIND
2128 /* charnames uses pack U and that has been
2129 * recently changed to do the below uni->native
2130 * mapping, so this would be redundant (and wrong,
2131 * the code point would be doubly converted).
2132 * But leave this in just in case the pack U change
2133 * gets revoked, but the semantics is still
2134 * desireable for charnames. --jhi */
cddc7ef4 2135 {
cfd0369c 2136 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2137
2138 if (uv < 0x100) {
89ebb4a3 2139 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2140
2141 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2142 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2143 str = SvPV_const(res, len);
cddc7ef4
JH
2144 }
2145 }
2146#endif
89491803 2147 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2148 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2149 SvCUR_set(sv, d - ostart);
2150 SvPOK_on(sv);
e4f3eed8 2151 *d = '\0';
f08d6ad9 2152 sv_utf8_upgrade(sv);
d2f449dd 2153 /* this just broke our allocation above... */
eb160463 2154 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2155 d = SvPVX(sv) + SvCUR(sv);
89491803 2156 has_utf8 = TRUE;
f08d6ad9 2157 }
eb160463 2158 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2159 const char * const odest = SvPVX_const(sv);
423cee85 2160
8973db79 2161 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2162 d = SvPVX(sv) + (d - odest);
2163 }
2164 Copy(str, d, len, char);
2165 d += len;
2166 SvREFCNT_dec(res);
2167 cont_scan:
2168 s = e + 1;
2169 }
2170 else
5777a3f7 2171 yyerror("Missing braces on \\N{}");
423cee85
JH
2172 continue;
2173
02aa26ce 2174 /* \c is a control character */
79072805
LW
2175 case 'c':
2176 s++;
961ce445 2177 if (s < send) {
ba210ebe 2178 U8 c = *s++;
c7f1f016
NIS
2179#ifdef EBCDIC
2180 if (isLOWER(c))
2181 c = toUPPER(c);
2182#endif
db42d148 2183 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2184 }
961ce445
RGS
2185 else {
2186 yyerror("Missing control char name in \\c");
2187 }
79072805 2188 continue;
02aa26ce
NT
2189
2190 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2191 case 'b':
db42d148 2192 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2193 break;
2194 case 'n':
db42d148 2195 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2196 break;
2197 case 'r':
db42d148 2198 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2199 break;
2200 case 'f':
db42d148 2201 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2202 break;
2203 case 't':
db42d148 2204 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2205 break;
34a3fe2a 2206 case 'e':
db42d148 2207 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2208 break;
2209 case 'a':
db42d148 2210 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2211 break;
02aa26ce
NT
2212 } /* end switch */
2213
79072805
LW
2214 s++;
2215 continue;
02aa26ce 2216 } /* end if (backslash) */
4c3a8340
TS
2217#ifdef EBCDIC
2218 else
2219 literal_endpoint++;
2220#endif
02aa26ce 2221
f9a63242 2222 default_action:
2b9d42f0
NIS
2223 /* If we started with encoded form, or already know we want it
2224 and then encode the next character */
2225 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2226 STRLEN len = 1;
5f66b61c
AL
2227 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2228 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2229 s += len;
2230 if (need > len) {
2231 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2232 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2233 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2234 }
5f66b61c 2235 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2236 has_utf8 = TRUE;
2237 }
2238 else {
2239 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2240 }
02aa26ce
NT
2241 } /* while loop to process each character */
2242
2243 /* terminate the string and set up the sv */
79072805 2244 *d = '\0';
95a20fc0 2245 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2246 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2247 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2248
79072805 2249 SvPOK_on(sv);
9f4817db 2250 if (PL_encoding && !has_utf8) {
d0063567
DK
2251 sv_recode_to_utf8(sv, PL_encoding);
2252 if (SvUTF8(sv))
2253 has_utf8 = TRUE;
9f4817db 2254 }
2b9d42f0 2255 if (has_utf8) {
7e2040f0 2256 SvUTF8_on(sv);
2b9d42f0 2257 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2258 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2259 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2260 }
2261 }
79072805 2262
02aa26ce 2263 /* shrink the sv if we allocated more than we used */
79072805 2264 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2265 SvPV_shrink_to_cur(sv);
79072805 2266 }
02aa26ce 2267
9b599b2a 2268 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2269 if (s > PL_bufptr) {
2270 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2271 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2272 sv, NULL,
4e553d73 2273 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2274 ? "tr"
3280af22 2275 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2276 ? "s"
2277 : "qq")));
79072805 2278 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2279 } else
8990e307 2280 SvREFCNT_dec(sv);
79072805
LW
2281 return s;
2282}
2283
ffb4593c
NT
2284/* S_intuit_more
2285 * Returns TRUE if there's more to the expression (e.g., a subscript),
2286 * FALSE otherwise.
ffb4593c
NT
2287 *
2288 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2289 *
2290 * ->[ and ->{ return TRUE
2291 * { and [ outside a pattern are always subscripts, so return TRUE
2292 * if we're outside a pattern and it's not { or [, then return FALSE
2293 * if we're in a pattern and the first char is a {
2294 * {4,5} (any digits around the comma) returns FALSE
2295 * if we're in a pattern and the first char is a [
2296 * [] returns FALSE
2297 * [SOMETHING] has a funky algorithm to decide whether it's a
2298 * character class or not. It has to deal with things like
2299 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2300 * anything else returns TRUE
2301 */
2302
9cbb5ea2
GS
2303/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2304
76e3520e 2305STATIC int
cea2e8a9 2306S_intuit_more(pTHX_ register char *s)
79072805 2307{
97aff369 2308 dVAR;
3280af22 2309 if (PL_lex_brackets)
79072805
LW
2310 return TRUE;
2311 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2312 return TRUE;
2313 if (*s != '{' && *s != '[')
2314 return FALSE;
3280af22 2315 if (!PL_lex_inpat)
79072805
LW
2316 return TRUE;
2317
2318 /* In a pattern, so maybe we have {n,m}. */
2319 if (*s == '{') {
2320 s++;
2321 if (!isDIGIT(*s))
2322 return TRUE;
2323 while (isDIGIT(*s))
2324 s++;
2325 if (*s == ',')
2326 s++;
2327 while (isDIGIT(*s))
2328 s++;
2329 if (*s == '}')
2330 return FALSE;
2331 return TRUE;
2332
2333 }
2334
2335 /* On the other hand, maybe we have a character class */
2336
2337 s++;
2338 if (*s == ']' || *s == '^')
2339 return FALSE;
2340 else {
ffb4593c 2341 /* this is terrifying, and it works */
79072805
LW
2342 int weight = 2; /* let's weigh the evidence */
2343 char seen[256];
f27ffc4a 2344 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2345 const char * const send = strchr(s,']');
3280af22 2346 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2347
2348 if (!send) /* has to be an expression */
2349 return TRUE;
2350
2351 Zero(seen,256,char);
2352 if (*s == '$')
2353 weight -= 3;
2354 else if (isDIGIT(*s)) {
2355 if (s[1] != ']') {
2356 if (isDIGIT(s[1]) && s[2] == ']')
2357 weight -= 10;
2358 }
2359 else
2360 weight -= 100;
2361 }
2362 for (; s < send; s++) {
2363 last_un_char = un_char;
2364 un_char = (unsigned char)*s;
2365 switch (*s) {
2366 case '@':
2367 case '&':
2368 case '$':
2369 weight -= seen[un_char] * 10;
7e2040f0 2370 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2371 int len;
8903cb82 2372 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2373 len = (int)strlen(tmpbuf);
2374 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2375 weight -= 100;
2376 else
2377 weight -= 10;
2378 }
2379 else if (*s == '$' && s[1] &&
93a17b20
LW
2380 strchr("[#!%*<>()-=",s[1])) {
2381 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2382 weight -= 10;
2383 else
2384 weight -= 1;
2385 }
2386 break;
2387 case '\\':
2388 un_char = 254;
2389 if (s[1]) {
93a17b20 2390 if (strchr("wds]",s[1]))
79072805
LW
2391 weight += 100;
2392 else if (seen['\''] || seen['"'])
2393 weight += 1;
93a17b20 2394 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2395 weight += 40;
2396 else if (isDIGIT(s[1])) {
2397 weight += 40;
2398 while (s[1] && isDIGIT(s[1]))
2399 s++;
2400 }
2401 }
2402 else
2403 weight += 100;
2404 break;
2405 case '-':
2406 if (s[1] == '\\')
2407 weight += 50;
93a17b20 2408 if (strchr("aA01! ",last_un_char))
79072805 2409 weight += 30;
93a17b20 2410 if (strchr("zZ79~",s[1]))
79072805 2411 weight += 30;
f27ffc4a
GS
2412 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2413 weight -= 5; /* cope with negative subscript */
79072805
LW
2414 break;
2415 default:
3792a11b
NC
2416 if (!isALNUM(last_un_char)
2417 && !(last_un_char == '$' || last_un_char == '@'
2418 || last_un_char == '&')
2419 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2420 char *d = tmpbuf;
2421 while (isALPHA(*s))
2422 *d++ = *s++;
2423 *d = '\0';
2424 if (keyword(tmpbuf, d - tmpbuf))
2425 weight -= 150;
2426 }
2427 if (un_char == last_un_char + 1)
2428 weight += 5;
2429 weight -= seen[un_char];
2430 break;
2431 }
2432 seen[un_char]++;
2433 }
2434 if (weight >= 0) /* probably a character class */
2435 return FALSE;
2436 }
2437
2438 return TRUE;
2439}
ffed7fef 2440
ffb4593c
NT
2441/*
2442 * S_intuit_method
2443 *
2444 * Does all the checking to disambiguate
2445 * foo bar
2446 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2447 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2448 *
2449 * First argument is the stuff after the first token, e.g. "bar".
2450 *
2451 * Not a method if bar is a filehandle.
2452 * Not a method if foo is a subroutine prototyped to take a filehandle.
2453 * Not a method if it's really "Foo $bar"
2454 * Method if it's "foo $bar"
2455 * Not a method if it's really "print foo $bar"
2456 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2457 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2458 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2459 * =>
2460 */
2461
76e3520e 2462STATIC int
62d55b22 2463S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2464{
97aff369 2465 dVAR;
a0d0e21e 2466 char *s = start + (*start == '$');
3280af22 2467 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2468 STRLEN len;
2469 GV* indirgv;
5db06880
NC
2470#ifdef PERL_MAD
2471 int soff;
2472#endif
a0d0e21e
LW
2473
2474 if (gv) {
62d55b22 2475 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2476 return 0;
62d55b22
NC
2477 if (cv) {
2478 if (SvPOK(cv)) {
2479 const char *proto = SvPVX_const(cv);
2480 if (proto) {
2481 if (*proto == ';')
2482 proto++;
2483 if (*proto == '*')
2484 return 0;
2485 }
b6c543e3
IZ
2486 }
2487 } else
a0d0e21e
LW
2488 gv = 0;
2489 }
8903cb82 2490 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2491 /* start is the beginning of the possible filehandle/object,
2492 * and s is the end of it
2493 * tmpbuf is a copy of it
2494 */
2495
a0d0e21e 2496 if (*start == '$') {
3280af22 2497 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2498 return 0;
5db06880
NC
2499#ifdef PERL_MAD
2500 len = start - SvPVX(PL_linestr);
2501#endif
29595ff2 2502 s = PEEKSPACE(s);
5db06880
NC
2503#ifdef PERLMAD
2504 start = SvPVX(PL_linestr) + len;
2505#endif
3280af22
NIS
2506 PL_bufptr = start;
2507 PL_expect = XREF;
a0d0e21e
LW
2508 return *s == '(' ? FUNCMETH : METHOD;
2509 }
2510 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2511 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2512 len -= 2;
2513 tmpbuf[len] = '\0';
5db06880
NC
2514#ifdef PERL_MAD
2515 soff = s - SvPVX(PL_linestr);
2516#endif
c3e0f903
GS
2517 goto bare_package;
2518 }
90e5519e 2519 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2520 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2521 return 0;
2522 /* filehandle or package name makes it a method */
89bfa8cd 2523 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2524#ifdef PERL_MAD
2525 soff = s - SvPVX(PL_linestr);
2526#endif
29595ff2 2527 s = PEEKSPACE(s);
3280af22 2528 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2529 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2530 bare_package:
5db06880 2531 start_force(curforce);
9ded7720 2532 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2533 newSVpvn(tmpbuf,len));
9ded7720 2534 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2535 if (PL_madskills)
2536 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2537 PL_expect = XTERM;
a0d0e21e 2538 force_next(WORD);
3280af22 2539 PL_bufptr = s;
5db06880
NC
2540#ifdef PERL_MAD
2541 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2542#endif
a0d0e21e
LW
2543 return *s == '(' ? FUNCMETH : METHOD;
2544 }
2545 }
2546 return 0;
2547}
2548
ffb4593c
NT
2549/*
2550 * S_incl_perldb
2551 * Return a string of Perl code to load the debugger. If PERL5DB
2552 * is set, it will return the contents of that, otherwise a
2553 * compile-time require of perl5db.pl.
2554 */
2555
bfed75c6 2556STATIC const char*
cea2e8a9 2557S_incl_perldb(pTHX)
a0d0e21e 2558{
97aff369 2559 dVAR;
3280af22 2560 if (PL_perldb) {
9d4ba2ae 2561 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2562
2563 if (pdb)
2564 return pdb;
93189314 2565 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2566 return "BEGIN { require 'perl5db.pl' }";
2567 }
2568 return "";
2569}
2570
2571
16d20bd9 2572/* Encoded script support. filter_add() effectively inserts a
4e553d73 2573 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2574 * Note that the filter function only applies to the current source file
2575 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2576 *
2577 * The datasv parameter (which may be NULL) can be used to pass
2578 * private data to this instance of the filter. The filter function
2579 * can recover the SV using the FILTER_DATA macro and use it to
2580 * store private buffers and state information.
2581 *
2582 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2583 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2584 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2585 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2586 * private use must be set using malloc'd pointers.
2587 */
16d20bd9
AD
2588
2589SV *
864dbfa3 2590Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2591{
97aff369 2592 dVAR;
f4c556ac 2593 if (!funcp)
a0714e2c 2594 return NULL;
f4c556ac 2595
3280af22
NIS
2596 if (!PL_rsfp_filters)
2597 PL_rsfp_filters = newAV();
16d20bd9 2598 if (!datasv)
561b68a9 2599 datasv = newSV(0);
862a34c6 2600 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2601 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2602 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2603 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2604 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2605 av_unshift(PL_rsfp_filters, 1);
2606 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2607 return(datasv);
2608}
4e553d73 2609
16d20bd9
AD
2610
2611/* Delete most recently added instance of this filter function. */
a0d0e21e 2612void
864dbfa3 2613Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2614{
97aff369 2615 dVAR;
e0c19803 2616 SV *datasv;
24801a4b 2617
33073adb 2618#ifdef DEBUGGING
8141890a 2619 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2620#endif
3280af22 2621 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2622 return;
2623 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2624 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2625 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2626 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2627 IoANY(datasv) = (void *)NULL;
3280af22 2628 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2629
16d20bd9
AD
2630 return;
2631 }
2632 /* we need to search for the correct entry and clear it */
cea2e8a9 2633 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2634}
2635
2636
1de9afcd
RGS
2637/* Invoke the idxth filter function for the current rsfp. */
2638/* maxlen 0 = read one text line */
16d20bd9 2639I32
864dbfa3 2640Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2641{
97aff369 2642 dVAR;
16d20bd9
AD
2643 filter_t funcp;
2644 SV *datasv = NULL;
e50aee73 2645
3280af22 2646 if (!PL_rsfp_filters)
16d20bd9 2647 return -1;
1de9afcd 2648 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2649 /* Provide a default input filter to make life easy. */
2650 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2651 DEBUG_P(PerlIO_printf(Perl_debug_log,
2652 "filter_read %d: from rsfp\n", idx));
4e553d73 2653 if (maxlen) {
16d20bd9
AD
2654 /* Want a block */
2655 int len ;
f54cb97a 2656 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2657
2658 /* ensure buf_sv is large enough */
eb160463 2659 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2660 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2661 if (PerlIO_error(PL_rsfp))
37120919
AD
2662 return -1; /* error */
2663 else
2664 return 0 ; /* end of file */
2665 }
16d20bd9
AD
2666 SvCUR_set(buf_sv, old_len + len) ;
2667 } else {
2668 /* Want a line */
3280af22
NIS
2669 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2670 if (PerlIO_error(PL_rsfp))
37120919
AD
2671 return -1; /* error */
2672 else
2673 return 0 ; /* end of file */
2674 }
16d20bd9
AD
2675 }
2676 return SvCUR(buf_sv);
2677 }
2678 /* Skip this filter slot if filter has been deleted */
1de9afcd 2679 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2680 DEBUG_P(PerlIO_printf(Perl_debug_log,
2681 "filter_read %d: skipped (filter deleted)\n",
2682 idx));
16d20bd9
AD
2683 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2684 }
2685 /* Get function pointer hidden within datasv */
8141890a 2686 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2687 DEBUG_P(PerlIO_printf(Perl_debug_log,
2688 "filter_read %d: via function %p (%s)\n",
cfd0369c 2689 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2690 /* Call function. The function is expected to */
2691 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2692 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2693 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2694}
2695
76e3520e 2696STATIC char *
cea2e8a9 2697S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2698{
97aff369 2699 dVAR;
c39cd008 2700#ifdef PERL_CR_FILTER
3280af22 2701 if (!PL_rsfp_filters) {
c39cd008 2702 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2703 }
2704#endif
3280af22 2705 if (PL_rsfp_filters) {
55497cff 2706 if (!append)
2707 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2708 if (FILTER_READ(0, sv, 0) > 0)
2709 return ( SvPVX(sv) ) ;
2710 else
bd61b366 2711 return NULL ;
16d20bd9 2712 }
9d116dd7 2713 else
fd049845 2714 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2715}
2716
01ec43d0 2717STATIC HV *
7fc63493 2718S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2719{
97aff369 2720 dVAR;
def3634b
GS
2721 GV *gv;
2722
01ec43d0 2723 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2724 return PL_curstash;
2725
2726 if (len > 2 &&
2727 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2728 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2729 {
2730 return GvHV(gv); /* Foo:: */
def3634b
GS
2731 }
2732
2733 /* use constant CLASS => 'MyClass' */
90e5519e 2734 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2735 SV *sv;
2736 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2737 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2738 }
2739 }
2740
2741 return gv_stashpv(pkgname, FALSE);
2742}
a0d0e21e 2743
5db06880
NC
2744#ifdef PERL_MAD
2745 /*
2746 * Perl_madlex
2747 * The intent of this yylex wrapper is to minimize the changes to the
2748 * tokener when we aren't interested in collecting madprops. It remains
2749 * to be seen how successful this strategy will be...
2750 */
2751
2752int
2753Perl_madlex(pTHX)
2754{
2755 int optype;
2756 char *s = PL_bufptr;
2757
2758 /* make sure thiswhite is initialized */
2759 thiswhite = 0;
2760 thismad = 0;
2761
2762 /* just do what yylex would do on pending identifier; leave thiswhite alone */
2763 if (PL_pending_ident)
2764 return S_pending_ident(aTHX);
2765
2766 /* previous token ate up our whitespace? */
2767 if (!PL_lasttoke && nextwhite) {
2768 thiswhite = nextwhite;
2769 nextwhite = 0;
2770 }
2771
2772 /* isolate the token, and figure out where it is without whitespace */
2773 realtokenstart = -1;
2774 thistoken = 0;
2775 optype = yylex();
2776 s = PL_bufptr;
2777 assert(curforce < 0);
2778
2779 if (!thismad || thismad->mad_key == '^') { /* not forced already? */
2780 if (!thistoken) {
2781 if (realtokenstart < 0 || !CopLINE(PL_curcop))
2782 thistoken = newSVpvn("",0);
2783 else {
2784 char *tstart = SvPVX(PL_linestr) + realtokenstart;
2785 thistoken = newSVpvn(tstart, s - tstart);
2786 }
2787 }
2788 if (thismad) /* install head */
2789 CURMAD('X', thistoken);
2790 }
2791
2792 /* last whitespace of a sublex? */
2793 if (optype == ')' && endwhite) {
2794 CURMAD('X', endwhite);
2795 }
2796
2797 if (!thismad) {
2798
2799 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2800 if (!thiswhite && !endwhite && !optype) {
2801 sv_free(thistoken);
2802 thistoken = 0;
2803 return 0;
2804 }
2805
2806 /* put off final whitespace till peg */
2807 if (optype == ';' && !PL_rsfp) {
2808 nextwhite = thiswhite;
2809 thiswhite = 0;
2810 }
2811 else if (thisopen) {
2812 CURMAD('q', thisopen);
2813 if (thistoken)
2814 sv_free(thistoken);
2815 thistoken = 0;
2816 }
2817 else {
2818 /* Store actual token text as madprop X */
2819 CURMAD('X', thistoken);
2820 }
2821
2822 if (thiswhite) {
2823 /* add preceding whitespace as madprop _ */
2824 CURMAD('_', thiswhite);
2825 }
2826
2827 if (thisstuff) {
2828 /* add quoted material as madprop = */
2829 CURMAD('=', thisstuff);
2830 }
2831
2832 if (thisclose) {
2833 /* add terminating quote as madprop Q */
2834 CURMAD('Q', thisclose);
2835 }
2836 }
2837
2838 /* special processing based on optype */
2839
2840 switch (optype) {
2841
2842 /* opval doesn't need a TOKEN since it can already store mp */
2843 case WORD:
2844 case METHOD:
2845 case FUNCMETH:
2846 case THING:
2847 case PMFUNC:
2848 case PRIVATEREF:
2849 case FUNC0SUB:
2850 case UNIOPSUB:
2851 case LSTOPSUB:
2852 if (yylval.opval)
2853 append_madprops(thismad, yylval.opval, 0);
2854 thismad = 0;
2855 return optype;
2856
2857 /* fake EOF */
2858 case 0:
2859 optype = PEG;
2860 if (endwhite) {
2861 addmad(newMADsv('p', endwhite), &thismad, 0);
2862 endwhite = 0;
2863 }
2864 break;
2865
2866 case ']':
2867 case '}':
2868 if (faketokens)
2869 break;
2870 /* remember any fake bracket that lexer is about to discard */
2871 if (PL_lex_brackets == 1 &&
2872 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2873 {
2874 s = PL_bufptr;
2875 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2876 s++;
2877 if (*s == '}') {
2878 thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2879 addmad(newMADsv('#', thiswhite), &thismad, 0);
2880 thiswhite = 0;
2881 PL_bufptr = s - 1;
2882 break; /* don't bother looking for trailing comment */
2883 }
2884 else
2885 s = PL_bufptr;
2886 }
2887 if (optype == ']')
2888 break;
2889 /* FALLTHROUGH */
2890
2891 /* attach a trailing comment to its statement instead of next token */
2892 case ';':
2893 if (faketokens)
2894 break;
2895 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2896 s = PL_bufptr;
2897 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2898 s++;
2899 if (*s == '\n' || *s == '#') {
2900 while (s < PL_bufend && *s != '\n')
2901 s++;
2902 if (s < PL_bufend)
2903 s++;
2904 thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2905 addmad(newMADsv('#', thiswhite), &thismad, 0);
2906 thiswhite = 0;
2907 PL_bufptr = s;
2908 }
2909 }
2910 break;
2911
2912 /* pval */
2913 case LABEL:
2914 break;
2915
2916 /* ival */
2917 default:
2918 break;
2919
2920 }
2921
2922 /* Create new token struct. Note: opvals return early above. */
2923 yylval.tkval = newTOKEN(optype, yylval, thismad);
2924 thismad = 0;
2925 return optype;
2926}
2927#endif
2928
468aa647 2929STATIC char *
cc6ed77d 2930S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2931 dVAR;
468aa647
RGS
2932 if (PL_expect != XSTATE)
2933 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2934 is_use ? "use" : "no"));
29595ff2 2935 s = SKIPSPACE1(s);
468aa647
RGS
2936 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2937 s = force_version(s, TRUE);
29595ff2 2938 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
5db06880 2939 start_force(curforce);
9ded7720 2940 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2941 force_next(WORD);
2942 }
2943 else if (*s == 'v') {
2944 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2945 s = force_version(s, FALSE);
2946 }
2947 }
2948 else {
2949 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2950 s = force_version(s, FALSE);
2951 }
2952 yylval.ival = is_use;
2953 return s;
2954}
748a9306 2955#ifdef DEBUGGING
27da23d5 2956 static const char* const exp_name[] =
09bef843 2957 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2958 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2959 };
748a9306 2960#endif
463ee0b2 2961
02aa26ce
NT
2962/*
2963 yylex
2964
2965 Works out what to call the token just pulled out of the input
2966 stream. The yacc parser takes care of taking the ops we return and
2967 stitching them into a tree.
2968
2969 Returns:
2970 PRIVATEREF
2971
2972 Structure:
2973 if read an identifier
2974 if we're in a my declaration
2975 croak if they tried to say my($foo::bar)
2976 build the ops for a my() declaration
2977 if it's an access to a my() variable
2978 are we in a sort block?
2979 croak if my($a); $a <=> $b
2980 build ops for access to a my() variable
2981 if in a dq string, and they've said @foo and we can't find @foo
2982 croak
2983 build ops for a bareword
2984 if we already built the token before, use it.
2985*/
2986
20141f0e 2987
dba4d153
JH
2988#ifdef __SC__
2989#pragma segment Perl_yylex
2990#endif
dba4d153 2991int
dba4d153 2992Perl_yylex(pTHX)
20141f0e 2993{
97aff369 2994 dVAR;
3afc138a 2995 register char *s = PL_bufptr;
378cc40b 2996 register char *d;
463ee0b2 2997 STRLEN len;
aa7440fb 2998 bool bof = FALSE;
a687059c 2999
bbf60fe6 3000 DEBUG_T( {
396482e1 3001 SV* tmp = newSVpvs("");
b6007c36
DM
3002 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3003 (IV)CopLINE(PL_curcop),
3004 lex_state_names[PL_lex_state],
3005 exp_name[PL_expect],
3006 pv_display(tmp, s, strlen(s), 0, 60));
3007 SvREFCNT_dec(tmp);
bbf60fe6 3008 } );
02aa26ce 3009 /* check if there's an identifier for us to look at */
ba979b31 3010 if (PL_pending_ident)
bbf60fe6 3011 return REPORT(S_pending_ident(aTHX));
bbce6d69 3012
02aa26ce
NT
3013 /* no identifier pending identification */
3014
3280af22 3015 switch (PL_lex_state) {
79072805
LW
3016#ifdef COMMENTARY
3017 case LEX_NORMAL: /* Some compilers will produce faster */
3018 case LEX_INTERPNORMAL: /* code if we comment these out. */
3019 break;
3020#endif
3021
09bef843 3022 /* when we've already built the next token, just pull it out of the queue */
79072805 3023 case LEX_KNOWNEXT:
5db06880
NC
3024#ifdef PERL_MAD
3025 PL_lasttoke--;
3026 yylval = PL_nexttoke[PL_lasttoke].next_val;
3027 if (PL_madskills) {
3028 thismad = PL_nexttoke[PL_lasttoke].next_mad;
3029 PL_nexttoke[PL_lasttoke].next_mad = 0;
3030 if (thismad && thismad->mad_key == '_') {
3031 thiswhite = (SV*)thismad->mad_val;
3032 thismad->mad_val = 0;
3033 mad_free(thismad);
3034 thismad = 0;
3035 }
3036 }
3037 if (!PL_lasttoke) {
3038 PL_lex_state = PL_lex_defer;
3039 PL_expect = PL_lex_expect;
3040 PL_lex_defer = LEX_NORMAL;
3041 if (!PL_nexttoke[PL_lasttoke].next_type)
3042 return yylex();
3043 }
3044#else
3280af22 3045 PL_nexttoke--;
5db06880 3046 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3047 if (!PL_nexttoke) {
3048 PL_lex_state = PL_lex_defer;
3049 PL_expect = PL_lex_expect;
3050 PL_lex_defer = LEX_NORMAL;
463ee0b2 3051 }
5db06880
NC
3052#endif
3053#ifdef PERL_MAD
3054 /* FIXME - can these be merged? */
3055 return(PL_nexttoke[PL_lasttoke].next_type);
3056#else
bbf60fe6 3057 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3058#endif
79072805 3059
02aa26ce 3060 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3061 when we get here, PL_bufptr is at the \
02aa26ce 3062 */
79072805
LW
3063 case LEX_INTERPCASEMOD:
3064#ifdef DEBUGGING
3280af22 3065 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3066 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3067#endif
02aa26ce 3068 /* handle \E or end of string */
3280af22 3069 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3070 /* if at a \E */
3280af22 3071 if (PL_lex_casemods) {
f54cb97a 3072 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3073 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3074
3792a11b
NC
3075 if (PL_bufptr != PL_bufend
3076 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3077 PL_bufptr += 2;
3078 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3079#ifdef PERL_MAD
3080 if (PL_madskills)
3081 thistoken = newSVpvn("\\E",2);
3082#endif
a0d0e21e 3083 }
bbf60fe6 3084 return REPORT(')');
79072805 3085 }
5db06880
NC
3086#ifdef PERL_MAD
3087 while (PL_bufptr != PL_bufend &&
3088 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3089 if (!thiswhite)
3090 thiswhite = newSVpvn("",0);
3091 sv_catpvn(thiswhite, PL_bufptr, 2);
3092 PL_bufptr += 2;
3093 }
3094#else
3280af22
NIS
3095 if (PL_bufptr != PL_bufend)
3096 PL_bufptr += 2;
5db06880 3097#endif
3280af22 3098 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3099 return yylex();
79072805
LW
3100 }
3101 else {
607df283 3102 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3103 "### Saw case modifier\n"); });
3280af22 3104 s = PL_bufptr + 1;
6e909404 3105 if (s[1] == '\\' && s[2] == 'E') {
5db06880
NC
3106#ifdef PERL_MAD
3107 if (!thiswhite)
3108 thiswhite = newSVpvn("",0);
3109 sv_catpvn(thiswhite, PL_bufptr, 4);
3110#endif
89122651 3111 PL_bufptr = s + 3;
6e909404
JH
3112 PL_lex_state = LEX_INTERPCONCAT;
3113 return yylex();
a0d0e21e 3114 }
6e909404 3115 else {
90771dc0 3116 I32 tmp;
5db06880
NC
3117 if (!PL_madskills) /* when just compiling don't need correct */
3118 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3119 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3120 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3121 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3122 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3123 return REPORT(')');
6e909404
JH
3124 }
3125 if (PL_lex_casemods > 10)
3126 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3127 PL_lex_casestack[PL_lex_casemods++] = *s;
3128 PL_lex_casestack[PL_lex_casemods] = '\0';
3129 PL_lex_state = LEX_INTERPCONCAT;
5db06880 3130 start_force(curforce);
9ded7720 3131 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3132 force_next('(');
5db06880 3133 start_force(curforce);
6e909404 3134 if (*s == 'l')
9ded7720 3135 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3136 else if (*s == 'u')
9ded7720 3137 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3138 else if (*s == 'L')
9ded7720 3139 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3140 else if (*s == 'U')
9ded7720 3141 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3142 else if (*s == 'Q')
9ded7720 3143 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3144 else
3145 Perl_croak(aTHX_ "panic: yylex");
5db06880
NC
3146 if (PL_madskills) {
3147 SV* tmpsv = newSVpvn("",0);
3148 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3149 curmad('_', tmpsv);
3150 }
6e909404 3151 PL_bufptr = s + 1;
a0d0e21e 3152 }
79072805 3153 force_next(FUNC);
3280af22
NIS
3154 if (PL_lex_starts) {
3155 s = PL_bufptr;
3156 PL_lex_starts = 0;
5db06880
NC
3157#ifdef PERL_MAD
3158 if (PL_madskills) {
3159 if (thistoken)
3160 sv_free(thistoken);
3161 thistoken = newSVpvn("",0);
3162 }
3163#endif
131b3ad0
DM
3164 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3165 if (PL_lex_casemods == 1 && PL_lex_inpat)
3166 OPERATOR(',');
3167 else
3168 Aop(OP_CONCAT);
79072805
LW
3169 }
3170 else
cea2e8a9 3171 return yylex();
79072805
LW
3172 }
3173
55497cff 3174 case LEX_INTERPPUSH:
bbf60fe6 3175 return REPORT(sublex_push());
55497cff 3176
79072805 3177 case LEX_INTERPSTART:
3280af22 3178 if (PL_bufptr == PL_bufend)
bbf60fe6 3179 return REPORT(sublex_done());
607df283 3180 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3181 "### Interpolated variable\n"); });
3280af22
NIS
3182 PL_expect = XTERM;
3183 PL_lex_dojoin = (*PL_bufptr == '@');
3184 PL_lex_state = LEX_INTERPNORMAL;
3185 if (PL_lex_dojoin) {
5db06880 3186 start_force(curforce);
9ded7720 3187 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3188 force_next(',');
5db06880 3189 start_force(curforce);
a0d0e21e 3190 force_ident("\"", '$');
5db06880 3191 start_force(curforce);
9ded7720 3192 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3193 force_next('$');
5db06880 3194 start_force(curforce);
9ded7720 3195 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3196 force_next('(');
5db06880 3197 start_force(curforce);
9ded7720 3198 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3199 force_next(FUNC);
3200 }
3280af22
NIS
3201 if (PL_lex_starts++) {
3202 s = PL_bufptr;
5db06880
NC
3203#ifdef PERL_MAD
3204 if (PL_madskills) {
3205 if (thistoken)
3206 sv_free(thistoken);
3207 thistoken = newSVpvn("",0);
3208 }
3209#endif
131b3ad0
DM
3210 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3211 if (!PL_lex_casemods && PL_lex_inpat)
3212 OPERATOR(',');
3213 else
3214 Aop(OP_CONCAT);
79072805 3215 }
cea2e8a9 3216 return yylex();
79072805
LW
3217
3218 case LEX_INTERPENDMAYBE:
3280af22
NIS
3219 if (intuit_more(PL_bufptr)) {
3220 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3221 break;
3222 }
3223 /* FALL THROUGH */
3224
3225 case LEX_INTERPEND:
3280af22
NIS
3226 if (PL_lex_dojoin) {
3227 PL_lex_dojoin = FALSE;
3228 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3229#ifdef PERL_MAD
3230 if (PL_madskills) {
3231 if (thistoken)
3232 sv_free(thistoken);
3233 thistoken = newSVpvn("",0);
3234 }
3235#endif
bbf60fe6 3236 return REPORT(')');
79072805 3237 }
43a16006 3238 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3239 && SvEVALED(PL_lex_repl))
43a16006 3240 {
e9fa98b2 3241 if (PL_bufptr != PL_bufend)
cea2e8a9 3242 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3243 PL_lex_repl = NULL;
e9fa98b2 3244 }
79072805
LW
3245 /* FALLTHROUGH */
3246 case LEX_INTERPCONCAT:
3247#ifdef DEBUGGING
3280af22 3248 if (PL_lex_brackets)
cea2e8a9 3249 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3250#endif
3280af22 3251 if (PL_bufptr == PL_bufend)
bbf60fe6 3252 return REPORT(sublex_done());
79072805 3253
3280af22
NIS
3254 if (SvIVX(PL_linestr) == '\'') {
3255 SV *sv = newSVsv(PL_linestr);
3256 if (!PL_lex_inpat)
76e3520e 3257 sv = tokeq(sv);
3280af22 3258 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3259 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3260 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3261 s = PL_bufend;
79072805
LW
3262 }
3263 else {
3280af22 3264 s = scan_const(PL_bufptr);
79072805 3265 if (*s == '\\')
3280af22 3266 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3267 else
3280af22 3268 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3269 }
3270
3280af22 3271 if (s != PL_bufptr) {
5db06880
NC
3272 start_force(curforce);
3273 if (PL_madskills) {
3274 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3275 }
9ded7720 3276 NEXTVAL_NEXTTOKE = yylval;
3280af22 3277 PL_expect = XTERM;
79072805 3278 force_next(THING);
131b3ad0 3279 if (PL_lex_starts++) {
5db06880
NC
3280#ifdef PERL_MAD
3281 if (PL_madskills) {
3282 if (thistoken)
3283 sv_free(thistoken);
3284 thistoken = newSVpvn("",0);
3285 }
3286#endif
131b3ad0
DM
3287 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3288 if (!PL_lex_casemods && PL_lex_inpat)
3289 OPERATOR(',');
3290 else
3291 Aop(OP_CONCAT);
3292 }
79072805 3293 else {
3280af22 3294 PL_bufptr = s;
cea2e8a9 3295 return yylex();
79072805
LW
3296 }
3297 }
3298
cea2e8a9 3299 return yylex();
a0d0e21e 3300 case LEX_FORMLINE:
3280af22
NIS
3301 PL_lex_state = LEX_NORMAL;
3302 s = scan_formline(PL_bufptr);
3303 if (!PL_lex_formbrack)
a0d0e21e
LW
3304 goto rightbracket;
3305 OPERATOR(';');
79072805
LW
3306 }
3307
3280af22
NIS
3308 s = PL_bufptr;
3309 PL_oldoldbufptr = PL_oldbufptr;
3310 PL_oldbufptr = s;
463ee0b2
LW
3311
3312 retry:
5db06880
NC
3313#ifdef PERL_MAD
3314 if (thistoken) {
3315 sv_free(thistoken);
3316 thistoken = 0;
3317 }
3318 realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3319#endif
378cc40b
LW
3320 switch (*s) {
3321 default:
7e2040f0 3322 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3323 goto keylookup;
cea2e8a9 3324 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3325 case 4:
3326 case 26:
3327 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3328 case 0:
5db06880
NC
3329#ifdef PERL_MAD
3330 if (PL_madskills)
3331 faketokens = 0;
3332#endif
3280af22
NIS
3333 if (!PL_rsfp) {
3334 PL_last_uni = 0;
3335 PL_last_lop = 0;
c5ee2135 3336 if (PL_lex_brackets) {
0bd48802
AL
3337 yyerror(PL_lex_formbrack
3338 ? "Format not terminated"
3339 : "Missing right curly or square bracket");
c5ee2135 3340 }
4e553d73 3341 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3342 "### Tokener got EOF\n");
5f80b19c 3343 } );
79072805 3344 TOKEN(0);
463ee0b2 3345 }
3280af22 3346 if (s++ < PL_bufend)
a687059c 3347 goto retry; /* ignore stray nulls */
3280af22
NIS
3348 PL_last_uni = 0;
3349 PL_last_lop = 0;
3350 if (!PL_in_eval && !PL_preambled) {
3351 PL_preambled = TRUE;
5db06880
NC
3352#ifdef PERL_MAD
3353 if (PL_madskills)
3354 faketokens = 1;
3355#endif
3280af22
NIS
3356 sv_setpv(PL_linestr,incl_perldb());
3357 if (SvCUR(PL_linestr))
396482e1 3358 sv_catpvs(PL_linestr,";");
3280af22
NIS
3359 if (PL_preambleav){
3360 while(AvFILLp(PL_preambleav) >= 0) {
3361 SV *tmpsv = av_shift(PL_preambleav);
3362 sv_catsv(PL_linestr, tmpsv);
396482e1 3363 sv_catpvs(PL_linestr, ";");
91b7def8 3364 sv_free(tmpsv);
3365 }
3280af22
NIS
3366 sv_free((SV*)PL_preambleav);
3367 PL_preambleav = NULL;
91b7def8 3368 }
3280af22 3369 if (PL_minus_n || PL_minus_p) {
396482e1 3370 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3371 if (PL_minus_l)
396482e1 3372 sv_catpvs(PL_linestr,"chomp;");
3280af22 3373 if (PL_minus_a) {
3280af22 3374 if (PL_minus_F) {
3792a11b
NC
3375 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3376 || *PL_splitstr == '"')
3280af22 3377 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3378 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3379 else {
c8ef6a4b
NC
3380 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3381 bytes can be used as quoting characters. :-) */
dd374669 3382 const char *splits = PL_splitstr;
91d456ae 3383 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3384 do {
3385 /* Need to \ \s */
dd374669
AL
3386 if (*splits == '\\')
3387 sv_catpvn(PL_linestr, splits, 1);
3388 sv_catpvn(PL_linestr, splits, 1);
3389 } while (*splits++);
48c4c863
NC
3390 /* This loop will embed the trailing NUL of
3391 PL_linestr as the last thing it does before
3392 terminating. */
396482e1 3393 sv_catpvs(PL_linestr, ");");
54310121 3394 }
2304df62
AD
3395 }
3396 else
396482e1 3397 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3398 }
79072805 3399 }
bc9b29db 3400 if (PL_minus_E)
396482e1
GA
3401 sv_catpvs(PL_linestr,"use feature ':5.10';");
3402 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3403 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3404 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3405 PL_last_lop = PL_last_uni = NULL;
3280af22 3406 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3407 SV * const sv = newSV(0);
a0d0e21e
LW
3408
3409 sv_upgrade(sv, SVt_PVMG);
3280af22 3410 sv_setsv(sv,PL_linestr);
0ac0412a 3411 (void)SvIOK_on(sv);
45977657 3412 SvIV_set(sv, 0);
36c7798d 3413 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3414 }
79072805 3415 goto retry;
a687059c 3416 }
e929a76b 3417 do {
aa7440fb 3418 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3419 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3420 fake_eof:
5db06880
NC
3421#ifdef PERL_MAD
3422 realtokenstart = -1;
3423#endif
7e28d3af
JH
3424 if (PL_rsfp) {
3425 if (PL_preprocess && !PL_in_eval)
3426 (void)PerlProc_pclose(PL_rsfp);
3427 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3428 PerlIO_clearerr(PL_rsfp);
3429 else
3430 (void)PerlIO_close(PL_rsfp);
4608196e 3431 PL_rsfp = NULL;
7e28d3af
JH
3432 PL_doextract = FALSE;
3433 }
3434 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3435#ifdef PERL_MAD
3436 if (PL_madskills)
3437 faketokens = 1;
3438#endif
a23c4656
NC
3439 sv_setpv(PL_linestr,PL_minus_p
3440 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3441 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3442 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3443 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3444 PL_minus_n = PL_minus_p = 0;
3445 goto retry;
3446 }
3447 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3448 PL_last_lop = PL_last_uni = NULL;
c69006e4 3449 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3450 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3451 }
7aa207d6
JH
3452 /* If it looks like the start of a BOM or raw UTF-16,
3453 * check if it in fact is. */
3454 else if (bof &&
3455 (*s == 0 ||
3456 *(U8*)s == 0xEF ||
3457 *(U8*)s >= 0xFE ||
3458 s[1] == 0)) {
226017aa 3459#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3460# ifdef __GNU_LIBRARY__
3461# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3462# define FTELL_FOR_PIPE_IS_BROKEN
3463# endif
e3f494f1
JH
3464# else
3465# ifdef __GLIBC__
3466# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3467# define FTELL_FOR_PIPE_IS_BROKEN
3468# endif
3469# endif
226017aa
DD
3470# endif
3471#endif
3472#ifdef FTELL_FOR_PIPE_IS_BROKEN
3473 /* This loses the possibility to detect the bof
3474 * situation on perl -P when the libc5 is being used.
3475 * Workaround? Maybe attach some extra state to PL_rsfp?
3476 */
3477 if (!PL_preprocess)
7e28d3af 3478 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3479#else
eb160463 3480 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3481#endif
7e28d3af 3482 if (bof) {
3280af22 3483 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3484 s = swallow_bom((U8*)s);
e929a76b 3485 }
378cc40b 3486 }
3280af22 3487 if (PL_doextract) {
a0d0e21e 3488 /* Incest with pod. */
5db06880
NC
3489#ifdef PERL_MAD
3490 if (PL_madskills)
3491 sv_catsv(thiswhite, PL_linestr);
3492#endif
a0d0e21e 3493 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3494 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3495 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3496 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3497 PL_last_lop = PL_last_uni = NULL;
3280af22 3498 PL_doextract = FALSE;
a0d0e21e 3499 }
4e553d73 3500 }
463ee0b2 3501 incline(s);
3280af22
NIS
3502 } while (PL_doextract);
3503 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3504 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3505 SV * const sv = newSV(0);
a687059c 3506
93a17b20 3507 sv_upgrade(sv, SVt_PVMG);
3280af22 3508 sv_setsv(sv,PL_linestr);
0ac0412a 3509 (void)SvIOK_on(sv);
45977657 3510 SvIV_set(sv, 0);
36c7798d 3511 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 3512 }
3280af22 3513 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3514 PL_last_lop = PL_last_uni = NULL;
57843af0 3515 if (CopLINE(PL_curcop) == 1) {
3280af22 3516 while (s < PL_bufend && isSPACE(*s))
79072805 3517 s++;
a0d0e21e 3518 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 3519 s++;
5db06880
NC
3520#ifdef PERL_MAD
3521 if (PL_madskills)
3522 thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3523#endif
bd61b366 3524 d = NULL;
3280af22 3525 if (!PL_in_eval) {
44a8e56a 3526 if (*s == '#' && *(s+1) == '!')
3527 d = s + 2;
3528#ifdef ALTERNATE_SHEBANG
3529 else {
bfed75c6 3530 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 3531 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3532 d = s + (sizeof(as) - 1);
3533 }
3534#endif /* ALTERNATE_SHEBANG */
3535 }
3536 if (d) {
b8378b72 3537 char *ipath;
774d564b 3538 char *ipathend;
b8378b72 3539
774d564b 3540 while (isSPACE(*d))
b8378b72
CS
3541 d++;
3542 ipath = d;
774d564b 3543 while (*d && !isSPACE(*d))
3544 d++;
3545 ipathend = d;
3546
3547#ifdef ARG_ZERO_IS_SCRIPT
3548 if (ipathend > ipath) {
3549 /*
3550 * HP-UX (at least) sets argv[0] to the script name,
3551 * which makes $^X incorrect. And Digital UNIX and Linux,
3552 * at least, set argv[0] to the basename of the Perl
3553 * interpreter. So, having found "#!", we'll set it right.
3554 */
fafc274c
NC
3555 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3556 SVt_PV)); /* $^X */
774d564b 3557 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 3558 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 3559 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 3560 SvSETMAGIC(x);
3561 }
556c1dec
JH
3562 else {
3563 STRLEN blen;
3564 STRLEN llen;
cfd0369c 3565 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 3566 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
3567 if (llen < blen) {
3568 bstart += blen - llen;
3569 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3570 sv_setpvn(x, ipath, ipathend - ipath);
3571 SvSETMAGIC(x);
3572 }
3573 }
3574 }
774d564b 3575 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 3576 }
774d564b 3577#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
3578
3579 /*
3580 * Look for options.
3581 */
748a9306 3582 d = instr(s,"perl -");
84e30d1a 3583 if (!d) {
748a9306 3584 d = instr(s,"perl");
84e30d1a
GS
3585#if defined(DOSISH)
3586 /* avoid getting into infinite loops when shebang
3587 * line contains "Perl" rather than "perl" */
3588 if (!d) {
3589 for (d = ipathend-4; d >= ipath; --d) {
3590 if ((*d == 'p' || *d == 'P')
3591 && !ibcmp(d, "perl", 4))
3592 {
3593 break;
3594 }
3595 }
3596 if (d < ipath)
bd61b366 3597 d = NULL;
84e30d1a
GS
3598 }
3599#endif
3600 }
44a8e56a 3601#ifdef ALTERNATE_SHEBANG
3602 /*
3603 * If the ALTERNATE_SHEBANG on this system starts with a
3604 * character that can be part of a Perl expression, then if
3605 * we see it but not "perl", we're probably looking at the
3606 * start of Perl code, not a request to hand off to some
3607 * other interpreter. Similarly, if "perl" is there, but
3608 * not in the first 'word' of the line, we assume the line
3609 * contains the start of the Perl program.
44a8e56a 3610 */
3611 if (d && *s != '#') {
f54cb97a 3612 const char *c = ipath;
44a8e56a 3613 while (*c && !strchr("; \t\r\n\f\v#", *c))
3614 c++;
3615 if (c < d)
bd61b366 3616 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 3617 else
3618 *s = '#'; /* Don't try to parse shebang line */
3619 }
774d564b 3620#endif /* ALTERNATE_SHEBANG */
bf4acbe4 3621#ifndef MACOS_TRADITIONAL
748a9306 3622 if (!d &&
44a8e56a 3623 *s == '#' &&
774d564b 3624 ipathend > ipath &&
3280af22 3625 !PL_minus_c &&
748a9306 3626 !instr(s,"indir") &&
3280af22 3627 instr(PL_origargv[0],"perl"))
748a9306 3628 {
27da23d5 3629 dVAR;
9f68db38 3630 char **newargv;
9f68db38 3631
774d564b 3632 *ipathend = '\0';
3633 s = ipathend + 1;
3280af22