This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
-Dmad works, albeit with some test failures:
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2
NC
38#ifdef PERL_MAD
39/* XXX these probably need to be made into PL vars */
40static I32 realtokenstart;
41static I32 faketokens = 0;
42static MADPROP *thismad;
43static SV *thistoken;
44static SV *thisopen;
45static SV *thisstuff;
46static SV *thisclose;
47static SV *thiswhite;
48static SV *nextwhite;
49static SV *skipwhite;
50static SV *endwhite;
51static I32 curforce = -1;
52
53# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
5db06880 54# define NEXTVAL_NEXTTOKE PL_nexttoke[curforce].next_val
9ded7720 55#else
5db06880 56# define CURMAD(slot,sv)
9ded7720 57# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
58#endif
59
9059aa12
LW
60#define XFAKEBRACK 128
61#define XENUMMASK 127
62
39e02b42
JH
63#ifdef USE_UTF8_SCRIPTS
64# define UTF (!IN_BYTES)
2b9d42f0 65#else
746b446a 66# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 67#endif
a0ed51b3 68
61f0cdd9 69/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
70 * 1999-02-27 mjd-perl-patch@plover.com */
71#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
72
bf4acbe4
GS
73/* On MacOS, respect nonbreaking spaces */
74#ifdef MACOS_TRADITIONAL
75#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
76#else
77#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
78#endif
79
ffb4593c
NT
80/* LEX_* are values for PL_lex_state, the state of the lexer.
81 * They are arranged oddly so that the guard on the switch statement
79072805
LW
82 * can get by with a single comparison (if the compiler is smart enough).
83 */
84
fb73857a 85/* #define LEX_NOTPARSING 11 is done in perl.h. */
86
b6007c36
DM
87#define LEX_NORMAL 10 /* normal code (ie not within "...") */
88#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
89#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
90#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
91#define LEX_INTERPSTART 6 /* expecting the start of a $var */
92
93 /* at end of code, eg "$x" followed by: */
94#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
95#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
96
97#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
98 string or after \E, $foo, etc */
99#define LEX_INTERPCONST 2 /* NOT USED */
100#define LEX_FORMLINE 1 /* expecting a format line */
101#define LEX_KNOWNEXT 0 /* next token known; just return it */
102
79072805 103
bbf60fe6 104#ifdef DEBUGGING
27da23d5 105static const char* const lex_state_names[] = {
bbf60fe6
DM
106 "KNOWNEXT",
107 "FORMLINE",
108 "INTERPCONST",
109 "INTERPCONCAT",
110 "INTERPENDMAYBE",
111 "INTERPEND",
112 "INTERPSTART",
113 "INTERPPUSH",
114 "INTERPCASEMOD",
115 "INTERPNORMAL",
116 "NORMAL"
117};
118#endif
119
79072805
LW
120#ifdef ff_next
121#undef ff_next
d48672a2
LW
122#endif
123
79072805 124#include "keywords.h"
fe14fcc3 125
ffb4593c
NT
126/* CLINE is a macro that ensures PL_copline has a sane value */
127
ae986130
LW
128#ifdef CLINE
129#undef CLINE
130#endif
57843af0 131#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 132
5db06880 133#ifdef PERL_MAD
29595ff2
NC
134# define SKIPSPACE0(s) skipspace0(s)
135# define SKIPSPACE1(s) skipspace1(s)
136# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
137# define PEEKSPACE(s) skipspace2(s,0)
138#else
139# define SKIPSPACE0(s) skipspace(s)
140# define SKIPSPACE1(s) skipspace(s)
141# define SKIPSPACE2(s,tsv) skipspace(s)
142# define PEEKSPACE(s) skipspace(s)
143#endif
144
ffb4593c
NT
145/*
146 * Convenience functions to return different tokens and prime the
9cbb5ea2 147 * lexer for the next token. They all take an argument.
ffb4593c
NT
148 *
149 * TOKEN : generic token (used for '(', DOLSHARP, etc)
150 * OPERATOR : generic operator
151 * AOPERATOR : assignment operator
152 * PREBLOCK : beginning the block after an if, while, foreach, ...
153 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
154 * PREREF : *EXPR where EXPR is not a simple identifier
155 * TERM : expression term
156 * LOOPX : loop exiting command (goto, last, dump, etc)
157 * FTST : file test operator
158 * FUN0 : zero-argument function
2d2e263d 159 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
160 * BOop : bitwise or or xor
161 * BAop : bitwise and
162 * SHop : shift operator
163 * PWop : power operator
9cbb5ea2 164 * PMop : pattern-matching operator
ffb4593c
NT
165 * Aop : addition-level operator
166 * Mop : multiplication-level operator
167 * Eop : equality-testing operator
e5edeb50 168 * Rop : relational operator <= != gt
ffb4593c
NT
169 *
170 * Also see LOP and lop() below.
171 */
172
998054bd 173#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 174# define REPORT(retval) tokereport((I32)retval)
998054bd 175#else
bbf60fe6 176# define REPORT(retval) (retval)
998054bd
SC
177#endif
178
bbf60fe6
DM
179#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
180#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
181#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
182#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
183#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
184#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
185#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
186#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
187#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
188#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
189#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
190#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
191#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
192#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
193#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
194#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
195#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
196#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
197#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
198#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 199
a687059c
LW
200/* This bit of chicanery makes a unary function followed by
201 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
202 * The UNIDOR macro is for unary functions that can be followed by the //
203 * operator (such as C<shift // 0>).
a687059c 204 */
376fcdbf
AL
205#define UNI2(f,x) { \
206 yylval.ival = f; \
207 PL_expect = x; \
208 PL_bufptr = s; \
209 PL_last_uni = PL_oldbufptr; \
210 PL_last_lop_op = f; \
211 if (*s == '(') \
212 return REPORT( (int)FUNC1 ); \
29595ff2 213 s = PEEKSPACE(s); \
376fcdbf
AL
214 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
215 }
6f33ba73
RGS
216#define UNI(f) UNI2(f,XTERM)
217#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 218
376fcdbf
AL
219#define UNIBRACK(f) { \
220 yylval.ival = f; \
221 PL_bufptr = s; \
222 PL_last_uni = PL_oldbufptr; \
223 if (*s == '(') \
224 return REPORT( (int)FUNC1 ); \
29595ff2 225 s = PEEKSPACE(s); \
376fcdbf
AL
226 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
227 }
79072805 228
9f68db38 229/* grandfather return to old style */
3280af22 230#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 231
8fa7f367
JH
232#ifdef DEBUGGING
233
bbf60fe6
DM
234/* how to interpret the yylval associated with the token */
235enum token_type {
236 TOKENTYPE_NONE,
237 TOKENTYPE_IVAL,
238 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
239 TOKENTYPE_PVAL,
240 TOKENTYPE_OPVAL,
241 TOKENTYPE_GVVAL
242};
243
6d4a66ac
NC
244static struct debug_tokens {
245 const int token;
246 enum token_type type;
247 const char *name;
248} const debug_tokens[] =
9041c2e3 249{
bbf60fe6
DM
250 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
251 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
252 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
253 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
254 { ARROW, TOKENTYPE_NONE, "ARROW" },
255 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
256 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
257 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
258 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
259 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 260 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
261 { DO, TOKENTYPE_NONE, "DO" },
262 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
263 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
264 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
265 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
266 { ELSE, TOKENTYPE_NONE, "ELSE" },
267 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
268 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
269 { FOR, TOKENTYPE_IVAL, "FOR" },
270 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
271 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
272 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
273 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
274 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
275 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 276 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
277 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
278 { IF, TOKENTYPE_IVAL, "IF" },
279 { LABEL, TOKENTYPE_PVAL, "LABEL" },
280 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
281 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
282 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
283 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
284 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
285 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
286 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
287 { MY, TOKENTYPE_IVAL, "MY" },
288 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
289 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
290 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
291 { OROP, TOKENTYPE_IVAL, "OROP" },
292 { OROR, TOKENTYPE_NONE, "OROR" },
293 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
294 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
295 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
296 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
297 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
298 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
299 { PREINC, TOKENTYPE_NONE, "PREINC" },
300 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
301 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
302 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
303 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
304 { SUB, TOKENTYPE_NONE, "SUB" },
305 { THING, TOKENTYPE_OPVAL, "THING" },
306 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
307 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
308 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
309 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
310 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
311 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 312 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
313 { WHILE, TOKENTYPE_IVAL, "WHILE" },
314 { WORD, TOKENTYPE_OPVAL, "WORD" },
315 { 0, TOKENTYPE_NONE, 0 }
316};
317
318/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 319
bbf60fe6 320STATIC int
f5bd084c 321S_tokereport(pTHX_ I32 rv)
bbf60fe6 322{
97aff369 323 dVAR;
bbf60fe6 324 if (DEBUG_T_TEST) {
bd61b366 325 const char *name = NULL;
bbf60fe6 326 enum token_type type = TOKENTYPE_NONE;
f54cb97a 327 const struct debug_tokens *p;
396482e1 328 SV* const report = newSVpvs("<== ");
bbf60fe6 329
f54cb97a 330 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
331 if (p->token == (int)rv) {
332 name = p->name;
333 type = p->type;
334 break;
335 }
336 }
337 if (name)
54667de8 338 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
339 else if ((char)rv > ' ' && (char)rv < '~')
340 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
341 else if (!rv)
396482e1 342 sv_catpvs(report, "EOF");
bbf60fe6
DM
343 else
344 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
345 switch (type) {
346 case TOKENTYPE_NONE:
347 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
348 break;
349 case TOKENTYPE_IVAL:
e4584336 350 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
351 break;
352 case TOKENTYPE_OPNUM:
353 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
354 PL_op_name[yylval.ival]);
355 break;
356 case TOKENTYPE_PVAL:
357 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
358 break;
359 case TOKENTYPE_OPVAL:
b6007c36 360 if (yylval.opval) {
401441c0 361 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 362 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
363 if (yylval.opval->op_type == OP_CONST) {
364 Perl_sv_catpvf(aTHX_ report, " %s",
365 SvPEEK(cSVOPx_sv(yylval.opval)));
366 }
367
368 }
401441c0 369 else
396482e1 370 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
371 break;
372 }
b6007c36 373 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
374 };
375 return (int)rv;
998054bd
SC
376}
377
b6007c36
DM
378
379/* print the buffer with suitable escapes */
380
381STATIC void
382S_printbuf(pTHX_ const char* fmt, const char* s)
383{
396482e1 384 SV* const tmp = newSVpvs("");
b6007c36
DM
385 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
386 SvREFCNT_dec(tmp);
387}
388
8fa7f367
JH
389#endif
390
ffb4593c
NT
391/*
392 * S_ao
393 *
c963b151
BD
394 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
395 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
396 */
397
76e3520e 398STATIC int
cea2e8a9 399S_ao(pTHX_ int toketype)
a0d0e21e 400{
97aff369 401 dVAR;
3280af22
NIS
402 if (*PL_bufptr == '=') {
403 PL_bufptr++;
a0d0e21e
LW
404 if (toketype == ANDAND)
405 yylval.ival = OP_ANDASSIGN;
406 else if (toketype == OROR)
407 yylval.ival = OP_ORASSIGN;
c963b151
BD
408 else if (toketype == DORDOR)
409 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
410 toketype = ASSIGNOP;
411 }
412 return toketype;
413}
414
ffb4593c
NT
415/*
416 * S_no_op
417 * When Perl expects an operator and finds something else, no_op
418 * prints the warning. It always prints "<something> found where
419 * operator expected. It prints "Missing semicolon on previous line?"
420 * if the surprise occurs at the start of the line. "do you need to
421 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
422 * where the compiler doesn't know if foo is a method call or a function.
423 * It prints "Missing operator before end of line" if there's nothing
424 * after the missing operator, or "... before <...>" if there is something
425 * after the missing operator.
426 */
427
76e3520e 428STATIC void
bfed75c6 429S_no_op(pTHX_ const char *what, char *s)
463ee0b2 430{
97aff369 431 dVAR;
9d4ba2ae
AL
432 char * const oldbp = PL_bufptr;
433 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 434
1189a94a
GS
435 if (!s)
436 s = oldbp;
07c798fb 437 else
1189a94a 438 PL_bufptr = s;
cea2e8a9 439 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
440 if (ckWARN_d(WARN_SYNTAX)) {
441 if (is_first)
442 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
443 "\t(Missing semicolon on previous line?)\n");
444 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 445 const char *t;
56da5a46
RGS
446 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
447 if (t < PL_bufptr && isSPACE(*t))
448 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
449 "\t(Do you need to predeclare %.*s?)\n",
551405c4 450 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
451 }
452 else {
453 assert(s >= oldbp);
454 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 455 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 456 }
07c798fb 457 }
3280af22 458 PL_bufptr = oldbp;
8990e307
LW
459}
460
ffb4593c
NT
461/*
462 * S_missingterm
463 * Complain about missing quote/regexp/heredoc terminator.
464 * If it's called with (char *)NULL then it cauterizes the line buffer.
465 * If we're in a delimited string and the delimiter is a control
466 * character, it's reformatted into a two-char sequence like ^C.
467 * This is fatal.
468 */
469
76e3520e 470STATIC void
cea2e8a9 471S_missingterm(pTHX_ char *s)
8990e307 472{
97aff369 473 dVAR;
8990e307
LW
474 char tmpbuf[3];
475 char q;
476 if (s) {
9d4ba2ae 477 char * const nl = strrchr(s,'\n');
d2719217 478 if (nl)
8990e307
LW
479 *nl = '\0';
480 }
9d116dd7
JH
481 else if (
482#ifdef EBCDIC
483 iscntrl(PL_multi_close)
484#else
485 PL_multi_close < 32 || PL_multi_close == 127
486#endif
487 ) {
8990e307 488 *tmpbuf = '^';
585ec06d 489 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
490 tmpbuf[2] = '\0';
491 s = tmpbuf;
492 }
493 else {
eb160463 494 *tmpbuf = (char)PL_multi_close;
8990e307
LW
495 tmpbuf[1] = '\0';
496 s = tmpbuf;
497 }
498 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 499 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 500}
79072805 501
ef89dcc3 502#define FEATURE_IS_ENABLED(name) \
0d863452 503 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 504 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
505/*
506 * S_feature_is_enabled
507 * Check whether the named feature is enabled.
508 */
509STATIC bool
510S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
511{
97aff369 512 dVAR;
0d863452 513 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
514 char he_name[32] = "feature_";
515 (void) strncpy(&he_name[8], name, 24);
516
517 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
518}
519
ffb4593c
NT
520/*
521 * Perl_deprecate
ffb4593c
NT
522 */
523
79072805 524void
bfed75c6 525Perl_deprecate(pTHX_ const char *s)
a0d0e21e 526{
599cee73 527 if (ckWARN(WARN_DEPRECATED))
9014280d 528 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
529}
530
12bcd1a6 531void
bfed75c6 532Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
533{
534 /* This function should NOT be called for any new deprecated warnings */
535 /* Use Perl_deprecate instead */
536 /* */
537 /* It is here to maintain backward compatibility with the pre-5.8 */
538 /* warnings category hierarchy. The "deprecated" category used to */
539 /* live under the "syntax" category. It is now a top-level category */
540 /* in its own right. */
541
542 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 543 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
544 "Use of %s is deprecated", s);
545}
546
ffb4593c 547/*
9cbb5ea2
GS
548 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
549 * utf16-to-utf8-reversed.
ffb4593c
NT
550 */
551
c39cd008
GS
552#ifdef PERL_CR_FILTER
553static void
554strip_return(SV *sv)
555{
95a20fc0 556 register const char *s = SvPVX_const(sv);
9d4ba2ae 557 register const char * const e = s + SvCUR(sv);
c39cd008
GS
558 /* outer loop optimized to do nothing if there are no CR-LFs */
559 while (s < e) {
560 if (*s++ == '\r' && *s == '\n') {
561 /* hit a CR-LF, need to copy the rest */
562 register char *d = s - 1;
563 *d++ = *s++;
564 while (s < e) {
565 if (*s == '\r' && s[1] == '\n')
566 s++;
567 *d++ = *s++;
568 }
569 SvCUR(sv) -= s - d;
570 return;
571 }
572 }
573}
a868473f 574
76e3520e 575STATIC I32
c39cd008 576S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 577{
f54cb97a 578 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
579 if (count > 0 && !maxlen)
580 strip_return(sv);
581 return count;
a868473f
NIS
582}
583#endif
584
ffb4593c
NT
585/*
586 * Perl_lex_start
9cbb5ea2
GS
587 * Initialize variables. Uses the Perl save_stack to save its state (for
588 * recursive calls to the parser).
ffb4593c
NT
589 */
590
a0d0e21e 591void
864dbfa3 592Perl_lex_start(pTHX_ SV *line)
79072805 593{
97aff369 594 dVAR;
cfd0369c 595 const char *s;
8990e307
LW
596 STRLEN len;
597
3280af22
NIS
598 SAVEI32(PL_lex_dojoin);
599 SAVEI32(PL_lex_brackets);
3280af22
NIS
600 SAVEI32(PL_lex_casemods);
601 SAVEI32(PL_lex_starts);
602 SAVEI32(PL_lex_state);
7766f137 603 SAVEVPTR(PL_lex_inpat);
3280af22 604 SAVEI32(PL_lex_inwhat);
5db06880
NC
605#ifdef PERL_MAD
606 if (PL_lex_state == LEX_KNOWNEXT) {
607 I32 toke = PL_lasttoke;
608 while (--toke >= 0) {
609 SAVEI32(PL_nexttoke[toke].next_type);
610 SAVEVPTR(PL_nexttoke[toke].next_val);
611 if (PL_madskills)
612 SAVEVPTR(PL_nexttoke[toke].next_mad);
613 }
614 SAVEI32(PL_lasttoke);
615 }
616 if (PL_madskills) {
617 SAVESPTR(thistoken);
618 SAVESPTR(thiswhite);
619 SAVESPTR(nextwhite);
620 SAVESPTR(thisopen);
621 SAVESPTR(thisclose);
622 SAVESPTR(thisstuff);
623 SAVEVPTR(thismad);
624 SAVEI32(realtokenstart);
625 SAVEI32(faketokens);
626 }
627 SAVEI32(curforce);
628#else
18b09519
GS
629 if (PL_lex_state == LEX_KNOWNEXT) {
630 I32 toke = PL_nexttoke;
631 while (--toke >= 0) {
632 SAVEI32(PL_nexttype[toke]);
633 SAVEVPTR(PL_nextval[toke]);
634 }
635 SAVEI32(PL_nexttoke);
18b09519 636 }
5db06880 637#endif
57843af0 638 SAVECOPLINE(PL_curcop);
3280af22
NIS
639 SAVEPPTR(PL_bufptr);
640 SAVEPPTR(PL_bufend);
641 SAVEPPTR(PL_oldbufptr);
642 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
643 SAVEPPTR(PL_last_lop);
644 SAVEPPTR(PL_last_uni);
3280af22
NIS
645 SAVEPPTR(PL_linestart);
646 SAVESPTR(PL_linestr);
8edd5f42
RGS
647 SAVEGENERICPV(PL_lex_brackstack);
648 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 649 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
650 SAVESPTR(PL_lex_stuff);
651 SAVEI32(PL_lex_defer);
09bef843 652 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 653 SAVESPTR(PL_lex_repl);
bebdddfc
GS
654 SAVEINT(PL_expect);
655 SAVEINT(PL_lex_expect);
3280af22
NIS
656
657 PL_lex_state = LEX_NORMAL;
658 PL_lex_defer = 0;
659 PL_expect = XSTATE;
660 PL_lex_brackets = 0;
a02a5408
JC
661 Newx(PL_lex_brackstack, 120, char);
662 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
663 PL_lex_casemods = 0;
664 *PL_lex_casestack = '\0';
665 PL_lex_dojoin = 0;
666 PL_lex_starts = 0;
a0714e2c
SS
667 PL_lex_stuff = NULL;
668 PL_lex_repl = NULL;
3280af22 669 PL_lex_inpat = 0;
5db06880
NC
670#ifdef PERL_MAD
671 PL_lasttoke = 0;
672#else
76be56bc 673 PL_nexttoke = 0;
5db06880 674#endif
3280af22 675 PL_lex_inwhat = 0;
09bef843 676 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
677 PL_linestr = line;
678 if (SvREADONLY(PL_linestr))
679 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 680 s = SvPV_const(PL_linestr, len);
6f27f9a7 681 if (!len || s[len-1] != ';') {
3280af22
NIS
682 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
683 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 684 sv_catpvs(PL_linestr, "\n;");
8990e307 685 }
3280af22
NIS
686 SvTEMP_off(PL_linestr);
687 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
688 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 689 PL_last_lop = PL_last_uni = NULL;
3280af22 690 PL_rsfp = 0;
79072805 691}
a687059c 692
ffb4593c
NT
693/*
694 * Perl_lex_end
9cbb5ea2
GS
695 * Finalizer for lexing operations. Must be called when the parser is
696 * done with the lexer.
ffb4593c
NT
697 */
698
463ee0b2 699void
864dbfa3 700Perl_lex_end(pTHX)
463ee0b2 701{
97aff369 702 dVAR;
3280af22 703 PL_doextract = FALSE;
463ee0b2
LW
704}
705
ffb4593c
NT
706/*
707 * S_incline
708 * This subroutine has nothing to do with tilting, whether at windmills
709 * or pinball tables. Its name is short for "increment line". It
57843af0 710 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 711 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
712 * # line 500 "foo.pm"
713 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_incline(pTHX_ char *s)
463ee0b2 718{
97aff369 719 dVAR;
463ee0b2
LW
720 char *t;
721 char *n;
73659bf1 722 char *e;
463ee0b2 723 char ch;
463ee0b2 724
57843af0 725 CopLINE_inc(PL_curcop);
463ee0b2
LW
726 if (*s++ != '#')
727 return;
bf4acbe4 728 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
729 if (strnEQ(s, "line", 4))
730 s += 4;
731 else
732 return;
084592ab 733 if (SPACE_OR_TAB(*s))
73659bf1 734 s++;
4e553d73 735 else
73659bf1 736 return;
bf4acbe4 737 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
738 if (!isDIGIT(*s))
739 return;
740 n = s;
741 while (isDIGIT(*s))
742 s++;
bf4acbe4 743 while (SPACE_OR_TAB(*s))
463ee0b2 744 s++;
73659bf1 745 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 746 s++;
73659bf1
GS
747 e = t + 1;
748 }
463ee0b2 749 else {
463ee0b2 750 for (t = s; !isSPACE(*t); t++) ;
73659bf1 751 e = t;
463ee0b2 752 }
bf4acbe4 753 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
754 e++;
755 if (*e != '\n' && *e != '\0')
756 return; /* false alarm */
757
463ee0b2
LW
758 ch = *t;
759 *t = '\0';
f4dd75d9 760 if (t - s > 0) {
8a5ee598 761#ifndef USE_ITHREADS
c4420975 762 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
763 STRLEN tmplen = cf ? strlen(cf) : 0;
764 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
765 /* must copy *{"::_<(eval N)[oldfilename:L]"}
766 * to *{"::_<newfilename"} */
767 char smallbuf[256], smallbuf2[256];
768 char *tmpbuf, *tmpbuf2;
8a5ee598 769 GV **gvp, *gv2;
e66cf94c
RGS
770 STRLEN tmplen2 = strlen(s);
771 if (tmplen + 3 < sizeof smallbuf)
772 tmpbuf = smallbuf;
773 else
774 Newx(tmpbuf, tmplen + 3, char);
775 if (tmplen2 + 3 < sizeof smallbuf2)
776 tmpbuf2 = smallbuf2;
777 else
778 Newx(tmpbuf2, tmplen2 + 3, char);
779 tmpbuf[0] = tmpbuf2[0] = '_';
780 tmpbuf[1] = tmpbuf2[1] = '<';
781 memcpy(tmpbuf + 2, cf, ++tmplen);
782 memcpy(tmpbuf2 + 2, s, ++tmplen2);
783 ++tmplen; ++tmplen2;
8a5ee598
RGS
784 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
785 if (gvp) {
786 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
787 if (!isGV(gv2))
788 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
789 /* adjust ${"::_<newfilename"} to store the new file name */
790 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
791 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
792 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
793 }
e66cf94c
RGS
794 if (tmpbuf != smallbuf) Safefree(tmpbuf);
795 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
796 }
8a5ee598 797#endif
05ec9bb3 798 CopFILE_free(PL_curcop);
57843af0 799 CopFILE_set(PL_curcop, s);
f4dd75d9 800 }
463ee0b2 801 *t = ch;
57843af0 802 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
803}
804
29595ff2
NC
805#ifdef PERL_MAD
806/* skip space before thistoken */
807
808STATIC char *
809S_skipspace0(pTHX_ register char *s)
810{
811 s = skipspace(s);
812 if (!PL_madskills)
813 return s;
814 if (skipwhite) {
815 if (!thiswhite)
816 thiswhite = newSVpvn("",0);
817 sv_catsv(thiswhite, skipwhite);
818 sv_free(skipwhite);
819 skipwhite = 0;
820 }
821 realtokenstart = s - SvPVX(PL_linestr);
822 return s;
823}
824
825/* skip space after thistoken */
826
827STATIC char *
828S_skipspace1(pTHX_ register char *s)
829{
830 char *start = s;
831 I32 startoff = start - SvPVX(PL_linestr);
832
833 s = skipspace(s);
834 if (!PL_madskills)
835 return s;
836 start = SvPVX(PL_linestr) + startoff;
837 if (!thistoken && realtokenstart >= 0) {
838 char *tstart = SvPVX(PL_linestr) + realtokenstart;
839 thistoken = newSVpvn(tstart, start - tstart);
840 }
841 realtokenstart = -1;
842 if (skipwhite) {
843 if (!nextwhite)
844 nextwhite = newSVpvn("",0);
845 sv_catsv(nextwhite, skipwhite);
846 sv_free(skipwhite);
847 skipwhite = 0;
848 }
849 return s;
850}
851
852STATIC char *
853S_skipspace2(pTHX_ register char *s, SV **svp)
854{
855 char *start = s;
856 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
857 I32 startoff = start - SvPVX(PL_linestr);
858 s = skipspace(s);
859 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
860 if (!PL_madskills || !svp)
861 return s;
862 start = SvPVX(PL_linestr) + startoff;
863 if (!thistoken && realtokenstart >= 0) {
864 char *tstart = SvPVX(PL_linestr) + realtokenstart;
865 thistoken = newSVpvn(tstart, start - tstart);
866 realtokenstart = -1;
867 }
868 if (skipwhite) {
869 if (!*svp)
870 *svp = newSVpvn("",0);
871 sv_setsv(*svp, skipwhite);
872 sv_free(skipwhite);
873 skipwhite = 0;
874 }
875
876 return s;
877}
878#endif
879
ffb4593c
NT
880/*
881 * S_skipspace
882 * Called to gobble the appropriate amount and type of whitespace.
883 * Skips comments as well.
884 */
885
76e3520e 886STATIC char *
cea2e8a9 887S_skipspace(pTHX_ register char *s)
a687059c 888{
97aff369 889 dVAR;
5db06880
NC
890#ifdef PERL_MAD
891 int curoff;
892 int startoff = s - SvPVX(PL_linestr);
893
894 if (skipwhite) {
895 sv_free(skipwhite);
896 skipwhite = 0;
897 }
898#endif
899
3280af22 900 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 901 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 902 s++;
5db06880
NC
903#ifdef PERL_MAD
904 goto done;
905#else
463ee0b2 906 return s;
5db06880 907#endif
463ee0b2
LW
908 }
909 for (;;) {
fd049845 910 STRLEN prevlen;
09bef843 911 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 912 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
913 while (s < PL_bufend && isSPACE(*s)) {
914 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
915 incline(s);
916 }
ffb4593c
NT
917
918 /* comment */
3280af22
NIS
919 if (s < PL_bufend && *s == '#') {
920 while (s < PL_bufend && *s != '\n')
463ee0b2 921 s++;
60e6418e 922 if (s < PL_bufend) {
463ee0b2 923 s++;
60e6418e
GS
924 if (PL_in_eval && !PL_rsfp) {
925 incline(s);
926 continue;
927 }
928 }
463ee0b2 929 }
ffb4593c
NT
930
931 /* only continue to recharge the buffer if we're at the end
932 * of the buffer, we're not reading from a source filter, and
933 * we're in normal lexing mode
934 */
09bef843
SB
935 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
936 PL_lex_state == LEX_FORMLINE)
5db06880
NC
937#ifdef PERL_MAD
938 goto done;
939#else
463ee0b2 940 return s;
5db06880 941#endif
ffb4593c
NT
942
943 /* try to recharge the buffer */
5db06880
NC
944#ifdef PERL_MAD
945 curoff = s - SvPVX(PL_linestr);
946#endif
947
9cbb5ea2 948 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 949 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 950 {
5db06880
NC
951#ifdef PERL_MAD
952 if (PL_madskills && curoff != startoff) {
953 if (!skipwhite)
954 skipwhite = newSVpvn("",0);
955 sv_catpvn(skipwhite, SvPVX(PL_linestr) + startoff,
956 curoff - startoff);
957 }
958
959 /* mustn't throw out old stuff yet if madpropping */
960 SvCUR(PL_linestr) = curoff;
961 s = SvPVX(PL_linestr) + curoff;
962 *s = 0;
963 if (curoff && s[-1] == '\n')
964 s[-1] = ' ';
965#endif
966
9cbb5ea2 967 /* end of file. Add on the -p or -n magic */
5db06880 968 /* XXX these shouldn't really be added here, can't set faketokens */
01a19ab0 969 if (PL_minus_p) {
5db06880
NC
970#ifdef PERL_MAD
971 sv_catpv(PL_linestr,
972 ";}continue{print or die qq(-p destination: $!\\n);}");
973#else
01a19ab0
NC
974 sv_setpv(PL_linestr,
975 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 976#endif
3280af22 977 PL_minus_n = PL_minus_p = 0;
a0d0e21e 978 }
01a19ab0 979 else if (PL_minus_n) {
5db06880
NC
980#ifdef PERL_MAD
981 sv_catpvn(PL_linestr, ";}", 2);
982#else
01a19ab0 983 sv_setpvn(PL_linestr, ";}", 2);
5db06880 984#endif
01a19ab0
NC
985 PL_minus_n = 0;
986 }
a0d0e21e 987 else
5db06880
NC
988#ifdef PERL_MAD
989 sv_catpvn(PL_linestr,";", 1);
990#else
4147a61b 991 sv_setpvn(PL_linestr,";", 1);
5db06880 992#endif
ffb4593c
NT
993
994 /* reset variables for next time we lex */
9cbb5ea2 995 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
996 = SvPVX(PL_linestr)
997#ifdef PERL_MAD
998 + curoff
999#endif
1000 ;
3280af22 1001 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1002 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1003
1004 /* Close the filehandle. Could be from -P preprocessor,
1005 * STDIN, or a regular file. If we were reading code from
1006 * STDIN (because the commandline held no -e or filename)
1007 * then we don't close it, we reset it so the code can
1008 * read from STDIN too.
1009 */
1010
3280af22
NIS
1011 if (PL_preprocess && !PL_in_eval)
1012 (void)PerlProc_pclose(PL_rsfp);
1013 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1014 PerlIO_clearerr(PL_rsfp);
8990e307 1015 else
3280af22 1016 (void)PerlIO_close(PL_rsfp);
4608196e 1017 PL_rsfp = NULL;
463ee0b2
LW
1018 return s;
1019 }
ffb4593c
NT
1020
1021 /* not at end of file, so we only read another line */
09bef843
SB
1022 /* make corresponding updates to old pointers, for yyerror() */
1023 oldprevlen = PL_oldbufptr - PL_bufend;
1024 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1025 if (PL_last_uni)
1026 oldunilen = PL_last_uni - PL_bufend;
1027 if (PL_last_lop)
1028 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1029 PL_linestart = PL_bufptr = s + prevlen;
1030 PL_bufend = s + SvCUR(PL_linestr);
1031 s = PL_bufptr;
09bef843
SB
1032 PL_oldbufptr = s + oldprevlen;
1033 PL_oldoldbufptr = s + oldoldprevlen;
1034 if (PL_last_uni)
1035 PL_last_uni = s + oldunilen;
1036 if (PL_last_lop)
1037 PL_last_lop = s + oldloplen;
a0d0e21e 1038 incline(s);
ffb4593c
NT
1039
1040 /* debugger active and we're not compiling the debugger code,
1041 * so store the line into the debugger's array of lines
1042 */
3280af22 1043 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1044 SV * const sv = newSV(0);
8990e307
LW
1045
1046 sv_upgrade(sv, SVt_PVMG);
3280af22 1047 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1048 (void)SvIOK_on(sv);
45977657 1049 SvIV_set(sv, 0);
36c7798d 1050 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1051 }
463ee0b2 1052 }
5db06880
NC
1053
1054#ifdef PERL_MAD
1055 done:
1056 if (PL_madskills) {
1057 if (!skipwhite)
1058 skipwhite = newSVpvn("",0);
1059 curoff = s - SvPVX(PL_linestr);
1060 if (curoff - startoff)
1061 sv_catpvn(skipwhite, SvPVX(PL_linestr) + startoff,
1062 curoff - startoff);
1063 }
1064 return s;
1065#endif
a687059c 1066}
378cc40b 1067
ffb4593c
NT
1068/*
1069 * S_check_uni
1070 * Check the unary operators to ensure there's no ambiguity in how they're
1071 * used. An ambiguous piece of code would be:
1072 * rand + 5
1073 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1074 * the +5 is its argument.
1075 */
1076
76e3520e 1077STATIC void
cea2e8a9 1078S_check_uni(pTHX)
ba106d47 1079{
97aff369 1080 dVAR;
2f3197b3 1081 char *s;
a0d0e21e 1082 char *t;
2f3197b3 1083
3280af22 1084 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1085 return;
3280af22
NIS
1086 while (isSPACE(*PL_last_uni))
1087 PL_last_uni++;
7e2040f0 1088 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 1089 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1090 return;
6136c704
AL
1091
1092 /* XXX Things like this are just so nasty. We shouldn't be modifying
1093 source code, even if we realquick set it back. */
0453d815 1094 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 1095 const char ch = *s;
0453d815 1096 *s = '\0';
9014280d 1097 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 1098 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
1099 PL_last_uni);
1100 *s = ch;
1101 }
2f3197b3
LW
1102}
1103
ffb4593c
NT
1104/*
1105 * LOP : macro to build a list operator. Its behaviour has been replaced
1106 * with a subroutine, S_lop() for which LOP is just another name.
1107 */
1108
a0d0e21e
LW
1109#define LOP(f,x) return lop(f,x,s)
1110
ffb4593c
NT
1111/*
1112 * S_lop
1113 * Build a list operator (or something that might be one). The rules:
1114 * - if we have a next token, then it's a list operator [why?]
1115 * - if the next thing is an opening paren, then it's a function
1116 * - else it's a list operator
1117 */
1118
76e3520e 1119STATIC I32
a0be28da 1120S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1121{
97aff369 1122 dVAR;
79072805 1123 yylval.ival = f;
35c8bce7 1124 CLINE;
3280af22
NIS
1125 PL_expect = x;
1126 PL_bufptr = s;
1127 PL_last_lop = PL_oldbufptr;
eb160463 1128 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1129#ifdef PERL_MAD
1130 if (PL_lasttoke)
1131 return REPORT(LSTOP);
1132#else
3280af22 1133 if (PL_nexttoke)
bbf60fe6 1134 return REPORT(LSTOP);
5db06880 1135#endif
79072805 1136 if (*s == '(')
bbf60fe6 1137 return REPORT(FUNC);
29595ff2 1138 s = PEEKSPACE(s);
79072805 1139 if (*s == '(')
bbf60fe6 1140 return REPORT(FUNC);
79072805 1141 else
bbf60fe6 1142 return REPORT(LSTOP);
79072805
LW
1143}
1144
5db06880
NC
1145#ifdef PERL_MAD
1146 /*
1147 * S_start_force
1148 * Sets up for an eventual force_next(). start_force(0) basically does
1149 * an unshift, while start_force(-1) does a push. yylex removes items
1150 * on the "pop" end.
1151 */
1152
1153STATIC void
1154S_start_force(pTHX_ int where)
1155{
1156 int i;
1157
1158 if (where < 0) /* so people can duplicate start_force(curforce) */
1159 where = PL_lasttoke;
1160 assert(curforce < 0 || curforce == where);
1161 if (curforce != where) {
1162 for (i = PL_lasttoke; i > where; --i) {
1163 PL_nexttoke[i] = PL_nexttoke[i-1];
1164 }
1165 PL_lasttoke++;
1166 }
1167 if (curforce < 0) /* in case of duplicate start_force() */
1168 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1169 curforce = where;
1170 if (nextwhite) {
1171 if (PL_madskills)
1172 curmad('^', newSVpvn("",0));
1173 CURMAD('_', nextwhite);
1174 }
1175}
1176
1177STATIC void
1178S_curmad(pTHX_ char slot, SV *sv)
1179{
1180 MADPROP **where;
1181
1182 if (!sv)
1183 return;
1184 if (curforce < 0)
1185 where = &thismad;
1186 else
1187 where = &PL_nexttoke[curforce].next_mad;
1188
1189 if (faketokens)
1190 sv_setpvn(sv, "", 0);
1191 else {
1192 if (!IN_BYTES) {
1193 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1194 SvUTF8_on(sv);
1195 else if (PL_encoding) {
1196 sv_recode_to_utf8(sv, PL_encoding);
1197 }
1198 }
1199 }
1200
1201 /* keep a slot open for the head of the list? */
1202 if (slot != '_' && *where && (*where)->mad_key == '^') {
1203 (*where)->mad_key = slot;
1204 sv_free((*where)->mad_val);
1205 (*where)->mad_val = (void*)sv;
1206 }
1207 else
1208 addmad(newMADsv(slot, sv), where, 0);
1209}
1210#else
1211# define start_force(where)
1212# define curmad(slot, sv)
1213#endif
1214
ffb4593c
NT
1215/*
1216 * S_force_next
9cbb5ea2 1217 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1218 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1219 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1220 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1221 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1222 */
1223
4e553d73 1224STATIC void
cea2e8a9 1225S_force_next(pTHX_ I32 type)
79072805 1226{
97aff369 1227 dVAR;
5db06880
NC
1228#ifdef PERL_MAD
1229 if (curforce < 0)
1230 start_force(PL_lasttoke);
1231 PL_nexttoke[curforce].next_type = type;
1232 if (PL_lex_state != LEX_KNOWNEXT)
1233 PL_lex_defer = PL_lex_state;
1234 PL_lex_state = LEX_KNOWNEXT;
1235 PL_lex_expect = PL_expect;
1236 curforce = -1;
1237#else
3280af22
NIS
1238 PL_nexttype[PL_nexttoke] = type;
1239 PL_nexttoke++;
1240 if (PL_lex_state != LEX_KNOWNEXT) {
1241 PL_lex_defer = PL_lex_state;
1242 PL_lex_expect = PL_expect;
1243 PL_lex_state = LEX_KNOWNEXT;
79072805 1244 }
5db06880 1245#endif
79072805
LW
1246}
1247
d0a148a6
NC
1248STATIC SV *
1249S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1250{
97aff369 1251 dVAR;
9d4ba2ae 1252 SV * const sv = newSVpvn(start,len);
bfed75c6 1253 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1254 SvUTF8_on(sv);
1255 return sv;
1256}
1257
ffb4593c
NT
1258/*
1259 * S_force_word
1260 * When the lexer knows the next thing is a word (for instance, it has
1261 * just seen -> and it knows that the next char is a word char, then
1262 * it calls S_force_word to stick the next word into the PL_next lookahead.
1263 *
1264 * Arguments:
b1b65b59 1265 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1266 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1267 * int check_keyword : if true, Perl checks to make sure the word isn't
1268 * a keyword (do this if the word is a label, e.g. goto FOO)
1269 * int allow_pack : if true, : characters will also be allowed (require,
1270 * use, etc. do this)
9cbb5ea2 1271 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1272 */
1273
76e3520e 1274STATIC char *
cea2e8a9 1275S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1276{
97aff369 1277 dVAR;
463ee0b2
LW
1278 register char *s;
1279 STRLEN len;
4e553d73 1280
29595ff2 1281 start = SKIPSPACE1(start);
463ee0b2 1282 s = start;
7e2040f0 1283 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1284 (allow_pack && *s == ':') ||
15f0808c 1285 (allow_initial_tick && *s == '\'') )
a0d0e21e 1286 {
3280af22
NIS
1287 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1288 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1289 return start;
5db06880
NC
1290 start_force(curforce);
1291 if (PL_madskills)
1292 curmad('X', newSVpvn(start,s-start));
463ee0b2 1293 if (token == METHOD) {
29595ff2 1294 s = SKIPSPACE1(s);
463ee0b2 1295 if (*s == '(')
3280af22 1296 PL_expect = XTERM;
463ee0b2 1297 else {
3280af22 1298 PL_expect = XOPERATOR;
463ee0b2 1299 }
79072805 1300 }
9ded7720 1301 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1302 = (OP*)newSVOP(OP_CONST,0,
1303 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1304 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1305 force_next(token);
1306 }
1307 return s;
1308}
1309
ffb4593c
NT
1310/*
1311 * S_force_ident
9cbb5ea2 1312 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1313 * text only contains the "foo" portion. The first argument is a pointer
1314 * to the "foo", and the second argument is the type symbol to prefix.
1315 * Forces the next token to be a "WORD".
9cbb5ea2 1316 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1317 */
1318
76e3520e 1319STATIC void
bfed75c6 1320S_force_ident(pTHX_ register const char *s, int kind)
79072805 1321{
97aff369 1322 dVAR;
79072805 1323 if (s && *s) {
90e5519e
NC
1324 const STRLEN len = strlen(s);
1325 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
5db06880 1326 start_force(curforce);
9ded7720 1327 NEXTVAL_NEXTTOKE.opval = o;
79072805 1328 force_next(WORD);
748a9306 1329 if (kind) {
11343788 1330 o->op_private = OPpCONST_ENTERED;
55497cff 1331 /* XXX see note in pp_entereval() for why we forgo typo
1332 warnings if the symbol must be introduced in an eval.
1333 GSAR 96-10-12 */
90e5519e
NC
1334 gv_fetchpvn_flags(s, len,
1335 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1336 : GV_ADD,
1337 kind == '$' ? SVt_PV :
1338 kind == '@' ? SVt_PVAV :
1339 kind == '%' ? SVt_PVHV :
a0d0e21e 1340 SVt_PVGV
90e5519e 1341 );
748a9306 1342 }
79072805
LW
1343 }
1344}
1345
1571675a
GS
1346NV
1347Perl_str_to_version(pTHX_ SV *sv)
1348{
1349 NV retval = 0.0;
1350 NV nshift = 1.0;
1351 STRLEN len;
cfd0369c 1352 const char *start = SvPV_const(sv,len);
9d4ba2ae 1353 const char * const end = start + len;
504618e9 1354 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1355 while (start < end) {
ba210ebe 1356 STRLEN skip;
1571675a
GS
1357 UV n;
1358 if (utf)
9041c2e3 1359 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1360 else {
1361 n = *(U8*)start;
1362 skip = 1;
1363 }
1364 retval += ((NV)n)/nshift;
1365 start += skip;
1366 nshift *= 1000;
1367 }
1368 return retval;
1369}
1370
4e553d73 1371/*
ffb4593c
NT
1372 * S_force_version
1373 * Forces the next token to be a version number.
e759cc13
RGS
1374 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1375 * and if "guessing" is TRUE, then no new token is created (and the caller
1376 * must use an alternative parsing method).
ffb4593c
NT
1377 */
1378
76e3520e 1379STATIC char *
e759cc13 1380S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1381{
97aff369 1382 dVAR;
5f66b61c 1383 OP *version = NULL;
44dcb63b 1384 char *d;
5db06880
NC
1385#ifdef PERL_MAD
1386 I32 startoff = s - SvPVX(PL_linestr);
1387#endif
89bfa8cd 1388
29595ff2 1389 s = SKIPSPACE1(s);
89bfa8cd 1390
44dcb63b 1391 d = s;
dd629d5b 1392 if (*d == 'v')
44dcb63b 1393 d++;
44dcb63b 1394 if (isDIGIT(*d)) {
e759cc13
RGS
1395 while (isDIGIT(*d) || *d == '_' || *d == '.')
1396 d++;
5db06880
NC
1397#ifdef PERL_MAD
1398 if (PL_madskills) {
1399 start_force(curforce);
1400 curmad('X', newSVpvn(s,d-s));
1401 }
1402#endif
9f3d182e 1403 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1404 SV *ver;
b73d6f50 1405 s = scan_num(s, &yylval);
89bfa8cd 1406 version = yylval.opval;
dd629d5b
GS
1407 ver = cSVOPx(version)->op_sv;
1408 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1409 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1410 SvNV_set(ver, str_to_version(ver));
1571675a 1411 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1412 }
89bfa8cd 1413 }
5db06880
NC
1414 else if (guessing) {
1415#ifdef PERL_MAD
1416 if (PL_madskills) {
1417 sv_free(nextwhite); /* let next token collect whitespace */
1418 nextwhite = 0;
1419 s = SvPVX(PL_linestr) + startoff;
1420 }
1421#endif
e759cc13 1422 return s;
5db06880 1423 }
89bfa8cd 1424 }
1425
5db06880
NC
1426#ifdef PERL_MAD
1427 if (PL_madskills && !version) {
1428 sv_free(nextwhite); /* let next token collect whitespace */
1429 nextwhite = 0;
1430 s = SvPVX(PL_linestr) + startoff;
1431 }
1432#endif
89bfa8cd 1433 /* NOTE: The parser sees the package name and the VERSION swapped */
5db06880 1434 start_force(curforce);
9ded7720 1435 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1436 force_next(WORD);
89bfa8cd 1437
e759cc13 1438 return s;
89bfa8cd 1439}
1440
ffb4593c
NT
1441/*
1442 * S_tokeq
1443 * Tokenize a quoted string passed in as an SV. It finds the next
1444 * chunk, up to end of string or a backslash. It may make a new
1445 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1446 * turns \\ into \.
1447 */
1448
76e3520e 1449STATIC SV *
cea2e8a9 1450S_tokeq(pTHX_ SV *sv)
79072805 1451{
97aff369 1452 dVAR;
79072805
LW
1453 register char *s;
1454 register char *send;
1455 register char *d;
b3ac6de7
IZ
1456 STRLEN len = 0;
1457 SV *pv = sv;
79072805
LW
1458
1459 if (!SvLEN(sv))
b3ac6de7 1460 goto finish;
79072805 1461
a0d0e21e 1462 s = SvPV_force(sv, len);
21a311ee 1463 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1464 goto finish;
463ee0b2 1465 send = s + len;
79072805
LW
1466 while (s < send && *s != '\\')
1467 s++;
1468 if (s == send)
b3ac6de7 1469 goto finish;
79072805 1470 d = s;
be4731d2 1471 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1472 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1473 if (SvUTF8(sv))
1474 SvUTF8_on(pv);
1475 }
79072805
LW
1476 while (s < send) {
1477 if (*s == '\\') {
a0d0e21e 1478 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1479 s++; /* all that, just for this */
1480 }
1481 *d++ = *s++;
1482 }
1483 *d = '\0';
95a20fc0 1484 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1485 finish:
3280af22 1486 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1487 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1488 return sv;
1489}
1490
ffb4593c
NT
1491/*
1492 * Now come three functions related to double-quote context,
1493 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1494 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1495 * interact with PL_lex_state, and create fake ( ... ) argument lists
1496 * to handle functions and concatenation.
1497 * They assume that whoever calls them will be setting up a fake
1498 * join call, because each subthing puts a ',' after it. This lets
1499 * "lower \luPpEr"
1500 * become
1501 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1502 *
1503 * (I'm not sure whether the spurious commas at the end of lcfirst's
1504 * arguments and join's arguments are created or not).
1505 */
1506
1507/*
1508 * S_sublex_start
1509 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1510 *
1511 * Pattern matching will set PL_lex_op to the pattern-matching op to
1512 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1513 *
1514 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1515 *
1516 * Everything else becomes a FUNC.
1517 *
1518 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1519 * had an OP_CONST or OP_READLINE). This just sets us up for a
1520 * call to S_sublex_push().
1521 */
1522
76e3520e 1523STATIC I32
cea2e8a9 1524S_sublex_start(pTHX)
79072805 1525{
97aff369 1526 dVAR;
0d46e09a 1527 register const I32 op_type = yylval.ival;
79072805
LW
1528
1529 if (op_type == OP_NULL) {
3280af22 1530 yylval.opval = PL_lex_op;
5f66b61c 1531 PL_lex_op = NULL;
79072805
LW
1532 return THING;
1533 }
1534 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1535 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1536
1537 if (SvTYPE(sv) == SVt_PVIV) {
1538 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1539 STRLEN len;
96a5add6 1540 const char * const p = SvPV_const(sv, len);
f54cb97a 1541 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1542 if (SvUTF8(sv))
1543 SvUTF8_on(nsv);
b3ac6de7
IZ
1544 SvREFCNT_dec(sv);
1545 sv = nsv;
4e553d73 1546 }
b3ac6de7 1547 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1548 PL_lex_stuff = NULL;
6f33ba73
RGS
1549 /* Allow <FH> // "foo" */
1550 if (op_type == OP_READLINE)
1551 PL_expect = XTERMORDORDOR;
79072805
LW
1552 return THING;
1553 }
1554
3280af22
NIS
1555 PL_sublex_info.super_state = PL_lex_state;
1556 PL_sublex_info.sub_inwhat = op_type;
1557 PL_sublex_info.sub_op = PL_lex_op;
1558 PL_lex_state = LEX_INTERPPUSH;
55497cff 1559
3280af22
NIS
1560 PL_expect = XTERM;
1561 if (PL_lex_op) {
1562 yylval.opval = PL_lex_op;
5f66b61c 1563 PL_lex_op = NULL;
55497cff 1564 return PMFUNC;
1565 }
1566 else
1567 return FUNC;
1568}
1569
ffb4593c
NT
1570/*
1571 * S_sublex_push
1572 * Create a new scope to save the lexing state. The scope will be
1573 * ended in S_sublex_done. Returns a '(', starting the function arguments
1574 * to the uc, lc, etc. found before.
1575 * Sets PL_lex_state to LEX_INTERPCONCAT.
1576 */
1577
76e3520e 1578STATIC I32
cea2e8a9 1579S_sublex_push(pTHX)
55497cff 1580{
27da23d5 1581 dVAR;
f46d017c 1582 ENTER;
55497cff 1583
3280af22
NIS
1584 PL_lex_state = PL_sublex_info.super_state;
1585 SAVEI32(PL_lex_dojoin);
1586 SAVEI32(PL_lex_brackets);
3280af22
NIS
1587 SAVEI32(PL_lex_casemods);
1588 SAVEI32(PL_lex_starts);
1589 SAVEI32(PL_lex_state);
7766f137 1590 SAVEVPTR(PL_lex_inpat);
3280af22 1591 SAVEI32(PL_lex_inwhat);
57843af0 1592 SAVECOPLINE(PL_curcop);
3280af22 1593 SAVEPPTR(PL_bufptr);
8452ff4b 1594 SAVEPPTR(PL_bufend);
3280af22
NIS
1595 SAVEPPTR(PL_oldbufptr);
1596 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1597 SAVEPPTR(PL_last_lop);
1598 SAVEPPTR(PL_last_uni);
3280af22
NIS
1599 SAVEPPTR(PL_linestart);
1600 SAVESPTR(PL_linestr);
8edd5f42
RGS
1601 SAVEGENERICPV(PL_lex_brackstack);
1602 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1603
1604 PL_linestr = PL_lex_stuff;
a0714e2c 1605 PL_lex_stuff = NULL;
3280af22 1606
9cbb5ea2
GS
1607 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1608 = SvPVX(PL_linestr);
3280af22 1609 PL_bufend += SvCUR(PL_linestr);
bd61b366 1610 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1611 SAVEFREESV(PL_linestr);
1612
1613 PL_lex_dojoin = FALSE;
1614 PL_lex_brackets = 0;
a02a5408
JC
1615 Newx(PL_lex_brackstack, 120, char);
1616 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1617 PL_lex_casemods = 0;
1618 *PL_lex_casestack = '\0';
1619 PL_lex_starts = 0;
1620 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1621 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1622
1623 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1624 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1625 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1626 else
5f66b61c 1627 PL_lex_inpat = NULL;
79072805 1628
55497cff 1629 return '(';
79072805
LW
1630}
1631
ffb4593c
NT
1632/*
1633 * S_sublex_done
1634 * Restores lexer state after a S_sublex_push.
1635 */
1636
76e3520e 1637STATIC I32
cea2e8a9 1638S_sublex_done(pTHX)
79072805 1639{
27da23d5 1640 dVAR;
3280af22 1641 if (!PL_lex_starts++) {
396482e1 1642 SV * const sv = newSVpvs("");
9aa983d2
JH
1643 if (SvUTF8(PL_linestr))
1644 SvUTF8_on(sv);
3280af22 1645 PL_expect = XOPERATOR;
9aa983d2 1646 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1647 return THING;
1648 }
1649
3280af22
NIS
1650 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1651 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1652 return yylex();
79072805
LW
1653 }
1654
ffb4593c 1655 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1656 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1657 PL_linestr = PL_lex_repl;
1658 PL_lex_inpat = 0;
1659 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1660 PL_bufend += SvCUR(PL_linestr);
bd61b366 1661 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1662 SAVEFREESV(PL_linestr);
1663 PL_lex_dojoin = FALSE;
1664 PL_lex_brackets = 0;
3280af22
NIS
1665 PL_lex_casemods = 0;
1666 *PL_lex_casestack = '\0';
1667 PL_lex_starts = 0;
25da4f38 1668 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1669 PL_lex_state = LEX_INTERPNORMAL;
1670 PL_lex_starts++;
e9fa98b2
HS
1671 /* we don't clear PL_lex_repl here, so that we can check later
1672 whether this is an evalled subst; that means we rely on the
1673 logic to ensure sublex_done() is called again only via the
1674 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1675 }
e9fa98b2 1676 else {
3280af22 1677 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1678 PL_lex_repl = NULL;
e9fa98b2 1679 }
79072805 1680 return ',';
ffed7fef
LW
1681 }
1682 else {
5db06880
NC
1683#ifdef PERL_MAD
1684 if (PL_madskills) {
1685 if (thiswhite) {
1686 if (!endwhite)
1687 endwhite = newSVpvn("",0);
1688 sv_catsv(endwhite, thiswhite);
1689 thiswhite = 0;
1690 }
1691 if (thistoken)
1692 sv_setpvn(thistoken,"",0);
1693 else
1694 realtokenstart = -1;
1695 }
1696#endif
f46d017c 1697 LEAVE;
3280af22
NIS
1698 PL_bufend = SvPVX(PL_linestr);
1699 PL_bufend += SvCUR(PL_linestr);
1700 PL_expect = XOPERATOR;
09bef843 1701 PL_sublex_info.sub_inwhat = 0;
79072805 1702 return ')';
ffed7fef
LW
1703 }
1704}
1705
02aa26ce
NT
1706/*
1707 scan_const
1708
1709 Extracts a pattern, double-quoted string, or transliteration. This
1710 is terrifying code.
1711
3280af22
NIS
1712 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1713 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1714 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1715
9b599b2a
GS
1716 Returns a pointer to the character scanned up to. Iff this is
1717 advanced from the start pointer supplied (ie if anything was
1718 successfully parsed), will leave an OP for the substring scanned
1719 in yylval. Caller must intuit reason for not parsing further
1720 by looking at the next characters herself.
1721
02aa26ce
NT
1722 In patterns:
1723 backslashes:
1724 double-quoted style: \r and \n
1725 regexp special ones: \D \s
1726 constants: \x3
1727 backrefs: \1 (deprecated in substitution replacements)
1728 case and quoting: \U \Q \E
1729 stops on @ and $, but not for $ as tail anchor
1730
1731 In transliterations:
1732 characters are VERY literal, except for - not at the start or end
1733 of the string, which indicates a range. scan_const expands the
1734 range to the full set of intermediate characters.
1735
1736 In double-quoted strings:
1737 backslashes:
1738 double-quoted style: \r and \n
1739 constants: \x3
1740 backrefs: \1 (deprecated)
1741 case and quoting: \U \Q \E
1742 stops on @ and $
1743
1744 scan_const does *not* construct ops to handle interpolated strings.
1745 It stops processing as soon as it finds an embedded $ or @ variable
1746 and leaves it to the caller to work out what's going on.
1747
da6eedaa 1748 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1749
1750 $ in pattern could be $foo or could be tail anchor. Assumption:
1751 it's a tail anchor if $ is the last thing in the string, or if it's
1752 followed by one of ")| \n\t"
1753
1754 \1 (backreferences) are turned into $1
1755
1756 The structure of the code is
1757 while (there's a character to process) {
1758 handle transliteration ranges
1759 skip regexp comments
1760 skip # initiated comments in //x patterns
1761 check for embedded @foo
1762 check for embedded scalars
1763 if (backslash) {
1764 leave intact backslashes from leave (below)
1765 deprecate \1 in strings and sub replacements
1766 handle string-changing backslashes \l \U \Q \E, etc.
1767 switch (what was escaped) {
1768 handle - in a transliteration (becomes a literal -)
1769 handle \132 octal characters
1770 handle 0x15 hex characters
1771 handle \cV (control V)
1772 handle printf backslashes (\f, \r, \n, etc)
1773 } (end switch)
1774 } (end if backslash)
1775 } (end while character to read)
4e553d73 1776
02aa26ce
NT
1777*/
1778
76e3520e 1779STATIC char *
cea2e8a9 1780S_scan_const(pTHX_ char *start)
79072805 1781{
97aff369 1782 dVAR;
3280af22 1783 register char *send = PL_bufend; /* end of the constant */
561b68a9 1784 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1785 register char *s = start; /* start of the constant */
1786 register char *d = SvPVX(sv); /* destination for copies */
1787 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1788 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1789 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1790 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1791 UV uv;
4c3a8340
TS
1792#ifdef EBCDIC
1793 UV literal_endpoint = 0;
1794#endif
012bcf8d 1795
dff6d3cd 1796 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1797 PL_lex_inpat
b6d5fef8 1798 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1799 : "";
79072805 1800
2b9d42f0
NIS
1801 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1802 /* If we are doing a trans and we know we want UTF8 set expectation */
1803 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1804 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1805 }
1806
1807
79072805 1808 while (s < send || dorange) {
02aa26ce 1809 /* get transliterations out of the way (they're most literal) */
3280af22 1810 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1811 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1812 if (dorange) {
1ba5c669
JH
1813 I32 i; /* current expanded character */
1814 I32 min; /* first character in range */
1815 I32 max; /* last character in range */
02aa26ce 1816
2b9d42f0 1817 if (has_utf8) {
9d4ba2ae 1818 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1819 char *e = d++;
1820 while (e-- > c)
1821 *(e + 1) = *e;
25716404 1822 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1823 /* mark the range as done, and continue */
1824 dorange = FALSE;
1825 didrange = TRUE;
1826 continue;
1827 }
2b9d42f0 1828
95a20fc0 1829 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1830 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1831 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1832 d -= 2; /* eat the first char and the - */
1833
8ada0baa
JH
1834 min = (U8)*d; /* first char in range */
1835 max = (U8)d[1]; /* last char in range */
1836
c2e66d9e 1837 if (min > max) {
01ec43d0 1838 Perl_croak(aTHX_
d1573ac7 1839 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1840 (char)min, (char)max);
c2e66d9e
GS
1841 }
1842
c7f1f016 1843#ifdef EBCDIC
4c3a8340
TS
1844 if (literal_endpoint == 2 &&
1845 ((isLOWER(min) && isLOWER(max)) ||
1846 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1847 if (isLOWER(min)) {
1848 for (i = min; i <= max; i++)
1849 if (isLOWER(i))
db42d148 1850 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1851 } else {
1852 for (i = min; i <= max; i++)
1853 if (isUPPER(i))
db42d148 1854 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1855 }
1856 }
1857 else
1858#endif
1859 for (i = min; i <= max; i++)
eb160463 1860 *d++ = (char)i;
02aa26ce
NT
1861
1862 /* mark the range as done, and continue */
79072805 1863 dorange = FALSE;
01ec43d0 1864 didrange = TRUE;
4c3a8340
TS
1865#ifdef EBCDIC
1866 literal_endpoint = 0;
1867#endif
79072805 1868 continue;
4e553d73 1869 }
02aa26ce
NT
1870
1871 /* range begins (ignore - as first or last char) */
79072805 1872 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1873 if (didrange) {
1fafa243 1874 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1875 }
2b9d42f0 1876 if (has_utf8) {
25716404 1877 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1878 s++;
1879 continue;
1880 }
79072805
LW
1881 dorange = TRUE;
1882 s++;
01ec43d0
GS
1883 }
1884 else {
1885 didrange = FALSE;
4c3a8340
TS
1886#ifdef EBCDIC
1887 literal_endpoint = 0;
1888#endif
01ec43d0 1889 }
79072805 1890 }
02aa26ce
NT
1891
1892 /* if we get here, we're not doing a transliteration */
1893
0f5d15d6
IZ
1894 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1895 except for the last char, which will be done separately. */
3280af22 1896 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1897 if (s[2] == '#') {
e994fd66 1898 while (s+1 < send && *s != ')')
db42d148 1899 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1900 }
1901 else if (s[2] == '{' /* This should match regcomp.c */
1902 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1903 {
cc6b7395 1904 I32 count = 1;
0f5d15d6 1905 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1906 char c;
1907
d9f97599
GS
1908 while (count && (c = *regparse)) {
1909 if (c == '\\' && regparse[1])
1910 regparse++;
4e553d73 1911 else if (c == '{')
cc6b7395 1912 count++;
4e553d73 1913 else if (c == '}')
cc6b7395 1914 count--;
d9f97599 1915 regparse++;
cc6b7395 1916 }
e994fd66 1917 if (*regparse != ')')
5bdf89e7 1918 regparse--; /* Leave one char for continuation. */
0f5d15d6 1919 while (s < regparse)
db42d148 1920 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1921 }
748a9306 1922 }
02aa26ce
NT
1923
1924 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1925 else if (*s == '#' && PL_lex_inpat &&
1926 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1927 while (s+1 < send && *s != '\n')
db42d148 1928 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1929 }
02aa26ce 1930
5d1d4326 1931 /* check for embedded arrays
da6eedaa 1932 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1933 */
7e2040f0 1934 else if (*s == '@' && s[1]
5d1d4326 1935 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1936 break;
02aa26ce
NT
1937
1938 /* check for embedded scalars. only stop if we're sure it's a
1939 variable.
1940 */
79072805 1941 else if (*s == '$') {
3280af22 1942 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1943 break;
6002328a 1944 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1945 break; /* in regexp, $ might be tail anchor */
1946 }
02aa26ce 1947
2b9d42f0
NIS
1948 /* End of else if chain - OP_TRANS rejoin rest */
1949
02aa26ce 1950 /* backslashes */
79072805
LW
1951 if (*s == '\\' && s+1 < send) {
1952 s++;
02aa26ce
NT
1953
1954 /* some backslashes we leave behind */
c9f97d15 1955 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1956 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1957 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1958 continue;
1959 }
02aa26ce
NT
1960
1961 /* deprecate \1 in strings and substitution replacements */
3280af22 1962 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1963 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1964 {
599cee73 1965 if (ckWARN(WARN_SYNTAX))
9014280d 1966 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1967 *--s = '$';
1968 break;
1969 }
02aa26ce
NT
1970
1971 /* string-change backslash escapes */
3280af22 1972 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1973 --s;
1974 break;
1975 }
02aa26ce
NT
1976
1977 /* if we get here, it's either a quoted -, or a digit */
79072805 1978 switch (*s) {
02aa26ce
NT
1979
1980 /* quoted - in transliterations */
79072805 1981 case '-':
3280af22 1982 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1983 *d++ = *s++;
1984 continue;
1985 }
1986 /* FALL THROUGH */
1987 default:
11b8faa4 1988 {
041457d9
DM
1989 if (isALNUM(*s) &&
1990 *s != '_' &&
1991 ckWARN(WARN_MISC))
9014280d 1992 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1993 "Unrecognized escape \\%c passed through",
1994 *s);
1995 /* default action is to copy the quoted character */
f9a63242 1996 goto default_action;
11b8faa4 1997 }
02aa26ce
NT
1998
1999 /* \132 indicates an octal constant */
79072805
LW
2000 case '0': case '1': case '2': case '3':
2001 case '4': case '5': case '6': case '7':
ba210ebe 2002 {
53305cf1
NC
2003 I32 flags = 0;
2004 STRLEN len = 3;
2005 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2006 s += len;
2007 }
012bcf8d 2008 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2009
2010 /* \x24 indicates a hex constant */
79072805 2011 case 'x':
a0ed51b3
LW
2012 ++s;
2013 if (*s == '{') {
9d4ba2ae 2014 char* const e = strchr(s, '}');
a4c04bdc
NC
2015 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2016 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2017 STRLEN len;
355860ce 2018
53305cf1 2019 ++s;
adaeee49 2020 if (!e) {
a0ed51b3 2021 yyerror("Missing right brace on \\x{}");
355860ce 2022 continue;
ba210ebe 2023 }
53305cf1
NC
2024 len = e - s;
2025 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2026 s = e + 1;
a0ed51b3
LW
2027 }
2028 else {
ba210ebe 2029 {
53305cf1 2030 STRLEN len = 2;
a4c04bdc 2031 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2032 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2033 s += len;
2034 }
012bcf8d
GS
2035 }
2036
2037 NUM_ESCAPE_INSERT:
2038 /* Insert oct or hex escaped character.
301d3d20 2039 * There will always enough room in sv since such
db42d148 2040 * escapes will be longer than any UTF-8 sequence
301d3d20 2041 * they can end up as. */
ba7cea30 2042
c7f1f016
NIS
2043 /* We need to map to chars to ASCII before doing the tests
2044 to cover EBCDIC
2045 */
c4d5f83a 2046 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2047 if (!has_utf8 && uv > 255) {
301d3d20
JH
2048 /* Might need to recode whatever we have
2049 * accumulated so far if it contains any
2050 * hibit chars.
2051 *
2052 * (Can't we keep track of that and avoid
2053 * this rescan? --jhi)
012bcf8d 2054 */
c7f1f016 2055 int hicount = 0;
63cd0674
NIS
2056 U8 *c;
2057 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2058 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2059 hicount++;
db42d148 2060 }
012bcf8d 2061 }
63cd0674 2062 if (hicount) {
9d4ba2ae 2063 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2064 U8 *src, *dst;
2065 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2066 src = (U8 *)d - 1;
2067 dst = src+hicount;
2068 d += hicount;
cfd0369c 2069 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2070 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2071 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2072 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2073 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2074 }
2075 else {
63cd0674 2076 *dst-- = *src;
012bcf8d 2077 }
c7f1f016 2078 src--;
012bcf8d
GS
2079 }
2080 }
2081 }
2082
9aa983d2 2083 if (has_utf8 || uv > 255) {
9041c2e3 2084 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2085 has_utf8 = TRUE;
f9a63242
JH
2086 if (PL_lex_inwhat == OP_TRANS &&
2087 PL_sublex_info.sub_op) {
2088 PL_sublex_info.sub_op->op_private |=
2089 (PL_lex_repl ? OPpTRANS_FROM_UTF
2090 : OPpTRANS_TO_UTF);
f9a63242 2091 }
012bcf8d 2092 }
a0ed51b3 2093 else {
012bcf8d 2094 *d++ = (char)uv;
a0ed51b3 2095 }
012bcf8d
GS
2096 }
2097 else {
c4d5f83a 2098 *d++ = (char) uv;
a0ed51b3 2099 }
79072805 2100 continue;
02aa26ce 2101
b239daa5 2102 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2103 case 'N':
55eda711 2104 ++s;
423cee85
JH
2105 if (*s == '{') {
2106 char* e = strchr(s, '}');
155aba94 2107 SV *res;
423cee85 2108 STRLEN len;
cfd0369c 2109 const char *str;
4e553d73 2110
423cee85 2111 if (!e) {
5777a3f7 2112 yyerror("Missing right brace on \\N{}");
423cee85
JH
2113 e = s - 1;
2114 goto cont_scan;
2115 }
dbc0d4f2
JH
2116 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2117 /* \N{U+...} */
2118 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2119 PERL_SCAN_DISALLOW_PREFIX;
2120 s += 3;
2121 len = e - s;
2122 uv = grok_hex(s, &len, &flags, NULL);
2123 s = e + 1;
2124 goto NUM_ESCAPE_INSERT;
2125 }
55eda711 2126 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2127 res = new_constant( NULL, 0, "charnames",
a0714e2c 2128 res, NULL, "\\N{...}" );
f9a63242
JH
2129 if (has_utf8)
2130 sv_utf8_upgrade(res);
cfd0369c 2131 str = SvPV_const(res,len);
1c47067b
JH
2132#ifdef EBCDIC_NEVER_MIND
2133 /* charnames uses pack U and that has been
2134 * recently changed to do the below uni->native
2135 * mapping, so this would be redundant (and wrong,
2136 * the code point would be doubly converted).
2137 * But leave this in just in case the pack U change
2138 * gets revoked, but the semantics is still
2139 * desireable for charnames. --jhi */
cddc7ef4 2140 {
cfd0369c 2141 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2142
2143 if (uv < 0x100) {
89ebb4a3 2144 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2145
2146 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2147 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2148 str = SvPV_const(res, len);
cddc7ef4
JH
2149 }
2150 }
2151#endif
89491803 2152 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2153 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2154 SvCUR_set(sv, d - ostart);
2155 SvPOK_on(sv);
e4f3eed8 2156 *d = '\0';
f08d6ad9 2157 sv_utf8_upgrade(sv);
d2f449dd 2158 /* this just broke our allocation above... */
eb160463 2159 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2160 d = SvPVX(sv) + SvCUR(sv);
89491803 2161 has_utf8 = TRUE;
f08d6ad9 2162 }
eb160463 2163 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2164 const char * const odest = SvPVX_const(sv);
423cee85 2165
8973db79 2166 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2167 d = SvPVX(sv) + (d - odest);
2168 }
2169 Copy(str, d, len, char);
2170 d += len;
2171 SvREFCNT_dec(res);
2172 cont_scan:
2173 s = e + 1;
2174 }
2175 else
5777a3f7 2176 yyerror("Missing braces on \\N{}");
423cee85
JH
2177 continue;
2178
02aa26ce 2179 /* \c is a control character */
79072805
LW
2180 case 'c':
2181 s++;
961ce445 2182 if (s < send) {
ba210ebe 2183 U8 c = *s++;
c7f1f016
NIS
2184#ifdef EBCDIC
2185 if (isLOWER(c))
2186 c = toUPPER(c);
2187#endif
db42d148 2188 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2189 }
961ce445
RGS
2190 else {
2191 yyerror("Missing control char name in \\c");
2192 }
79072805 2193 continue;
02aa26ce
NT
2194
2195 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2196 case 'b':
db42d148 2197 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2198 break;
2199 case 'n':
db42d148 2200 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2201 break;
2202 case 'r':
db42d148 2203 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2204 break;
2205 case 'f':
db42d148 2206 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2207 break;
2208 case 't':
db42d148 2209 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2210 break;
34a3fe2a 2211 case 'e':
db42d148 2212 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2213 break;
2214 case 'a':
db42d148 2215 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2216 break;
02aa26ce
NT
2217 } /* end switch */
2218
79072805
LW
2219 s++;
2220 continue;
02aa26ce 2221 } /* end if (backslash) */
4c3a8340
TS
2222#ifdef EBCDIC
2223 else
2224 literal_endpoint++;
2225#endif
02aa26ce 2226
f9a63242 2227 default_action:
2b9d42f0
NIS
2228 /* If we started with encoded form, or already know we want it
2229 and then encode the next character */
2230 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2231 STRLEN len = 1;
5f66b61c
AL
2232 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2233 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2234 s += len;
2235 if (need > len) {
2236 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2237 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2238 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2239 }
5f66b61c 2240 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2241 has_utf8 = TRUE;
2242 }
2243 else {
2244 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2245 }
02aa26ce
NT
2246 } /* while loop to process each character */
2247
2248 /* terminate the string and set up the sv */
79072805 2249 *d = '\0';
95a20fc0 2250 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2251 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2252 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2253
79072805 2254 SvPOK_on(sv);
9f4817db 2255 if (PL_encoding && !has_utf8) {
d0063567
DK
2256 sv_recode_to_utf8(sv, PL_encoding);
2257 if (SvUTF8(sv))
2258 has_utf8 = TRUE;
9f4817db 2259 }
2b9d42f0 2260 if (has_utf8) {
7e2040f0 2261 SvUTF8_on(sv);
2b9d42f0 2262 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2263 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2264 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2265 }
2266 }
79072805 2267
02aa26ce 2268 /* shrink the sv if we allocated more than we used */
79072805 2269 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2270 SvPV_shrink_to_cur(sv);
79072805 2271 }
02aa26ce 2272
9b599b2a 2273 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2274 if (s > PL_bufptr) {
2275 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2276 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2277 sv, NULL,
4e553d73 2278 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2279 ? "tr"
3280af22 2280 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2281 ? "s"
2282 : "qq")));
79072805 2283 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2284 } else
8990e307 2285 SvREFCNT_dec(sv);
79072805
LW
2286 return s;
2287}
2288
ffb4593c
NT
2289/* S_intuit_more
2290 * Returns TRUE if there's more to the expression (e.g., a subscript),
2291 * FALSE otherwise.
ffb4593c
NT
2292 *
2293 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2294 *
2295 * ->[ and ->{ return TRUE
2296 * { and [ outside a pattern are always subscripts, so return TRUE
2297 * if we're outside a pattern and it's not { or [, then return FALSE
2298 * if we're in a pattern and the first char is a {
2299 * {4,5} (any digits around the comma) returns FALSE
2300 * if we're in a pattern and the first char is a [
2301 * [] returns FALSE
2302 * [SOMETHING] has a funky algorithm to decide whether it's a
2303 * character class or not. It has to deal with things like
2304 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2305 * anything else returns TRUE
2306 */
2307
9cbb5ea2
GS
2308/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2309
76e3520e 2310STATIC int
cea2e8a9 2311S_intuit_more(pTHX_ register char *s)
79072805 2312{
97aff369 2313 dVAR;
3280af22 2314 if (PL_lex_brackets)
79072805
LW
2315 return TRUE;
2316 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2317 return TRUE;
2318 if (*s != '{' && *s != '[')
2319 return FALSE;
3280af22 2320 if (!PL_lex_inpat)
79072805
LW
2321 return TRUE;
2322
2323 /* In a pattern, so maybe we have {n,m}. */
2324 if (*s == '{') {
2325 s++;
2326 if (!isDIGIT(*s))
2327 return TRUE;
2328 while (isDIGIT(*s))
2329 s++;
2330 if (*s == ',')
2331 s++;
2332 while (isDIGIT(*s))
2333 s++;
2334 if (*s == '}')
2335 return FALSE;
2336 return TRUE;
2337
2338 }
2339
2340 /* On the other hand, maybe we have a character class */
2341
2342 s++;
2343 if (*s == ']' || *s == '^')
2344 return FALSE;
2345 else {
ffb4593c 2346 /* this is terrifying, and it works */
79072805
LW
2347 int weight = 2; /* let's weigh the evidence */
2348 char seen[256];
f27ffc4a 2349 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2350 const char * const send = strchr(s,']');
3280af22 2351 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2352
2353 if (!send) /* has to be an expression */
2354 return TRUE;
2355
2356 Zero(seen,256,char);
2357 if (*s == '$')
2358 weight -= 3;
2359 else if (isDIGIT(*s)) {
2360 if (s[1] != ']') {
2361 if (isDIGIT(s[1]) && s[2] == ']')
2362 weight -= 10;
2363 }
2364 else
2365 weight -= 100;
2366 }
2367 for (; s < send; s++) {
2368 last_un_char = un_char;
2369 un_char = (unsigned char)*s;
2370 switch (*s) {
2371 case '@':
2372 case '&':
2373 case '$':
2374 weight -= seen[un_char] * 10;
7e2040f0 2375 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2376 int len;
8903cb82 2377 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2378 len = (int)strlen(tmpbuf);
2379 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2380 weight -= 100;
2381 else
2382 weight -= 10;
2383 }
2384 else if (*s == '$' && s[1] &&
93a17b20
LW
2385 strchr("[#!%*<>()-=",s[1])) {
2386 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2387 weight -= 10;
2388 else
2389 weight -= 1;
2390 }
2391 break;
2392 case '\\':
2393 un_char = 254;
2394 if (s[1]) {
93a17b20 2395 if (strchr("wds]",s[1]))
79072805
LW
2396 weight += 100;
2397 else if (seen['\''] || seen['"'])
2398 weight += 1;
93a17b20 2399 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2400 weight += 40;
2401 else if (isDIGIT(s[1])) {
2402 weight += 40;
2403 while (s[1] && isDIGIT(s[1]))
2404 s++;
2405 }
2406 }
2407 else
2408 weight += 100;
2409 break;
2410 case '-':
2411 if (s[1] == '\\')
2412 weight += 50;
93a17b20 2413 if (strchr("aA01! ",last_un_char))
79072805 2414 weight += 30;
93a17b20 2415 if (strchr("zZ79~",s[1]))
79072805 2416 weight += 30;
f27ffc4a
GS
2417 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2418 weight -= 5; /* cope with negative subscript */
79072805
LW
2419 break;
2420 default:
3792a11b
NC
2421 if (!isALNUM(last_un_char)
2422 && !(last_un_char == '$' || last_un_char == '@'
2423 || last_un_char == '&')
2424 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2425 char *d = tmpbuf;
2426 while (isALPHA(*s))
2427 *d++ = *s++;
2428 *d = '\0';
2429 if (keyword(tmpbuf, d - tmpbuf))
2430 weight -= 150;
2431 }
2432 if (un_char == last_un_char + 1)
2433 weight += 5;
2434 weight -= seen[un_char];
2435 break;
2436 }
2437 seen[un_char]++;
2438 }
2439 if (weight >= 0) /* probably a character class */
2440 return FALSE;
2441 }
2442
2443 return TRUE;
2444}
ffed7fef 2445
ffb4593c
NT
2446/*
2447 * S_intuit_method
2448 *
2449 * Does all the checking to disambiguate
2450 * foo bar
2451 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2452 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2453 *
2454 * First argument is the stuff after the first token, e.g. "bar".
2455 *
2456 * Not a method if bar is a filehandle.
2457 * Not a method if foo is a subroutine prototyped to take a filehandle.
2458 * Not a method if it's really "Foo $bar"
2459 * Method if it's "foo $bar"
2460 * Not a method if it's really "print foo $bar"
2461 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2462 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2463 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2464 * =>
2465 */
2466
76e3520e 2467STATIC int
62d55b22 2468S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2469{
97aff369 2470 dVAR;
a0d0e21e 2471 char *s = start + (*start == '$');
3280af22 2472 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2473 STRLEN len;
2474 GV* indirgv;
5db06880
NC
2475#ifdef PERL_MAD
2476 int soff;
2477#endif
a0d0e21e
LW
2478
2479 if (gv) {
62d55b22 2480 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2481 return 0;
62d55b22
NC
2482 if (cv) {
2483 if (SvPOK(cv)) {
2484 const char *proto = SvPVX_const(cv);
2485 if (proto) {
2486 if (*proto == ';')
2487 proto++;
2488 if (*proto == '*')
2489 return 0;
2490 }
b6c543e3
IZ
2491 }
2492 } else
a0d0e21e
LW
2493 gv = 0;
2494 }
8903cb82 2495 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2496 /* start is the beginning of the possible filehandle/object,
2497 * and s is the end of it
2498 * tmpbuf is a copy of it
2499 */
2500
a0d0e21e 2501 if (*start == '$') {
3280af22 2502 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2503 return 0;
5db06880
NC
2504#ifdef PERL_MAD
2505 len = start - SvPVX(PL_linestr);
2506#endif
29595ff2 2507 s = PEEKSPACE(s);
5db06880
NC
2508#ifdef PERLMAD
2509 start = SvPVX(PL_linestr) + len;
2510#endif
3280af22
NIS
2511 PL_bufptr = start;
2512 PL_expect = XREF;
a0d0e21e
LW
2513 return *s == '(' ? FUNCMETH : METHOD;
2514 }
2515 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2516 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2517 len -= 2;
2518 tmpbuf[len] = '\0';
5db06880
NC
2519#ifdef PERL_MAD
2520 soff = s - SvPVX(PL_linestr);
2521#endif
c3e0f903
GS
2522 goto bare_package;
2523 }
90e5519e 2524 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2525 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2526 return 0;
2527 /* filehandle or package name makes it a method */
89bfa8cd 2528 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2529#ifdef PERL_MAD
2530 soff = s - SvPVX(PL_linestr);
2531#endif
29595ff2 2532 s = PEEKSPACE(s);
3280af22 2533 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2534 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2535 bare_package:
5db06880 2536 start_force(curforce);
9ded7720 2537 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2538 newSVpvn(tmpbuf,len));
9ded7720 2539 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2540 if (PL_madskills)
2541 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2542 PL_expect = XTERM;
a0d0e21e 2543 force_next(WORD);
3280af22 2544 PL_bufptr = s;
5db06880
NC
2545#ifdef PERL_MAD
2546 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2547#endif
a0d0e21e
LW
2548 return *s == '(' ? FUNCMETH : METHOD;
2549 }
2550 }
2551 return 0;
2552}
2553
ffb4593c
NT
2554/*
2555 * S_incl_perldb
2556 * Return a string of Perl code to load the debugger. If PERL5DB
2557 * is set, it will return the contents of that, otherwise a
2558 * compile-time require of perl5db.pl.
2559 */
2560
bfed75c6 2561STATIC const char*
cea2e8a9 2562S_incl_perldb(pTHX)
a0d0e21e 2563{
97aff369 2564 dVAR;
3280af22 2565 if (PL_perldb) {
9d4ba2ae 2566 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2567
2568 if (pdb)
2569 return pdb;
93189314 2570 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2571 return "BEGIN { require 'perl5db.pl' }";
2572 }
2573 return "";
2574}
2575
2576
16d20bd9 2577/* Encoded script support. filter_add() effectively inserts a
4e553d73 2578 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2579 * Note that the filter function only applies to the current source file
2580 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2581 *
2582 * The datasv parameter (which may be NULL) can be used to pass
2583 * private data to this instance of the filter. The filter function
2584 * can recover the SV using the FILTER_DATA macro and use it to
2585 * store private buffers and state information.
2586 *
2587 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2588 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2589 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2590 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2591 * private use must be set using malloc'd pointers.
2592 */
16d20bd9
AD
2593
2594SV *
864dbfa3 2595Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2596{
97aff369 2597 dVAR;
f4c556ac 2598 if (!funcp)
a0714e2c 2599 return NULL;
f4c556ac 2600
3280af22
NIS
2601 if (!PL_rsfp_filters)
2602 PL_rsfp_filters = newAV();
16d20bd9 2603 if (!datasv)
561b68a9 2604 datasv = newSV(0);
862a34c6 2605 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2606 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2607 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2608 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2609 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2610 av_unshift(PL_rsfp_filters, 1);
2611 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2612 return(datasv);
2613}
4e553d73 2614
16d20bd9
AD
2615
2616/* Delete most recently added instance of this filter function. */
a0d0e21e 2617void
864dbfa3 2618Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2619{
97aff369 2620 dVAR;
e0c19803 2621 SV *datasv;
24801a4b 2622
33073adb 2623#ifdef DEBUGGING
8141890a 2624 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2625#endif
3280af22 2626 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2627 return;
2628 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2629 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2630 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2631 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2632 IoANY(datasv) = (void *)NULL;
3280af22 2633 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2634
16d20bd9
AD
2635 return;
2636 }
2637 /* we need to search for the correct entry and clear it */
cea2e8a9 2638 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2639}
2640
2641
1de9afcd
RGS
2642/* Invoke the idxth filter function for the current rsfp. */
2643/* maxlen 0 = read one text line */
16d20bd9 2644I32
864dbfa3 2645Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2646{
97aff369 2647 dVAR;
16d20bd9
AD
2648 filter_t funcp;
2649 SV *datasv = NULL;
e50aee73 2650
3280af22 2651 if (!PL_rsfp_filters)
16d20bd9 2652 return -1;
1de9afcd 2653 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2654 /* Provide a default input filter to make life easy. */
2655 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2656 DEBUG_P(PerlIO_printf(Perl_debug_log,
2657 "filter_read %d: from rsfp\n", idx));
4e553d73 2658 if (maxlen) {
16d20bd9
AD
2659 /* Want a block */
2660 int len ;
f54cb97a 2661 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2662
2663 /* ensure buf_sv is large enough */
eb160463 2664 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2665 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2666 if (PerlIO_error(PL_rsfp))
37120919
AD
2667 return -1; /* error */
2668 else
2669 return 0 ; /* end of file */
2670 }
16d20bd9
AD
2671 SvCUR_set(buf_sv, old_len + len) ;
2672 } else {
2673 /* Want a line */
3280af22
NIS
2674 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2675 if (PerlIO_error(PL_rsfp))
37120919
AD
2676 return -1; /* error */
2677 else
2678 return 0 ; /* end of file */
2679 }
16d20bd9
AD
2680 }
2681 return SvCUR(buf_sv);
2682 }
2683 /* Skip this filter slot if filter has been deleted */
1de9afcd 2684 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2685 DEBUG_P(PerlIO_printf(Perl_debug_log,
2686 "filter_read %d: skipped (filter deleted)\n",
2687 idx));
16d20bd9
AD
2688 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2689 }
2690 /* Get function pointer hidden within datasv */
8141890a 2691 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2692 DEBUG_P(PerlIO_printf(Perl_debug_log,
2693 "filter_read %d: via function %p (%s)\n",
cfd0369c 2694 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2695 /* Call function. The function is expected to */
2696 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2697 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2698 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2699}
2700
76e3520e 2701STATIC char *
cea2e8a9 2702S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2703{
97aff369 2704 dVAR;
c39cd008 2705#ifdef PERL_CR_FILTER
3280af22 2706 if (!PL_rsfp_filters) {
c39cd008 2707 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2708 }
2709#endif
3280af22 2710 if (PL_rsfp_filters) {
55497cff 2711 if (!append)
2712 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2713 if (FILTER_READ(0, sv, 0) > 0)
2714 return ( SvPVX(sv) ) ;
2715 else
bd61b366 2716 return NULL ;
16d20bd9 2717 }
9d116dd7 2718 else
fd049845 2719 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2720}
2721
01ec43d0 2722STATIC HV *
7fc63493 2723S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2724{
97aff369 2725 dVAR;
def3634b
GS
2726 GV *gv;
2727
01ec43d0 2728 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2729 return PL_curstash;
2730
2731 if (len > 2 &&
2732 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2733 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2734 {
2735 return GvHV(gv); /* Foo:: */
def3634b
GS
2736 }
2737
2738 /* use constant CLASS => 'MyClass' */
90e5519e 2739 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2740 SV *sv;
2741 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2742 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2743 }
2744 }
2745
2746 return gv_stashpv(pkgname, FALSE);
2747}
a0d0e21e 2748
5db06880
NC
2749#ifdef PERL_MAD
2750 /*
2751 * Perl_madlex
2752 * The intent of this yylex wrapper is to minimize the changes to the
2753 * tokener when we aren't interested in collecting madprops. It remains
2754 * to be seen how successful this strategy will be...
2755 */
2756
2757int
2758Perl_madlex(pTHX)
2759{
2760 int optype;
2761 char *s = PL_bufptr;
2762
2763 /* make sure thiswhite is initialized */
2764 thiswhite = 0;
2765 thismad = 0;
2766
2767 /* just do what yylex would do on pending identifier; leave thiswhite alone */
2768 if (PL_pending_ident)
2769 return S_pending_ident(aTHX);
2770
2771 /* previous token ate up our whitespace? */
2772 if (!PL_lasttoke && nextwhite) {
2773 thiswhite = nextwhite;
2774 nextwhite = 0;
2775 }
2776
2777 /* isolate the token, and figure out where it is without whitespace */
2778 realtokenstart = -1;
2779 thistoken = 0;
2780 optype = yylex();
2781 s = PL_bufptr;
2782 assert(curforce < 0);
2783
2784 if (!thismad || thismad->mad_key == '^') { /* not forced already? */
2785 if (!thistoken) {
2786 if (realtokenstart < 0 || !CopLINE(PL_curcop))
2787 thistoken = newSVpvn("",0);
2788 else {
2789 char *tstart = SvPVX(PL_linestr) + realtokenstart;
2790 thistoken = newSVpvn(tstart, s - tstart);
2791 }
2792 }
2793 if (thismad) /* install head */
2794 CURMAD('X', thistoken);
2795 }
2796
2797 /* last whitespace of a sublex? */
2798 if (optype == ')' && endwhite) {
2799 CURMAD('X', endwhite);
2800 }
2801
2802 if (!thismad) {
2803
2804 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2805 if (!thiswhite && !endwhite && !optype) {
2806 sv_free(thistoken);
2807 thistoken = 0;
2808 return 0;
2809 }
2810
2811 /* put off final whitespace till peg */
2812 if (optype == ';' && !PL_rsfp) {
2813 nextwhite = thiswhite;
2814 thiswhite = 0;
2815 }
2816 else if (thisopen) {
2817 CURMAD('q', thisopen);
2818 if (thistoken)
2819 sv_free(thistoken);
2820 thistoken = 0;
2821 }
2822 else {
2823 /* Store actual token text as madprop X */
2824 CURMAD('X', thistoken);
2825 }
2826
2827 if (thiswhite) {
2828 /* add preceding whitespace as madprop _ */
2829 CURMAD('_', thiswhite);
2830 }
2831
2832 if (thisstuff) {
2833 /* add quoted material as madprop = */
2834 CURMAD('=', thisstuff);
2835 }
2836
2837 if (thisclose) {
2838 /* add terminating quote as madprop Q */
2839 CURMAD('Q', thisclose);
2840 }
2841 }
2842
2843 /* special processing based on optype */
2844
2845 switch (optype) {
2846
2847 /* opval doesn't need a TOKEN since it can already store mp */
2848 case WORD:
2849 case METHOD:
2850 case FUNCMETH:
2851 case THING:
2852 case PMFUNC:
2853 case PRIVATEREF:
2854 case FUNC0SUB:
2855 case UNIOPSUB:
2856 case LSTOPSUB:
2857 if (yylval.opval)
2858 append_madprops(thismad, yylval.opval, 0);
2859 thismad = 0;
2860 return optype;
2861
2862 /* fake EOF */
2863 case 0:
2864 optype = PEG;
2865 if (endwhite) {
2866 addmad(newMADsv('p', endwhite), &thismad, 0);
2867 endwhite = 0;
2868 }
2869 break;
2870
2871 case ']':
2872 case '}':
2873 if (faketokens)
2874 break;
2875 /* remember any fake bracket that lexer is about to discard */
2876 if (PL_lex_brackets == 1 &&
2877 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2878 {
2879 s = PL_bufptr;
2880 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2881 s++;
2882 if (*s == '}') {
2883 thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2884 addmad(newMADsv('#', thiswhite), &thismad, 0);
2885 thiswhite = 0;
2886 PL_bufptr = s - 1;
2887 break; /* don't bother looking for trailing comment */
2888 }
2889 else
2890 s = PL_bufptr;
2891 }
2892 if (optype == ']')
2893 break;
2894 /* FALLTHROUGH */
2895
2896 /* attach a trailing comment to its statement instead of next token */
2897 case ';':
2898 if (faketokens)
2899 break;
2900 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2901 s = PL_bufptr;
2902 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2903 s++;
2904 if (*s == '\n' || *s == '#') {
2905 while (s < PL_bufend && *s != '\n')
2906 s++;
2907 if (s < PL_bufend)
2908 s++;
2909 thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2910 addmad(newMADsv('#', thiswhite), &thismad, 0);
2911 thiswhite = 0;
2912 PL_bufptr = s;
2913 }
2914 }
2915 break;
2916
2917 /* pval */
2918 case LABEL:
2919 break;
2920
2921 /* ival */
2922 default:
2923 break;
2924
2925 }
2926
2927 /* Create new token struct. Note: opvals return early above. */
2928 yylval.tkval = newTOKEN(optype, yylval, thismad);
2929 thismad = 0;
2930 return optype;
2931}
2932#endif
2933
468aa647 2934STATIC char *
cc6ed77d 2935S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2936 dVAR;
468aa647
RGS
2937 if (PL_expect != XSTATE)
2938 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2939 is_use ? "use" : "no"));
29595ff2 2940 s = SKIPSPACE1(s);
468aa647
RGS
2941 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2942 s = force_version(s, TRUE);
29595ff2 2943 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
5db06880 2944 start_force(curforce);
9ded7720 2945 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2946 force_next(WORD);
2947 }
2948 else if (*s == 'v') {
2949 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2950 s = force_version(s, FALSE);
2951 }
2952 }
2953 else {
2954 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2955 s = force_version(s, FALSE);
2956 }
2957 yylval.ival = is_use;
2958 return s;
2959}
748a9306 2960#ifdef DEBUGGING
27da23d5 2961 static const char* const exp_name[] =
09bef843 2962 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2963 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2964 };
748a9306 2965#endif
463ee0b2 2966
02aa26ce
NT
2967/*
2968 yylex
2969
2970 Works out what to call the token just pulled out of the input
2971 stream. The yacc parser takes care of taking the ops we return and
2972 stitching them into a tree.
2973
2974 Returns:
2975 PRIVATEREF
2976
2977 Structure:
2978 if read an identifier
2979 if we're in a my declaration
2980 croak if they tried to say my($foo::bar)
2981 build the ops for a my() declaration
2982 if it's an access to a my() variable
2983 are we in a sort block?
2984 croak if my($a); $a <=> $b
2985 build ops for access to a my() variable
2986 if in a dq string, and they've said @foo and we can't find @foo
2987 croak
2988 build ops for a bareword
2989 if we already built the token before, use it.
2990*/
2991
20141f0e 2992
dba4d153
JH
2993#ifdef __SC__
2994#pragma segment Perl_yylex
2995#endif
dba4d153 2996int
dba4d153 2997Perl_yylex(pTHX)
20141f0e 2998{
97aff369 2999 dVAR;
3afc138a 3000 register char *s = PL_bufptr;
378cc40b 3001 register char *d;
463ee0b2 3002 STRLEN len;
aa7440fb 3003 bool bof = FALSE;
a687059c 3004
bbf60fe6 3005 DEBUG_T( {
396482e1 3006 SV* tmp = newSVpvs("");
b6007c36
DM
3007 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3008 (IV)CopLINE(PL_curcop),
3009 lex_state_names[PL_lex_state],
3010 exp_name[PL_expect],
3011 pv_display(tmp, s, strlen(s), 0, 60));
3012 SvREFCNT_dec(tmp);
bbf60fe6 3013 } );
02aa26ce 3014 /* check if there's an identifier for us to look at */
ba979b31 3015 if (PL_pending_ident)
bbf60fe6 3016 return REPORT(S_pending_ident(aTHX));
bbce6d69 3017
02aa26ce
NT
3018 /* no identifier pending identification */
3019
3280af22 3020 switch (PL_lex_state) {
79072805
LW
3021#ifdef COMMENTARY
3022 case LEX_NORMAL: /* Some compilers will produce faster */
3023 case LEX_INTERPNORMAL: /* code if we comment these out. */
3024 break;
3025#endif
3026
09bef843 3027 /* when we've already built the next token, just pull it out of the queue */
79072805 3028 case LEX_KNOWNEXT:
5db06880
NC
3029#ifdef PERL_MAD
3030 PL_lasttoke--;
3031 yylval = PL_nexttoke[PL_lasttoke].next_val;
3032 if (PL_madskills) {
3033 thismad = PL_nexttoke[PL_lasttoke].next_mad;
3034 PL_nexttoke[PL_lasttoke].next_mad = 0;
3035 if (thismad && thismad->mad_key == '_') {
3036 thiswhite = (SV*)thismad->mad_val;
3037 thismad->mad_val = 0;
3038 mad_free(thismad);
3039 thismad = 0;
3040 }
3041 }
3042 if (!PL_lasttoke) {
3043 PL_lex_state = PL_lex_defer;
3044 PL_expect = PL_lex_expect;
3045 PL_lex_defer = LEX_NORMAL;
3046 if (!PL_nexttoke[PL_lasttoke].next_type)
3047 return yylex();
3048 }
3049#else
3280af22 3050 PL_nexttoke--;
5db06880 3051 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3052 if (!PL_nexttoke) {
3053 PL_lex_state = PL_lex_defer;
3054 PL_expect = PL_lex_expect;
3055 PL_lex_defer = LEX_NORMAL;
463ee0b2 3056 }
5db06880
NC
3057#endif
3058#ifdef PERL_MAD
3059 /* FIXME - can these be merged? */
3060 return(PL_nexttoke[PL_lasttoke].next_type);
3061#else
bbf60fe6 3062 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3063#endif
79072805 3064
02aa26ce 3065 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3066 when we get here, PL_bufptr is at the \
02aa26ce 3067 */
79072805
LW
3068 case LEX_INTERPCASEMOD:
3069#ifdef DEBUGGING
3280af22 3070 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3071 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3072#endif
02aa26ce 3073 /* handle \E or end of string */
3280af22 3074 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3075 /* if at a \E */
3280af22 3076 if (PL_lex_casemods) {
f54cb97a 3077 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3078 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3079
3792a11b
NC
3080 if (PL_bufptr != PL_bufend
3081 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3082 PL_bufptr += 2;
3083 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3084#ifdef PERL_MAD
3085 if (PL_madskills)
3086 thistoken = newSVpvn("\\E",2);
3087#endif
a0d0e21e 3088 }
bbf60fe6 3089 return REPORT(')');
79072805 3090 }
5db06880
NC
3091#ifdef PERL_MAD
3092 while (PL_bufptr != PL_bufend &&
3093 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3094 if (!thiswhite)
3095 thiswhite = newSVpvn("",0);
3096 sv_catpvn(thiswhite, PL_bufptr, 2);
3097 PL_bufptr += 2;
3098 }
3099#else
3280af22
NIS
3100 if (PL_bufptr != PL_bufend)
3101 PL_bufptr += 2;
5db06880 3102#endif
3280af22 3103 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3104 return yylex();
79072805
LW
3105 }
3106 else {
607df283 3107 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3108 "### Saw case modifier\n"); });
3280af22 3109 s = PL_bufptr + 1;
6e909404 3110 if (s[1] == '\\' && s[2] == 'E') {
5db06880
NC
3111#ifdef PERL_MAD
3112 if (!thiswhite)
3113 thiswhite = newSVpvn("",0);
3114 sv_catpvn(thiswhite, PL_bufptr, 4);
3115#endif
89122651 3116 PL_bufptr = s + 3;
6e909404
JH
3117 PL_lex_state = LEX_INTERPCONCAT;
3118 return yylex();
a0d0e21e 3119 }
6e909404 3120 else {
90771dc0 3121 I32 tmp;
5db06880
NC
3122 if (!PL_madskills) /* when just compiling don't need correct */
3123 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3124 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3125 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3126 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3127 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3128 return REPORT(')');
6e909404
JH
3129 }
3130 if (PL_lex_casemods > 10)
3131 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3132 PL_lex_casestack[PL_lex_casemods++] = *s;
3133 PL_lex_casestack[PL_lex_casemods] = '\0';
3134 PL_lex_state = LEX_INTERPCONCAT;
5db06880 3135 start_force(curforce);
9ded7720 3136 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3137 force_next('(');
5db06880 3138 start_force(curforce);
6e909404 3139 if (*s == 'l')
9ded7720 3140 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3141 else if (*s == 'u')
9ded7720 3142 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3143 else if (*s == 'L')
9ded7720 3144 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3145 else if (*s == 'U')
9ded7720 3146 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3147 else if (*s == 'Q')
9ded7720 3148 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3149 else
3150 Perl_croak(aTHX_ "panic: yylex");
5db06880
NC
3151 if (PL_madskills) {
3152 SV* tmpsv = newSVpvn("",0);
3153 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3154 curmad('_', tmpsv);
3155 }
6e909404 3156 PL_bufptr = s + 1;
a0d0e21e 3157 }
79072805 3158 force_next(FUNC);
3280af22
NIS
3159 if (PL_lex_starts) {
3160 s = PL_bufptr;
3161 PL_lex_starts = 0;
5db06880
NC
3162#ifdef PERL_MAD
3163 if (PL_madskills) {
3164 if (thistoken)
3165 sv_free(thistoken);
3166 thistoken = newSVpvn("",0);
3167 }
3168#endif
131b3ad0
DM
3169 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3170 if (PL_lex_casemods == 1 && PL_lex_inpat)
3171 OPERATOR(',');
3172 else
3173 Aop(OP_CONCAT);
79072805
LW
3174 }
3175 else
cea2e8a9 3176 return yylex();
79072805
LW
3177 }
3178
55497cff 3179 case LEX_INTERPPUSH:
bbf60fe6 3180 return REPORT(sublex_push());
55497cff 3181
79072805 3182 case LEX_INTERPSTART:
3280af22 3183 if (PL_bufptr == PL_bufend)
bbf60fe6 3184 return REPORT(sublex_done());
607df283 3185 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3186 "### Interpolated variable\n"); });
3280af22
NIS
3187 PL_expect = XTERM;
3188 PL_lex_dojoin = (*PL_bufptr == '@');
3189 PL_lex_state = LEX_INTERPNORMAL;
3190 if (PL_lex_dojoin) {
5db06880 3191 start_force(curforce);
9ded7720 3192 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3193 force_next(',');
5db06880 3194 start_force(curforce);
a0d0e21e 3195 force_ident("\"", '$');
5db06880 3196 start_force(curforce);
9ded7720 3197 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3198 force_next('$');
5db06880 3199 start_force(curforce);
9ded7720 3200 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3201 force_next('(');
5db06880 3202 start_force(curforce);
9ded7720 3203 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3204 force_next(FUNC);
3205 }
3280af22
NIS
3206 if (PL_lex_starts++) {
3207 s = PL_bufptr;
5db06880
NC
3208#ifdef PERL_MAD
3209 if (PL_madskills) {
3210 if (thistoken)
3211 sv_free(thistoken);
3212 thistoken = newSVpvn("",0);
3213 }
3214#endif
131b3ad0
DM
3215 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3216 if (!PL_lex_casemods && PL_lex_inpat)
3217 OPERATOR(',');
3218 else
3219 Aop(OP_CONCAT);
79072805 3220 }
cea2e8a9 3221 return yylex();
79072805
LW
3222
3223 case LEX_INTERPENDMAYBE:
3280af22
NIS
3224 if (intuit_more(PL_bufptr)) {
3225 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3226 break;
3227 }
3228 /* FALL THROUGH */
3229
3230 case LEX_INTERPEND:
3280af22
NIS
3231 if (PL_lex_dojoin) {
3232 PL_lex_dojoin = FALSE;
3233 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3234#ifdef PERL_MAD
3235 if (PL_madskills) {
3236 if (thistoken)
3237 sv_free(thistoken);
3238 thistoken = newSVpvn("",0);
3239 }
3240#endif
bbf60fe6 3241 return REPORT(')');
79072805 3242 }
43a16006 3243 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3244 && SvEVALED(PL_lex_repl))
43a16006 3245 {
e9fa98b2 3246 if (PL_bufptr != PL_bufend)
cea2e8a9 3247 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3248 PL_lex_repl = NULL;
e9fa98b2 3249 }
79072805
LW
3250 /* FALLTHROUGH */
3251 case LEX_INTERPCONCAT:
3252#ifdef DEBUGGING
3280af22 3253 if (PL_lex_brackets)
cea2e8a9 3254 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3255#endif
3280af22 3256 if (PL_bufptr == PL_bufend)
bbf60fe6 3257 return REPORT(sublex_done());
79072805 3258
3280af22
NIS
3259 if (SvIVX(PL_linestr) == '\'') {
3260 SV *sv = newSVsv(PL_linestr);
3261 if (!PL_lex_inpat)
76e3520e 3262 sv = tokeq(sv);
3280af22 3263 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3264 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3265 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3266 s = PL_bufend;
79072805
LW
3267 }
3268 else {
3280af22 3269 s = scan_const(PL_bufptr);
79072805 3270 if (*s == '\\')
3280af22 3271 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3272 else
3280af22 3273 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3274 }
3275
3280af22 3276 if (s != PL_bufptr) {
5db06880
NC
3277 start_force(curforce);
3278 if (PL_madskills) {
3279 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3280 }
9ded7720 3281 NEXTVAL_NEXTTOKE = yylval;
3280af22 3282 PL_expect = XTERM;
79072805 3283 force_next(THING);
131b3ad0 3284 if (PL_lex_starts++) {
5db06880
NC
3285#ifdef PERL_MAD
3286 if (PL_madskills) {
3287 if (thistoken)
3288 sv_free(thistoken);
3289 thistoken = newSVpvn("",0);
3290 }
3291#endif
131b3ad0
DM
3292 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3293 if (!PL_lex_casemods && PL_lex_inpat)
3294 OPERATOR(',');
3295 else
3296 Aop(OP_CONCAT);
3297 }
79072805 3298 else {
3280af22 3299 PL_bufptr = s;
cea2e8a9 3300 return yylex();
79072805
LW
3301 }
3302 }
3303
cea2e8a9 3304 return yylex();
a0d0e21e 3305 case LEX_FORMLINE:
3280af22
NIS
3306 PL_lex_state = LEX_NORMAL;
3307 s = scan_formline(PL_bufptr);
3308 if (!PL_lex_formbrack)
a0d0e21e
LW
3309 goto rightbracket;
3310 OPERATOR(';');
79072805
LW
3311 }
3312
3280af22
NIS
3313 s = PL_bufptr;
3314 PL_oldoldbufptr = PL_oldbufptr;
3315 PL_oldbufptr = s;
463ee0b2
LW
3316
3317 retry:
5db06880
NC
3318#ifdef PERL_MAD
3319 if (thistoken) {
3320 sv_free(thistoken);
3321 thistoken = 0;
3322 }
3323 realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3324#endif
378cc40b
LW
3325 switch (*s) {
3326 default:
7e2040f0 3327 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3328 goto keylookup;
cea2e8a9 3329 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3330 case 4:
3331 case 26:
3332 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3333 case 0:
5db06880
NC
3334#ifdef PERL_MAD
3335 if (PL_madskills)
3336 faketokens = 0;
3337#endif
3280af22
NIS
3338 if (!PL_rsfp) {
3339 PL_last_uni = 0;
3340 PL_last_lop = 0;
c5ee2135 3341 if (PL_lex_brackets) {
0bd48802
AL
3342 yyerror(PL_lex_formbrack
3343 ? "Format not terminated"
3344 : "Missing right curly or square bracket");
c5ee2135 3345 }
4e553d73 3346 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3347 "### Tokener got EOF\n");
5f80b19c 3348 } );
79072805 3349 TOKEN(0);
463ee0b2 3350 }
3280af22 3351 if (s++ < PL_bufend)
a687059c 3352 goto retry; /* ignore stray nulls */
3280af22
NIS
3353 PL_last_uni = 0;
3354 PL_last_lop = 0;
3355 if (!PL_in_eval && !PL_preambled) {
3356 PL_preambled = TRUE;
5db06880
NC
3357#ifdef PERL_MAD
3358 if (PL_madskills)
3359 faketokens = 1;
3360#endif
3280af22
NIS
3361 sv_setpv(PL_linestr,incl_perldb());
3362 if (SvCUR(PL_linestr))
396482e1 3363 sv_catpvs(PL_linestr,";");
3280af22
NIS
3364 if (PL_preambleav){
3365 while(AvFILLp(PL_preambleav) >= 0) {
3366 SV *tmpsv = av_shift(PL_preambleav);
3367 sv_catsv(PL_linestr, tmpsv);
396482e1 3368 sv_catpvs(PL_linestr, ";");
91b7def8 3369 sv_free(tmpsv);
3370 }
3280af22
NIS
3371 sv_free((SV*)PL_preambleav);
3372 PL_preambleav = NULL;
91b7def8 3373 }
3280af22 3374 if (PL_minus_n || PL_minus_p) {
396482e1 3375 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3376 if (PL_minus_l)
396482e1 3377 sv_catpvs(PL_linestr,"chomp;");
3280af22 3378 if (PL_minus_a) {
3280af22 3379 if (PL_minus_F) {
3792a11b
NC
3380 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3381 || *PL_splitstr == '"')
3280af22 3382 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3383 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3384 else {
c8ef6a4b
NC
3385 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3386 bytes can be used as quoting characters. :-) */
dd374669 3387 const char *splits = PL_splitstr;
91d456ae 3388 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3389 do {
3390 /* Need to \ \s */
dd374669
AL
3391 if (*splits == '\\')
3392 sv_catpvn(PL_linestr, splits, 1);
3393 sv_catpvn(PL_linestr, splits, 1);
3394 } while (*splits++);
48c4c863
NC
3395 /* This loop will embed the trailing NUL of
3396 PL_linestr as the last thing it does before
3397 terminating. */
396482e1 3398 sv_catpvs(PL_linestr, ");");
54310121 3399 }
2304df62
AD
3400 }
3401 else
396482e1 3402 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3403 }
79072805 3404 }
bc9b29db 3405 if (PL_minus_E)
396482e1
GA
3406 sv_catpvs(PL_linestr,"use feature ':5.10';");
3407 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3408 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3409 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3410 PL_last_lop = PL_last_uni = NULL;
3280af22 3411 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3412 SV * const sv = newSV(0);
a0d0e21e
LW
3413
3414 sv_upgrade(sv, SVt_PVMG);
3280af22 3415 sv_setsv(sv,PL_linestr);
0ac0412a 3416 (void)SvIOK_on(sv);
45977657 3417 SvIV_set(sv, 0);
36c7798d 3418 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3419 }
79072805 3420 goto retry;
a687059c 3421 }
e929a76b 3422 do {
aa7440fb 3423 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3424 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3425 fake_eof:
5db06880
NC
3426#ifdef PERL_MAD
3427 realtokenstart = -1;
3428#endif
7e28d3af
JH
3429 if (PL_rsfp) {
3430 if (PL_preprocess && !PL_in_eval)
3431 (void)PerlProc_pclose(PL_rsfp);
3432 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3433 PerlIO_clearerr(PL_rsfp);
3434 else
3435 (void)PerlIO_close(PL_rsfp);
4608196e 3436 PL_rsfp = NULL;
7e28d3af
JH
3437 PL_doextract = FALSE;
3438 }
3439 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3440#ifdef PERL_MAD
3441 if (PL_madskills)
3442 faketokens = 1;
3443#endif
a23c4656
NC
3444 sv_setpv(PL_linestr,PL_minus_p
3445 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3446 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3447 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3448 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3449 PL_minus_n = PL_minus_p = 0;
3450 goto retry;
3451 }
3452 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3453 PL_last_lop = PL_last_uni = NULL;
c69006e4 3454 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3455 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3456 }
7aa207d6
JH
3457 /* If it looks like the start of a BOM or raw UTF-16,
3458 * check if it in fact is. */
3459 else if (bof &&
3460 (*s == 0 ||
3461 *(U8*)s == 0xEF ||
3462 *(U8*)s >= 0xFE ||
3463 s[1] == 0)) {
226017aa 3464#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3465# ifdef __GNU_LIBRARY__
3466# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3467# define FTELL_FOR_PIPE_IS_BROKEN
3468# endif
e3f494f1
JH
3469# else
3470# ifdef __GLIBC__
3471# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3472# define FTELL_FOR_PIPE_IS_BROKEN
3473# endif
3474# endif
226017aa
DD
3475# endif
3476#endif
3477#ifdef FTELL_FOR_PIPE_IS_BROKEN
3478 /* This loses the possibility to detect the bof
3479 * situation on perl -P when the libc5 is being used.
3480 * Workaround? Maybe attach some extra state to PL_rsfp?
3481 */
3482 if (!PL_preprocess)
7e28d3af 3483 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3484#else
eb160463 3485 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3486#endif
7e28d3af 3487 if (bof) {
3280af22 3488 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3489 s = swallow_bom((U8*)s);
e929a76b 3490 }
378cc40b 3491 }
3280af22 3492 if (PL_doextract) {
a0d0e21e 3493 /* Incest with pod. */
5db06880
NC
3494#ifdef PERL_MAD
3495 if (PL_madskills)
3496 sv_catsv(thiswhite, PL_linestr);
3497#endif
a0d0e21e 3498 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3499 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3500 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3501 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3502 PL_last_lop = PL_last_uni = NULL;
3280af22 3503 PL_doextract = FALSE;
a0d0e21e 3504 }
4e553d73 3505 }
463ee0b2 3506 incline(s);
3280af22
NIS
3507 } while (PL_doextract);
3508 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3509 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3510 SV * const sv = newSV(0);
a687059c 3511
93a17b20 3512 sv_upgrade(sv, SVt_PVMG);
3280af22 3513 sv_setsv(sv,PL_linestr);
0ac0412a 3514 (void)SvIOK_on(sv);
45977657 3515 SvIV_set(sv, 0);
36c7798d 3516 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 3517 }
3280af22 3518 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3519 PL_last_lop = PL_last_uni = NULL;
57843af0 3520 if (CopLINE(PL_curcop) == 1) {
3280af22 3521 while (s < PL_bufend && isSPACE(*s))
79072805 3522 s++;
a0d0e21e 3523 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 3524 s++;
5db06880
NC
3525#ifdef PERL_MAD
3526 if (PL_madskills)
3527 thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3528#endif
bd61b366 3529 d = NULL;
3280af22 3530 if (!PL_in_eval) {
44a8e56a 3531 if (*s == '#' && *(s+1) == '!')
3532 d = s + 2;
3533#ifdef ALTERNATE_SHEBANG
3534 else {
bfed75c6 3535 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 3536 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3537 d = s + (sizeof(as) - 1);
3538 }
3539#endif /* ALTERNATE_SHEBANG */
3540 }
3541 if (d) {
b8378b72 3542 char *ipath;
774d564b 3543 char *ipathend;
b8378b72 3544
774d564b 3545 while (isSPACE(*d))
b8378b72
CS
3546 d++;
3547 ipath = d;
774d564b 3548 while (*d && !isSPACE(*d))
3549 d++;
3550 ipathend = d;
3551
3552#ifdef ARG_ZERO_IS_SCRIPT
3553 if (ipathend > ipath) {
3554 /*
3555 * HP-UX (at least) sets argv[0] to the script name,
3556 * which makes $^X incorrect. And Digital UNIX and Linux,
3557 * at least, set argv[0] to the basename of the Perl
3558 * interpreter. So, having found "#!", we'll set it right.
3559 */
fafc274c
NC
3560 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3561 SVt_PV)); /* $^X */
774d564b 3562 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 3563 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 3564 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 3565 SvSETMAGIC(x);
3566 }
556c1dec
JH
3567 else {
3568 STRLEN blen;
3569 STRLEN llen;
cfd0369c 3570 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 3571 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
3572 if (llen < blen) {
3573 bstart += blen - llen;
3574 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3575 sv_setpvn(x, ipath, ipathend - ipath);
3576 SvSETMAGIC(x);
3577 }
3578 }
3579 }
774d564b 3580 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 3581 }
774d564b 3582#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
3583
3584 /*
3585 * Look for options.
3586 */
748a9306 3587 d = instr(s,"perl -");
84e30d1a 3588 if (!d) {
748a9306 3589 d = instr(s,"perl");
84e30d1a
GS
3590#if defined(DOSISH)
3591 /* avoid getting into infinite loops when shebang
3592 * line contains "Perl" rather than "perl" */
3593 if (!d) {
3594 for (d = ipathend-4; d >= ipath; --d) {
3595 if ((*d == 'p' || *d == 'P')
3596 && !ibcmp(d, "perl", 4))
3597 {
3598 break;
3599 }
3600 }
3601 if (d < ipath)
bd61b366 3602 d = NULL;
84e30d1a
GS
3603 }
3604#endif
3605 }
44a8e56a 3606#ifdef ALTERNATE_SHEBANG
3607 /*
3608 * If the ALTERNATE_SHEBANG on this system starts with a
3609 * character that can be part of a Perl expression, then if
3610 * we see it but not "perl", we're probably looking at the
3611 * start of Perl code, not a request to hand off to some
3612 * other interpreter. Similarly, if "perl" is there, but
3613 * not in the first 'word' of the line, we assume the line
3614 * contains the start of the Perl program.
44a8e56a 3615 */
3616 if (d && *s != '#') {
f54cb97a 3617 const char *c = ipath;
44a8e56a 3618 while (*c && !strchr("; \t\r\n\f\v#", *c))
3619 c++;
3620 if (c < d)
bd61b366 3621 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 3622 else
3623 *s = '#'; /* Don't try to parse shebang line */
3624 }
774d564b 3625#endif /* ALTERNATE_SHEBANG */
bf4acbe4 3626#ifndef MACOS_TRADITIONAL
748a9306 3627 if (!d &&
44a8e56a 3628 *s == '#' &&
774d564b 3629 ipathend > ipath &&
3280af22 3630 !PL_minus_c &&
748a9306 3631 !instr(s,"indir") &&
3280af22 3632 instr(PL_origargv[0],"perl"))
748a9306 3633 {
27da23d5 3634 dVAR;