This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Another refactoring from Larry implicit in the MAD patch.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2
NC
38#ifdef PERL_MAD
39/* XXX these probably need to be made into PL vars */
40static I32 realtokenstart;
41static I32 faketokens = 0;
42static MADPROP *thismad;
43static SV *thistoken;
44static SV *thisopen;
45static SV *thisstuff;
46static SV *thisclose;
47static SV *thiswhite;
48static SV *nextwhite;
49static SV *skipwhite;
50static SV *endwhite;
51static I32 curforce = -1;
52
53# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
9ded7720
NC
54
55# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
56#else
57# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
58#endif
59
9059aa12
LW
60#define XFAKEBRACK 128
61#define XENUMMASK 127
62
39e02b42
JH
63#ifdef USE_UTF8_SCRIPTS
64# define UTF (!IN_BYTES)
2b9d42f0 65#else
746b446a 66# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 67#endif
a0ed51b3 68
61f0cdd9 69/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
70 * 1999-02-27 mjd-perl-patch@plover.com */
71#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
72
bf4acbe4
GS
73/* On MacOS, respect nonbreaking spaces */
74#ifdef MACOS_TRADITIONAL
75#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
76#else
77#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
78#endif
79
ffb4593c
NT
80/* LEX_* are values for PL_lex_state, the state of the lexer.
81 * They are arranged oddly so that the guard on the switch statement
79072805
LW
82 * can get by with a single comparison (if the compiler is smart enough).
83 */
84
fb73857a
PP
85/* #define LEX_NOTPARSING 11 is done in perl.h. */
86
b6007c36
DM
87#define LEX_NORMAL 10 /* normal code (ie not within "...") */
88#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
89#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
90#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
91#define LEX_INTERPSTART 6 /* expecting the start of a $var */
92
93 /* at end of code, eg "$x" followed by: */
94#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
95#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
96
97#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
98 string or after \E, $foo, etc */
99#define LEX_INTERPCONST 2 /* NOT USED */
100#define LEX_FORMLINE 1 /* expecting a format line */
101#define LEX_KNOWNEXT 0 /* next token known; just return it */
102
79072805 103
bbf60fe6 104#ifdef DEBUGGING
27da23d5 105static const char* const lex_state_names[] = {
bbf60fe6
DM
106 "KNOWNEXT",
107 "FORMLINE",
108 "INTERPCONST",
109 "INTERPCONCAT",
110 "INTERPENDMAYBE",
111 "INTERPEND",
112 "INTERPSTART",
113 "INTERPPUSH",
114 "INTERPCASEMOD",
115 "INTERPNORMAL",
116 "NORMAL"
117};
118#endif
119
79072805
LW
120#ifdef ff_next
121#undef ff_next
d48672a2
LW
122#endif
123
79072805 124#include "keywords.h"
fe14fcc3 125
ffb4593c
NT
126/* CLINE is a macro that ensures PL_copline has a sane value */
127
ae986130
LW
128#ifdef CLINE
129#undef CLINE
130#endif
57843af0 131#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 132
29595ff2
NC
133#if 0 && defined(PERL_MAD)
134# define SKIPSPACE0(s) skipspace0(s)
135# define SKIPSPACE1(s) skipspace1(s)
136# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
137# define PEEKSPACE(s) skipspace2(s,0)
138#else
139# define SKIPSPACE0(s) skipspace(s)
140# define SKIPSPACE1(s) skipspace(s)
141# define SKIPSPACE2(s,tsv) skipspace(s)
142# define PEEKSPACE(s) skipspace(s)
143#endif
144
ffb4593c
NT
145/*
146 * Convenience functions to return different tokens and prime the
9cbb5ea2 147 * lexer for the next token. They all take an argument.
ffb4593c
NT
148 *
149 * TOKEN : generic token (used for '(', DOLSHARP, etc)
150 * OPERATOR : generic operator
151 * AOPERATOR : assignment operator
152 * PREBLOCK : beginning the block after an if, while, foreach, ...
153 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
154 * PREREF : *EXPR where EXPR is not a simple identifier
155 * TERM : expression term
156 * LOOPX : loop exiting command (goto, last, dump, etc)
157 * FTST : file test operator
158 * FUN0 : zero-argument function
2d2e263d 159 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
160 * BOop : bitwise or or xor
161 * BAop : bitwise and
162 * SHop : shift operator
163 * PWop : power operator
9cbb5ea2 164 * PMop : pattern-matching operator
ffb4593c
NT
165 * Aop : addition-level operator
166 * Mop : multiplication-level operator
167 * Eop : equality-testing operator
e5edeb50 168 * Rop : relational operator <= != gt
ffb4593c
NT
169 *
170 * Also see LOP and lop() below.
171 */
172
998054bd 173#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 174# define REPORT(retval) tokereport((I32)retval)
998054bd 175#else
bbf60fe6 176# define REPORT(retval) (retval)
998054bd
SC
177#endif
178
bbf60fe6
DM
179#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
180#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
181#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
182#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
183#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
184#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
185#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
186#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
187#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
188#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
189#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
190#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
191#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
192#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
193#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
194#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
195#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
196#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
197#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
198#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 199
a687059c
LW
200/* This bit of chicanery makes a unary function followed by
201 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
202 * The UNIDOR macro is for unary functions that can be followed by the //
203 * operator (such as C<shift // 0>).
a687059c 204 */
376fcdbf
AL
205#define UNI2(f,x) { \
206 yylval.ival = f; \
207 PL_expect = x; \
208 PL_bufptr = s; \
209 PL_last_uni = PL_oldbufptr; \
210 PL_last_lop_op = f; \
211 if (*s == '(') \
212 return REPORT( (int)FUNC1 ); \
29595ff2 213 s = PEEKSPACE(s); \
376fcdbf
AL
214 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
215 }
6f33ba73
RGS
216#define UNI(f) UNI2(f,XTERM)
217#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 218
376fcdbf
AL
219#define UNIBRACK(f) { \
220 yylval.ival = f; \
221 PL_bufptr = s; \
222 PL_last_uni = PL_oldbufptr; \
223 if (*s == '(') \
224 return REPORT( (int)FUNC1 ); \
29595ff2 225 s = PEEKSPACE(s); \
376fcdbf
AL
226 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
227 }
79072805 228
9f68db38 229/* grandfather return to old style */
3280af22 230#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 231
8fa7f367
JH
232#ifdef DEBUGGING
233
bbf60fe6
DM
234/* how to interpret the yylval associated with the token */
235enum token_type {
236 TOKENTYPE_NONE,
237 TOKENTYPE_IVAL,
238 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
239 TOKENTYPE_PVAL,
240 TOKENTYPE_OPVAL,
241 TOKENTYPE_GVVAL
242};
243
6d4a66ac
NC
244static struct debug_tokens {
245 const int token;
246 enum token_type type;
247 const char *name;
248} const debug_tokens[] =
9041c2e3 249{
bbf60fe6
DM
250 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
251 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
252 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
253 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
254 { ARROW, TOKENTYPE_NONE, "ARROW" },
255 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
256 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
257 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
258 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
259 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 260 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
261 { DO, TOKENTYPE_NONE, "DO" },
262 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
263 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
264 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
265 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
266 { ELSE, TOKENTYPE_NONE, "ELSE" },
267 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
268 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
269 { FOR, TOKENTYPE_IVAL, "FOR" },
270 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
271 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
272 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
273 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
274 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
275 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 276 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
277 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
278 { IF, TOKENTYPE_IVAL, "IF" },
279 { LABEL, TOKENTYPE_PVAL, "LABEL" },
280 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
281 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
282 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
283 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
284 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
285 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
286 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
287 { MY, TOKENTYPE_IVAL, "MY" },
288 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
289 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
290 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
291 { OROP, TOKENTYPE_IVAL, "OROP" },
292 { OROR, TOKENTYPE_NONE, "OROR" },
293 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
294 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
295 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
296 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
297 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
298 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
299 { PREINC, TOKENTYPE_NONE, "PREINC" },
300 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
301 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
302 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
303 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
304 { SUB, TOKENTYPE_NONE, "SUB" },
305 { THING, TOKENTYPE_OPVAL, "THING" },
306 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
307 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
308 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
309 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
310 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
311 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 312 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
313 { WHILE, TOKENTYPE_IVAL, "WHILE" },
314 { WORD, TOKENTYPE_OPVAL, "WORD" },
315 { 0, TOKENTYPE_NONE, 0 }
316};
317
318/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 319
bbf60fe6 320STATIC int
f5bd084c 321S_tokereport(pTHX_ I32 rv)
bbf60fe6 322{
97aff369 323 dVAR;
bbf60fe6 324 if (DEBUG_T_TEST) {
bd61b366 325 const char *name = NULL;
bbf60fe6 326 enum token_type type = TOKENTYPE_NONE;
f54cb97a 327 const struct debug_tokens *p;
396482e1 328 SV* const report = newSVpvs("<== ");
bbf60fe6 329
f54cb97a 330 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
331 if (p->token == (int)rv) {
332 name = p->name;
333 type = p->type;
334 break;
335 }
336 }
337 if (name)
54667de8 338 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
339 else if ((char)rv > ' ' && (char)rv < '~')
340 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
341 else if (!rv)
396482e1 342 sv_catpvs(report, "EOF");
bbf60fe6
DM
343 else
344 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
345 switch (type) {
346 case TOKENTYPE_NONE:
347 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
348 break;
349 case TOKENTYPE_IVAL:
e4584336 350 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
351 break;
352 case TOKENTYPE_OPNUM:
353 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
354 PL_op_name[yylval.ival]);
355 break;
356 case TOKENTYPE_PVAL:
357 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
358 break;
359 case TOKENTYPE_OPVAL:
b6007c36 360 if (yylval.opval) {
401441c0 361 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 362 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
363 if (yylval.opval->op_type == OP_CONST) {
364 Perl_sv_catpvf(aTHX_ report, " %s",
365 SvPEEK(cSVOPx_sv(yylval.opval)));
366 }
367
368 }
401441c0 369 else
396482e1 370 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
371 break;
372 }
b6007c36 373 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
374 };
375 return (int)rv;
998054bd
SC
376}
377
b6007c36
DM
378
379/* print the buffer with suitable escapes */
380
381STATIC void
382S_printbuf(pTHX_ const char* fmt, const char* s)
383{
396482e1 384 SV* const tmp = newSVpvs("");
b6007c36
DM
385 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
386 SvREFCNT_dec(tmp);
387}
388
8fa7f367
JH
389#endif
390
ffb4593c
NT
391/*
392 * S_ao
393 *
c963b151
BD
394 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
395 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
396 */
397
76e3520e 398STATIC int
cea2e8a9 399S_ao(pTHX_ int toketype)
a0d0e21e 400{
97aff369 401 dVAR;
3280af22
NIS
402 if (*PL_bufptr == '=') {
403 PL_bufptr++;
a0d0e21e
LW
404 if (toketype == ANDAND)
405 yylval.ival = OP_ANDASSIGN;
406 else if (toketype == OROR)
407 yylval.ival = OP_ORASSIGN;
c963b151
BD
408 else if (toketype == DORDOR)
409 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
410 toketype = ASSIGNOP;
411 }
412 return toketype;
413}
414
ffb4593c
NT
415/*
416 * S_no_op
417 * When Perl expects an operator and finds something else, no_op
418 * prints the warning. It always prints "<something> found where
419 * operator expected. It prints "Missing semicolon on previous line?"
420 * if the surprise occurs at the start of the line. "do you need to
421 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
422 * where the compiler doesn't know if foo is a method call or a function.
423 * It prints "Missing operator before end of line" if there's nothing
424 * after the missing operator, or "... before <...>" if there is something
425 * after the missing operator.
426 */
427
76e3520e 428STATIC void
bfed75c6 429S_no_op(pTHX_ const char *what, char *s)
463ee0b2 430{
97aff369 431 dVAR;
9d4ba2ae
AL
432 char * const oldbp = PL_bufptr;
433 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 434
1189a94a
GS
435 if (!s)
436 s = oldbp;
07c798fb 437 else
1189a94a 438 PL_bufptr = s;
cea2e8a9 439 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
440 if (ckWARN_d(WARN_SYNTAX)) {
441 if (is_first)
442 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
443 "\t(Missing semicolon on previous line?)\n");
444 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 445 const char *t;
56da5a46
RGS
446 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
447 if (t < PL_bufptr && isSPACE(*t))
448 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
449 "\t(Do you need to predeclare %.*s?)\n",
551405c4 450 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
451 }
452 else {
453 assert(s >= oldbp);
454 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 455 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 456 }
07c798fb 457 }
3280af22 458 PL_bufptr = oldbp;
8990e307
LW
459}
460
ffb4593c
NT
461/*
462 * S_missingterm
463 * Complain about missing quote/regexp/heredoc terminator.
464 * If it's called with (char *)NULL then it cauterizes the line buffer.
465 * If we're in a delimited string and the delimiter is a control
466 * character, it's reformatted into a two-char sequence like ^C.
467 * This is fatal.
468 */
469
76e3520e 470STATIC void
cea2e8a9 471S_missingterm(pTHX_ char *s)
8990e307 472{
97aff369 473 dVAR;
8990e307
LW
474 char tmpbuf[3];
475 char q;
476 if (s) {
9d4ba2ae 477 char * const nl = strrchr(s,'\n');
d2719217 478 if (nl)
8990e307
LW
479 *nl = '\0';
480 }
9d116dd7
JH
481 else if (
482#ifdef EBCDIC
483 iscntrl(PL_multi_close)
484#else
485 PL_multi_close < 32 || PL_multi_close == 127
486#endif
487 ) {
8990e307 488 *tmpbuf = '^';
585ec06d 489 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
490 tmpbuf[2] = '\0';
491 s = tmpbuf;
492 }
493 else {
eb160463 494 *tmpbuf = (char)PL_multi_close;
8990e307
LW
495 tmpbuf[1] = '\0';
496 s = tmpbuf;
497 }
498 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 499 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 500}
79072805 501
ef89dcc3 502#define FEATURE_IS_ENABLED(name) \
0d863452 503 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 504 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
505/*
506 * S_feature_is_enabled
507 * Check whether the named feature is enabled.
508 */
509STATIC bool
510S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
511{
97aff369 512 dVAR;
0d863452 513 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
514 char he_name[32] = "feature_";
515 (void) strncpy(&he_name[8], name, 24);
516
517 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
518}
519
ffb4593c
NT
520/*
521 * Perl_deprecate
ffb4593c
NT
522 */
523
79072805 524void
bfed75c6 525Perl_deprecate(pTHX_ const char *s)
a0d0e21e 526{
599cee73 527 if (ckWARN(WARN_DEPRECATED))
9014280d 528 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
529}
530
12bcd1a6 531void
bfed75c6 532Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
533{
534 /* This function should NOT be called for any new deprecated warnings */
535 /* Use Perl_deprecate instead */
536 /* */
537 /* It is here to maintain backward compatibility with the pre-5.8 */
538 /* warnings category hierarchy. The "deprecated" category used to */
539 /* live under the "syntax" category. It is now a top-level category */
540 /* in its own right. */
541
542 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 543 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
544 "Use of %s is deprecated", s);
545}
546
ffb4593c 547/*
9cbb5ea2
GS
548 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
549 * utf16-to-utf8-reversed.
ffb4593c
NT
550 */
551
c39cd008
GS
552#ifdef PERL_CR_FILTER
553static void
554strip_return(SV *sv)
555{
95a20fc0 556 register const char *s = SvPVX_const(sv);
9d4ba2ae 557 register const char * const e = s + SvCUR(sv);
c39cd008
GS
558 /* outer loop optimized to do nothing if there are no CR-LFs */
559 while (s < e) {
560 if (*s++ == '\r' && *s == '\n') {
561 /* hit a CR-LF, need to copy the rest */
562 register char *d = s - 1;
563 *d++ = *s++;
564 while (s < e) {
565 if (*s == '\r' && s[1] == '\n')
566 s++;
567 *d++ = *s++;
568 }
569 SvCUR(sv) -= s - d;
570 return;
571 }
572 }
573}
a868473f 574
76e3520e 575STATIC I32
c39cd008 576S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 577{
f54cb97a 578 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
579 if (count > 0 && !maxlen)
580 strip_return(sv);
581 return count;
a868473f
NIS
582}
583#endif
584
ffb4593c
NT
585/*
586 * Perl_lex_start
9cbb5ea2
GS
587 * Initialize variables. Uses the Perl save_stack to save its state (for
588 * recursive calls to the parser).
ffb4593c
NT
589 */
590
a0d0e21e 591void
864dbfa3 592Perl_lex_start(pTHX_ SV *line)
79072805 593{
97aff369 594 dVAR;
cfd0369c 595 const char *s;
8990e307
LW
596 STRLEN len;
597
3280af22
NIS
598 SAVEI32(PL_lex_dojoin);
599 SAVEI32(PL_lex_brackets);
3280af22
NIS
600 SAVEI32(PL_lex_casemods);
601 SAVEI32(PL_lex_starts);
602 SAVEI32(PL_lex_state);
7766f137 603 SAVEVPTR(PL_lex_inpat);
3280af22 604 SAVEI32(PL_lex_inwhat);
18b09519
GS
605 if (PL_lex_state == LEX_KNOWNEXT) {
606 I32 toke = PL_nexttoke;
607 while (--toke >= 0) {
608 SAVEI32(PL_nexttype[toke]);
609 SAVEVPTR(PL_nextval[toke]);
610 }
611 SAVEI32(PL_nexttoke);
18b09519 612 }
57843af0 613 SAVECOPLINE(PL_curcop);
3280af22
NIS
614 SAVEPPTR(PL_bufptr);
615 SAVEPPTR(PL_bufend);
616 SAVEPPTR(PL_oldbufptr);
617 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
618 SAVEPPTR(PL_last_lop);
619 SAVEPPTR(PL_last_uni);
3280af22
NIS
620 SAVEPPTR(PL_linestart);
621 SAVESPTR(PL_linestr);
8edd5f42
RGS
622 SAVEGENERICPV(PL_lex_brackstack);
623 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 624 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
625 SAVESPTR(PL_lex_stuff);
626 SAVEI32(PL_lex_defer);
09bef843 627 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 628 SAVESPTR(PL_lex_repl);
bebdddfc
GS
629 SAVEINT(PL_expect);
630 SAVEINT(PL_lex_expect);
3280af22
NIS
631
632 PL_lex_state = LEX_NORMAL;
633 PL_lex_defer = 0;
634 PL_expect = XSTATE;
635 PL_lex_brackets = 0;
a02a5408
JC
636 Newx(PL_lex_brackstack, 120, char);
637 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
638 PL_lex_casemods = 0;
639 *PL_lex_casestack = '\0';
640 PL_lex_dojoin = 0;
641 PL_lex_starts = 0;
a0714e2c
SS
642 PL_lex_stuff = NULL;
643 PL_lex_repl = NULL;
3280af22 644 PL_lex_inpat = 0;
76be56bc 645 PL_nexttoke = 0;
3280af22 646 PL_lex_inwhat = 0;
09bef843 647 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
648 PL_linestr = line;
649 if (SvREADONLY(PL_linestr))
650 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 651 s = SvPV_const(PL_linestr, len);
6f27f9a7 652 if (!len || s[len-1] != ';') {
3280af22
NIS
653 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
654 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 655 sv_catpvs(PL_linestr, "\n;");
8990e307 656 }
3280af22
NIS
657 SvTEMP_off(PL_linestr);
658 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
659 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 660 PL_last_lop = PL_last_uni = NULL;
3280af22 661 PL_rsfp = 0;
79072805 662}
a687059c 663
ffb4593c
NT
664/*
665 * Perl_lex_end
9cbb5ea2
GS
666 * Finalizer for lexing operations. Must be called when the parser is
667 * done with the lexer.
ffb4593c
NT
668 */
669
463ee0b2 670void
864dbfa3 671Perl_lex_end(pTHX)
463ee0b2 672{
97aff369 673 dVAR;
3280af22 674 PL_doextract = FALSE;
463ee0b2
LW
675}
676
ffb4593c
NT
677/*
678 * S_incline
679 * This subroutine has nothing to do with tilting, whether at windmills
680 * or pinball tables. Its name is short for "increment line". It
57843af0 681 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 682 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
683 * # line 500 "foo.pm"
684 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
685 */
686
76e3520e 687STATIC void
cea2e8a9 688S_incline(pTHX_ char *s)
463ee0b2 689{
97aff369 690 dVAR;
463ee0b2
LW
691 char *t;
692 char *n;
73659bf1 693 char *e;
463ee0b2 694 char ch;
463ee0b2 695
57843af0 696 CopLINE_inc(PL_curcop);
463ee0b2
LW
697 if (*s++ != '#')
698 return;
bf4acbe4 699 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
700 if (strnEQ(s, "line", 4))
701 s += 4;
702 else
703 return;
084592ab 704 if (SPACE_OR_TAB(*s))
73659bf1 705 s++;
4e553d73 706 else
73659bf1 707 return;
bf4acbe4 708 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
709 if (!isDIGIT(*s))
710 return;
711 n = s;
712 while (isDIGIT(*s))
713 s++;
bf4acbe4 714 while (SPACE_OR_TAB(*s))
463ee0b2 715 s++;
73659bf1 716 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 717 s++;
73659bf1
GS
718 e = t + 1;
719 }
463ee0b2 720 else {
463ee0b2 721 for (t = s; !isSPACE(*t); t++) ;
73659bf1 722 e = t;
463ee0b2 723 }
bf4acbe4 724 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
725 e++;
726 if (*e != '\n' && *e != '\0')
727 return; /* false alarm */
728
463ee0b2
LW
729 ch = *t;
730 *t = '\0';
f4dd75d9 731 if (t - s > 0) {
8a5ee598 732#ifndef USE_ITHREADS
c4420975 733 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
734 STRLEN tmplen = cf ? strlen(cf) : 0;
735 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
736 /* must copy *{"::_<(eval N)[oldfilename:L]"}
737 * to *{"::_<newfilename"} */
738 char smallbuf[256], smallbuf2[256];
739 char *tmpbuf, *tmpbuf2;
8a5ee598 740 GV **gvp, *gv2;
e66cf94c
RGS
741 STRLEN tmplen2 = strlen(s);
742 if (tmplen + 3 < sizeof smallbuf)
743 tmpbuf = smallbuf;
744 else
745 Newx(tmpbuf, tmplen + 3, char);
746 if (tmplen2 + 3 < sizeof smallbuf2)
747 tmpbuf2 = smallbuf2;
748 else
749 Newx(tmpbuf2, tmplen2 + 3, char);
750 tmpbuf[0] = tmpbuf2[0] = '_';
751 tmpbuf[1] = tmpbuf2[1] = '<';
752 memcpy(tmpbuf + 2, cf, ++tmplen);
753 memcpy(tmpbuf2 + 2, s, ++tmplen2);
754 ++tmplen; ++tmplen2;
8a5ee598
RGS
755 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
756 if (gvp) {
757 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
758 if (!isGV(gv2))
759 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
760 /* adjust ${"::_<newfilename"} to store the new file name */
761 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
762 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
763 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
764 }
e66cf94c
RGS
765 if (tmpbuf != smallbuf) Safefree(tmpbuf);
766 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
767 }
8a5ee598 768#endif
05ec9bb3 769 CopFILE_free(PL_curcop);
57843af0 770 CopFILE_set(PL_curcop, s);
f4dd75d9 771 }
463ee0b2 772 *t = ch;
57843af0 773 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
774}
775
29595ff2
NC
776#ifdef PERL_MAD
777/* skip space before thistoken */
778
779STATIC char *
780S_skipspace0(pTHX_ register char *s)
781{
782 s = skipspace(s);
783 if (!PL_madskills)
784 return s;
785 if (skipwhite) {
786 if (!thiswhite)
787 thiswhite = newSVpvn("",0);
788 sv_catsv(thiswhite, skipwhite);
789 sv_free(skipwhite);
790 skipwhite = 0;
791 }
792 realtokenstart = s - SvPVX(PL_linestr);
793 return s;
794}
795
796/* skip space after thistoken */
797
798STATIC char *
799S_skipspace1(pTHX_ register char *s)
800{
801 char *start = s;
802 I32 startoff = start - SvPVX(PL_linestr);
803
804 s = skipspace(s);
805 if (!PL_madskills)
806 return s;
807 start = SvPVX(PL_linestr) + startoff;
808 if (!thistoken && realtokenstart >= 0) {
809 char *tstart = SvPVX(PL_linestr) + realtokenstart;
810 thistoken = newSVpvn(tstart, start - tstart);
811 }
812 realtokenstart = -1;
813 if (skipwhite) {
814 if (!nextwhite)
815 nextwhite = newSVpvn("",0);
816 sv_catsv(nextwhite, skipwhite);
817 sv_free(skipwhite);
818 skipwhite = 0;
819 }
820 return s;
821}
822
823STATIC char *
824S_skipspace2(pTHX_ register char *s, SV **svp)
825{
826 char *start = s;
827 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
828 I32 startoff = start - SvPVX(PL_linestr);
829 s = skipspace(s);
830 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
831 if (!PL_madskills || !svp)
832 return s;
833 start = SvPVX(PL_linestr) + startoff;
834 if (!thistoken && realtokenstart >= 0) {
835 char *tstart = SvPVX(PL_linestr) + realtokenstart;
836 thistoken = newSVpvn(tstart, start - tstart);
837 realtokenstart = -1;
838 }
839 if (skipwhite) {
840 if (!*svp)
841 *svp = newSVpvn("",0);
842 sv_setsv(*svp, skipwhite);
843 sv_free(skipwhite);
844 skipwhite = 0;
845 }
846
847 return s;
848}
849#endif
850
ffb4593c
NT
851/*
852 * S_skipspace
853 * Called to gobble the appropriate amount and type of whitespace.
854 * Skips comments as well.
855 */
856
76e3520e 857STATIC char *
cea2e8a9 858S_skipspace(pTHX_ register char *s)
a687059c 859{
97aff369 860 dVAR;
3280af22 861 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 862 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
863 s++;
864 return s;
865 }
866 for (;;) {
fd049845 867 STRLEN prevlen;
09bef843 868 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 869 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
870 while (s < PL_bufend && isSPACE(*s)) {
871 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
872 incline(s);
873 }
ffb4593c
NT
874
875 /* comment */
3280af22
NIS
876 if (s < PL_bufend && *s == '#') {
877 while (s < PL_bufend && *s != '\n')
463ee0b2 878 s++;
60e6418e 879 if (s < PL_bufend) {
463ee0b2 880 s++;
60e6418e
GS
881 if (PL_in_eval && !PL_rsfp) {
882 incline(s);
883 continue;
884 }
885 }
463ee0b2 886 }
ffb4593c
NT
887
888 /* only continue to recharge the buffer if we're at the end
889 * of the buffer, we're not reading from a source filter, and
890 * we're in normal lexing mode
891 */
09bef843
SB
892 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
893 PL_lex_state == LEX_FORMLINE)
463ee0b2 894 return s;
ffb4593c
NT
895
896 /* try to recharge the buffer */
9cbb5ea2 897 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 898 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2
GS
899 {
900 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
901 if (PL_minus_p) {
902 sv_setpv(PL_linestr,
903 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 904 PL_minus_n = PL_minus_p = 0;
a0d0e21e 905 }
01a19ab0
NC
906 else if (PL_minus_n) {
907 sv_setpvn(PL_linestr, ";}", 2);
908 PL_minus_n = 0;
909 }
a0d0e21e 910 else
4147a61b 911 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
912
913 /* reset variables for next time we lex */
9cbb5ea2
GS
914 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
915 = SvPVX(PL_linestr);
3280af22 916 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 917 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
918
919 /* Close the filehandle. Could be from -P preprocessor,
920 * STDIN, or a regular file. If we were reading code from
921 * STDIN (because the commandline held no -e or filename)
922 * then we don't close it, we reset it so the code can
923 * read from STDIN too.
924 */
925
3280af22
NIS
926 if (PL_preprocess && !PL_in_eval)
927 (void)PerlProc_pclose(PL_rsfp);
928 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
929 PerlIO_clearerr(PL_rsfp);
8990e307 930 else
3280af22 931 (void)PerlIO_close(PL_rsfp);
4608196e 932 PL_rsfp = NULL;
463ee0b2
LW
933 return s;
934 }
ffb4593c
NT
935
936 /* not at end of file, so we only read another line */
09bef843
SB
937 /* make corresponding updates to old pointers, for yyerror() */
938 oldprevlen = PL_oldbufptr - PL_bufend;
939 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
940 if (PL_last_uni)
941 oldunilen = PL_last_uni - PL_bufend;
942 if (PL_last_lop)
943 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
944 PL_linestart = PL_bufptr = s + prevlen;
945 PL_bufend = s + SvCUR(PL_linestr);
946 s = PL_bufptr;
09bef843
SB
947 PL_oldbufptr = s + oldprevlen;
948 PL_oldoldbufptr = s + oldoldprevlen;
949 if (PL_last_uni)
950 PL_last_uni = s + oldunilen;
951 if (PL_last_lop)
952 PL_last_lop = s + oldloplen;
a0d0e21e 953 incline(s);
ffb4593c
NT
954
955 /* debugger active and we're not compiling the debugger code,
956 * so store the line into the debugger's array of lines
957 */
3280af22 958 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 959 SV * const sv = newSV(0);
8990e307
LW
960
961 sv_upgrade(sv, SVt_PVMG);
3280af22 962 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 963 (void)SvIOK_on(sv);
45977657 964 SvIV_set(sv, 0);
36c7798d 965 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 966 }
463ee0b2 967 }
a687059c 968}
378cc40b 969
ffb4593c
NT
970/*
971 * S_check_uni
972 * Check the unary operators to ensure there's no ambiguity in how they're
973 * used. An ambiguous piece of code would be:
974 * rand + 5
975 * This doesn't mean rand() + 5. Because rand() is a unary operator,
976 * the +5 is its argument.
977 */
978
76e3520e 979STATIC void
cea2e8a9 980S_check_uni(pTHX)
ba106d47 981{
97aff369 982 dVAR;
2f3197b3 983 char *s;
a0d0e21e 984 char *t;
2f3197b3 985
3280af22 986 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 987 return;
3280af22
NIS
988 while (isSPACE(*PL_last_uni))
989 PL_last_uni++;
7e2040f0 990 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 991 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 992 return;
6136c704
AL
993
994 /* XXX Things like this are just so nasty. We shouldn't be modifying
995 source code, even if we realquick set it back. */
0453d815 996 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 997 const char ch = *s;
0453d815 998 *s = '\0';
9014280d 999 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 1000 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
1001 PL_last_uni);
1002 *s = ch;
1003 }
2f3197b3
LW
1004}
1005
ffb4593c
NT
1006/*
1007 * LOP : macro to build a list operator. Its behaviour has been replaced
1008 * with a subroutine, S_lop() for which LOP is just another name.
1009 */
1010
a0d0e21e
LW
1011#define LOP(f,x) return lop(f,x,s)
1012
ffb4593c
NT
1013/*
1014 * S_lop
1015 * Build a list operator (or something that might be one). The rules:
1016 * - if we have a next token, then it's a list operator [why?]
1017 * - if the next thing is an opening paren, then it's a function
1018 * - else it's a list operator
1019 */
1020
76e3520e 1021STATIC I32
a0be28da 1022S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1023{
97aff369 1024 dVAR;
79072805 1025 yylval.ival = f;
35c8bce7 1026 CLINE;
3280af22
NIS
1027 PL_expect = x;
1028 PL_bufptr = s;
1029 PL_last_lop = PL_oldbufptr;
eb160463 1030 PL_last_lop_op = (OPCODE)f;
3280af22 1031 if (PL_nexttoke)
bbf60fe6 1032 return REPORT(LSTOP);
79072805 1033 if (*s == '(')
bbf60fe6 1034 return REPORT(FUNC);
29595ff2 1035 s = PEEKSPACE(s);
79072805 1036 if (*s == '(')
bbf60fe6 1037 return REPORT(FUNC);
79072805 1038 else
bbf60fe6 1039 return REPORT(LSTOP);
79072805
LW
1040}
1041
ffb4593c
NT
1042/*
1043 * S_force_next
9cbb5ea2 1044 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1045 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
1046 * to know what token to return the next time the lexer is called. Caller
1047 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
1048 * handles the token correctly.
ffb4593c
NT
1049 */
1050
4e553d73 1051STATIC void
cea2e8a9 1052S_force_next(pTHX_ I32 type)
79072805 1053{
97aff369 1054 dVAR;
3280af22
NIS
1055 PL_nexttype[PL_nexttoke] = type;
1056 PL_nexttoke++;
1057 if (PL_lex_state != LEX_KNOWNEXT) {
1058 PL_lex_defer = PL_lex_state;
1059 PL_lex_expect = PL_expect;
1060 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
1061 }
1062}
1063
d0a148a6
NC
1064STATIC SV *
1065S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1066{
97aff369 1067 dVAR;
9d4ba2ae 1068 SV * const sv = newSVpvn(start,len);
bfed75c6 1069 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1070 SvUTF8_on(sv);
1071 return sv;
1072}
1073
ffb4593c
NT
1074/*
1075 * S_force_word
1076 * When the lexer knows the next thing is a word (for instance, it has
1077 * just seen -> and it knows that the next char is a word char, then
1078 * it calls S_force_word to stick the next word into the PL_next lookahead.
1079 *
1080 * Arguments:
b1b65b59 1081 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1082 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1083 * int check_keyword : if true, Perl checks to make sure the word isn't
1084 * a keyword (do this if the word is a label, e.g. goto FOO)
1085 * int allow_pack : if true, : characters will also be allowed (require,
1086 * use, etc. do this)
9cbb5ea2 1087 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1088 */
1089
76e3520e 1090STATIC char *
cea2e8a9 1091S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1092{
97aff369 1093 dVAR;
463ee0b2
LW
1094 register char *s;
1095 STRLEN len;
4e553d73 1096
29595ff2 1097 start = SKIPSPACE1(start);
463ee0b2 1098 s = start;
7e2040f0 1099 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1100 (allow_pack && *s == ':') ||
15f0808c 1101 (allow_initial_tick && *s == '\'') )
a0d0e21e 1102 {
3280af22
NIS
1103 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1104 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
1105 return start;
1106 if (token == METHOD) {
29595ff2 1107 s = SKIPSPACE1(s);
463ee0b2 1108 if (*s == '(')
3280af22 1109 PL_expect = XTERM;
463ee0b2 1110 else {
3280af22 1111 PL_expect = XOPERATOR;
463ee0b2 1112 }
79072805 1113 }
9ded7720 1114 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1115 = (OP*)newSVOP(OP_CONST,0,
1116 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1117 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1118 force_next(token);
1119 }
1120 return s;
1121}
1122
ffb4593c
NT
1123/*
1124 * S_force_ident
9cbb5ea2 1125 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1126 * text only contains the "foo" portion. The first argument is a pointer
1127 * to the "foo", and the second argument is the type symbol to prefix.
1128 * Forces the next token to be a "WORD".
9cbb5ea2 1129 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1130 */
1131
76e3520e 1132STATIC void
bfed75c6 1133S_force_ident(pTHX_ register const char *s, int kind)
79072805 1134{
97aff369 1135 dVAR;
79072805 1136 if (s && *s) {
90e5519e
NC
1137 const STRLEN len = strlen(s);
1138 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
9ded7720 1139 NEXTVAL_NEXTTOKE.opval = o;
79072805 1140 force_next(WORD);
748a9306 1141 if (kind) {
11343788 1142 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1143 /* XXX see note in pp_entereval() for why we forgo typo
1144 warnings if the symbol must be introduced in an eval.
1145 GSAR 96-10-12 */
90e5519e
NC
1146 gv_fetchpvn_flags(s, len,
1147 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1148 : GV_ADD,
1149 kind == '$' ? SVt_PV :
1150 kind == '@' ? SVt_PVAV :
1151 kind == '%' ? SVt_PVHV :
a0d0e21e 1152 SVt_PVGV
90e5519e 1153 );
748a9306 1154 }
79072805
LW
1155 }
1156}
1157
1571675a
GS
1158NV
1159Perl_str_to_version(pTHX_ SV *sv)
1160{
1161 NV retval = 0.0;
1162 NV nshift = 1.0;
1163 STRLEN len;
cfd0369c 1164 const char *start = SvPV_const(sv,len);
9d4ba2ae 1165 const char * const end = start + len;
504618e9 1166 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1167 while (start < end) {
ba210ebe 1168 STRLEN skip;
1571675a
GS
1169 UV n;
1170 if (utf)
9041c2e3 1171 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1172 else {
1173 n = *(U8*)start;
1174 skip = 1;
1175 }
1176 retval += ((NV)n)/nshift;
1177 start += skip;
1178 nshift *= 1000;
1179 }
1180 return retval;
1181}
1182
4e553d73 1183/*
ffb4593c
NT
1184 * S_force_version
1185 * Forces the next token to be a version number.
e759cc13
RGS
1186 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1187 * and if "guessing" is TRUE, then no new token is created (and the caller
1188 * must use an alternative parsing method).
ffb4593c
NT
1189 */
1190
76e3520e 1191STATIC char *
e759cc13 1192S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1193{
97aff369 1194 dVAR;
5f66b61c 1195 OP *version = NULL;
44dcb63b 1196 char *d;
89bfa8cd 1197
29595ff2 1198 s = SKIPSPACE1(s);
89bfa8cd 1199
44dcb63b 1200 d = s;
dd629d5b 1201 if (*d == 'v')
44dcb63b 1202 d++;
44dcb63b 1203 if (isDIGIT(*d)) {
e759cc13
RGS
1204 while (isDIGIT(*d) || *d == '_' || *d == '.')
1205 d++;
9f3d182e 1206 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1207 SV *ver;
b73d6f50 1208 s = scan_num(s, &yylval);
89bfa8cd 1209 version = yylval.opval;
dd629d5b
GS
1210 ver = cSVOPx(version)->op_sv;
1211 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1212 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1213 SvNV_set(ver, str_to_version(ver));
1571675a 1214 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1215 }
89bfa8cd 1216 }
e759cc13
RGS
1217 else if (guessing)
1218 return s;
89bfa8cd
PP
1219 }
1220
1221 /* NOTE: The parser sees the package name and the VERSION swapped */
9ded7720 1222 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1223 force_next(WORD);
89bfa8cd 1224
e759cc13 1225 return s;
89bfa8cd
PP
1226}
1227
ffb4593c
NT
1228/*
1229 * S_tokeq
1230 * Tokenize a quoted string passed in as an SV. It finds the next
1231 * chunk, up to end of string or a backslash. It may make a new
1232 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1233 * turns \\ into \.
1234 */
1235
76e3520e 1236STATIC SV *
cea2e8a9 1237S_tokeq(pTHX_ SV *sv)
79072805 1238{
97aff369 1239 dVAR;
79072805
LW
1240 register char *s;
1241 register char *send;
1242 register char *d;
b3ac6de7
IZ
1243 STRLEN len = 0;
1244 SV *pv = sv;
79072805
LW
1245
1246 if (!SvLEN(sv))
b3ac6de7 1247 goto finish;
79072805 1248
a0d0e21e 1249 s = SvPV_force(sv, len);
21a311ee 1250 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1251 goto finish;
463ee0b2 1252 send = s + len;
79072805
LW
1253 while (s < send && *s != '\\')
1254 s++;
1255 if (s == send)
b3ac6de7 1256 goto finish;
79072805 1257 d = s;
be4731d2 1258 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1259 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1260 if (SvUTF8(sv))
1261 SvUTF8_on(pv);
1262 }
79072805
LW
1263 while (s < send) {
1264 if (*s == '\\') {
a0d0e21e 1265 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1266 s++; /* all that, just for this */
1267 }
1268 *d++ = *s++;
1269 }
1270 *d = '\0';
95a20fc0 1271 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1272 finish:
3280af22 1273 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1274 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1275 return sv;
1276}
1277
ffb4593c
NT
1278/*
1279 * Now come three functions related to double-quote context,
1280 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1281 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1282 * interact with PL_lex_state, and create fake ( ... ) argument lists
1283 * to handle functions and concatenation.
1284 * They assume that whoever calls them will be setting up a fake
1285 * join call, because each subthing puts a ',' after it. This lets
1286 * "lower \luPpEr"
1287 * become
1288 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1289 *
1290 * (I'm not sure whether the spurious commas at the end of lcfirst's
1291 * arguments and join's arguments are created or not).
1292 */
1293
1294/*
1295 * S_sublex_start
1296 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1297 *
1298 * Pattern matching will set PL_lex_op to the pattern-matching op to
1299 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1300 *
1301 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1302 *
1303 * Everything else becomes a FUNC.
1304 *
1305 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1306 * had an OP_CONST or OP_READLINE). This just sets us up for a
1307 * call to S_sublex_push().
1308 */
1309
76e3520e 1310STATIC I32
cea2e8a9 1311S_sublex_start(pTHX)
79072805 1312{
97aff369 1313 dVAR;
0d46e09a 1314 register const I32 op_type = yylval.ival;
79072805
LW
1315
1316 if (op_type == OP_NULL) {
3280af22 1317 yylval.opval = PL_lex_op;
5f66b61c 1318 PL_lex_op = NULL;
79072805
LW
1319 return THING;
1320 }
1321 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1322 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1323
1324 if (SvTYPE(sv) == SVt_PVIV) {
1325 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1326 STRLEN len;
96a5add6 1327 const char * const p = SvPV_const(sv, len);
f54cb97a 1328 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1329 if (SvUTF8(sv))
1330 SvUTF8_on(nsv);
b3ac6de7
IZ
1331 SvREFCNT_dec(sv);
1332 sv = nsv;
4e553d73 1333 }
b3ac6de7 1334 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1335 PL_lex_stuff = NULL;
6f33ba73
RGS
1336 /* Allow <FH> // "foo" */
1337 if (op_type == OP_READLINE)
1338 PL_expect = XTERMORDORDOR;
79072805
LW
1339 return THING;
1340 }
1341
3280af22
NIS
1342 PL_sublex_info.super_state = PL_lex_state;
1343 PL_sublex_info.sub_inwhat = op_type;
1344 PL_sublex_info.sub_op = PL_lex_op;
1345 PL_lex_state = LEX_INTERPPUSH;
55497cff 1346
3280af22
NIS
1347 PL_expect = XTERM;
1348 if (PL_lex_op) {
1349 yylval.opval = PL_lex_op;
5f66b61c 1350 PL_lex_op = NULL;
55497cff
PP
1351 return PMFUNC;
1352 }
1353 else
1354 return FUNC;
1355}
1356
ffb4593c
NT
1357/*
1358 * S_sublex_push
1359 * Create a new scope to save the lexing state. The scope will be
1360 * ended in S_sublex_done. Returns a '(', starting the function arguments
1361 * to the uc, lc, etc. found before.
1362 * Sets PL_lex_state to LEX_INTERPCONCAT.
1363 */
1364
76e3520e 1365STATIC I32
cea2e8a9 1366S_sublex_push(pTHX)
55497cff 1367{
27da23d5 1368 dVAR;
f46d017c 1369 ENTER;
55497cff 1370
3280af22
NIS
1371 PL_lex_state = PL_sublex_info.super_state;
1372 SAVEI32(PL_lex_dojoin);
1373 SAVEI32(PL_lex_brackets);
3280af22
NIS
1374 SAVEI32(PL_lex_casemods);
1375 SAVEI32(PL_lex_starts);
1376 SAVEI32(PL_lex_state);
7766f137 1377 SAVEVPTR(PL_lex_inpat);
3280af22 1378 SAVEI32(PL_lex_inwhat);
57843af0 1379 SAVECOPLINE(PL_curcop);
3280af22 1380 SAVEPPTR(PL_bufptr);
8452ff4b 1381 SAVEPPTR(PL_bufend);
3280af22
NIS
1382 SAVEPPTR(PL_oldbufptr);
1383 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1384 SAVEPPTR(PL_last_lop);
1385 SAVEPPTR(PL_last_uni);
3280af22
NIS
1386 SAVEPPTR(PL_linestart);
1387 SAVESPTR(PL_linestr);
8edd5f42
RGS
1388 SAVEGENERICPV(PL_lex_brackstack);
1389 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1390
1391 PL_linestr = PL_lex_stuff;
a0714e2c 1392 PL_lex_stuff = NULL;
3280af22 1393
9cbb5ea2
GS
1394 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1395 = SvPVX(PL_linestr);
3280af22 1396 PL_bufend += SvCUR(PL_linestr);
bd61b366 1397 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1398 SAVEFREESV(PL_linestr);
1399
1400 PL_lex_dojoin = FALSE;
1401 PL_lex_brackets = 0;
a02a5408
JC
1402 Newx(PL_lex_brackstack, 120, char);
1403 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1404 PL_lex_casemods = 0;
1405 *PL_lex_casestack = '\0';
1406 PL_lex_starts = 0;
1407 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1408 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1409
1410 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1411 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1412 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1413 else
5f66b61c 1414 PL_lex_inpat = NULL;
79072805 1415
55497cff 1416 return '(';
79072805
LW
1417}
1418
ffb4593c
NT
1419/*
1420 * S_sublex_done
1421 * Restores lexer state after a S_sublex_push.
1422 */
1423
76e3520e 1424STATIC I32
cea2e8a9 1425S_sublex_done(pTHX)
79072805 1426{
27da23d5 1427 dVAR;
3280af22 1428 if (!PL_lex_starts++) {
396482e1 1429 SV * const sv = newSVpvs("");
9aa983d2
JH
1430 if (SvUTF8(PL_linestr))
1431 SvUTF8_on(sv);
3280af22 1432 PL_expect = XOPERATOR;
9aa983d2 1433 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1434 return THING;
1435 }
1436
3280af22
NIS
1437 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1438 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1439 return yylex();
79072805
LW
1440 }
1441
ffb4593c 1442 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1443 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1444 PL_linestr = PL_lex_repl;
1445 PL_lex_inpat = 0;
1446 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1447 PL_bufend += SvCUR(PL_linestr);
bd61b366 1448 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1449 SAVEFREESV(PL_linestr);
1450 PL_lex_dojoin = FALSE;
1451 PL_lex_brackets = 0;
3280af22
NIS
1452 PL_lex_casemods = 0;
1453 *PL_lex_casestack = '\0';
1454 PL_lex_starts = 0;
25da4f38 1455 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1456 PL_lex_state = LEX_INTERPNORMAL;
1457 PL_lex_starts++;
e9fa98b2
HS
1458 /* we don't clear PL_lex_repl here, so that we can check later
1459 whether this is an evalled subst; that means we rely on the
1460 logic to ensure sublex_done() is called again only via the
1461 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1462 }
e9fa98b2 1463 else {
3280af22 1464 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1465 PL_lex_repl = NULL;
e9fa98b2 1466 }
79072805 1467 return ',';
ffed7fef
LW
1468 }
1469 else {
f46d017c 1470 LEAVE;
3280af22
NIS
1471 PL_bufend = SvPVX(PL_linestr);
1472 PL_bufend += SvCUR(PL_linestr);
1473 PL_expect = XOPERATOR;
09bef843 1474 PL_sublex_info.sub_inwhat = 0;
79072805 1475 return ')';
ffed7fef
LW
1476 }
1477}
1478
02aa26ce
NT
1479/*
1480 scan_const
1481
1482 Extracts a pattern, double-quoted string, or transliteration. This
1483 is terrifying code.
1484
3280af22
NIS
1485 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1486 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1487 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1488
9b599b2a
GS
1489 Returns a pointer to the character scanned up to. Iff this is
1490 advanced from the start pointer supplied (ie if anything was
1491 successfully parsed), will leave an OP for the substring scanned
1492 in yylval. Caller must intuit reason for not parsing further
1493 by looking at the next characters herself.
1494
02aa26ce
NT
1495 In patterns:
1496 backslashes:
1497 double-quoted style: \r and \n
1498 regexp special ones: \D \s
1499 constants: \x3
1500 backrefs: \1 (deprecated in substitution replacements)
1501 case and quoting: \U \Q \E
1502 stops on @ and $, but not for $ as tail anchor
1503
1504 In transliterations:
1505 characters are VERY literal, except for - not at the start or end
1506 of the string, which indicates a range. scan_const expands the
1507 range to the full set of intermediate characters.
1508
1509 In double-quoted strings:
1510 backslashes:
1511 double-quoted style: \r and \n
1512 constants: \x3
1513 backrefs: \1 (deprecated)
1514 case and quoting: \U \Q \E
1515 stops on @ and $
1516
1517 scan_const does *not* construct ops to handle interpolated strings.
1518 It stops processing as soon as it finds an embedded $ or @ variable
1519 and leaves it to the caller to work out what's going on.
1520
da6eedaa 1521 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1522
1523 $ in pattern could be $foo or could be tail anchor. Assumption:
1524 it's a tail anchor if $ is the last thing in the string, or if it's
1525 followed by one of ")| \n\t"
1526
1527 \1 (backreferences) are turned into $1
1528
1529 The structure of the code is
1530 while (there's a character to process) {
1531 handle transliteration ranges
1532 skip regexp comments
1533 skip # initiated comments in //x patterns
1534 check for embedded @foo
1535 check for embedded scalars
1536 if (backslash) {
1537 leave intact backslashes from leave (below)
1538 deprecate \1 in strings and sub replacements
1539 handle string-changing backslashes \l \U \Q \E, etc.
1540 switch (what was escaped) {
1541 handle - in a transliteration (becomes a literal -)
1542 handle \132 octal characters
1543 handle 0x15 hex characters
1544 handle \cV (control V)
1545 handle printf backslashes (\f, \r, \n, etc)
1546 } (end switch)
1547 } (end if backslash)
1548 } (end while character to read)
4e553d73 1549
02aa26ce
NT
1550*/
1551
76e3520e 1552STATIC char *
cea2e8a9 1553S_scan_const(pTHX_ char *start)
79072805 1554{
97aff369 1555 dVAR;
3280af22 1556 register char *send = PL_bufend; /* end of the constant */
561b68a9 1557 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1558 register char *s = start; /* start of the constant */
1559 register char *d = SvPVX(sv); /* destination for copies */
1560 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1561 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1562 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1563 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1564 UV uv;
4c3a8340
ST
1565#ifdef EBCDIC
1566 UV literal_endpoint = 0;
1567#endif
012bcf8d 1568
dff6d3cd 1569 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1570 PL_lex_inpat
b6d5fef8 1571 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1572 : "";
79072805 1573
2b9d42f0
NIS
1574 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1575 /* If we are doing a trans and we know we want UTF8 set expectation */
1576 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1577 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1578 }
1579
1580
79072805 1581 while (s < send || dorange) {
02aa26ce 1582 /* get transliterations out of the way (they're most literal) */
3280af22 1583 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1584 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1585 if (dorange) {
1ba5c669
JH
1586 I32 i; /* current expanded character */
1587 I32 min; /* first character in range */
1588 I32 max; /* last character in range */
02aa26ce 1589
2b9d42f0 1590 if (has_utf8) {
9d4ba2ae 1591 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1592 char *e = d++;
1593 while (e-- > c)
1594 *(e + 1) = *e;
25716404 1595 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1596 /* mark the range as done, and continue */
1597 dorange = FALSE;
1598 didrange = TRUE;
1599 continue;
1600 }
2b9d42f0 1601
95a20fc0 1602 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1603 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1604 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1605 d -= 2; /* eat the first char and the - */
1606
8ada0baa
JH
1607 min = (U8)*d; /* first char in range */
1608 max = (U8)d[1]; /* last char in range */
1609
c2e66d9e 1610 if (min > max) {
01ec43d0 1611 Perl_croak(aTHX_
d1573ac7 1612 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1613 (char)min, (char)max);
c2e66d9e
GS
1614 }
1615
c7f1f016 1616#ifdef EBCDIC
4c3a8340
ST
1617 if (literal_endpoint == 2 &&
1618 ((isLOWER(min) && isLOWER(max)) ||
1619 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1620 if (isLOWER(min)) {
1621 for (i = min; i <= max; i++)
1622 if (isLOWER(i))
db42d148 1623 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1624 } else {
1625 for (i = min; i <= max; i++)
1626 if (isUPPER(i))
db42d148 1627 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1628 }
1629 }
1630 else
1631#endif
1632 for (i = min; i <= max; i++)
eb160463 1633 *d++ = (char)i;
02aa26ce
NT
1634
1635 /* mark the range as done, and continue */
79072805 1636 dorange = FALSE;
01ec43d0 1637 didrange = TRUE;
4c3a8340
ST
1638#ifdef EBCDIC
1639 literal_endpoint = 0;
1640#endif
79072805 1641 continue;
4e553d73 1642 }
02aa26ce
NT
1643
1644 /* range begins (ignore - as first or last char) */
79072805 1645 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1646 if (didrange) {
1fafa243 1647 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1648 }
2b9d42f0 1649 if (has_utf8) {
25716404 1650 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1651 s++;
1652 continue;
1653 }
79072805
LW
1654 dorange = TRUE;
1655 s++;
01ec43d0
GS
1656 }
1657 else {
1658 didrange = FALSE;
4c3a8340
ST
1659#ifdef EBCDIC
1660 literal_endpoint = 0;
1661#endif
01ec43d0 1662 }
79072805 1663 }
02aa26ce
NT
1664
1665 /* if we get here, we're not doing a transliteration */
1666
0f5d15d6
IZ
1667 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1668 except for the last char, which will be done separately. */
3280af22 1669 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1670 if (s[2] == '#') {
e994fd66 1671 while (s+1 < send && *s != ')')
db42d148 1672 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1673 }
1674 else if (s[2] == '{' /* This should match regcomp.c */
1675 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1676 {
cc6b7395 1677 I32 count = 1;
0f5d15d6 1678 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1679 char c;
1680
d9f97599
GS
1681 while (count && (c = *regparse)) {
1682 if (c == '\\' && regparse[1])
1683 regparse++;
4e553d73 1684 else if (c == '{')
cc6b7395 1685 count++;
4e553d73 1686 else if (c == '}')
cc6b7395 1687 count--;
d9f97599 1688 regparse++;
cc6b7395 1689 }
e994fd66 1690 if (*regparse != ')')
5bdf89e7 1691 regparse--; /* Leave one char for continuation. */
0f5d15d6 1692 while (s < regparse)
db42d148 1693 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1694 }
748a9306 1695 }
02aa26ce
NT
1696
1697 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1698 else if (*s == '#' && PL_lex_inpat &&
1699 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1700 while (s+1 < send && *s != '\n')
db42d148 1701 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1702 }
02aa26ce 1703
5d1d4326 1704 /* check for embedded arrays
da6eedaa 1705 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1706 */
7e2040f0 1707 else if (*s == '@' && s[1]
5d1d4326 1708 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1709 break;
02aa26ce
NT
1710
1711 /* check for embedded scalars. only stop if we're sure it's a
1712 variable.
1713 */
79072805 1714 else if (*s == '$') {
3280af22 1715 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1716 break;
6002328a 1717 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1718 break; /* in regexp, $ might be tail anchor */
1719 }
02aa26ce 1720
2b9d42f0
NIS
1721 /* End of else if chain - OP_TRANS rejoin rest */
1722
02aa26ce 1723 /* backslashes */
79072805
LW
1724 if (*s == '\\' && s+1 < send) {
1725 s++;
02aa26ce
NT
1726
1727 /* some backslashes we leave behind */
c9f97d15 1728 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1729 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1730 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1731 continue;
1732 }
02aa26ce
NT
1733
1734 /* deprecate \1 in strings and substitution replacements */
3280af22 1735 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1736 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1737 {
599cee73 1738 if (ckWARN(WARN_SYNTAX))
9014280d 1739 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1740 *--s = '$';
1741 break;
1742 }
02aa26ce
NT
1743
1744 /* string-change backslash escapes */
3280af22 1745 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1746 --s;
1747 break;
1748 }
02aa26ce
NT
1749
1750 /* if we get here, it's either a quoted -, or a digit */
79072805 1751 switch (*s) {
02aa26ce
NT
1752
1753 /* quoted - in transliterations */
79072805 1754 case '-':
3280af22 1755 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1756 *d++ = *s++;
1757 continue;
1758 }
1759 /* FALL THROUGH */
1760 default:
11b8faa4 1761 {
041457d9
DM
1762 if (isALNUM(*s) &&
1763 *s != '_' &&
1764 ckWARN(WARN_MISC))
9014280d 1765 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1766 "Unrecognized escape \\%c passed through",
1767 *s);
1768 /* default action is to copy the quoted character */
f9a63242 1769 goto default_action;
11b8faa4 1770 }
02aa26ce
NT
1771
1772 /* \132 indicates an octal constant */
79072805
LW
1773 case '0': case '1': case '2': case '3':
1774 case '4': case '5': case '6': case '7':
ba210ebe 1775 {
53305cf1
NC
1776 I32 flags = 0;
1777 STRLEN len = 3;
1778 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1779 s += len;
1780 }
012bcf8d 1781 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1782
1783 /* \x24 indicates a hex constant */
79072805 1784 case 'x':
a0ed51b3
LW
1785 ++s;
1786 if (*s == '{') {
9d4ba2ae 1787 char* const e = strchr(s, '}');
a4c04bdc
NC
1788 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1789 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1790 STRLEN len;
355860ce 1791
53305cf1 1792 ++s;
adaeee49 1793 if (!e) {
a0ed51b3 1794 yyerror("Missing right brace on \\x{}");
355860ce 1795 continue;
ba210ebe 1796 }
53305cf1
NC
1797 len = e - s;
1798 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1799 s = e + 1;
a0ed51b3
LW
1800 }
1801 else {
ba210ebe 1802 {
53305cf1 1803 STRLEN len = 2;
a4c04bdc 1804 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1805 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1806 s += len;
1807 }
012bcf8d
GS
1808 }
1809
1810 NUM_ESCAPE_INSERT:
1811 /* Insert oct or hex escaped character.
301d3d20 1812 * There will always enough room in sv since such
db42d148 1813 * escapes will be longer than any UTF-8 sequence
301d3d20 1814 * they can end up as. */
ba7cea30 1815
c7f1f016
NIS
1816 /* We need to map to chars to ASCII before doing the tests
1817 to cover EBCDIC
1818 */
c4d5f83a 1819 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1820 if (!has_utf8 && uv > 255) {
301d3d20
JH
1821 /* Might need to recode whatever we have
1822 * accumulated so far if it contains any
1823 * hibit chars.
1824 *
1825 * (Can't we keep track of that and avoid
1826 * this rescan? --jhi)
012bcf8d 1827 */
c7f1f016 1828 int hicount = 0;
63cd0674
NIS
1829 U8 *c;
1830 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1831 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1832 hicount++;
db42d148 1833 }
012bcf8d 1834 }
63cd0674 1835 if (hicount) {
9d4ba2ae 1836 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1837 U8 *src, *dst;
1838 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1839 src = (U8 *)d - 1;
1840 dst = src+hicount;
1841 d += hicount;
cfd0369c 1842 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1843 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1844 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1845 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1846 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1847 }
1848 else {
63cd0674 1849 *dst-- = *src;
012bcf8d 1850 }
c7f1f016 1851 src--;
012bcf8d
GS
1852 }
1853 }
1854 }
1855
9aa983d2 1856 if (has_utf8 || uv > 255) {
9041c2e3 1857 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1858 has_utf8 = TRUE;
f9a63242
JH
1859 if (PL_lex_inwhat == OP_TRANS &&
1860 PL_sublex_info.sub_op) {
1861 PL_sublex_info.sub_op->op_private |=
1862 (PL_lex_repl ? OPpTRANS_FROM_UTF
1863 : OPpTRANS_TO_UTF);
f9a63242 1864 }
012bcf8d 1865 }
a0ed51b3 1866 else {
012bcf8d 1867 *d++ = (char)uv;
a0ed51b3 1868 }
012bcf8d
GS
1869 }
1870 else {
c4d5f83a 1871 *d++ = (char) uv;
a0ed51b3 1872 }
79072805 1873 continue;
02aa26ce 1874
b239daa5 1875 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1876 case 'N':
55eda711 1877 ++s;
423cee85
JH
1878 if (*s == '{') {
1879 char* e = strchr(s, '}');
155aba94 1880 SV *res;
423cee85 1881 STRLEN len;
cfd0369c 1882 const char *str;
4e553d73 1883
423cee85 1884 if (!e) {
5777a3f7 1885 yyerror("Missing right brace on \\N{}");
423cee85
JH
1886 e = s - 1;
1887 goto cont_scan;
1888 }
dbc0d4f2
JH
1889 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1890 /* \N{U+...} */
1891 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1892 PERL_SCAN_DISALLOW_PREFIX;
1893 s += 3;
1894 len = e - s;
1895 uv = grok_hex(s, &len, &flags, NULL);
1896 s = e + 1;
1897 goto NUM_ESCAPE_INSERT;
1898 }
55eda711 1899 res = newSVpvn(s + 1, e - s - 1);
bd61b366 1900 res = new_constant( NULL, 0, "charnames",
a0714e2c 1901 res, NULL, "\\N{...}" );
f9a63242
JH
1902 if (has_utf8)
1903 sv_utf8_upgrade(res);
cfd0369c 1904 str = SvPV_const(res,len);
1c47067b
JH
1905#ifdef EBCDIC_NEVER_MIND
1906 /* charnames uses pack U and that has been
1907 * recently changed to do the below uni->native
1908 * mapping, so this would be redundant (and wrong,
1909 * the code point would be doubly converted).
1910 * But leave this in just in case the pack U change
1911 * gets revoked, but the semantics is still
1912 * desireable for charnames. --jhi */
cddc7ef4 1913 {
cfd0369c 1914 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1915
1916 if (uv < 0x100) {
89ebb4a3 1917 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1918
1919 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1920 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1921 str = SvPV_const(res, len);
cddc7ef4
JH
1922 }
1923 }
1924#endif
89491803 1925 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1926 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1927 SvCUR_set(sv, d - ostart);
1928 SvPOK_on(sv);
e4f3eed8 1929 *d = '\0';
f08d6ad9 1930 sv_utf8_upgrade(sv);
d2f449dd 1931 /* this just broke our allocation above... */
eb160463 1932 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1933 d = SvPVX(sv) + SvCUR(sv);
89491803 1934 has_utf8 = TRUE;
f08d6ad9 1935 }
eb160463 1936 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1937 const char * const odest = SvPVX_const(sv);
423cee85 1938
8973db79 1939 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1940 d = SvPVX(sv) + (d - odest);
1941 }
1942 Copy(str, d, len, char);
1943 d += len;
1944 SvREFCNT_dec(res);
1945 cont_scan:
1946 s = e + 1;
1947 }
1948 else
5777a3f7 1949 yyerror("Missing braces on \\N{}");
423cee85
JH
1950 continue;
1951
02aa26ce 1952 /* \c is a control character */
79072805
LW
1953 case 'c':
1954 s++;
961ce445 1955 if (s < send) {
ba210ebe 1956 U8 c = *s++;
c7f1f016
NIS
1957#ifdef EBCDIC
1958 if (isLOWER(c))
1959 c = toUPPER(c);
1960#endif
db42d148 1961 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1962 }
961ce445
RGS
1963 else {
1964 yyerror("Missing control char name in \\c");
1965 }
79072805 1966 continue;
02aa26ce
NT
1967
1968 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1969 case 'b':
db42d148 1970 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1971 break;
1972 case 'n':
db42d148 1973 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1974 break;
1975 case 'r':
db42d148 1976 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1977 break;
1978 case 'f':
db42d148 1979 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1980 break;
1981 case 't':
db42d148 1982 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1983 break;
34a3fe2a 1984 case 'e':
db42d148 1985 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1986 break;
1987 case 'a':
db42d148 1988 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1989 break;
02aa26ce
NT
1990 } /* end switch */
1991
79072805
LW
1992 s++;
1993 continue;
02aa26ce 1994 } /* end if (backslash) */
4c3a8340
ST
1995#ifdef EBCDIC
1996 else
1997 literal_endpoint++;
1998#endif
02aa26ce 1999
f9a63242 2000 default_action:
2b9d42f0
NIS
2001 /* If we started with encoded form, or already know we want it
2002 and then encode the next character */
2003 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2004 STRLEN len = 1;
5f66b61c
AL
2005 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2006 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2007 s += len;
2008 if (need > len) {
2009 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2010 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2011 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2012 }
5f66b61c 2013 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2014 has_utf8 = TRUE;
2015 }
2016 else {
2017 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2018 }
02aa26ce
NT
2019 } /* while loop to process each character */
2020
2021 /* terminate the string and set up the sv */
79072805 2022 *d = '\0';
95a20fc0 2023 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2024 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2025 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2026
79072805 2027 SvPOK_on(sv);
9f4817db 2028 if (PL_encoding && !has_utf8) {
d0063567
DK
2029 sv_recode_to_utf8(sv, PL_encoding);
2030 if (SvUTF8(sv))
2031 has_utf8 = TRUE;
9f4817db 2032 }
2b9d42f0 2033 if (has_utf8) {
7e2040f0 2034 SvUTF8_on(sv);
2b9d42f0 2035 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2036 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2037 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2038 }
2039 }
79072805 2040
02aa26ce 2041 /* shrink the sv if we allocated more than we used */
79072805 2042 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2043 SvPV_shrink_to_cur(sv);
79072805 2044 }
02aa26ce 2045
9b599b2a 2046 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2047 if (s > PL_bufptr) {
2048 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2049 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2050 sv, NULL,
4e553d73 2051 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2052 ? "tr"
3280af22 2053 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2054 ? "s"
2055 : "qq")));
79072805 2056 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2057 } else
8990e307 2058 SvREFCNT_dec(sv);
79072805
LW
2059 return s;
2060}
2061
ffb4593c
NT
2062/* S_intuit_more
2063 * Returns TRUE if there's more to the expression (e.g., a subscript),
2064 * FALSE otherwise.
ffb4593c
NT
2065 *
2066 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2067 *
2068 * ->[ and ->{ return TRUE
2069 * { and [ outside a pattern are always subscripts, so return TRUE
2070 * if we're outside a pattern and it's not { or [, then return FALSE
2071 * if we're in a pattern and the first char is a {
2072 * {4,5} (any digits around the comma) returns FALSE
2073 * if we're in a pattern and the first char is a [
2074 * [] returns FALSE
2075 * [SOMETHING] has a funky algorithm to decide whether it's a
2076 * character class or not. It has to deal with things like
2077 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2078 * anything else returns TRUE
2079 */
2080
9cbb5ea2
GS
2081/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2082
76e3520e 2083STATIC int
cea2e8a9 2084S_intuit_more(pTHX_ register char *s)
79072805 2085{
97aff369 2086 dVAR;
3280af22 2087 if (PL_lex_brackets)
79072805
LW
2088 return TRUE;
2089 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2090 return TRUE;
2091 if (*s != '{' && *s != '[')
2092 return FALSE;
3280af22 2093 if (!PL_lex_inpat)
79072805
LW
2094 return TRUE;
2095
2096 /* In a pattern, so maybe we have {n,m}. */
2097 if (*s == '{') {
2098 s++;
2099 if (!isDIGIT(*s))
2100 return TRUE;
2101 while (isDIGIT(*s))
2102 s++;
2103 if (*s == ',')
2104 s++;
2105 while (isDIGIT(*s))
2106 s++;
2107 if (*s == '}')
2108 return FALSE;
2109 return TRUE;
2110
2111 }
2112
2113 /* On the other hand, maybe we have a character class */
2114
2115 s++;
2116 if (*s == ']' || *s == '^')
2117 return FALSE;
2118 else {
ffb4593c 2119 /* this is terrifying, and it works */
79072805
LW
2120 int weight = 2; /* let's weigh the evidence */
2121 char seen[256];
f27ffc4a 2122 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2123 const char * const send = strchr(s,']');
3280af22 2124 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2125
2126 if (!send) /* has to be an expression */
2127 return TRUE;
2128
2129 Zero(seen,256,char);
2130 if (*s == '$')
2131 weight -= 3;
2132 else if (isDIGIT(*s)) {
2133 if (s[1] != ']') {
2134 if (isDIGIT(s[1]) && s[2] == ']')
2135 weight -= 10;
2136 }
2137 else
2138 weight -= 100;
2139 }
2140 for (; s < send; s++) {
2141 last_un_char = un_char;
2142 un_char = (unsigned char)*s;
2143 switch (*s) {
2144 case '@':
2145 case '&':
2146 case '$':
2147 weight -= seen[un_char] * 10;
7e2040f0 2148 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2149 int len;
8903cb82 2150 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2151 len = (int)strlen(tmpbuf);
2152 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2153 weight -= 100;
2154 else
2155 weight -= 10;
2156 }
2157 else if (*s == '$' && s[1] &&
93a17b20
LW
2158 strchr("[#!%*<>()-=",s[1])) {
2159 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2160 weight -= 10;
2161 else
2162 weight -= 1;
2163 }
2164 break;
2165 case '\\':
2166 un_char = 254;
2167 if (s[1]) {
93a17b20 2168 if (strchr("wds]",s[1]))
79072805
LW
2169 weight += 100;
2170 else if (seen['\''] || seen['"'])
2171 weight += 1;
93a17b20 2172 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2173 weight += 40;
2174 else if (isDIGIT(s[1])) {
2175 weight += 40;
2176 while (s[1] && isDIGIT(s[1]))
2177 s++;
2178 }
2179 }
2180 else
2181 weight += 100;
2182 break;
2183 case '-':
2184 if (s[1] == '\\')
2185 weight += 50;
93a17b20 2186 if (strchr("aA01! ",last_un_char))
79072805 2187 weight += 30;
93a17b20 2188 if (strchr("zZ79~",s[1]))
79072805 2189 weight += 30;
f27ffc4a
GS
2190 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2191 weight -= 5; /* cope with negative subscript */
79072805
LW
2192 break;
2193 default:
3792a11b
NC
2194 if (!isALNUM(last_un_char)
2195 && !(last_un_char == '$' || last_un_char == '@'
2196 || last_un_char == '&')
2197 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2198 char *d = tmpbuf;
2199 while (isALPHA(*s))
2200 *d++ = *s++;
2201 *d = '\0';
2202 if (keyword(tmpbuf, d - tmpbuf))
2203 weight -= 150;
2204 }
2205 if (un_char == last_un_char + 1)
2206 weight += 5;
2207 weight -= seen[un_char];
2208 break;
2209 }
2210 seen[un_char]++;
2211 }
2212 if (weight >= 0) /* probably a character class */
2213 return FALSE;
2214 }
2215
2216 return TRUE;
2217}
ffed7fef 2218
ffb4593c
NT
2219/*
2220 * S_intuit_method
2221 *
2222 * Does all the checking to disambiguate
2223 * foo bar
2224 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2225 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2226 *
2227 * First argument is the stuff after the first token, e.g. "bar".
2228 *
2229 * Not a method if bar is a filehandle.
2230 * Not a method if foo is a subroutine prototyped to take a filehandle.
2231 * Not a method if it's really "Foo $bar"
2232 * Method if it's "foo $bar"
2233 * Not a method if it's really "print foo $bar"
2234 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2235 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2236 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2237 * =>
2238 */
2239
76e3520e 2240STATIC int
62d55b22 2241S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2242{
97aff369 2243 dVAR;
a0d0e21e 2244 char *s = start + (*start == '$');
3280af22 2245 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2246 STRLEN len;
2247 GV* indirgv;
2248
2249 if (gv) {
62d55b22 2250 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2251 return 0;
62d55b22
NC
2252 if (cv) {
2253 if (SvPOK(cv)) {
2254 const char *proto = SvPVX_const(cv);
2255 if (proto) {
2256 if (*proto == ';')
2257 proto++;
2258 if (*proto == '*')
2259 return 0;
2260 }
b6c543e3
IZ
2261 }
2262 } else
a0d0e21e
LW
2263 gv = 0;
2264 }
8903cb82 2265 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2266 /* start is the beginning of the possible filehandle/object,
2267 * and s is the end of it
2268 * tmpbuf is a copy of it
2269 */
2270
a0d0e21e 2271 if (*start == '$') {
3280af22 2272 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2273 return 0;
29595ff2 2274 s = PEEKSPACE(s);
3280af22
NIS
2275 PL_bufptr = start;
2276 PL_expect = XREF;
a0d0e21e
LW
2277 return *s == '(' ? FUNCMETH : METHOD;
2278 }
2279 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2280 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2281 len -= 2;
2282 tmpbuf[len] = '\0';
2283 goto bare_package;
2284 }
90e5519e 2285 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2286 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2287 return 0;
2288 /* filehandle or package name makes it a method */
89bfa8cd 2289 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
29595ff2 2290 s = PEEKSPACE(s);
3280af22 2291 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2292 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2293 bare_package:
9ded7720 2294 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2295 newSVpvn(tmpbuf,len));
9ded7720 2296 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3280af22 2297 PL_expect = XTERM;
a0d0e21e 2298 force_next(WORD);
3280af22 2299 PL_bufptr = s;
a0d0e21e
LW
2300 return *s == '(' ? FUNCMETH : METHOD;
2301 }
2302 }
2303 return 0;
2304}
2305
ffb4593c
NT
2306/*
2307 * S_incl_perldb
2308 * Return a string of Perl code to load the debugger. If PERL5DB
2309 * is set, it will return the contents of that, otherwise a
2310 * compile-time require of perl5db.pl.
2311 */
2312
bfed75c6 2313STATIC const char*
cea2e8a9 2314S_incl_perldb(pTHX)
a0d0e21e 2315{
97aff369 2316 dVAR;
3280af22 2317 if (PL_perldb) {
9d4ba2ae 2318 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2319
2320 if (pdb)
2321 return pdb;
93189314 2322 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2323 return "BEGIN { require 'perl5db.pl' }";
2324 }
2325 return "";
2326}
2327
2328
16d20bd9 2329/* Encoded script support. filter_add() effectively inserts a
4e553d73 2330 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2331 * Note that the filter function only applies to the current source file
2332 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2333 *
2334 * The datasv parameter (which may be NULL) can be used to pass
2335 * private data to this instance of the filter. The filter function
2336 * can recover the SV using the FILTER_DATA macro and use it to
2337 * store private buffers and state information.
2338 *
2339 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2340 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2341 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2342 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2343 * private use must be set using malloc'd pointers.
2344 */
16d20bd9
AD
2345
2346SV *
864dbfa3 2347Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2348{
97aff369 2349 dVAR;
f4c556ac 2350 if (!funcp)
a0714e2c 2351 return NULL;
f4c556ac 2352
3280af22
NIS
2353 if (!PL_rsfp_filters)
2354 PL_rsfp_filters = newAV();
16d20bd9 2355 if (!datasv)
561b68a9 2356 datasv = newSV(0);
862a34c6 2357 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2358 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2359 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2360 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2361 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2362 av_unshift(PL_rsfp_filters, 1);
2363 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2364 return(datasv);
2365}
4e553d73 2366
16d20bd9
AD
2367
2368/* Delete most recently added instance of this filter function. */
a0d0e21e 2369void
864dbfa3 2370Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2371{
97aff369 2372 dVAR;
e0c19803 2373 SV *datasv;
24801a4b 2374
33073adb 2375#ifdef DEBUGGING
8141890a 2376 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2377#endif
3280af22 2378 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2379 return;
2380 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2381 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2382 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2383 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2384 IoANY(datasv) = (void *)NULL;
3280af22 2385 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2386
16d20bd9
AD
2387 return;
2388 }
2389 /* we need to search for the correct entry and clear it */
cea2e8a9 2390 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2391}
2392
2393
1de9afcd
RGS
2394/* Invoke the idxth filter function for the current rsfp. */
2395/* maxlen 0 = read one text line */
16d20bd9 2396I32
864dbfa3 2397Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2398{
97aff369 2399 dVAR;
16d20bd9
AD
2400 filter_t funcp;
2401 SV *datasv = NULL;
e50aee73 2402
3280af22 2403 if (!PL_rsfp_filters)
16d20bd9 2404 return -1;
1de9afcd 2405 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2406 /* Provide a default input filter to make life easy. */
2407 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2408 DEBUG_P(PerlIO_printf(Perl_debug_log,
2409 "filter_read %d: from rsfp\n", idx));
4e553d73 2410 if (maxlen) {
16d20bd9
AD
2411 /* Want a block */
2412 int len ;
f54cb97a 2413 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2414
2415 /* ensure buf_sv is large enough */
eb160463 2416 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2417 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2418 if (PerlIO_error(PL_rsfp))
37120919
AD
2419 return -1; /* error */
2420 else
2421 return 0 ; /* end of file */
2422 }
16d20bd9
AD
2423 SvCUR_set(buf_sv, old_len + len) ;
2424 } else {
2425 /* Want a line */
3280af22
NIS
2426 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2427 if (PerlIO_error(PL_rsfp))
37120919
AD
2428 return -1; /* error */
2429 else
2430 return 0 ; /* end of file */
2431 }
16d20bd9
AD
2432 }
2433 return SvCUR(buf_sv);
2434 }
2435 /* Skip this filter slot if filter has been deleted */
1de9afcd 2436 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2437 DEBUG_P(PerlIO_printf(Perl_debug_log,
2438 "filter_read %d: skipped (filter deleted)\n",
2439 idx));
16d20bd9
AD
2440 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2441 }
2442 /* Get function pointer hidden within datasv */
8141890a 2443 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2444 DEBUG_P(PerlIO_printf(Perl_debug_log,
2445 "filter_read %d: via function %p (%s)\n",
cfd0369c 2446 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2447 /* Call function. The function is expected to */
2448 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2449 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2450 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2451}
2452
76e3520e 2453STATIC char *
cea2e8a9 2454S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2455{
97aff369 2456 dVAR;
c39cd008 2457#ifdef PERL_CR_FILTER
3280af22 2458 if (!PL_rsfp_filters) {
c39cd008 2459 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2460 }
2461#endif
3280af22 2462 if (PL_rsfp_filters) {
55497cff
PP
2463 if (!append)
2464 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2465 if (FILTER_READ(0, sv, 0) > 0)
2466 return ( SvPVX(sv) ) ;
2467 else
bd61b366 2468 return NULL ;
16d20bd9 2469 }
9d116dd7 2470 else
fd049845 2471 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2472}
2473
01ec43d0 2474STATIC HV *
7fc63493 2475S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2476{
97aff369 2477 dVAR;
def3634b
GS
2478 GV *gv;
2479
01ec43d0 2480 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2481 return PL_curstash;
2482
2483 if (len > 2 &&
2484 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2485 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2486 {
2487 return GvHV(gv); /* Foo:: */
def3634b
GS
2488 }
2489
2490 /* use constant CLASS => 'MyClass' */
90e5519e 2491 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2492 SV *sv;
2493 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2494 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2495 }
2496 }
2497
2498 return gv_stashpv(pkgname, FALSE);
2499}
a0d0e21e 2500
468aa647 2501STATIC char *
cc6ed77d 2502S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2503 dVAR;
468aa647
RGS
2504 if (PL_expect != XSTATE)
2505 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2506 is_use ? "use" : "no"));
29595ff2 2507 s = SKIPSPACE1(s);
468aa647
RGS
2508 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2509 s = force_version(s, TRUE);
29595ff2 2510 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
9ded7720 2511 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2512 force_next(WORD);
2513 }
2514 else if (*s == 'v') {
2515 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2516 s = force_version(s, FALSE);
2517 }
2518 }
2519 else {
2520 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2521 s = force_version(s, FALSE);
2522 }
2523 yylval.ival = is_use;
2524 return s;
2525}
748a9306 2526#ifdef DEBUGGING
27da23d5 2527 static const char* const exp_name[] =
09bef843 2528 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2529 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2530 };
748a9306 2531#endif
463ee0b2 2532
02aa26ce
NT
2533/*
2534 yylex
2535
2536 Works out what to call the token just pulled out of the input
2537 stream. The yacc parser takes care of taking the ops we return and
2538 stitching them into a tree.
2539
2540 Returns:
2541 PRIVATEREF
2542
2543 Structure:
2544 if read an identifier
2545 if we're in a my declaration
2546 croak if they tried to say my($foo::bar)
2547 build the ops for a my() declaration
2548 if it's an access to a my() variable
2549 are we in a sort block?
2550 croak if my($a); $a <=> $b
2551 build ops for access to a my() variable
2552 if in a dq string, and they've said @foo and we can't find @foo
2553 croak
2554 build ops for a bareword
2555 if we already built the token before, use it.
2556*/
2557
20141f0e 2558
dba4d153
JH
2559#ifdef __SC__
2560#pragma segment Perl_yylex
2561#endif
dba4d153 2562int
dba4d153 2563Perl_yylex(pTHX)
20141f0e 2564{
97aff369 2565 dVAR;
3afc138a 2566 register char *s = PL_bufptr;
378cc40b 2567 register char *d;
463ee0b2 2568 STRLEN len;
aa7440fb 2569 bool bof = FALSE;
a687059c 2570
bbf60fe6 2571 DEBUG_T( {
396482e1 2572 SV* tmp = newSVpvs("");
b6007c36
DM
2573 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2574 (IV)CopLINE(PL_curcop),
2575 lex_state_names[PL_lex_state],
2576 exp_name[PL_expect],
2577 pv_display(tmp, s, strlen(s), 0, 60));
2578 SvREFCNT_dec(tmp);
bbf60fe6 2579 } );
02aa26ce 2580 /* check if there's an identifier for us to look at */
ba979b31 2581 if (PL_pending_ident)
bbf60fe6 2582 return REPORT(S_pending_ident(aTHX));
bbce6d69 2583
02aa26ce
NT
2584 /* no identifier pending identification */
2585
3280af22 2586 switch (PL_lex_state) {
79072805
LW
2587#ifdef COMMENTARY
2588 case LEX_NORMAL: /* Some compilers will produce faster */
2589 case LEX_INTERPNORMAL: /* code if we comment these out. */
2590 break;
2591#endif
2592
09bef843 2593 /* when we've already built the next token, just pull it out of the queue */
79072805 2594 case LEX_KNOWNEXT:
3280af22 2595 PL_nexttoke--;
9ded7720 2596 yylval = NEXTVAL_NEXTTOKE;
3280af22
NIS
2597 if (!PL_nexttoke) {
2598 PL_lex_state = PL_lex_defer;
2599 PL_expect = PL_lex_expect;
2600 PL_lex_defer = LEX_NORMAL;
463ee0b2 2601 }
bbf60fe6 2602 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2603
02aa26ce 2604 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2605 when we get here, PL_bufptr is at the \
02aa26ce 2606 */
79072805
LW
2607 case LEX_INTERPCASEMOD:
2608#ifdef DEBUGGING
3280af22 2609 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2610 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2611#endif
02aa26ce 2612 /* handle \E or end of string */
3280af22 2613 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2614 /* if at a \E */
3280af22 2615 if (PL_lex_casemods) {
f54cb97a 2616 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2617 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2618
3792a11b
NC
2619 if (PL_bufptr != PL_bufend
2620 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2621 PL_bufptr += 2;
2622 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2623 }
bbf60fe6 2624 return REPORT(')');
79072805 2625 }
3280af22
NIS
2626 if (PL_bufptr != PL_bufend)
2627 PL_bufptr += 2;
2628 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2629 return yylex();
79072805
LW
2630 }
2631 else {
607df283 2632 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2633 "### Saw case modifier\n"); });
3280af22 2634 s = PL_bufptr + 1;
6e909404
JH
2635 if (s[1] == '\\' && s[2] == 'E') {
2636 PL_bufptr = s + 3;
2637 PL_lex_state = LEX_INTERPCONCAT;
2638 return yylex();
a0d0e21e 2639 }
6e909404 2640 else {
90771dc0 2641 I32 tmp;
6e909404
JH
2642 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2643 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2644 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2645 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2646 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2647 return REPORT(')');
6e909404
JH
2648 }
2649 if (PL_lex_casemods > 10)
2650 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2651 PL_lex_casestack[PL_lex_casemods++] = *s;
2652 PL_lex_casestack[PL_lex_casemods] = '\0';
2653 PL_lex_state = LEX_INTERPCONCAT;
9ded7720 2654 NEXTVAL_NEXTTOKE.ival = 0;
6e909404
JH
2655 force_next('(');
2656 if (*s == 'l')
9ded7720 2657 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 2658 else if (*s == 'u')
9ded7720 2659 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 2660 else if (*s == 'L')
9ded7720 2661 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 2662 else if (*s == 'U')
9ded7720 2663 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 2664 else if (*s == 'Q')
9ded7720 2665 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
2666 else
2667 Perl_croak(aTHX_ "panic: yylex");
2668 PL_bufptr = s + 1;
a0d0e21e 2669 }
79072805 2670 force_next(FUNC);
3280af22
NIS
2671 if (PL_lex_starts) {
2672 s = PL_bufptr;
2673 PL_lex_starts = 0;
131b3ad0
DM
2674 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2675 if (PL_lex_casemods == 1 && PL_lex_inpat)
2676 OPERATOR(',');
2677 else
2678 Aop(OP_CONCAT);
79072805
LW
2679 }
2680 else
cea2e8a9 2681 return yylex();
79072805
LW
2682 }
2683
55497cff 2684 case LEX_INTERPPUSH:
bbf60fe6 2685 return REPORT(sublex_push());
55497cff 2686
79072805 2687 case LEX_INTERPSTART:
3280af22 2688 if (PL_bufptr == PL_bufend)
bbf60fe6 2689 return REPORT(sublex_done());
607df283 2690 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2691 "### Interpolated variable\n"); });
3280af22
NIS
2692 PL_expect = XTERM;
2693 PL_lex_dojoin = (*PL_bufptr == '@');
2694 PL_lex_state = LEX_INTERPNORMAL;
2695 if (PL_lex_dojoin) {
9ded7720 2696 NEXTVAL_NEXTTOKE.ival = 0;
79072805 2697 force_next(',');
a0d0e21e 2698 force_ident("\"", '$');
9ded7720 2699 NEXTVAL_NEXTTOKE.ival = 0;
79072805 2700 force_next('$');
9ded7720 2701 NEXTVAL_NEXTTOKE.ival = 0;
79072805 2702 force_next('(');
9ded7720 2703 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2704 force_next(FUNC);
2705 }
3280af22
NIS
2706 if (PL_lex_starts++) {
2707 s = PL_bufptr;
131b3ad0
DM
2708 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2709 if (!PL_lex_casemods && PL_lex_inpat)
2710 OPERATOR(',');
2711 else
2712 Aop(OP_CONCAT);
79072805 2713 }
cea2e8a9 2714 return yylex();
79072805
LW
2715
2716 case LEX_INTERPENDMAYBE:
3280af22
NIS
2717 if (intuit_more(PL_bufptr)) {
2718 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2719 break;
2720 }
2721 /* FALL THROUGH */
2722
2723 case LEX_INTERPEND:
3280af22
NIS
2724 if (PL_lex_dojoin) {
2725 PL_lex_dojoin = FALSE;
2726 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2727 return REPORT(')');
79072805 2728 }
43a16006 2729 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2730 && SvEVALED(PL_lex_repl))
43a16006 2731 {
e9fa98b2 2732 if (PL_bufptr != PL_bufend)
cea2e8a9 2733 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 2734 PL_lex_repl = NULL;
e9fa98b2 2735 }
79072805
LW
2736 /* FALLTHROUGH */
2737 case LEX_INTERPCONCAT:
2738#ifdef DEBUGGING
3280af22 2739 if (PL_lex_brackets)
cea2e8a9 2740 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2741#endif
3280af22 2742 if (PL_bufptr == PL_bufend)
bbf60fe6 2743 return REPORT(sublex_done());
79072805 2744
3280af22
NIS
2745 if (SvIVX(PL_linestr) == '\'') {
2746 SV *sv = newSVsv(PL_linestr);
2747 if (!PL_lex_inpat)
76e3520e 2748 sv = tokeq(sv);
3280af22 2749 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2750 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2751 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2752 s = PL_bufend;
79072805
LW
2753 }
2754 else {
3280af22 2755 s = scan_const(PL_bufptr);
79072805 2756 if (*s == '\\')
3280af22 2757 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2758 else
3280af22 2759 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2760 }
2761
3280af22 2762 if (s != PL_bufptr) {
9ded7720 2763 NEXTVAL_NEXTTOKE = yylval;
3280af22 2764 PL_expect = XTERM;
79072805 2765 force_next(THING);
131b3ad0
DM
2766 if (PL_lex_starts++) {
2767 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2768 if (!PL_lex_casemods && PL_lex_inpat)
2769 OPERATOR(',');
2770 else
2771 Aop(OP_CONCAT);
2772 }
79072805 2773 else {
3280af22 2774 PL_bufptr = s;
cea2e8a9 2775 return yylex();
79072805
LW
2776 }
2777 }
2778
cea2e8a9 2779 return yylex();
a0d0e21e 2780 case LEX_FORMLINE:
3280af22
NIS
2781 PL_lex_state = LEX_NORMAL;
2782 s = scan_formline(PL_bufptr);
2783 if (!PL_lex_formbrack)
a0d0e21e
LW
2784 goto rightbracket;
2785 OPERATOR(';');
79072805
LW
2786 }
2787
3280af22
NIS
2788 s = PL_bufptr;
2789 PL_oldoldbufptr = PL_oldbufptr;
2790 PL_oldbufptr = s;
463ee0b2
LW
2791
2792 retry:
378cc40b
LW
2793 switch (*s) {
2794 default:
7e2040f0 2795 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2796 goto keylookup;
cea2e8a9 2797 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2798 case 4:
2799 case 26:
2800 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2801 case 0:
3280af22
NIS
2802 if (!PL_rsfp) {
2803 PL_last_uni = 0;
2804 PL_last_lop = 0;
c5ee2135 2805 if (PL_lex_brackets) {
0bd48802
AL
2806 yyerror(PL_lex_formbrack
2807 ? "Format not terminated"
2808 : "Missing right curly or square bracket");
c5ee2135 2809 }
4e553d73 2810 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2811 "### Tokener got EOF\n");
5f80b19c 2812 } );
79072805 2813 TOKEN(0);
463ee0b2 2814 }
3280af22 2815 if (s++ < PL_bufend)
a687059c 2816 goto retry; /* ignore stray nulls */
3280af22
NIS
2817 PL_last_uni = 0;
2818 PL_last_lop = 0;
2819 if (!PL_in_eval && !PL_preambled) {
2820 PL_preambled = TRUE;
2821 sv_setpv(PL_linestr,incl_perldb());
2822 if (SvCUR(PL_linestr))
396482e1 2823 sv_catpvs(PL_linestr,";");
3280af22
NIS
2824 if (PL_preambleav){
2825 while(AvFILLp(PL_preambleav) >= 0) {
2826 SV *tmpsv = av_shift(PL_preambleav);
2827 sv_catsv(PL_linestr, tmpsv);
396482e1 2828 sv_catpvs(PL_linestr, ";");
91b7def8
PP
2829 sv_free(tmpsv);
2830 }
3280af22
NIS
2831 sv_free((SV*)PL_preambleav);
2832 PL_preambleav = NULL;
91b7def8 2833 }
3280af22 2834 if (PL_minus_n || PL_minus_p) {
396482e1 2835 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2836 if (PL_minus_l)
396482e1 2837 sv_catpvs(PL_linestr,"chomp;");
3280af22 2838 if (PL_minus_a) {
3280af22 2839 if (PL_minus_F) {
3792a11b
NC
2840 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2841 || *PL_splitstr == '"')
3280af22 2842 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2843 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2844 else {
c8ef6a4b
NC
2845 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2846 bytes can be used as quoting characters. :-) */
dd374669 2847 const char *splits = PL_splitstr;
91d456ae 2848 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2849 do {
2850 /* Need to \ \s */
dd374669
AL
2851 if (*splits == '\\')
2852 sv_catpvn(PL_linestr, splits, 1);
2853 sv_catpvn(PL_linestr, splits, 1);
2854 } while (*splits++);
48c4c863
NC
2855 /* This loop will embed the trailing NUL of
2856 PL_linestr as the last thing it does before
2857 terminating. */
396482e1 2858 sv_catpvs(PL_linestr, ");");
54310121 2859 }
2304df62
AD
2860 }
2861 else
396482e1 2862 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2863 }
79072805 2864 }
bc9b29db 2865 if (PL_minus_E)
396482e1
GA
2866 sv_catpvs(PL_linestr,"use feature ':5.10';");
2867 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2868 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2869 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2870 PL_last_lop = PL_last_uni = NULL;
3280af22 2871 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2872 SV * const sv = newSV(0);
a0d0e21e
LW
2873
2874 sv_upgrade(sv, SVt_PVMG);
3280af22 2875 sv_setsv(sv,PL_linestr);
0ac0412a 2876 (void)SvIOK_on(sv);
45977657 2877 SvIV_set(sv, 0);
36c7798d 2878 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2879 }
79072805 2880 goto retry;
a687059c 2881 }
e929a76b 2882 do {
aa7440fb 2883 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 2884 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af
JH
2885 fake_eof:
2886 if (PL_rsfp) {
2887 if (PL_preprocess && !PL_in_eval)
2888 (void)PerlProc_pclose(PL_rsfp);
2889 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2890 PerlIO_clearerr(PL_rsfp);
2891 else
2892 (void)PerlIO_close(PL_rsfp);
4608196e 2893 PL_rsfp = NULL;
7e28d3af
JH
2894 PL_doextract = FALSE;
2895 }
2896 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2897 sv_setpv(PL_linestr,PL_minus_p
2898 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2899 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2900 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2901 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
2902 PL_minus_n = PL_minus_p = 0;
2903 goto retry;
2904 }
2905 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 2906 PL_last_lop = PL_last_uni = NULL;
c69006e4 2907 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2908 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2909 }
7aa207d6
JH
2910 /* If it looks like the start of a BOM or raw UTF-16,
2911 * check if it in fact is. */
2912 else if (bof &&
2913 (*s == 0 ||
2914 *(U8*)s == 0xEF ||
2915 *(U8*)s >= 0xFE ||
2916 s[1] == 0)) {
226017aa 2917#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2918# ifdef __GNU_LIBRARY__
2919# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2920# define FTELL_FOR_PIPE_IS_BROKEN
2921# endif
e3f494f1
JH
2922# else
2923# ifdef __GLIBC__
2924# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2925# define FTELL_FOR_PIPE_IS_BROKEN
2926# endif
2927# endif
226017aa
DD
2928# endif
2929#endif
2930#ifdef FTELL_FOR_PIPE_IS_BROKEN
2931 /* This loses the possibility to detect the bof
2932 * situation on perl -P when the libc5 is being used.
2933 * Workaround? Maybe attach some extra state to PL_rsfp?
2934 */
2935 if (!PL_preprocess)
7e28d3af 2936 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2937#else
eb160463 2938 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2939#endif
7e28d3af 2940 if (bof) {
3280af22 2941 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2942 s = swallow_bom((U8*)s);
e929a76b 2943 }
378cc40b 2944 }
3280af22 2945 if (PL_doextract) {
a0d0e21e
LW
2946 /* Incest with pod. */
2947 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2948 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2949 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2950 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2951 PL_last_lop = PL_last_uni = NULL;
3280af22 2952 PL_doextract = FALSE;
a0d0e21e 2953 }
4e553d73 2954 }
463ee0b2 2955 incline(s);
3280af22
NIS
2956 } while (PL_doextract);
2957 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2958 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2959 SV * const sv = newSV(0);
a687059c 2960
93a17b20 2961 sv_upgrade(sv, SVt_PVMG);
3280af22 2962 sv_setsv(sv,PL_linestr);
0ac0412a 2963 (void)SvIOK_on(sv);
45977657 2964 SvIV_set(sv, 0);
36c7798d 2965 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2966 }
3280af22 2967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2968 PL_last_lop = PL_last_uni = NULL;
57843af0 2969 if (CopLINE(PL_curcop) == 1) {
3280af22 2970 while (s < PL_bufend && isSPACE(*s))
79072805 2971 s++;
a0d0e21e 2972 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2973 s++;
bd61b366 2974 d = NULL;
3280af22 2975 if (!PL_in_eval) {
44a8e56a
PP
2976 if (*s == '#' && *(s+1) == '!')
2977 d = s + 2;
2978#ifdef ALTERNATE_SHEBANG
2979 else {
bfed75c6 2980 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2981 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2982 d = s + (sizeof(as) - 1);
2983 }
2984#endif /* ALTERNATE_SHEBANG */
2985 }
2986 if (d) {
b8378b72 2987 char *ipath;
774d564b 2988 char *ipathend;
b8378b72 2989
774d564b 2990 while (isSPACE(*d))
b8378b72
CS
2991 d++;
2992 ipath = d;
774d564b
PP
2993 while (*d && !isSPACE(*d))
2994 d++;
2995 ipathend = d;
2996
2997#ifdef ARG_ZERO_IS_SCRIPT
2998 if (ipathend > ipath) {
2999 /*
3000 * HP-UX (at least) sets argv[0] to the script name,
3001 * which makes $^X incorrect. And Digital UNIX and Linux,
3002 * at least, set argv[0] to the basename of the Perl
3003 * interpreter. So, having found "#!", we'll set it right.
3004 */
fafc274c
NC
3005 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3006 SVt_PV)); /* $^X */
774d564b 3007 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 3008 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 3009 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
3010 SvSETMAGIC(x);
3011 }
556c1dec
JH
3012 else {
3013 STRLEN blen;
3014 STRLEN llen;
cfd0369c 3015 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 3016 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
3017 if (llen < blen) {
3018 bstart += blen - llen;
3019 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3020 sv_setpvn(x, ipath, ipathend - ipath);
3021 SvSETMAGIC(x);
3022 }
3023 }
3024 }
774d564b 3025 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 3026 }
774d564b 3027#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
3028
3029 /*
3030 * Look for options.
3031 */
748a9306 3032 d = instr(s,"perl -");
84e30d1a 3033 if (!d) {
748a9306 3034 d = instr(s,"perl");
84e30d1a
GS
3035#if defined(DOSISH)
3036 /* avoid getting into infinite loops when shebang
3037 * line contains "Perl" rather than "perl" */
3038 if (!d) {
3039 for (d = ipathend-4; d >= ipath; --d) {
3040 if ((*d == 'p' || *d == 'P')
3041 && !ibcmp(d, "perl", 4))
3042 {
3043 break;
3044 }
3045 }
3046 if (d < ipath)
bd61b366 3047 d = NULL;
84e30d1a
GS
3048 }
3049#endif
3050 }
44a8e56a
PP
3051#ifdef ALTERNATE_SHEBANG
3052 /*
3053 * If the ALTERNATE_SHEBANG on this system starts with a
3054 * character that can be part of a Perl expression, then if
3055 * we see it but not "perl", we're probably looking at the
3056 * start of Perl code, not a request to hand off to some
3057 * other interpreter. Similarly, if "perl" is there, but
3058 * not in the first 'word' of the line, we assume the line
3059 * contains the start of the Perl program.
44a8e56a
PP
3060 */
3061 if (d && *s != '#') {
f54cb97a 3062 const char *c = ipath;
44a8e56a
PP
3063 while (*c && !strchr("; \t\r\n\f\v#", *c))
3064 c++;
3065 if (c < d)
bd61b366 3066 d = NULL; /* "perl" not in first word; ignore */
44a8e56a
PP
3067 else
3068 *s = '#'; /* Don't try to parse shebang line */
3069 }
774d564b 3070#endif /* ALTERNATE_SHEBANG */
bf4acbe4 3071#ifndef MACOS_TRADITIONAL
748a9306 3072 if (!d &&
44a8e56a 3073 *s == '#' &&
774d564b 3074 ipathend > ipath &&
3280af22 3075 !PL_minus_c &&
748a9306 3076 !instr(s,"indir") &&
3280af22 3077 instr(PL_origargv[0],"perl"))
748a9306 3078 {
27da23d5 3079 dVAR;
9f68db38 3080 char **newargv;
9f68db38 3081
774d564b
PP
3082 *ipathend = '\0';
3083 s = ipathend + 1;
3280af22 3084 while (s < PL_bufend && isSPACE(*s))
9f68db38 3085 s++;
3280af22 3086 if (s < PL_bufend) {
a02a5408 3087 Newxz(newargv,PL_origargc+3,char*);
9f68db38 3088 newargv[1] = s;
3280af22 3089 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
3090 s++;
3091 *s = '\0';
3280af22 3092 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
3093 }
3094 else
3280af22 3095 newargv = PL_origargv;
774d564b 3096 newargv[0] = ipath;
b35112e7 3097 PERL_FPU_PRE_EXEC
b4748376 3098 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 3099 PERL_FPU_POST_EXEC
cea2e8a9 3100 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 3101 }
bf4acbe4 3102#endif
748a9306 3103 if (d) {
748a9306 3104 while (*d && !isSPACE(*d)) d++;
bf4acbe4 3105 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
3106
3107 if (*d++ == '-') {
f54cb97a 3108 const bool switches_done = PL_doswitches;
fb993905
GA
3109 const U32 oldpdb = PL_perldb;
3110 const bool oldn = PL_minus_n;
3111 const bool oldp = PL_minus_p;
3112
8cc95fdb 3113 do {
3ffe3ee4 3114 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 3115 const char * const m = d;
8cc95fdb 3116 while (*d && !isSPACE(*d)) d++;
cea2e8a9 3117 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
3118 (int)(d - m), m);
3119 }
97bd5664 3120 d = moreswitches(d);
8cc95fdb 3121 } while (d);
f0b2cf55
YST
3122 if (PL_doswitches && !switches_done) {
3123 int argc = PL_origargc;
3124 char **argv = PL_origargv;
3125 do {
3126 argc--,argv++;
3127 } while (argc && argv[0][0] == '-' && argv[0][1]);
3128 init_argv_symbols(argc,argv);
3129 }
155aba94
GS
3130 if ((PERLDB_LINE && !oldpdb) ||
3131 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
3132 /* if we have already added "LINE: while (<>) {",
3133 we must not do it again */
748a9306 3134 {
c69006e4 3135 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3136 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3137 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3138 PL_last_lop = PL_last_uni = NULL;
3280af22 3139 PL_preambled = FALSE;
84902520 3140 if (PERLDB_LINE)
3280af22 3141 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3142 goto retry;
3143 }
a0d0e21e 3144 }
79072805 3145 }
9f68db38 3146 }
79072805 3147 }
3280af22
NIS
3148 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3149 PL_bufptr = s;
3150 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3151 return yylex();
ae986130 3152 }
378cc40b 3153 goto retry;
4fdae800 3154 case '\r':
6a27c188 3155#ifdef PERL_STRICT_CR
cea2e8a9 3156 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3157 Perl_croak(aTHX_
cc507455 3158 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3159#endif
4fdae800 3160 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3161#ifdef MACOS_TRADITIONAL
3162 case '\312':
3163#endif
378cc40b
LW
3164 s++;
3165 goto retry;
378cc40b 3166 case '#':
e929a76b 3167 case '\n':
3280af22 3168 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3169 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3170 /* handle eval qq[#line 1 "foo"\n ...] */
3171 CopLINE_dec(PL_curcop);
3172 incline(s);
3173 }
afaf3e61
NC
3174 d = s;
3175 while (d < PL_bufend && *d != '\n')
3176 d++;
3177 if (d < PL_bufend)
3178 d++;
3179 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3180 Perl_croak(aTHX_ "panic: input overflow");
3181 s = d;
463ee0b2 3182 incline(s);
3280af22
NIS
3183 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3184 PL_bufptr = s;
3185 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3186 return yylex();
a687059c 3187 }
378cc40b 3188 }
a687059c 3189 else {
378cc40b 3190 *s = '\0';
3280af22 3191 PL_bufend = s;
a687059c 3192 }
378cc40b
LW
3193 goto retry;
3194 case '-':
79072805 3195 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3196 I32 ftst = 0;
90771dc0 3197 char tmp;
e5edeb50 3198
378cc40b 3199 s++;
3280af22 3200 PL_bufptr = s;
748a9306
LW
3201 tmp = *s++;
3202
bf4acbe4 3203 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3204 s++;
3205
3206 if (strnEQ(s,"=>",2)) {
3280af22 3207 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3208 DEBUG_T( { S_printbuf(aTHX_
3209 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3210 } );
748a9306
LW
3211 OPERATOR('-'); /* unary minus */
3212 }
3280af22 3213 PL_last_uni = PL_oldbufptr;
748a9306 3214 switch (tmp) {
e5edeb50
JH
3215 case 'r': ftst = OP_FTEREAD; break;
3216 case 'w': ftst = OP_FTEWRITE; break;
3217 case 'x': ftst = OP_FTEEXEC; break;
3218 case 'o': ftst = OP_FTEOWNED; break;
3219 case 'R': ftst = OP_FTRREAD; break;
3220 case 'W': ftst = OP_FTRWRITE; break;
3221 case 'X': ftst = OP_FTREXEC; break;
3222 case 'O': ftst = OP_FTROWNED; break;
3223 case 'e': ftst = OP_FTIS; break;
3224 case 'z': ftst = OP_FTZERO; break;
3225 case 's': ftst = OP_FTSIZE; break;
3226 case 'f': ftst = OP_FTFILE; break;
3227 case 'd': ftst = OP_FTDIR; break;
3228 case 'l': ftst = OP_FTLINK; break;
3229 case 'p': ftst = OP_FTPIPE; break;
3230 case 'S': ftst = OP_FTSOCK; break;
3231 case 'u': ftst = OP_FTSUID; break;
3232 case 'g': ftst = OP_FTSGID; break;
3233 case 'k': ftst = OP_FTSVTX; break;
3234 case 'b': ftst = OP_FTBLK; break;
3235 case 'c': ftst = OP_FTCHR; break;
3236 case 't': ftst = OP_FTTTY; break;
3237 case 'T': ftst = OP_FTTEXT; break;
3238 case 'B': ftst = OP_FTBINARY; break;
3239 case 'M': case 'A': case 'C':
fafc274c 3240 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
3241 switch (tmp) {
3242 case 'M': ftst = OP_FTMTIME; break;
3243 case 'A': ftst = OP_FTATIME; break;
3244 case 'C': ftst = OP_FTCTIME; break;
3245 default: break;
3246 }
3247 break;
378cc40b 3248 default:
378cc40b
LW
3249 break;
3250 }
e5edeb50 3251 if (ftst) {
eb160463 3252 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3253 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3254 "### Saw file test %c\n", (int)tmp);
5f80b19c 3255 } );
e5edeb50
JH
3256 FTST(ftst);
3257 }
3258 else {
3259 /* Assume it was a minus followed by a one-letter named
3260 * subroutine call (or a -bareword), then. */
95c31fe3 3261 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3262 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3263 (int) tmp);
5f80b19c 3264 } );
3cf7b4c4 3265 s = --PL_bufptr;
e5edeb50 3266 }
378cc40b 3267 }
90771dc0
NC
3268 {
3269 const char tmp = *s++;
3270 if (*s == tmp) {
3271 s++;
3272 if (PL_expect == XOPERATOR)
3273 TERM(POSTDEC);
3274 else
3275 OPERATOR(PREDEC);
3276 }
3277 else if (*s == '>') {
3278 s++;
29595ff2 3279 s = SKIPSPACE1(s);
90771dc0
NC
3280 if (isIDFIRST_lazy_if(s,UTF)) {
3281 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3282 TOKEN(ARROW);
3283 }
3284 else if (*s == '$')
3285 OPERATOR(ARROW);
3286 else
3287 TERM(ARROW);
3288 }
3280af22 3289 if (PL_expect == XOPERATOR)
90771dc0
NC
3290 Aop(OP_SUBTRACT);
3291 else {
3292 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3293 check_uni();
3294 OPERATOR('-'); /* unary minus */
79072805 3295 }
2f3197b3 3296 }
79072805 3297
378cc40b 3298 case '+':
90771dc0
NC
3299 {
3300 const char tmp = *s++;
3301 if (*s == tmp) {
3302 s++;
3303 if (PL_expect == XOPERATOR)
3304 TERM(POSTINC);
3305 else
3306 OPERATOR(PREINC);
3307 }
3280af22 3308 if (PL_expect == XOPERATOR)
90771dc0
NC
3309 Aop(OP_ADD);
3310 else {
3311 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3312 check_uni();
3313 OPERATOR('+');
3314 }
2f3197b3 3315 }
a687059c 3316
378cc40b 3317 case '*':
3280af22
NIS
3318 if (PL_expect != XOPERATOR) {
3319 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3320 PL_expect = XOPERATOR;
3321 force_ident(PL_tokenbuf, '*');
3322 if (!*PL_tokenbuf)
a0d0e21e 3323 PREREF('*');
79072805 3324 TERM('*');
a687059c 3325 }
79072805
LW
3326 s++;
3327 if (*s == '*') {
a687059c 3328 s++;
79072805 3329 PWop(OP_POW);
a687059c 3330 }
79072805
LW
3331 Mop(OP_MULTIPLY);
3332
378cc40b 3333 case '%':
3280af22 3334 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3335 ++s;
3336 Mop(OP_MODULO);
a687059c 3337 }
3280af22
NIS
3338 PL_tokenbuf[0] = '%';
3339 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3340 if (!PL_tokenbuf[1]) {
bbce6d69 3341 PREREF('%');
a687059c 3342 }
3280af22 3343 PL_pending_ident = '%';
bbce6d69 3344 TERM('%');
a687059c 3345
378cc40b 3346 case '^':
79072805 3347 s++;
a0d0e21e 3348 BOop(OP_BIT_XOR);
79072805 3349 case '[':
3280af22 3350 PL_lex_brackets++;
79072805 3351 /* FALL THROUGH */
378cc40b 3352 case '~':
0d863452
RH
3353 if (s[1] == '~'
3354 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3355 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3356 {
3357 s += 2;
3358 Eop(OP_SMARTMATCH);
3359 }
378cc40b 3360 case ',':
90771dc0
NC
3361 {
3362 const char tmp = *s++;
3363 OPERATOR(tmp);
3364 }
a0d0e21e
LW
3365 case ':':
3366 if (s[1] == ':') {
3367 len = 0;
0bfa2a8a 3368 goto just_a_word_zero_gv;
a0d0e21e
LW
3369 }
3370 s++;
09bef843
SB
3371 switch (PL_expect) {
3372 OP *attrs;
3373 case XOPERATOR:
3374 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3375 break;
3376 PL_bufptr = s; /* update in case we back off */
3377 goto grabattrs;
3378 case XATTRBLOCK:
3379 PL_expect = XBLOCK;
3380 goto grabattrs;
3381 case XATTRTERM:
3382 PL_expect = XTERMBLOCK;
3383 grabattrs:
29595ff2 3384 s = PEEKSPACE(s);
5f66b61c 3385 attrs = NULL;
7e2040f0 3386 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3387 I32 tmp;
09bef843 3388 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3389 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3390 if (tmp < 0) tmp = -tmp;
3391 switch (tmp) {
3392 case KEY_or:
3393 case KEY_and:
c963b151 3394 case KEY_err:
f9829d6b
GS
3395 case KEY_for:
3396 case KEY_unless:
3397 case KEY_if:
3398 case KEY_while:
3399 case KEY_until:
3400 goto got_attrs;
3401 default:
3402 break;
3403 }
3404 }
09bef843
SB
3405 if (*d == '(') {
3406 d = scan_str(d,TRUE,TRUE);
3407 if (!d) {
09bef843
SB
3408 /* MUST advance bufptr here to avoid bogus
3409 "at end of line" context messages from yyerror().
3410 */
3411 PL_bufptr = s + len;
3412 yyerror("Unterminated attribute parameter in attribute list");
3413 if (attrs)
3414 op_free(attrs);
bbf60fe6 3415 return REPORT(0); /* EOF indicator */
09bef843
SB
3416 }
3417 }
3418 if (PL_lex_stuff) {
3419 SV *sv = newSVpvn(s, len);
3420 sv_catsv(sv, PL_lex_stuff);
3421 attrs = append_elem(OP_LIST, attrs,
3422 newSVOP(OP_CONST, 0, sv));
3423 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 3424 PL_lex_stuff = NULL;
09bef843
SB
3425 }
3426 else {
371fce9b
DM
3427 if (len == 6 && strnEQ(s, "unique", len)) {
3428 if (PL_in_my == KEY_our)
3429#ifdef USE_ITHREADS
3430 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3431#else
bb263b4e 3432 /*EMPTY*/; /* skip to avoid loading attributes.pm */
371fce9b 3433#endif
bfed75c6 3434 else
371fce9b
DM
3435 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3436 }
3437
d3cea301
SB
3438 /* NOTE: any CV attrs applied here need to be part of
3439 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3440 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3441 CvLVALUE_on(PL_compcv);
3442 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3443 CvLOCKED_on(PL_compcv);
3444 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3445 CvMETHOD_on(PL_compcv);
06492da6
SF
3446 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3447 CvASSERTION_on(PL_compcv);
78f9721b
SM
3448 /* After we've set the flags, it could be argued that
3449 we don't need to do the attributes.pm-based setting
3450 process, and shouldn't bother appending recognized
d3cea301
SB
3451 flags. To experiment with that, uncomment the
3452 following "else". (Note that's already been
3453 uncommented. That keeps the above-applied built-in
3454 attributes from being intercepted (and possibly
3455 rejected) by a package's attribute routines, but is
3456 justified by the performance win for the common case
3457 of applying only built-in attributes.) */
0256094b 3458 else
78f9721b
SM
3459 attrs = append_elem(OP_LIST, attrs,
3460 newSVOP(OP_CONST, 0,
3461 newSVpvn(s, len)));
09bef843 3462 }
29595ff2 3463 s = PEEKSPACE(d);
0120eecf 3464 if (*s == ':' && s[1] != ':')
29595ff2 3465 s = PEEKSPACE(s+1);
0120eecf
GS
3466 else if (s == d)
3467 break; /* require real whitespace or :'s */
29595ff2 3468 /* XXX losing whitespace on sequential attributes here */
09bef843 3469 }
90771dc0
NC
3470 {
3471 const char tmp
3472 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3473 if (*s != ';' && *s != '}' && *s != tmp
3474 && (tmp != '=' || *s != ')')) {
3475 const char q = ((*s == '\'') ? '"' : '\'');
3476 /* If here for an expression, and parsed no attrs, back
3477 off. */
3478 if (tmp == '=' && !attrs) {
3479 s = PL_bufptr;
3480 break;
3481 }
3482 /* MUST advance bufptr here to avoid bogus "at end of line"
3483 context messages from yyerror().
3484 */
3485 PL_bufptr = s;
3486 yyerror( *s
3487 ? Perl_form(aTHX_ "Invalid separator character "
3488 "%c%c%c in attribute list", q, *s, q)
3489 : "Unterminated attribute list" );
3490 if (attrs)
3491 op_free(attrs);
3492 OPERATOR(':');
09bef843 3493 }
09bef843 3494 }
f9829d6b 3495 got_attrs:
09bef843 3496 if (attrs) {
9ded7720 3497 NEXTVAL_NEXTTOKE.opval = attrs;
09bef843
SB
3498 force_next(THING);
3499 }
3500 TOKEN(COLONATTR);
3501 }
a0d0e21e 3502 OPERATOR(':');
8990e307
LW
3503 case '(':
3504 s++;
3280af22
NIS
3505 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3506 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3507 else
3280af22 3508 PL_expect = XTERM;
29595ff2 3509 s = SKIPSPACE1(s);
a0d0e21e 3510 TOKEN('(');
378cc40b 3511 case ';':
f4dd75d9 3512 CLINE;
90771dc0
NC
3513 {
3514 const char tmp = *s++;
3515 OPERATOR(tmp);
3516 }
378cc40b 3517 case ')':
90771dc0
NC
3518 {
3519 const char tmp = *s++;
29595ff2 3520 s = SKIPSPACE1(s);
90771dc0
NC
3521 if (*s == '{')
3522 PREBLOCK(tmp);
3523 TERM(tmp);
3524 }