This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Need to update test count from change #27440.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2
NC
38#ifdef PERL_MAD
39/* XXX these probably need to be made into PL vars */
40static I32 realtokenstart;
41static I32 faketokens = 0;
42static MADPROP *thismad;
43static SV *thistoken;
44static SV *thisopen;
45static SV *thisstuff;
46static SV *thisclose;
47static SV *thiswhite;
48static SV *nextwhite;
49static SV *skipwhite;
50static SV *endwhite;
51static I32 curforce = -1;
52
53# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
54#endif
55
9059aa12
LW
56#define XFAKEBRACK 128
57#define XENUMMASK 127
58
39e02b42
JH
59#ifdef USE_UTF8_SCRIPTS
60# define UTF (!IN_BYTES)
2b9d42f0 61#else
746b446a 62# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 63#endif
a0ed51b3 64
61f0cdd9 65/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
66 * 1999-02-27 mjd-perl-patch@plover.com */
67#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
68
bf4acbe4
GS
69/* On MacOS, respect nonbreaking spaces */
70#ifdef MACOS_TRADITIONAL
71#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
72#else
73#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
74#endif
75
ffb4593c
NT
76/* LEX_* are values for PL_lex_state, the state of the lexer.
77 * They are arranged oddly so that the guard on the switch statement
79072805
LW
78 * can get by with a single comparison (if the compiler is smart enough).
79 */
80
fb73857a 81/* #define LEX_NOTPARSING 11 is done in perl.h. */
82
b6007c36
DM
83#define LEX_NORMAL 10 /* normal code (ie not within "...") */
84#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
85#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
86#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
87#define LEX_INTERPSTART 6 /* expecting the start of a $var */
88
89 /* at end of code, eg "$x" followed by: */
90#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
91#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
92
93#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
94 string or after \E, $foo, etc */
95#define LEX_INTERPCONST 2 /* NOT USED */
96#define LEX_FORMLINE 1 /* expecting a format line */
97#define LEX_KNOWNEXT 0 /* next token known; just return it */
98
79072805 99
bbf60fe6 100#ifdef DEBUGGING
27da23d5 101static const char* const lex_state_names[] = {
bbf60fe6
DM
102 "KNOWNEXT",
103 "FORMLINE",
104 "INTERPCONST",
105 "INTERPCONCAT",
106 "INTERPENDMAYBE",
107 "INTERPEND",
108 "INTERPSTART",
109 "INTERPPUSH",
110 "INTERPCASEMOD",
111 "INTERPNORMAL",
112 "NORMAL"
113};
114#endif
115
79072805
LW
116#ifdef ff_next
117#undef ff_next
d48672a2
LW
118#endif
119
79072805 120#include "keywords.h"
fe14fcc3 121
ffb4593c
NT
122/* CLINE is a macro that ensures PL_copline has a sane value */
123
ae986130
LW
124#ifdef CLINE
125#undef CLINE
126#endif
57843af0 127#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 128
29595ff2
NC
129#if 0 && defined(PERL_MAD)
130# define SKIPSPACE0(s) skipspace0(s)
131# define SKIPSPACE1(s) skipspace1(s)
132# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
133# define PEEKSPACE(s) skipspace2(s,0)
134#else
135# define SKIPSPACE0(s) skipspace(s)
136# define SKIPSPACE1(s) skipspace(s)
137# define SKIPSPACE2(s,tsv) skipspace(s)
138# define PEEKSPACE(s) skipspace(s)
139#endif
140
ffb4593c
NT
141/*
142 * Convenience functions to return different tokens and prime the
9cbb5ea2 143 * lexer for the next token. They all take an argument.
ffb4593c
NT
144 *
145 * TOKEN : generic token (used for '(', DOLSHARP, etc)
146 * OPERATOR : generic operator
147 * AOPERATOR : assignment operator
148 * PREBLOCK : beginning the block after an if, while, foreach, ...
149 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
150 * PREREF : *EXPR where EXPR is not a simple identifier
151 * TERM : expression term
152 * LOOPX : loop exiting command (goto, last, dump, etc)
153 * FTST : file test operator
154 * FUN0 : zero-argument function
2d2e263d 155 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
156 * BOop : bitwise or or xor
157 * BAop : bitwise and
158 * SHop : shift operator
159 * PWop : power operator
9cbb5ea2 160 * PMop : pattern-matching operator
ffb4593c
NT
161 * Aop : addition-level operator
162 * Mop : multiplication-level operator
163 * Eop : equality-testing operator
e5edeb50 164 * Rop : relational operator <= != gt
ffb4593c
NT
165 *
166 * Also see LOP and lop() below.
167 */
168
998054bd 169#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 170# define REPORT(retval) tokereport((I32)retval)
998054bd 171#else
bbf60fe6 172# define REPORT(retval) (retval)
998054bd
SC
173#endif
174
bbf60fe6
DM
175#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
176#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
177#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
178#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
179#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
180#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
181#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
182#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
183#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
184#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
185#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
186#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
187#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
188#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
189#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
190#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
191#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
192#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
193#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
194#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 195
a687059c
LW
196/* This bit of chicanery makes a unary function followed by
197 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
198 * The UNIDOR macro is for unary functions that can be followed by the //
199 * operator (such as C<shift // 0>).
a687059c 200 */
376fcdbf
AL
201#define UNI2(f,x) { \
202 yylval.ival = f; \
203 PL_expect = x; \
204 PL_bufptr = s; \
205 PL_last_uni = PL_oldbufptr; \
206 PL_last_lop_op = f; \
207 if (*s == '(') \
208 return REPORT( (int)FUNC1 ); \
29595ff2 209 s = PEEKSPACE(s); \
376fcdbf
AL
210 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
211 }
6f33ba73
RGS
212#define UNI(f) UNI2(f,XTERM)
213#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 214
376fcdbf
AL
215#define UNIBRACK(f) { \
216 yylval.ival = f; \
217 PL_bufptr = s; \
218 PL_last_uni = PL_oldbufptr; \
219 if (*s == '(') \
220 return REPORT( (int)FUNC1 ); \
29595ff2 221 s = PEEKSPACE(s); \
376fcdbf
AL
222 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
223 }
79072805 224
9f68db38 225/* grandfather return to old style */
3280af22 226#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 227
8fa7f367
JH
228#ifdef DEBUGGING
229
bbf60fe6
DM
230/* how to interpret the yylval associated with the token */
231enum token_type {
232 TOKENTYPE_NONE,
233 TOKENTYPE_IVAL,
234 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
235 TOKENTYPE_PVAL,
236 TOKENTYPE_OPVAL,
237 TOKENTYPE_GVVAL
238};
239
6d4a66ac
NC
240static struct debug_tokens {
241 const int token;
242 enum token_type type;
243 const char *name;
244} const debug_tokens[] =
9041c2e3 245{
bbf60fe6
DM
246 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
247 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
248 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
249 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
250 { ARROW, TOKENTYPE_NONE, "ARROW" },
251 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
252 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
253 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
254 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
255 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 256 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
257 { DO, TOKENTYPE_NONE, "DO" },
258 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
259 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
260 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
261 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
262 { ELSE, TOKENTYPE_NONE, "ELSE" },
263 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
264 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
265 { FOR, TOKENTYPE_IVAL, "FOR" },
266 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
267 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
268 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
269 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
270 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
271 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 272 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
273 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
274 { IF, TOKENTYPE_IVAL, "IF" },
275 { LABEL, TOKENTYPE_PVAL, "LABEL" },
276 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
277 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
278 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
279 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
280 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
281 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
282 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
283 { MY, TOKENTYPE_IVAL, "MY" },
284 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
285 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
286 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
287 { OROP, TOKENTYPE_IVAL, "OROP" },
288 { OROR, TOKENTYPE_NONE, "OROR" },
289 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
290 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
291 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
292 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
293 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
294 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
295 { PREINC, TOKENTYPE_NONE, "PREINC" },
296 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
297 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
298 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
299 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
300 { SUB, TOKENTYPE_NONE, "SUB" },
301 { THING, TOKENTYPE_OPVAL, "THING" },
302 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
303 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
304 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
305 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
306 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
307 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 308 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
309 { WHILE, TOKENTYPE_IVAL, "WHILE" },
310 { WORD, TOKENTYPE_OPVAL, "WORD" },
311 { 0, TOKENTYPE_NONE, 0 }
312};
313
314/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 315
bbf60fe6 316STATIC int
f5bd084c 317S_tokereport(pTHX_ I32 rv)
bbf60fe6 318{
97aff369 319 dVAR;
bbf60fe6 320 if (DEBUG_T_TEST) {
bd61b366 321 const char *name = NULL;
bbf60fe6 322 enum token_type type = TOKENTYPE_NONE;
f54cb97a 323 const struct debug_tokens *p;
396482e1 324 SV* const report = newSVpvs("<== ");
bbf60fe6 325
f54cb97a 326 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
327 if (p->token == (int)rv) {
328 name = p->name;
329 type = p->type;
330 break;
331 }
332 }
333 if (name)
54667de8 334 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
335 else if ((char)rv > ' ' && (char)rv < '~')
336 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
337 else if (!rv)
396482e1 338 sv_catpvs(report, "EOF");
bbf60fe6
DM
339 else
340 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
341 switch (type) {
342 case TOKENTYPE_NONE:
343 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
344 break;
345 case TOKENTYPE_IVAL:
e4584336 346 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
347 break;
348 case TOKENTYPE_OPNUM:
349 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
350 PL_op_name[yylval.ival]);
351 break;
352 case TOKENTYPE_PVAL:
353 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
354 break;
355 case TOKENTYPE_OPVAL:
b6007c36 356 if (yylval.opval) {
401441c0 357 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 358 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
359 if (yylval.opval->op_type == OP_CONST) {
360 Perl_sv_catpvf(aTHX_ report, " %s",
361 SvPEEK(cSVOPx_sv(yylval.opval)));
362 }
363
364 }
401441c0 365 else
396482e1 366 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
367 break;
368 }
b6007c36 369 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
370 };
371 return (int)rv;
998054bd
SC
372}
373
b6007c36
DM
374
375/* print the buffer with suitable escapes */
376
377STATIC void
378S_printbuf(pTHX_ const char* fmt, const char* s)
379{
396482e1 380 SV* const tmp = newSVpvs("");
b6007c36
DM
381 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
382 SvREFCNT_dec(tmp);
383}
384
8fa7f367
JH
385#endif
386
ffb4593c
NT
387/*
388 * S_ao
389 *
c963b151
BD
390 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
391 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
392 */
393
76e3520e 394STATIC int
cea2e8a9 395S_ao(pTHX_ int toketype)
a0d0e21e 396{
97aff369 397 dVAR;
3280af22
NIS
398 if (*PL_bufptr == '=') {
399 PL_bufptr++;
a0d0e21e
LW
400 if (toketype == ANDAND)
401 yylval.ival = OP_ANDASSIGN;
402 else if (toketype == OROR)
403 yylval.ival = OP_ORASSIGN;
c963b151
BD
404 else if (toketype == DORDOR)
405 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
406 toketype = ASSIGNOP;
407 }
408 return toketype;
409}
410
ffb4593c
NT
411/*
412 * S_no_op
413 * When Perl expects an operator and finds something else, no_op
414 * prints the warning. It always prints "<something> found where
415 * operator expected. It prints "Missing semicolon on previous line?"
416 * if the surprise occurs at the start of the line. "do you need to
417 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
418 * where the compiler doesn't know if foo is a method call or a function.
419 * It prints "Missing operator before end of line" if there's nothing
420 * after the missing operator, or "... before <...>" if there is something
421 * after the missing operator.
422 */
423
76e3520e 424STATIC void
bfed75c6 425S_no_op(pTHX_ const char *what, char *s)
463ee0b2 426{
97aff369 427 dVAR;
9d4ba2ae
AL
428 char * const oldbp = PL_bufptr;
429 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 430
1189a94a
GS
431 if (!s)
432 s = oldbp;
07c798fb 433 else
1189a94a 434 PL_bufptr = s;
cea2e8a9 435 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
436 if (ckWARN_d(WARN_SYNTAX)) {
437 if (is_first)
438 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
439 "\t(Missing semicolon on previous line?)\n");
440 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 441 const char *t;
56da5a46
RGS
442 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
443 if (t < PL_bufptr && isSPACE(*t))
444 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
445 "\t(Do you need to predeclare %.*s?)\n",
551405c4 446 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
447 }
448 else {
449 assert(s >= oldbp);
450 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 451 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 452 }
07c798fb 453 }
3280af22 454 PL_bufptr = oldbp;
8990e307
LW
455}
456
ffb4593c
NT
457/*
458 * S_missingterm
459 * Complain about missing quote/regexp/heredoc terminator.
460 * If it's called with (char *)NULL then it cauterizes the line buffer.
461 * If we're in a delimited string and the delimiter is a control
462 * character, it's reformatted into a two-char sequence like ^C.
463 * This is fatal.
464 */
465
76e3520e 466STATIC void
cea2e8a9 467S_missingterm(pTHX_ char *s)
8990e307 468{
97aff369 469 dVAR;
8990e307
LW
470 char tmpbuf[3];
471 char q;
472 if (s) {
9d4ba2ae 473 char * const nl = strrchr(s,'\n');
d2719217 474 if (nl)
8990e307
LW
475 *nl = '\0';
476 }
9d116dd7
JH
477 else if (
478#ifdef EBCDIC
479 iscntrl(PL_multi_close)
480#else
481 PL_multi_close < 32 || PL_multi_close == 127
482#endif
483 ) {
8990e307 484 *tmpbuf = '^';
585ec06d 485 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
486 tmpbuf[2] = '\0';
487 s = tmpbuf;
488 }
489 else {
eb160463 490 *tmpbuf = (char)PL_multi_close;
8990e307
LW
491 tmpbuf[1] = '\0';
492 s = tmpbuf;
493 }
494 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 495 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 496}
79072805 497
ef89dcc3 498#define FEATURE_IS_ENABLED(name) \
0d863452 499 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 500 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
501/*
502 * S_feature_is_enabled
503 * Check whether the named feature is enabled.
504 */
505STATIC bool
506S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
507{
97aff369 508 dVAR;
0d863452 509 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
510 char he_name[32] = "feature_";
511 (void) strncpy(&he_name[8], name, 24);
512
513 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
514}
515
ffb4593c
NT
516/*
517 * Perl_deprecate
ffb4593c
NT
518 */
519
79072805 520void
bfed75c6 521Perl_deprecate(pTHX_ const char *s)
a0d0e21e 522{
599cee73 523 if (ckWARN(WARN_DEPRECATED))
9014280d 524 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
525}
526
12bcd1a6 527void
bfed75c6 528Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
529{
530 /* This function should NOT be called for any new deprecated warnings */
531 /* Use Perl_deprecate instead */
532 /* */
533 /* It is here to maintain backward compatibility with the pre-5.8 */
534 /* warnings category hierarchy. The "deprecated" category used to */
535 /* live under the "syntax" category. It is now a top-level category */
536 /* in its own right. */
537
538 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 539 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
540 "Use of %s is deprecated", s);
541}
542
ffb4593c 543/*
9cbb5ea2
GS
544 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
545 * utf16-to-utf8-reversed.
ffb4593c
NT
546 */
547
c39cd008
GS
548#ifdef PERL_CR_FILTER
549static void
550strip_return(SV *sv)
551{
95a20fc0 552 register const char *s = SvPVX_const(sv);
9d4ba2ae 553 register const char * const e = s + SvCUR(sv);
c39cd008
GS
554 /* outer loop optimized to do nothing if there are no CR-LFs */
555 while (s < e) {
556 if (*s++ == '\r' && *s == '\n') {
557 /* hit a CR-LF, need to copy the rest */
558 register char *d = s - 1;
559 *d++ = *s++;
560 while (s < e) {
561 if (*s == '\r' && s[1] == '\n')
562 s++;
563 *d++ = *s++;
564 }
565 SvCUR(sv) -= s - d;
566 return;
567 }
568 }
569}
a868473f 570
76e3520e 571STATIC I32
c39cd008 572S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 573{
f54cb97a 574 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
575 if (count > 0 && !maxlen)
576 strip_return(sv);
577 return count;
a868473f
NIS
578}
579#endif
580
ffb4593c
NT
581/*
582 * Perl_lex_start
9cbb5ea2
GS
583 * Initialize variables. Uses the Perl save_stack to save its state (for
584 * recursive calls to the parser).
ffb4593c
NT
585 */
586
a0d0e21e 587void
864dbfa3 588Perl_lex_start(pTHX_ SV *line)
79072805 589{
97aff369 590 dVAR;
cfd0369c 591 const char *s;
8990e307
LW
592 STRLEN len;
593
3280af22
NIS
594 SAVEI32(PL_lex_dojoin);
595 SAVEI32(PL_lex_brackets);
3280af22
NIS
596 SAVEI32(PL_lex_casemods);
597 SAVEI32(PL_lex_starts);
598 SAVEI32(PL_lex_state);
7766f137 599 SAVEVPTR(PL_lex_inpat);
3280af22 600 SAVEI32(PL_lex_inwhat);
18b09519
GS
601 if (PL_lex_state == LEX_KNOWNEXT) {
602 I32 toke = PL_nexttoke;
603 while (--toke >= 0) {
604 SAVEI32(PL_nexttype[toke]);
605 SAVEVPTR(PL_nextval[toke]);
606 }
607 SAVEI32(PL_nexttoke);
18b09519 608 }
57843af0 609 SAVECOPLINE(PL_curcop);
3280af22
NIS
610 SAVEPPTR(PL_bufptr);
611 SAVEPPTR(PL_bufend);
612 SAVEPPTR(PL_oldbufptr);
613 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
614 SAVEPPTR(PL_last_lop);
615 SAVEPPTR(PL_last_uni);
3280af22
NIS
616 SAVEPPTR(PL_linestart);
617 SAVESPTR(PL_linestr);
8edd5f42
RGS
618 SAVEGENERICPV(PL_lex_brackstack);
619 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 620 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
621 SAVESPTR(PL_lex_stuff);
622 SAVEI32(PL_lex_defer);
09bef843 623 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 624 SAVESPTR(PL_lex_repl);
bebdddfc
GS
625 SAVEINT(PL_expect);
626 SAVEINT(PL_lex_expect);
3280af22
NIS
627
628 PL_lex_state = LEX_NORMAL;
629 PL_lex_defer = 0;
630 PL_expect = XSTATE;
631 PL_lex_brackets = 0;
a02a5408
JC
632 Newx(PL_lex_brackstack, 120, char);
633 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
634 PL_lex_casemods = 0;
635 *PL_lex_casestack = '\0';
636 PL_lex_dojoin = 0;
637 PL_lex_starts = 0;
a0714e2c
SS
638 PL_lex_stuff = NULL;
639 PL_lex_repl = NULL;
3280af22 640 PL_lex_inpat = 0;
76be56bc 641 PL_nexttoke = 0;
3280af22 642 PL_lex_inwhat = 0;
09bef843 643 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
644 PL_linestr = line;
645 if (SvREADONLY(PL_linestr))
646 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 647 s = SvPV_const(PL_linestr, len);
6f27f9a7 648 if (!len || s[len-1] != ';') {
3280af22
NIS
649 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
650 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 651 sv_catpvs(PL_linestr, "\n;");
8990e307 652 }
3280af22
NIS
653 SvTEMP_off(PL_linestr);
654 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
655 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 656 PL_last_lop = PL_last_uni = NULL;
3280af22 657 PL_rsfp = 0;
79072805 658}
a687059c 659
ffb4593c
NT
660/*
661 * Perl_lex_end
9cbb5ea2
GS
662 * Finalizer for lexing operations. Must be called when the parser is
663 * done with the lexer.
ffb4593c
NT
664 */
665
463ee0b2 666void
864dbfa3 667Perl_lex_end(pTHX)
463ee0b2 668{
97aff369 669 dVAR;
3280af22 670 PL_doextract = FALSE;
463ee0b2
LW
671}
672
ffb4593c
NT
673/*
674 * S_incline
675 * This subroutine has nothing to do with tilting, whether at windmills
676 * or pinball tables. Its name is short for "increment line". It
57843af0 677 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 678 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
679 * # line 500 "foo.pm"
680 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
681 */
682
76e3520e 683STATIC void
cea2e8a9 684S_incline(pTHX_ char *s)
463ee0b2 685{
97aff369 686 dVAR;
463ee0b2
LW
687 char *t;
688 char *n;
73659bf1 689 char *e;
463ee0b2 690 char ch;
463ee0b2 691
57843af0 692 CopLINE_inc(PL_curcop);
463ee0b2
LW
693 if (*s++ != '#')
694 return;
bf4acbe4 695 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
696 if (strnEQ(s, "line", 4))
697 s += 4;
698 else
699 return;
084592ab 700 if (SPACE_OR_TAB(*s))
73659bf1 701 s++;
4e553d73 702 else
73659bf1 703 return;
bf4acbe4 704 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
705 if (!isDIGIT(*s))
706 return;
707 n = s;
708 while (isDIGIT(*s))
709 s++;
bf4acbe4 710 while (SPACE_OR_TAB(*s))
463ee0b2 711 s++;
73659bf1 712 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 713 s++;
73659bf1
GS
714 e = t + 1;
715 }
463ee0b2 716 else {
463ee0b2 717 for (t = s; !isSPACE(*t); t++) ;
73659bf1 718 e = t;
463ee0b2 719 }
bf4acbe4 720 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
721 e++;
722 if (*e != '\n' && *e != '\0')
723 return; /* false alarm */
724
463ee0b2
LW
725 ch = *t;
726 *t = '\0';
f4dd75d9 727 if (t - s > 0) {
8a5ee598 728#ifndef USE_ITHREADS
c4420975 729 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
730 STRLEN tmplen = cf ? strlen(cf) : 0;
731 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
732 /* must copy *{"::_<(eval N)[oldfilename:L]"}
733 * to *{"::_<newfilename"} */
734 char smallbuf[256], smallbuf2[256];
735 char *tmpbuf, *tmpbuf2;
8a5ee598 736 GV **gvp, *gv2;
e66cf94c
RGS
737 STRLEN tmplen2 = strlen(s);
738 if (tmplen + 3 < sizeof smallbuf)
739 tmpbuf = smallbuf;
740 else
741 Newx(tmpbuf, tmplen + 3, char);
742 if (tmplen2 + 3 < sizeof smallbuf2)
743 tmpbuf2 = smallbuf2;
744 else
745 Newx(tmpbuf2, tmplen2 + 3, char);
746 tmpbuf[0] = tmpbuf2[0] = '_';
747 tmpbuf[1] = tmpbuf2[1] = '<';
748 memcpy(tmpbuf + 2, cf, ++tmplen);
749 memcpy(tmpbuf2 + 2, s, ++tmplen2);
750 ++tmplen; ++tmplen2;
8a5ee598
RGS
751 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
752 if (gvp) {
753 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
754 if (!isGV(gv2))
755 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
756 /* adjust ${"::_<newfilename"} to store the new file name */
757 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
758 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
759 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
760 }
e66cf94c
RGS
761 if (tmpbuf != smallbuf) Safefree(tmpbuf);
762 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
763 }
8a5ee598 764#endif
05ec9bb3 765 CopFILE_free(PL_curcop);
57843af0 766 CopFILE_set(PL_curcop, s);
f4dd75d9 767 }
463ee0b2 768 *t = ch;
57843af0 769 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
770}
771
29595ff2
NC
772#ifdef PERL_MAD
773/* skip space before thistoken */
774
775STATIC char *
776S_skipspace0(pTHX_ register char *s)
777{
778 s = skipspace(s);
779 if (!PL_madskills)
780 return s;
781 if (skipwhite) {
782 if (!thiswhite)
783 thiswhite = newSVpvn("",0);
784 sv_catsv(thiswhite, skipwhite);
785 sv_free(skipwhite);
786 skipwhite = 0;
787 }
788 realtokenstart = s - SvPVX(PL_linestr);
789 return s;
790}
791
792/* skip space after thistoken */
793
794STATIC char *
795S_skipspace1(pTHX_ register char *s)
796{
797 char *start = s;
798 I32 startoff = start - SvPVX(PL_linestr);
799
800 s = skipspace(s);
801 if (!PL_madskills)
802 return s;
803 start = SvPVX(PL_linestr) + startoff;
804 if (!thistoken && realtokenstart >= 0) {
805 char *tstart = SvPVX(PL_linestr) + realtokenstart;
806 thistoken = newSVpvn(tstart, start - tstart);
807 }
808 realtokenstart = -1;
809 if (skipwhite) {
810 if (!nextwhite)
811 nextwhite = newSVpvn("",0);
812 sv_catsv(nextwhite, skipwhite);
813 sv_free(skipwhite);
814 skipwhite = 0;
815 }
816 return s;
817}
818
819STATIC char *
820S_skipspace2(pTHX_ register char *s, SV **svp)
821{
822 char *start = s;
823 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
824 I32 startoff = start - SvPVX(PL_linestr);
825 s = skipspace(s);
826 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
827 if (!PL_madskills || !svp)
828 return s;
829 start = SvPVX(PL_linestr) + startoff;
830 if (!thistoken && realtokenstart >= 0) {
831 char *tstart = SvPVX(PL_linestr) + realtokenstart;
832 thistoken = newSVpvn(tstart, start - tstart);
833 realtokenstart = -1;
834 }
835 if (skipwhite) {
836 if (!*svp)
837 *svp = newSVpvn("",0);
838 sv_setsv(*svp, skipwhite);
839 sv_free(skipwhite);
840 skipwhite = 0;
841 }
842
843 return s;
844}
845#endif
846
ffb4593c
NT
847/*
848 * S_skipspace
849 * Called to gobble the appropriate amount and type of whitespace.
850 * Skips comments as well.
851 */
852
76e3520e 853STATIC char *
cea2e8a9 854S_skipspace(pTHX_ register char *s)
a687059c 855{
97aff369 856 dVAR;
3280af22 857 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 858 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
859 s++;
860 return s;
861 }
862 for (;;) {
fd049845 863 STRLEN prevlen;
09bef843 864 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 865 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
866 while (s < PL_bufend && isSPACE(*s)) {
867 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
868 incline(s);
869 }
ffb4593c
NT
870
871 /* comment */
3280af22
NIS
872 if (s < PL_bufend && *s == '#') {
873 while (s < PL_bufend && *s != '\n')
463ee0b2 874 s++;
60e6418e 875 if (s < PL_bufend) {
463ee0b2 876 s++;
60e6418e
GS
877 if (PL_in_eval && !PL_rsfp) {
878 incline(s);
879 continue;
880 }
881 }
463ee0b2 882 }
ffb4593c
NT
883
884 /* only continue to recharge the buffer if we're at the end
885 * of the buffer, we're not reading from a source filter, and
886 * we're in normal lexing mode
887 */
09bef843
SB
888 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
889 PL_lex_state == LEX_FORMLINE)
463ee0b2 890 return s;
ffb4593c
NT
891
892 /* try to recharge the buffer */
9cbb5ea2 893 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 894 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2
GS
895 {
896 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
897 if (PL_minus_p) {
898 sv_setpv(PL_linestr,
899 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 900 PL_minus_n = PL_minus_p = 0;
a0d0e21e 901 }
01a19ab0
NC
902 else if (PL_minus_n) {
903 sv_setpvn(PL_linestr, ";}", 2);
904 PL_minus_n = 0;
905 }
a0d0e21e 906 else
4147a61b 907 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
908
909 /* reset variables for next time we lex */
9cbb5ea2
GS
910 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
911 = SvPVX(PL_linestr);
3280af22 912 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 913 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
914
915 /* Close the filehandle. Could be from -P preprocessor,
916 * STDIN, or a regular file. If we were reading code from
917 * STDIN (because the commandline held no -e or filename)
918 * then we don't close it, we reset it so the code can
919 * read from STDIN too.
920 */
921
3280af22
NIS
922 if (PL_preprocess && !PL_in_eval)
923 (void)PerlProc_pclose(PL_rsfp);
924 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
925 PerlIO_clearerr(PL_rsfp);
8990e307 926 else
3280af22 927 (void)PerlIO_close(PL_rsfp);
4608196e 928 PL_rsfp = NULL;
463ee0b2
LW
929 return s;
930 }
ffb4593c
NT
931
932 /* not at end of file, so we only read another line */
09bef843
SB
933 /* make corresponding updates to old pointers, for yyerror() */
934 oldprevlen = PL_oldbufptr - PL_bufend;
935 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
936 if (PL_last_uni)
937 oldunilen = PL_last_uni - PL_bufend;
938 if (PL_last_lop)
939 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
940 PL_linestart = PL_bufptr = s + prevlen;
941 PL_bufend = s + SvCUR(PL_linestr);
942 s = PL_bufptr;
09bef843
SB
943 PL_oldbufptr = s + oldprevlen;
944 PL_oldoldbufptr = s + oldoldprevlen;
945 if (PL_last_uni)
946 PL_last_uni = s + oldunilen;
947 if (PL_last_lop)
948 PL_last_lop = s + oldloplen;
a0d0e21e 949 incline(s);
ffb4593c
NT
950
951 /* debugger active and we're not compiling the debugger code,
952 * so store the line into the debugger's array of lines
953 */
3280af22 954 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 955 SV * const sv = newSV(0);
8990e307
LW
956
957 sv_upgrade(sv, SVt_PVMG);
3280af22 958 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 959 (void)SvIOK_on(sv);
45977657 960 SvIV_set(sv, 0);
36c7798d 961 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 962 }
463ee0b2 963 }
a687059c 964}
378cc40b 965
ffb4593c
NT
966/*
967 * S_check_uni
968 * Check the unary operators to ensure there's no ambiguity in how they're
969 * used. An ambiguous piece of code would be:
970 * rand + 5
971 * This doesn't mean rand() + 5. Because rand() is a unary operator,
972 * the +5 is its argument.
973 */
974
76e3520e 975STATIC void
cea2e8a9 976S_check_uni(pTHX)
ba106d47 977{
97aff369 978 dVAR;
2f3197b3 979 char *s;
a0d0e21e 980 char *t;
2f3197b3 981
3280af22 982 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 983 return;
3280af22
NIS
984 while (isSPACE(*PL_last_uni))
985 PL_last_uni++;
7e2040f0 986 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 987 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 988 return;
6136c704
AL
989
990 /* XXX Things like this are just so nasty. We shouldn't be modifying
991 source code, even if we realquick set it back. */
0453d815 992 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 993 const char ch = *s;
0453d815 994 *s = '\0';
9014280d 995 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 996 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
997 PL_last_uni);
998 *s = ch;
999 }
2f3197b3
LW
1000}
1001
ffb4593c
NT
1002/*
1003 * LOP : macro to build a list operator. Its behaviour has been replaced
1004 * with a subroutine, S_lop() for which LOP is just another name.
1005 */
1006
a0d0e21e
LW
1007#define LOP(f,x) return lop(f,x,s)
1008
ffb4593c
NT
1009/*
1010 * S_lop
1011 * Build a list operator (or something that might be one). The rules:
1012 * - if we have a next token, then it's a list operator [why?]
1013 * - if the next thing is an opening paren, then it's a function
1014 * - else it's a list operator
1015 */
1016
76e3520e 1017STATIC I32
a0be28da 1018S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1019{
97aff369 1020 dVAR;
79072805 1021 yylval.ival = f;
35c8bce7 1022 CLINE;
3280af22
NIS
1023 PL_expect = x;
1024 PL_bufptr = s;
1025 PL_last_lop = PL_oldbufptr;
eb160463 1026 PL_last_lop_op = (OPCODE)f;
3280af22 1027 if (PL_nexttoke)
bbf60fe6 1028 return REPORT(LSTOP);
79072805 1029 if (*s == '(')
bbf60fe6 1030 return REPORT(FUNC);
29595ff2 1031 s = PEEKSPACE(s);
79072805 1032 if (*s == '(')
bbf60fe6 1033 return REPORT(FUNC);
79072805 1034 else
bbf60fe6 1035 return REPORT(LSTOP);
79072805
LW
1036}
1037
ffb4593c
NT
1038/*
1039 * S_force_next
9cbb5ea2 1040 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1041 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
1042 * to know what token to return the next time the lexer is called. Caller
1043 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
1044 * handles the token correctly.
ffb4593c
NT
1045 */
1046
4e553d73 1047STATIC void
cea2e8a9 1048S_force_next(pTHX_ I32 type)
79072805 1049{
97aff369 1050 dVAR;
3280af22
NIS
1051 PL_nexttype[PL_nexttoke] = type;
1052 PL_nexttoke++;
1053 if (PL_lex_state != LEX_KNOWNEXT) {
1054 PL_lex_defer = PL_lex_state;
1055 PL_lex_expect = PL_expect;
1056 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
1057 }
1058}
1059
d0a148a6
NC
1060STATIC SV *
1061S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1062{
97aff369 1063 dVAR;
9d4ba2ae 1064 SV * const sv = newSVpvn(start,len);
bfed75c6 1065 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1066 SvUTF8_on(sv);
1067 return sv;
1068}
1069
ffb4593c
NT
1070/*
1071 * S_force_word
1072 * When the lexer knows the next thing is a word (for instance, it has
1073 * just seen -> and it knows that the next char is a word char, then
1074 * it calls S_force_word to stick the next word into the PL_next lookahead.
1075 *
1076 * Arguments:
b1b65b59 1077 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1078 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1079 * int check_keyword : if true, Perl checks to make sure the word isn't
1080 * a keyword (do this if the word is a label, e.g. goto FOO)
1081 * int allow_pack : if true, : characters will also be allowed (require,
1082 * use, etc. do this)
9cbb5ea2 1083 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1084 */
1085
76e3520e 1086STATIC char *
cea2e8a9 1087S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1088{
97aff369 1089 dVAR;
463ee0b2
LW
1090 register char *s;
1091 STRLEN len;
4e553d73 1092
29595ff2 1093 start = SKIPSPACE1(start);
463ee0b2 1094 s = start;
7e2040f0 1095 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1096 (allow_pack && *s == ':') ||
15f0808c 1097 (allow_initial_tick && *s == '\'') )
a0d0e21e 1098 {
3280af22
NIS
1099 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1100 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
1101 return start;
1102 if (token == METHOD) {
29595ff2 1103 s = SKIPSPACE1(s);
463ee0b2 1104 if (*s == '(')
3280af22 1105 PL_expect = XTERM;
463ee0b2 1106 else {
3280af22 1107 PL_expect = XOPERATOR;
463ee0b2 1108 }
79072805 1109 }
d0a148a6
NC
1110 PL_nextval[PL_nexttoke].opval
1111 = (OP*)newSVOP(OP_CONST,0,
1112 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 1113 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
1114 force_next(token);
1115 }
1116 return s;
1117}
1118
ffb4593c
NT
1119/*
1120 * S_force_ident
9cbb5ea2 1121 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1122 * text only contains the "foo" portion. The first argument is a pointer
1123 * to the "foo", and the second argument is the type symbol to prefix.
1124 * Forces the next token to be a "WORD".
9cbb5ea2 1125 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1126 */
1127
76e3520e 1128STATIC void
bfed75c6 1129S_force_ident(pTHX_ register const char *s, int kind)
79072805 1130{
97aff369 1131 dVAR;
79072805 1132 if (s && *s) {
90e5519e
NC
1133 const STRLEN len = strlen(s);
1134 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
3280af22 1135 PL_nextval[PL_nexttoke].opval = o;
79072805 1136 force_next(WORD);
748a9306 1137 if (kind) {
11343788 1138 o->op_private = OPpCONST_ENTERED;
55497cff 1139 /* XXX see note in pp_entereval() for why we forgo typo
1140 warnings if the symbol must be introduced in an eval.
1141 GSAR 96-10-12 */
90e5519e
NC
1142 gv_fetchpvn_flags(s, len,
1143 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1144 : GV_ADD,
1145 kind == '$' ? SVt_PV :
1146 kind == '@' ? SVt_PVAV :
1147 kind == '%' ? SVt_PVHV :
a0d0e21e 1148 SVt_PVGV
90e5519e 1149 );
748a9306 1150 }
79072805
LW
1151 }
1152}
1153
1571675a
GS
1154NV
1155Perl_str_to_version(pTHX_ SV *sv)
1156{
1157 NV retval = 0.0;
1158 NV nshift = 1.0;
1159 STRLEN len;
cfd0369c 1160 const char *start = SvPV_const(sv,len);
9d4ba2ae 1161 const char * const end = start + len;
504618e9 1162 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1163 while (start < end) {
ba210ebe 1164 STRLEN skip;
1571675a
GS
1165 UV n;
1166 if (utf)
9041c2e3 1167 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1168 else {
1169 n = *(U8*)start;
1170 skip = 1;
1171 }
1172 retval += ((NV)n)/nshift;
1173 start += skip;
1174 nshift *= 1000;
1175 }
1176 return retval;
1177}
1178
4e553d73 1179/*
ffb4593c
NT
1180 * S_force_version
1181 * Forces the next token to be a version number.
e759cc13
RGS
1182 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1183 * and if "guessing" is TRUE, then no new token is created (and the caller
1184 * must use an alternative parsing method).
ffb4593c
NT
1185 */
1186
76e3520e 1187STATIC char *
e759cc13 1188S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1189{
97aff369 1190 dVAR;
5f66b61c 1191 OP *version = NULL;
44dcb63b 1192 char *d;
89bfa8cd 1193
29595ff2 1194 s = SKIPSPACE1(s);
89bfa8cd 1195
44dcb63b 1196 d = s;
dd629d5b 1197 if (*d == 'v')
44dcb63b 1198 d++;
44dcb63b 1199 if (isDIGIT(*d)) {
e759cc13
RGS
1200 while (isDIGIT(*d) || *d == '_' || *d == '.')
1201 d++;
9f3d182e 1202 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1203 SV *ver;
b73d6f50 1204 s = scan_num(s, &yylval);
89bfa8cd 1205 version = yylval.opval;
dd629d5b
GS
1206 ver = cSVOPx(version)->op_sv;
1207 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1208 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1209 SvNV_set(ver, str_to_version(ver));
1571675a 1210 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1211 }
89bfa8cd 1212 }
e759cc13
RGS
1213 else if (guessing)
1214 return s;
89bfa8cd 1215 }
1216
1217 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1218 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1219 force_next(WORD);
89bfa8cd 1220
e759cc13 1221 return s;
89bfa8cd 1222}
1223
ffb4593c
NT
1224/*
1225 * S_tokeq
1226 * Tokenize a quoted string passed in as an SV. It finds the next
1227 * chunk, up to end of string or a backslash. It may make a new
1228 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1229 * turns \\ into \.
1230 */
1231
76e3520e 1232STATIC SV *
cea2e8a9 1233S_tokeq(pTHX_ SV *sv)
79072805 1234{
97aff369 1235 dVAR;
79072805
LW
1236 register char *s;
1237 register char *send;
1238 register char *d;
b3ac6de7
IZ
1239 STRLEN len = 0;
1240 SV *pv = sv;
79072805
LW
1241
1242 if (!SvLEN(sv))
b3ac6de7 1243 goto finish;
79072805 1244
a0d0e21e 1245 s = SvPV_force(sv, len);
21a311ee 1246 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1247 goto finish;
463ee0b2 1248 send = s + len;
79072805
LW
1249 while (s < send && *s != '\\')
1250 s++;
1251 if (s == send)
b3ac6de7 1252 goto finish;
79072805 1253 d = s;
be4731d2 1254 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1255 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1256 if (SvUTF8(sv))
1257 SvUTF8_on(pv);
1258 }
79072805
LW
1259 while (s < send) {
1260 if (*s == '\\') {
a0d0e21e 1261 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1262 s++; /* all that, just for this */
1263 }
1264 *d++ = *s++;
1265 }
1266 *d = '\0';
95a20fc0 1267 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1268 finish:
3280af22 1269 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1270 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1271 return sv;
1272}
1273
ffb4593c
NT
1274/*
1275 * Now come three functions related to double-quote context,
1276 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1277 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1278 * interact with PL_lex_state, and create fake ( ... ) argument lists
1279 * to handle functions and concatenation.
1280 * They assume that whoever calls them will be setting up a fake
1281 * join call, because each subthing puts a ',' after it. This lets
1282 * "lower \luPpEr"
1283 * become
1284 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1285 *
1286 * (I'm not sure whether the spurious commas at the end of lcfirst's
1287 * arguments and join's arguments are created or not).
1288 */
1289
1290/*
1291 * S_sublex_start
1292 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1293 *
1294 * Pattern matching will set PL_lex_op to the pattern-matching op to
1295 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1296 *
1297 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1298 *
1299 * Everything else becomes a FUNC.
1300 *
1301 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1302 * had an OP_CONST or OP_READLINE). This just sets us up for a
1303 * call to S_sublex_push().
1304 */
1305
76e3520e 1306STATIC I32
cea2e8a9 1307S_sublex_start(pTHX)
79072805 1308{
97aff369 1309 dVAR;
0d46e09a 1310 register const I32 op_type = yylval.ival;
79072805
LW
1311
1312 if (op_type == OP_NULL) {
3280af22 1313 yylval.opval = PL_lex_op;
5f66b61c 1314 PL_lex_op = NULL;
79072805
LW
1315 return THING;
1316 }
1317 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1318 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1319
1320 if (SvTYPE(sv) == SVt_PVIV) {
1321 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1322 STRLEN len;
96a5add6 1323 const char * const p = SvPV_const(sv, len);
f54cb97a 1324 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1325 if (SvUTF8(sv))
1326 SvUTF8_on(nsv);
b3ac6de7
IZ
1327 SvREFCNT_dec(sv);
1328 sv = nsv;
4e553d73 1329 }
b3ac6de7 1330 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1331 PL_lex_stuff = NULL;
6f33ba73
RGS
1332 /* Allow <FH> // "foo" */
1333 if (op_type == OP_READLINE)
1334 PL_expect = XTERMORDORDOR;
79072805
LW
1335 return THING;
1336 }
1337
3280af22
NIS
1338 PL_sublex_info.super_state = PL_lex_state;
1339 PL_sublex_info.sub_inwhat = op_type;
1340 PL_sublex_info.sub_op = PL_lex_op;
1341 PL_lex_state = LEX_INTERPPUSH;
55497cff 1342
3280af22
NIS
1343 PL_expect = XTERM;
1344 if (PL_lex_op) {
1345 yylval.opval = PL_lex_op;
5f66b61c 1346 PL_lex_op = NULL;
55497cff 1347 return PMFUNC;
1348 }
1349 else
1350 return FUNC;
1351}
1352
ffb4593c
NT
1353/*
1354 * S_sublex_push
1355 * Create a new scope to save the lexing state. The scope will be
1356 * ended in S_sublex_done. Returns a '(', starting the function arguments
1357 * to the uc, lc, etc. found before.
1358 * Sets PL_lex_state to LEX_INTERPCONCAT.
1359 */
1360
76e3520e 1361STATIC I32
cea2e8a9 1362S_sublex_push(pTHX)
55497cff 1363{
27da23d5 1364 dVAR;
f46d017c 1365 ENTER;
55497cff 1366
3280af22
NIS
1367 PL_lex_state = PL_sublex_info.super_state;
1368 SAVEI32(PL_lex_dojoin);
1369 SAVEI32(PL_lex_brackets);
3280af22
NIS
1370 SAVEI32(PL_lex_casemods);
1371 SAVEI32(PL_lex_starts);
1372 SAVEI32(PL_lex_state);
7766f137 1373 SAVEVPTR(PL_lex_inpat);
3280af22 1374 SAVEI32(PL_lex_inwhat);
57843af0 1375 SAVECOPLINE(PL_curcop);
3280af22 1376 SAVEPPTR(PL_bufptr);
8452ff4b 1377 SAVEPPTR(PL_bufend);
3280af22
NIS
1378 SAVEPPTR(PL_oldbufptr);
1379 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1380 SAVEPPTR(PL_last_lop);
1381 SAVEPPTR(PL_last_uni);
3280af22
NIS
1382 SAVEPPTR(PL_linestart);
1383 SAVESPTR(PL_linestr);
8edd5f42
RGS
1384 SAVEGENERICPV(PL_lex_brackstack);
1385 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1386
1387 PL_linestr = PL_lex_stuff;
a0714e2c 1388 PL_lex_stuff = NULL;
3280af22 1389
9cbb5ea2
GS
1390 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1391 = SvPVX(PL_linestr);
3280af22 1392 PL_bufend += SvCUR(PL_linestr);
bd61b366 1393 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1394 SAVEFREESV(PL_linestr);
1395
1396 PL_lex_dojoin = FALSE;
1397 PL_lex_brackets = 0;
a02a5408
JC
1398 Newx(PL_lex_brackstack, 120, char);
1399 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1400 PL_lex_casemods = 0;
1401 *PL_lex_casestack = '\0';
1402 PL_lex_starts = 0;
1403 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1404 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1405
1406 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1407 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1408 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1409 else
5f66b61c 1410 PL_lex_inpat = NULL;
79072805 1411
55497cff 1412 return '(';
79072805
LW
1413}
1414
ffb4593c
NT
1415/*
1416 * S_sublex_done
1417 * Restores lexer state after a S_sublex_push.
1418 */
1419
76e3520e 1420STATIC I32
cea2e8a9 1421S_sublex_done(pTHX)
79072805 1422{
27da23d5 1423 dVAR;
3280af22 1424 if (!PL_lex_starts++) {
396482e1 1425 SV * const sv = newSVpvs("");
9aa983d2
JH
1426 if (SvUTF8(PL_linestr))
1427 SvUTF8_on(sv);
3280af22 1428 PL_expect = XOPERATOR;
9aa983d2 1429 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1430 return THING;
1431 }
1432
3280af22
NIS
1433 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1434 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1435 return yylex();
79072805
LW
1436 }
1437
ffb4593c 1438 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1439 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1440 PL_linestr = PL_lex_repl;
1441 PL_lex_inpat = 0;
1442 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1443 PL_bufend += SvCUR(PL_linestr);
bd61b366 1444 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1445 SAVEFREESV(PL_linestr);
1446 PL_lex_dojoin = FALSE;
1447 PL_lex_brackets = 0;
3280af22
NIS
1448 PL_lex_casemods = 0;
1449 *PL_lex_casestack = '\0';
1450 PL_lex_starts = 0;
25da4f38 1451 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1452 PL_lex_state = LEX_INTERPNORMAL;
1453 PL_lex_starts++;
e9fa98b2
HS
1454 /* we don't clear PL_lex_repl here, so that we can check later
1455 whether this is an evalled subst; that means we rely on the
1456 logic to ensure sublex_done() is called again only via the
1457 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1458 }
e9fa98b2 1459 else {
3280af22 1460 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1461 PL_lex_repl = NULL;
e9fa98b2 1462 }
79072805 1463 return ',';
ffed7fef
LW
1464 }
1465 else {
f46d017c 1466 LEAVE;
3280af22
NIS
1467 PL_bufend = SvPVX(PL_linestr);
1468 PL_bufend += SvCUR(PL_linestr);
1469 PL_expect = XOPERATOR;
09bef843 1470 PL_sublex_info.sub_inwhat = 0;
79072805 1471 return ')';
ffed7fef
LW
1472 }
1473}
1474
02aa26ce
NT
1475/*
1476 scan_const
1477
1478 Extracts a pattern, double-quoted string, or transliteration. This
1479 is terrifying code.
1480
3280af22
NIS
1481 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1482 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1483 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1484
9b599b2a
GS
1485 Returns a pointer to the character scanned up to. Iff this is
1486 advanced from the start pointer supplied (ie if anything was
1487 successfully parsed), will leave an OP for the substring scanned
1488 in yylval. Caller must intuit reason for not parsing further
1489 by looking at the next characters herself.
1490
02aa26ce
NT
1491 In patterns:
1492 backslashes:
1493 double-quoted style: \r and \n
1494 regexp special ones: \D \s
1495 constants: \x3
1496 backrefs: \1 (deprecated in substitution replacements)
1497 case and quoting: \U \Q \E
1498 stops on @ and $, but not for $ as tail anchor
1499
1500 In transliterations:
1501 characters are VERY literal, except for - not at the start or end
1502 of the string, which indicates a range. scan_const expands the
1503 range to the full set of intermediate characters.
1504
1505 In double-quoted strings:
1506 backslashes:
1507 double-quoted style: \r and \n
1508 constants: \x3
1509 backrefs: \1 (deprecated)
1510 case and quoting: \U \Q \E
1511 stops on @ and $
1512
1513 scan_const does *not* construct ops to handle interpolated strings.
1514 It stops processing as soon as it finds an embedded $ or @ variable
1515 and leaves it to the caller to work out what's going on.
1516
da6eedaa 1517 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1518
1519 $ in pattern could be $foo or could be tail anchor. Assumption:
1520 it's a tail anchor if $ is the last thing in the string, or if it's
1521 followed by one of ")| \n\t"
1522
1523 \1 (backreferences) are turned into $1
1524
1525 The structure of the code is
1526 while (there's a character to process) {
1527 handle transliteration ranges
1528 skip regexp comments
1529 skip # initiated comments in //x patterns
1530 check for embedded @foo
1531 check for embedded scalars
1532 if (backslash) {
1533 leave intact backslashes from leave (below)
1534 deprecate \1 in strings and sub replacements
1535 handle string-changing backslashes \l \U \Q \E, etc.
1536 switch (what was escaped) {
1537 handle - in a transliteration (becomes a literal -)
1538 handle \132 octal characters
1539 handle 0x15 hex characters
1540 handle \cV (control V)
1541 handle printf backslashes (\f, \r, \n, etc)
1542 } (end switch)
1543 } (end if backslash)
1544 } (end while character to read)
4e553d73 1545
02aa26ce
NT
1546*/
1547
76e3520e 1548STATIC char *
cea2e8a9 1549S_scan_const(pTHX_ char *start)
79072805 1550{
97aff369 1551 dVAR;
3280af22 1552 register char *send = PL_bufend; /* end of the constant */
561b68a9 1553 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1554 register char *s = start; /* start of the constant */
1555 register char *d = SvPVX(sv); /* destination for copies */
1556 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1557 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1558 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1559 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1560 UV uv;
4c3a8340
TS
1561#ifdef EBCDIC
1562 UV literal_endpoint = 0;
1563#endif
012bcf8d 1564
dff6d3cd 1565 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1566 PL_lex_inpat
b6d5fef8 1567 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1568 : "";
79072805 1569
2b9d42f0
NIS
1570 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1571 /* If we are doing a trans and we know we want UTF8 set expectation */
1572 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1573 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1574 }
1575
1576
79072805 1577 while (s < send || dorange) {
02aa26ce 1578 /* get transliterations out of the way (they're most literal) */
3280af22 1579 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1580 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1581 if (dorange) {
1ba5c669
JH
1582 I32 i; /* current expanded character */
1583 I32 min; /* first character in range */
1584 I32 max; /* last character in range */
02aa26ce 1585
2b9d42f0 1586 if (has_utf8) {
9d4ba2ae 1587 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1588 char *e = d++;
1589 while (e-- > c)
1590 *(e + 1) = *e;
25716404 1591 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1592 /* mark the range as done, and continue */
1593 dorange = FALSE;
1594 didrange = TRUE;
1595 continue;
1596 }
2b9d42f0 1597
95a20fc0 1598 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1599 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1600 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1601 d -= 2; /* eat the first char and the - */
1602
8ada0baa
JH
1603 min = (U8)*d; /* first char in range */
1604 max = (U8)d[1]; /* last char in range */
1605
c2e66d9e 1606 if (min > max) {
01ec43d0 1607 Perl_croak(aTHX_
d1573ac7 1608 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1609 (char)min, (char)max);
c2e66d9e
GS
1610 }
1611
c7f1f016 1612#ifdef EBCDIC
4c3a8340
TS
1613 if (literal_endpoint == 2 &&
1614 ((isLOWER(min) && isLOWER(max)) ||
1615 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1616 if (isLOWER(min)) {
1617 for (i = min; i <= max; i++)
1618 if (isLOWER(i))
db42d148 1619 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1620 } else {
1621 for (i = min; i <= max; i++)
1622 if (isUPPER(i))
db42d148 1623 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1624 }
1625 }
1626 else
1627#endif
1628 for (i = min; i <= max; i++)
eb160463 1629 *d++ = (char)i;
02aa26ce
NT
1630
1631 /* mark the range as done, and continue */
79072805 1632 dorange = FALSE;
01ec43d0 1633 didrange = TRUE;
4c3a8340
TS
1634#ifdef EBCDIC
1635 literal_endpoint = 0;
1636#endif
79072805 1637 continue;
4e553d73 1638 }
02aa26ce
NT
1639
1640 /* range begins (ignore - as first or last char) */
79072805 1641 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1642 if (didrange) {
1fafa243 1643 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1644 }
2b9d42f0 1645 if (has_utf8) {
25716404 1646 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1647 s++;
1648 continue;
1649 }
79072805
LW
1650 dorange = TRUE;
1651 s++;
01ec43d0
GS
1652 }
1653 else {
1654 didrange = FALSE;
4c3a8340
TS
1655#ifdef EBCDIC
1656 literal_endpoint = 0;
1657#endif
01ec43d0 1658 }
79072805 1659 }
02aa26ce
NT
1660
1661 /* if we get here, we're not doing a transliteration */
1662
0f5d15d6
IZ
1663 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1664 except for the last char, which will be done separately. */
3280af22 1665 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1666 if (s[2] == '#') {
e994fd66 1667 while (s+1 < send && *s != ')')
db42d148 1668 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1669 }
1670 else if (s[2] == '{' /* This should match regcomp.c */
1671 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1672 {
cc6b7395 1673 I32 count = 1;
0f5d15d6 1674 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1675 char c;
1676
d9f97599
GS
1677 while (count && (c = *regparse)) {
1678 if (c == '\\' && regparse[1])
1679 regparse++;
4e553d73 1680 else if (c == '{')
cc6b7395 1681 count++;
4e553d73 1682 else if (c == '}')
cc6b7395 1683 count--;
d9f97599 1684 regparse++;
cc6b7395 1685 }
e994fd66 1686 if (*regparse != ')')
5bdf89e7 1687 regparse--; /* Leave one char for continuation. */
0f5d15d6 1688 while (s < regparse)
db42d148 1689 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1690 }
748a9306 1691 }
02aa26ce
NT
1692
1693 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1694 else if (*s == '#' && PL_lex_inpat &&
1695 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1696 while (s+1 < send && *s != '\n')
db42d148 1697 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1698 }
02aa26ce 1699
5d1d4326 1700 /* check for embedded arrays
da6eedaa 1701 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1702 */
7e2040f0 1703 else if (*s == '@' && s[1]
5d1d4326 1704 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1705 break;
02aa26ce
NT
1706
1707 /* check for embedded scalars. only stop if we're sure it's a
1708 variable.
1709 */
79072805 1710 else if (*s == '$') {
3280af22 1711 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1712 break;
6002328a 1713 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1714 break; /* in regexp, $ might be tail anchor */
1715 }
02aa26ce 1716
2b9d42f0
NIS
1717 /* End of else if chain - OP_TRANS rejoin rest */
1718
02aa26ce 1719 /* backslashes */
79072805
LW
1720 if (*s == '\\' && s+1 < send) {
1721 s++;
02aa26ce
NT
1722
1723 /* some backslashes we leave behind */
c9f97d15 1724 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1725 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1726 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1727 continue;
1728 }
02aa26ce
NT
1729
1730 /* deprecate \1 in strings and substitution replacements */
3280af22 1731 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1732 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1733 {
599cee73 1734 if (ckWARN(WARN_SYNTAX))
9014280d 1735 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1736 *--s = '$';
1737 break;
1738 }
02aa26ce
NT
1739
1740 /* string-change backslash escapes */
3280af22 1741 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1742 --s;
1743 break;
1744 }
02aa26ce
NT
1745
1746 /* if we get here, it's either a quoted -, or a digit */
79072805 1747 switch (*s) {
02aa26ce
NT
1748
1749 /* quoted - in transliterations */
79072805 1750 case '-':
3280af22 1751 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1752 *d++ = *s++;
1753 continue;
1754 }
1755 /* FALL THROUGH */
1756 default:
11b8faa4 1757 {
041457d9
DM
1758 if (isALNUM(*s) &&
1759 *s != '_' &&
1760 ckWARN(WARN_MISC))
9014280d 1761 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1762 "Unrecognized escape \\%c passed through",
1763 *s);
1764 /* default action is to copy the quoted character */
f9a63242 1765 goto default_action;
11b8faa4 1766 }
02aa26ce
NT
1767
1768 /* \132 indicates an octal constant */
79072805
LW
1769 case '0': case '1': case '2': case '3':
1770 case '4': case '5': case '6': case '7':
ba210ebe 1771 {
53305cf1
NC
1772 I32 flags = 0;
1773 STRLEN len = 3;
1774 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1775 s += len;
1776 }
012bcf8d 1777 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1778
1779 /* \x24 indicates a hex constant */
79072805 1780 case 'x':
a0ed51b3
LW
1781 ++s;
1782 if (*s == '{') {
9d4ba2ae 1783 char* const e = strchr(s, '}');
a4c04bdc
NC
1784 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1785 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1786 STRLEN len;
355860ce 1787
53305cf1 1788 ++s;
adaeee49 1789 if (!e) {
a0ed51b3 1790 yyerror("Missing right brace on \\x{}");
355860ce 1791 continue;
ba210ebe 1792 }
53305cf1
NC
1793 len = e - s;
1794 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1795 s = e + 1;
a0ed51b3
LW
1796 }
1797 else {
ba210ebe 1798 {
53305cf1 1799 STRLEN len = 2;
a4c04bdc 1800 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1801 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1802 s += len;
1803 }
012bcf8d
GS
1804 }
1805
1806 NUM_ESCAPE_INSERT:
1807 /* Insert oct or hex escaped character.
301d3d20 1808 * There will always enough room in sv since such
db42d148 1809 * escapes will be longer than any UTF-8 sequence
301d3d20 1810 * they can end up as. */
ba7cea30 1811
c7f1f016
NIS
1812 /* We need to map to chars to ASCII before doing the tests
1813 to cover EBCDIC
1814 */
c4d5f83a 1815 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1816 if (!has_utf8 && uv > 255) {
301d3d20
JH
1817 /* Might need to recode whatever we have
1818 * accumulated so far if it contains any
1819 * hibit chars.
1820 *
1821 * (Can't we keep track of that and avoid
1822 * this rescan? --jhi)
012bcf8d 1823 */
c7f1f016 1824 int hicount = 0;
63cd0674
NIS
1825 U8 *c;
1826 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1827 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1828 hicount++;
db42d148 1829 }
012bcf8d 1830 }
63cd0674 1831 if (hicount) {
9d4ba2ae 1832 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1833 U8 *src, *dst;
1834 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1835 src = (U8 *)d - 1;
1836 dst = src+hicount;
1837 d += hicount;
cfd0369c 1838 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1839 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1840 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1841 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1842 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1843 }
1844 else {
63cd0674 1845 *dst-- = *src;
012bcf8d 1846 }
c7f1f016 1847 src--;
012bcf8d
GS
1848 }
1849 }
1850 }
1851
9aa983d2 1852 if (has_utf8 || uv > 255) {
9041c2e3 1853 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1854 has_utf8 = TRUE;
f9a63242
JH
1855 if (PL_lex_inwhat == OP_TRANS &&
1856 PL_sublex_info.sub_op) {
1857 PL_sublex_info.sub_op->op_private |=
1858 (PL_lex_repl ? OPpTRANS_FROM_UTF
1859 : OPpTRANS_TO_UTF);
f9a63242 1860 }
012bcf8d 1861 }
a0ed51b3 1862 else {
012bcf8d 1863 *d++ = (char)uv;
a0ed51b3 1864 }
012bcf8d
GS
1865 }
1866 else {
c4d5f83a 1867 *d++ = (char) uv;
a0ed51b3 1868 }
79072805 1869 continue;
02aa26ce 1870
b239daa5 1871 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1872 case 'N':
55eda711 1873 ++s;
423cee85
JH
1874 if (*s == '{') {
1875 char* e = strchr(s, '}');
155aba94 1876 SV *res;
423cee85 1877 STRLEN len;
cfd0369c 1878 const char *str;
4e553d73 1879
423cee85 1880 if (!e) {
5777a3f7 1881 yyerror("Missing right brace on \\N{}");
423cee85
JH
1882 e = s - 1;
1883 goto cont_scan;
1884 }
dbc0d4f2
JH
1885 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1886 /* \N{U+...} */
1887 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1888 PERL_SCAN_DISALLOW_PREFIX;
1889 s += 3;
1890 len = e - s;
1891 uv = grok_hex(s, &len, &flags, NULL);
1892 s = e + 1;
1893 goto NUM_ESCAPE_INSERT;
1894 }
55eda711 1895 res = newSVpvn(s + 1, e - s - 1);
bd61b366 1896 res = new_constant( NULL, 0, "charnames",
a0714e2c 1897 res, NULL, "\\N{...}" );
f9a63242
JH
1898 if (has_utf8)
1899 sv_utf8_upgrade(res);
cfd0369c 1900 str = SvPV_const(res,len);
1c47067b
JH
1901#ifdef EBCDIC_NEVER_MIND
1902 /* charnames uses pack U and that has been
1903 * recently changed to do the below uni->native
1904 * mapping, so this would be redundant (and wrong,
1905 * the code point would be doubly converted).
1906 * But leave this in just in case the pack U change
1907 * gets revoked, but the semantics is still
1908 * desireable for charnames. --jhi */
cddc7ef4 1909 {
cfd0369c 1910 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1911
1912 if (uv < 0x100) {
89ebb4a3 1913 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1914
1915 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1916 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1917 str = SvPV_const(res, len);
cddc7ef4
JH
1918 }
1919 }
1920#endif
89491803 1921 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1922 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1923 SvCUR_set(sv, d - ostart);
1924 SvPOK_on(sv);
e4f3eed8 1925 *d = '\0';
f08d6ad9 1926 sv_utf8_upgrade(sv);
d2f449dd 1927 /* this just broke our allocation above... */
eb160463 1928 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1929 d = SvPVX(sv) + SvCUR(sv);
89491803 1930 has_utf8 = TRUE;
f08d6ad9 1931 }
eb160463 1932 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1933 const char * const odest = SvPVX_const(sv);
423cee85 1934
8973db79 1935 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1936 d = SvPVX(sv) + (d - odest);
1937 }
1938 Copy(str, d, len, char);
1939 d += len;
1940 SvREFCNT_dec(res);
1941 cont_scan:
1942 s = e + 1;
1943 }
1944 else
5777a3f7 1945 yyerror("Missing braces on \\N{}");
423cee85
JH
1946 continue;
1947
02aa26ce 1948 /* \c is a control character */
79072805
LW
1949 case 'c':
1950 s++;
961ce445 1951 if (s < send) {
ba210ebe 1952 U8 c = *s++;
c7f1f016
NIS
1953#ifdef EBCDIC
1954 if (isLOWER(c))
1955 c = toUPPER(c);
1956#endif
db42d148 1957 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1958 }
961ce445
RGS
1959 else {
1960 yyerror("Missing control char name in \\c");
1961 }
79072805 1962 continue;
02aa26ce
NT
1963
1964 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1965 case 'b':
db42d148 1966 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1967 break;
1968 case 'n':
db42d148 1969 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1970 break;
1971 case 'r':
db42d148 1972 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1973 break;
1974 case 'f':
db42d148 1975 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1976 break;
1977 case 't':
db42d148 1978 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1979 break;
34a3fe2a 1980 case 'e':
db42d148 1981 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1982 break;
1983 case 'a':
db42d148 1984 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1985 break;
02aa26ce
NT
1986 } /* end switch */
1987
79072805
LW
1988 s++;
1989 continue;
02aa26ce 1990 } /* end if (backslash) */
4c3a8340
TS
1991#ifdef EBCDIC
1992 else
1993 literal_endpoint++;
1994#endif
02aa26ce 1995
f9a63242 1996 default_action:
2b9d42f0
NIS
1997 /* If we started with encoded form, or already know we want it
1998 and then encode the next character */
1999 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2000 STRLEN len = 1;
5f66b61c
AL
2001 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2002 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2003 s += len;
2004 if (need > len) {
2005 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2006 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2007 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2008 }
5f66b61c 2009 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2010 has_utf8 = TRUE;
2011 }
2012 else {
2013 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2014 }
02aa26ce
NT
2015 } /* while loop to process each character */
2016
2017 /* terminate the string and set up the sv */
79072805 2018 *d = '\0';
95a20fc0 2019 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2020 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2021 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2022
79072805 2023 SvPOK_on(sv);
9f4817db 2024 if (PL_encoding && !has_utf8) {
d0063567
DK
2025 sv_recode_to_utf8(sv, PL_encoding);
2026 if (SvUTF8(sv))
2027 has_utf8 = TRUE;
9f4817db 2028 }
2b9d42f0 2029 if (has_utf8) {
7e2040f0 2030 SvUTF8_on(sv);
2b9d42f0 2031 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2032 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2033 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2034 }
2035 }
79072805 2036
02aa26ce 2037 /* shrink the sv if we allocated more than we used */
79072805 2038 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2039 SvPV_shrink_to_cur(sv);
79072805 2040 }
02aa26ce 2041
9b599b2a 2042 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2043 if (s > PL_bufptr) {
2044 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2045 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2046 sv, NULL,
4e553d73 2047 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2048 ? "tr"
3280af22 2049 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2050 ? "s"
2051 : "qq")));
79072805 2052 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2053 } else
8990e307 2054 SvREFCNT_dec(sv);
79072805
LW
2055 return s;
2056}
2057
ffb4593c
NT
2058/* S_intuit_more
2059 * Returns TRUE if there's more to the expression (e.g., a subscript),
2060 * FALSE otherwise.
ffb4593c
NT
2061 *
2062 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2063 *
2064 * ->[ and ->{ return TRUE
2065 * { and [ outside a pattern are always subscripts, so return TRUE
2066 * if we're outside a pattern and it's not { or [, then return FALSE
2067 * if we're in a pattern and the first char is a {
2068 * {4,5} (any digits around the comma) returns FALSE
2069 * if we're in a pattern and the first char is a [
2070 * [] returns FALSE
2071 * [SOMETHING] has a funky algorithm to decide whether it's a
2072 * character class or not. It has to deal with things like
2073 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2074 * anything else returns TRUE
2075 */
2076
9cbb5ea2
GS
2077/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2078
76e3520e 2079STATIC int
cea2e8a9 2080S_intuit_more(pTHX_ register char *s)
79072805 2081{
97aff369 2082 dVAR;
3280af22 2083 if (PL_lex_brackets)
79072805
LW
2084 return TRUE;
2085 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2086 return TRUE;
2087 if (*s != '{' && *s != '[')
2088 return FALSE;
3280af22 2089 if (!PL_lex_inpat)
79072805
LW
2090 return TRUE;
2091
2092 /* In a pattern, so maybe we have {n,m}. */
2093 if (*s == '{') {
2094 s++;
2095 if (!isDIGIT(*s))
2096 return TRUE;
2097 while (isDIGIT(*s))
2098 s++;
2099 if (*s == ',')
2100 s++;
2101 while (isDIGIT(*s))
2102 s++;
2103 if (*s == '}')
2104 return FALSE;
2105 return TRUE;
2106
2107 }
2108
2109 /* On the other hand, maybe we have a character class */
2110
2111 s++;
2112 if (*s == ']' || *s == '^')
2113 return FALSE;
2114 else {
ffb4593c 2115 /* this is terrifying, and it works */
79072805
LW
2116 int weight = 2; /* let's weigh the evidence */
2117 char seen[256];
f27ffc4a 2118 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2119 const char * const send = strchr(s,']');
3280af22 2120 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2121
2122 if (!send) /* has to be an expression */
2123 return TRUE;
2124
2125 Zero(seen,256,char);
2126 if (*s == '$')
2127 weight -= 3;
2128 else if (isDIGIT(*s)) {
2129 if (s[1] != ']') {
2130 if (isDIGIT(s[1]) && s[2] == ']')
2131 weight -= 10;
2132 }
2133 else
2134 weight -= 100;
2135 }
2136 for (; s < send; s++) {
2137 last_un_char = un_char;
2138 un_char = (unsigned char)*s;
2139 switch (*s) {
2140 case '@':
2141 case '&':
2142 case '$':
2143 weight -= seen[un_char] * 10;
7e2040f0 2144 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2145 int len;
8903cb82 2146 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2147 len = (int)strlen(tmpbuf);
2148 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2149 weight -= 100;
2150 else
2151 weight -= 10;
2152 }
2153 else if (*s == '$' && s[1] &&
93a17b20
LW
2154 strchr("[#!%*<>()-=",s[1])) {
2155 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2156 weight -= 10;
2157 else
2158 weight -= 1;
2159 }
2160 break;
2161 case '\\':
2162 un_char = 254;
2163 if (s[1]) {
93a17b20 2164 if (strchr("wds]",s[1]))
79072805
LW
2165 weight += 100;
2166 else if (seen['\''] || seen['"'])
2167 weight += 1;
93a17b20 2168 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2169 weight += 40;
2170 else if (isDIGIT(s[1])) {
2171 weight += 40;
2172 while (s[1] && isDIGIT(s[1]))
2173 s++;
2174 }
2175 }
2176 else
2177 weight += 100;
2178 break;
2179 case '-':
2180 if (s[1] == '\\')
2181 weight += 50;
93a17b20 2182 if (strchr("aA01! ",last_un_char))
79072805 2183 weight += 30;
93a17b20 2184 if (strchr("zZ79~",s[1]))
79072805 2185 weight += 30;
f27ffc4a
GS
2186 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2187 weight -= 5; /* cope with negative subscript */
79072805
LW
2188 break;
2189 default:
3792a11b
NC
2190 if (!isALNUM(last_un_char)
2191 && !(last_un_char == '$' || last_un_char == '@'
2192 || last_un_char == '&')
2193 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2194 char *d = tmpbuf;
2195 while (isALPHA(*s))
2196 *d++ = *s++;
2197 *d = '\0';
2198 if (keyword(tmpbuf, d - tmpbuf))
2199 weight -= 150;
2200 }
2201 if (un_char == last_un_char + 1)
2202 weight += 5;
2203 weight -= seen[un_char];
2204 break;
2205 }
2206 seen[un_char]++;
2207 }
2208 if (weight >= 0) /* probably a character class */
2209 return FALSE;
2210 }
2211
2212 return TRUE;
2213}
ffed7fef 2214
ffb4593c
NT
2215/*
2216 * S_intuit_method
2217 *
2218 * Does all the checking to disambiguate
2219 * foo bar
2220 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2221 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2222 *
2223 * First argument is the stuff after the first token, e.g. "bar".
2224 *
2225 * Not a method if bar is a filehandle.
2226 * Not a method if foo is a subroutine prototyped to take a filehandle.
2227 * Not a method if it's really "Foo $bar"
2228 * Method if it's "foo $bar"
2229 * Not a method if it's really "print foo $bar"
2230 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2231 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2232 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2233 * =>
2234 */
2235
76e3520e 2236STATIC int
62d55b22 2237S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2238{
97aff369 2239 dVAR;
a0d0e21e 2240 char *s = start + (*start == '$');
3280af22 2241 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2242 STRLEN len;
2243 GV* indirgv;
2244
2245 if (gv) {
62d55b22 2246 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2247 return 0;
62d55b22
NC
2248 if (cv) {
2249 if (SvPOK(cv)) {
2250 const char *proto = SvPVX_const(cv);
2251 if (proto) {
2252 if (*proto == ';')
2253 proto++;
2254 if (*proto == '*')
2255 return 0;
2256 }
b6c543e3
IZ
2257 }
2258 } else
a0d0e21e
LW
2259 gv = 0;
2260 }
8903cb82 2261 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2262 /* start is the beginning of the possible filehandle/object,
2263 * and s is the end of it
2264 * tmpbuf is a copy of it
2265 */
2266
a0d0e21e 2267 if (*start == '$') {
3280af22 2268 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2269 return 0;
29595ff2 2270 s = PEEKSPACE(s);
3280af22
NIS
2271 PL_bufptr = start;
2272 PL_expect = XREF;
a0d0e21e
LW
2273 return *s == '(' ? FUNCMETH : METHOD;
2274 }
2275 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2276 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2277 len -= 2;
2278 tmpbuf[len] = '\0';
2279 goto bare_package;
2280 }
90e5519e 2281 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2282 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2283 return 0;
2284 /* filehandle or package name makes it a method */
89bfa8cd 2285 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
29595ff2 2286 s = PEEKSPACE(s);
3280af22 2287 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2288 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2289 bare_package:
3280af22 2290 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2291 newSVpvn(tmpbuf,len));
3280af22
NIS
2292 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2293 PL_expect = XTERM;
a0d0e21e 2294 force_next(WORD);
3280af22 2295 PL_bufptr = s;
a0d0e21e
LW
2296 return *s == '(' ? FUNCMETH : METHOD;
2297 }
2298 }
2299 return 0;
2300}
2301
ffb4593c
NT
2302/*
2303 * S_incl_perldb
2304 * Return a string of Perl code to load the debugger. If PERL5DB
2305 * is set, it will return the contents of that, otherwise a
2306 * compile-time require of perl5db.pl.
2307 */
2308
bfed75c6 2309STATIC const char*
cea2e8a9 2310S_incl_perldb(pTHX)
a0d0e21e 2311{
97aff369 2312 dVAR;
3280af22 2313 if (PL_perldb) {
9d4ba2ae 2314 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2315
2316 if (pdb)
2317 return pdb;
93189314 2318 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2319 return "BEGIN { require 'perl5db.pl' }";
2320 }
2321 return "";
2322}
2323
2324
16d20bd9 2325/* Encoded script support. filter_add() effectively inserts a
4e553d73 2326 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2327 * Note that the filter function only applies to the current source file
2328 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2329 *
2330 * The datasv parameter (which may be NULL) can be used to pass
2331 * private data to this instance of the filter. The filter function
2332 * can recover the SV using the FILTER_DATA macro and use it to
2333 * store private buffers and state information.
2334 *
2335 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2336 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2337 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2338 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2339 * private use must be set using malloc'd pointers.
2340 */
16d20bd9
AD
2341
2342SV *
864dbfa3 2343Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2344{
97aff369 2345 dVAR;
f4c556ac 2346 if (!funcp)
a0714e2c 2347 return NULL;
f4c556ac 2348
3280af22
NIS
2349 if (!PL_rsfp_filters)
2350 PL_rsfp_filters = newAV();
16d20bd9 2351 if (!datasv)
561b68a9 2352 datasv = newSV(0);
862a34c6 2353 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2354 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2355 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2356 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2357 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2358 av_unshift(PL_rsfp_filters, 1);
2359 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2360 return(datasv);
2361}
4e553d73 2362
16d20bd9
AD
2363
2364/* Delete most recently added instance of this filter function. */
a0d0e21e 2365void
864dbfa3 2366Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2367{
97aff369 2368 dVAR;
e0c19803 2369 SV *datasv;
24801a4b 2370
33073adb 2371#ifdef DEBUGGING
8141890a 2372 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2373#endif
3280af22 2374 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2375 return;
2376 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2377 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2378 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2379 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2380 IoANY(datasv) = (void *)NULL;
3280af22 2381 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2382
16d20bd9
AD
2383 return;
2384 }
2385 /* we need to search for the correct entry and clear it */
cea2e8a9 2386 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2387}
2388
2389
1de9afcd
RGS
2390/* Invoke the idxth filter function for the current rsfp. */
2391/* maxlen 0 = read one text line */
16d20bd9 2392I32
864dbfa3 2393Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2394{
97aff369 2395 dVAR;
16d20bd9
AD
2396 filter_t funcp;
2397 SV *datasv = NULL;
e50aee73 2398
3280af22 2399 if (!PL_rsfp_filters)
16d20bd9 2400 return -1;
1de9afcd 2401 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2402 /* Provide a default input filter to make life easy. */
2403 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2404 DEBUG_P(PerlIO_printf(Perl_debug_log,
2405 "filter_read %d: from rsfp\n", idx));
4e553d73 2406 if (maxlen) {
16d20bd9
AD
2407 /* Want a block */
2408 int len ;
f54cb97a 2409 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2410
2411 /* ensure buf_sv is large enough */
eb160463 2412 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2413 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2414 if (PerlIO_error(PL_rsfp))
37120919
AD
2415 return -1; /* error */
2416 else
2417 return 0 ; /* end of file */
2418 }
16d20bd9
AD
2419 SvCUR_set(buf_sv, old_len + len) ;
2420 } else {
2421 /* Want a line */
3280af22
NIS
2422 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2423 if (PerlIO_error(PL_rsfp))
37120919
AD
2424 return -1; /* error */
2425 else
2426 return 0 ; /* end of file */
2427 }
16d20bd9
AD
2428 }
2429 return SvCUR(buf_sv);
2430 }
2431 /* Skip this filter slot if filter has been deleted */
1de9afcd 2432 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2433 DEBUG_P(PerlIO_printf(Perl_debug_log,
2434 "filter_read %d: skipped (filter deleted)\n",
2435 idx));
16d20bd9
AD
2436 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2437 }
2438 /* Get function pointer hidden within datasv */
8141890a 2439 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2440 DEBUG_P(PerlIO_printf(Perl_debug_log,
2441 "filter_read %d: via function %p (%s)\n",
cfd0369c 2442 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2443 /* Call function. The function is expected to */
2444 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2445 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2446 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2447}
2448
76e3520e 2449STATIC char *
cea2e8a9 2450S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2451{
97aff369 2452 dVAR;
c39cd008 2453#ifdef PERL_CR_FILTER
3280af22 2454 if (!PL_rsfp_filters) {
c39cd008 2455 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2456 }
2457#endif
3280af22 2458 if (PL_rsfp_filters) {
55497cff 2459 if (!append)
2460 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2461 if (FILTER_READ(0, sv, 0) > 0)
2462 return ( SvPVX(sv) ) ;
2463 else
bd61b366 2464 return NULL ;
16d20bd9 2465 }
9d116dd7 2466 else
fd049845 2467 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2468}
2469
01ec43d0 2470STATIC HV *
7fc63493 2471S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2472{
97aff369 2473 dVAR;
def3634b
GS
2474 GV *gv;
2475
01ec43d0 2476 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2477 return PL_curstash;
2478
2479 if (len > 2 &&
2480 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2481 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2482 {
2483 return GvHV(gv); /* Foo:: */
def3634b
GS
2484 }
2485
2486 /* use constant CLASS => 'MyClass' */
90e5519e 2487 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2488 SV *sv;
2489 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2490 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2491 }
2492 }
2493
2494 return gv_stashpv(pkgname, FALSE);
2495}
a0d0e21e 2496
468aa647 2497STATIC char *
cc6ed77d 2498S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2499 dVAR;
468aa647
RGS
2500 if (PL_expect != XSTATE)
2501 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2502 is_use ? "use" : "no"));
29595ff2 2503 s = SKIPSPACE1(s);
468aa647
RGS
2504 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2505 s = force_version(s, TRUE);
29595ff2 2506 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
5f66b61c 2507 PL_nextval[PL_nexttoke].opval = NULL;
468aa647
RGS
2508 force_next(WORD);
2509 }
2510 else if (*s == 'v') {
2511 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2512 s = force_version(s, FALSE);
2513 }
2514 }
2515 else {
2516 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2517 s = force_version(s, FALSE);
2518 }
2519 yylval.ival = is_use;
2520 return s;
2521}
748a9306 2522#ifdef DEBUGGING
27da23d5 2523 static const char* const exp_name[] =
09bef843 2524 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2525 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2526 };
748a9306 2527#endif
463ee0b2 2528
02aa26ce
NT
2529/*
2530 yylex
2531
2532 Works out what to call the token just pulled out of the input
2533 stream. The yacc parser takes care of taking the ops we return and
2534 stitching them into a tree.
2535
2536 Returns:
2537 PRIVATEREF
2538
2539 Structure:
2540 if read an identifier
2541 if we're in a my declaration
2542 croak if they tried to say my($foo::bar)
2543 build the ops for a my() declaration
2544 if it's an access to a my() variable
2545 are we in a sort block?
2546 croak if my($a); $a <=> $b
2547 build ops for access to a my() variable
2548 if in a dq string, and they've said @foo and we can't find @foo
2549 croak
2550 build ops for a bareword
2551 if we already built the token before, use it.
2552*/
2553
20141f0e 2554
dba4d153
JH
2555#ifdef __SC__
2556#pragma segment Perl_yylex
2557#endif
dba4d153 2558int
dba4d153 2559Perl_yylex(pTHX)
20141f0e 2560{
97aff369 2561 dVAR;
3afc138a 2562 register char *s = PL_bufptr;
378cc40b 2563 register char *d;
463ee0b2 2564 STRLEN len;
aa7440fb 2565 bool bof = FALSE;
a687059c 2566
bbf60fe6 2567 DEBUG_T( {
396482e1 2568 SV* tmp = newSVpvs("");
b6007c36
DM
2569 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2570 (IV)CopLINE(PL_curcop),
2571 lex_state_names[PL_lex_state],
2572 exp_name[PL_expect],
2573 pv_display(tmp, s, strlen(s), 0, 60));
2574 SvREFCNT_dec(tmp);
bbf60fe6 2575 } );
02aa26ce 2576 /* check if there's an identifier for us to look at */
ba979b31 2577 if (PL_pending_ident)
bbf60fe6 2578 return REPORT(S_pending_ident(aTHX));
bbce6d69 2579
02aa26ce
NT
2580 /* no identifier pending identification */
2581
3280af22 2582 switch (PL_lex_state) {
79072805
LW
2583#ifdef COMMENTARY
2584 case LEX_NORMAL: /* Some compilers will produce faster */
2585 case LEX_INTERPNORMAL: /* code if we comment these out. */
2586 break;
2587#endif
2588
09bef843 2589 /* when we've already built the next token, just pull it out of the queue */
79072805 2590 case LEX_KNOWNEXT:
3280af22
NIS
2591 PL_nexttoke--;
2592 yylval = PL_nextval[PL_nexttoke];
2593 if (!PL_nexttoke) {
2594 PL_lex_state = PL_lex_defer;
2595 PL_expect = PL_lex_expect;
2596 PL_lex_defer = LEX_NORMAL;
463ee0b2 2597 }
bbf60fe6 2598 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2599
02aa26ce 2600 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2601 when we get here, PL_bufptr is at the \
02aa26ce 2602 */
79072805
LW
2603 case LEX_INTERPCASEMOD:
2604#ifdef DEBUGGING
3280af22 2605 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2606 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2607#endif
02aa26ce 2608 /* handle \E or end of string */
3280af22 2609 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2610 /* if at a \E */
3280af22 2611 if (PL_lex_casemods) {
f54cb97a 2612 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2613 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2614
3792a11b
NC
2615 if (PL_bufptr != PL_bufend
2616 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2617 PL_bufptr += 2;
2618 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2619 }
bbf60fe6 2620 return REPORT(')');
79072805 2621 }
3280af22
NIS
2622 if (PL_bufptr != PL_bufend)
2623 PL_bufptr += 2;
2624 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2625 return yylex();
79072805
LW
2626 }
2627 else {
607df283 2628 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2629 "### Saw case modifier\n"); });
3280af22 2630 s = PL_bufptr + 1;
6e909404
JH
2631 if (s[1] == '\\' && s[2] == 'E') {
2632 PL_bufptr = s + 3;
2633 PL_lex_state = LEX_INTERPCONCAT;
2634 return yylex();
a0d0e21e 2635 }
6e909404 2636 else {
90771dc0 2637 I32 tmp;
6e909404
JH
2638 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2639 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2640 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2641 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2642 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2643 return REPORT(')');
6e909404
JH
2644 }
2645 if (PL_lex_casemods > 10)
2646 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2647 PL_lex_casestack[PL_lex_casemods++] = *s;
2648 PL_lex_casestack[PL_lex_casemods] = '\0';
2649 PL_lex_state = LEX_INTERPCONCAT;
2650 PL_nextval[PL_nexttoke].ival = 0;
2651 force_next('(');
2652 if (*s == 'l')
2653 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2654 else if (*s == 'u')
2655 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2656 else if (*s == 'L')
2657 PL_nextval[PL_nexttoke].ival = OP_LC;
2658 else if (*s == 'U')
2659 PL_nextval[PL_nexttoke].ival = OP_UC;
2660 else if (*s == 'Q')
2661 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2662 else
2663 Perl_croak(aTHX_ "panic: yylex");
2664 PL_bufptr = s + 1;
a0d0e21e 2665 }
79072805 2666 force_next(FUNC);
3280af22
NIS
2667 if (PL_lex_starts) {
2668 s = PL_bufptr;
2669 PL_lex_starts = 0;
131b3ad0
DM
2670 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2671 if (PL_lex_casemods == 1 && PL_lex_inpat)
2672 OPERATOR(',');
2673 else
2674 Aop(OP_CONCAT);
79072805
LW
2675 }
2676 else
cea2e8a9 2677 return yylex();
79072805
LW
2678 }
2679
55497cff 2680 case LEX_INTERPPUSH:
bbf60fe6 2681 return REPORT(sublex_push());
55497cff 2682
79072805 2683 case LEX_INTERPSTART:
3280af22 2684 if (PL_bufptr == PL_bufend)
bbf60fe6 2685 return REPORT(sublex_done());
607df283 2686 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2687 "### Interpolated variable\n"); });
3280af22
NIS
2688 PL_expect = XTERM;
2689 PL_lex_dojoin = (*PL_bufptr == '@');
2690 PL_lex_state = LEX_INTERPNORMAL;
2691 if (PL_lex_dojoin) {
2692 PL_nextval[PL_nexttoke].ival = 0;
79072805 2693 force_next(',');
a0d0e21e 2694 force_ident("\"", '$');
3280af22 2695 PL_nextval[PL_nexttoke].ival = 0;
79072805 2696 force_next('$');
3280af22 2697 PL_nextval[PL_nexttoke].ival = 0;
79072805 2698 force_next('(');
3280af22 2699 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2700 force_next(FUNC);
2701 }
3280af22
NIS
2702 if (PL_lex_starts++) {
2703 s = PL_bufptr;
131b3ad0
DM
2704 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2705 if (!PL_lex_casemods && PL_lex_inpat)
2706 OPERATOR(',');
2707 else
2708 Aop(OP_CONCAT);
79072805 2709 }
cea2e8a9 2710 return yylex();
79072805
LW
2711
2712 case LEX_INTERPENDMAYBE:
3280af22
NIS
2713 if (intuit_more(PL_bufptr)) {
2714 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2715 break;
2716 }
2717 /* FALL THROUGH */
2718
2719 case LEX_INTERPEND:
3280af22
NIS
2720 if (PL_lex_dojoin) {
2721 PL_lex_dojoin = FALSE;
2722 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2723 return REPORT(')');
79072805 2724 }
43a16006 2725 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2726 && SvEVALED(PL_lex_repl))
43a16006 2727 {
e9fa98b2 2728 if (PL_bufptr != PL_bufend)
cea2e8a9 2729 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 2730 PL_lex_repl = NULL;
e9fa98b2 2731 }
79072805
LW
2732 /* FALLTHROUGH */
2733 case LEX_INTERPCONCAT:
2734#ifdef DEBUGGING
3280af22 2735 if (PL_lex_brackets)
cea2e8a9 2736 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2737#endif
3280af22 2738 if (PL_bufptr == PL_bufend)
bbf60fe6 2739 return REPORT(sublex_done());
79072805 2740
3280af22
NIS
2741 if (SvIVX(PL_linestr) == '\'') {
2742 SV *sv = newSVsv(PL_linestr);
2743 if (!PL_lex_inpat)
76e3520e 2744 sv = tokeq(sv);
3280af22 2745 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2746 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2747 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2748 s = PL_bufend;
79072805
LW
2749 }
2750 else {
3280af22 2751 s = scan_const(PL_bufptr);
79072805 2752 if (*s == '\\')
3280af22 2753 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2754 else
3280af22 2755 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2756 }
2757
3280af22
NIS
2758 if (s != PL_bufptr) {
2759 PL_nextval[PL_nexttoke] = yylval;
2760 PL_expect = XTERM;
79072805 2761 force_next(THING);
131b3ad0
DM
2762 if (PL_lex_starts++) {
2763 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2764 if (!PL_lex_casemods && PL_lex_inpat)
2765 OPERATOR(',');
2766 else
2767 Aop(OP_CONCAT);
2768 }
79072805 2769 else {
3280af22 2770 PL_bufptr = s;
cea2e8a9 2771 return yylex();
79072805
LW
2772 }
2773 }
2774
cea2e8a9 2775 return yylex();
a0d0e21e 2776 case LEX_FORMLINE:
3280af22
NIS
2777 PL_lex_state = LEX_NORMAL;
2778 s = scan_formline(PL_bufptr);
2779 if (!PL_lex_formbrack)
a0d0e21e
LW
2780 goto rightbracket;
2781 OPERATOR(';');
79072805
LW
2782 }
2783
3280af22
NIS
2784 s = PL_bufptr;
2785 PL_oldoldbufptr = PL_oldbufptr;
2786 PL_oldbufptr = s;
463ee0b2
LW
2787
2788 retry:
378cc40b
LW
2789 switch (*s) {
2790 default:
7e2040f0 2791 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2792 goto keylookup;
cea2e8a9 2793 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2794 case 4:
2795 case 26:
2796 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2797 case 0:
3280af22
NIS
2798 if (!PL_rsfp) {
2799 PL_last_uni = 0;
2800 PL_last_lop = 0;
c5ee2135 2801 if (PL_lex_brackets) {
0bd48802
AL
2802 yyerror(PL_lex_formbrack
2803 ? "Format not terminated"
2804 : "Missing right curly or square bracket");
c5ee2135 2805 }
4e553d73 2806 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2807 "### Tokener got EOF\n");
5f80b19c 2808 } );
79072805 2809 TOKEN(0);
463ee0b2 2810 }
3280af22 2811 if (s++ < PL_bufend)
a687059c 2812 goto retry; /* ignore stray nulls */
3280af22
NIS
2813 PL_last_uni = 0;
2814 PL_last_lop = 0;
2815 if (!PL_in_eval && !PL_preambled) {
2816 PL_preambled = TRUE;
2817 sv_setpv(PL_linestr,incl_perldb());
2818 if (SvCUR(PL_linestr))
396482e1 2819 sv_catpvs(PL_linestr,";");
3280af22
NIS
2820 if (PL_preambleav){
2821 while(AvFILLp(PL_preambleav) >= 0) {
2822 SV *tmpsv = av_shift(PL_preambleav);
2823 sv_catsv(PL_linestr, tmpsv);
396482e1 2824 sv_catpvs(PL_linestr, ";");
91b7def8 2825 sv_free(tmpsv);
2826 }
3280af22
NIS
2827 sv_free((SV*)PL_preambleav);
2828 PL_preambleav = NULL;
91b7def8 2829 }
3280af22 2830 if (PL_minus_n || PL_minus_p) {
396482e1 2831 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2832 if (PL_minus_l)
396482e1 2833 sv_catpvs(PL_linestr,"chomp;");
3280af22 2834 if (PL_minus_a) {
3280af22 2835 if (PL_minus_F) {
3792a11b
NC
2836 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2837 || *PL_splitstr == '"')
3280af22 2838 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2839 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2840 else {
c8ef6a4b
NC
2841 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2842 bytes can be used as quoting characters. :-) */
dd374669 2843 const char *splits = PL_splitstr;
91d456ae 2844 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2845 do {
2846 /* Need to \ \s */
dd374669
AL
2847 if (*splits == '\\')
2848 sv_catpvn(PL_linestr, splits, 1);
2849 sv_catpvn(PL_linestr, splits, 1);
2850 } while (*splits++);
48c4c863
NC
2851 /* This loop will embed the trailing NUL of
2852 PL_linestr as the last thing it does before
2853 terminating. */
396482e1 2854 sv_catpvs(PL_linestr, ");");
54310121 2855 }
2304df62
AD
2856 }
2857 else
396482e1 2858 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2859 }
79072805 2860 }
bc9b29db 2861 if (PL_minus_E)
396482e1
GA
2862 sv_catpvs(PL_linestr,"use feature ':5.10';");
2863 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2864 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2865 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2866 PL_last_lop = PL_last_uni = NULL;
3280af22 2867 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2868 SV * const sv = newSV(0);
a0d0e21e
LW
2869
2870 sv_upgrade(sv, SVt_PVMG);
3280af22 2871 sv_setsv(sv,PL_linestr);
0ac0412a 2872 (void)SvIOK_on(sv);
45977657 2873 SvIV_set(sv, 0);
36c7798d 2874 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2875 }
79072805 2876 goto retry;
a687059c 2877 }
e929a76b 2878 do {
aa7440fb 2879 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 2880 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af
JH
2881 fake_eof:
2882 if (PL_rsfp) {
2883 if (PL_preprocess && !PL_in_eval)
2884 (void)PerlProc_pclose(PL_rsfp);
2885 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2886 PerlIO_clearerr(PL_rsfp);
2887 else
2888 (void)PerlIO_close(PL_rsfp);
4608196e 2889 PL_rsfp = NULL;
7e28d3af
JH
2890 PL_doextract = FALSE;
2891 }
2892 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2893 sv_setpv(PL_linestr,PL_minus_p
2894 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2895 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2896 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2897 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
2898 PL_minus_n = PL_minus_p = 0;
2899 goto retry;
2900 }
2901 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 2902 PL_last_lop = PL_last_uni = NULL;
c69006e4 2903 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2904 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2905 }
7aa207d6
JH
2906 /* If it looks like the start of a BOM or raw UTF-16,
2907 * check if it in fact is. */
2908 else if (bof &&
2909 (*s == 0 ||
2910 *(U8*)s == 0xEF ||
2911 *(U8*)s >= 0xFE ||
2912 s[1] == 0)) {
226017aa 2913#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2914# ifdef __GNU_LIBRARY__
2915# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2916# define FTELL_FOR_PIPE_IS_BROKEN
2917# endif
e3f494f1
JH
2918# else
2919# ifdef __GLIBC__
2920# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2921# define FTELL_FOR_PIPE_IS_BROKEN
2922# endif
2923# endif
226017aa
DD
2924# endif
2925#endif
2926#ifdef FTELL_FOR_PIPE_IS_BROKEN
2927 /* This loses the possibility to detect the bof
2928 * situation on perl -P when the libc5 is being used.
2929 * Workaround? Maybe attach some extra state to PL_rsfp?
2930 */
2931 if (!PL_preprocess)
7e28d3af 2932 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2933#else
eb160463 2934 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2935#endif
7e28d3af 2936 if (bof) {
3280af22 2937 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2938 s = swallow_bom((U8*)s);
e929a76b 2939 }
378cc40b 2940 }
3280af22 2941 if (PL_doextract) {
a0d0e21e
LW
2942 /* Incest with pod. */
2943 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2944 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2945 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2946 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2947 PL_last_lop = PL_last_uni = NULL;
3280af22 2948 PL_doextract = FALSE;
a0d0e21e 2949 }
4e553d73 2950 }
463ee0b2 2951 incline(s);
3280af22
NIS
2952 } while (PL_doextract);
2953 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2954 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2955 SV * const sv = newSV(0);
a687059c 2956
93a17b20 2957 sv_upgrade(sv, SVt_PVMG);
3280af22 2958 sv_setsv(sv,PL_linestr);
0ac0412a 2959 (void)SvIOK_on(sv);
45977657 2960 SvIV_set(sv, 0);
36c7798d 2961 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2962 }
3280af22 2963 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2964 PL_last_lop = PL_last_uni = NULL;
57843af0 2965 if (CopLINE(PL_curcop) == 1) {
3280af22 2966 while (s < PL_bufend && isSPACE(*s))
79072805 2967 s++;
a0d0e21e 2968 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2969 s++;
bd61b366 2970 d = NULL;
3280af22 2971 if (!PL_in_eval) {
44a8e56a 2972 if (*s == '#' && *(s+1) == '!')
2973 d = s + 2;
2974#ifdef ALTERNATE_SHEBANG
2975 else {
bfed75c6 2976 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 2977 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2978 d = s + (sizeof(as) - 1);
2979 }
2980#endif /* ALTERNATE_SHEBANG */
2981 }
2982 if (d) {
b8378b72 2983 char *ipath;
774d564b 2984 char *ipathend;
b8378b72 2985
774d564b 2986 while (isSPACE(*d))
b8378b72
CS
2987 d++;
2988 ipath = d;
774d564b 2989 while (*d && !isSPACE(*d))
2990 d++;
2991 ipathend = d;
2992
2993#ifdef ARG_ZERO_IS_SCRIPT
2994 if (ipathend > ipath) {
2995 /*
2996 * HP-UX (at least) sets argv[0] to the script name,
2997 * which makes $^X incorrect. And Digital UNIX and Linux,
2998 * at least, set argv[0] to the basename of the Perl
2999 * interpreter. So, having found "#!", we'll set it right.
3000 */
fafc274c
NC
3001 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3002 SVt_PV)); /* $^X */
774d564b 3003 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 3004 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 3005 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 3006 SvSETMAGIC(x);
3007 }
556c1dec
JH
3008 else {
3009 STRLEN blen;
3010 STRLEN llen;
cfd0369c 3011 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 3012 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
3013 if (llen < blen) {
3014 bstart += blen - llen;
3015 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3016 sv_setpvn(x, ipath, ipathend - ipath);
3017 SvSETMAGIC(x);
3018 }
3019 }
3020 }
774d564b 3021 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 3022 }
774d564b 3023#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
3024
3025 /*
3026 * Look for options.
3027 */
748a9306 3028 d = instr(s,"perl -");
84e30d1a 3029 if (!d) {
748a9306 3030 d = instr(s,"perl");
84e30d1a
GS
3031#if defined(DOSISH)
3032 /* avoid getting into infinite loops when shebang
3033 * line contains "Perl" rather than "perl" */
3034 if (!d) {
3035 for (d = ipathend-4; d >= ipath; --d) {
3036 if ((*d == 'p' || *d == 'P')
3037 && !ibcmp(d, "perl", 4))
3038 {
3039 break;
3040 }
3041 }
3042 if (d < ipath)
bd61b366 3043 d = NULL;
84e30d1a
GS
3044 }
3045#endif
3046 }
44a8e56a 3047#ifdef ALTERNATE_SHEBANG
3048 /*
3049 * If the ALTERNATE_SHEBANG on this system starts with a
3050 * character that can be part of a Perl expression, then if
3051 * we see it but not "perl", we're probably looking at the
3052 * start of Perl code, not a request to hand off to some
3053 * other interpreter. Similarly, if "perl" is there, but
3054 * not in the first 'word' of the line, we assume the line
3055 * contains the start of the Perl program.
44a8e56a 3056 */
3057 if (d && *s != '#') {
f54cb97a 3058 const char *c = ipath;
44a8e56a 3059 while (*c && !strchr("; \t\r\n\f\v#", *c))
3060 c++;
3061 if (c < d)
bd61b366 3062 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 3063 else
3064 *s = '#'; /* Don't try to parse shebang line */
3065 }
774d564b 3066#endif /* ALTERNATE_SHEBANG */
bf4acbe4 3067#ifndef MACOS_TRADITIONAL
748a9306 3068 if (!d &&
44a8e56a 3069 *s == '#' &&
774d564b 3070 ipathend > ipath &&
3280af22 3071 !PL_minus_c &&
748a9306 3072 !instr(s,"indir") &&
3280af22 3073 instr(PL_origargv[0],"perl"))
748a9306 3074 {
27da23d5 3075 dVAR;
9f68db38 3076 char **newargv;
9f68db38 3077
774d564b 3078 *ipathend = '\0';
3079 s = ipathend + 1;
3280af22 3080 while (s < PL_bufend && isSPACE(*s))
9f68db38 3081 s++;
3280af22 3082 if (s < PL_bufend) {
a02a5408 3083 Newxz(newargv,PL_origargc+3,char*);
9f68db38 3084 newargv[1] = s;
3280af22 3085 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
3086 s++;
3087 *s = '\0';
3280af22 3088 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
3089 }
3090 else
3280af22 3091 newargv = PL_origargv;
774d564b 3092 newargv[0] = ipath;
b35112e7 3093 PERL_FPU_PRE_EXEC
b4748376 3094 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 3095 PERL_FPU_POST_EXEC
cea2e8a9 3096 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 3097 }
bf4acbe4 3098#endif
748a9306 3099 if (d) {
748a9306 3100 while (*d && !isSPACE(*d)) d++;
bf4acbe4 3101 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
3102
3103 if (*d++ == '-') {
f54cb97a 3104 const bool switches_done = PL_doswitches;
fb993905
GA
3105 const U32 oldpdb = PL_perldb;
3106 const bool oldn = PL_minus_n;
3107 const bool oldp = PL_minus_p;
3108
8cc95fdb 3109 do {
3ffe3ee4 3110 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 3111 const char * const m = d;
8cc95fdb 3112 while (*d && !isSPACE(*d)) d++;
cea2e8a9 3113 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 3114 (int)(d - m), m);
3115 }
97bd5664 3116 d = moreswitches(d);
8cc95fdb 3117 } while (d);
f0b2cf55
YST
3118 if (PL_doswitches && !switches_done) {
3119 int argc = PL_origargc;
3120 char **argv = PL_origargv;
3121 do {
3122 argc--,argv++;
3123 } while (argc && argv[0][0] == '-' && argv[0][1]);
3124 init_argv_symbols(argc,argv);
3125 }
155aba94
GS
3126 if ((PERLDB_LINE && !oldpdb) ||
3127 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 3128 /* if we have already added "LINE: while (<>) {",
3129 we must not do it again */
748a9306 3130 {
c69006e4 3131 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3132 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3133 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3134 PL_last_lop = PL_last_uni = NULL;
3280af22 3135 PL_preambled = FALSE;
84902520 3136 if (PERLDB_LINE)
3280af22 3137 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3138 goto retry;
3139 }
a0d0e21e 3140 }
79072805 3141 }
9f68db38 3142 }
79072805 3143 }
3280af22
NIS
3144 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3145 PL_bufptr = s;
3146 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3147 return yylex();
ae986130 3148 }
378cc40b 3149 goto retry;
4fdae800 3150 case '\r':
6a27c188 3151#ifdef PERL_STRICT_CR
cea2e8a9 3152 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3153 Perl_croak(aTHX_
cc507455 3154 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3155#endif
4fdae800 3156 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3157#ifdef MACOS_TRADITIONAL
3158 case '\312':
3159#endif
378cc40b
LW
3160 s++;
3161 goto retry;
378cc40b 3162 case '#':
e929a76b 3163 case '\n':
3280af22 3164 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3165 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3166 /* handle eval qq[#line 1 "foo"\n ...] */
3167 CopLINE_dec(PL_curcop);
3168 incline(s);
3169 }
3280af22 3170 d = PL_bufend;
a687059c 3171 while (s < d && *s != '\n')
378cc40b 3172 s++;
0f85fab0 3173 if (s < d)
378cc40b 3174 s++;
78c267c1 3175 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3176 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3177 incline(s);
3280af22
NIS
3178 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3179 PL_bufptr = s;
3180 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3181 return yylex();
a687059c 3182 }
378cc40b 3183 }
a687059c 3184 else {
378cc40b 3185 *s = '\0';
3280af22 3186 PL_bufend = s;
a687059c 3187 }
378cc40b
LW
3188 goto retry;
3189 case '-':
79072805 3190 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3191 I32 ftst = 0;
90771dc0 3192 char tmp;
e5edeb50 3193
378cc40b 3194 s++;
3280af22 3195 PL_bufptr = s;
748a9306
LW
3196 tmp = *s++;
3197
bf4acbe4 3198 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3199 s++;
3200
3201 if (strnEQ(s,"=>",2)) {
3280af22 3202 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3203 DEBUG_T( { S_printbuf(aTHX_
3204 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3205 } );
748a9306
LW
3206 OPERATOR('-'); /* unary minus */
3207 }
3280af22 3208 PL_last_uni = PL_oldbufptr;
748a9306 3209 switch (tmp) {
e5edeb50
JH
3210 case 'r': ftst = OP_FTEREAD; break;
3211 case 'w': ftst = OP_FTEWRITE; break;
3212 case 'x': ftst = OP_FTEEXEC; break;
3213 case 'o': ftst = OP_FTEOWNED; break;
3214 case 'R': ftst = OP_FTRREAD; break;
3215 case 'W': ftst = OP_FTRWRITE; break;
3216 case 'X': ftst = OP_FTREXEC; break;
3217 case 'O': ftst = OP_FTROWNED; break;
3218 case 'e': ftst = OP_FTIS; break;
3219 case 'z': ftst = OP_FTZERO; break;
3220 case 's': ftst = OP_FTSIZE; break;
3221 case 'f': ftst = OP_FTFILE; break;
3222 case 'd': ftst = OP_FTDIR; break;
3223 case 'l': ftst = OP_FTLINK; break;
3224 case 'p': ftst = OP_FTPIPE; break;
3225 case 'S': ftst = OP_FTSOCK; break;
3226 case 'u': ftst = OP_FTSUID; break;
3227 case 'g': ftst = OP_FTSGID; break;
3228 case 'k': ftst = OP_FTSVTX; break;
3229 case 'b': ftst = OP_FTBLK; break;
3230 case 'c': ftst = OP_FTCHR; break;
3231 case 't': ftst = OP_FTTTY; break;
3232 case 'T': ftst = OP_FTTEXT; break;
3233 case 'B': ftst = OP_FTBINARY; break;
3234 case 'M': case 'A': case 'C':
fafc274c 3235 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
3236 switch (tmp) {
3237 case 'M': ftst = OP_FTMTIME; break;
3238 case 'A': ftst = OP_FTATIME; break;
3239 case 'C': ftst = OP_FTCTIME; break;
3240 default: break;
3241 }
3242 break;
378cc40b 3243 default:
378cc40b
LW
3244 break;
3245 }
e5edeb50 3246 if (ftst) {
eb160463 3247 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3248 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3249 "### Saw file test %c\n", (int)tmp);
5f80b19c 3250 } );
e5edeb50
JH
3251 FTST(ftst);
3252 }
3253 else {
3254 /* Assume it was a minus followed by a one-letter named
3255 * subroutine call (or a -bareword), then. */
95c31fe3 3256 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3257 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3258 (int) tmp);
5f80b19c 3259 } );
3cf7b4c4 3260 s = --PL_bufptr;
e5edeb50 3261 }
378cc40b 3262 }
90771dc0
NC
3263 {
3264 const char tmp = *s++;
3265 if (*s == tmp) {
3266 s++;
3267 if (PL_expect == XOPERATOR)
3268 TERM(POSTDEC);
3269 else
3270 OPERATOR(PREDEC);
3271 }
3272 else if (*s == '>') {
3273 s++;
29595ff2 3274 s = SKIPSPACE1(s);
90771dc0
NC
3275 if (isIDFIRST_lazy_if(s,UTF)) {
3276 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3277 TOKEN(ARROW);
3278 }
3279 else if (*s == '$')
3280 OPERATOR(ARROW);
3281 else
3282 TERM(ARROW);
3283 }
3280af22 3284 if (PL_expect == XOPERATOR)
90771dc0
NC
3285 Aop(OP_SUBTRACT);
3286 else {
3287 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3288 check_uni();
3289 OPERATOR('-'); /* unary minus */
79072805 3290 }
2f3197b3 3291 }
79072805 3292
378cc40b 3293 case '+':
90771dc0
NC
3294 {
3295 const char tmp = *s++;
3296 if (*s == tmp) {
3297 s++;
3298 if (PL_expect == XOPERATOR)
3299 TERM(POSTINC);
3300 else
3301 OPERATOR(PREINC);
3302 }
3280af22 3303 if (PL_expect == XOPERATOR)
90771dc0
NC
3304 Aop(OP_ADD);
3305 else {
3306 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3307 check_uni();
3308 OPERATOR('+');
3309 }
2f3197b3 3310 }
a687059c 3311
378cc40b 3312 case '*':
3280af22
NIS
3313 if (PL_expect != XOPERATOR) {
3314 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3315 PL_expect = XOPERATOR;
3316 force_ident(PL_tokenbuf, '*');
3317 if (!*PL_tokenbuf)
a0d0e21e 3318 PREREF('*');
79072805 3319 TERM('*');
a687059c 3320 }
79072805
LW
3321 s++;
3322 if (*s == '*') {
a687059c 3323 s++;
79072805 3324 PWop(OP_POW);
a687059c 3325 }
79072805
LW
3326 Mop(OP_MULTIPLY);
3327
378cc40b 3328 case '%':
3280af22 3329 if (PL_expect == XOPERATOR) {
bbce6d69 3330 ++s;
3331 Mop(OP_MODULO);
a687059c 3332 }
3280af22
NIS
3333 PL_tokenbuf[0] = '%';
3334 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3335 if (!PL_tokenbuf[1]) {
bbce6d69 3336 PREREF('%');
a687059c 3337 }
3280af22 3338 PL_pending_ident = '%';
bbce6d69 3339 TERM('%');
a687059c 3340
378cc40b 3341 case '^':
79072805 3342 s++;
a0d0e21e 3343 BOop(OP_BIT_XOR);
79072805 3344 case '[':
3280af22 3345 PL_lex_brackets++;
79072805 3346 /* FALL THROUGH */
378cc40b 3347 case '~':
0d863452
RH
3348 if (s[1] == '~'
3349 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3350 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3351 {
3352 s += 2;
3353 Eop(OP_SMARTMATCH);
3354 }
378cc40b 3355 case ',':
90771dc0
NC
3356 {
3357 const char tmp = *s++;
3358 OPERATOR(tmp);
3359 }
a0d0e21e
LW
3360 case ':':
3361 if (s[1] == ':') {
3362 len = 0;
0bfa2a8a 3363 goto just_a_word_zero_gv;
a0d0e21e
LW
3364 }
3365 s++;
09bef843
SB
3366 switch (PL_expect) {
3367 OP *attrs;
3368 case XOPERATOR:
3369 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3370 break;
3371 PL_bufptr = s; /* update in case we back off */
3372 goto grabattrs;
3373 case XATTRBLOCK:
3374 PL_expect = XBLOCK;
3375 goto grabattrs;
3376 case XATTRTERM:
3377 PL_expect = XTERMBLOCK;
3378 grabattrs:
29595ff2 3379 s = PEEKSPACE(s);
5f66b61c 3380 attrs = NULL;
7e2040f0 3381 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3382 I32 tmp;
09bef843 3383 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3384 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3385 if (tmp < 0) tmp = -tmp;
3386 switch (tmp) {
3387 case KEY_or:
3388 case KEY_and:
c963b151 3389 case KEY_err:
f9829d6b
GS
3390 case KEY_for:
3391 case KEY_unless:
3392 case KEY_if:
3393 case KEY_while:
3394 case KEY_until:
3395 goto got_attrs;
3396 default:
3397 break;
3398 }
3399 }
09bef843
SB
3400 if (*d == '(') {
3401 d = scan_str(d,TRUE,TRUE);
3402 if (!d) {
09bef843
SB
3403 /* MUST advance bufptr here to avoid bogus
3404 "at end of line" context messages from yyerror().
3405 */
3406 PL_bufptr = s + len;
3407 yyerror("Unterminated attribute parameter in attribute list");
3408 if (attrs)
3409 op_free(attrs);
bbf60fe6 3410 return REPORT(0); /* EOF indicator */
09bef843
SB
3411 }
3412 }
3413 if (PL_lex_stuff) {
3414 SV *sv = newSVpvn(s, len);
3415 sv_catsv(sv, PL_lex_stuff);
3416 attrs = append_elem(OP_LIST, attrs,
3417 newSVOP(OP_CONST, 0, sv));
3418 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 3419 PL_lex_stuff = NULL;
09bef843
SB
3420 }
3421 else {
371fce9b
DM
3422 if (len == 6 && strnEQ(s, "unique", len)) {
3423 if (PL_in_my == KEY_our)
3424#ifdef USE_ITHREADS
3425 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3426#else
bb263b4e 3427 /*EMPTY*/; /* skip to avoid loading attributes.pm */
371fce9b 3428#endif
bfed75c6 3429 else
371fce9b
DM
3430 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3431 }
3432
d3cea301
SB
3433 /* NOTE: any CV attrs applied here need to be part of
3434 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3435 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3436 CvLVALUE_on(PL_compcv);
3437 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3438 CvLOCKED_on(PL_compcv);
3439 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3440 CvMETHOD_on(PL_compcv);
06492da6
SF
3441 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3442 CvASSERTION_on(PL_compcv);
78f9721b
SM
3443 /* After we've set the flags, it could be argued that
3444 we don't need to do the attributes.pm-based setting
3445 process, and shouldn't bother appending recognized
d3cea301
SB
3446 flags. To experiment with that, uncomment the
3447 following "else". (Note that's already been
3448 uncommented. That keeps the above-applied built-in
3449 attributes from being intercepted (and possibly
3450 rejected) by a package's attribute routines, but is
3451 justified by the performance win for the common case
3452 of applying only built-in attributes.) */
0256094b 3453 else
78f9721b
SM
3454 attrs = append_elem(OP_LIST, attrs,
3455 newSVOP(OP_CONST, 0,
3456 newSVpvn(s, len)));
09bef843 3457 }
29595ff2 3458 s = PEEKSPACE(d);
0120eecf 3459 if (*s == ':' && s[1] != ':')
29595ff2 3460 s = PEEKSPACE(s+1);
0120eecf
GS
3461 else if (s == d)
3462 break; /* require real whitespace or :'s */
29595ff2 3463 /* XXX losing whitespace on sequential attributes here */
09bef843 3464 }
90771dc0
NC
3465 {
3466 const char tmp
3467 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3468 if (*s != ';' && *s != '}' && *s != tmp
3469 && (tmp != '=' || *s != ')')) {
3470 const char q = ((*s == '\'') ? '"' : '\'');
3471 /* If here for an expression, and parsed no attrs, back
3472 off. */
3473 if (tmp == '=' && !attrs) {
3474 s = PL_bufptr;
3475 break;
3476 }
3477 /* MUST advance bufptr here to avoid bogus "at end of line"
3478 context messages from yyerror().
3479 */
3480 PL_bufptr = s;
3481 yyerror( *s
3482 ? Perl_form(aTHX_ "Invalid separator character "
3483 "%c%c%c in attribute list", q, *s, q)
3484 : "Unterminated attribute list" );
3485 if (attrs)
3486 op_free(attrs);
3487 OPERATOR(':');
09bef843 3488 }
09bef843 3489 }
f9829d6b 3490 got_attrs:
09bef843
SB
3491 if (attrs) {
3492 PL_nextval[PL_nexttoke].opval = attrs;
3493 force_next(THING);
3494 }
3495 TOKEN(COLONATTR);
3496 }
a0d0e21e 3497 OPERATOR(':');
8990e307
LW
3498 case '(':
3499 s++;
3280af22
NIS
3500 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3501 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3502 else
3280af22 3503 PL_expect = XTERM;
29595ff2 3504 s = SKIPSPACE1(s);
a0d0e21e 3505 TOKEN('(');
378cc40b 3506 case ';':
f4dd75d9 3507 CLINE;
90771dc0
NC
3508 {
3509 const char tmp = *s++;
3510 OPERATOR(tmp);
3511 }
378cc40b 3512 case ')':
90771dc0
NC
3513 {
3514 const char tmp = *s++;
29595ff2 3515 s = SKIPSPACE1(s);
90771dc0
NC
3516 if (*s == '{')
3517 PREBLOCK(tmp);
3518 TERM(tmp);
3519 }
79072805
LW
3520 case ']':
3521 s++;
3280af22 3522 if (PL_lex_brackets <= 0)
d98d5fff 3523 yyerror("Unmatched right square bracket");
463ee0b2 3524 else
3280af22
NIS
3525 --PL_lex_brackets;
3526 if (PL_lex_state == LEX_INTERPNORMAL) {
3527 if (PL_lex_brackets == 0) {
a0d0e21e 3528 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3529 PL_lex_state = LEX_INTERPEND;
79072805
LW
3530 }
3531 }
4633a7c4 3532 TERM(']');
79072805
LW