This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Skip new chdir warning test if fchdir is not available
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a
PP
63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
b6007c36
DM
65#define LEX_NORMAL 10 /* normal code (ie not within "...") */
66#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69#define LEX_INTERPSTART 6 /* expecting the start of a $var */
70
71 /* at end of code, eg "$x" followed by: */
72#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74
75#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77#define LEX_INTERPCONST 2 /* NOT USED */
78#define LEX_FORMLINE 1 /* expecting a format line */
79#define LEX_KNOWNEXT 0 /* next token known; just return it */
80
79072805 81
bbf60fe6 82#ifdef DEBUGGING
27da23d5 83static const char* const lex_state_names[] = {
bbf60fe6
DM
84 "KNOWNEXT",
85 "FORMLINE",
86 "INTERPCONST",
87 "INTERPCONCAT",
88 "INTERPENDMAYBE",
89 "INTERPEND",
90 "INTERPSTART",
91 "INTERPPUSH",
92 "INTERPCASEMOD",
93 "INTERPNORMAL",
94 "NORMAL"
95};
96#endif
97
79072805
LW
98#ifdef ff_next
99#undef ff_next
d48672a2
LW
100#endif
101
79072805 102#include "keywords.h"
fe14fcc3 103
ffb4593c
NT
104/* CLINE is a macro that ensures PL_copline has a sane value */
105
ae986130
LW
106#ifdef CLINE
107#undef CLINE
108#endif
57843af0 109#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 110
ffb4593c
NT
111/*
112 * Convenience functions to return different tokens and prime the
9cbb5ea2 113 * lexer for the next token. They all take an argument.
ffb4593c
NT
114 *
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
2d2e263d 125 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
126 * BOop : bitwise or or xor
127 * BAop : bitwise and
128 * SHop : shift operator
129 * PWop : power operator
9cbb5ea2 130 * PMop : pattern-matching operator
ffb4593c
NT
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
e5edeb50 134 * Rop : relational operator <= != gt
ffb4593c
NT
135 *
136 * Also see LOP and lop() below.
137 */
138
998054bd 139#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 140# define REPORT(retval) tokereport((I32)retval)
998054bd 141#else
bbf60fe6 142# define REPORT(retval) (retval)
998054bd
SC
143#endif
144
bbf60fe6
DM
145#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 165
a687059c
LW
166/* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
a687059c 170 */
376fcdbf
AL
171#define UNI2(f,x) { \
172 yylval.ival = f; \
173 PL_expect = x; \
174 PL_bufptr = s; \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
177 if (*s == '(') \
178 return REPORT( (int)FUNC1 ); \
179 s = skipspace(s); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 }
6f33ba73
RGS
182#define UNI(f) UNI2(f,XTERM)
183#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 184
376fcdbf
AL
185#define UNIBRACK(f) { \
186 yylval.ival = f; \
187 PL_bufptr = s; \
188 PL_last_uni = PL_oldbufptr; \
189 if (*s == '(') \
190 return REPORT( (int)FUNC1 ); \
191 s = skipspace(s); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
193 }
79072805 194
9f68db38 195/* grandfather return to old style */
3280af22 196#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 197
8fa7f367
JH
198#ifdef DEBUGGING
199
bbf60fe6
DM
200/* how to interpret the yylval associated with the token */
201enum token_type {
202 TOKENTYPE_NONE,
203 TOKENTYPE_IVAL,
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
205 TOKENTYPE_PVAL,
206 TOKENTYPE_OPVAL,
207 TOKENTYPE_GVVAL
208};
209
6d4a66ac
NC
210static struct debug_tokens {
211 const int token;
212 enum token_type type;
213 const char *name;
214} const debug_tokens[] =
9041c2e3 215{
bbf60fe6
DM
216 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
217 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
218 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
219 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
220 { ARROW, TOKENTYPE_NONE, "ARROW" },
221 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
222 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
223 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
224 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
225 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 226 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
227 { DO, TOKENTYPE_NONE, "DO" },
228 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
229 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
230 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
231 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
232 { ELSE, TOKENTYPE_NONE, "ELSE" },
233 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
234 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
235 { FOR, TOKENTYPE_IVAL, "FOR" },
236 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
237 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
238 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
239 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
240 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
241 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 242 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
243 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
244 { IF, TOKENTYPE_IVAL, "IF" },
245 { LABEL, TOKENTYPE_PVAL, "LABEL" },
246 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
247 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
248 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
249 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
250 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
251 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
252 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
253 { MY, TOKENTYPE_IVAL, "MY" },
254 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
255 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
256 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
257 { OROP, TOKENTYPE_IVAL, "OROP" },
258 { OROR, TOKENTYPE_NONE, "OROR" },
259 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
260 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
261 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
262 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
263 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
264 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
265 { PREINC, TOKENTYPE_NONE, "PREINC" },
266 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
267 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
268 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
269 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
270 { SUB, TOKENTYPE_NONE, "SUB" },
271 { THING, TOKENTYPE_OPVAL, "THING" },
272 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
273 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
274 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
275 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
276 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
277 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 278 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
279 { WHILE, TOKENTYPE_IVAL, "WHILE" },
280 { WORD, TOKENTYPE_OPVAL, "WORD" },
281 { 0, TOKENTYPE_NONE, 0 }
282};
283
284/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 285
bbf60fe6 286STATIC int
f5bd084c 287S_tokereport(pTHX_ I32 rv)
bbf60fe6 288{
97aff369 289 dVAR;
bbf60fe6 290 if (DEBUG_T_TEST) {
bd61b366 291 const char *name = NULL;
bbf60fe6 292 enum token_type type = TOKENTYPE_NONE;
f54cb97a 293 const struct debug_tokens *p;
396482e1 294 SV* const report = newSVpvs("<== ");
bbf60fe6 295
f54cb97a 296 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
297 if (p->token == (int)rv) {
298 name = p->name;
299 type = p->type;
300 break;
301 }
302 }
303 if (name)
54667de8 304 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
305 else if ((char)rv > ' ' && (char)rv < '~')
306 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
307 else if (!rv)
396482e1 308 sv_catpvs(report, "EOF");
bbf60fe6
DM
309 else
310 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
311 switch (type) {
312 case TOKENTYPE_NONE:
313 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
314 break;
315 case TOKENTYPE_IVAL:
e4584336 316 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
317 break;
318 case TOKENTYPE_OPNUM:
319 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
320 PL_op_name[yylval.ival]);
321 break;
322 case TOKENTYPE_PVAL:
323 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
324 break;
325 case TOKENTYPE_OPVAL:
b6007c36 326 if (yylval.opval) {
401441c0 327 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 328 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
329 if (yylval.opval->op_type == OP_CONST) {
330 Perl_sv_catpvf(aTHX_ report, " %s",
331 SvPEEK(cSVOPx_sv(yylval.opval)));
332 }
333
334 }
401441c0 335 else
396482e1 336 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
337 break;
338 }
b6007c36 339 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
340 };
341 return (int)rv;
998054bd
SC
342}
343
b6007c36
DM
344
345/* print the buffer with suitable escapes */
346
347STATIC void
348S_printbuf(pTHX_ const char* fmt, const char* s)
349{
396482e1 350 SV* const tmp = newSVpvs("");
b6007c36
DM
351 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
352 SvREFCNT_dec(tmp);
353}
354
8fa7f367
JH
355#endif
356
ffb4593c
NT
357/*
358 * S_ao
359 *
c963b151
BD
360 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
361 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
362 */
363
76e3520e 364STATIC int
cea2e8a9 365S_ao(pTHX_ int toketype)
a0d0e21e 366{
97aff369 367 dVAR;
3280af22
NIS
368 if (*PL_bufptr == '=') {
369 PL_bufptr++;
a0d0e21e
LW
370 if (toketype == ANDAND)
371 yylval.ival = OP_ANDASSIGN;
372 else if (toketype == OROR)
373 yylval.ival = OP_ORASSIGN;
c963b151
BD
374 else if (toketype == DORDOR)
375 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
376 toketype = ASSIGNOP;
377 }
378 return toketype;
379}
380
ffb4593c
NT
381/*
382 * S_no_op
383 * When Perl expects an operator and finds something else, no_op
384 * prints the warning. It always prints "<something> found where
385 * operator expected. It prints "Missing semicolon on previous line?"
386 * if the surprise occurs at the start of the line. "do you need to
387 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
388 * where the compiler doesn't know if foo is a method call or a function.
389 * It prints "Missing operator before end of line" if there's nothing
390 * after the missing operator, or "... before <...>" if there is something
391 * after the missing operator.
392 */
393
76e3520e 394STATIC void
bfed75c6 395S_no_op(pTHX_ const char *what, char *s)
463ee0b2 396{
97aff369 397 dVAR;
9d4ba2ae
AL
398 char * const oldbp = PL_bufptr;
399 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 400
1189a94a
GS
401 if (!s)
402 s = oldbp;
07c798fb 403 else
1189a94a 404 PL_bufptr = s;
cea2e8a9 405 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
406 if (ckWARN_d(WARN_SYNTAX)) {
407 if (is_first)
408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
409 "\t(Missing semicolon on previous line?)\n");
410 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 411 const char *t;
56da5a46
RGS
412 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
413 if (t < PL_bufptr && isSPACE(*t))
414 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
415 "\t(Do you need to predeclare %.*s?)\n",
551405c4 416 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
417 }
418 else {
419 assert(s >= oldbp);
420 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 421 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 422 }
07c798fb 423 }
3280af22 424 PL_bufptr = oldbp;
8990e307
LW
425}
426
ffb4593c
NT
427/*
428 * S_missingterm
429 * Complain about missing quote/regexp/heredoc terminator.
430 * If it's called with (char *)NULL then it cauterizes the line buffer.
431 * If we're in a delimited string and the delimiter is a control
432 * character, it's reformatted into a two-char sequence like ^C.
433 * This is fatal.
434 */
435
76e3520e 436STATIC void
cea2e8a9 437S_missingterm(pTHX_ char *s)
8990e307 438{
97aff369 439 dVAR;
8990e307
LW
440 char tmpbuf[3];
441 char q;
442 if (s) {
9d4ba2ae 443 char * const nl = strrchr(s,'\n');
d2719217 444 if (nl)
8990e307
LW
445 *nl = '\0';
446 }
9d116dd7
JH
447 else if (
448#ifdef EBCDIC
449 iscntrl(PL_multi_close)
450#else
451 PL_multi_close < 32 || PL_multi_close == 127
452#endif
453 ) {
8990e307 454 *tmpbuf = '^';
585ec06d 455 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
456 tmpbuf[2] = '\0';
457 s = tmpbuf;
458 }
459 else {
eb160463 460 *tmpbuf = (char)PL_multi_close;
8990e307
LW
461 tmpbuf[1] = '\0';
462 s = tmpbuf;
463 }
464 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 465 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 466}
79072805 467
ef89dcc3 468#define FEATURE_IS_ENABLED(name) \
0d863452 469 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 470 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
471/*
472 * S_feature_is_enabled
473 * Check whether the named feature is enabled.
474 */
475STATIC bool
476S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
477{
97aff369 478 dVAR;
0d863452 479 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
480 char he_name[32] = "feature_";
481 (void) strncpy(&he_name[8], name, 24);
482
483 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
484}
485
ffb4593c
NT
486/*
487 * Perl_deprecate
ffb4593c
NT
488 */
489
79072805 490void
bfed75c6 491Perl_deprecate(pTHX_ const char *s)
a0d0e21e 492{
599cee73 493 if (ckWARN(WARN_DEPRECATED))
9014280d 494 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
495}
496
12bcd1a6 497void
bfed75c6 498Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
499{
500 /* This function should NOT be called for any new deprecated warnings */
501 /* Use Perl_deprecate instead */
502 /* */
503 /* It is here to maintain backward compatibility with the pre-5.8 */
504 /* warnings category hierarchy. The "deprecated" category used to */
505 /* live under the "syntax" category. It is now a top-level category */
506 /* in its own right. */
507
508 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 509 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
510 "Use of %s is deprecated", s);
511}
512
ffb4593c 513/*
9cbb5ea2
GS
514 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
515 * utf16-to-utf8-reversed.
ffb4593c
NT
516 */
517
c39cd008
GS
518#ifdef PERL_CR_FILTER
519static void
520strip_return(SV *sv)
521{
95a20fc0 522 register const char *s = SvPVX_const(sv);
9d4ba2ae 523 register const char * const e = s + SvCUR(sv);
c39cd008
GS
524 /* outer loop optimized to do nothing if there are no CR-LFs */
525 while (s < e) {
526 if (*s++ == '\r' && *s == '\n') {
527 /* hit a CR-LF, need to copy the rest */
528 register char *d = s - 1;
529 *d++ = *s++;
530 while (s < e) {
531 if (*s == '\r' && s[1] == '\n')
532 s++;
533 *d++ = *s++;
534 }
535 SvCUR(sv) -= s - d;
536 return;
537 }
538 }
539}
a868473f 540
76e3520e 541STATIC I32
c39cd008 542S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 543{
f54cb97a 544 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
545 if (count > 0 && !maxlen)
546 strip_return(sv);
547 return count;
a868473f
NIS
548}
549#endif
550
ffb4593c
NT
551/*
552 * Perl_lex_start
9cbb5ea2
GS
553 * Initialize variables. Uses the Perl save_stack to save its state (for
554 * recursive calls to the parser).
ffb4593c
NT
555 */
556
a0d0e21e 557void
864dbfa3 558Perl_lex_start(pTHX_ SV *line)
79072805 559{
97aff369 560 dVAR;
cfd0369c 561 const char *s;
8990e307
LW
562 STRLEN len;
563
3280af22
NIS
564 SAVEI32(PL_lex_dojoin);
565 SAVEI32(PL_lex_brackets);
3280af22
NIS
566 SAVEI32(PL_lex_casemods);
567 SAVEI32(PL_lex_starts);
568 SAVEI32(PL_lex_state);
7766f137 569 SAVEVPTR(PL_lex_inpat);
3280af22 570 SAVEI32(PL_lex_inwhat);
18b09519
GS
571 if (PL_lex_state == LEX_KNOWNEXT) {
572 I32 toke = PL_nexttoke;
573 while (--toke >= 0) {
574 SAVEI32(PL_nexttype[toke]);
575 SAVEVPTR(PL_nextval[toke]);
576 }
577 SAVEI32(PL_nexttoke);
18b09519 578 }
57843af0 579 SAVECOPLINE(PL_curcop);
3280af22
NIS
580 SAVEPPTR(PL_bufptr);
581 SAVEPPTR(PL_bufend);
582 SAVEPPTR(PL_oldbufptr);
583 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
584 SAVEPPTR(PL_last_lop);
585 SAVEPPTR(PL_last_uni);
3280af22
NIS
586 SAVEPPTR(PL_linestart);
587 SAVESPTR(PL_linestr);
8edd5f42
RGS
588 SAVEGENERICPV(PL_lex_brackstack);
589 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 590 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
591 SAVESPTR(PL_lex_stuff);
592 SAVEI32(PL_lex_defer);
09bef843 593 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 594 SAVESPTR(PL_lex_repl);
bebdddfc
GS
595 SAVEINT(PL_expect);
596 SAVEINT(PL_lex_expect);
3280af22
NIS
597
598 PL_lex_state = LEX_NORMAL;
599 PL_lex_defer = 0;
600 PL_expect = XSTATE;
601 PL_lex_brackets = 0;
a02a5408
JC
602 Newx(PL_lex_brackstack, 120, char);
603 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
604 PL_lex_casemods = 0;
605 *PL_lex_casestack = '\0';
606 PL_lex_dojoin = 0;
607 PL_lex_starts = 0;
a0714e2c
SS
608 PL_lex_stuff = NULL;
609 PL_lex_repl = NULL;
3280af22 610 PL_lex_inpat = 0;
76be56bc 611 PL_nexttoke = 0;
3280af22 612 PL_lex_inwhat = 0;
09bef843 613 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
614 PL_linestr = line;
615 if (SvREADONLY(PL_linestr))
616 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 617 s = SvPV_const(PL_linestr, len);
6f27f9a7 618 if (!len || s[len-1] != ';') {
3280af22
NIS
619 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
620 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 621 sv_catpvs(PL_linestr, "\n;");
8990e307 622 }
3280af22
NIS
623 SvTEMP_off(PL_linestr);
624 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
625 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 626 PL_last_lop = PL_last_uni = NULL;
3280af22 627 PL_rsfp = 0;
79072805 628}
a687059c 629
ffb4593c
NT
630/*
631 * Perl_lex_end
9cbb5ea2
GS
632 * Finalizer for lexing operations. Must be called when the parser is
633 * done with the lexer.
ffb4593c
NT
634 */
635
463ee0b2 636void
864dbfa3 637Perl_lex_end(pTHX)
463ee0b2 638{
97aff369 639 dVAR;
3280af22 640 PL_doextract = FALSE;
463ee0b2
LW
641}
642
ffb4593c
NT
643/*
644 * S_incline
645 * This subroutine has nothing to do with tilting, whether at windmills
646 * or pinball tables. Its name is short for "increment line". It
57843af0 647 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 648 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
649 * # line 500 "foo.pm"
650 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
651 */
652
76e3520e 653STATIC void
cea2e8a9 654S_incline(pTHX_ char *s)
463ee0b2 655{
97aff369 656 dVAR;
463ee0b2
LW
657 char *t;
658 char *n;
73659bf1 659 char *e;
463ee0b2 660 char ch;
463ee0b2 661
57843af0 662 CopLINE_inc(PL_curcop);
463ee0b2
LW
663 if (*s++ != '#')
664 return;
bf4acbe4 665 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
666 if (strnEQ(s, "line", 4))
667 s += 4;
668 else
669 return;
084592ab 670 if (SPACE_OR_TAB(*s))
73659bf1 671 s++;
4e553d73 672 else
73659bf1 673 return;
bf4acbe4 674 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
675 if (!isDIGIT(*s))
676 return;
677 n = s;
678 while (isDIGIT(*s))
679 s++;
bf4acbe4 680 while (SPACE_OR_TAB(*s))
463ee0b2 681 s++;
73659bf1 682 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 683 s++;
73659bf1
GS
684 e = t + 1;
685 }
463ee0b2 686 else {
463ee0b2 687 for (t = s; !isSPACE(*t); t++) ;
73659bf1 688 e = t;
463ee0b2 689 }
bf4acbe4 690 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
691 e++;
692 if (*e != '\n' && *e != '\0')
693 return; /* false alarm */
694
463ee0b2
LW
695 ch = *t;
696 *t = '\0';
f4dd75d9 697 if (t - s > 0) {
8a5ee598 698#ifndef USE_ITHREADS
c4420975 699 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
700 STRLEN tmplen = cf ? strlen(cf) : 0;
701 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
702 /* must copy *{"::_<(eval N)[oldfilename:L]"}
703 * to *{"::_<newfilename"} */
704 char smallbuf[256], smallbuf2[256];
705 char *tmpbuf, *tmpbuf2;
8a5ee598 706 GV **gvp, *gv2;
e66cf94c
RGS
707 STRLEN tmplen2 = strlen(s);
708 if (tmplen + 3 < sizeof smallbuf)
709 tmpbuf = smallbuf;
710 else
711 Newx(tmpbuf, tmplen + 3, char);
712 if (tmplen2 + 3 < sizeof smallbuf2)
713 tmpbuf2 = smallbuf2;
714 else
715 Newx(tmpbuf2, tmplen2 + 3, char);
716 tmpbuf[0] = tmpbuf2[0] = '_';
717 tmpbuf[1] = tmpbuf2[1] = '<';
718 memcpy(tmpbuf + 2, cf, ++tmplen);
719 memcpy(tmpbuf2 + 2, s, ++tmplen2);
720 ++tmplen; ++tmplen2;
8a5ee598
RGS
721 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
722 if (gvp) {
723 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
724 if (!isGV(gv2))
725 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
726 /* adjust ${"::_<newfilename"} to store the new file name */
727 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
728 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
729 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
730 }
e66cf94c
RGS
731 if (tmpbuf != smallbuf) Safefree(tmpbuf);
732 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
733 }
8a5ee598 734#endif
05ec9bb3 735 CopFILE_free(PL_curcop);
57843af0 736 CopFILE_set(PL_curcop, s);
f4dd75d9 737 }
463ee0b2 738 *t = ch;
57843af0 739 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
740}
741
ffb4593c
NT
742/*
743 * S_skipspace
744 * Called to gobble the appropriate amount and type of whitespace.
745 * Skips comments as well.
746 */
747
76e3520e 748STATIC char *
cea2e8a9 749S_skipspace(pTHX_ register char *s)
a687059c 750{
97aff369 751 dVAR;
3280af22 752 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 753 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
754 s++;
755 return s;
756 }
757 for (;;) {
fd049845 758 STRLEN prevlen;
09bef843 759 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 760 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
761 while (s < PL_bufend && isSPACE(*s)) {
762 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
763 incline(s);
764 }
ffb4593c
NT
765
766 /* comment */
3280af22
NIS
767 if (s < PL_bufend && *s == '#') {
768 while (s < PL_bufend && *s != '\n')
463ee0b2 769 s++;
60e6418e 770 if (s < PL_bufend) {
463ee0b2 771 s++;
60e6418e
GS
772 if (PL_in_eval && !PL_rsfp) {
773 incline(s);
774 continue;
775 }
776 }
463ee0b2 777 }
ffb4593c
NT
778
779 /* only continue to recharge the buffer if we're at the end
780 * of the buffer, we're not reading from a source filter, and
781 * we're in normal lexing mode
782 */
09bef843
SB
783 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
784 PL_lex_state == LEX_FORMLINE)
463ee0b2 785 return s;
ffb4593c
NT
786
787 /* try to recharge the buffer */
9cbb5ea2 788 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 789 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2
GS
790 {
791 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
792 if (PL_minus_p) {
793 sv_setpv(PL_linestr,
794 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 795 PL_minus_n = PL_minus_p = 0;
a0d0e21e 796 }
01a19ab0
NC
797 else if (PL_minus_n) {
798 sv_setpvn(PL_linestr, ";}", 2);
799 PL_minus_n = 0;
800 }
a0d0e21e 801 else
4147a61b 802 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
803
804 /* reset variables for next time we lex */
9cbb5ea2
GS
805 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
806 = SvPVX(PL_linestr);
3280af22 807 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 808 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
809
810 /* Close the filehandle. Could be from -P preprocessor,
811 * STDIN, or a regular file. If we were reading code from
812 * STDIN (because the commandline held no -e or filename)
813 * then we don't close it, we reset it so the code can
814 * read from STDIN too.
815 */
816
3280af22
NIS
817 if (PL_preprocess && !PL_in_eval)
818 (void)PerlProc_pclose(PL_rsfp);
819 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
820 PerlIO_clearerr(PL_rsfp);
8990e307 821 else
3280af22
NIS
822 (void)PerlIO_close(PL_rsfp);
823 PL_rsfp = Nullfp;
463ee0b2
LW
824 return s;
825 }
ffb4593c
NT
826
827 /* not at end of file, so we only read another line */
09bef843
SB
828 /* make corresponding updates to old pointers, for yyerror() */
829 oldprevlen = PL_oldbufptr - PL_bufend;
830 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
831 if (PL_last_uni)
832 oldunilen = PL_last_uni - PL_bufend;
833 if (PL_last_lop)
834 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
835 PL_linestart = PL_bufptr = s + prevlen;
836 PL_bufend = s + SvCUR(PL_linestr);
837 s = PL_bufptr;
09bef843
SB
838 PL_oldbufptr = s + oldprevlen;
839 PL_oldoldbufptr = s + oldoldprevlen;
840 if (PL_last_uni)
841 PL_last_uni = s + oldunilen;
842 if (PL_last_lop)
843 PL_last_lop = s + oldloplen;
a0d0e21e 844 incline(s);
ffb4593c
NT
845
846 /* debugger active and we're not compiling the debugger code,
847 * so store the line into the debugger's array of lines
848 */
3280af22 849 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 850 SV * const sv = newSV(0);
8990e307
LW
851
852 sv_upgrade(sv, SVt_PVMG);
3280af22 853 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 854 (void)SvIOK_on(sv);
45977657 855 SvIV_set(sv, 0);
36c7798d 856 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 857 }
463ee0b2 858 }
a687059c 859}
378cc40b 860
ffb4593c
NT
861/*
862 * S_check_uni
863 * Check the unary operators to ensure there's no ambiguity in how they're
864 * used. An ambiguous piece of code would be:
865 * rand + 5
866 * This doesn't mean rand() + 5. Because rand() is a unary operator,
867 * the +5 is its argument.
868 */
869
76e3520e 870STATIC void
cea2e8a9 871S_check_uni(pTHX)
ba106d47 872{
97aff369 873 dVAR;
2f3197b3 874 char *s;
a0d0e21e 875 char *t;
2f3197b3 876
3280af22 877 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 878 return;
3280af22
NIS
879 while (isSPACE(*PL_last_uni))
880 PL_last_uni++;
7e2040f0 881 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 882 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 883 return;
6136c704
AL
884
885 /* XXX Things like this are just so nasty. We shouldn't be modifying
886 source code, even if we realquick set it back. */
0453d815 887 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 888 const char ch = *s;
0453d815 889 *s = '\0';
9014280d 890 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 891 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
892 PL_last_uni);
893 *s = ch;
894 }
2f3197b3
LW
895}
896
ffb4593c
NT
897/*
898 * LOP : macro to build a list operator. Its behaviour has been replaced
899 * with a subroutine, S_lop() for which LOP is just another name.
900 */
901
a0d0e21e
LW
902#define LOP(f,x) return lop(f,x,s)
903
ffb4593c
NT
904/*
905 * S_lop
906 * Build a list operator (or something that might be one). The rules:
907 * - if we have a next token, then it's a list operator [why?]
908 * - if the next thing is an opening paren, then it's a function
909 * - else it's a list operator
910 */
911
76e3520e 912STATIC I32
a0be28da 913S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 914{
97aff369 915 dVAR;
79072805 916 yylval.ival = f;
35c8bce7 917 CLINE;
3280af22
NIS
918 PL_expect = x;
919 PL_bufptr = s;
920 PL_last_lop = PL_oldbufptr;
eb160463 921 PL_last_lop_op = (OPCODE)f;
3280af22 922 if (PL_nexttoke)
bbf60fe6 923 return REPORT(LSTOP);
79072805 924 if (*s == '(')
bbf60fe6 925 return REPORT(FUNC);
79072805
LW
926 s = skipspace(s);
927 if (*s == '(')
bbf60fe6 928 return REPORT(FUNC);
79072805 929 else
bbf60fe6 930 return REPORT(LSTOP);
79072805
LW
931}
932
ffb4593c
NT
933/*
934 * S_force_next
9cbb5ea2 935 * When the lexer realizes it knows the next token (for instance,
ffb4593c 936 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
937 * to know what token to return the next time the lexer is called. Caller
938 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
939 * handles the token correctly.
ffb4593c
NT
940 */
941
4e553d73 942STATIC void
cea2e8a9 943S_force_next(pTHX_ I32 type)
79072805 944{
97aff369 945 dVAR;
3280af22
NIS
946 PL_nexttype[PL_nexttoke] = type;
947 PL_nexttoke++;
948 if (PL_lex_state != LEX_KNOWNEXT) {
949 PL_lex_defer = PL_lex_state;
950 PL_lex_expect = PL_expect;
951 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
952 }
953}
954
d0a148a6
NC
955STATIC SV *
956S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
957{
97aff369 958 dVAR;
9d4ba2ae 959 SV * const sv = newSVpvn(start,len);
bfed75c6 960 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
961 SvUTF8_on(sv);
962 return sv;
963}
964
ffb4593c
NT
965/*
966 * S_force_word
967 * When the lexer knows the next thing is a word (for instance, it has
968 * just seen -> and it knows that the next char is a word char, then
969 * it calls S_force_word to stick the next word into the PL_next lookahead.
970 *
971 * Arguments:
b1b65b59 972 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
973 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
974 * int check_keyword : if true, Perl checks to make sure the word isn't
975 * a keyword (do this if the word is a label, e.g. goto FOO)
976 * int allow_pack : if true, : characters will also be allowed (require,
977 * use, etc. do this)
9cbb5ea2 978 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
979 */
980
76e3520e 981STATIC char *
cea2e8a9 982S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 983{
97aff369 984 dVAR;
463ee0b2
LW
985 register char *s;
986 STRLEN len;
4e553d73 987
463ee0b2
LW
988 start = skipspace(start);
989 s = start;
7e2040f0 990 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 991 (allow_pack && *s == ':') ||
15f0808c 992 (allow_initial_tick && *s == '\'') )
a0d0e21e 993 {
3280af22
NIS
994 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
995 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
996 return start;
997 if (token == METHOD) {
998 s = skipspace(s);
999 if (*s == '(')
3280af22 1000 PL_expect = XTERM;
463ee0b2 1001 else {
3280af22 1002 PL_expect = XOPERATOR;
463ee0b2 1003 }
79072805 1004 }
d0a148a6
NC
1005 PL_nextval[PL_nexttoke].opval
1006 = (OP*)newSVOP(OP_CONST,0,
1007 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 1008 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
1009 force_next(token);
1010 }
1011 return s;
1012}
1013
ffb4593c
NT
1014/*
1015 * S_force_ident
9cbb5ea2 1016 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1017 * text only contains the "foo" portion. The first argument is a pointer
1018 * to the "foo", and the second argument is the type symbol to prefix.
1019 * Forces the next token to be a "WORD".
9cbb5ea2 1020 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1021 */
1022
76e3520e 1023STATIC void
bfed75c6 1024S_force_ident(pTHX_ register const char *s, int kind)
79072805 1025{
97aff369 1026 dVAR;
79072805 1027 if (s && *s) {
90e5519e
NC
1028 const STRLEN len = strlen(s);
1029 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
3280af22 1030 PL_nextval[PL_nexttoke].opval = o;
79072805 1031 force_next(WORD);
748a9306 1032 if (kind) {
11343788 1033 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1034 /* XXX see note in pp_entereval() for why we forgo typo
1035 warnings if the symbol must be introduced in an eval.
1036 GSAR 96-10-12 */
90e5519e
NC
1037 gv_fetchpvn_flags(s, len,
1038 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1039 : GV_ADD,
1040 kind == '$' ? SVt_PV :
1041 kind == '@' ? SVt_PVAV :
1042 kind == '%' ? SVt_PVHV :
a0d0e21e 1043 SVt_PVGV
90e5519e 1044 );
748a9306 1045 }
79072805
LW
1046 }
1047}
1048
1571675a
GS
1049NV
1050Perl_str_to_version(pTHX_ SV *sv)
1051{
1052 NV retval = 0.0;
1053 NV nshift = 1.0;
1054 STRLEN len;
cfd0369c 1055 const char *start = SvPV_const(sv,len);
9d4ba2ae 1056 const char * const end = start + len;
504618e9 1057 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1058 while (start < end) {
ba210ebe 1059 STRLEN skip;
1571675a
GS
1060 UV n;
1061 if (utf)
9041c2e3 1062 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1063 else {
1064 n = *(U8*)start;
1065 skip = 1;
1066 }
1067 retval += ((NV)n)/nshift;
1068 start += skip;
1069 nshift *= 1000;
1070 }
1071 return retval;
1072}
1073
4e553d73 1074/*
ffb4593c
NT
1075 * S_force_version
1076 * Forces the next token to be a version number.
e759cc13
RGS
1077 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1078 * and if "guessing" is TRUE, then no new token is created (and the caller
1079 * must use an alternative parsing method).
ffb4593c
NT
1080 */
1081
76e3520e 1082STATIC char *
e759cc13 1083S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1084{
97aff369 1085 dVAR;
89bfa8cd 1086 OP *version = Nullop;
44dcb63b 1087 char *d;
89bfa8cd
PP
1088
1089 s = skipspace(s);
1090
44dcb63b 1091 d = s;
dd629d5b 1092 if (*d == 'v')
44dcb63b 1093 d++;
44dcb63b 1094 if (isDIGIT(*d)) {
e759cc13
RGS
1095 while (isDIGIT(*d) || *d == '_' || *d == '.')
1096 d++;
9f3d182e 1097 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1098 SV *ver;
b73d6f50 1099 s = scan_num(s, &yylval);
89bfa8cd 1100 version = yylval.opval;
dd629d5b
GS
1101 ver = cSVOPx(version)->op_sv;
1102 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1103 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1104 SvNV_set(ver, str_to_version(ver));
1571675a 1105 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1106 }
89bfa8cd 1107 }
e759cc13
RGS
1108 else if (guessing)
1109 return s;
89bfa8cd
PP
1110 }
1111
1112 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1113 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1114 force_next(WORD);
89bfa8cd 1115
e759cc13 1116 return s;
89bfa8cd
PP
1117}
1118
ffb4593c
NT
1119/*
1120 * S_tokeq
1121 * Tokenize a quoted string passed in as an SV. It finds the next
1122 * chunk, up to end of string or a backslash. It may make a new
1123 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1124 * turns \\ into \.
1125 */
1126
76e3520e 1127STATIC SV *
cea2e8a9 1128S_tokeq(pTHX_ SV *sv)
79072805 1129{
97aff369 1130 dVAR;
79072805
LW
1131 register char *s;
1132 register char *send;
1133 register char *d;
b3ac6de7
IZ
1134 STRLEN len = 0;
1135 SV *pv = sv;
79072805
LW
1136
1137 if (!SvLEN(sv))
b3ac6de7 1138 goto finish;
79072805 1139
a0d0e21e 1140 s = SvPV_force(sv, len);
21a311ee 1141 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1142 goto finish;
463ee0b2 1143 send = s + len;
79072805
LW
1144 while (s < send && *s != '\\')
1145 s++;
1146 if (s == send)
b3ac6de7 1147 goto finish;
79072805 1148 d = s;
be4731d2 1149 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1150 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1151 if (SvUTF8(sv))
1152 SvUTF8_on(pv);
1153 }
79072805
LW
1154 while (s < send) {
1155 if (*s == '\\') {
a0d0e21e 1156 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1157 s++; /* all that, just for this */
1158 }
1159 *d++ = *s++;
1160 }
1161 *d = '\0';
95a20fc0 1162 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1163 finish:
3280af22 1164 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1165 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1166 return sv;
1167}
1168
ffb4593c
NT
1169/*
1170 * Now come three functions related to double-quote context,
1171 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1172 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1173 * interact with PL_lex_state, and create fake ( ... ) argument lists
1174 * to handle functions and concatenation.
1175 * They assume that whoever calls them will be setting up a fake
1176 * join call, because each subthing puts a ',' after it. This lets
1177 * "lower \luPpEr"
1178 * become
1179 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1180 *
1181 * (I'm not sure whether the spurious commas at the end of lcfirst's
1182 * arguments and join's arguments are created or not).
1183 */
1184
1185/*
1186 * S_sublex_start
1187 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1188 *
1189 * Pattern matching will set PL_lex_op to the pattern-matching op to
1190 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1191 *
1192 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1193 *
1194 * Everything else becomes a FUNC.
1195 *
1196 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1197 * had an OP_CONST or OP_READLINE). This just sets us up for a
1198 * call to S_sublex_push().
1199 */
1200
76e3520e 1201STATIC I32
cea2e8a9 1202S_sublex_start(pTHX)
79072805 1203{
97aff369 1204 dVAR;
0d46e09a 1205 register const I32 op_type = yylval.ival;
79072805
LW
1206
1207 if (op_type == OP_NULL) {
3280af22
NIS
1208 yylval.opval = PL_lex_op;
1209 PL_lex_op = Nullop;
79072805
LW
1210 return THING;
1211 }
1212 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1213 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1214
1215 if (SvTYPE(sv) == SVt_PVIV) {
1216 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1217 STRLEN len;
cfd0369c 1218 const char *p = SvPV_const(sv, len);
f54cb97a 1219 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1220 if (SvUTF8(sv))
1221 SvUTF8_on(nsv);
b3ac6de7
IZ
1222 SvREFCNT_dec(sv);
1223 sv = nsv;
4e553d73 1224 }
b3ac6de7 1225 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1226 PL_lex_stuff = NULL;
6f33ba73
RGS
1227 /* Allow <FH> // "foo" */
1228 if (op_type == OP_READLINE)
1229 PL_expect = XTERMORDORDOR;
79072805
LW
1230 return THING;
1231 }
1232
3280af22
NIS
1233 PL_sublex_info.super_state = PL_lex_state;
1234 PL_sublex_info.sub_inwhat = op_type;
1235 PL_sublex_info.sub_op = PL_lex_op;
1236 PL_lex_state = LEX_INTERPPUSH;
55497cff 1237
3280af22
NIS
1238 PL_expect = XTERM;
1239 if (PL_lex_op) {
1240 yylval.opval = PL_lex_op;
1241 PL_lex_op = Nullop;
55497cff
PP
1242 return PMFUNC;
1243 }
1244 else
1245 return FUNC;
1246}
1247
ffb4593c
NT
1248/*
1249 * S_sublex_push
1250 * Create a new scope to save the lexing state. The scope will be
1251 * ended in S_sublex_done. Returns a '(', starting the function arguments
1252 * to the uc, lc, etc. found before.
1253 * Sets PL_lex_state to LEX_INTERPCONCAT.
1254 */
1255
76e3520e 1256STATIC I32
cea2e8a9 1257S_sublex_push(pTHX)
55497cff 1258{
27da23d5 1259 dVAR;
f46d017c 1260 ENTER;
55497cff 1261
3280af22
NIS
1262 PL_lex_state = PL_sublex_info.super_state;
1263 SAVEI32(PL_lex_dojoin);
1264 SAVEI32(PL_lex_brackets);
3280af22
NIS
1265 SAVEI32(PL_lex_casemods);
1266 SAVEI32(PL_lex_starts);
1267 SAVEI32(PL_lex_state);
7766f137 1268 SAVEVPTR(PL_lex_inpat);
3280af22 1269 SAVEI32(PL_lex_inwhat);
57843af0 1270 SAVECOPLINE(PL_curcop);
3280af22 1271 SAVEPPTR(PL_bufptr);
8452ff4b 1272 SAVEPPTR(PL_bufend);
3280af22
NIS
1273 SAVEPPTR(PL_oldbufptr);
1274 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1275 SAVEPPTR(PL_last_lop);
1276 SAVEPPTR(PL_last_uni);
3280af22
NIS
1277 SAVEPPTR(PL_linestart);
1278 SAVESPTR(PL_linestr);
8edd5f42
RGS
1279 SAVEGENERICPV(PL_lex_brackstack);
1280 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1281
1282 PL_linestr = PL_lex_stuff;
a0714e2c 1283 PL_lex_stuff = NULL;
3280af22 1284
9cbb5ea2
GS
1285 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1286 = SvPVX(PL_linestr);
3280af22 1287 PL_bufend += SvCUR(PL_linestr);
bd61b366 1288 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1289 SAVEFREESV(PL_linestr);
1290
1291 PL_lex_dojoin = FALSE;
1292 PL_lex_brackets = 0;
a02a5408
JC
1293 Newx(PL_lex_brackstack, 120, char);
1294 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1295 PL_lex_casemods = 0;
1296 *PL_lex_casestack = '\0';
1297 PL_lex_starts = 0;
1298 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1299 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1300
1301 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1302 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1303 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1304 else
3280af22 1305 PL_lex_inpat = Nullop;
79072805 1306
55497cff 1307 return '(';
79072805
LW
1308}
1309
ffb4593c
NT
1310/*
1311 * S_sublex_done
1312 * Restores lexer state after a S_sublex_push.
1313 */
1314
76e3520e 1315STATIC I32
cea2e8a9 1316S_sublex_done(pTHX)
79072805 1317{
27da23d5 1318 dVAR;
3280af22 1319 if (!PL_lex_starts++) {
396482e1 1320 SV * const sv = newSVpvs("");
9aa983d2
JH
1321 if (SvUTF8(PL_linestr))
1322 SvUTF8_on(sv);
3280af22 1323 PL_expect = XOPERATOR;
9aa983d2 1324 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1325 return THING;
1326 }
1327
3280af22
NIS
1328 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1329 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1330 return yylex();
79072805
LW
1331 }
1332
ffb4593c 1333 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1334 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1335 PL_linestr = PL_lex_repl;
1336 PL_lex_inpat = 0;
1337 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1338 PL_bufend += SvCUR(PL_linestr);
bd61b366 1339 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1340 SAVEFREESV(PL_linestr);
1341 PL_lex_dojoin = FALSE;
1342 PL_lex_brackets = 0;
3280af22
NIS
1343 PL_lex_casemods = 0;
1344 *PL_lex_casestack = '\0';
1345 PL_lex_starts = 0;
25da4f38 1346 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1347 PL_lex_state = LEX_INTERPNORMAL;
1348 PL_lex_starts++;
e9fa98b2
HS
1349 /* we don't clear PL_lex_repl here, so that we can check later
1350 whether this is an evalled subst; that means we rely on the
1351 logic to ensure sublex_done() is called again only via the
1352 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1353 }
e9fa98b2 1354 else {
3280af22 1355 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1356 PL_lex_repl = NULL;
e9fa98b2 1357 }
79072805 1358 return ',';
ffed7fef
LW
1359 }
1360 else {
f46d017c 1361 LEAVE;
3280af22
NIS
1362 PL_bufend = SvPVX(PL_linestr);
1363 PL_bufend += SvCUR(PL_linestr);
1364 PL_expect = XOPERATOR;
09bef843 1365 PL_sublex_info.sub_inwhat = 0;
79072805 1366 return ')';
ffed7fef
LW
1367 }
1368}
1369
02aa26ce
NT
1370/*
1371 scan_const
1372
1373 Extracts a pattern, double-quoted string, or transliteration. This
1374 is terrifying code.
1375
3280af22
NIS
1376 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1377 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1378 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1379
9b599b2a
GS
1380 Returns a pointer to the character scanned up to. Iff this is
1381 advanced from the start pointer supplied (ie if anything was
1382 successfully parsed), will leave an OP for the substring scanned
1383 in yylval. Caller must intuit reason for not parsing further
1384 by looking at the next characters herself.
1385
02aa26ce
NT
1386 In patterns:
1387 backslashes:
1388 double-quoted style: \r and \n
1389 regexp special ones: \D \s
1390 constants: \x3
1391 backrefs: \1 (deprecated in substitution replacements)
1392 case and quoting: \U \Q \E
1393 stops on @ and $, but not for $ as tail anchor
1394
1395 In transliterations:
1396 characters are VERY literal, except for - not at the start or end
1397 of the string, which indicates a range. scan_const expands the
1398 range to the full set of intermediate characters.
1399
1400 In double-quoted strings:
1401 backslashes:
1402 double-quoted style: \r and \n
1403 constants: \x3
1404 backrefs: \1 (deprecated)
1405 case and quoting: \U \Q \E
1406 stops on @ and $
1407
1408 scan_const does *not* construct ops to handle interpolated strings.
1409 It stops processing as soon as it finds an embedded $ or @ variable
1410 and leaves it to the caller to work out what's going on.
1411
da6eedaa 1412 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1413
1414 $ in pattern could be $foo or could be tail anchor. Assumption:
1415 it's a tail anchor if $ is the last thing in the string, or if it's
1416 followed by one of ")| \n\t"
1417
1418 \1 (backreferences) are turned into $1
1419
1420 The structure of the code is
1421 while (there's a character to process) {
1422 handle transliteration ranges
1423 skip regexp comments
1424 skip # initiated comments in //x patterns
1425 check for embedded @foo
1426 check for embedded scalars
1427 if (backslash) {
1428 leave intact backslashes from leave (below)
1429 deprecate \1 in strings and sub replacements
1430 handle string-changing backslashes \l \U \Q \E, etc.
1431 switch (what was escaped) {
1432 handle - in a transliteration (becomes a literal -)
1433 handle \132 octal characters
1434 handle 0x15 hex characters
1435 handle \cV (control V)
1436 handle printf backslashes (\f, \r, \n, etc)
1437 } (end switch)
1438 } (end if backslash)
1439 } (end while character to read)
4e553d73 1440
02aa26ce
NT
1441*/
1442
76e3520e 1443STATIC char *
cea2e8a9 1444S_scan_const(pTHX_ char *start)
79072805 1445{
97aff369 1446 dVAR;
3280af22 1447 register char *send = PL_bufend; /* end of the constant */
561b68a9 1448 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1449 register char *s = start; /* start of the constant */
1450 register char *d = SvPVX(sv); /* destination for copies */
1451 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1452 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1453 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1454 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1455 UV uv;
4c3a8340
ST
1456#ifdef EBCDIC
1457 UV literal_endpoint = 0;
1458#endif
012bcf8d 1459
dff6d3cd 1460 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1461 PL_lex_inpat
b6d5fef8 1462 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1463 : "";
79072805 1464
2b9d42f0
NIS
1465 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1466 /* If we are doing a trans and we know we want UTF8 set expectation */
1467 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1468 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1469 }
1470
1471
79072805 1472 while (s < send || dorange) {
02aa26ce 1473 /* get transliterations out of the way (they're most literal) */
3280af22 1474 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1475 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1476 if (dorange) {
1ba5c669
JH
1477 I32 i; /* current expanded character */
1478 I32 min; /* first character in range */
1479 I32 max; /* last character in range */
02aa26ce 1480
2b9d42f0 1481 if (has_utf8) {
9d4ba2ae 1482 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1483 char *e = d++;
1484 while (e-- > c)
1485 *(e + 1) = *e;
25716404 1486 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1487 /* mark the range as done, and continue */
1488 dorange = FALSE;
1489 didrange = TRUE;
1490 continue;
1491 }
2b9d42f0 1492
95a20fc0 1493 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1494 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1495 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1496 d -= 2; /* eat the first char and the - */
1497
8ada0baa
JH
1498 min = (U8)*d; /* first char in range */
1499 max = (U8)d[1]; /* last char in range */
1500
c2e66d9e 1501 if (min > max) {
01ec43d0 1502 Perl_croak(aTHX_
d1573ac7 1503 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1504 (char)min, (char)max);
c2e66d9e
GS
1505 }
1506
c7f1f016 1507#ifdef EBCDIC
4c3a8340
ST
1508 if (literal_endpoint == 2 &&
1509 ((isLOWER(min) && isLOWER(max)) ||
1510 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1511 if (isLOWER(min)) {
1512 for (i = min; i <= max; i++)
1513 if (isLOWER(i))
db42d148 1514 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1515 } else {
1516 for (i = min; i <= max; i++)
1517 if (isUPPER(i))
db42d148 1518 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1519 }
1520 }
1521 else
1522#endif
1523 for (i = min; i <= max; i++)
eb160463 1524 *d++ = (char)i;
02aa26ce
NT
1525
1526 /* mark the range as done, and continue */
79072805 1527 dorange = FALSE;
01ec43d0 1528 didrange = TRUE;
4c3a8340
ST
1529#ifdef EBCDIC
1530 literal_endpoint = 0;
1531#endif
79072805 1532 continue;
4e553d73 1533 }
02aa26ce
NT
1534
1535 /* range begins (ignore - as first or last char) */
79072805 1536 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1537 if (didrange) {
1fafa243 1538 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1539 }
2b9d42f0 1540 if (has_utf8) {
25716404 1541 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1542 s++;
1543 continue;
1544 }
79072805
LW
1545 dorange = TRUE;
1546 s++;
01ec43d0
GS
1547 }
1548 else {
1549 didrange = FALSE;
4c3a8340
ST
1550#ifdef EBCDIC
1551 literal_endpoint = 0;
1552#endif
01ec43d0 1553 }
79072805 1554 }
02aa26ce
NT
1555
1556 /* if we get here, we're not doing a transliteration */
1557
0f5d15d6
IZ
1558 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1559 except for the last char, which will be done separately. */
3280af22 1560 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1561 if (s[2] == '#') {
e994fd66 1562 while (s+1 < send && *s != ')')
db42d148 1563 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1564 }
1565 else if (s[2] == '{' /* This should match regcomp.c */
1566 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1567 {
cc6b7395 1568 I32 count = 1;
0f5d15d6 1569 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1570 char c;
1571
d9f97599
GS
1572 while (count && (c = *regparse)) {
1573 if (c == '\\' && regparse[1])
1574 regparse++;
4e553d73 1575 else if (c == '{')
cc6b7395 1576 count++;
4e553d73 1577 else if (c == '}')
cc6b7395 1578 count--;
d9f97599 1579 regparse++;
cc6b7395 1580 }
e994fd66 1581 if (*regparse != ')')
5bdf89e7 1582 regparse--; /* Leave one char for continuation. */
0f5d15d6 1583 while (s < regparse)
db42d148 1584 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1585 }
748a9306 1586 }
02aa26ce
NT
1587
1588 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1589 else if (*s == '#' && PL_lex_inpat &&
1590 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1591 while (s+1 < send && *s != '\n')
db42d148 1592 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1593 }
02aa26ce 1594
5d1d4326 1595 /* check for embedded arrays
da6eedaa 1596 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1597 */
7e2040f0 1598 else if (*s == '@' && s[1]
5d1d4326 1599 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1600 break;
02aa26ce
NT
1601
1602 /* check for embedded scalars. only stop if we're sure it's a
1603 variable.
1604 */
79072805 1605 else if (*s == '$') {
3280af22 1606 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1607 break;
6002328a 1608 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1609 break; /* in regexp, $ might be tail anchor */
1610 }
02aa26ce 1611
2b9d42f0
NIS
1612 /* End of else if chain - OP_TRANS rejoin rest */
1613
02aa26ce 1614 /* backslashes */
79072805
LW
1615 if (*s == '\\' && s+1 < send) {
1616 s++;
02aa26ce
NT
1617
1618 /* some backslashes we leave behind */
c9f97d15 1619 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1620 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1621 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1622 continue;
1623 }
02aa26ce
NT
1624
1625 /* deprecate \1 in strings and substitution replacements */
3280af22 1626 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1627 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1628 {
599cee73 1629 if (ckWARN(WARN_SYNTAX))
9014280d 1630 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1631 *--s = '$';
1632 break;
1633 }
02aa26ce
NT
1634
1635 /* string-change backslash escapes */
3280af22 1636 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1637 --s;
1638 break;
1639 }
02aa26ce
NT
1640
1641 /* if we get here, it's either a quoted -, or a digit */
79072805 1642 switch (*s) {
02aa26ce
NT
1643
1644 /* quoted - in transliterations */
79072805 1645 case '-':
3280af22 1646 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1647 *d++ = *s++;
1648 continue;
1649 }
1650 /* FALL THROUGH */
1651 default:
11b8faa4 1652 {
041457d9
DM
1653 if (isALNUM(*s) &&
1654 *s != '_' &&
1655 ckWARN(WARN_MISC))
9014280d 1656 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1657 "Unrecognized escape \\%c passed through",
1658 *s);
1659 /* default action is to copy the quoted character */
f9a63242 1660 goto default_action;
11b8faa4 1661 }
02aa26ce
NT
1662
1663 /* \132 indicates an octal constant */
79072805
LW
1664 case '0': case '1': case '2': case '3':
1665 case '4': case '5': case '6': case '7':
ba210ebe 1666 {
53305cf1
NC
1667 I32 flags = 0;
1668 STRLEN len = 3;
1669 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1670 s += len;
1671 }
012bcf8d 1672 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1673
1674 /* \x24 indicates a hex constant */
79072805 1675 case 'x':
a0ed51b3
LW
1676 ++s;
1677 if (*s == '{') {
9d4ba2ae 1678 char* const e = strchr(s, '}');
a4c04bdc
NC
1679 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1680 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1681 STRLEN len;
355860ce 1682
53305cf1 1683 ++s;
adaeee49 1684 if (!e) {
a0ed51b3 1685 yyerror("Missing right brace on \\x{}");
355860ce 1686 continue;
ba210ebe 1687 }
53305cf1
NC
1688 len = e - s;
1689 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1690 s = e + 1;
a0ed51b3
LW
1691 }
1692 else {
ba210ebe 1693 {
53305cf1 1694 STRLEN len = 2;
a4c04bdc 1695 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1696 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1697 s += len;
1698 }
012bcf8d
GS
1699 }
1700
1701 NUM_ESCAPE_INSERT:
1702 /* Insert oct or hex escaped character.
301d3d20 1703 * There will always enough room in sv since such
db42d148 1704 * escapes will be longer than any UTF-8 sequence
301d3d20 1705 * they can end up as. */
ba7cea30 1706
c7f1f016
NIS
1707 /* We need to map to chars to ASCII before doing the tests
1708 to cover EBCDIC
1709 */
c4d5f83a 1710 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1711 if (!has_utf8 && uv > 255) {
301d3d20
JH
1712 /* Might need to recode whatever we have
1713 * accumulated so far if it contains any
1714 * hibit chars.
1715 *
1716 * (Can't we keep track of that and avoid
1717 * this rescan? --jhi)
012bcf8d 1718 */
c7f1f016 1719 int hicount = 0;
63cd0674
NIS
1720 U8 *c;
1721 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1722 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1723 hicount++;
db42d148 1724 }
012bcf8d 1725 }
63cd0674 1726 if (hicount) {
9d4ba2ae 1727 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1728 U8 *src, *dst;
1729 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1730 src = (U8 *)d - 1;
1731 dst = src+hicount;
1732 d += hicount;
cfd0369c 1733 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1734 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1735 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1736 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1737 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1738 }
1739 else {
63cd0674 1740 *dst-- = *src;
012bcf8d 1741 }
c7f1f016 1742 src--;
012bcf8d
GS
1743 }
1744 }
1745 }
1746
9aa983d2 1747 if (has_utf8 || uv > 255) {
9041c2e3 1748 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1749 has_utf8 = TRUE;
f9a63242
JH
1750 if (PL_lex_inwhat == OP_TRANS &&
1751 PL_sublex_info.sub_op) {
1752 PL_sublex_info.sub_op->op_private |=
1753 (PL_lex_repl ? OPpTRANS_FROM_UTF
1754 : OPpTRANS_TO_UTF);
f9a63242 1755 }
012bcf8d 1756 }
a0ed51b3 1757 else {
012bcf8d 1758 *d++ = (char)uv;
a0ed51b3 1759 }
012bcf8d
GS
1760 }
1761 else {
c4d5f83a 1762 *d++ = (char) uv;
a0ed51b3 1763 }
79072805 1764 continue;
02aa26ce 1765
b239daa5 1766 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1767 case 'N':
55eda711 1768 ++s;
423cee85
JH
1769 if (*s == '{') {
1770 char* e = strchr(s, '}');
155aba94 1771 SV *res;
423cee85 1772 STRLEN len;
cfd0369c 1773 const char *str;
4e553d73 1774
423cee85 1775 if (!e) {
5777a3f7 1776 yyerror("Missing right brace on \\N{}");
423cee85
JH
1777 e = s - 1;
1778 goto cont_scan;
1779 }
dbc0d4f2
JH
1780 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1781 /* \N{U+...} */
1782 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1783 PERL_SCAN_DISALLOW_PREFIX;
1784 s += 3;
1785 len = e - s;
1786 uv = grok_hex(s, &len, &flags, NULL);
1787 s = e + 1;
1788 goto NUM_ESCAPE_INSERT;
1789 }
55eda711 1790 res = newSVpvn(s + 1, e - s - 1);
bd61b366 1791 res = new_constant( NULL, 0, "charnames",
a0714e2c 1792 res, NULL, "\\N{...}" );
f9a63242
JH
1793 if (has_utf8)
1794 sv_utf8_upgrade(res);
cfd0369c 1795 str = SvPV_const(res,len);
1c47067b
JH
1796#ifdef EBCDIC_NEVER_MIND
1797 /* charnames uses pack U and that has been
1798 * recently changed to do the below uni->native
1799 * mapping, so this would be redundant (and wrong,
1800 * the code point would be doubly converted).
1801 * But leave this in just in case the pack U change
1802 * gets revoked, but the semantics is still
1803 * desireable for charnames. --jhi */
cddc7ef4 1804 {
cfd0369c 1805 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1806
1807 if (uv < 0x100) {
89ebb4a3 1808 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1809
1810 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1811 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1812 str = SvPV_const(res, len);
cddc7ef4
JH
1813 }
1814 }
1815#endif
89491803 1816 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1817 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1818 SvCUR_set(sv, d - ostart);
1819 SvPOK_on(sv);
e4f3eed8 1820 *d = '\0';
f08d6ad9 1821 sv_utf8_upgrade(sv);
d2f449dd 1822 /* this just broke our allocation above... */
eb160463 1823 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1824 d = SvPVX(sv) + SvCUR(sv);
89491803 1825 has_utf8 = TRUE;
f08d6ad9 1826 }
eb160463 1827 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1828 const char * const odest = SvPVX_const(sv);
423cee85 1829
8973db79 1830 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1831 d = SvPVX(sv) + (d - odest);
1832 }
1833 Copy(str, d, len, char);
1834 d += len;
1835 SvREFCNT_dec(res);
1836 cont_scan:
1837 s = e + 1;
1838 }
1839 else
5777a3f7 1840 yyerror("Missing braces on \\N{}");
423cee85
JH
1841 continue;
1842
02aa26ce 1843 /* \c is a control character */
79072805
LW
1844 case 'c':
1845 s++;
961ce445 1846 if (s < send) {
ba210ebe 1847 U8 c = *s++;
c7f1f016
NIS
1848#ifdef EBCDIC
1849 if (isLOWER(c))
1850 c = toUPPER(c);
1851#endif
db42d148 1852 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1853 }
961ce445
RGS
1854 else {
1855 yyerror("Missing control char name in \\c");
1856 }
79072805 1857 continue;
02aa26ce
NT
1858
1859 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1860 case 'b':
db42d148 1861 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1862 break;
1863 case 'n':
db42d148 1864 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1865 break;
1866 case 'r':
db42d148 1867 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1868 break;
1869 case 'f':
db42d148 1870 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1871 break;
1872 case 't':
db42d148 1873 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1874 break;
34a3fe2a 1875 case 'e':
db42d148 1876 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1877 break;
1878 case 'a':
db42d148 1879 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1880 break;
02aa26ce
NT
1881 } /* end switch */
1882
79072805
LW
1883 s++;
1884 continue;
02aa26ce 1885 } /* end if (backslash) */
4c3a8340
ST
1886#ifdef EBCDIC
1887 else
1888 literal_endpoint++;
1889#endif
02aa26ce 1890
f9a63242 1891 default_action:
2b9d42f0
NIS
1892 /* If we started with encoded form, or already know we want it
1893 and then encode the next character */
1894 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1895 STRLEN len = 1;
9d4ba2ae
AL
1896 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1897 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1898 s += len;
1899 if (need > len) {
1900 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1901 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1902 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1903 }
1904 d = (char*)uvchr_to_utf8((U8*)d, uv);
1905 has_utf8 = TRUE;
1906 }
1907 else {
1908 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1909 }
02aa26ce
NT
1910 } /* while loop to process each character */
1911
1912 /* terminate the string and set up the sv */
79072805 1913 *d = '\0';
95a20fc0 1914 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1915 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1916 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1917
79072805 1918 SvPOK_on(sv);
9f4817db 1919 if (PL_encoding && !has_utf8) {
d0063567
DK
1920 sv_recode_to_utf8(sv, PL_encoding);
1921 if (SvUTF8(sv))
1922 has_utf8 = TRUE;
9f4817db 1923 }
2b9d42f0 1924 if (has_utf8) {
7e2040f0 1925 SvUTF8_on(sv);
2b9d42f0 1926 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1927 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1928 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1929 }
1930 }
79072805 1931
02aa26ce 1932 /* shrink the sv if we allocated more than we used */
79072805 1933 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1934 SvPV_shrink_to_cur(sv);
79072805 1935 }
02aa26ce 1936
9b599b2a 1937 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1938 if (s > PL_bufptr) {
1939 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1940 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 1941 sv, NULL,
4e553d73 1942 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1943 ? "tr"
3280af22 1944 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1945 ? "s"
1946 : "qq")));
79072805 1947 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1948 } else
8990e307 1949 SvREFCNT_dec(sv);
79072805
LW
1950 return s;
1951}
1952
ffb4593c
NT
1953/* S_intuit_more
1954 * Returns TRUE if there's more to the expression (e.g., a subscript),
1955 * FALSE otherwise.
ffb4593c
NT
1956 *
1957 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1958 *
1959 * ->[ and ->{ return TRUE
1960 * { and [ outside a pattern are always subscripts, so return TRUE
1961 * if we're outside a pattern and it's not { or [, then return FALSE
1962 * if we're in a pattern and the first char is a {
1963 * {4,5} (any digits around the comma) returns FALSE
1964 * if we're in a pattern and the first char is a [
1965 * [] returns FALSE
1966 * [SOMETHING] has a funky algorithm to decide whether it's a
1967 * character class or not. It has to deal with things like
1968 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1969 * anything else returns TRUE
1970 */
1971
9cbb5ea2
GS
1972/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1973
76e3520e 1974STATIC int
cea2e8a9 1975S_intuit_more(pTHX_ register char *s)
79072805 1976{
97aff369 1977 dVAR;
3280af22 1978 if (PL_lex_brackets)
79072805
LW
1979 return TRUE;
1980 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1981 return TRUE;
1982 if (*s != '{' && *s != '[')
1983 return FALSE;
3280af22 1984 if (!PL_lex_inpat)
79072805
LW
1985 return TRUE;
1986
1987 /* In a pattern, so maybe we have {n,m}. */
1988 if (*s == '{') {
1989 s++;
1990 if (!isDIGIT(*s))
1991 return TRUE;
1992 while (isDIGIT(*s))
1993 s++;
1994 if (*s == ',')
1995 s++;
1996 while (isDIGIT(*s))
1997 s++;
1998 if (*s == '}')
1999 return FALSE;
2000 return TRUE;
2001
2002 }
2003
2004 /* On the other hand, maybe we have a character class */
2005
2006 s++;
2007 if (*s == ']' || *s == '^')
2008 return FALSE;
2009 else {
ffb4593c 2010 /* this is terrifying, and it works */
79072805
LW
2011 int weight = 2; /* let's weigh the evidence */
2012 char seen[256];
f27ffc4a 2013 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2014 const char * const send = strchr(s,']');
3280af22 2015 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2016
2017 if (!send) /* has to be an expression */
2018 return TRUE;
2019
2020 Zero(seen,256,char);
2021 if (*s == '$')
2022 weight -= 3;
2023 else if (isDIGIT(*s)) {
2024 if (s[1] != ']') {
2025 if (isDIGIT(s[1]) && s[2] == ']')
2026 weight -= 10;
2027 }
2028 else
2029 weight -= 100;
2030 }
2031 for (; s < send; s++) {
2032 last_un_char = un_char;
2033 un_char = (unsigned char)*s;
2034 switch (*s) {
2035 case '@':
2036 case '&':
2037 case '$':
2038 weight -= seen[un_char] * 10;
7e2040f0 2039 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2040 int len;
8903cb82 2041 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2042 len = (int)strlen(tmpbuf);
2043 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2044 weight -= 100;
2045 else
2046 weight -= 10;
2047 }
2048 else if (*s == '$' && s[1] &&
93a17b20
LW
2049 strchr("[#!%*<>()-=",s[1])) {
2050 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2051 weight -= 10;
2052 else
2053 weight -= 1;
2054 }
2055 break;
2056 case '\\':
2057 un_char = 254;
2058 if (s[1]) {
93a17b20 2059 if (strchr("wds]",s[1]))
79072805
LW
2060 weight += 100;
2061 else if (seen['\''] || seen['"'])
2062 weight += 1;
93a17b20 2063 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2064 weight += 40;
2065 else if (isDIGIT(s[1])) {
2066 weight += 40;
2067 while (s[1] && isDIGIT(s[1]))
2068 s++;
2069 }
2070 }
2071 else
2072 weight += 100;
2073 break;
2074 case '-':
2075 if (s[1] == '\\')
2076 weight += 50;
93a17b20 2077 if (strchr("aA01! ",last_un_char))
79072805 2078 weight += 30;
93a17b20 2079 if (strchr("zZ79~",s[1]))
79072805 2080 weight += 30;
f27ffc4a
GS
2081 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2082 weight -= 5; /* cope with negative subscript */
79072805
LW
2083 break;
2084 default:
3792a11b
NC
2085 if (!isALNUM(last_un_char)
2086 && !(last_un_char == '$' || last_un_char == '@'
2087 || last_un_char == '&')
2088 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2089 char *d = tmpbuf;
2090 while (isALPHA(*s))
2091 *d++ = *s++;
2092 *d = '\0';
2093 if (keyword(tmpbuf, d - tmpbuf))
2094 weight -= 150;
2095 }
2096 if (un_char == last_un_char + 1)
2097 weight += 5;
2098 weight -= seen[un_char];
2099 break;
2100 }
2101 seen[un_char]++;
2102 }
2103 if (weight >= 0) /* probably a character class */
2104 return FALSE;
2105 }
2106
2107 return TRUE;
2108}
ffed7fef 2109
ffb4593c
NT
2110/*
2111 * S_intuit_method
2112 *
2113 * Does all the checking to disambiguate
2114 * foo bar
2115 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2116 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2117 *
2118 * First argument is the stuff after the first token, e.g. "bar".
2119 *
2120 * Not a method if bar is a filehandle.
2121 * Not a method if foo is a subroutine prototyped to take a filehandle.
2122 * Not a method if it's really "Foo $bar"
2123 * Method if it's "foo $bar"
2124 * Not a method if it's really "print foo $bar"
2125 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2126 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2127 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2128 * =>
2129 */
2130
76e3520e 2131STATIC int
62d55b22 2132S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2133{
97aff369 2134 dVAR;
a0d0e21e 2135 char *s = start + (*start == '$');
3280af22 2136 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2137 STRLEN len;
2138 GV* indirgv;
2139
2140 if (gv) {
62d55b22 2141 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2142 return 0;
62d55b22
NC
2143 if (cv) {
2144 if (SvPOK(cv)) {
2145 const char *proto = SvPVX_const(cv);
2146 if (proto) {
2147 if (*proto == ';')
2148 proto++;
2149 if (*proto == '*')
2150 return 0;
2151 }
b6c543e3
IZ
2152 }
2153 } else
a0d0e21e
LW
2154 gv = 0;
2155 }
8903cb82 2156 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2157 /* start is the beginning of the possible filehandle/object,
2158 * and s is the end of it
2159 * tmpbuf is a copy of it
2160 */
2161
a0d0e21e 2162 if (*start == '$') {
3280af22 2163 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2164 return 0;
2165 s = skipspace(s);
3280af22
NIS
2166 PL_bufptr = start;
2167 PL_expect = XREF;
a0d0e21e
LW
2168 return *s == '(' ? FUNCMETH : METHOD;
2169 }
2170 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2171 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2172 len -= 2;
2173 tmpbuf[len] = '\0';
2174 goto bare_package;
2175 }
90e5519e 2176 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2177 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2178 return 0;
2179 /* filehandle or package name makes it a method */
89bfa8cd 2180 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2181 s = skipspace(s);
3280af22 2182 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2183 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2184 bare_package:
3280af22 2185 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2186 newSVpvn(tmpbuf,len));
3280af22
NIS
2187 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2188 PL_expect = XTERM;
a0d0e21e 2189 force_next(WORD);
3280af22 2190 PL_bufptr = s;
a0d0e21e
LW
2191 return *s == '(' ? FUNCMETH : METHOD;
2192 }
2193 }
2194 return 0;
2195}
2196
ffb4593c
NT
2197/*
2198 * S_incl_perldb
2199 * Return a string of Perl code to load the debugger. If PERL5DB
2200 * is set, it will return the contents of that, otherwise a
2201 * compile-time require of perl5db.pl.
2202 */
2203
bfed75c6 2204STATIC const char*
cea2e8a9 2205S_incl_perldb(pTHX)
a0d0e21e 2206{
97aff369 2207 dVAR;
3280af22 2208 if (PL_perldb) {
9d4ba2ae 2209 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2210
2211 if (pdb)
2212 return pdb;
93189314 2213 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2214 return "BEGIN { require 'perl5db.pl' }";
2215 }
2216 return "";
2217}
2218
2219
16d20bd9 2220/* Encoded script support. filter_add() effectively inserts a
4e553d73 2221 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2222 * Note that the filter function only applies to the current source file
2223 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2224 *
2225 * The datasv parameter (which may be NULL) can be used to pass
2226 * private data to this instance of the filter. The filter function
2227 * can recover the SV using the FILTER_DATA macro and use it to
2228 * store private buffers and state information.
2229 *
2230 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2231 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2232 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2233 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2234 * private use must be set using malloc'd pointers.
2235 */
16d20bd9
AD
2236
2237SV *
864dbfa3 2238Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2239{
97aff369 2240 dVAR;
f4c556ac 2241 if (!funcp)
a0714e2c 2242 return NULL;
f4c556ac 2243
3280af22
NIS
2244 if (!PL_rsfp_filters)
2245 PL_rsfp_filters = newAV();
16d20bd9 2246 if (!datasv)
561b68a9 2247 datasv = newSV(0);
862a34c6 2248 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2249 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2250 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2251 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2252 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2253 av_unshift(PL_rsfp_filters, 1);
2254 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2255 return(datasv);
2256}
4e553d73 2257
16d20bd9
AD
2258
2259/* Delete most recently added instance of this filter function. */
a0d0e21e 2260void
864dbfa3 2261Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2262{
97aff369 2263 dVAR;
e0c19803 2264 SV *datasv;
24801a4b 2265
33073adb 2266#ifdef DEBUGGING
8141890a 2267 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2268#endif
3280af22 2269 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2270 return;
2271 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2272 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2273 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2274 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2275 IoANY(datasv) = (void *)NULL;
3280af22 2276 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2277
16d20bd9
AD
2278 return;
2279 }
2280 /* we need to search for the correct entry and clear it */
cea2e8a9 2281 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2282}
2283
2284
1de9afcd
RGS
2285/* Invoke the idxth filter function for the current rsfp. */
2286/* maxlen 0 = read one text line */
16d20bd9 2287I32
864dbfa3 2288Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2289{
97aff369 2290 dVAR;
16d20bd9
AD
2291 filter_t funcp;
2292 SV *datasv = NULL;
e50aee73 2293
3280af22 2294 if (!PL_rsfp_filters)
16d20bd9 2295 return -1;
1de9afcd 2296 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2297 /* Provide a default input filter to make life easy. */
2298 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2299 DEBUG_P(PerlIO_printf(Perl_debug_log,
2300 "filter_read %d: from rsfp\n", idx));
4e553d73 2301 if (maxlen) {
16d20bd9
AD
2302 /* Want a block */
2303 int len ;
f54cb97a 2304 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2305
2306 /* ensure buf_sv is large enough */
eb160463 2307 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2308 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2309 if (PerlIO_error(PL_rsfp))
37120919
AD
2310 return -1; /* error */
2311 else
2312 return 0 ; /* end of file */
2313 }
16d20bd9
AD
2314 SvCUR_set(buf_sv, old_len + len) ;
2315 } else {
2316 /* Want a line */
3280af22
NIS
2317 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2318 if (PerlIO_error(PL_rsfp))
37120919
AD
2319 return -1; /* error */
2320 else
2321 return 0 ; /* end of file */
2322 }
16d20bd9
AD
2323 }
2324 return SvCUR(buf_sv);
2325 }
2326 /* Skip this filter slot if filter has been deleted */
1de9afcd 2327 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2328 DEBUG_P(PerlIO_printf(Perl_debug_log,
2329 "filter_read %d: skipped (filter deleted)\n",
2330 idx));
16d20bd9
AD
2331 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2332 }
2333 /* Get function pointer hidden within datasv */
8141890a 2334 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2335 DEBUG_P(PerlIO_printf(Perl_debug_log,
2336 "filter_read %d: via function %p (%s)\n",
cfd0369c 2337 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2338 /* Call function. The function is expected to */
2339 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2340 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2341 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2342}
2343
76e3520e 2344STATIC char *
cea2e8a9 2345S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2346{
97aff369 2347 dVAR;
c39cd008 2348#ifdef PERL_CR_FILTER
3280af22 2349 if (!PL_rsfp_filters) {
c39cd008 2350 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2351 }
2352#endif
3280af22 2353 if (PL_rsfp_filters) {
55497cff
PP
2354 if (!append)
2355 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2356 if (FILTER_READ(0, sv, 0) > 0)
2357 return ( SvPVX(sv) ) ;
2358 else
bd61b366 2359 return NULL ;
16d20bd9 2360 }
9d116dd7 2361 else
fd049845 2362 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2363}
2364
01ec43d0 2365STATIC HV *
7fc63493 2366S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2367{
97aff369 2368 dVAR;
def3634b
GS
2369 GV *gv;
2370
01ec43d0 2371 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2372 return PL_curstash;
2373
2374 if (len > 2 &&
2375 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2376 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2377 {
2378 return GvHV(gv); /* Foo:: */
def3634b
GS
2379 }
2380
2381 /* use constant CLASS => 'MyClass' */
90e5519e 2382 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2383 SV *sv;
2384 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2385 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2386 }
2387 }
2388
2389 return gv_stashpv(pkgname, FALSE);
2390}
a0d0e21e 2391
468aa647 2392STATIC char *
cc6ed77d 2393S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2394 dVAR;
468aa647
RGS
2395 if (PL_expect != XSTATE)
2396 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2397 is_use ? "use" : "no"));
2398 s = skipspace(s);
2399 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2400 s = force_version(s, TRUE);
2401 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2402 PL_nextval[PL_nexttoke].opval = Nullop;
2403 force_next(WORD);
2404 }
2405 else if (*s == 'v') {
2406 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2407 s = force_version(s, FALSE);
2408 }
2409 }
2410 else {
2411 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2412 s = force_version(s, FALSE);
2413 }
2414 yylval.ival = is_use;
2415 return s;
2416}
748a9306 2417#ifdef DEBUGGING
27da23d5 2418 static const char* const exp_name[] =
09bef843 2419 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2420 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2421 };
748a9306 2422#endif
463ee0b2 2423
02aa26ce
NT
2424/*
2425 yylex
2426
2427 Works out what to call the token just pulled out of the input
2428 stream. The yacc parser takes care of taking the ops we return and
2429 stitching them into a tree.
2430
2431 Returns:
2432 PRIVATEREF
2433
2434 Structure:
2435 if read an identifier
2436 if we're in a my declaration
2437 croak if they tried to say my($foo::bar)
2438 build the ops for a my() declaration
2439 if it's an access to a my() variable
2440 are we in a sort block?
2441 croak if my($a); $a <=> $b
2442 build ops for access to a my() variable
2443 if in a dq string, and they've said @foo and we can't find @foo
2444 croak
2445 build ops for a bareword
2446 if we already built the token before, use it.
2447*/
2448
20141f0e 2449
dba4d153
JH
2450#ifdef __SC__
2451#pragma segment Perl_yylex
2452#endif
dba4d153 2453int
dba4d153 2454Perl_yylex(pTHX)
20141f0e 2455{
97aff369 2456 dVAR;
3afc138a 2457 register char *s = PL_bufptr;
378cc40b 2458 register char *d;
463ee0b2 2459 STRLEN len;
aa7440fb 2460 bool bof = FALSE;
a687059c 2461
bbf60fe6 2462 DEBUG_T( {
396482e1 2463 SV* tmp = newSVpvs("");
b6007c36
DM
2464 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2465 (IV)CopLINE(PL_curcop),
2466 lex_state_names[PL_lex_state],
2467 exp_name[PL_expect],
2468 pv_display(tmp, s, strlen(s), 0, 60));
2469 SvREFCNT_dec(tmp);
bbf60fe6 2470 } );
02aa26ce 2471 /* check if there's an identifier for us to look at */
ba979b31 2472 if (PL_pending_ident)
bbf60fe6 2473 return REPORT(S_pending_ident(aTHX));
bbce6d69 2474
02aa26ce
NT
2475 /* no identifier pending identification */
2476
3280af22 2477 switch (PL_lex_state) {
79072805
LW
2478#ifdef COMMENTARY
2479 case LEX_NORMAL: /* Some compilers will produce faster */
2480 case LEX_INTERPNORMAL: /* code if we comment these out. */
2481 break;
2482#endif
2483
09bef843 2484 /* when we've already built the next token, just pull it out of the queue */
79072805 2485 case LEX_KNOWNEXT:
3280af22
NIS
2486 PL_nexttoke--;
2487 yylval = PL_nextval[PL_nexttoke];
2488 if (!PL_nexttoke) {
2489 PL_lex_state = PL_lex_defer;
2490 PL_expect = PL_lex_expect;
2491 PL_lex_defer = LEX_NORMAL;
463ee0b2 2492 }
bbf60fe6 2493 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2494
02aa26ce 2495 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2496 when we get here, PL_bufptr is at the \
02aa26ce 2497 */
79072805
LW
2498 case LEX_INTERPCASEMOD:
2499#ifdef DEBUGGING
3280af22 2500 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2501 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2502#endif
02aa26ce 2503 /* handle \E or end of string */
3280af22 2504 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2505 /* if at a \E */
3280af22 2506 if (PL_lex_casemods) {
f54cb97a 2507 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2508 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2509
3792a11b
NC
2510 if (PL_bufptr != PL_bufend
2511 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2512 PL_bufptr += 2;
2513 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2514 }
bbf60fe6 2515 return REPORT(')');
79072805 2516 }
3280af22
NIS
2517 if (PL_bufptr != PL_bufend)
2518 PL_bufptr += 2;
2519 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2520 return yylex();
79072805
LW
2521 }
2522 else {
607df283 2523 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2524 "### Saw case modifier\n"); });
3280af22 2525 s = PL_bufptr + 1;
6e909404
JH
2526 if (s[1] == '\\' && s[2] == 'E') {
2527 PL_bufptr = s + 3;
2528 PL_lex_state = LEX_INTERPCONCAT;
2529 return yylex();
a0d0e21e 2530 }
6e909404 2531 else {
90771dc0 2532 I32 tmp;
6e909404
JH
2533 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2534 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2535 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2536 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2537 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2538 return REPORT(')');
6e909404
JH
2539 }
2540 if (PL_lex_casemods > 10)
2541 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2542 PL_lex_casestack[PL_lex_casemods++] = *s;
2543 PL_lex_casestack[PL_lex_casemods] = '\0';
2544 PL_lex_state = LEX_INTERPCONCAT;
2545 PL_nextval[PL_nexttoke].ival = 0;
2546 force_next('(');
2547 if (*s == 'l')
2548 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2549 else if (*s == 'u')
2550 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2551 else if (*s == 'L')
2552 PL_nextval[PL_nexttoke].ival = OP_LC;
2553 else if (*s == 'U')
2554 PL_nextval[PL_nexttoke].ival = OP_UC;
2555 else if (*s == 'Q')
2556 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2557 else
2558 Perl_croak(aTHX_ "panic: yylex");
2559 PL_bufptr = s + 1;
a0d0e21e 2560 }
79072805 2561 force_next(FUNC);
3280af22
NIS
2562 if (PL_lex_starts) {
2563 s = PL_bufptr;
2564 PL_lex_starts = 0;
131b3ad0
DM
2565 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2566 if (PL_lex_casemods == 1 && PL_lex_inpat)
2567 OPERATOR(',');
2568 else
2569 Aop(OP_CONCAT);
79072805
LW
2570 }
2571 else
cea2e8a9 2572 return yylex();
79072805
LW
2573 }
2574
55497cff 2575 case LEX_INTERPPUSH:
bbf60fe6 2576 return REPORT(sublex_push());
55497cff 2577
79072805 2578 case LEX_INTERPSTART:
3280af22 2579 if (PL_bufptr == PL_bufend)
bbf60fe6 2580 return REPORT(sublex_done());
607df283 2581 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2582 "### Interpolated variable\n"); });
3280af22
NIS
2583 PL_expect = XTERM;
2584 PL_lex_dojoin = (*PL_bufptr == '@');
2585 PL_lex_state = LEX_INTERPNORMAL;
2586 if (PL_lex_dojoin) {
2587 PL_nextval[PL_nexttoke].ival = 0;
79072805 2588 force_next(',');
a0d0e21e 2589 force_ident("\"", '$');
3280af22 2590 PL_nextval[PL_nexttoke].ival = 0;
79072805 2591 force_next('$');
3280af22 2592 PL_nextval[PL_nexttoke].ival = 0;
79072805 2593 force_next('(');
3280af22 2594 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2595 force_next(FUNC);
2596 }
3280af22
NIS
2597 if (PL_lex_starts++) {
2598 s = PL_bufptr;
131b3ad0
DM
2599 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2600 if (!PL_lex_casemods && PL_lex_inpat)
2601 OPERATOR(',');
2602 else
2603 Aop(OP_CONCAT);
79072805 2604 }
cea2e8a9 2605 return yylex();
79072805
LW
2606
2607 case LEX_INTERPENDMAYBE:
3280af22
NIS
2608 if (intuit_more(PL_bufptr)) {
2609 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2610 break;
2611 }
2612 /* FALL THROUGH */
2613
2614 case LEX_INTERPEND:
3280af22
NIS
2615 if (PL_lex_dojoin) {
2616 PL_lex_dojoin = FALSE;
2617 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2618 return REPORT(')');
79072805 2619 }
43a16006 2620 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2621 && SvEVALED(PL_lex_repl))
43a16006 2622 {
e9fa98b2 2623 if (PL_bufptr != PL_bufend)
cea2e8a9 2624 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 2625 PL_lex_repl = NULL;
e9fa98b2 2626 }
79072805
LW
2627 /* FALLTHROUGH */
2628 case LEX_INTERPCONCAT:
2629#ifdef DEBUGGING
3280af22 2630 if (PL_lex_brackets)
cea2e8a9 2631 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2632#endif
3280af22 2633 if (PL_bufptr == PL_bufend)
bbf60fe6 2634 return REPORT(sublex_done());
79072805 2635
3280af22
NIS
2636 if (SvIVX(PL_linestr) == '\'') {
2637 SV *sv = newSVsv(PL_linestr);
2638 if (!PL_lex_inpat)
76e3520e 2639 sv = tokeq(sv);
3280af22 2640 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2641 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2642 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2643 s = PL_bufend;
79072805
LW
2644 }
2645 else {
3280af22 2646 s = scan_const(PL_bufptr);
79072805 2647 if (*s == '\\')
3280af22 2648 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2649 else
3280af22 2650 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2651 }
2652
3280af22
NIS
2653 if (s != PL_bufptr) {
2654 PL_nextval[PL_nexttoke] = yylval;
2655 PL_expect = XTERM;
79072805 2656 force_next(THING);
131b3ad0
DM
2657 if (PL_lex_starts++) {
2658 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2659 if (!PL_lex_casemods && PL_lex_inpat)
2660 OPERATOR(',');
2661 else
2662 Aop(OP_CONCAT);
2663 }
79072805 2664 else {
3280af22 2665 PL_bufptr = s;
cea2e8a9 2666 return yylex();
79072805
LW
2667 }
2668 }
2669
cea2e8a9 2670 return yylex();
a0d0e21e 2671 case LEX_FORMLINE:
3280af22
NIS
2672 PL_lex_state = LEX_NORMAL;
2673 s = scan_formline(PL_bufptr);
2674 if (!PL_lex_formbrack)
a0d0e21e
LW
2675 goto rightbracket;
2676 OPERATOR(';');
79072805
LW
2677 }
2678
3280af22
NIS
2679 s = PL_bufptr;
2680 PL_oldoldbufptr = PL_oldbufptr;
2681 PL_oldbufptr = s;
463ee0b2
LW
2682
2683 retry:
378cc40b
LW
2684 switch (*s) {
2685 default:
7e2040f0 2686 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2687 goto keylookup;
cea2e8a9 2688 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2689 case 4:
2690 case 26:
2691 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2692 case 0:
3280af22
NIS
2693 if (!PL_rsfp) {
2694 PL_last_uni = 0;
2695 PL_last_lop = 0;
c5ee2135 2696 if (PL_lex_brackets) {
0bd48802
AL
2697 yyerror(PL_lex_formbrack
2698 ? "Format not terminated"
2699 : "Missing right curly or square bracket");
c5ee2135 2700 }
4e553d73 2701 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2702 "### Tokener got EOF\n");
5f80b19c 2703 } );
79072805 2704 TOKEN(0);
463ee0b2 2705 }
3280af22 2706 if (s++ < PL_bufend)
a687059c 2707 goto retry; /* ignore stray nulls */
3280af22
NIS
2708 PL_last_uni = 0;
2709 PL_last_lop = 0;
2710 if (!PL_in_eval && !PL_preambled) {
2711 PL_preambled = TRUE;
2712 sv_setpv(PL_linestr,incl_perldb());
2713 if (SvCUR(PL_linestr))
396482e1 2714 sv_catpvs(PL_linestr,";");
3280af22
NIS
2715 if (PL_preambleav){
2716 while(AvFILLp(PL_preambleav) >= 0) {
2717 SV *tmpsv = av_shift(PL_preambleav);
2718 sv_catsv(PL_linestr, tmpsv);
396482e1 2719 sv_catpvs(PL_linestr, ";");
91b7def8
PP
2720 sv_free(tmpsv);
2721 }
3280af22
NIS
2722 sv_free((SV*)PL_preambleav);
2723 PL_preambleav = NULL;
91b7def8 2724 }
3280af22 2725 if (PL_minus_n || PL_minus_p) {
396482e1 2726 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2727 if (PL_minus_l)
396482e1 2728 sv_catpvs(PL_linestr,"chomp;");
3280af22 2729 if (PL_minus_a) {
3280af22 2730 if (PL_minus_F) {
3792a11b
NC
2731 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2732 || *PL_splitstr == '"')
3280af22 2733 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2734 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2735 else {
c8ef6a4b
NC
2736 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2737 bytes can be used as quoting characters. :-) */
dd374669 2738 const char *splits = PL_splitstr;
91d456ae 2739 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2740 do {
2741 /* Need to \ \s */
dd374669
AL
2742 if (*splits == '\\')
2743 sv_catpvn(PL_linestr, splits, 1);
2744 sv_catpvn(PL_linestr, splits, 1);
2745 } while (*splits++);
48c4c863
NC
2746 /* This loop will embed the trailing NUL of
2747 PL_linestr as the last thing it does before
2748 terminating. */
396482e1 2749 sv_catpvs(PL_linestr, ");");
54310121 2750 }
2304df62
AD
2751 }
2752 else
396482e1 2753 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2754 }
79072805 2755 }
bc9b29db 2756 if (PL_minus_E)
396482e1
GA
2757 sv_catpvs(PL_linestr,"use feature ':5.10';");
2758 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2759 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2760 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2761 PL_last_lop = PL_last_uni = NULL;
3280af22 2762 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2763 SV * const sv = newSV(0);
a0d0e21e
LW
2764
2765 sv_upgrade(sv, SVt_PVMG);
3280af22 2766 sv_setsv(sv,PL_linestr);
0ac0412a 2767 (void)SvIOK_on(sv);
45977657 2768 SvIV_set(sv, 0);
36c7798d 2769 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2770 }
79072805 2771 goto retry;
a687059c 2772 }
e929a76b 2773 do {
aa7440fb 2774 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 2775 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af
JH
2776 fake_eof:
2777 if (PL_rsfp) {
2778 if (PL_preprocess && !PL_in_eval)
2779 (void)PerlProc_pclose(PL_rsfp);
2780 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2781 PerlIO_clearerr(PL_rsfp);
2782 else
2783 (void)PerlIO_close(PL_rsfp);
2784 PL_rsfp = Nullfp;
2785 PL_doextract = FALSE;
2786 }
2787 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2788 sv_setpv(PL_linestr,PL_minus_p
2789 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2790 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2791 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2792 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
2793 PL_minus_n = PL_minus_p = 0;
2794 goto retry;
2795 }
2796 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 2797 PL_last_lop = PL_last_uni = NULL;
c69006e4 2798 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2799 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2800 }
7aa207d6
JH
2801 /* If it looks like the start of a BOM or raw UTF-16,
2802 * check if it in fact is. */
2803 else if (bof &&
2804 (*s == 0 ||
2805 *(U8*)s == 0xEF ||
2806 *(U8*)s >= 0xFE ||
2807 s[1] == 0)) {
226017aa 2808#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2809# ifdef __GNU_LIBRARY__
2810# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2811# define FTELL_FOR_PIPE_IS_BROKEN
2812# endif
e3f494f1
JH
2813# else
2814# ifdef __GLIBC__
2815# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2816# define FTELL_FOR_PIPE_IS_BROKEN
2817# endif
2818# endif
226017aa
DD
2819# endif
2820#endif
2821#ifdef FTELL_FOR_PIPE_IS_BROKEN
2822 /* This loses the possibility to detect the bof
2823 * situation on perl -P when the libc5 is being used.
2824 * Workaround? Maybe attach some extra state to PL_rsfp?
2825 */
2826 if (!PL_preprocess)
7e28d3af 2827 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2828#else
eb160463 2829 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2830#endif
7e28d3af 2831 if (bof) {
3280af22 2832 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2833 s = swallow_bom((U8*)s);
e929a76b 2834 }
378cc40b 2835 }
3280af22 2836 if (PL_doextract) {
a0d0e21e
LW
2837 /* Incest with pod. */
2838 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2839 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2840 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2841 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2842 PL_last_lop = PL_last_uni = NULL;
3280af22 2843 PL_doextract = FALSE;
a0d0e21e 2844 }
4e553d73 2845 }
463ee0b2 2846 incline(s);
3280af22
NIS
2847 } while (PL_doextract);
2848 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2849 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2850 SV * const sv = newSV(0);
a687059c 2851
93a17b20 2852 sv_upgrade(sv, SVt_PVMG);
3280af22 2853 sv_setsv(sv,PL_linestr);
0ac0412a 2854 (void)SvIOK_on(sv);
45977657 2855 SvIV_set(sv, 0);
36c7798d 2856 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2857 }
3280af22 2858 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 2859 PL_last_lop = PL_last_uni = NULL;
57843af0 2860 if (CopLINE(PL_curcop) == 1) {
3280af22 2861 while (s < PL_bufend && isSPACE(*s))
79072805 2862 s++;
a0d0e21e 2863 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2864 s++;
bd61b366 2865 d = NULL;
3280af22 2866 if (!PL_in_eval) {
44a8e56a
PP
2867 if (*s == '#' && *(s+1) == '!')
2868 d = s + 2;
2869#ifdef ALTERNATE_SHEBANG
2870 else {
bfed75c6 2871 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2872 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2873 d = s + (sizeof(as) - 1);
2874 }
2875#endif /* ALTERNATE_SHEBANG */
2876 }
2877 if (d) {
b8378b72 2878 char *ipath;
774d564b 2879 char *ipathend;
b8378b72 2880
774d564b 2881 while (isSPACE(*d))
b8378b72
CS
2882 d++;
2883 ipath = d;
774d564b
PP
2884 while (*d && !isSPACE(*d))
2885 d++;
2886 ipathend = d;
2887
2888#ifdef ARG_ZERO_IS_SCRIPT
2889 if (ipathend > ipath) {
2890 /*
2891 * HP-UX (at least) sets argv[0] to the script name,
2892 * which makes $^X incorrect. And Digital UNIX and Linux,
2893 * at least, set argv[0] to the basename of the Perl
2894 * interpreter. So, having found "#!", we'll set it right.
2895 */
fafc274c
NC
2896 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
2897 SVt_PV)); /* $^X */
774d564b 2898 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2899 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2900 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2901 SvSETMAGIC(x);
2902 }
556c1dec
JH
2903 else {
2904 STRLEN blen;
2905 STRLEN llen;
cfd0369c 2906 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2907 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2908 if (llen < blen) {
2909 bstart += blen - llen;
2910 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2911 sv_setpvn(x, ipath, ipathend - ipath);
2912 SvSETMAGIC(x);
2913 }
2914 }
2915 }
774d564b 2916 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2917 }
774d564b 2918#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2919
2920 /*
2921 * Look for options.
2922 */
748a9306 2923 d = instr(s,"perl -");
84e30d1a 2924 if (!d) {
748a9306 2925 d = instr(s,"perl");
84e30d1a
GS
2926#if defined(DOSISH)
2927 /* avoid getting into infinite loops when shebang
2928 * line contains "Perl" rather than "perl" */
2929 if (!d) {
2930 for (d = ipathend-4; d >= ipath; --d) {
2931 if ((*d == 'p' || *d == 'P')
2932 && !ibcmp(d, "perl", 4))
2933 {
2934 break;
2935 }
2936 }
2937 if (d < ipath)
bd61b366 2938 d = NULL;
84e30d1a
GS
2939 }
2940#endif
2941 }
44a8e56a
PP
2942#ifdef ALTERNATE_SHEBANG
2943 /*
2944 * If the ALTERNATE_SHEBANG on this system starts with a
2945 * character that can be part of a Perl expression, then if
2946 * we see it but not "perl", we're probably looking at the
2947 * start of Perl code, not a request to hand off to some
2948 * other interpreter. Similarly, if "perl" is there, but
2949 * not in the first 'word' of the line, we assume the line
2950 * contains the start of the Perl program.
44a8e56a
PP
2951 */
2952 if (d && *s != '#') {
f54cb97a 2953 const char *c = ipath;
44a8e56a
PP
2954 while (*c && !strchr("; \t\r\n\f\v#", *c))
2955 c++;
2956 if (c < d)
bd61b366 2957 d = NULL; /* "perl" not in first word; ignore */
44a8e56a
PP
2958 else
2959 *s = '#'; /* Don't try to parse shebang line */
2960 }
774d564b 2961#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2962#ifndef MACOS_TRADITIONAL
748a9306 2963 if (!d &&
44a8e56a 2964 *s == '#' &&
774d564b 2965 ipathend > ipath &&
3280af22 2966 !PL_minus_c &&
748a9306 2967 !instr(s,"indir") &&
3280af22 2968 instr(PL_origargv[0],"perl"))
748a9306 2969 {
27da23d5 2970 dVAR;
9f68db38 2971 char **newargv;
9f68db38 2972
774d564b
PP
2973 *ipathend = '\0';
2974 s = ipathend + 1;
3280af22 2975 while (s < PL_bufend && isSPACE(*s))
9f68db38 2976 s++;
3280af22 2977 if (s < PL_bufend) {
a02a5408 2978 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2979 newargv[1] = s;
3280af22 2980 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2981 s++;
2982 *s = '\0';
3280af22 2983 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2984 }
2985 else
3280af22 2986 newargv = PL_origargv;
774d564b 2987 newargv[0] = ipath;
b35112e7 2988 PERL_FPU_PRE_EXEC
b4748376 2989 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2990 PERL_FPU_POST_EXEC
cea2e8a9 2991 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2992 }
bf4acbe4 2993#endif
748a9306 2994 if (d) {
748a9306 2995 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2996 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2997
2998 if (*d++ == '-') {
f54cb97a 2999 const bool switches_done = PL_doswitches;
fb993905
GA
3000 const U32 oldpdb = PL_perldb;
3001 const bool oldn = PL_minus_n;
3002 const bool oldp = PL_minus_p;
3003
8cc95fdb 3004 do {
3ffe3ee4 3005 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 3006 const char * const m = d;
8cc95fdb 3007 while (*d && !isSPACE(*d)) d++;
cea2e8a9 3008 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
3009 (int)(d - m), m);
3010 }
97bd5664 3011 d = moreswitches(d);
8cc95fdb 3012 } while (d);
f0b2cf55
YST
3013 if (PL_doswitches && !switches_done) {
3014 int argc = PL_origargc;
3015 char **argv = PL_origargv;
3016 do {
3017 argc--,argv++;
3018 } while (argc && argv[0][0] == '-' && argv[0][1]);
3019 init_argv_symbols(argc,argv);
3020 }
155aba94
GS
3021 if ((PERLDB_LINE && !oldpdb) ||
3022 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
3023 /* if we have already added "LINE: while (<>) {",
3024 we must not do it again */
748a9306 3025 {
c69006e4 3026 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3027 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3028 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3029 PL_last_lop = PL_last_uni = NULL;
3280af22 3030 PL_preambled = FALSE;
84902520 3031 if (PERLDB_LINE)
3280af22 3032 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3033 goto retry;
3034 }
a0d0e21e 3035 }
79072805 3036 }
9f68db38 3037 }
79072805 3038 }
3280af22
NIS
3039 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3040 PL_bufptr = s;
3041 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3042 return yylex();
ae986130 3043 }
378cc40b 3044 goto retry;
4fdae800 3045 case '\r':
6a27c188 3046#ifdef PERL_STRICT_CR
cea2e8a9 3047 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3048 Perl_croak(aTHX_
cc507455 3049 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3050#endif
4fdae800 3051 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3052#ifdef MACOS_TRADITIONAL
3053 case '\312':
3054#endif
378cc40b
LW
3055 s++;
3056 goto retry;
378cc40b 3057 case '#':
e929a76b 3058 case '\n':
3280af22 3059 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3060 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3061 /* handle eval qq[#line 1 "foo"\n ...] */
3062 CopLINE_dec(PL_curcop);
3063 incline(s);
3064 }
3280af22 3065 d = PL_bufend;
a687059c 3066 while (s < d && *s != '\n')
378cc40b 3067 s++;
0f85fab0 3068 if (s < d)
378cc40b 3069 s++;
78c267c1 3070 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3071 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3072 incline(s);
3280af22
NIS
3073 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3074 PL_bufptr = s;
3075 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3076 return yylex();
a687059c 3077 }
378cc40b 3078 }
a687059c 3079 else {
378cc40b 3080 *s = '\0';
3280af22 3081 PL_bufend = s;
a687059c 3082 }
378cc40b
LW
3083 goto retry;
3084 case '-':
79072805 3085 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3086 I32 ftst = 0;
90771dc0 3087 char tmp;
e5edeb50 3088
378cc40b 3089 s++;
3280af22 3090 PL_bufptr = s;
748a9306
LW
3091 tmp = *s++;
3092
bf4acbe4 3093 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3094 s++;
3095
3096 if (strnEQ(s,"=>",2)) {
3280af22 3097 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3098 DEBUG_T( { S_printbuf(aTHX_
3099 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3100 } );
748a9306
LW
3101 OPERATOR('-'); /* unary minus */
3102 }
3280af22 3103 PL_last_uni = PL_oldbufptr;
748a9306 3104 switch (tmp) {
e5edeb50
JH
3105 case 'r': ftst = OP_FTEREAD; break;
3106 case 'w': ftst = OP_FTEWRITE; break;
3107 case 'x': ftst = OP_FTEEXEC; break;
3108 case 'o': ftst = OP_FTEOWNED; break;
3109 case 'R': ftst = OP_FTRREAD; break;
3110 case 'W': ftst = OP_FTRWRITE; break;
3111 case 'X': ftst = OP_FTREXEC; break;
3112 case 'O': ftst = OP_FTROWNED; break;
3113 case 'e': ftst = OP_FTIS; break;
3114 case 'z': ftst = OP_FTZERO; break;
3115 case 's': ftst = OP_FTSIZE; break;
3116 case 'f': ftst = OP_FTFILE; break;
3117 case 'd': ftst = OP_FTDIR; break;
3118 case 'l': ftst = OP_FTLINK; break;
3119 case 'p': ftst = OP_FTPIPE; break;
3120 case 'S': ftst = OP_FTSOCK; break;
3121 case 'u': ftst = OP_FTSUID; break;
3122 case 'g': ftst = OP_FTSGID; break;
3123 case 'k': ftst = OP_FTSVTX; break;
3124 case 'b': ftst = OP_FTBLK; break;
3125 case 'c': ftst = OP_FTCHR; break;
3126 case 't': ftst = OP_FTTTY; break;
3127 case 'T': ftst = OP_FTTEXT; break;
3128 case 'B': ftst = OP_FTBINARY; break;
3129 case 'M': case 'A': case 'C':
fafc274c 3130 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
3131 switch (tmp) {
3132 case 'M': ftst = OP_FTMTIME; break;
3133 case 'A': ftst = OP_FTATIME; break;
3134 case 'C': ftst = OP_FTCTIME; break;
3135 default: break;
3136 }
3137 break;
378cc40b 3138 default:
378cc40b
LW
3139 break;
3140 }
e5edeb50 3141 if (ftst) {
eb160463 3142 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3143 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3144 "### Saw file test %c\n", (int)tmp);
5f80b19c 3145 } );
e5edeb50
JH
3146 FTST(ftst);
3147 }
3148 else {
3149 /* Assume it was a minus followed by a one-letter named
3150 * subroutine call (or a -bareword), then. */
95c31fe3 3151 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3152 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3153 (int) tmp);
5f80b19c 3154 } );
3cf7b4c4 3155 s = --PL_bufptr;
e5edeb50 3156 }
378cc40b 3157 }
90771dc0
NC
3158 {
3159 const char tmp = *s++;
3160 if (*s == tmp) {
3161 s++;
3162 if (PL_expect == XOPERATOR)
3163 TERM(POSTDEC);
3164 else
3165 OPERATOR(PREDEC);
3166 }
3167 else if (*s == '>') {
3168 s++;
3169 s = skipspace(s);
3170 if (isIDFIRST_lazy_if(s,UTF)) {
3171 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3172 TOKEN(ARROW);
3173 }
3174 else if (*s == '$')
3175 OPERATOR(ARROW);
3176 else
3177 TERM(ARROW);
3178 }
3280af22 3179 if (PL_expect == XOPERATOR)
90771dc0
NC
3180 Aop(OP_SUBTRACT);
3181 else {
3182 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3183 check_uni();
3184 OPERATOR('-'); /* unary minus */
79072805 3185 }
2f3197b3 3186 }
79072805 3187
378cc40b 3188 case '+':
90771dc0
NC
3189 {
3190 const char tmp = *s++;
3191 if (*s == tmp) {
3192 s++;
3193 if (PL_expect == XOPERATOR)
3194 TERM(POSTINC);
3195 else
3196 OPERATOR(PREINC);
3197 }
3280af22 3198 if (PL_expect == XOPERATOR)
90771dc0
NC
3199 Aop(OP_ADD);
3200 else {
3201 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3202 check_uni();
3203 OPERATOR('+');
3204 }
2f3197b3 3205 }
a687059c 3206
378cc40b 3207 case '*':
3280af22
NIS
3208 if (PL_expect != XOPERATOR) {
3209 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3210 PL_expect = XOPERATOR;
3211 force_ident(PL_tokenbuf, '*');
3212 if (!*PL_tokenbuf)
a0d0e21e 3213 PREREF('*');
79072805 3214 TERM('*');
a687059c 3215 }
79072805
LW
3216 s++;
3217 if (*s == '*') {
a687059c 3218 s++;
79072805 3219 PWop(OP_POW);
a687059c 3220 }
79072805
LW
3221 Mop(OP_MULTIPLY);
3222
378cc40b 3223 case '%':
3280af22 3224 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3225 ++s;
3226 Mop(OP_MODULO);
a687059c 3227 }
3280af22
NIS
3228 PL_tokenbuf[0] = '%';
3229 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3230 if (!PL_tokenbuf[1]) {
bbce6d69 3231 PREREF('%');
a687059c 3232 }
3280af22 3233 PL_pending_ident = '%';
bbce6d69 3234 TERM('%');
a687059c 3235
378cc40b 3236 case '^':
79072805 3237 s++;
a0d0e21e 3238 BOop(OP_BIT_XOR);
79072805 3239 case '[':
3280af22 3240 PL_lex_brackets++;
79072805 3241 /* FALL THROUGH */
378cc40b 3242 case '~':
0d863452
RH
3243 if (s[1] == '~'
3244 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3245 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3246 {
3247 s += 2;
3248 Eop(OP_SMARTMATCH);
3249 }
378cc40b 3250 case ',':
90771dc0
NC
3251 {
3252 const char tmp = *s++;
3253 OPERATOR(tmp);
3254 }
a0d0e21e
LW
3255 case ':':
3256 if (s[1] == ':') {
3257 len = 0;
0bfa2a8a 3258 goto just_a_word_zero_gv;
a0d0e21e
LW
3259 }
3260 s++;
09bef843
SB
3261 switch (PL_expect) {
3262 OP *attrs;
3263 case XOPERATOR:
3264 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3265 break;
3266 PL_bufptr = s; /* update in case we back off */
3267 goto grabattrs;
3268 case XATTRBLOCK:
3269 PL_expect = XBLOCK;
3270 goto grabattrs;
3271 case XATTRTERM:
3272 PL_expect = XTERMBLOCK;
3273 grabattrs:
3274 s = skipspace(s);
3275 attrs = Nullop;
7e2040f0 3276 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3277 I32 tmp;
09bef843 3278 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3279 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3280 if (tmp < 0) tmp = -tmp;
3281 switch (tmp) {
3282 case KEY_or:
3283 case KEY_and:
c963b151 3284 case KEY_err:
f9829d6b
GS
3285 case KEY_for:
3286 case KEY_unless:
3287 case KEY_if:
3288 case KEY_while:
3289 case KEY_until:
3290 goto got_attrs;
3291 default:
3292 break;
3293 }
3294 }
09bef843
SB
3295 if (*d == '(') {
3296 d = scan_str(d,TRUE,TRUE);
3297 if (!d) {
09bef843
SB
3298 /* MUST advance bufptr here to avoid bogus
3299 "at end of line" context messages from yyerror().
3300 */
3301 PL_bufptr = s + len;
3302 yyerror("Unterminated attribute parameter in attribute list");
3303 if (attrs)
3304 op_free(attrs);
bbf60fe6 3305 return REPORT(0); /* EOF indicator */
09bef843
SB
3306 }
3307 }
3308 if (PL_lex_stuff) {
3309 SV *sv = newSVpvn(s, len);
3310 sv_catsv(sv, PL_lex_stuff);
3311 attrs = append_elem(OP_LIST, attrs,
3312 newSVOP(OP_CONST, 0, sv));
3313 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 3314 PL_lex_stuff = NULL;
09bef843
SB
3315 }
3316 else {
371fce9b
DM
3317 if (len == 6 && strnEQ(s, "unique", len)) {
3318 if (PL_in_my == KEY_our)
3319#ifdef USE_ITHREADS
3320 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3321#else
3322 ; /* skip to avoid loading attributes.pm */
3323#endif
bfed75c6 3324 else
371fce9b
DM
3325 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3326 }
3327
d3cea301
SB
3328 /* NOTE: any CV attrs applied here need to be part of
3329 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3330 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3331 CvLVALUE_on(PL_compcv);
3332 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3333 CvLOCKED_on(PL_compcv);
3334 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3335 CvMETHOD_on(PL_compcv);
06492da6
SF
3336 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3337 CvASSERTION_on(PL_compcv);
78f9721b
SM
3338 /* After we've set the flags, it could be argued that
3339 we don't need to do the attributes.pm-based setting
3340 process, and shouldn't bother appending recognized
d3cea301
SB
3341 flags. To experiment with that, uncomment the
3342 following "else". (Note that's already been
3343 uncommented. That keeps the above-applied built-in
3344 attributes from being intercepted (and possibly
3345 rejected) by a package's attribute routines, but is
3346 justified by the performance win for the common case
3347 of applying only built-in attributes.) */
0256094b 3348 else
78f9721b
SM
3349 attrs = append_elem(OP_LIST, attrs,
3350 newSVOP(OP_CONST, 0,
3351 newSVpvn(s, len)));
09bef843
SB
3352 }
3353 s = skipspace(d);
0120eecf 3354 if (*s == ':' && s[1] != ':')
09bef843 3355 s = skipspace(s+1);
0120eecf
GS
3356 else if (s == d)
3357 break; /* require real whitespace or :'s */
09bef843 3358 }
90771dc0
NC
3359 {
3360 const char tmp
3361 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3362 if (*s != ';' && *s != '}' && *s != tmp
3363 && (tmp != '=' || *s != ')')) {
3364 const char q = ((*s == '\'') ? '"' : '\'');
3365 /* If here for an expression, and parsed no attrs, back
3366 off. */
3367 if (tmp == '=' && !attrs) {
3368 s = PL_bufptr;
3369 break;
3370 }
3371 /* MUST advance bufptr here to avoid bogus "at end of line"
3372 context messages from yyerror().
3373 */
3374 PL_bufptr = s;
3375 yyerror( *s
3376 ? Perl_form(aTHX_ "Invalid separator character "
3377 "%c%c%c in attribute list", q, *s, q)
3378 : "Unterminated attribute list" );
3379 if (attrs)
3380 op_free(attrs);
3381 OPERATOR(':');
09bef843 3382 }
09bef843 3383 }
f9829d6b 3384 got_attrs:
09bef843
SB
3385 if (attrs) {
3386 PL_nextval[PL_nexttoke].opval = attrs;
3387 force_next(THING);
3388 }
3389 TOKEN(COLONATTR);
3390 }
a0d0e21e 3391 OPERATOR(':');
8990e307
LW
3392 case '(':
3393 s++;
3280af22
NIS
3394 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3395 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3396 else
3280af22 3397 PL_expect = XTERM;
4a202259 3398 s = skipspace(s);
a0d0e21e 3399 TOKEN('(');
378cc40b 3400 case ';':
f4dd75d9 3401 CLINE;
90771dc0
NC
3402 {
3403 const char tmp = *s++;
3404 OPERATOR(tmp);
3405 }
378cc40b 3406 case ')':
90771dc0
NC
3407 {
3408 const char tmp = *s++;
3409 s = skipspace(s);
3410 if (*s == '{')
3411 PREBLOCK(tmp);
3412 TERM(tmp);
3413 }
79072805
LW
3414 case ']':
3415 s++;
3280af22 3416 if (PL_lex_brackets <= 0)
d98d5fff 3417 yyerror("Unmatched right square bracket");
463ee0b2 3418 else
3280af22
NIS
3419 --PL_lex_brackets;
3420 if (PL_lex_state == LEX_INTERPNORMAL) {
3421 if (PL_lex_brackets == 0) {
a0d0e21e 3422 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3423 PL_lex_state = LEX_INTERPEND;
79072805
LW
3424 }
3425 }
4633a7c4 3426 TERM(']');
79072805
LW
3427 case '{':
3428 leftbracket:
79072805 3429 s++;
3280af22 3430 if (PL_lex_brackets > 100) {
8edd5f42 3431 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3432 }
3280af22 3433 switch (PL_expect) {
a0d0e21e 3434 case XTERM:
3280af22 3435 if (PL_lex_formbrack) {
a0d0e21e
LW
3436 s--;
3437 PRETERMBLOCK(DO);
3438 }
3280af22
NIS
3439 if (PL_oldoldbufptr == PL_last_lop)
3440 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3441 else
3280af22 3442 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3443 OPERATOR(HASHBRACK);
a0d0e21e 3444 case XOPERATOR:
bf4acbe4 3445 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3446 s++;
44a8e56a 3447 d = s;
3280af22
NIS
3448 PL_tokenbuf[0] = '\0';
3449 if (d < PL_bufend && *d == '-') {
3450 PL_tokenbuf[0] = '-';
44a8e56a 3451 d++;
bf4acbe4 3452 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3453 d++;
3454 }
7e2040f0 3455 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3456 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3457 FALSE, &len);
bf4acbe4 3458 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3459 d++;
3460 if (*d == '}') {
f54cb97a 3461 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3462 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3463 if (minus)
3464 force_next('-');
748a9306
LW
3465 }
3466 }
3467 /* FALL THROUGH */
09bef843 3468 case XATTRBLOCK:
748a9306 3469 case XBLOCK:
3280af22
NIS
3470 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3471 PL_expect = XSTATE;
a0d0e21e 3472 break;
09bef843 3473 case XATTRTERM:
a0d0e21e 3474 case XTERMBLOCK:
3280af22
NIS
3475 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3476 PL_expect = XSTATE;
a0d0e21e
LW
3477 break;
3478 default: {
f54cb97a 3479 const char *t;
3280af22
NIS
3480 if (PL_oldoldbufptr == PL_last_lop)
3481 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3482 else
3280af22 3483 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3484 s = skipspace(s);
8452ff4b
SB
3485 if (*s == '}') {
3486 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3487 PL_expect = XTERM;
3488 /* This hack is to get the ${} in the message. */
3489 PL_bufptr = s+1;
3490 yyerror("syntax error");
3491 break;
3492 }
a0d0e21e 3493 OPERATOR(HASHBRACK);
8452ff4b 3494 }
b8a4b1be
GS
3495 /* This hack serves to disambiguate a pair of curlies
3496 * as being a block or an anon hash. Normally, expectation
3497 * determines that, but in cases where we're not in a
3498 * position to expect anything in particular (like inside
3499 * eval"") we have to resolve the ambiguity. This code
3500 * covers the case where the first term in the curlies is a
3501 * quoted string. Most other cases need to be explicitly
a0288114 3502 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3503 * curly in order to force resolution as an anon hash.
3504 *
3505 * XXX should probably propagate the outer expectation
3506 * into eval"" to rely less on this hack, but that could
3507 * potentially break current behavior of eval"".
3508 * GSAR 97-07-21
3509 */
3510 t = s;
3511 if (*s == '\'' || *s == '"' || *s == '`') {
3512 /* common case: get past first string, handling escapes */
3280af22 3513 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3514 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3515 t++;