This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
More SvPV consting, including some code cleanup and living dangerously
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
27da23d5
JH
29static const char ident_too_long[] =
30 "Identifier too long";
31static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
8903cb82 35
acfe0abc 36static void restore_rsfp(pTHX_ void *f);
6e3aabd6 37#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
38static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 40#endif
51371543 41
9059aa12
LW
42#define XFAKEBRACK 128
43#define XENUMMASK 127
44
39e02b42
JH
45#ifdef USE_UTF8_SCRIPTS
46# define UTF (!IN_BYTES)
2b9d42f0 47#else
746b446a 48# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 49#endif
a0ed51b3 50
61f0cdd9 51/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
52 * 1999-02-27 mjd-perl-patch@plover.com */
53#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
54
bf4acbe4
GS
55/* On MacOS, respect nonbreaking spaces */
56#ifdef MACOS_TRADITIONAL
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
58#else
59#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60#endif
61
ffb4593c
NT
62/* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
79072805
LW
64 * can get by with a single comparison (if the compiler is smart enough).
65 */
66
fb73857a 67/* #define LEX_NOTPARSING 11 is done in perl.h. */
68
55497cff 69#define LEX_NORMAL 10
70#define LEX_INTERPNORMAL 9
71#define LEX_INTERPCASEMOD 8
72#define LEX_INTERPPUSH 7
73#define LEX_INTERPSTART 6
74#define LEX_INTERPEND 5
75#define LEX_INTERPENDMAYBE 4
76#define LEX_INTERPCONCAT 3
77#define LEX_INTERPCONST 2
78#define LEX_FORMLINE 1
79#define LEX_KNOWNEXT 0
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 139# define REPORT(retval) tokereport(s,(int)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
376fcdbf
AL
170#define UNI2(f,x) { \
171 yylval.ival = f; \
172 PL_expect = x; \
173 PL_bufptr = s; \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
176 if (*s == '(') \
177 return REPORT( (int)FUNC1 ); \
178 s = skipspace(s); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
180 }
6f33ba73
RGS
181#define UNI(f) UNI2(f,XTERM)
182#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 183
376fcdbf
AL
184#define UNIBRACK(f) { \
185 yylval.ival = f; \
186 PL_bufptr = s; \
187 PL_last_uni = PL_oldbufptr; \
188 if (*s == '(') \
189 return REPORT( (int)FUNC1 ); \
190 s = skipspace(s); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
192 }
79072805 193
9f68db38 194/* grandfather return to old style */
3280af22 195#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 196
8fa7f367
JH
197#ifdef DEBUGGING
198
bbf60fe6
DM
199/* how to interpret the yylval associated with the token */
200enum token_type {
201 TOKENTYPE_NONE,
202 TOKENTYPE_IVAL,
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
204 TOKENTYPE_PVAL,
205 TOKENTYPE_OPVAL,
206 TOKENTYPE_GVVAL
207};
208
27da23d5
JH
209static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
9041c2e3 211{
bbf60fe6
DM
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DO, TOKENTYPE_NONE, "DO" },
223 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
224 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
225 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
226 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
227 { ELSE, TOKENTYPE_NONE, "ELSE" },
228 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
229 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
230 { FOR, TOKENTYPE_IVAL, "FOR" },
231 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
232 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
233 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
234 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
235 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
236 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
237 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
238 { IF, TOKENTYPE_IVAL, "IF" },
239 { LABEL, TOKENTYPE_PVAL, "LABEL" },
240 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
241 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
242 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
243 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
244 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
245 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
246 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
247 { MY, TOKENTYPE_IVAL, "MY" },
248 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
249 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
250 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
251 { OROP, TOKENTYPE_IVAL, "OROP" },
252 { OROR, TOKENTYPE_NONE, "OROR" },
253 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
254 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
255 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
256 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
257 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
258 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
259 { PREINC, TOKENTYPE_NONE, "PREINC" },
260 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
261 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
262 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
263 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
264 { SUB, TOKENTYPE_NONE, "SUB" },
265 { THING, TOKENTYPE_OPVAL, "THING" },
266 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
267 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
268 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
269 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
270 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
271 { USE, TOKENTYPE_IVAL, "USE" },
272 { WHILE, TOKENTYPE_IVAL, "WHILE" },
273 { WORD, TOKENTYPE_OPVAL, "WORD" },
274 { 0, TOKENTYPE_NONE, 0 }
275};
276
277/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 278
bbf60fe6 279STATIC int
bfed75c6 280S_tokereport(pTHX_ const char* s, I32 rv)
bbf60fe6
DM
281{
282 if (DEBUG_T_TEST) {
bfed75c6 283 const char *name = Nullch;
bbf60fe6 284 enum token_type type = TOKENTYPE_NONE;
f54cb97a 285 const struct debug_tokens *p;
bfed75c6 286 SV* report = newSVpvn("<== ", 4);
bbf60fe6 287
f54cb97a 288 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
289 if (p->token == (int)rv) {
290 name = p->name;
291 type = p->type;
292 break;
293 }
294 }
295 if (name)
54667de8 296 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
297 else if ((char)rv > ' ' && (char)rv < '~')
298 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
299 else if (!rv)
54667de8 300 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
301 else
302 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
303 switch (type) {
304 case TOKENTYPE_NONE:
305 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
306 break;
307 case TOKENTYPE_IVAL:
e4584336 308 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
309 break;
310 case TOKENTYPE_OPNUM:
311 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
312 PL_op_name[yylval.ival]);
313 break;
314 case TOKENTYPE_PVAL:
315 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
316 break;
317 case TOKENTYPE_OPVAL:
401441c0
RGS
318 if (yylval.opval)
319 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 320 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
321 else
322 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
323 break;
324 }
e4584336 325 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
998054bd
SC
326 if (s - PL_bufptr > 0)
327 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
328 else {
329 if (PL_oldbufptr && *PL_oldbufptr)
330 sv_catpv(report, PL_tokenbuf);
331 }
bbf60fe6
DM
332 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
333 };
334 return (int)rv;
998054bd
SC
335}
336
8fa7f367
JH
337#endif
338
ffb4593c
NT
339/*
340 * S_ao
341 *
c963b151
BD
342 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
343 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
344 */
345
76e3520e 346STATIC int
cea2e8a9 347S_ao(pTHX_ int toketype)
a0d0e21e 348{
3280af22
NIS
349 if (*PL_bufptr == '=') {
350 PL_bufptr++;
a0d0e21e
LW
351 if (toketype == ANDAND)
352 yylval.ival = OP_ANDASSIGN;
353 else if (toketype == OROR)
354 yylval.ival = OP_ORASSIGN;
c963b151
BD
355 else if (toketype == DORDOR)
356 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
357 toketype = ASSIGNOP;
358 }
359 return toketype;
360}
361
ffb4593c
NT
362/*
363 * S_no_op
364 * When Perl expects an operator and finds something else, no_op
365 * prints the warning. It always prints "<something> found where
366 * operator expected. It prints "Missing semicolon on previous line?"
367 * if the surprise occurs at the start of the line. "do you need to
368 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
369 * where the compiler doesn't know if foo is a method call or a function.
370 * It prints "Missing operator before end of line" if there's nothing
371 * after the missing operator, or "... before <...>" if there is something
372 * after the missing operator.
373 */
374
76e3520e 375STATIC void
bfed75c6 376S_no_op(pTHX_ const char *what, char *s)
463ee0b2 377{
3280af22
NIS
378 char *oldbp = PL_bufptr;
379 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 380
1189a94a
GS
381 if (!s)
382 s = oldbp;
07c798fb 383 else
1189a94a 384 PL_bufptr = s;
cea2e8a9 385 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
386 if (ckWARN_d(WARN_SYNTAX)) {
387 if (is_first)
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Missing semicolon on previous line?)\n");
390 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 391 const char *t;
56da5a46
RGS
392 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
393 if (t < PL_bufptr && isSPACE(*t))
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Do you need to predeclare %.*s?)\n",
396 t - PL_oldoldbufptr, PL_oldoldbufptr);
397 }
398 else {
399 assert(s >= oldbp);
400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
401 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
402 }
07c798fb 403 }
3280af22 404 PL_bufptr = oldbp;
8990e307
LW
405}
406
ffb4593c
NT
407/*
408 * S_missingterm
409 * Complain about missing quote/regexp/heredoc terminator.
410 * If it's called with (char *)NULL then it cauterizes the line buffer.
411 * If we're in a delimited string and the delimiter is a control
412 * character, it's reformatted into a two-char sequence like ^C.
413 * This is fatal.
414 */
415
76e3520e 416STATIC void
cea2e8a9 417S_missingterm(pTHX_ char *s)
8990e307
LW
418{
419 char tmpbuf[3];
420 char q;
421 if (s) {
422 char *nl = strrchr(s,'\n');
d2719217 423 if (nl)
8990e307
LW
424 *nl = '\0';
425 }
9d116dd7
JH
426 else if (
427#ifdef EBCDIC
428 iscntrl(PL_multi_close)
429#else
430 PL_multi_close < 32 || PL_multi_close == 127
431#endif
432 ) {
8990e307 433 *tmpbuf = '^';
3280af22 434 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
435 tmpbuf[2] = '\0';
436 s = tmpbuf;
437 }
438 else {
eb160463 439 *tmpbuf = (char)PL_multi_close;
8990e307
LW
440 tmpbuf[1] = '\0';
441 s = tmpbuf;
442 }
443 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 444 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 445}
79072805 446
ffb4593c
NT
447/*
448 * Perl_deprecate
ffb4593c
NT
449 */
450
79072805 451void
bfed75c6 452Perl_deprecate(pTHX_ const char *s)
a0d0e21e 453{
599cee73 454 if (ckWARN(WARN_DEPRECATED))
9014280d 455 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
456}
457
12bcd1a6 458void
bfed75c6 459Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
460{
461 /* This function should NOT be called for any new deprecated warnings */
462 /* Use Perl_deprecate instead */
463 /* */
464 /* It is here to maintain backward compatibility with the pre-5.8 */
465 /* warnings category hierarchy. The "deprecated" category used to */
466 /* live under the "syntax" category. It is now a top-level category */
467 /* in its own right. */
468
469 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 470 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
471 "Use of %s is deprecated", s);
472}
473
ffb4593c
NT
474/*
475 * depcom
9cbb5ea2 476 * Deprecate a comma-less variable list.
ffb4593c
NT
477 */
478
76e3520e 479STATIC void
cea2e8a9 480S_depcom(pTHX)
a0d0e21e 481{
12bcd1a6 482 deprecate_old("comma-less variable list");
a0d0e21e
LW
483}
484
ffb4593c 485/*
9cbb5ea2
GS
486 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
487 * utf16-to-utf8-reversed.
ffb4593c
NT
488 */
489
c39cd008
GS
490#ifdef PERL_CR_FILTER
491static void
492strip_return(SV *sv)
493{
95a20fc0 494 register const char *s = SvPVX_const(sv);
f54cb97a 495 register const char *e = s + SvCUR(sv);
c39cd008
GS
496 /* outer loop optimized to do nothing if there are no CR-LFs */
497 while (s < e) {
498 if (*s++ == '\r' && *s == '\n') {
499 /* hit a CR-LF, need to copy the rest */
500 register char *d = s - 1;
501 *d++ = *s++;
502 while (s < e) {
503 if (*s == '\r' && s[1] == '\n')
504 s++;
505 *d++ = *s++;
506 }
507 SvCUR(sv) -= s - d;
508 return;
509 }
510 }
511}
a868473f 512
76e3520e 513STATIC I32
c39cd008 514S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 515{
f54cb97a 516 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
517 if (count > 0 && !maxlen)
518 strip_return(sv);
519 return count;
a868473f
NIS
520}
521#endif
522
ffb4593c
NT
523/*
524 * Perl_lex_start
9cbb5ea2
GS
525 * Initialize variables. Uses the Perl save_stack to save its state (for
526 * recursive calls to the parser).
ffb4593c
NT
527 */
528
a0d0e21e 529void
864dbfa3 530Perl_lex_start(pTHX_ SV *line)
79072805 531{
8990e307
LW
532 char *s;
533 STRLEN len;
534
3280af22
NIS
535 SAVEI32(PL_lex_dojoin);
536 SAVEI32(PL_lex_brackets);
3280af22
NIS
537 SAVEI32(PL_lex_casemods);
538 SAVEI32(PL_lex_starts);
539 SAVEI32(PL_lex_state);
7766f137 540 SAVEVPTR(PL_lex_inpat);
3280af22 541 SAVEI32(PL_lex_inwhat);
18b09519
GS
542 if (PL_lex_state == LEX_KNOWNEXT) {
543 I32 toke = PL_nexttoke;
544 while (--toke >= 0) {
545 SAVEI32(PL_nexttype[toke]);
546 SAVEVPTR(PL_nextval[toke]);
547 }
548 SAVEI32(PL_nexttoke);
18b09519 549 }
57843af0 550 SAVECOPLINE(PL_curcop);
3280af22
NIS
551 SAVEPPTR(PL_bufptr);
552 SAVEPPTR(PL_bufend);
553 SAVEPPTR(PL_oldbufptr);
554 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
555 SAVEPPTR(PL_last_lop);
556 SAVEPPTR(PL_last_uni);
3280af22
NIS
557 SAVEPPTR(PL_linestart);
558 SAVESPTR(PL_linestr);
8edd5f42
RGS
559 SAVEGENERICPV(PL_lex_brackstack);
560 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 561 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
562 SAVESPTR(PL_lex_stuff);
563 SAVEI32(PL_lex_defer);
09bef843 564 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 565 SAVESPTR(PL_lex_repl);
bebdddfc
GS
566 SAVEINT(PL_expect);
567 SAVEINT(PL_lex_expect);
3280af22
NIS
568
569 PL_lex_state = LEX_NORMAL;
570 PL_lex_defer = 0;
571 PL_expect = XSTATE;
572 PL_lex_brackets = 0;
3280af22
NIS
573 New(899, PL_lex_brackstack, 120, char);
574 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
575 PL_lex_casemods = 0;
576 *PL_lex_casestack = '\0';
577 PL_lex_dojoin = 0;
578 PL_lex_starts = 0;
579 PL_lex_stuff = Nullsv;
580 PL_lex_repl = Nullsv;
581 PL_lex_inpat = 0;
76be56bc 582 PL_nexttoke = 0;
3280af22 583 PL_lex_inwhat = 0;
09bef843 584 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
585 PL_linestr = line;
586 if (SvREADONLY(PL_linestr))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
588 s = SvPV(PL_linestr, len);
6f27f9a7 589 if (!len || s[len-1] != ';') {
3280af22
NIS
590 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
591 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
592 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 593 }
3280af22
NIS
594 SvTEMP_off(PL_linestr);
595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 597 PL_last_lop = PL_last_uni = Nullch;
3280af22 598 PL_rsfp = 0;
79072805 599}
a687059c 600
ffb4593c
NT
601/*
602 * Perl_lex_end
9cbb5ea2
GS
603 * Finalizer for lexing operations. Must be called when the parser is
604 * done with the lexer.
ffb4593c
NT
605 */
606
463ee0b2 607void
864dbfa3 608Perl_lex_end(pTHX)
463ee0b2 609{
3280af22 610 PL_doextract = FALSE;
463ee0b2
LW
611}
612
ffb4593c
NT
613/*
614 * S_incline
615 * This subroutine has nothing to do with tilting, whether at windmills
616 * or pinball tables. Its name is short for "increment line". It
57843af0 617 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 618 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
619 * # line 500 "foo.pm"
620 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
621 */
622
76e3520e 623STATIC void
cea2e8a9 624S_incline(pTHX_ char *s)
463ee0b2
LW
625{
626 char *t;
627 char *n;
73659bf1 628 char *e;
463ee0b2 629 char ch;
463ee0b2 630
57843af0 631 CopLINE_inc(PL_curcop);
463ee0b2
LW
632 if (*s++ != '#')
633 return;
bf4acbe4 634 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
635 if (strnEQ(s, "line", 4))
636 s += 4;
637 else
638 return;
084592ab 639 if (SPACE_OR_TAB(*s))
73659bf1 640 s++;
4e553d73 641 else
73659bf1 642 return;
bf4acbe4 643 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
644 if (!isDIGIT(*s))
645 return;
646 n = s;
647 while (isDIGIT(*s))
648 s++;
bf4acbe4 649 while (SPACE_OR_TAB(*s))
463ee0b2 650 s++;
73659bf1 651 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 652 s++;
73659bf1
GS
653 e = t + 1;
654 }
463ee0b2 655 else {
463ee0b2 656 for (t = s; !isSPACE(*t); t++) ;
73659bf1 657 e = t;
463ee0b2 658 }
bf4acbe4 659 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
660 e++;
661 if (*e != '\n' && *e != '\0')
662 return; /* false alarm */
663
463ee0b2
LW
664 ch = *t;
665 *t = '\0';
f4dd75d9 666 if (t - s > 0) {
05ec9bb3 667 CopFILE_free(PL_curcop);
57843af0 668 CopFILE_set(PL_curcop, s);
f4dd75d9 669 }
463ee0b2 670 *t = ch;
57843af0 671 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
672}
673
ffb4593c
NT
674/*
675 * S_skipspace
676 * Called to gobble the appropriate amount and type of whitespace.
677 * Skips comments as well.
678 */
679
76e3520e 680STATIC char *
cea2e8a9 681S_skipspace(pTHX_ register char *s)
a687059c 682{
3280af22 683 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 684 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
685 s++;
686 return s;
687 }
688 for (;;) {
fd049845 689 STRLEN prevlen;
09bef843 690 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 691 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
692 while (s < PL_bufend && isSPACE(*s)) {
693 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
694 incline(s);
695 }
ffb4593c
NT
696
697 /* comment */
3280af22
NIS
698 if (s < PL_bufend && *s == '#') {
699 while (s < PL_bufend && *s != '\n')
463ee0b2 700 s++;
60e6418e 701 if (s < PL_bufend) {
463ee0b2 702 s++;
60e6418e
GS
703 if (PL_in_eval && !PL_rsfp) {
704 incline(s);
705 continue;
706 }
707 }
463ee0b2 708 }
ffb4593c
NT
709
710 /* only continue to recharge the buffer if we're at the end
711 * of the buffer, we're not reading from a source filter, and
712 * we're in normal lexing mode
713 */
09bef843
SB
714 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
715 PL_lex_state == LEX_FORMLINE)
463ee0b2 716 return s;
ffb4593c
NT
717
718 /* try to recharge the buffer */
9cbb5ea2
GS
719 if ((s = filter_gets(PL_linestr, PL_rsfp,
720 (prevlen = SvCUR(PL_linestr)))) == Nullch)
721 {
722 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
723 if (PL_minus_p) {
724 sv_setpv(PL_linestr,
725 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 726 PL_minus_n = PL_minus_p = 0;
a0d0e21e 727 }
01a19ab0
NC
728 else if (PL_minus_n) {
729 sv_setpvn(PL_linestr, ";}", 2);
730 PL_minus_n = 0;
731 }
a0d0e21e 732 else
4147a61b 733 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
734
735 /* reset variables for next time we lex */
9cbb5ea2
GS
736 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
737 = SvPVX(PL_linestr);
3280af22 738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 739 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
740
741 /* Close the filehandle. Could be from -P preprocessor,
742 * STDIN, or a regular file. If we were reading code from
743 * STDIN (because the commandline held no -e or filename)
744 * then we don't close it, we reset it so the code can
745 * read from STDIN too.
746 */
747
3280af22
NIS
748 if (PL_preprocess && !PL_in_eval)
749 (void)PerlProc_pclose(PL_rsfp);
750 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
751 PerlIO_clearerr(PL_rsfp);
8990e307 752 else
3280af22
NIS
753 (void)PerlIO_close(PL_rsfp);
754 PL_rsfp = Nullfp;
463ee0b2
LW
755 return s;
756 }
ffb4593c
NT
757
758 /* not at end of file, so we only read another line */
09bef843
SB
759 /* make corresponding updates to old pointers, for yyerror() */
760 oldprevlen = PL_oldbufptr - PL_bufend;
761 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
762 if (PL_last_uni)
763 oldunilen = PL_last_uni - PL_bufend;
764 if (PL_last_lop)
765 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
766 PL_linestart = PL_bufptr = s + prevlen;
767 PL_bufend = s + SvCUR(PL_linestr);
768 s = PL_bufptr;
09bef843
SB
769 PL_oldbufptr = s + oldprevlen;
770 PL_oldoldbufptr = s + oldoldprevlen;
771 if (PL_last_uni)
772 PL_last_uni = s + oldunilen;
773 if (PL_last_lop)
774 PL_last_lop = s + oldloplen;
a0d0e21e 775 incline(s);
ffb4593c
NT
776
777 /* debugger active and we're not compiling the debugger code,
778 * so store the line into the debugger's array of lines
779 */
3280af22 780 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
781 SV *sv = NEWSV(85,0);
782
783 sv_upgrade(sv, SVt_PVMG);
3280af22 784 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 785 (void)SvIOK_on(sv);
45977657 786 SvIV_set(sv, 0);
57843af0 787 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 788 }
463ee0b2 789 }
a687059c 790}
378cc40b 791
ffb4593c
NT
792/*
793 * S_check_uni
794 * Check the unary operators to ensure there's no ambiguity in how they're
795 * used. An ambiguous piece of code would be:
796 * rand + 5
797 * This doesn't mean rand() + 5. Because rand() is a unary operator,
798 * the +5 is its argument.
799 */
800
76e3520e 801STATIC void
cea2e8a9 802S_check_uni(pTHX)
ba106d47 803{
2f3197b3 804 char *s;
a0d0e21e 805 char *t;
2f3197b3 806
3280af22 807 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 808 return;
3280af22
NIS
809 while (isSPACE(*PL_last_uni))
810 PL_last_uni++;
7e2040f0 811 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 812 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 813 return;
0453d815 814 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 815 char ch = *s;
0453d815 816 *s = '\0';
9014280d 817 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 818 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
819 PL_last_uni);
820 *s = ch;
821 }
2f3197b3
LW
822}
823
ffb4593c
NT
824/*
825 * LOP : macro to build a list operator. Its behaviour has been replaced
826 * with a subroutine, S_lop() for which LOP is just another name.
827 */
828
a0d0e21e
LW
829#define LOP(f,x) return lop(f,x,s)
830
ffb4593c
NT
831/*
832 * S_lop
833 * Build a list operator (or something that might be one). The rules:
834 * - if we have a next token, then it's a list operator [why?]
835 * - if the next thing is an opening paren, then it's a function
836 * - else it's a list operator
837 */
838
76e3520e 839STATIC I32
a0be28da 840S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 841{
79072805 842 yylval.ival = f;
35c8bce7 843 CLINE;
3280af22
NIS
844 PL_expect = x;
845 PL_bufptr = s;
846 PL_last_lop = PL_oldbufptr;
eb160463 847 PL_last_lop_op = (OPCODE)f;
3280af22 848 if (PL_nexttoke)
bbf60fe6 849 return REPORT(LSTOP);
79072805 850 if (*s == '(')
bbf60fe6 851 return REPORT(FUNC);
79072805
LW
852 s = skipspace(s);
853 if (*s == '(')
bbf60fe6 854 return REPORT(FUNC);
79072805 855 else
bbf60fe6 856 return REPORT(LSTOP);
79072805
LW
857}
858
ffb4593c
NT
859/*
860 * S_force_next
9cbb5ea2 861 * When the lexer realizes it knows the next token (for instance,
ffb4593c 862 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
863 * to know what token to return the next time the lexer is called. Caller
864 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
865 * handles the token correctly.
ffb4593c
NT
866 */
867
4e553d73 868STATIC void
cea2e8a9 869S_force_next(pTHX_ I32 type)
79072805 870{
3280af22
NIS
871 PL_nexttype[PL_nexttoke] = type;
872 PL_nexttoke++;
873 if (PL_lex_state != LEX_KNOWNEXT) {
874 PL_lex_defer = PL_lex_state;
875 PL_lex_expect = PL_expect;
876 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
877 }
878}
879
d0a148a6
NC
880STATIC SV *
881S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
882{
883 SV *sv = newSVpvn(start,len);
bfed75c6 884 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
885 SvUTF8_on(sv);
886 return sv;
887}
888
ffb4593c
NT
889/*
890 * S_force_word
891 * When the lexer knows the next thing is a word (for instance, it has
892 * just seen -> and it knows that the next char is a word char, then
893 * it calls S_force_word to stick the next word into the PL_next lookahead.
894 *
895 * Arguments:
b1b65b59 896 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
897 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
898 * int check_keyword : if true, Perl checks to make sure the word isn't
899 * a keyword (do this if the word is a label, e.g. goto FOO)
900 * int allow_pack : if true, : characters will also be allowed (require,
901 * use, etc. do this)
9cbb5ea2 902 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
903 */
904
76e3520e 905STATIC char *
cea2e8a9 906S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 907{
463ee0b2
LW
908 register char *s;
909 STRLEN len;
4e553d73 910
463ee0b2
LW
911 start = skipspace(start);
912 s = start;
7e2040f0 913 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 914 (allow_pack && *s == ':') ||
15f0808c 915 (allow_initial_tick && *s == '\'') )
a0d0e21e 916 {
3280af22
NIS
917 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
918 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
919 return start;
920 if (token == METHOD) {
921 s = skipspace(s);
922 if (*s == '(')
3280af22 923 PL_expect = XTERM;
463ee0b2 924 else {
3280af22 925 PL_expect = XOPERATOR;
463ee0b2 926 }
79072805 927 }
d0a148a6
NC
928 PL_nextval[PL_nexttoke].opval
929 = (OP*)newSVOP(OP_CONST,0,
930 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 931 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
932 force_next(token);
933 }
934 return s;
935}
936
ffb4593c
NT
937/*
938 * S_force_ident
9cbb5ea2 939 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
940 * text only contains the "foo" portion. The first argument is a pointer
941 * to the "foo", and the second argument is the type symbol to prefix.
942 * Forces the next token to be a "WORD".
9cbb5ea2 943 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
944 */
945
76e3520e 946STATIC void
bfed75c6 947S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
948{
949 if (s && *s) {
bfed75c6 950 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 951 PL_nextval[PL_nexttoke].opval = o;
79072805 952 force_next(WORD);
748a9306 953 if (kind) {
11343788 954 o->op_private = OPpCONST_ENTERED;
55497cff 955 /* XXX see note in pp_entereval() for why we forgo typo
956 warnings if the symbol must be introduced in an eval.
957 GSAR 96-10-12 */
3280af22 958 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
959 kind == '$' ? SVt_PV :
960 kind == '@' ? SVt_PVAV :
961 kind == '%' ? SVt_PVHV :
962 SVt_PVGV
963 );
748a9306 964 }
79072805
LW
965 }
966}
967
1571675a
GS
968NV
969Perl_str_to_version(pTHX_ SV *sv)
970{
971 NV retval = 0.0;
972 NV nshift = 1.0;
973 STRLEN len;
e62f0680 974 const char *start = SvPVx_const(sv,len);
f54cb97a 975 const char *end = start + len;
504618e9 976 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 977 while (start < end) {
ba210ebe 978 STRLEN skip;
1571675a
GS
979 UV n;
980 if (utf)
9041c2e3 981 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
982 else {
983 n = *(U8*)start;
984 skip = 1;
985 }
986 retval += ((NV)n)/nshift;
987 start += skip;
988 nshift *= 1000;
989 }
990 return retval;
991}
992
4e553d73 993/*
ffb4593c
NT
994 * S_force_version
995 * Forces the next token to be a version number.
e759cc13
RGS
996 * If the next token appears to be an invalid version number, (e.g. "v2b"),
997 * and if "guessing" is TRUE, then no new token is created (and the caller
998 * must use an alternative parsing method).
ffb4593c
NT
999 */
1000
76e3520e 1001STATIC char *
e759cc13 1002S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1003{
1004 OP *version = Nullop;
44dcb63b 1005 char *d;
89bfa8cd 1006
1007 s = skipspace(s);
1008
44dcb63b 1009 d = s;
dd629d5b 1010 if (*d == 'v')
44dcb63b 1011 d++;
44dcb63b 1012 if (isDIGIT(*d)) {
e759cc13
RGS
1013 while (isDIGIT(*d) || *d == '_' || *d == '.')
1014 d++;
9f3d182e 1015 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1016 SV *ver;
b73d6f50 1017 s = scan_num(s, &yylval);
89bfa8cd 1018 version = yylval.opval;
dd629d5b
GS
1019 ver = cSVOPx(version)->op_sv;
1020 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1021 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1022 SvNV_set(ver, str_to_version(ver));
1571675a 1023 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1024 }
89bfa8cd 1025 }
e759cc13
RGS
1026 else if (guessing)
1027 return s;
89bfa8cd 1028 }
1029
1030 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1031 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1032 force_next(WORD);
89bfa8cd 1033
e759cc13 1034 return s;
89bfa8cd 1035}
1036
ffb4593c
NT
1037/*
1038 * S_tokeq
1039 * Tokenize a quoted string passed in as an SV. It finds the next
1040 * chunk, up to end of string or a backslash. It may make a new
1041 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1042 * turns \\ into \.
1043 */
1044
76e3520e 1045STATIC SV *
cea2e8a9 1046S_tokeq(pTHX_ SV *sv)
79072805
LW
1047{
1048 register char *s;
1049 register char *send;
1050 register char *d;
b3ac6de7
IZ
1051 STRLEN len = 0;
1052 SV *pv = sv;
79072805
LW
1053
1054 if (!SvLEN(sv))
b3ac6de7 1055 goto finish;
79072805 1056
a0d0e21e 1057 s = SvPV_force(sv, len);
21a311ee 1058 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1059 goto finish;
463ee0b2 1060 send = s + len;
79072805
LW
1061 while (s < send && *s != '\\')
1062 s++;
1063 if (s == send)
b3ac6de7 1064 goto finish;
79072805 1065 d = s;
be4731d2 1066 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1067 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1068 if (SvUTF8(sv))
1069 SvUTF8_on(pv);
1070 }
79072805
LW
1071 while (s < send) {
1072 if (*s == '\\') {
a0d0e21e 1073 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1074 s++; /* all that, just for this */
1075 }
1076 *d++ = *s++;
1077 }
1078 *d = '\0';
95a20fc0 1079 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1080 finish:
3280af22 1081 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1082 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1083 return sv;
1084}
1085
ffb4593c
NT
1086/*
1087 * Now come three functions related to double-quote context,
1088 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1089 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1090 * interact with PL_lex_state, and create fake ( ... ) argument lists
1091 * to handle functions and concatenation.
1092 * They assume that whoever calls them will be setting up a fake
1093 * join call, because each subthing puts a ',' after it. This lets
1094 * "lower \luPpEr"
1095 * become
1096 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1097 *
1098 * (I'm not sure whether the spurious commas at the end of lcfirst's
1099 * arguments and join's arguments are created or not).
1100 */
1101
1102/*
1103 * S_sublex_start
1104 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1105 *
1106 * Pattern matching will set PL_lex_op to the pattern-matching op to
1107 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1108 *
1109 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1110 *
1111 * Everything else becomes a FUNC.
1112 *
1113 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1114 * had an OP_CONST or OP_READLINE). This just sets us up for a
1115 * call to S_sublex_push().
1116 */
1117
76e3520e 1118STATIC I32
cea2e8a9 1119S_sublex_start(pTHX)
79072805 1120{
f54cb97a 1121 const register I32 op_type = yylval.ival;
79072805
LW
1122
1123 if (op_type == OP_NULL) {
3280af22
NIS
1124 yylval.opval = PL_lex_op;
1125 PL_lex_op = Nullop;
79072805
LW
1126 return THING;
1127 }
1128 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1129 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1130
1131 if (SvTYPE(sv) == SVt_PVIV) {
1132 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1133 STRLEN len;
f54cb97a
AL
1134 const char *p = SvPV(sv, len);
1135 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1136 if (SvUTF8(sv))
1137 SvUTF8_on(nsv);
b3ac6de7
IZ
1138 SvREFCNT_dec(sv);
1139 sv = nsv;
4e553d73 1140 }
b3ac6de7 1141 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1142 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1143 /* Allow <FH> // "foo" */
1144 if (op_type == OP_READLINE)
1145 PL_expect = XTERMORDORDOR;
79072805
LW
1146 return THING;
1147 }
1148
3280af22
NIS
1149 PL_sublex_info.super_state = PL_lex_state;
1150 PL_sublex_info.sub_inwhat = op_type;
1151 PL_sublex_info.sub_op = PL_lex_op;
1152 PL_lex_state = LEX_INTERPPUSH;
55497cff 1153
3280af22
NIS
1154 PL_expect = XTERM;
1155 if (PL_lex_op) {
1156 yylval.opval = PL_lex_op;
1157 PL_lex_op = Nullop;
55497cff 1158 return PMFUNC;
1159 }
1160 else
1161 return FUNC;
1162}
1163
ffb4593c
NT
1164/*
1165 * S_sublex_push
1166 * Create a new scope to save the lexing state. The scope will be
1167 * ended in S_sublex_done. Returns a '(', starting the function arguments
1168 * to the uc, lc, etc. found before.
1169 * Sets PL_lex_state to LEX_INTERPCONCAT.
1170 */
1171
76e3520e 1172STATIC I32
cea2e8a9 1173S_sublex_push(pTHX)
55497cff 1174{
27da23d5 1175 dVAR;
f46d017c 1176 ENTER;
55497cff 1177
3280af22
NIS
1178 PL_lex_state = PL_sublex_info.super_state;
1179 SAVEI32(PL_lex_dojoin);
1180 SAVEI32(PL_lex_brackets);
3280af22
NIS
1181 SAVEI32(PL_lex_casemods);
1182 SAVEI32(PL_lex_starts);
1183 SAVEI32(PL_lex_state);
7766f137 1184 SAVEVPTR(PL_lex_inpat);
3280af22 1185 SAVEI32(PL_lex_inwhat);
57843af0 1186 SAVECOPLINE(PL_curcop);
3280af22 1187 SAVEPPTR(PL_bufptr);
8452ff4b 1188 SAVEPPTR(PL_bufend);
3280af22
NIS
1189 SAVEPPTR(PL_oldbufptr);
1190 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1191 SAVEPPTR(PL_last_lop);
1192 SAVEPPTR(PL_last_uni);
3280af22
NIS
1193 SAVEPPTR(PL_linestart);
1194 SAVESPTR(PL_linestr);
8edd5f42
RGS
1195 SAVEGENERICPV(PL_lex_brackstack);
1196 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1197
1198 PL_linestr = PL_lex_stuff;
1199 PL_lex_stuff = Nullsv;
1200
9cbb5ea2
GS
1201 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1202 = SvPVX(PL_linestr);
3280af22 1203 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1204 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1205 SAVEFREESV(PL_linestr);
1206
1207 PL_lex_dojoin = FALSE;
1208 PL_lex_brackets = 0;
3280af22
NIS
1209 New(899, PL_lex_brackstack, 120, char);
1210 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1211 PL_lex_casemods = 0;
1212 *PL_lex_casestack = '\0';
1213 PL_lex_starts = 0;
1214 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1215 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1216
1217 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1218 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1219 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1220 else
3280af22 1221 PL_lex_inpat = Nullop;
79072805 1222
55497cff 1223 return '(';
79072805
LW
1224}
1225
ffb4593c
NT
1226/*
1227 * S_sublex_done
1228 * Restores lexer state after a S_sublex_push.
1229 */
1230
76e3520e 1231STATIC I32
cea2e8a9 1232S_sublex_done(pTHX)
79072805 1233{
27da23d5 1234 dVAR;
3280af22 1235 if (!PL_lex_starts++) {
9aa983d2
JH
1236 SV *sv = newSVpvn("",0);
1237 if (SvUTF8(PL_linestr))
1238 SvUTF8_on(sv);
3280af22 1239 PL_expect = XOPERATOR;
9aa983d2 1240 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1241 return THING;
1242 }
1243
3280af22
NIS
1244 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1245 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1246 return yylex();
79072805
LW
1247 }
1248
ffb4593c 1249 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1250 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1251 PL_linestr = PL_lex_repl;
1252 PL_lex_inpat = 0;
1253 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1254 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1255 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1256 SAVEFREESV(PL_linestr);
1257 PL_lex_dojoin = FALSE;
1258 PL_lex_brackets = 0;
3280af22
NIS
1259 PL_lex_casemods = 0;
1260 *PL_lex_casestack = '\0';
1261 PL_lex_starts = 0;
25da4f38 1262 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1263 PL_lex_state = LEX_INTERPNORMAL;
1264 PL_lex_starts++;
e9fa98b2
HS
1265 /* we don't clear PL_lex_repl here, so that we can check later
1266 whether this is an evalled subst; that means we rely on the
1267 logic to ensure sublex_done() is called again only via the
1268 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1269 }
e9fa98b2 1270 else {
3280af22 1271 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1272 PL_lex_repl = Nullsv;
1273 }
79072805 1274 return ',';
ffed7fef
LW
1275 }
1276 else {
f46d017c 1277 LEAVE;
3280af22
NIS
1278 PL_bufend = SvPVX(PL_linestr);
1279 PL_bufend += SvCUR(PL_linestr);
1280 PL_expect = XOPERATOR;
09bef843 1281 PL_sublex_info.sub_inwhat = 0;
79072805 1282 return ')';
ffed7fef
LW
1283 }
1284}
1285
02aa26ce
NT
1286/*
1287 scan_const
1288
1289 Extracts a pattern, double-quoted string, or transliteration. This
1290 is terrifying code.
1291
3280af22
NIS
1292 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1293 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1294 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1295
9b599b2a
GS
1296 Returns a pointer to the character scanned up to. Iff this is
1297 advanced from the start pointer supplied (ie if anything was
1298 successfully parsed), will leave an OP for the substring scanned
1299 in yylval. Caller must intuit reason for not parsing further
1300 by looking at the next characters herself.
1301
02aa26ce
NT
1302 In patterns:
1303 backslashes:
1304 double-quoted style: \r and \n
1305 regexp special ones: \D \s
1306 constants: \x3
1307 backrefs: \1 (deprecated in substitution replacements)
1308 case and quoting: \U \Q \E
1309 stops on @ and $, but not for $ as tail anchor
1310
1311 In transliterations:
1312 characters are VERY literal, except for - not at the start or end
1313 of the string, which indicates a range. scan_const expands the
1314 range to the full set of intermediate characters.
1315
1316 In double-quoted strings:
1317 backslashes:
1318 double-quoted style: \r and \n
1319 constants: \x3
1320 backrefs: \1 (deprecated)
1321 case and quoting: \U \Q \E
1322 stops on @ and $
1323
1324 scan_const does *not* construct ops to handle interpolated strings.
1325 It stops processing as soon as it finds an embedded $ or @ variable
1326 and leaves it to the caller to work out what's going on.
1327
da6eedaa 1328 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1329
1330 $ in pattern could be $foo or could be tail anchor. Assumption:
1331 it's a tail anchor if $ is the last thing in the string, or if it's
1332 followed by one of ")| \n\t"
1333
1334 \1 (backreferences) are turned into $1
1335
1336 The structure of the code is
1337 while (there's a character to process) {
1338 handle transliteration ranges
1339 skip regexp comments
1340 skip # initiated comments in //x patterns
1341 check for embedded @foo
1342 check for embedded scalars
1343 if (backslash) {
1344 leave intact backslashes from leave (below)
1345 deprecate \1 in strings and sub replacements
1346 handle string-changing backslashes \l \U \Q \E, etc.
1347 switch (what was escaped) {
1348 handle - in a transliteration (becomes a literal -)
1349 handle \132 octal characters
1350 handle 0x15 hex characters
1351 handle \cV (control V)
1352 handle printf backslashes (\f, \r, \n, etc)
1353 } (end switch)
1354 } (end if backslash)
1355 } (end while character to read)
4e553d73 1356
02aa26ce
NT
1357*/
1358
76e3520e 1359STATIC char *
cea2e8a9 1360S_scan_const(pTHX_ char *start)
79072805 1361{
3280af22 1362 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1363 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1364 register char *s = start; /* start of the constant */
1365 register char *d = SvPVX(sv); /* destination for copies */
1366 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1367 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1368 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1369 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1370 UV uv;
1371
dff6d3cd 1372 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1373 PL_lex_inpat
b6d5fef8 1374 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1375 : "";
79072805 1376
2b9d42f0
NIS
1377 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1378 /* If we are doing a trans and we know we want UTF8 set expectation */
1379 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1380 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1381 }
1382
1383
79072805 1384 while (s < send || dorange) {
02aa26ce 1385 /* get transliterations out of the way (they're most literal) */
3280af22 1386 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1387 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1388 if (dorange) {
1ba5c669
JH
1389 I32 i; /* current expanded character */
1390 I32 min; /* first character in range */
1391 I32 max; /* last character in range */
02aa26ce 1392
2b9d42f0 1393 if (has_utf8) {
8973db79
JH
1394 char *c = (char*)utf8_hop((U8*)d, -1);
1395 char *e = d++;
1396 while (e-- > c)
1397 *(e + 1) = *e;
25716404 1398 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1399 /* mark the range as done, and continue */
1400 dorange = FALSE;
1401 didrange = TRUE;
1402 continue;
1403 }
2b9d42f0 1404
95a20fc0 1405 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1406 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1407 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1408 d -= 2; /* eat the first char and the - */
1409
8ada0baa
JH
1410 min = (U8)*d; /* first char in range */
1411 max = (U8)d[1]; /* last char in range */
1412
c2e66d9e 1413 if (min > max) {
01ec43d0 1414 Perl_croak(aTHX_
d1573ac7 1415 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1416 (char)min, (char)max);
c2e66d9e
GS
1417 }
1418
c7f1f016 1419#ifdef EBCDIC
8ada0baa
JH
1420 if ((isLOWER(min) && isLOWER(max)) ||
1421 (isUPPER(min) && isUPPER(max))) {
1422 if (isLOWER(min)) {
1423 for (i = min; i <= max; i++)
1424 if (isLOWER(i))
db42d148 1425 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1426 } else {
1427 for (i = min; i <= max; i++)
1428 if (isUPPER(i))
db42d148 1429 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1430 }
1431 }
1432 else
1433#endif
1434 for (i = min; i <= max; i++)
eb160463 1435 *d++ = (char)i;
02aa26ce
NT
1436
1437 /* mark the range as done, and continue */
79072805 1438 dorange = FALSE;
01ec43d0 1439 didrange = TRUE;
79072805 1440 continue;
4e553d73 1441 }
02aa26ce
NT
1442
1443 /* range begins (ignore - as first or last char) */
79072805 1444 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1445 if (didrange) {
1fafa243 1446 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1447 }
2b9d42f0 1448 if (has_utf8) {
25716404 1449 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1450 s++;
1451 continue;
1452 }
79072805
LW
1453 dorange = TRUE;
1454 s++;
01ec43d0
GS
1455 }
1456 else {
1457 didrange = FALSE;
1458 }
79072805 1459 }
02aa26ce
NT
1460
1461 /* if we get here, we're not doing a transliteration */
1462
0f5d15d6
IZ
1463 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1464 except for the last char, which will be done separately. */
3280af22 1465 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1466 if (s[2] == '#') {
e994fd66 1467 while (s+1 < send && *s != ')')
db42d148 1468 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1469 }
1470 else if (s[2] == '{' /* This should match regcomp.c */
1471 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1472 {
cc6b7395 1473 I32 count = 1;
0f5d15d6 1474 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1475 char c;
1476
d9f97599
GS
1477 while (count && (c = *regparse)) {
1478 if (c == '\\' && regparse[1])
1479 regparse++;
4e553d73 1480 else if (c == '{')
cc6b7395 1481 count++;
4e553d73 1482 else if (c == '}')
cc6b7395 1483 count--;
d9f97599 1484 regparse++;
cc6b7395 1485 }
e994fd66 1486 if (*regparse != ')')
5bdf89e7 1487 regparse--; /* Leave one char for continuation. */
0f5d15d6 1488 while (s < regparse)
db42d148 1489 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1490 }
748a9306 1491 }
02aa26ce
NT
1492
1493 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1494 else if (*s == '#' && PL_lex_inpat &&
1495 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1496 while (s+1 < send && *s != '\n')
db42d148 1497 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1498 }
02aa26ce 1499
5d1d4326 1500 /* check for embedded arrays
da6eedaa 1501 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1502 */
7e2040f0 1503 else if (*s == '@' && s[1]
5d1d4326 1504 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1505 break;
02aa26ce
NT
1506
1507 /* check for embedded scalars. only stop if we're sure it's a
1508 variable.
1509 */
79072805 1510 else if (*s == '$') {
3280af22 1511 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1512 break;
6002328a 1513 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1514 break; /* in regexp, $ might be tail anchor */
1515 }
02aa26ce 1516
2b9d42f0
NIS
1517 /* End of else if chain - OP_TRANS rejoin rest */
1518
02aa26ce 1519 /* backslashes */
79072805
LW
1520 if (*s == '\\' && s+1 < send) {
1521 s++;
02aa26ce
NT
1522
1523 /* some backslashes we leave behind */
c9f97d15 1524 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1525 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1526 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1527 continue;
1528 }
02aa26ce
NT
1529
1530 /* deprecate \1 in strings and substitution replacements */
3280af22 1531 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1532 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1533 {
599cee73 1534 if (ckWARN(WARN_SYNTAX))
9014280d 1535 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1536 *--s = '$';
1537 break;
1538 }
02aa26ce
NT
1539
1540 /* string-change backslash escapes */
3280af22 1541 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1542 --s;
1543 break;
1544 }
02aa26ce
NT
1545
1546 /* if we get here, it's either a quoted -, or a digit */
79072805 1547 switch (*s) {
02aa26ce
NT
1548
1549 /* quoted - in transliterations */
79072805 1550 case '-':
3280af22 1551 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1552 *d++ = *s++;
1553 continue;
1554 }
1555 /* FALL THROUGH */
1556 default:
11b8faa4 1557 {
707afd92 1558 if (ckWARN(WARN_MISC) &&
bfed75c6 1559 isALNUM(*s) &&
707afd92 1560 *s != '_')
9014280d 1561 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1562 "Unrecognized escape \\%c passed through",
1563 *s);
1564 /* default action is to copy the quoted character */
f9a63242 1565 goto default_action;
11b8faa4 1566 }
02aa26ce
NT
1567
1568 /* \132 indicates an octal constant */
79072805
LW
1569 case '0': case '1': case '2': case '3':
1570 case '4': case '5': case '6': case '7':
ba210ebe 1571 {
53305cf1
NC
1572 I32 flags = 0;
1573 STRLEN len = 3;
1574 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1575 s += len;
1576 }
012bcf8d 1577 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1578
1579 /* \x24 indicates a hex constant */
79072805 1580 case 'x':
a0ed51b3
LW
1581 ++s;
1582 if (*s == '{') {
1583 char* e = strchr(s, '}');
a4c04bdc
NC
1584 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1585 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1586 STRLEN len;
355860ce 1587
53305cf1 1588 ++s;
adaeee49 1589 if (!e) {
a0ed51b3 1590 yyerror("Missing right brace on \\x{}");
355860ce 1591 continue;
ba210ebe 1592 }
53305cf1
NC
1593 len = e - s;
1594 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1595 s = e + 1;
a0ed51b3
LW
1596 }
1597 else {
ba210ebe 1598 {
53305cf1 1599 STRLEN len = 2;
a4c04bdc 1600 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1601 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1602 s += len;
1603 }
012bcf8d
GS
1604 }
1605
1606 NUM_ESCAPE_INSERT:
1607 /* Insert oct or hex escaped character.
301d3d20 1608 * There will always enough room in sv since such
db42d148 1609 * escapes will be longer than any UTF-8 sequence
301d3d20 1610 * they can end up as. */
ba7cea30 1611
c7f1f016
NIS
1612 /* We need to map to chars to ASCII before doing the tests
1613 to cover EBCDIC
1614 */
c4d5f83a 1615 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1616 if (!has_utf8 && uv > 255) {
301d3d20
JH
1617 /* Might need to recode whatever we have
1618 * accumulated so far if it contains any
1619 * hibit chars.
1620 *
1621 * (Can't we keep track of that and avoid
1622 * this rescan? --jhi)
012bcf8d 1623 */
c7f1f016 1624 int hicount = 0;
63cd0674
NIS
1625 U8 *c;
1626 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1627 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1628 hicount++;
db42d148 1629 }
012bcf8d 1630 }
63cd0674 1631 if (hicount) {
95a20fc0 1632 STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1633 U8 *src, *dst;
1634 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1635 src = (U8 *)d - 1;
1636 dst = src+hicount;
1637 d += hicount;
1638 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1639 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1640 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1641 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1642 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1643 }
1644 else {
63cd0674 1645 *dst-- = *src;
012bcf8d 1646 }
c7f1f016 1647 src--;
012bcf8d
GS
1648 }
1649 }
1650 }
1651
9aa983d2 1652 if (has_utf8 || uv > 255) {
9041c2e3 1653 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1654 has_utf8 = TRUE;
f9a63242
JH
1655 if (PL_lex_inwhat == OP_TRANS &&
1656 PL_sublex_info.sub_op) {
1657 PL_sublex_info.sub_op->op_private |=
1658 (PL_lex_repl ? OPpTRANS_FROM_UTF
1659 : OPpTRANS_TO_UTF);
f9a63242 1660 }
012bcf8d 1661 }
a0ed51b3 1662 else {
012bcf8d 1663 *d++ = (char)uv;
a0ed51b3 1664 }
012bcf8d
GS
1665 }
1666 else {
c4d5f83a 1667 *d++ = (char) uv;
a0ed51b3 1668 }
79072805 1669 continue;
02aa26ce 1670
b239daa5 1671 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1672 case 'N':
55eda711 1673 ++s;
423cee85
JH
1674 if (*s == '{') {
1675 char* e = strchr(s, '}');
155aba94 1676 SV *res;
423cee85
JH
1677 STRLEN len;
1678 char *str;
4e553d73 1679
423cee85 1680 if (!e) {
5777a3f7 1681 yyerror("Missing right brace on \\N{}");
423cee85
JH
1682 e = s - 1;
1683 goto cont_scan;
1684 }
dbc0d4f2
JH
1685 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1686 /* \N{U+...} */
1687 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1688 PERL_SCAN_DISALLOW_PREFIX;
1689 s += 3;
1690 len = e - s;
1691 uv = grok_hex(s, &len, &flags, NULL);
1692 s = e + 1;
1693 goto NUM_ESCAPE_INSERT;
1694 }
55eda711
JH
1695 res = newSVpvn(s + 1, e - s - 1);
1696 res = new_constant( Nullch, 0, "charnames",
1697 res, Nullsv, "\\N{...}" );
f9a63242
JH
1698 if (has_utf8)
1699 sv_utf8_upgrade(res);
423cee85 1700 str = SvPV(res,len);
1c47067b
JH
1701#ifdef EBCDIC_NEVER_MIND
1702 /* charnames uses pack U and that has been
1703 * recently changed to do the below uni->native
1704 * mapping, so this would be redundant (and wrong,
1705 * the code point would be doubly converted).
1706 * But leave this in just in case the pack U change
1707 * gets revoked, but the semantics is still
1708 * desireable for charnames. --jhi */
cddc7ef4
JH
1709 {
1710 UV uv = utf8_to_uvchr((U8*)str, 0);
1711
1712 if (uv < 0x100) {
89ebb4a3 1713 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1714
1715 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1716 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1717 str = SvPV(res, len);
1718 }
1719 }
1720#endif
89491803 1721 if (!has_utf8 && SvUTF8(res)) {
95a20fc0 1722 const char *ostart = SvPVX_const(sv);
f08d6ad9
GS
1723 SvCUR_set(sv, d - ostart);
1724 SvPOK_on(sv);
e4f3eed8 1725 *d = '\0';
f08d6ad9 1726 sv_utf8_upgrade(sv);
d2f449dd 1727 /* this just broke our allocation above... */
eb160463 1728 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1729 d = SvPVX(sv) + SvCUR(sv);
89491803 1730 has_utf8 = TRUE;
f08d6ad9 1731 }
eb160463 1732 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
95a20fc0 1733 const char *odest = SvPVX_const(sv);
423cee85 1734
8973db79 1735 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1736 d = SvPVX(sv) + (d - odest);
1737 }
1738 Copy(str, d, len, char);
1739 d += len;
1740 SvREFCNT_dec(res);
1741 cont_scan:
1742 s = e + 1;
1743 }
1744 else
5777a3f7 1745 yyerror("Missing braces on \\N{}");
423cee85
JH
1746 continue;
1747
02aa26ce 1748 /* \c is a control character */
79072805
LW
1749 case 'c':
1750 s++;
961ce445 1751 if (s < send) {
ba210ebe 1752 U8 c = *s++;
c7f1f016
NIS
1753#ifdef EBCDIC
1754 if (isLOWER(c))
1755 c = toUPPER(c);
1756#endif
db42d148 1757 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1758 }
961ce445
RGS
1759 else {
1760 yyerror("Missing control char name in \\c");
1761 }
79072805 1762 continue;
02aa26ce
NT
1763
1764 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1765 case 'b':
db42d148 1766 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1767 break;
1768 case 'n':
db42d148 1769 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1770 break;
1771 case 'r':
db42d148 1772 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1773 break;
1774 case 'f':
db42d148 1775 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1776 break;
1777 case 't':
db42d148 1778 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1779 break;
34a3fe2a 1780 case 'e':
db42d148 1781 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1782 break;
1783 case 'a':
db42d148 1784 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1785 break;
02aa26ce
NT
1786 } /* end switch */
1787
79072805
LW
1788 s++;
1789 continue;
02aa26ce
NT
1790 } /* end if (backslash) */
1791
f9a63242 1792 default_action:
2b9d42f0
NIS
1793 /* If we started with encoded form, or already know we want it
1794 and then encode the next character */
1795 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1796 STRLEN len = 1;
1797 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1798 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1799 s += len;
1800 if (need > len) {
1801 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
95a20fc0 1802 STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1803 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1804 }
1805 d = (char*)uvchr_to_utf8((U8*)d, uv);
1806 has_utf8 = TRUE;
1807 }
1808 else {
1809 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1810 }
02aa26ce
NT
1811 } /* while loop to process each character */
1812
1813 /* terminate the string and set up the sv */
79072805 1814 *d = '\0';
95a20fc0 1815 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1816 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1817 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1818
79072805 1819 SvPOK_on(sv);
9f4817db 1820 if (PL_encoding && !has_utf8) {
d0063567
DK
1821 sv_recode_to_utf8(sv, PL_encoding);
1822 if (SvUTF8(sv))
1823 has_utf8 = TRUE;
9f4817db 1824 }
2b9d42f0 1825 if (has_utf8) {
7e2040f0 1826 SvUTF8_on(sv);
2b9d42f0 1827 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1828 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1829 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1830 }
1831 }
79072805 1832
02aa26ce 1833 /* shrink the sv if we allocated more than we used */
79072805 1834 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1835 SvPV_shrink_to_cur(sv);
79072805 1836 }
02aa26ce 1837
9b599b2a 1838 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1839 if (s > PL_bufptr) {
1840 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1841 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1842 sv, Nullsv,
4e553d73 1843 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1844 ? "tr"
3280af22 1845 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1846 ? "s"
1847 : "qq")));
79072805 1848 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1849 } else
8990e307 1850 SvREFCNT_dec(sv);
79072805
LW
1851 return s;
1852}
1853
ffb4593c
NT
1854/* S_intuit_more
1855 * Returns TRUE if there's more to the expression (e.g., a subscript),
1856 * FALSE otherwise.
ffb4593c
NT
1857 *
1858 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1859 *
1860 * ->[ and ->{ return TRUE
1861 * { and [ outside a pattern are always subscripts, so return TRUE
1862 * if we're outside a pattern and it's not { or [, then return FALSE
1863 * if we're in a pattern and the first char is a {
1864 * {4,5} (any digits around the comma) returns FALSE
1865 * if we're in a pattern and the first char is a [
1866 * [] returns FALSE
1867 * [SOMETHING] has a funky algorithm to decide whether it's a
1868 * character class or not. It has to deal with things like
1869 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1870 * anything else returns TRUE
1871 */
1872
9cbb5ea2
GS
1873/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1874
76e3520e 1875STATIC int
cea2e8a9 1876S_intuit_more(pTHX_ register char *s)
79072805 1877{
3280af22 1878 if (PL_lex_brackets)
79072805
LW
1879 return TRUE;
1880 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1881 return TRUE;
1882 if (*s != '{' && *s != '[')
1883 return FALSE;
3280af22 1884 if (!PL_lex_inpat)
79072805
LW
1885 return TRUE;
1886
1887 /* In a pattern, so maybe we have {n,m}. */
1888 if (*s == '{') {
1889 s++;
1890 if (!isDIGIT(*s))
1891 return TRUE;
1892 while (isDIGIT(*s))
1893 s++;
1894 if (*s == ',')
1895 s++;
1896 while (isDIGIT(*s))
1897 s++;
1898 if (*s == '}')
1899 return FALSE;
1900 return TRUE;
1901
1902 }
1903
1904 /* On the other hand, maybe we have a character class */
1905
1906 s++;
1907 if (*s == ']' || *s == '^')
1908 return FALSE;
1909 else {
ffb4593c 1910 /* this is terrifying, and it works */
79072805
LW
1911 int weight = 2; /* let's weigh the evidence */
1912 char seen[256];
f27ffc4a 1913 unsigned char un_char = 255, last_un_char;
f54cb97a 1914 const char *send = strchr(s,']');
3280af22 1915 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1916
1917 if (!send) /* has to be an expression */
1918 return TRUE;
1919
1920 Zero(seen,256,char);
1921 if (*s == '$')
1922 weight -= 3;
1923 else if (isDIGIT(*s)) {
1924 if (s[1] != ']') {
1925 if (isDIGIT(s[1]) && s[2] == ']')
1926 weight -= 10;
1927 }
1928 else
1929 weight -= 100;
1930 }
1931 for (; s < send; s++) {
1932 last_un_char = un_char;
1933 un_char = (unsigned char)*s;
1934 switch (*s) {
1935 case '@':
1936 case '&':
1937 case '$':
1938 weight -= seen[un_char] * 10;
7e2040f0 1939 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1940 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1941 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1942 weight -= 100;
1943 else
1944 weight -= 10;
1945 }
1946 else if (*s == '$' && s[1] &&
93a17b20
LW
1947 strchr("[#!%*<>()-=",s[1])) {
1948 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1949 weight -= 10;
1950 else
1951 weight -= 1;
1952 }
1953 break;
1954 case '\\':
1955 un_char = 254;
1956 if (s[1]) {
93a17b20 1957 if (strchr("wds]",s[1]))
79072805
LW
1958 weight += 100;
1959 else if (seen['\''] || seen['"'])
1960 weight += 1;
93a17b20 1961 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1962 weight += 40;
1963 else if (isDIGIT(s[1])) {
1964 weight += 40;
1965 while (s[1] && isDIGIT(s[1]))
1966 s++;
1967 }
1968 }
1969 else
1970 weight += 100;
1971 break;
1972 case '-':
1973 if (s[1] == '\\')
1974 weight += 50;
93a17b20 1975 if (strchr("aA01! ",last_un_char))
79072805 1976 weight += 30;
93a17b20 1977 if (strchr("zZ79~",s[1]))
79072805 1978 weight += 30;
f27ffc4a
GS
1979 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1980 weight -= 5; /* cope with negative subscript */
79072805
LW
1981 break;
1982 default:
3792a11b
NC
1983 if (!isALNUM(last_un_char)
1984 && !(last_un_char == '$' || last_un_char == '@'
1985 || last_un_char == '&')
1986 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1987 char *d = tmpbuf;
1988 while (isALPHA(*s))
1989 *d++ = *s++;
1990 *d = '\0';
1991 if (keyword(tmpbuf, d - tmpbuf))
1992 weight -= 150;
1993 }
1994 if (un_char == last_un_char + 1)
1995 weight += 5;
1996 weight -= seen[un_char];
1997 break;
1998 }
1999 seen[un_char]++;
2000 }
2001 if (weight >= 0) /* probably a character class */
2002 return FALSE;
2003 }
2004
2005 return TRUE;
2006}
ffed7fef 2007
ffb4593c
NT
2008/*
2009 * S_intuit_method
2010 *
2011 * Does all the checking to disambiguate
2012 * foo bar
2013 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2014 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2015 *
2016 * First argument is the stuff after the first token, e.g. "bar".
2017 *
2018 * Not a method if bar is a filehandle.
2019 * Not a method if foo is a subroutine prototyped to take a filehandle.
2020 * Not a method if it's really "Foo $bar"
2021 * Method if it's "foo $bar"
2022 * Not a method if it's really "print foo $bar"
2023 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2024 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2025 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2026 * =>
2027 */
2028
76e3520e 2029STATIC int
cea2e8a9 2030S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2031{
2032 char *s = start + (*start == '$');
3280af22 2033 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2034 STRLEN len;
2035 GV* indirgv;
2036
2037 if (gv) {
b6c543e3 2038 CV *cv;
a0d0e21e
LW
2039 if (GvIO(gv))
2040 return 0;
b6c543e3 2041 if ((cv = GvCVu(gv))) {
95a20fc0 2042 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2043 if (proto) {
2044 if (*proto == ';')
2045 proto++;
2046 if (*proto == '*')
2047 return 0;
2048 }
2049 } else
a0d0e21e
LW
2050 gv = 0;
2051 }
8903cb82 2052 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2053 /* start is the beginning of the possible filehandle/object,
2054 * and s is the end of it
2055 * tmpbuf is a copy of it
2056 */
2057
a0d0e21e 2058 if (*start == '$') {
3280af22 2059 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2060 return 0;
2061 s = skipspace(s);
3280af22
NIS
2062 PL_bufptr = start;
2063 PL_expect = XREF;
a0d0e21e
LW
2064 return *s == '(' ? FUNCMETH : METHOD;
2065 }
2066 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2067 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2068 len -= 2;
2069 tmpbuf[len] = '\0';
2070 goto bare_package;
2071 }
2072 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2073 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2074 return 0;
2075 /* filehandle or package name makes it a method */
89bfa8cd 2076 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2077 s = skipspace(s);
3280af22 2078 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2079 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2080 bare_package:
3280af22 2081 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2082 newSVpvn(tmpbuf,len));
3280af22
NIS
2083 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2084 PL_expect = XTERM;
a0d0e21e 2085 force_next(WORD);
3280af22 2086 PL_bufptr = s;
a0d0e21e
LW
2087 return *s == '(' ? FUNCMETH : METHOD;
2088 }
2089 }
2090 return 0;
2091}
2092
ffb4593c
NT
2093/*
2094 * S_incl_perldb
2095 * Return a string of Perl code to load the debugger. If PERL5DB
2096 * is set, it will return the contents of that, otherwise a
2097 * compile-time require of perl5db.pl.
2098 */
2099
bfed75c6 2100STATIC const char*
cea2e8a9 2101S_incl_perldb(pTHX)
a0d0e21e 2102{
3280af22 2103 if (PL_perldb) {
bfed75c6 2104 const char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2105
2106 if (pdb)
2107 return pdb;
93189314 2108 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2109 return "BEGIN { require 'perl5db.pl' }";
2110 }
2111 return "";
2112}
2113
2114
16d20bd9 2115/* Encoded script support. filter_add() effectively inserts a
4e553d73 2116 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2117 * Note that the filter function only applies to the current source file
2118 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2119 *
2120 * The datasv parameter (which may be NULL) can be used to pass
2121 * private data to this instance of the filter. The filter function
2122 * can recover the SV using the FILTER_DATA macro and use it to
2123 * store private buffers and state information.
2124 *
2125 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2126 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2127 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2128 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2129 * private use must be set using malloc'd pointers.
2130 */
16d20bd9
AD
2131
2132SV *
864dbfa3 2133Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2134{
f4c556ac
GS
2135 if (!funcp)
2136 return Nullsv;
2137
3280af22
NIS
2138 if (!PL_rsfp_filters)
2139 PL_rsfp_filters = newAV();
16d20bd9 2140 if (!datasv)
8c52afec 2141 datasv = NEWSV(255,0);
862a34c6 2142 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2143 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2144 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2145 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2146 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2147 av_unshift(PL_rsfp_filters, 1);
2148 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2149 return(datasv);
2150}
4e553d73 2151
16d20bd9
AD
2152
2153/* Delete most recently added instance of this filter function. */
a0d0e21e 2154void
864dbfa3 2155Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2156{
e0c19803 2157 SV *datasv;
24801a4b 2158
33073adb 2159#ifdef DEBUGGING
8141890a 2160 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2161#endif
3280af22 2162 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2163 return;
2164 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2165 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2166 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2167 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2168 IoANY(datasv) = (void *)NULL;
3280af22 2169 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2170
16d20bd9
AD
2171 return;
2172 }
2173 /* we need to search for the correct entry and clear it */
cea2e8a9 2174 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2175}
2176
2177
1de9afcd
RGS
2178/* Invoke the idxth filter function for the current rsfp. */
2179/* maxlen 0 = read one text line */
16d20bd9 2180I32
864dbfa3 2181Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2182{
16d20bd9
AD
2183 filter_t funcp;
2184 SV *datasv = NULL;
e50aee73 2185
3280af22 2186 if (!PL_rsfp_filters)
16d20bd9 2187 return -1;
1de9afcd 2188 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2189 /* Provide a default input filter to make life easy. */
2190 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2191 DEBUG_P(PerlIO_printf(Perl_debug_log,
2192 "filter_read %d: from rsfp\n", idx));
4e553d73 2193 if (maxlen) {
16d20bd9
AD
2194 /* Want a block */
2195 int len ;
f54cb97a 2196 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2197
2198 /* ensure buf_sv is large enough */
eb160463 2199 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2200 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2201 if (PerlIO_error(PL_rsfp))
37120919
AD
2202 return -1; /* error */
2203 else
2204 return 0 ; /* end of file */
2205 }
16d20bd9
AD
2206 SvCUR_set(buf_sv, old_len + len) ;
2207 } else {
2208 /* Want a line */
3280af22
NIS
2209 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2210 if (PerlIO_error(PL_rsfp))
37120919
AD
2211 return -1; /* error */
2212 else
2213 return 0 ; /* end of file */
2214 }
16d20bd9
AD
2215 }
2216 return SvCUR(buf_sv);
2217 }
2218 /* Skip this filter slot if filter has been deleted */
1de9afcd 2219 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: skipped (filter deleted)\n",
2222 idx));
16d20bd9
AD
2223 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2224 }
2225 /* Get function pointer hidden within datasv */
8141890a 2226 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2227 DEBUG_P(PerlIO_printf(Perl_debug_log,
2228 "filter_read %d: via function %p (%s)\n",
8141890a 2229 idx, datasv, SvPV_nolen(datasv)));
16d20bd9
AD
2230 /* Call function. The function is expected to */
2231 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2232 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2233 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2234}
2235
76e3520e 2236STATIC char *
cea2e8a9 2237S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2238{
c39cd008 2239#ifdef PERL_CR_FILTER
3280af22 2240 if (!PL_rsfp_filters) {
c39cd008 2241 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2242 }
2243#endif
3280af22 2244 if (PL_rsfp_filters) {
55497cff 2245 if (!append)
2246 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2247 if (FILTER_READ(0, sv, 0) > 0)
2248 return ( SvPVX(sv) ) ;
2249 else
2250 return Nullch ;
2251 }
9d116dd7 2252 else
fd049845 2253 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2254}
2255
01ec43d0 2256STATIC HV *
7fc63493 2257S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2258{
2259 GV *gv;
2260
01ec43d0 2261 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2262 return PL_curstash;
2263
2264 if (len > 2 &&
2265 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2266 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2267 {
2268 return GvHV(gv); /* Foo:: */
def3634b
GS
2269 }
2270
2271 /* use constant CLASS => 'MyClass' */
2272 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2273 SV *sv;
2274 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2275 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2276 }
2277 }
2278
2279 return gv_stashpv(pkgname, FALSE);
2280}
a0d0e21e 2281
748a9306 2282#ifdef DEBUGGING
27da23d5 2283 static const char* const exp_name[] =
09bef843 2284 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2285 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2286 };
748a9306 2287#endif
463ee0b2 2288
02aa26ce
NT
2289/*
2290 yylex
2291
2292 Works out what to call the token just pulled out of the input
2293 stream. The yacc parser takes care of taking the ops we return and
2294 stitching them into a tree.
2295
2296 Returns:
2297 PRIVATEREF
2298
2299 Structure:
2300 if read an identifier
2301 if we're in a my declaration
2302 croak if they tried to say my($foo::bar)
2303 build the ops for a my() declaration
2304 if it's an access to a my() variable
2305 are we in a sort block?
2306 croak if my($a); $a <=> $b
2307 build ops for access to a my() variable
2308 if in a dq string, and they've said @foo and we can't find @foo
2309 croak
2310 build ops for a bareword
2311 if we already built the token before, use it.
2312*/
2313
20141f0e 2314
dba4d153
JH
2315#ifdef __SC__
2316#pragma segment Perl_yylex
2317#endif
dba4d153 2318int
dba4d153 2319Perl_yylex(pTHX)
20141f0e 2320{
3afc138a 2321 register char *s = PL_bufptr;
378cc40b 2322 register char *d;
79072805 2323 register I32 tmp;
463ee0b2 2324 STRLEN len;
161b471a
NIS
2325 GV *gv = Nullgv;
2326 GV **gvp = 0;
aa7440fb 2327 bool bof = FALSE;
1d239bbb 2328 I32 orig_keyword = 0;
a687059c 2329
bbf60fe6
DM
2330 DEBUG_T( {
2331 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2332 lex_state_names[PL_lex_state]);
2333 } );
02aa26ce 2334 /* check if there's an identifier for us to look at */
ba979b31 2335 if (PL_pending_ident)
bbf60fe6 2336 return REPORT(S_pending_ident(aTHX));
bbce6d69 2337
02aa26ce
NT
2338 /* no identifier pending identification */
2339
3280af22 2340 switch (PL_lex_state) {
79072805
LW
2341#ifdef COMMENTARY
2342 case LEX_NORMAL: /* Some compilers will produce faster */
2343 case LEX_INTERPNORMAL: /* code if we comment these out. */
2344 break;
2345#endif
2346
09bef843 2347 /* when we've already built the next token, just pull it out of the queue */
79072805 2348 case LEX_KNOWNEXT:
3280af22
NIS
2349 PL_nexttoke--;
2350 yylval = PL_nextval[PL_nexttoke];
2351 if (!PL_nexttoke) {
2352 PL_lex_state = PL_lex_defer;
2353 PL_expect = PL_lex_expect;
2354 PL_lex_defer = LEX_NORMAL;
463ee0b2 2355 }
607df283 2356 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2357 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2358 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2359
bbf60fe6 2360 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2361
02aa26ce 2362 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2363 when we get here, PL_bufptr is at the \
02aa26ce 2364 */
79072805
LW
2365 case LEX_INTERPCASEMOD:
2366#ifdef DEBUGGING
3280af22 2367 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2368 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2369#endif
02aa26ce 2370 /* handle \E or end of string */
3280af22 2371 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2372 /* if at a \E */
3280af22 2373 if (PL_lex_casemods) {
f54cb97a 2374 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2375 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2376
3792a11b
NC
2377 if (PL_bufptr != PL_bufend
2378 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2379 PL_bufptr += 2;
2380 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2381 }
bbf60fe6 2382 return REPORT(')');
79072805 2383 }
3280af22
NIS
2384 if (PL_bufptr != PL_bufend)
2385 PL_bufptr += 2;
2386 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2387 return yylex();
79072805
LW
2388 }
2389 else {
607df283 2390 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2391 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2392 s = PL_bufptr + 1;
6e909404
JH
2393 if (s[1] == '\\' && s[2] == 'E') {
2394 PL_bufptr = s + 3;
2395 PL_lex_state = LEX_INTERPCONCAT;
2396 return yylex();
a0d0e21e 2397 }
6e909404
JH
2398 else {
2399 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2400 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2401 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2402 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2403 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2404 return REPORT(')');
6e909404
JH
2405 }
2406 if (PL_lex_casemods > 10)
2407 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2408 PL_lex_casestack[PL_lex_casemods++] = *s;
2409 PL_lex_casestack[PL_lex_casemods] = '\0';
2410 PL_lex_state = LEX_INTERPCONCAT;
2411 PL_nextval[PL_nexttoke].ival = 0;
2412 force_next('(');
2413 if (*s == 'l')
2414 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2415 else if (*s == 'u')
2416 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2417 else if (*s == 'L')
2418 PL_nextval[PL_nexttoke].ival = OP_LC;
2419 else if (*s == 'U')
2420 PL_nextval[PL_nexttoke].ival = OP_UC;
2421 else if (*s == 'Q')
2422 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2423 else
2424 Perl_croak(aTHX_ "panic: yylex");
2425 PL_bufptr = s + 1;
a0d0e21e 2426 }
79072805 2427 force_next(FUNC);
3280af22
NIS
2428 if (PL_lex_starts) {
2429 s = PL_bufptr;
2430 PL_lex_starts = 0;
131b3ad0
DM
2431 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2432 if (PL_lex_casemods == 1 && PL_lex_inpat)
2433 OPERATOR(',');
2434 else
2435 Aop(OP_CONCAT);
79072805
LW
2436 }
2437 else
cea2e8a9 2438 return yylex();
79072805
LW
2439 }
2440
55497cff 2441 case LEX_INTERPPUSH:
bbf60fe6 2442 return REPORT(sublex_push());
55497cff 2443
79072805 2444 case LEX_INTERPSTART:
3280af22 2445 if (PL_bufptr == PL_bufend)
bbf60fe6 2446 return REPORT(sublex_done());
607df283 2447 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2448 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2449 PL_expect = XTERM;
2450 PL_lex_dojoin = (*PL_bufptr == '@');
2451 PL_lex_state = LEX_INTERPNORMAL;
2452 if (PL_lex_dojoin) {
2453 PL_nextval[PL_nexttoke].ival = 0;
79072805 2454 force_next(',');
a0d0e21e 2455 force_ident("\"", '$');
3280af22 2456 PL_nextval[PL_nexttoke].ival = 0;
79072805 2457 force_next('$');
3280af22 2458 PL_nextval[PL_nexttoke].ival = 0;
79072805 2459 force_next('(');
3280af22 2460 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2461 force_next(FUNC);
2462 }
3280af22
NIS
2463 if (PL_lex_starts++) {
2464 s = PL_bufptr;
131b3ad0
DM
2465 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2466 if (!PL_lex_casemods && PL_lex_inpat)
2467 OPERATOR(',');
2468 else
2469 Aop(OP_CONCAT);
79072805 2470 }
cea2e8a9 2471 return yylex();
79072805
LW
2472
2473 case LEX_INTERPENDMAYBE:
3280af22
NIS
2474 if (intuit_more(PL_bufptr)) {
2475 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2476 break;
2477 }
2478 /* FALL THROUGH */
2479
2480 case LEX_INTERPEND:
3280af22
NIS
2481 if (PL_lex_dojoin) {
2482 PL_lex_dojoin = FALSE;
2483 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2484 return REPORT(')');
79072805 2485 }
43a16006 2486 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2487 && SvEVALED(PL_lex_repl))
43a16006 2488 {
e9fa98b2 2489 if (PL_bufptr != PL_bufend)
cea2e8a9 2490 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2491 PL_lex_repl = Nullsv;
2492 }
79072805
LW
2493 /* FALLTHROUGH */
2494 case LEX_INTERPCONCAT:
2495#ifdef DEBUGGING
3280af22 2496 if (PL_lex_brackets)
cea2e8a9 2497 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2498#endif
3280af22 2499 if (PL_bufptr == PL_bufend)
bbf60fe6 2500 return REPORT(sublex_done());
79072805 2501
3280af22
NIS
2502 if (SvIVX(PL_linestr) == '\'') {
2503 SV *sv = newSVsv(PL_linestr);
2504 if (!PL_lex_inpat)
76e3520e 2505 sv = tokeq(sv);
3280af22 2506 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2507 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2508 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2509 s = PL_bufend;
79072805
LW
2510 }
2511 else {
3280af22 2512 s = scan_const(PL_bufptr);
79072805 2513 if (*s == '\\')
3280af22 2514 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2515 else
3280af22 2516 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2517 }
2518
3280af22
NIS
2519 if (s != PL_bufptr) {
2520 PL_nextval[PL_nexttoke] = yylval;
2521 PL_expect = XTERM;
79072805 2522 force_next(THING);
131b3ad0
DM
2523 if (PL_lex_starts++) {
2524 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2525 if (!PL_lex_casemods && PL_lex_inpat)
2526 OPERATOR(',');
2527 else
2528 Aop(OP_CONCAT);
2529 }
79072805 2530 else {
3280af22 2531 PL_bufptr = s;
cea2e8a9 2532 return yylex();
79072805
LW
2533 }
2534 }
2535
cea2e8a9 2536 return yylex();
a0d0e21e 2537 case LEX_FORMLINE:
3280af22
NIS
2538 PL_lex_state = LEX_NORMAL;
2539 s = scan_formline(PL_bufptr);
2540 if (!PL_lex_formbrack)
a0d0e21e
LW
2541 goto rightbracket;
2542 OPERATOR(';');
79072805
LW
2543 }
2544
3280af22
NIS
2545 s = PL_bufptr;
2546 PL_oldoldbufptr = PL_oldbufptr;
2547 PL_oldbufptr = s;
607df283 2548 DEBUG_T( {
bbf60fe6 2549 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2550 exp_name[PL_expect], s);
5f80b19c 2551 } );
463ee0b2
LW
2552
2553 retry:
378cc40b
LW
2554 switch (*s) {
2555 default:
7e2040f0 2556 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2557 goto keylookup;
cea2e8a9 2558 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2559 case 4:
2560 case 26:
2561 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2562 case 0:
3280af22
NIS
2563 if (!PL_rsfp) {
2564 PL_last_uni = 0;
2565 PL_last_lop = 0;
c5ee2135
WL
2566 if (PL_lex_brackets) {
2567 if (PL_lex_formbrack)
2568 yyerror("Format not terminated");
2569 else
2570 yyerror("Missing right curly or square bracket");
2571 }
4e553d73 2572 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2573 "### Tokener got EOF\n");
5f80b19c 2574 } );
79072805 2575 TOKEN(0);
463ee0b2 2576 }
3280af22 2577 if (s++ < PL_bufend)
a687059c 2578 goto retry; /* ignore stray nulls */
3280af22
NIS
2579 PL_last_uni = 0;
2580 PL_last_lop = 0;
2581 if (!PL_in_eval && !PL_preambled) {
2582 PL_preambled = TRUE;
2583 sv_setpv(PL_linestr,incl_perldb());
2584 if (SvCUR(PL_linestr))
4147a61b 2585 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2586 if (PL_preambleav){
2587 while(AvFILLp(PL_preambleav) >= 0) {
2588 SV *tmpsv = av_shift(PL_preambleav);
2589 sv_catsv(PL_linestr, tmpsv);
4147a61b 2590 sv_catpvn(PL_linestr, ";", 1);
91b7def8 2591 sv_free(tmpsv);
2592 }
3280af22
NIS
2593 sv_free((SV*)PL_preambleav);
2594 PL_preambleav = NULL;
91b7def8 2595 }
3280af22
NIS
2596 if (PL_minus_n || PL_minus_p) {
2597 sv_catpv(PL_linestr, "LINE: while (<>) {");
2598 if (PL_minus_l)
2599 sv_catpv(PL_linestr,"chomp;");
2600 if (PL_minus_a) {
3280af22 2601 if (PL_minus_F) {
3792a11b
NC
2602 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2603 || *PL_splitstr == '"')
3280af22 2604 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2605 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2606 else {
c8ef6a4b
NC
2607 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2608 bytes can be used as quoting characters. :-) */
48c4c863
NC
2609 /* The count here deliberately includes the NUL
2610 that terminates the C string constant. This
2611 embeds the opening NUL into the string. */
dd374669 2612 const char *splits = PL_splitstr;
9a3fb652 2613 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2614 do {
2615 /* Need to \ \s */
dd374669
AL
2616 if (*splits == '\\')
2617 sv_catpvn(PL_linestr, splits, 1);
2618 sv_catpvn(PL_linestr, splits, 1);
2619 } while (*splits++);
48c4c863
NC
2620 /* This loop will embed the trailing NUL of
2621 PL_linestr as the last thing it does before
2622 terminating. */
4fccd7c6 2623 sv_catpvn(PL_linestr, ");", 2);
54310121 2624 }
2304df62
AD
2625 }
2626 else
75c72d73 2627 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2628 }
79072805 2629 }
4147a61b 2630 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2631 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2632 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2633 PL_last_lop = PL_last_uni = Nullch;
3280af22 2634 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2635 SV *sv = NEWSV(85,0);
2636
2637 sv_upgrade(sv, SVt_PVMG);
3280af22 2638 sv_setsv(sv,PL_linestr);
0ac0412a 2639 (void)SvIOK_on(sv);
45977657 2640 SvIV_set(sv, 0);
57843af0 2641 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2642 }
79072805 2643 goto retry;
a687059c 2644 }
e929a76b 2645 do {
aa7440fb 2646 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2647 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2648 fake_eof:
2649 if (PL_rsfp) {
2650 if (PL_preprocess && !PL_in_eval)
2651 (void)PerlProc_pclose(PL_rsfp);
2652 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2653 PerlIO_clearerr(PL_rsfp);
2654 else
2655 (void)PerlIO_close(PL_rsfp);
2656 PL_rsfp = Nullfp;
2657 PL_doextract = FALSE;
2658 }
2659 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2660 sv_setpv(PL_linestr,PL_minus_p
2661 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2662 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2663 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2664 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2665 PL_minus_n = PL_minus_p = 0;
2666 goto retry;
2667 }
2668 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2669 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2670 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2671 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2672 }
7aa207d6
JH
2673 /* If it looks like the start of a BOM or raw UTF-16,
2674 * check if it in fact is. */
2675 else if (bof &&
2676 (*s == 0 ||
2677 *(U8*)s == 0xEF ||
2678 *(U8*)s >= 0xFE ||
2679 s[1] == 0)) {
226017aa 2680#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2681# ifdef __GNU_LIBRARY__
2682# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2683# define FTELL_FOR_PIPE_IS_BROKEN
2684# endif
e3f494f1
JH
2685# else
2686# ifdef __GLIBC__
2687# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2688# define FTELL_FOR_PIPE_IS_BROKEN
2689# endif
2690# endif
226017aa
DD
2691# endif
2692#endif
2693#ifdef FTELL_FOR_PIPE_IS_BROKEN
2694 /* This loses the possibility to detect the bof
2695 * situation on perl -P when the libc5 is being used.
2696 * Workaround? Maybe attach some extra state to PL_rsfp?
2697 */
2698 if (!PL_preprocess)
7e28d3af 2699 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2700#else
eb160463 2701 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2702#endif
7e28d3af 2703 if (bof) {
3280af22 2704 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2705 s = swallow_bom((U8*)s);
e929a76b 2706 }
378cc40b 2707 }
3280af22 2708 if (PL_doextract) {
a0d0e21e
LW
2709 /* Incest with pod. */
2710 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2711 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2712 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2713 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2714 PL_last_lop = PL_last_uni = Nullch;
3280af22 2715 PL_doextract = FALSE;
a0d0e21e 2716 }
4e553d73 2717 }
463ee0b2 2718 incline(s);
3280af22
NIS
2719 } while (PL_doextract);
2720 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2721 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2722 SV *sv = NEWSV(85,0);
a687059c 2723
93a17b20 2724 sv_upgrade(sv, SVt_PVMG);
3280af22 2725 sv_setsv(sv,PL_linestr);
0ac0412a 2726 (void)SvIOK_on(sv);
45977657 2727 SvIV_set(sv, 0);
57843af0 2728 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2729 }
3280af22 2730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2731 PL_last_lop = PL_last_uni = Nullch;
57843af0 2732 if (CopLINE(PL_curcop) == 1) {
3280af22 2733 while (s < PL_bufend && isSPACE(*s))
79072805 2734 s++;
a0d0e21e 2735 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2736 s++;
44a8e56a 2737 d = Nullch;
3280af22 2738 if (!PL_in_eval) {
44a8e56a 2739 if (*s == '#' && *(s+1) == '!')
2740 d = s + 2;
2741#ifdef ALTERNATE_SHEBANG
2742 else {
bfed75c6 2743 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 2744 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2745 d = s + (sizeof(as) - 1);
2746 }
2747#endif /* ALTERNATE_SHEBANG */
2748 }
2749 if (d) {
b8378b72 2750 char *ipath;
774d564b 2751 char *ipathend;
b8378b72 2752
774d564b 2753 while (isSPACE(*d))
b8378b72
CS
2754 d++;
2755 ipath = d;
774d564b 2756 while (*d && !isSPACE(*d))
2757 d++;
2758 ipathend = d;
2759
2760#ifdef ARG_ZERO_IS_SCRIPT
2761 if (ipathend > ipath) {
2762 /*
2763 * HP-UX (at least) sets argv[0] to the script name,
2764 * which makes $^X incorrect. And Digital UNIX and Linux,
2765 * at least, set argv[0] to the basename of the Perl
2766 * interpreter. So, having found "#!", we'll set it right.
2767 */
ee2f7564 2768 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2769 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2770 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2771 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2772 SvSETMAGIC(x);
2773 }
556c1dec
JH
2774 else {
2775 STRLEN blen;
2776 STRLEN llen;
f54cb97a
AL
2777 const char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2778 const char *lstart = SvPV(x,llen);
556c1dec
JH
2779 if (llen < blen) {
2780 bstart += blen - llen;
2781 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2782 sv_setpvn(x, ipath, ipathend - ipath);
2783 SvSETMAGIC(x);
2784 }
2785 }
2786 }
774d564b 2787 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2788 }
774d564b 2789#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2790
2791 /*
2792 * Look for options.
2793 */
748a9306 2794 d = instr(s,"perl -");
84e30d1a 2795 if (!d) {
748a9306 2796 d = instr(s,"perl");
84e30d1a
GS
2797#if defined(DOSISH)
2798 /* avoid getting into infinite loops when shebang
2799 * line contains "Perl" rather than "perl" */
2800 if (!d) {
2801 for (d = ipathend-4; d >= ipath; --d) {
2802 if ((*d == 'p' || *d == 'P')
2803 && !ibcmp(d, "perl", 4))
2804 {
2805 break;
2806 }
2807 }
2808 if (d < ipath)
2809 d = Nullch;
2810 }
2811#endif
2812 }
44a8e56a 2813#ifdef ALTERNATE_SHEBANG
2814 /*
2815 * If the ALTERNATE_SHEBANG on this system starts with a
2816 * character that can be part of a Perl expression, then if
2817 * we see it but not "perl", we're probably looking at the
2818 * start of Perl code, not a request to hand off to some
2819 * other interpreter. Similarly, if "perl" is there, but
2820 * not in the first 'word' of the line, we assume the line
2821 * contains the start of the Perl program.
44a8e56a 2822 */
2823 if (d && *s != '#') {
f54cb97a 2824 const char *c = ipath;
44a8e56a 2825 while (*c && !strchr("; \t\r\n\f\v#", *c))
2826 c++;
2827 if (c < d)
2828 d = Nullch; /* "perl" not in first word; ignore */
2829 else
2830 *s = '#'; /* Don't try to parse shebang line */
2831 }
774d564b 2832#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2833#ifndef MACOS_TRADITIONAL
748a9306 2834 if (!d &&
44a8e56a 2835 *s == '#' &&
774d564b 2836 ipathend > ipath &&
3280af22 2837 !PL_minus_c &&
748a9306 2838 !instr(s,"indir") &&
3280af22 2839 instr(PL_origargv[0],"perl"))
748a9306 2840 {
27da23d5 2841 dVAR;
9f68db38 2842 char **newargv;
9f68db38 2843
774d564b 2844 *ipathend = '\0';
2845 s = ipathend + 1;
3280af22 2846 while (s < PL_bufend && isSPACE(*s))
9f68db38 2847 s++;
3280af22
NIS
2848 if (s < PL_bufend) {
2849 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2850 newargv[1] = s;
3280af22 2851 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2852 s++;
2853 *s = '\0';
3280af22 2854 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2855 }
2856 else
3280af22 2857 newargv = PL_origargv;
774d564b 2858 newargv[0] = ipath;
b35112e7 2859 PERL_FPU_PRE_EXEC
b4748376 2860 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2861 PERL_FPU_POST_EXEC
cea2e8a9 2862 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2863 }
bf4acbe4 2864#endif
748a9306 2865 if (d) {
f54cb97a
AL
2866 const U32 oldpdb = PL_perldb;
2867 const bool oldn = PL_minus_n;
2868 const bool oldp = PL_minus_p;
748a9306
LW
2869
2870 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2871 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2872
2873 if (*d++ == '-') {
f54cb97a 2874 const bool switches_done = PL_doswitches;
8cc95fdb 2875 do {
3ffe3ee4 2876 if (*d == 'M' || *d == 'm' || *d == 'C') {
f54cb97a 2877 const char *m = d;
8cc95fdb 2878 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2879 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2880 (int)(d - m), m);
2881 }
2882 d = moreswitches(d);
2883 } while (d);
f0b2cf55
YST
2884 if (PL_doswitches && !switches_done) {
2885 int argc = PL_origargc;
2886 char **argv = PL_origargv;
2887 do {
2888 argc--,argv++;
2889 } while (argc && argv[0][0] == '-' && argv[0][1]);
2890 init_argv_symbols(argc,argv);
2891 }
155aba94
GS
2892 if ((PERLDB_LINE && !oldpdb) ||
2893 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2894 /* if we have already added "LINE: while (<>) {",
2895 we must not do it again */
748a9306 2896 {
c69006e4 2897 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2898 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2899 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2900 PL_last_lop = PL_last_uni = Nullch;
3280af22 2901 PL_preambled = FALSE;
84902520 2902 if (PERLDB_LINE)
3280af22 2903 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2904 goto retry;
2905 }
a11ec5a9
RGS
2906 if (PL_doswitches && !switches_done) {
2907 int argc = PL_origargc;
2908 char **argv = PL_origargv;
2909 do {
2910 argc--,argv++;
2911 } while (argc && argv[0][0] == '-' && argv[0][1]);
2912 init_argv_symbols(argc,argv);
2913 }
a0d0e21e 2914 }
79072805 2915 }
9f68db38 2916 }
79072805 2917 }
3280af22
NIS
2918 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2919 PL_bufptr = s;
2920 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2921 return yylex();
ae986130 2922 }
378cc40b 2923 goto retry;
4fdae800 2924 case '\r':
6a27c188 2925#ifdef PERL_STRICT_CR
cea2e8a9 2926 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2927 Perl_croak(aTHX_
cc507455 2928 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2929#endif
4fdae800 2930 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2931#ifdef MACOS_TRADITIONAL
2932 case '\312':
2933#endif
378cc40b
LW
2934 s++;
2935 goto retry;
378cc40b 2936 case '#':
e929a76b 2937 case '\n':
3280af22 2938 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2939 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2940 /* handle eval qq[#line 1 "foo"\n ...] */
2941 CopLINE_dec(PL_curcop);
2942 incline(s);
2943 }
3280af22 2944 d = PL_bufend;
a687059c 2945 while (s < d && *s != '\n')
378cc40b 2946 s++;
0f85fab0 2947 if (s < d)
378cc40b 2948 s++;
78c267c1 2949 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2950 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2951 incline(s);
3280af22
NIS
2952 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2953 PL_bufptr = s;
2954 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2955 return yylex();
a687059c 2956 }
378cc40b 2957 }
a687059c 2958 else {
378cc40b 2959 *s = '\0';
3280af22 2960 PL_bufend = s;
a687059c 2961 }
378cc40b
LW
2962 goto retry;
2963 case '-':
79072805 2964 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2965 I32 ftst = 0;
2966
378cc40b 2967 s++;
3280af22 2968 PL_bufptr = s;
748a9306
LW
2969 tmp = *s++;
2970
bf4acbe4 2971 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2972 s++;
2973
2974 if (strnEQ(s,"=>",2)) {
3280af22 2975 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2976 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2977 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2978 } );
748a9306
LW
2979 OPERATOR('-'); /* unary minus */
2980 }
3280af22 2981 PL_last_uni = PL_oldbufptr;
748a9306 2982 switch (tmp) {
e5edeb50
JH
2983 case 'r': ftst = OP_FTEREAD; break;
2984 case 'w': ftst = OP_FTEWRITE; break;
2985 case 'x': ftst = OP_FTEEXEC; break;
2986 case 'o': ftst = OP_FTEOWNED; break;
2987 case 'R': ftst = OP_FTRREAD; break;
2988 case 'W': ftst = OP_FTRWRITE; break;
2989 case 'X': ftst = OP_FTREXEC; break;
2990 case 'O': ftst = OP_FTROWNED; break;
2991 case 'e': ftst = OP_FTIS; break;
2992 case 'z': ftst = OP_FTZERO; break;
2993 case 's': ftst = OP_FTSIZE; break;
2994 case 'f': ftst = OP_FTFILE; break;
2995 case 'd': ftst = OP_FTDIR; break;
2996 case 'l': ftst = OP_FTLINK; break;
2997 case 'p': ftst = OP_FTPIPE; break;
2998 case 'S': ftst = OP_FTSOCK; break;
2999 case 'u': ftst = OP_FTSUID; break;
3000 case 'g': ftst = OP_FTSGID; break;
3001 case 'k': ftst = OP_FTSVTX; break;
3002 case 'b': ftst = OP_FTBLK; break;
3003 case 'c': ftst = OP_FTCHR; break;
3004 case 't': ftst = OP_FTTTY; break;
3005 case 'T': ftst = OP_FTTEXT; break;
3006 case 'B': ftst = OP_FTBINARY; break;
3007 case 'M': case 'A': case 'C':
3008 gv_fetchpv("\024",TRUE, SVt_PV);
3009 switch (tmp) {
3010 case 'M': ftst = OP_FTMTIME; break;
3011 case 'A': ftst = OP_FTATIME; break;
3012 case 'C': ftst = OP_FTCTIME; break;
3013 default: break;
3014 }
3015 break;
378cc40b 3016 default:
378cc40b
LW
3017 break;
3018 }
e5edeb50 3019 if (ftst) {
eb160463 3020 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3021 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3022 "### Saw file test %c\n", (int)ftst);
5f80b19c 3023 } );
e5edeb50
JH
3024 FTST(ftst);
3025 }
3026 else {
3027 /* Assume it was a minus followed by a one-letter named
3028 * subroutine call (or a -bareword), then. */
95c31fe3 3029 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3030 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3031 (int) tmp);
5f80b19c 3032 } );
3cf7b4c4 3033 s = --PL_bufptr;
e5edeb50 3034 }
378cc40b 3035 }
a687059c
LW
3036 tmp = *s++;
3037 if (*s == tmp) {
3038 s++;
3280af22 3039 if (PL_expect == XOPERATOR)
79072805
LW
3040 TERM(POSTDEC);
3041 else
3042 OPERATOR(PREDEC);
3043 }
3044 else if (*s == '>') {
3045 s++;
3046 s = skipspace(s);
7e2040f0 3047 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3048 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3049 TOKEN(ARROW);
79072805 3050 }
748a9306
LW
3051 else if (*s == '$')
3052 OPERATOR(ARROW);
463ee0b2 3053 else
748a9306 3054 TERM(ARROW);
a687059c 3055 }
3280af22 3056 if (PL_expect == XOPERATOR)
79072805
LW
3057 Aop(OP_SUBTRACT);
3058 else {
3280af22 3059 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3060 check_uni();
79072805 3061 OPERATOR('-'); /* unary minus */
2f3197b3 3062 }
79072805 3063
378cc40b 3064 case '+':
a687059c
LW
3065 tmp = *s++;
3066 if (*s == tmp) {
378cc40b 3067 s++;
3280af22 3068 if (PL_expect == XOPERATOR)
79072805
LW
3069 TERM(POSTINC);
3070 else
3071 OPERATOR(PREINC);
378cc40b 3072 }
3280af22 3073 if (PL_expect == XOPERATOR)
79072805
LW
3074 Aop(OP_ADD);
3075 else {
3280af22 3076 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3077 check_uni();
a687059c 3078 OPERATOR('+');
2f3197b3 3079 }
a687059c 3080
378cc40b 3081 case '*':
3280af22
NIS
3082 if (PL_expect != XOPERATOR) {
3083 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3084 PL_expect = XOPERATOR;
3085 force_ident(PL_tokenbuf, '*');
3086 if (!*PL_tokenbuf)
a0d0e21e 3087 PREREF('*');
79072805 3088 TERM('*');
a687059c 3089 }
79072805
LW
3090 s++;
3091 if (*s == '*') {
a687059c 3092 s++;
79072805 3093 PWop(OP_POW);
a687059c 3094 }
79072805
LW
3095 Mop(OP_MULTIPLY);
3096
378cc40b 3097 case '%':
3280af22 3098 if (PL_expect == XOPERATOR) {
bbce6d69 3099 ++s;
3100 Mop(OP_MODULO);
a687059c 3101 }
3280af22
NIS
3102 PL_tokenbuf[0] = '%';
3103 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3104 if (!PL_tokenbuf[1]) {
bbce6d69 3105 PREREF('%');
a687059c 3106 }
3280af22 3107 PL_pending_ident = '%';
bbce6d69 3108 TERM('%');
a687059c 3109
378cc40b 3110 case '^':
79072805 3111 s++;
a0d0e21e 3112 BOop(OP_BIT_XOR);
79072805 3113 case '[':
3280af22 3114 PL_lex_brackets++;
79072805 3115 /* FALL THROUGH */
378cc40b 3116 case '~':
378cc40b 3117 case ',':
378cc40b
LW
3118 tmp = *s++;
3119 OPERATOR(tmp);
a0d0e21e
LW
3120 case ':':
3121 if (s[1] == ':') {
3122 len = 0;
3123 goto just_a_word;
3124 }
3125 s++;
09bef843
SB
3126 switch (PL_expect) {
3127 OP *attrs;
3128 case XOPERATOR:
3129 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3130 break;
3131 PL_bufptr = s; /* update in case we back off */
3132 goto grabattrs;
3133 case XATTRBLOCK:
3134 PL_expect = XBLOCK;
3135 goto grabattrs;
3136 case XATTRTERM:
3137 PL_expect = XTERMBLOCK;
3138 grabattrs:
3139 s = skipspace(s);
3140 attrs = Nullop;
7e2040f0 3141 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3142 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3143 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3144 if (tmp < 0) tmp = -tmp;
3145 switch (tmp) {
3146 case KEY_or:
3147 case KEY_and:
c963b151 3148 case KEY_err:
f9829d6b
GS
3149 case KEY_for:
3150 case KEY_unless:
3151 case KEY_if:
3152 case KEY_while:
3153 case KEY_until:
3154 goto got_attrs;
3155 default:
3156 break;
3157 }
3158 }
09bef843
SB
3159 if (*d == '(') {
3160 d = scan_str(d,TRUE,TRUE);
3161 if (!d) {
09bef843
SB
3162 /* MUST advance bufptr here to avoid bogus
3163 "at end of line" context messages from yyerror().
3164 */
3165 PL_bufptr = s + len;
3166 yyerror("Unterminated attribute parameter in attribute list");
3167 if (attrs)
3168 op_free(attrs);
bbf60fe6 3169 return REPORT(0); /* EOF indicator */
09bef843
SB
3170 }
3171 }
3172 if (PL_lex_stuff) {
3173 SV *sv = newSVpvn(s, len);
3174 sv_catsv(sv, PL_lex_stuff);
3175 attrs = append_elem(OP_LIST, attrs,
3176 newSVOP(OP_CONST, 0, sv));
3177 SvREFCNT_dec(PL_lex_stuff);
3178 PL_lex_stuff = Nullsv;
3179 }
3180 else {
371fce9b
DM
3181 if (len == 6 && strnEQ(s, "unique", len)) {
3182 if (PL_in_my == KEY_our)
3183#ifdef USE_ITHREADS
3184 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3185#else
3186 ; /* skip to avoid loading attributes.pm */
3187#endif
bfed75c6 3188 else
371fce9b
DM
3189 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3190 }
3191
d3cea301
SB
3192 /* NOTE: any CV attrs applied here need to be part of
3193 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3194 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3195 CvLVALUE_on(PL_compcv);
3196 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3197 CvLOCKED_on(PL_compcv);
3198 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3199 CvMETHOD_on(PL_compcv);
06492da6
SF
3200 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3201 CvASSERTION_on(PL_compcv);
78f9721b
SM
3202 /* After we've set the flags, it could be argued that
3203 we don't need to do the attributes.pm-based setting
3204 process, and shouldn't bother appending recognized
d3cea301
SB
3205 flags. To experiment with that, uncomment the
3206 following "else". (Note that's already been
3207 uncommented. That keeps the above-applied built-in
3208 attributes from being intercepted (and possibly
3209 rejected) by a package's attribute routines, but is
3210 justified by the performance win for the common case
3211 of applying only built-in attributes.) */
0256094b 3212 else
78f9721b
SM
3213 attrs = append_elem(OP_LIST, attrs,
3214 newSVOP(OP_CONST, 0,
3215 newSVpvn(s, len)));
09bef843
SB
3216 }
3217 s = skipspace(d);
0120eecf 3218 if (*s == ':' && s[1] != ':')
09bef843 3219 s = skipspace(s+1);
0120eecf
GS
3220 else if (s == d)
3221 break; /* require real whitespace or :'s */
09bef843 3222 }
f9829d6b 3223 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3224 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3225 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3226 /* If here for an expression, and parsed no attrs, back off. */
3227 if (tmp == '=' && !attrs) {
3228 s = PL_bufptr;
3229 break;
3230 }
3231 /* MUST advance bufptr here to avoid bogus "at end of line"
3232 context messages from yyerror().
3233 */
3234 PL_bufptr = s;
3235 if (!*s)
3236 yyerror("Unterminated attribute list");
3237 else
3238 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3239 q, *s, q));
3240 if (attrs)
3241 op_free(attrs);
3242 OPERATOR(':');
3243 }
f9829d6b 3244 got_attrs:
09bef843
SB
3245 if (attrs) {
3246 PL_nextval[PL_nexttoke].opval = attrs;
3247 force_next(THING);
3248 }
3249 TOKEN(COLONATTR);
3250 }
a0d0e21e 3251 OPERATOR(':');
8990e307
LW
3252 case '(':
3253 s++;
3280af22
NIS
3254 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3255 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3256 else
3280af22 3257 PL_expect = XTERM;
4a202259 3258 s = skipspace(s);
a0d0e21e 3259 TOKEN('(');
378cc40b 3260 case ';':
f4dd75d9 3261 CLINE;
378cc40b
LW
3262 tmp = *s++;
3263 OPERATOR(tmp);
3264 case ')':
378cc40b 3265 tmp = *s++;
16d20bd9
AD
3266 s = skipspace(s);
3267 if (*s == '{')
3268 PREBLOCK(tmp);
378cc40b 3269 TERM(tmp);
79072805
LW
3270 case ']':
3271 s++;
3280af22 3272 if (PL_lex_brackets <= 0)
d98d5fff 3273 yyerror("Unmatched right square bracket");
463ee0b2 3274 else
3280af22
NIS
3275 --PL_lex_brackets;
3276 if (PL_lex_state == LEX_INTERPNORMAL) {
3277 if (PL_lex_brackets == 0) {
a0d0e21e 3278 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3279 PL_lex_state = LEX_INTERPEND;
79072805
LW
3280 }
3281 }
4633a7c4 3282 TERM(']');
79072805
LW
3283 case '{':
3284 leftbracket:
79072805 3285 s++;
3280af22 3286 if (PL_lex_brackets > 100) {
8edd5f42 3287 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3288 }
3280af22 3289 switch (PL_expect) {
a0d0e21e 3290 case XTERM:
3280af22 3291 if (PL_lex_formbrack) {
a0d0e21e
LW
3292 s--;
3293 PRETERMBLOCK(DO);
3294 }
3280af22
NIS
3295 if (PL_oldoldbufptr == PL_last_lop)
3296 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3297 else
3280af22 3298 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3299 OPERATOR(HASHBRACK);
a0d0e21e 3300 case XOPERATOR:
bf4acbe4 3301 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3302 s++;
44a8e56a 3303 d = s;
3280af22
NIS
3304 PL_tokenbuf[0] = '\0';
3305 if (d < PL_bufend && *d == '-') {
3306 PL_tokenbuf[0] = '-';
44a8e56a 3307 d++;
bf4acbe4 3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3309 d++;
3310 }
7e2040f0 3311 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3312 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3313 FALSE, &len);
bf4acbe4 3314 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3315 d++;
3316 if (*d == '}') {
f54cb97a 3317 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3318 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3319 if (minus)
3320 force_next('-');
748a9306
LW
3321 }
3322 }
3323 /* FALL THROUGH */
09bef843 3324 case XATTRBLOCK:
748a9306 3325 case XBLOCK:
3280af22
NIS
3326 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3327 PL_expect = XSTATE;
a0d0e21e 3328 break;
09bef843 3329 case XATTRTERM:
a0d0e21e 3330 case XTERMBLOCK:
3280af22
NIS
3331 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3332 PL_expect = XSTATE;
a0d0e21e
LW
3333 break;
3334 default: {
f54cb97a 3335 const char *t;
3280af22
NIS
3336 if (PL_oldoldbufptr == PL_last_lop)
3337 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3338 else
3280af22 3339 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3340 s = skipspace(s);
8452ff4b
SB
3341 if (*s == '}') {
3342 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3343 PL_expect = XTERM;
3344 /* This hack is to get the ${} in the message. */
3345 PL_bufptr = s+1;
3346 yyerror("syntax error");
3347 break;
3348 }
a0d0e21e 3349 OPERATOR(HASHBRACK);
8452ff4b 3350 }
b8a4b1be
GS
3351 /* This hack serves to disambiguate a pair of curlies
3352 * as being a block or an anon hash. Normally, expectation
3353 * determines that, but in cases where we're not in a
3354 * position to expect anything in particular (like inside
3355 * eval"") we have to resolve the ambiguity. This code
3356 * covers the case where the first term in the curlies is a
3357 * quoted string. Most other cases need to be explicitly
a0288114 3358 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3359 * curly in order to force resolution as an anon hash.
3360 *
3361 * XXX should probably propagate the outer expectation
3362 * into eval"" to rely less on this hack, but that could
3363 * potentially break current behavior of eval"".
3364 * GSAR 97-07-21
3365 */
3366 t = s;
3367 if (*s == '\'' || *s == '"' || *s == '`') {
3368 /* common case: get past first string, handling escapes */
3280af22 3369 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3370 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3371 t++;
3372 t++;
a0d0e21e 3373 }
b8a4b1be 3374 else if (*s == 'q') {
3280af22 3375 if (++t < PL_bufend
b8a4b1be 3376 && (!isALNUM(*t)
3280af22 3377 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3378 && !isALNUM(*t))))
3379 {
abc667d1 3380 /* skip q//-like construct */
f54cb97a 3381 const char *tmps;
b8a4b1be
GS
3382 char open, close, term;
3383 I32 brackets = 1;
3384
3280af22 3385 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3386 t++;
abc667d1
DM
3387 /* check for q => */
3388 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3389 OPERATOR(HASHBRACK);
3390 }
b8a4b1be
GS
3391 term = *t;
3392 open = term;
3393 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3394 term = tmps[5];
3395 close = term;
3396 if (open == close)
3280af22
NIS
3397 for (t++; t < PL_bufend; t++) {
3398 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3399 t++;
6d07e5e9 3400 else if (*t == open)
b8a4b1be
GS
3401 break;
3402 }
abc667d1 3403 else {
3280af22
NIS
3404 for (t++; t < PL_bufend; t++) {
3405 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3406 t++;
6d07e5e9 3407 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3408 break;
3409 else if (*t == open)
3410 brackets++;
3411 }
abc667d1
DM
3412 }
3413 t++;
b8a4b1be 3414 }
abc667d1
DM
3415 else
3416 /* skip plain q word */
3417 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3418 t += UTF8SKIP(t);
a0d0e21e 3419 }
7e2040f0 3420 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3421 t += UTF8SKIP(t);
7e2040f0 3422 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3423 t += UTF8SKIP(t);
a0d0e21e 3424 }
3280af22 3425 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3426 t++;
b8a4b1be
GS
3427 /* if comma follows first term, call it an anon hash */
3428 /* XXX it could be a comma expression with loop modifiers */
3280af22 3429 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3430 || (*t == '=' && t[1] == '>')))
a0d0e21e 3431 OPERATOR(HASHBRACK);
3280af22 3432 if (PL_expect == XREF)
4e4e412b 3433 PL_expect = XTERM;
a0d0e21e 3434 else {
3280af22
NIS
3435 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3436 PL_expect = XSTATE;
a0d0e21e 3437 }
8990e307 3438 }
a0d0e21e 3439 break;
463ee0b2 3440 }
57843af0 3441 yylval.ival = CopLINE(PL_curcop);
79072805 3442 if (isSPACE(*s) || *s == '#')
3280af22 3443 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3444 TOKEN('{');
378cc40b 3445 case '}':
79072805
LW
3446 rightbracket:
3447 s++;
3280af22 3448 if (PL_lex_brackets <= 0)
d98d5fff 3449 yyerror("Unmatched right curly bracket");
463ee0b2 3450 else
3280af22 3451 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3452 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3453 PL_lex_formbrack = 0;
3454 if (PL_lex_state == LEX_INTERPNORMAL) {
3455 if (PL_lex_brackets == 0) {
9059aa12
LW
3456 if (PL_expect & XFAKEBRACK) {
3457 PL_expect &= XENUMMASK;
3280af22
NIS
3458 PL_lex_state = LEX_INTERPEND;
3459 PL_bufptr = s;
cea2e8a9 3460 return yylex(); /* ignore fake brackets */
79072805 3461 }
fa83b5b6 3462 if (*s == '-' && s[1] == '>')
3280af22 3463 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3464 else if (*s != '[' && *s != '{')
3280af22 3465 PL_lex_state = LEX_INTERPEND;
79072805
LW
3466 }
3467 }
9059aa12
LW
3468 if (PL_expect & XFAKEBRACK) {
3469 PL_expect &= XENUMMASK;
3280af22 3470 PL_bufptr = s;
cea2e8a9 3471 return yylex(); /* ignore fake brackets */
748a9306 3472 }
79072805
LW
3473 force_next('}');
3474 TOKEN(';');
378cc40b
LW
3475 case '&':
3476 s++;
3477 tmp = *s++;
3478 if (tmp == '&')
a0d0e21e 3479 AOPERATOR(ANDAND);
378cc40b 3480 s--;
3280af22 3481 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3482 if (ckWARN(WARN_SEMICOLON)
3483 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3484 {
57843af0 3485 CopLINE_dec(PL_curcop);
9014280d 3486 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3487 CopLINE_inc(PL_curcop);
463ee0b2 3488 }
79072805 3489 BAop(OP_BIT_AND);
463ee0b2 3490 }
79072805 3491
3280af22
NIS
3492 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3493 if (*PL_tokenbuf) {
3494 PL_expect = XOPERATOR;
3495 force_ident(PL_tokenbuf, '&');
463ee0b2 3496 }
79072805
LW
3497 else
3498 PREREF('&');
c07a80fd 3499 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3500 TERM('&');
3501
378cc40b
LW
3502 case '|':
3503 s++;
3504 tmp = *s++;
3505 if (tmp == '|')
a0d0e21e 3506 AOPERATOR(OROR);
378cc40b 3507 s--;
79072805 3508 BOop(OP_BIT_OR);
378cc40b
LW
3509 case '=':
3510 s++;
3511 tmp = *s++;
3512 if (tmp == '=')
79072805
LW
3513 Eop(OP_EQ);
3514 if (tmp == '>')
3515 OPERATOR(',');
378cc40b 3516 if (tmp == '~')
79072805 3517 PMop(OP_MATCH);
599cee73 3518 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3519 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3520 s--;
3280af22
NIS
3521 if (PL_expect == XSTATE && isALPHA(tmp) &&