This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
The debugger wasn't tracing correctly execution in eval("")'ed
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
27da23d5
JH
29static const char ident_too_long[] =
30 "Identifier too long";
31static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
8903cb82 35
acfe0abc 36static void restore_rsfp(pTHX_ void *f);
6e3aabd6 37#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
38static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 40#endif
51371543 41
9059aa12
LW
42#define XFAKEBRACK 128
43#define XENUMMASK 127
44
39e02b42
JH
45#ifdef USE_UTF8_SCRIPTS
46# define UTF (!IN_BYTES)
2b9d42f0 47#else
746b446a 48# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 49#endif
a0ed51b3 50
61f0cdd9 51/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
52 * 1999-02-27 mjd-perl-patch@plover.com */
53#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
54
bf4acbe4
GS
55/* On MacOS, respect nonbreaking spaces */
56#ifdef MACOS_TRADITIONAL
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
58#else
59#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60#endif
61
ffb4593c
NT
62/* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
79072805
LW
64 * can get by with a single comparison (if the compiler is smart enough).
65 */
66
fb73857a 67/* #define LEX_NOTPARSING 11 is done in perl.h. */
68
55497cff 69#define LEX_NORMAL 10
70#define LEX_INTERPNORMAL 9
71#define LEX_INTERPCASEMOD 8
72#define LEX_INTERPPUSH 7
73#define LEX_INTERPSTART 6
74#define LEX_INTERPEND 5
75#define LEX_INTERPENDMAYBE 4
76#define LEX_INTERPCONCAT 3
77#define LEX_INTERPCONST 2
78#define LEX_FORMLINE 1
79#define LEX_KNOWNEXT 0
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 139# define REPORT(retval) tokereport(s,(int)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
376fcdbf
AL
170#define UNI2(f,x) { \
171 yylval.ival = f; \
172 PL_expect = x; \
173 PL_bufptr = s; \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
176 if (*s == '(') \
177 return REPORT( (int)FUNC1 ); \
178 s = skipspace(s); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
180 }
6f33ba73
RGS
181#define UNI(f) UNI2(f,XTERM)
182#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 183
376fcdbf
AL
184#define UNIBRACK(f) { \
185 yylval.ival = f; \
186 PL_bufptr = s; \
187 PL_last_uni = PL_oldbufptr; \
188 if (*s == '(') \
189 return REPORT( (int)FUNC1 ); \
190 s = skipspace(s); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
192 }
79072805 193
9f68db38 194/* grandfather return to old style */
3280af22 195#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 196
8fa7f367
JH
197#ifdef DEBUGGING
198
bbf60fe6
DM
199/* how to interpret the yylval associated with the token */
200enum token_type {
201 TOKENTYPE_NONE,
202 TOKENTYPE_IVAL,
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
204 TOKENTYPE_PVAL,
205 TOKENTYPE_OPVAL,
206 TOKENTYPE_GVVAL
207};
208
27da23d5
JH
209static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
9041c2e3 211{
bbf60fe6
DM
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DO, TOKENTYPE_NONE, "DO" },
223 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
224 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
225 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
226 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
227 { ELSE, TOKENTYPE_NONE, "ELSE" },
228 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
229 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
230 { FOR, TOKENTYPE_IVAL, "FOR" },
231 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
232 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
233 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
234 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
235 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
236 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
237 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
238 { IF, TOKENTYPE_IVAL, "IF" },
239 { LABEL, TOKENTYPE_PVAL, "LABEL" },
240 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
241 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
242 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
243 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
244 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
245 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
246 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
247 { MY, TOKENTYPE_IVAL, "MY" },
248 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
249 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
250 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
251 { OROP, TOKENTYPE_IVAL, "OROP" },
252 { OROR, TOKENTYPE_NONE, "OROR" },
253 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
254 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
255 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
256 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
257 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
258 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
259 { PREINC, TOKENTYPE_NONE, "PREINC" },
260 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
261 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
262 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
263 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
264 { SUB, TOKENTYPE_NONE, "SUB" },
265 { THING, TOKENTYPE_OPVAL, "THING" },
266 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
267 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
268 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
269 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
270 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
271 { USE, TOKENTYPE_IVAL, "USE" },
272 { WHILE, TOKENTYPE_IVAL, "WHILE" },
273 { WORD, TOKENTYPE_OPVAL, "WORD" },
274 { 0, TOKENTYPE_NONE, 0 }
275};
276
277/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 278
bbf60fe6 279STATIC int
bfed75c6 280S_tokereport(pTHX_ const char* s, I32 rv)
bbf60fe6
DM
281{
282 if (DEBUG_T_TEST) {
bfed75c6 283 const char *name = Nullch;
bbf60fe6 284 enum token_type type = TOKENTYPE_NONE;
f54cb97a 285 const struct debug_tokens *p;
9d4ba2ae 286 SV* const report = newSVpvn("<== ", 4);
bbf60fe6 287
f54cb97a 288 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
289 if (p->token == (int)rv) {
290 name = p->name;
291 type = p->type;
292 break;
293 }
294 }
295 if (name)
54667de8 296 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
297 else if ((char)rv > ' ' && (char)rv < '~')
298 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
299 else if (!rv)
54667de8 300 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
301 else
302 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
303 switch (type) {
304 case TOKENTYPE_NONE:
305 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
306 break;
307 case TOKENTYPE_IVAL:
e4584336 308 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
309 break;
310 case TOKENTYPE_OPNUM:
311 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
312 PL_op_name[yylval.ival]);
313 break;
314 case TOKENTYPE_PVAL:
315 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
316 break;
317 case TOKENTYPE_OPVAL:
401441c0
RGS
318 if (yylval.opval)
319 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 320 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
321 else
322 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
323 break;
324 }
e4584336 325 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
998054bd
SC
326 if (s - PL_bufptr > 0)
327 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
328 else {
329 if (PL_oldbufptr && *PL_oldbufptr)
330 sv_catpv(report, PL_tokenbuf);
331 }
cfd0369c 332 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen_const(report));
bbf60fe6
DM
333 };
334 return (int)rv;
998054bd
SC
335}
336
8fa7f367
JH
337#endif
338
ffb4593c
NT
339/*
340 * S_ao
341 *
c963b151
BD
342 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
343 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
344 */
345
76e3520e 346STATIC int
cea2e8a9 347S_ao(pTHX_ int toketype)
a0d0e21e 348{
3280af22
NIS
349 if (*PL_bufptr == '=') {
350 PL_bufptr++;
a0d0e21e
LW
351 if (toketype == ANDAND)
352 yylval.ival = OP_ANDASSIGN;
353 else if (toketype == OROR)
354 yylval.ival = OP_ORASSIGN;
c963b151
BD
355 else if (toketype == DORDOR)
356 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
357 toketype = ASSIGNOP;
358 }
359 return toketype;
360}
361
ffb4593c
NT
362/*
363 * S_no_op
364 * When Perl expects an operator and finds something else, no_op
365 * prints the warning. It always prints "<something> found where
366 * operator expected. It prints "Missing semicolon on previous line?"
367 * if the surprise occurs at the start of the line. "do you need to
368 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
369 * where the compiler doesn't know if foo is a method call or a function.
370 * It prints "Missing operator before end of line" if there's nothing
371 * after the missing operator, or "... before <...>" if there is something
372 * after the missing operator.
373 */
374
76e3520e 375STATIC void
bfed75c6 376S_no_op(pTHX_ const char *what, char *s)
463ee0b2 377{
9d4ba2ae
AL
378 char * const oldbp = PL_bufptr;
379 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 380
1189a94a
GS
381 if (!s)
382 s = oldbp;
07c798fb 383 else
1189a94a 384 PL_bufptr = s;
cea2e8a9 385 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
386 if (ckWARN_d(WARN_SYNTAX)) {
387 if (is_first)
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Missing semicolon on previous line?)\n");
390 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 391 const char *t;
56da5a46
RGS
392 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
393 if (t < PL_bufptr && isSPACE(*t))
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Do you need to predeclare %.*s?)\n",
396 t - PL_oldoldbufptr, PL_oldoldbufptr);
397 }
398 else {
399 assert(s >= oldbp);
400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
401 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
402 }
07c798fb 403 }
3280af22 404 PL_bufptr = oldbp;
8990e307
LW
405}
406
ffb4593c
NT
407/*
408 * S_missingterm
409 * Complain about missing quote/regexp/heredoc terminator.
410 * If it's called with (char *)NULL then it cauterizes the line buffer.
411 * If we're in a delimited string and the delimiter is a control
412 * character, it's reformatted into a two-char sequence like ^C.
413 * This is fatal.
414 */
415
76e3520e 416STATIC void
cea2e8a9 417S_missingterm(pTHX_ char *s)
8990e307
LW
418{
419 char tmpbuf[3];
420 char q;
421 if (s) {
9d4ba2ae 422 char * const nl = strrchr(s,'\n');
d2719217 423 if (nl)
8990e307
LW
424 *nl = '\0';
425 }
9d116dd7
JH
426 else if (
427#ifdef EBCDIC
428 iscntrl(PL_multi_close)
429#else
430 PL_multi_close < 32 || PL_multi_close == 127
431#endif
432 ) {
8990e307 433 *tmpbuf = '^';
3280af22 434 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
435 tmpbuf[2] = '\0';
436 s = tmpbuf;
437 }
438 else {
eb160463 439 *tmpbuf = (char)PL_multi_close;
8990e307
LW
440 tmpbuf[1] = '\0';
441 s = tmpbuf;
442 }
443 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 444 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 445}
79072805 446
ffb4593c
NT
447/*
448 * Perl_deprecate
ffb4593c
NT
449 */
450
79072805 451void
bfed75c6 452Perl_deprecate(pTHX_ const char *s)
a0d0e21e 453{
599cee73 454 if (ckWARN(WARN_DEPRECATED))
9014280d 455 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
456}
457
12bcd1a6 458void
bfed75c6 459Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
460{
461 /* This function should NOT be called for any new deprecated warnings */
462 /* Use Perl_deprecate instead */
463 /* */
464 /* It is here to maintain backward compatibility with the pre-5.8 */
465 /* warnings category hierarchy. The "deprecated" category used to */
466 /* live under the "syntax" category. It is now a top-level category */
467 /* in its own right. */
468
469 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 470 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
471 "Use of %s is deprecated", s);
472}
473
ffb4593c
NT
474/*
475 * depcom
9cbb5ea2 476 * Deprecate a comma-less variable list.
ffb4593c
NT
477 */
478
76e3520e 479STATIC void
cea2e8a9 480S_depcom(pTHX)
a0d0e21e 481{
12bcd1a6 482 deprecate_old("comma-less variable list");
a0d0e21e
LW
483}
484
ffb4593c 485/*
9cbb5ea2
GS
486 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
487 * utf16-to-utf8-reversed.
ffb4593c
NT
488 */
489
c39cd008
GS
490#ifdef PERL_CR_FILTER
491static void
492strip_return(SV *sv)
493{
95a20fc0 494 register const char *s = SvPVX_const(sv);
9d4ba2ae 495 register const char * const e = s + SvCUR(sv);
c39cd008
GS
496 /* outer loop optimized to do nothing if there are no CR-LFs */
497 while (s < e) {
498 if (*s++ == '\r' && *s == '\n') {
499 /* hit a CR-LF, need to copy the rest */
500 register char *d = s - 1;
501 *d++ = *s++;
502 while (s < e) {
503 if (*s == '\r' && s[1] == '\n')
504 s++;
505 *d++ = *s++;
506 }
507 SvCUR(sv) -= s - d;
508 return;
509 }
510 }
511}
a868473f 512
76e3520e 513STATIC I32
c39cd008 514S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 515{
f54cb97a 516 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
517 if (count > 0 && !maxlen)
518 strip_return(sv);
519 return count;
a868473f
NIS
520}
521#endif
522
ffb4593c
NT
523/*
524 * Perl_lex_start
9cbb5ea2
GS
525 * Initialize variables. Uses the Perl save_stack to save its state (for
526 * recursive calls to the parser).
ffb4593c
NT
527 */
528
a0d0e21e 529void
864dbfa3 530Perl_lex_start(pTHX_ SV *line)
79072805 531{
cfd0369c 532 const char *s;
8990e307
LW
533 STRLEN len;
534
3280af22
NIS
535 SAVEI32(PL_lex_dojoin);
536 SAVEI32(PL_lex_brackets);
3280af22
NIS
537 SAVEI32(PL_lex_casemods);
538 SAVEI32(PL_lex_starts);
539 SAVEI32(PL_lex_state);
7766f137 540 SAVEVPTR(PL_lex_inpat);
3280af22 541 SAVEI32(PL_lex_inwhat);
18b09519
GS
542 if (PL_lex_state == LEX_KNOWNEXT) {
543 I32 toke = PL_nexttoke;
544 while (--toke >= 0) {
545 SAVEI32(PL_nexttype[toke]);
546 SAVEVPTR(PL_nextval[toke]);
547 }
548 SAVEI32(PL_nexttoke);
18b09519 549 }
57843af0 550 SAVECOPLINE(PL_curcop);
3280af22
NIS
551 SAVEPPTR(PL_bufptr);
552 SAVEPPTR(PL_bufend);
553 SAVEPPTR(PL_oldbufptr);
554 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
555 SAVEPPTR(PL_last_lop);
556 SAVEPPTR(PL_last_uni);
3280af22
NIS
557 SAVEPPTR(PL_linestart);
558 SAVESPTR(PL_linestr);
8edd5f42
RGS
559 SAVEGENERICPV(PL_lex_brackstack);
560 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 561 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
562 SAVESPTR(PL_lex_stuff);
563 SAVEI32(PL_lex_defer);
09bef843 564 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 565 SAVESPTR(PL_lex_repl);
bebdddfc
GS
566 SAVEINT(PL_expect);
567 SAVEINT(PL_lex_expect);
3280af22
NIS
568
569 PL_lex_state = LEX_NORMAL;
570 PL_lex_defer = 0;
571 PL_expect = XSTATE;
572 PL_lex_brackets = 0;
a02a5408
JC
573 Newx(PL_lex_brackstack, 120, char);
574 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
575 PL_lex_casemods = 0;
576 *PL_lex_casestack = '\0';
577 PL_lex_dojoin = 0;
578 PL_lex_starts = 0;
579 PL_lex_stuff = Nullsv;
580 PL_lex_repl = Nullsv;
581 PL_lex_inpat = 0;
76be56bc 582 PL_nexttoke = 0;
3280af22 583 PL_lex_inwhat = 0;
09bef843 584 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
585 PL_linestr = line;
586 if (SvREADONLY(PL_linestr))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 588 s = SvPV_const(PL_linestr, len);
6f27f9a7 589 if (!len || s[len-1] != ';') {
3280af22
NIS
590 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
591 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
592 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 593 }
3280af22
NIS
594 SvTEMP_off(PL_linestr);
595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 597 PL_last_lop = PL_last_uni = Nullch;
3280af22 598 PL_rsfp = 0;
79072805 599}
a687059c 600
ffb4593c
NT
601/*
602 * Perl_lex_end
9cbb5ea2
GS
603 * Finalizer for lexing operations. Must be called when the parser is
604 * done with the lexer.
ffb4593c
NT
605 */
606
463ee0b2 607void
864dbfa3 608Perl_lex_end(pTHX)
463ee0b2 609{
3280af22 610 PL_doextract = FALSE;
463ee0b2
LW
611}
612
ffb4593c
NT
613/*
614 * S_incline
615 * This subroutine has nothing to do with tilting, whether at windmills
616 * or pinball tables. Its name is short for "increment line". It
57843af0 617 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 618 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
619 * # line 500 "foo.pm"
620 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
621 */
622
76e3520e 623STATIC void
cea2e8a9 624S_incline(pTHX_ char *s)
463ee0b2
LW
625{
626 char *t;
627 char *n;
73659bf1 628 char *e;
463ee0b2 629 char ch;
463ee0b2 630
57843af0 631 CopLINE_inc(PL_curcop);
463ee0b2
LW
632 if (*s++ != '#')
633 return;
bf4acbe4 634 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
635 if (strnEQ(s, "line", 4))
636 s += 4;
637 else
638 return;
084592ab 639 if (SPACE_OR_TAB(*s))
73659bf1 640 s++;
4e553d73 641 else
73659bf1 642 return;
bf4acbe4 643 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
644 if (!isDIGIT(*s))
645 return;
646 n = s;
647 while (isDIGIT(*s))
648 s++;
bf4acbe4 649 while (SPACE_OR_TAB(*s))
463ee0b2 650 s++;
73659bf1 651 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 652 s++;
73659bf1
GS
653 e = t + 1;
654 }
463ee0b2 655 else {
463ee0b2 656 for (t = s; !isSPACE(*t); t++) ;
73659bf1 657 e = t;
463ee0b2 658 }
bf4acbe4 659 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
660 e++;
661 if (*e != '\n' && *e != '\0')
662 return; /* false alarm */
663
463ee0b2
LW
664 ch = *t;
665 *t = '\0';
f4dd75d9 666 if (t - s > 0) {
e66cf94c
RGS
667 const char *cf = CopFILE(PL_curcop);
668 if (cf && strlen(cf) > 7 && strnEQ(cf, "(eval ", 6)) {
669 /* must copy *{"::_<(eval N)[oldfilename:L]"}
670 * to *{"::_<newfilename"} */
671 char smallbuf[256], smallbuf2[256];
672 char *tmpbuf, *tmpbuf2;
673 GV *gv, *gv2;
674 STRLEN tmplen = strlen(cf);
675 STRLEN tmplen2 = strlen(s);
676 if (tmplen + 3 < sizeof smallbuf)
677 tmpbuf = smallbuf;
678 else
679 Newx(tmpbuf, tmplen + 3, char);
680 if (tmplen2 + 3 < sizeof smallbuf2)
681 tmpbuf2 = smallbuf2;
682 else
683 Newx(tmpbuf2, tmplen2 + 3, char);
684 tmpbuf[0] = tmpbuf2[0] = '_';
685 tmpbuf[1] = tmpbuf2[1] = '<';
686 memcpy(tmpbuf + 2, cf, ++tmplen);
687 memcpy(tmpbuf2 + 2, s, ++tmplen2);
688 ++tmplen; ++tmplen2;
689 gv = *(GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
690 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
691 if (!isGV(gv2))
692 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
693 /* adjust ${"::_<newfilename"} to store the new file name */
694 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
695 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(gv));
696 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(gv));
697 if (tmpbuf != smallbuf) Safefree(tmpbuf);
698 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
699 }
05ec9bb3 700 CopFILE_free(PL_curcop);
57843af0 701 CopFILE_set(PL_curcop, s);
f4dd75d9 702 }
463ee0b2 703 *t = ch;
57843af0 704 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
705}
706
ffb4593c
NT
707/*
708 * S_skipspace
709 * Called to gobble the appropriate amount and type of whitespace.
710 * Skips comments as well.
711 */
712
76e3520e 713STATIC char *
cea2e8a9 714S_skipspace(pTHX_ register char *s)
a687059c 715{
3280af22 716 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 717 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
718 s++;
719 return s;
720 }
721 for (;;) {
fd049845 722 STRLEN prevlen;
09bef843 723 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 724 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
725 while (s < PL_bufend && isSPACE(*s)) {
726 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
727 incline(s);
728 }
ffb4593c
NT
729
730 /* comment */
3280af22
NIS
731 if (s < PL_bufend && *s == '#') {
732 while (s < PL_bufend && *s != '\n')
463ee0b2 733 s++;
60e6418e 734 if (s < PL_bufend) {
463ee0b2 735 s++;
60e6418e
GS
736 if (PL_in_eval && !PL_rsfp) {
737 incline(s);
738 continue;
739 }
740 }
463ee0b2 741 }
ffb4593c
NT
742
743 /* only continue to recharge the buffer if we're at the end
744 * of the buffer, we're not reading from a source filter, and
745 * we're in normal lexing mode
746 */
09bef843
SB
747 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
748 PL_lex_state == LEX_FORMLINE)
463ee0b2 749 return s;
ffb4593c
NT
750
751 /* try to recharge the buffer */
9cbb5ea2
GS
752 if ((s = filter_gets(PL_linestr, PL_rsfp,
753 (prevlen = SvCUR(PL_linestr)))) == Nullch)
754 {
755 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
756 if (PL_minus_p) {
757 sv_setpv(PL_linestr,
758 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 759 PL_minus_n = PL_minus_p = 0;
a0d0e21e 760 }
01a19ab0
NC
761 else if (PL_minus_n) {
762 sv_setpvn(PL_linestr, ";}", 2);
763 PL_minus_n = 0;
764 }
a0d0e21e 765 else
4147a61b 766 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
767
768 /* reset variables for next time we lex */
9cbb5ea2
GS
769 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
770 = SvPVX(PL_linestr);
3280af22 771 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 772 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
773
774 /* Close the filehandle. Could be from -P preprocessor,
775 * STDIN, or a regular file. If we were reading code from
776 * STDIN (because the commandline held no -e or filename)
777 * then we don't close it, we reset it so the code can
778 * read from STDIN too.
779 */
780
3280af22
NIS
781 if (PL_preprocess && !PL_in_eval)
782 (void)PerlProc_pclose(PL_rsfp);
783 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
784 PerlIO_clearerr(PL_rsfp);
8990e307 785 else
3280af22
NIS
786 (void)PerlIO_close(PL_rsfp);
787 PL_rsfp = Nullfp;
463ee0b2
LW
788 return s;
789 }
ffb4593c
NT
790
791 /* not at end of file, so we only read another line */
09bef843
SB
792 /* make corresponding updates to old pointers, for yyerror() */
793 oldprevlen = PL_oldbufptr - PL_bufend;
794 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
795 if (PL_last_uni)
796 oldunilen = PL_last_uni - PL_bufend;
797 if (PL_last_lop)
798 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
799 PL_linestart = PL_bufptr = s + prevlen;
800 PL_bufend = s + SvCUR(PL_linestr);
801 s = PL_bufptr;
09bef843
SB
802 PL_oldbufptr = s + oldprevlen;
803 PL_oldoldbufptr = s + oldoldprevlen;
804 if (PL_last_uni)
805 PL_last_uni = s + oldunilen;
806 if (PL_last_lop)
807 PL_last_lop = s + oldloplen;
a0d0e21e 808 incline(s);
ffb4593c
NT
809
810 /* debugger active and we're not compiling the debugger code,
811 * so store the line into the debugger's array of lines
812 */
3280af22 813 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 814 SV * const sv = NEWSV(85,0);
8990e307
LW
815
816 sv_upgrade(sv, SVt_PVMG);
3280af22 817 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 818 (void)SvIOK_on(sv);
45977657 819 SvIV_set(sv, 0);
57843af0 820 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 821 }
463ee0b2 822 }
a687059c 823}
378cc40b 824
ffb4593c
NT
825/*
826 * S_check_uni
827 * Check the unary operators to ensure there's no ambiguity in how they're
828 * used. An ambiguous piece of code would be:
829 * rand + 5
830 * This doesn't mean rand() + 5. Because rand() is a unary operator,
831 * the +5 is its argument.
832 */
833
76e3520e 834STATIC void
cea2e8a9 835S_check_uni(pTHX)
ba106d47 836{
2f3197b3 837 char *s;
a0d0e21e 838 char *t;
2f3197b3 839
3280af22 840 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 841 return;
3280af22
NIS
842 while (isSPACE(*PL_last_uni))
843 PL_last_uni++;
7e2040f0 844 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 845 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 846 return;
0453d815 847 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 848 const char ch = *s;
0453d815 849 *s = '\0';
9014280d 850 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 851 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
852 PL_last_uni);
853 *s = ch;
854 }
2f3197b3
LW
855}
856
ffb4593c
NT
857/*
858 * LOP : macro to build a list operator. Its behaviour has been replaced
859 * with a subroutine, S_lop() for which LOP is just another name.
860 */
861
a0d0e21e
LW
862#define LOP(f,x) return lop(f,x,s)
863
ffb4593c
NT
864/*
865 * S_lop
866 * Build a list operator (or something that might be one). The rules:
867 * - if we have a next token, then it's a list operator [why?]
868 * - if the next thing is an opening paren, then it's a function
869 * - else it's a list operator
870 */
871
76e3520e 872STATIC I32
a0be28da 873S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 874{
79072805 875 yylval.ival = f;
35c8bce7 876 CLINE;
3280af22
NIS
877 PL_expect = x;
878 PL_bufptr = s;
879 PL_last_lop = PL_oldbufptr;
eb160463 880 PL_last_lop_op = (OPCODE)f;
3280af22 881 if (PL_nexttoke)
bbf60fe6 882 return REPORT(LSTOP);
79072805 883 if (*s == '(')
bbf60fe6 884 return REPORT(FUNC);
79072805
LW
885 s = skipspace(s);
886 if (*s == '(')
bbf60fe6 887 return REPORT(FUNC);
79072805 888 else
bbf60fe6 889 return REPORT(LSTOP);
79072805
LW
890}
891
ffb4593c
NT
892/*
893 * S_force_next
9cbb5ea2 894 * When the lexer realizes it knows the next token (for instance,
ffb4593c 895 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
896 * to know what token to return the next time the lexer is called. Caller
897 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
898 * handles the token correctly.
ffb4593c
NT
899 */
900
4e553d73 901STATIC void
cea2e8a9 902S_force_next(pTHX_ I32 type)
79072805 903{
3280af22
NIS
904 PL_nexttype[PL_nexttoke] = type;
905 PL_nexttoke++;
906 if (PL_lex_state != LEX_KNOWNEXT) {
907 PL_lex_defer = PL_lex_state;
908 PL_lex_expect = PL_expect;
909 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
910 }
911}
912
d0a148a6
NC
913STATIC SV *
914S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
915{
9d4ba2ae 916 SV * const sv = newSVpvn(start,len);
bfed75c6 917 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
918 SvUTF8_on(sv);
919 return sv;
920}
921
ffb4593c
NT
922/*
923 * S_force_word
924 * When the lexer knows the next thing is a word (for instance, it has
925 * just seen -> and it knows that the next char is a word char, then
926 * it calls S_force_word to stick the next word into the PL_next lookahead.
927 *
928 * Arguments:
b1b65b59 929 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
930 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
931 * int check_keyword : if true, Perl checks to make sure the word isn't
932 * a keyword (do this if the word is a label, e.g. goto FOO)
933 * int allow_pack : if true, : characters will also be allowed (require,
934 * use, etc. do this)
9cbb5ea2 935 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
936 */
937
76e3520e 938STATIC char *
cea2e8a9 939S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 940{
463ee0b2
LW
941 register char *s;
942 STRLEN len;
4e553d73 943
463ee0b2
LW
944 start = skipspace(start);
945 s = start;
7e2040f0 946 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 947 (allow_pack && *s == ':') ||
15f0808c 948 (allow_initial_tick && *s == '\'') )
a0d0e21e 949 {
3280af22
NIS
950 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
951 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
952 return start;
953 if (token == METHOD) {
954 s = skipspace(s);
955 if (*s == '(')
3280af22 956 PL_expect = XTERM;
463ee0b2 957 else {
3280af22 958 PL_expect = XOPERATOR;
463ee0b2 959 }
79072805 960 }
d0a148a6
NC
961 PL_nextval[PL_nexttoke].opval
962 = (OP*)newSVOP(OP_CONST,0,
963 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 964 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
965 force_next(token);
966 }
967 return s;
968}
969
ffb4593c
NT
970/*
971 * S_force_ident
9cbb5ea2 972 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
973 * text only contains the "foo" portion. The first argument is a pointer
974 * to the "foo", and the second argument is the type symbol to prefix.
975 * Forces the next token to be a "WORD".
9cbb5ea2 976 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
977 */
978
76e3520e 979STATIC void
bfed75c6 980S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
981{
982 if (s && *s) {
bfed75c6 983 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 984 PL_nextval[PL_nexttoke].opval = o;
79072805 985 force_next(WORD);
748a9306 986 if (kind) {
11343788 987 o->op_private = OPpCONST_ENTERED;
55497cff 988 /* XXX see note in pp_entereval() for why we forgo typo
989 warnings if the symbol must be introduced in an eval.
990 GSAR 96-10-12 */
3280af22 991 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
992 kind == '$' ? SVt_PV :
993 kind == '@' ? SVt_PVAV :
994 kind == '%' ? SVt_PVHV :
995 SVt_PVGV
996 );
748a9306 997 }
79072805
LW
998 }
999}
1000
1571675a
GS
1001NV
1002Perl_str_to_version(pTHX_ SV *sv)
1003{
1004 NV retval = 0.0;
1005 NV nshift = 1.0;
1006 STRLEN len;
cfd0369c 1007 const char *start = SvPV_const(sv,len);
9d4ba2ae 1008 const char * const end = start + len;
504618e9 1009 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1010 while (start < end) {
ba210ebe 1011 STRLEN skip;
1571675a
GS
1012 UV n;
1013 if (utf)
9041c2e3 1014 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1015 else {
1016 n = *(U8*)start;
1017 skip = 1;
1018 }
1019 retval += ((NV)n)/nshift;
1020 start += skip;
1021 nshift *= 1000;
1022 }
1023 return retval;
1024}
1025
4e553d73 1026/*
ffb4593c
NT
1027 * S_force_version
1028 * Forces the next token to be a version number.
e759cc13
RGS
1029 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1030 * and if "guessing" is TRUE, then no new token is created (and the caller
1031 * must use an alternative parsing method).
ffb4593c
NT
1032 */
1033
76e3520e 1034STATIC char *
e759cc13 1035S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1036{
1037 OP *version = Nullop;
44dcb63b 1038 char *d;
89bfa8cd 1039
1040 s = skipspace(s);
1041
44dcb63b 1042 d = s;
dd629d5b 1043 if (*d == 'v')
44dcb63b 1044 d++;
44dcb63b 1045 if (isDIGIT(*d)) {
e759cc13
RGS
1046 while (isDIGIT(*d) || *d == '_' || *d == '.')
1047 d++;
9f3d182e 1048 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1049 SV *ver;
b73d6f50 1050 s = scan_num(s, &yylval);
89bfa8cd 1051 version = yylval.opval;
dd629d5b
GS
1052 ver = cSVOPx(version)->op_sv;
1053 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1054 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1055 SvNV_set(ver, str_to_version(ver));
1571675a 1056 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1057 }
89bfa8cd 1058 }
e759cc13
RGS
1059 else if (guessing)
1060 return s;
89bfa8cd 1061 }
1062
1063 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1064 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1065 force_next(WORD);
89bfa8cd 1066
e759cc13 1067 return s;
89bfa8cd 1068}
1069
ffb4593c
NT
1070/*
1071 * S_tokeq
1072 * Tokenize a quoted string passed in as an SV. It finds the next
1073 * chunk, up to end of string or a backslash. It may make a new
1074 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1075 * turns \\ into \.
1076 */
1077
76e3520e 1078STATIC SV *
cea2e8a9 1079S_tokeq(pTHX_ SV *sv)
79072805
LW
1080{
1081 register char *s;
1082 register char *send;
1083 register char *d;
b3ac6de7
IZ
1084 STRLEN len = 0;
1085 SV *pv = sv;
79072805
LW
1086
1087 if (!SvLEN(sv))
b3ac6de7 1088 goto finish;
79072805 1089
a0d0e21e 1090 s = SvPV_force(sv, len);
21a311ee 1091 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1092 goto finish;
463ee0b2 1093 send = s + len;
79072805
LW
1094 while (s < send && *s != '\\')
1095 s++;
1096 if (s == send)
b3ac6de7 1097 goto finish;
79072805 1098 d = s;
be4731d2 1099 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1100 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1101 if (SvUTF8(sv))
1102 SvUTF8_on(pv);
1103 }
79072805
LW
1104 while (s < send) {
1105 if (*s == '\\') {
a0d0e21e 1106 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1107 s++; /* all that, just for this */
1108 }
1109 *d++ = *s++;
1110 }
1111 *d = '\0';
95a20fc0 1112 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1113 finish:
3280af22 1114 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1115 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1116 return sv;
1117}
1118
ffb4593c
NT
1119/*
1120 * Now come three functions related to double-quote context,
1121 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1122 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1123 * interact with PL_lex_state, and create fake ( ... ) argument lists
1124 * to handle functions and concatenation.
1125 * They assume that whoever calls them will be setting up a fake
1126 * join call, because each subthing puts a ',' after it. This lets
1127 * "lower \luPpEr"
1128 * become
1129 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1130 *
1131 * (I'm not sure whether the spurious commas at the end of lcfirst's
1132 * arguments and join's arguments are created or not).
1133 */
1134
1135/*
1136 * S_sublex_start
1137 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1138 *
1139 * Pattern matching will set PL_lex_op to the pattern-matching op to
1140 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1141 *
1142 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1143 *
1144 * Everything else becomes a FUNC.
1145 *
1146 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1147 * had an OP_CONST or OP_READLINE). This just sets us up for a
1148 * call to S_sublex_push().
1149 */
1150
76e3520e 1151STATIC I32
cea2e8a9 1152S_sublex_start(pTHX)
79072805 1153{
f54cb97a 1154 const register I32 op_type = yylval.ival;
79072805
LW
1155
1156 if (op_type == OP_NULL) {
3280af22
NIS
1157 yylval.opval = PL_lex_op;
1158 PL_lex_op = Nullop;
79072805
LW
1159 return THING;
1160 }
1161 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1162 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1163
1164 if (SvTYPE(sv) == SVt_PVIV) {
1165 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1166 STRLEN len;
cfd0369c 1167 const char *p = SvPV_const(sv, len);
f54cb97a 1168 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1169 if (SvUTF8(sv))
1170 SvUTF8_on(nsv);
b3ac6de7
IZ
1171 SvREFCNT_dec(sv);
1172 sv = nsv;
4e553d73 1173 }
b3ac6de7 1174 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1175 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1176 /* Allow <FH> // "foo" */
1177 if (op_type == OP_READLINE)
1178 PL_expect = XTERMORDORDOR;
79072805
LW
1179 return THING;
1180 }
1181
3280af22
NIS
1182 PL_sublex_info.super_state = PL_lex_state;
1183 PL_sublex_info.sub_inwhat = op_type;
1184 PL_sublex_info.sub_op = PL_lex_op;
1185 PL_lex_state = LEX_INTERPPUSH;
55497cff 1186
3280af22
NIS
1187 PL_expect = XTERM;
1188 if (PL_lex_op) {
1189 yylval.opval = PL_lex_op;
1190 PL_lex_op = Nullop;
55497cff 1191 return PMFUNC;
1192 }
1193 else
1194 return FUNC;
1195}
1196
ffb4593c
NT
1197/*
1198 * S_sublex_push
1199 * Create a new scope to save the lexing state. The scope will be
1200 * ended in S_sublex_done. Returns a '(', starting the function arguments
1201 * to the uc, lc, etc. found before.
1202 * Sets PL_lex_state to LEX_INTERPCONCAT.
1203 */
1204
76e3520e 1205STATIC I32
cea2e8a9 1206S_sublex_push(pTHX)
55497cff 1207{
27da23d5 1208 dVAR;
f46d017c 1209 ENTER;
55497cff 1210
3280af22
NIS
1211 PL_lex_state = PL_sublex_info.super_state;
1212 SAVEI32(PL_lex_dojoin);
1213 SAVEI32(PL_lex_brackets);
3280af22
NIS
1214 SAVEI32(PL_lex_casemods);
1215 SAVEI32(PL_lex_starts);
1216 SAVEI32(PL_lex_state);
7766f137 1217 SAVEVPTR(PL_lex_inpat);
3280af22 1218 SAVEI32(PL_lex_inwhat);
57843af0 1219 SAVECOPLINE(PL_curcop);
3280af22 1220 SAVEPPTR(PL_bufptr);
8452ff4b 1221 SAVEPPTR(PL_bufend);
3280af22
NIS
1222 SAVEPPTR(PL_oldbufptr);
1223 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1224 SAVEPPTR(PL_last_lop);
1225 SAVEPPTR(PL_last_uni);
3280af22
NIS
1226 SAVEPPTR(PL_linestart);
1227 SAVESPTR(PL_linestr);
8edd5f42
RGS
1228 SAVEGENERICPV(PL_lex_brackstack);
1229 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1230
1231 PL_linestr = PL_lex_stuff;
1232 PL_lex_stuff = Nullsv;
1233
9cbb5ea2
GS
1234 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1235 = SvPVX(PL_linestr);
3280af22 1236 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1237 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1238 SAVEFREESV(PL_linestr);
1239
1240 PL_lex_dojoin = FALSE;
1241 PL_lex_brackets = 0;
a02a5408
JC
1242 Newx(PL_lex_brackstack, 120, char);
1243 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1244 PL_lex_casemods = 0;
1245 *PL_lex_casestack = '\0';
1246 PL_lex_starts = 0;
1247 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1248 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1249
1250 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1251 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1252 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1253 else
3280af22 1254 PL_lex_inpat = Nullop;
79072805 1255
55497cff 1256 return '(';
79072805
LW
1257}
1258
ffb4593c
NT
1259/*
1260 * S_sublex_done
1261 * Restores lexer state after a S_sublex_push.
1262 */
1263
76e3520e 1264STATIC I32
cea2e8a9 1265S_sublex_done(pTHX)
79072805 1266{
27da23d5 1267 dVAR;
3280af22 1268 if (!PL_lex_starts++) {
9d4ba2ae 1269 SV * const sv = newSVpvn("",0);
9aa983d2
JH
1270 if (SvUTF8(PL_linestr))
1271 SvUTF8_on(sv);
3280af22 1272 PL_expect = XOPERATOR;
9aa983d2 1273 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1274 return THING;
1275 }
1276
3280af22
NIS
1277 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1278 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1279 return yylex();
79072805
LW
1280 }
1281
ffb4593c 1282 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1283 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1284 PL_linestr = PL_lex_repl;
1285 PL_lex_inpat = 0;
1286 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1287 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1288 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1289 SAVEFREESV(PL_linestr);
1290 PL_lex_dojoin = FALSE;
1291 PL_lex_brackets = 0;
3280af22
NIS
1292 PL_lex_casemods = 0;
1293 *PL_lex_casestack = '\0';
1294 PL_lex_starts = 0;
25da4f38 1295 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1296 PL_lex_state = LEX_INTERPNORMAL;
1297 PL_lex_starts++;
e9fa98b2
HS
1298 /* we don't clear PL_lex_repl here, so that we can check later
1299 whether this is an evalled subst; that means we rely on the
1300 logic to ensure sublex_done() is called again only via the
1301 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1302 }
e9fa98b2 1303 else {
3280af22 1304 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1305 PL_lex_repl = Nullsv;
1306 }
79072805 1307 return ',';
ffed7fef
LW
1308 }
1309 else {
f46d017c 1310 LEAVE;
3280af22
NIS
1311 PL_bufend = SvPVX(PL_linestr);
1312 PL_bufend += SvCUR(PL_linestr);
1313 PL_expect = XOPERATOR;
09bef843 1314 PL_sublex_info.sub_inwhat = 0;
79072805 1315 return ')';
ffed7fef
LW
1316 }
1317}
1318
02aa26ce
NT
1319/*
1320 scan_const
1321
1322 Extracts a pattern, double-quoted string, or transliteration. This
1323 is terrifying code.
1324
3280af22
NIS
1325 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1326 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1327 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1328
9b599b2a
GS
1329 Returns a pointer to the character scanned up to. Iff this is
1330 advanced from the start pointer supplied (ie if anything was
1331 successfully parsed), will leave an OP for the substring scanned
1332 in yylval. Caller must intuit reason for not parsing further
1333 by looking at the next characters herself.
1334
02aa26ce
NT
1335 In patterns:
1336 backslashes:
1337 double-quoted style: \r and \n
1338 regexp special ones: \D \s
1339 constants: \x3
1340 backrefs: \1 (deprecated in substitution replacements)
1341 case and quoting: \U \Q \E
1342 stops on @ and $, but not for $ as tail anchor
1343
1344 In transliterations:
1345 characters are VERY literal, except for - not at the start or end
1346 of the string, which indicates a range. scan_const expands the
1347 range to the full set of intermediate characters.
1348
1349 In double-quoted strings:
1350 backslashes:
1351 double-quoted style: \r and \n
1352 constants: \x3
1353 backrefs: \1 (deprecated)
1354 case and quoting: \U \Q \E
1355 stops on @ and $
1356
1357 scan_const does *not* construct ops to handle interpolated strings.
1358 It stops processing as soon as it finds an embedded $ or @ variable
1359 and leaves it to the caller to work out what's going on.
1360
da6eedaa 1361 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1362
1363 $ in pattern could be $foo or could be tail anchor. Assumption:
1364 it's a tail anchor if $ is the last thing in the string, or if it's
1365 followed by one of ")| \n\t"
1366
1367 \1 (backreferences) are turned into $1
1368
1369 The structure of the code is
1370 while (there's a character to process) {
1371 handle transliteration ranges
1372 skip regexp comments
1373 skip # initiated comments in //x patterns
1374 check for embedded @foo
1375 check for embedded scalars
1376 if (backslash) {
1377 leave intact backslashes from leave (below)
1378 deprecate \1 in strings and sub replacements
1379 handle string-changing backslashes \l \U \Q \E, etc.
1380 switch (what was escaped) {
1381 handle - in a transliteration (becomes a literal -)
1382 handle \132 octal characters
1383 handle 0x15 hex characters
1384 handle \cV (control V)
1385 handle printf backslashes (\f, \r, \n, etc)
1386 } (end switch)
1387 } (end if backslash)
1388 } (end while character to read)
4e553d73 1389
02aa26ce
NT
1390*/
1391
76e3520e 1392STATIC char *
cea2e8a9 1393S_scan_const(pTHX_ char *start)
79072805 1394{
3280af22 1395 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1396 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1397 register char *s = start; /* start of the constant */
1398 register char *d = SvPVX(sv); /* destination for copies */
1399 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1400 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1401 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1402 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1403 UV uv;
4c3a8340
TS
1404#ifdef EBCDIC
1405 UV literal_endpoint = 0;
1406#endif
012bcf8d 1407
dff6d3cd 1408 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1409 PL_lex_inpat
b6d5fef8 1410 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1411 : "";
79072805 1412
2b9d42f0
NIS
1413 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1414 /* If we are doing a trans and we know we want UTF8 set expectation */
1415 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1416 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1417 }
1418
1419
79072805 1420 while (s < send || dorange) {
02aa26ce 1421 /* get transliterations out of the way (they're most literal) */
3280af22 1422 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1423 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1424 if (dorange) {
1ba5c669
JH
1425 I32 i; /* current expanded character */
1426 I32 min; /* first character in range */
1427 I32 max; /* last character in range */
02aa26ce 1428
2b9d42f0 1429 if (has_utf8) {
9d4ba2ae 1430 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1431 char *e = d++;
1432 while (e-- > c)
1433 *(e + 1) = *e;
25716404 1434 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1435 /* mark the range as done, and continue */
1436 dorange = FALSE;
1437 didrange = TRUE;
1438 continue;
1439 }
2b9d42f0 1440
95a20fc0 1441 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1442 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1443 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1444 d -= 2; /* eat the first char and the - */
1445
8ada0baa
JH
1446 min = (U8)*d; /* first char in range */
1447 max = (U8)d[1]; /* last char in range */
1448
c2e66d9e 1449 if (min > max) {
01ec43d0 1450 Perl_croak(aTHX_
d1573ac7 1451 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1452 (char)min, (char)max);
c2e66d9e
GS
1453 }
1454
c7f1f016 1455#ifdef EBCDIC
4c3a8340
TS
1456 if (literal_endpoint == 2 &&
1457 ((isLOWER(min) && isLOWER(max)) ||
1458 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1459 if (isLOWER(min)) {
1460 for (i = min; i <= max; i++)
1461 if (isLOWER(i))
db42d148 1462 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1463 } else {
1464 for (i = min; i <= max; i++)
1465 if (isUPPER(i))
db42d148 1466 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1467 }
1468 }
1469 else
1470#endif
1471 for (i = min; i <= max; i++)
eb160463 1472 *d++ = (char)i;
02aa26ce
NT
1473
1474 /* mark the range as done, and continue */
79072805 1475 dorange = FALSE;
01ec43d0 1476 didrange = TRUE;
4c3a8340
TS
1477#ifdef EBCDIC
1478 literal_endpoint = 0;
1479#endif
79072805 1480 continue;
4e553d73 1481 }
02aa26ce
NT
1482
1483 /* range begins (ignore - as first or last char) */
79072805 1484 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1485 if (didrange) {
1fafa243 1486 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1487 }
2b9d42f0 1488 if (has_utf8) {
25716404 1489 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1490 s++;
1491 continue;
1492 }
79072805
LW
1493 dorange = TRUE;
1494 s++;
01ec43d0
GS
1495 }
1496 else {
1497 didrange = FALSE;
4c3a8340
TS
1498#ifdef EBCDIC
1499 literal_endpoint = 0;
1500#endif
01ec43d0 1501 }
79072805 1502 }
02aa26ce
NT
1503
1504 /* if we get here, we're not doing a transliteration */
1505
0f5d15d6
IZ
1506 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1507 except for the last char, which will be done separately. */
3280af22 1508 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1509 if (s[2] == '#') {
e994fd66 1510 while (s+1 < send && *s != ')')
db42d148 1511 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1512 }
1513 else if (s[2] == '{' /* This should match regcomp.c */
1514 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1515 {
cc6b7395 1516 I32 count = 1;
0f5d15d6 1517 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1518 char c;
1519
d9f97599
GS
1520 while (count && (c = *regparse)) {
1521 if (c == '\\' && regparse[1])
1522 regparse++;
4e553d73 1523 else if (c == '{')
cc6b7395 1524 count++;
4e553d73 1525 else if (c == '}')
cc6b7395 1526 count--;
d9f97599 1527 regparse++;
cc6b7395 1528 }
e994fd66 1529 if (*regparse != ')')
5bdf89e7 1530 regparse--; /* Leave one char for continuation. */
0f5d15d6 1531 while (s < regparse)
db42d148 1532 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1533 }
748a9306 1534 }
02aa26ce
NT
1535
1536 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1537 else if (*s == '#' && PL_lex_inpat &&
1538 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1539 while (s+1 < send && *s != '\n')
db42d148 1540 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1541 }
02aa26ce 1542
5d1d4326 1543 /* check for embedded arrays
da6eedaa 1544 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1545 */
7e2040f0 1546 else if (*s == '@' && s[1]
5d1d4326 1547 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1548 break;
02aa26ce
NT
1549
1550 /* check for embedded scalars. only stop if we're sure it's a
1551 variable.
1552 */
79072805 1553 else if (*s == '$') {
3280af22 1554 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1555 break;
6002328a 1556 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1557 break; /* in regexp, $ might be tail anchor */
1558 }
02aa26ce 1559
2b9d42f0
NIS
1560 /* End of else if chain - OP_TRANS rejoin rest */
1561
02aa26ce 1562 /* backslashes */
79072805
LW
1563 if (*s == '\\' && s+1 < send) {
1564 s++;
02aa26ce
NT
1565
1566 /* some backslashes we leave behind */
c9f97d15 1567 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1568 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1569 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1570 continue;
1571 }
02aa26ce
NT
1572
1573 /* deprecate \1 in strings and substitution replacements */
3280af22 1574 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1575 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1576 {
599cee73 1577 if (ckWARN(WARN_SYNTAX))
9014280d 1578 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1579 *--s = '$';
1580 break;
1581 }
02aa26ce
NT
1582
1583 /* string-change backslash escapes */
3280af22 1584 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1585 --s;
1586 break;
1587 }
02aa26ce
NT
1588
1589 /* if we get here, it's either a quoted -, or a digit */
79072805 1590 switch (*s) {
02aa26ce
NT
1591
1592 /* quoted - in transliterations */
79072805 1593 case '-':
3280af22 1594 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1595 *d++ = *s++;
1596 continue;
1597 }
1598 /* FALL THROUGH */
1599 default:
11b8faa4 1600 {
041457d9
DM
1601 if (isALNUM(*s) &&
1602 *s != '_' &&
1603 ckWARN(WARN_MISC))
9014280d 1604 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1605 "Unrecognized escape \\%c passed through",
1606 *s);
1607 /* default action is to copy the quoted character */
f9a63242 1608 goto default_action;
11b8faa4 1609 }
02aa26ce
NT
1610
1611 /* \132 indicates an octal constant */
79072805
LW
1612 case '0': case '1': case '2': case '3':
1613 case '4': case '5': case '6': case '7':
ba210ebe 1614 {
53305cf1
NC
1615 I32 flags = 0;
1616 STRLEN len = 3;
1617 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1618 s += len;
1619 }
012bcf8d 1620 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1621
1622 /* \x24 indicates a hex constant */
79072805 1623 case 'x':
a0ed51b3
LW
1624 ++s;
1625 if (*s == '{') {
9d4ba2ae 1626 char* const e = strchr(s, '}');
a4c04bdc
NC
1627 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1628 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1629 STRLEN len;
355860ce 1630
53305cf1 1631 ++s;
adaeee49 1632 if (!e) {
a0ed51b3 1633 yyerror("Missing right brace on \\x{}");
355860ce 1634 continue;
ba210ebe 1635 }
53305cf1
NC
1636 len = e - s;
1637 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1638 s = e + 1;
a0ed51b3
LW
1639 }
1640 else {
ba210ebe 1641 {
53305cf1 1642 STRLEN len = 2;
a4c04bdc 1643 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1644 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1645 s += len;
1646 }
012bcf8d
GS
1647 }
1648
1649 NUM_ESCAPE_INSERT:
1650 /* Insert oct or hex escaped character.
301d3d20 1651 * There will always enough room in sv since such
db42d148 1652 * escapes will be longer than any UTF-8 sequence
301d3d20 1653 * they can end up as. */
ba7cea30 1654
c7f1f016
NIS
1655 /* We need to map to chars to ASCII before doing the tests
1656 to cover EBCDIC
1657 */
c4d5f83a 1658 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1659 if (!has_utf8 && uv > 255) {
301d3d20
JH
1660 /* Might need to recode whatever we have
1661 * accumulated so far if it contains any
1662 * hibit chars.
1663 *
1664 * (Can't we keep track of that and avoid
1665 * this rescan? --jhi)
012bcf8d 1666 */
c7f1f016 1667 int hicount = 0;
63cd0674
NIS
1668 U8 *c;
1669 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1670 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1671 hicount++;
db42d148 1672 }
012bcf8d 1673 }
63cd0674 1674 if (hicount) {
9d4ba2ae 1675 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1676 U8 *src, *dst;
1677 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1678 src = (U8 *)d - 1;
1679 dst = src+hicount;
1680 d += hicount;
cfd0369c 1681 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1682 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1683 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1684 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1685 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1686 }
1687 else {
63cd0674 1688 *dst-- = *src;
012bcf8d 1689 }
c7f1f016 1690 src--;
012bcf8d
GS
1691 }
1692 }
1693 }
1694
9aa983d2 1695 if (has_utf8 || uv > 255) {
9041c2e3 1696 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1697 has_utf8 = TRUE;
f9a63242
JH
1698 if (PL_lex_inwhat == OP_TRANS &&
1699 PL_sublex_info.sub_op) {
1700 PL_sublex_info.sub_op->op_private |=
1701 (PL_lex_repl ? OPpTRANS_FROM_UTF
1702 : OPpTRANS_TO_UTF);
f9a63242 1703 }
012bcf8d 1704 }
a0ed51b3 1705 else {
012bcf8d 1706 *d++ = (char)uv;
a0ed51b3 1707 }
012bcf8d
GS
1708 }
1709 else {
c4d5f83a 1710 *d++ = (char) uv;
a0ed51b3 1711 }
79072805 1712 continue;
02aa26ce 1713
b239daa5 1714 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1715 case 'N':
55eda711 1716 ++s;
423cee85
JH
1717 if (*s == '{') {
1718 char* e = strchr(s, '}');
155aba94 1719 SV *res;
423cee85 1720 STRLEN len;
cfd0369c 1721 const char *str;
4e553d73 1722
423cee85 1723 if (!e) {
5777a3f7 1724 yyerror("Missing right brace on \\N{}");
423cee85
JH
1725 e = s - 1;
1726 goto cont_scan;
1727 }
dbc0d4f2
JH
1728 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1729 /* \N{U+...} */
1730 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1731 PERL_SCAN_DISALLOW_PREFIX;
1732 s += 3;
1733 len = e - s;
1734 uv = grok_hex(s, &len, &flags, NULL);
1735 s = e + 1;
1736 goto NUM_ESCAPE_INSERT;
1737 }
55eda711
JH
1738 res = newSVpvn(s + 1, e - s - 1);
1739 res = new_constant( Nullch, 0, "charnames",
1740 res, Nullsv, "\\N{...}" );
f9a63242
JH
1741 if (has_utf8)
1742 sv_utf8_upgrade(res);
cfd0369c 1743 str = SvPV_const(res,len);
1c47067b
JH
1744#ifdef EBCDIC_NEVER_MIND
1745 /* charnames uses pack U and that has been
1746 * recently changed to do the below uni->native
1747 * mapping, so this would be redundant (and wrong,
1748 * the code point would be doubly converted).
1749 * But leave this in just in case the pack U change
1750 * gets revoked, but the semantics is still
1751 * desireable for charnames. --jhi */
cddc7ef4 1752 {
cfd0369c 1753 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1754
1755 if (uv < 0x100) {
89ebb4a3 1756 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1757
1758 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1759 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1760 str = SvPV_const(res, len);
cddc7ef4
JH
1761 }
1762 }
1763#endif
89491803 1764 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1765 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1766 SvCUR_set(sv, d - ostart);
1767 SvPOK_on(sv);
e4f3eed8 1768 *d = '\0';
f08d6ad9 1769 sv_utf8_upgrade(sv);
d2f449dd 1770 /* this just broke our allocation above... */
eb160463 1771 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1772 d = SvPVX(sv) + SvCUR(sv);
89491803 1773 has_utf8 = TRUE;
f08d6ad9 1774 }
eb160463 1775 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1776 const char * const odest = SvPVX_const(sv);
423cee85 1777
8973db79 1778 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1779 d = SvPVX(sv) + (d - odest);
1780 }
1781 Copy(str, d, len, char);
1782 d += len;
1783 SvREFCNT_dec(res);
1784 cont_scan:
1785 s = e + 1;
1786 }
1787 else
5777a3f7 1788 yyerror("Missing braces on \\N{}");
423cee85
JH
1789 continue;
1790
02aa26ce 1791 /* \c is a control character */
79072805
LW
1792 case 'c':
1793 s++;
961ce445 1794 if (s < send) {
ba210ebe 1795 U8 c = *s++;
c7f1f016
NIS
1796#ifdef EBCDIC
1797 if (isLOWER(c))
1798 c = toUPPER(c);
1799#endif
db42d148 1800 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1801 }
961ce445
RGS
1802 else {
1803 yyerror("Missing control char name in \\c");
1804 }
79072805 1805 continue;
02aa26ce
NT
1806
1807 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1808 case 'b':
db42d148 1809 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1810 break;
1811 case 'n':
db42d148 1812 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1813 break;
1814 case 'r':
db42d148 1815 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1816 break;
1817 case 'f':
db42d148 1818 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1819 break;
1820 case 't':
db42d148 1821 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1822 break;
34a3fe2a 1823 case 'e':
db42d148 1824 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1825 break;
1826 case 'a':
db42d148 1827 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1828 break;
02aa26ce
NT
1829 } /* end switch */
1830
79072805
LW
1831 s++;
1832 continue;
02aa26ce 1833 } /* end if (backslash) */
4c3a8340
TS
1834#ifdef EBCDIC
1835 else
1836 literal_endpoint++;
1837#endif
02aa26ce 1838
f9a63242 1839 default_action:
2b9d42f0
NIS
1840 /* If we started with encoded form, or already know we want it
1841 and then encode the next character */
1842 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1843 STRLEN len = 1;
9d4ba2ae
AL
1844 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1845 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1846 s += len;
1847 if (need > len) {
1848 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1849 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1850 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1851 }
1852 d = (char*)uvchr_to_utf8((U8*)d, uv);
1853 has_utf8 = TRUE;
1854 }
1855 else {
1856 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1857 }
02aa26ce
NT
1858 } /* while loop to process each character */
1859
1860 /* terminate the string and set up the sv */
79072805 1861 *d = '\0';
95a20fc0 1862 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1863 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1864 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1865
79072805 1866 SvPOK_on(sv);
9f4817db 1867 if (PL_encoding && !has_utf8) {
d0063567
DK
1868 sv_recode_to_utf8(sv, PL_encoding);
1869 if (SvUTF8(sv))
1870 has_utf8 = TRUE;
9f4817db 1871 }
2b9d42f0 1872 if (has_utf8) {
7e2040f0 1873 SvUTF8_on(sv);
2b9d42f0 1874 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1875 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1876 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1877 }
1878 }
79072805 1879
02aa26ce 1880 /* shrink the sv if we allocated more than we used */
79072805 1881 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1882 SvPV_shrink_to_cur(sv);
79072805 1883 }
02aa26ce 1884
9b599b2a 1885 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1886 if (s > PL_bufptr) {
1887 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1888 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1889 sv, Nullsv,
4e553d73 1890 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1891 ? "tr"
3280af22 1892 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1893 ? "s"
1894 : "qq")));
79072805 1895 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1896 } else
8990e307 1897 SvREFCNT_dec(sv);
79072805
LW
1898 return s;
1899}
1900
ffb4593c
NT
1901/* S_intuit_more
1902 * Returns TRUE if there's more to the expression (e.g., a subscript),
1903 * FALSE otherwise.
ffb4593c
NT
1904 *
1905 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1906 *
1907 * ->[ and ->{ return TRUE
1908 * { and [ outside a pattern are always subscripts, so return TRUE
1909 * if we're outside a pattern and it's not { or [, then return FALSE
1910 * if we're in a pattern and the first char is a {
1911 * {4,5} (any digits around the comma) returns FALSE
1912 * if we're in a pattern and the first char is a [
1913 * [] returns FALSE
1914 * [SOMETHING] has a funky algorithm to decide whether it's a
1915 * character class or not. It has to deal with things like
1916 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1917 * anything else returns TRUE
1918 */
1919
9cbb5ea2
GS
1920/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1921
76e3520e 1922STATIC int
cea2e8a9 1923S_intuit_more(pTHX_ register char *s)
79072805 1924{
3280af22 1925 if (PL_lex_brackets)
79072805
LW
1926 return TRUE;
1927 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1928 return TRUE;
1929 if (*s != '{' && *s != '[')
1930 return FALSE;
3280af22 1931 if (!PL_lex_inpat)
79072805
LW
1932 return TRUE;
1933
1934 /* In a pattern, so maybe we have {n,m}. */
1935 if (*s == '{') {
1936 s++;
1937 if (!isDIGIT(*s))
1938 return TRUE;
1939 while (isDIGIT(*s))
1940 s++;
1941 if (*s == ',')
1942 s++;
1943 while (isDIGIT(*s))
1944 s++;
1945 if (*s == '}')
1946 return FALSE;
1947 return TRUE;
1948
1949 }
1950
1951 /* On the other hand, maybe we have a character class */
1952
1953 s++;
1954 if (*s == ']' || *s == '^')
1955 return FALSE;
1956 else {
ffb4593c 1957 /* this is terrifying, and it works */
79072805
LW
1958 int weight = 2; /* let's weigh the evidence */
1959 char seen[256];
f27ffc4a 1960 unsigned char un_char = 255, last_un_char;
9d4ba2ae 1961 const char * const send = strchr(s,']');
3280af22 1962 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1963
1964 if (!send) /* has to be an expression */
1965 return TRUE;
1966
1967 Zero(seen,256,char);
1968 if (*s == '$')
1969 weight -= 3;
1970 else if (isDIGIT(*s)) {
1971 if (s[1] != ']') {
1972 if (isDIGIT(s[1]) && s[2] == ']')
1973 weight -= 10;
1974 }
1975 else
1976 weight -= 100;
1977 }
1978 for (; s < send; s++) {
1979 last_un_char = un_char;
1980 un_char = (unsigned char)*s;
1981 switch (*s) {
1982 case '@':
1983 case '&':
1984 case '$':
1985 weight -= seen[un_char] * 10;
7e2040f0 1986 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1987 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1988 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1989 weight -= 100;
1990 else
1991 weight -= 10;
1992 }
1993 else if (*s == '$' && s[1] &&
93a17b20
LW
1994 strchr("[#!%*<>()-=",s[1])) {
1995 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1996 weight -= 10;
1997 else
1998 weight -= 1;
1999 }
2000 break;
2001 case '\\':
2002 un_char = 254;
2003 if (s[1]) {
93a17b20 2004 if (strchr("wds]",s[1]))
79072805
LW
2005 weight += 100;
2006 else if (seen['\''] || seen['"'])
2007 weight += 1;
93a17b20 2008 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2009 weight += 40;
2010 else if (isDIGIT(s[1])) {
2011 weight += 40;
2012 while (s[1] && isDIGIT(s[1]))
2013 s++;
2014 }
2015 }
2016 else
2017 weight += 100;
2018 break;
2019 case '-':
2020 if (s[1] == '\\')
2021 weight += 50;
93a17b20 2022 if (strchr("aA01! ",last_un_char))
79072805 2023 weight += 30;
93a17b20 2024 if (strchr("zZ79~",s[1]))
79072805 2025 weight += 30;
f27ffc4a
GS
2026 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2027 weight -= 5; /* cope with negative subscript */
79072805
LW
2028 break;
2029 default:
3792a11b
NC
2030 if (!isALNUM(last_un_char)
2031 && !(last_un_char == '$' || last_un_char == '@'
2032 || last_un_char == '&')
2033 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2034 char *d = tmpbuf;
2035 while (isALPHA(*s))
2036 *d++ = *s++;
2037 *d = '\0';
2038 if (keyword(tmpbuf, d - tmpbuf))
2039 weight -= 150;
2040 }
2041 if (un_char == last_un_char + 1)
2042 weight += 5;
2043 weight -= seen[un_char];
2044 break;
2045 }
2046 seen[un_char]++;
2047 }
2048 if (weight >= 0) /* probably a character class */
2049 return FALSE;
2050 }
2051
2052 return TRUE;
2053}
ffed7fef 2054
ffb4593c
NT
2055/*
2056 * S_intuit_method
2057 *
2058 * Does all the checking to disambiguate
2059 * foo bar
2060 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2061 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2062 *
2063 * First argument is the stuff after the first token, e.g. "bar".
2064 *
2065 * Not a method if bar is a filehandle.
2066 * Not a method if foo is a subroutine prototyped to take a filehandle.
2067 * Not a method if it's really "Foo $bar"
2068 * Method if it's "foo $bar"
2069 * Not a method if it's really "print foo $bar"
2070 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2071 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2072 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2073 * =>
2074 */
2075
76e3520e 2076STATIC int
cea2e8a9 2077S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2078{
2079 char *s = start + (*start == '$');
3280af22 2080 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2081 STRLEN len;
2082 GV* indirgv;
2083
2084 if (gv) {
b6c543e3 2085 CV *cv;
a0d0e21e
LW
2086 if (GvIO(gv))
2087 return 0;
b6c543e3 2088 if ((cv = GvCVu(gv))) {
95a20fc0 2089 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2090 if (proto) {
2091 if (*proto == ';')
2092 proto++;
2093 if (*proto == '*')
2094 return 0;
2095 }
2096 } else
a0d0e21e
LW
2097 gv = 0;
2098 }
8903cb82 2099 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2100 /* start is the beginning of the possible filehandle/object,
2101 * and s is the end of it
2102 * tmpbuf is a copy of it
2103 */
2104
a0d0e21e 2105 if (*start == '$') {
3280af22 2106 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2107 return 0;
2108 s = skipspace(s);
3280af22
NIS
2109 PL_bufptr = start;
2110 PL_expect = XREF;
a0d0e21e
LW
2111 return *s == '(' ? FUNCMETH : METHOD;
2112 }
2113 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2114 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2115 len -= 2;
2116 tmpbuf[len] = '\0';
2117 goto bare_package;
2118 }
2119 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2120 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2121 return 0;
2122 /* filehandle or package name makes it a method */
89bfa8cd 2123 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2124 s = skipspace(s);
3280af22 2125 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2126 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2127 bare_package:
3280af22 2128 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2129 newSVpvn(tmpbuf,len));
3280af22
NIS
2130 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2131 PL_expect = XTERM;
a0d0e21e 2132 force_next(WORD);
3280af22 2133 PL_bufptr = s;
a0d0e21e
LW
2134 return *s == '(' ? FUNCMETH : METHOD;
2135 }
2136 }
2137 return 0;
2138}
2139
ffb4593c
NT
2140/*
2141 * S_incl_perldb
2142 * Return a string of Perl code to load the debugger. If PERL5DB
2143 * is set, it will return the contents of that, otherwise a
2144 * compile-time require of perl5db.pl.
2145 */
2146
bfed75c6 2147STATIC const char*
cea2e8a9 2148S_incl_perldb(pTHX)
a0d0e21e 2149{
3280af22 2150 if (PL_perldb) {
9d4ba2ae 2151 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2152
2153 if (pdb)
2154 return pdb;
93189314 2155 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2156 return "BEGIN { require 'perl5db.pl' }";
2157 }
2158 return "";
2159}
2160
2161
16d20bd9 2162/* Encoded script support. filter_add() effectively inserts a
4e553d73 2163 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2164 * Note that the filter function only applies to the current source file
2165 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2166 *
2167 * The datasv parameter (which may be NULL) can be used to pass
2168 * private data to this instance of the filter. The filter function
2169 * can recover the SV using the FILTER_DATA macro and use it to
2170 * store private buffers and state information.
2171 *
2172 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2173 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2174 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2175 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2176 * private use must be set using malloc'd pointers.
2177 */
16d20bd9
AD
2178
2179SV *
864dbfa3 2180Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2181{
f4c556ac
GS
2182 if (!funcp)
2183 return Nullsv;
2184
3280af22
NIS
2185 if (!PL_rsfp_filters)
2186 PL_rsfp_filters = newAV();
16d20bd9 2187 if (!datasv)
8c52afec 2188 datasv = NEWSV(255,0);
862a34c6 2189 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2190 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2191 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2192 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2193 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2194 av_unshift(PL_rsfp_filters, 1);
2195 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2196 return(datasv);
2197}
4e553d73 2198
16d20bd9
AD
2199
2200/* Delete most recently added instance of this filter function. */
a0d0e21e 2201void
864dbfa3 2202Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2203{
e0c19803 2204 SV *datasv;
24801a4b 2205
33073adb 2206#ifdef DEBUGGING
8141890a 2207 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2208#endif
3280af22 2209 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2210 return;
2211 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2212 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2213 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2214 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2215 IoANY(datasv) = (void *)NULL;
3280af22 2216 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2217
16d20bd9
AD
2218 return;
2219 }
2220 /* we need to search for the correct entry and clear it */
cea2e8a9 2221 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2222}
2223
2224
1de9afcd
RGS
2225/* Invoke the idxth filter function for the current rsfp. */
2226/* maxlen 0 = read one text line */
16d20bd9 2227I32
864dbfa3 2228Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2229{
16d20bd9
AD
2230 filter_t funcp;
2231 SV *datasv = NULL;
e50aee73 2232
3280af22 2233 if (!PL_rsfp_filters)
16d20bd9 2234 return -1;
1de9afcd 2235 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2236 /* Provide a default input filter to make life easy. */
2237 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2238 DEBUG_P(PerlIO_printf(Perl_debug_log,
2239 "filter_read %d: from rsfp\n", idx));
4e553d73 2240 if (maxlen) {
16d20bd9
AD
2241 /* Want a block */
2242 int len ;
f54cb97a 2243 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2244
2245 /* ensure buf_sv is large enough */
eb160463 2246 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2247 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2248 if (PerlIO_error(PL_rsfp))
37120919
AD
2249 return -1; /* error */
2250 else
2251 return 0 ; /* end of file */
2252 }
16d20bd9
AD
2253 SvCUR_set(buf_sv, old_len + len) ;
2254 } else {
2255 /* Want a line */
3280af22
NIS
2256 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2257 if (PerlIO_error(PL_rsfp))
37120919
AD
2258 return -1; /* error */
2259 else
2260 return 0 ; /* end of file */
2261 }
16d20bd9
AD
2262 }
2263 return SvCUR(buf_sv);
2264 }
2265 /* Skip this filter slot if filter has been deleted */
1de9afcd 2266 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2267 DEBUG_P(PerlIO_printf(Perl_debug_log,
2268 "filter_read %d: skipped (filter deleted)\n",
2269 idx));
16d20bd9
AD
2270 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2271 }
2272 /* Get function pointer hidden within datasv */
8141890a 2273 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2274 DEBUG_P(PerlIO_printf(Perl_debug_log,
2275 "filter_read %d: via function %p (%s)\n",
cfd0369c 2276 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2277 /* Call function. The function is expected to */
2278 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2279 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2280 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2281}
2282
76e3520e 2283STATIC char *
cea2e8a9 2284S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2285{
c39cd008 2286#ifdef PERL_CR_FILTER
3280af22 2287 if (!PL_rsfp_filters) {
c39cd008 2288 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2289 }
2290#endif
3280af22 2291 if (PL_rsfp_filters) {
55497cff 2292 if (!append)
2293 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2294 if (FILTER_READ(0, sv, 0) > 0)
2295 return ( SvPVX(sv) ) ;
2296 else
2297 return Nullch ;
2298 }
9d116dd7 2299 else
fd049845 2300 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2301}
2302
01ec43d0 2303STATIC HV *
7fc63493 2304S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2305{
2306 GV *gv;
2307
01ec43d0 2308 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2309 return PL_curstash;
2310
2311 if (len > 2 &&
2312 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2313 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2314 {
2315 return GvHV(gv); /* Foo:: */
def3634b
GS
2316 }
2317
2318 /* use constant CLASS => 'MyClass' */
2319 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2320 SV *sv;
2321 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2322 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2323 }
2324 }
2325
2326 return gv_stashpv(pkgname, FALSE);
2327}
a0d0e21e 2328
468aa647 2329STATIC char *
cc6ed77d 2330S_tokenize_use(pTHX_ int is_use, char *s) {
468aa647
RGS
2331 if (PL_expect != XSTATE)
2332 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2333 is_use ? "use" : "no"));
2334 s = skipspace(s);
2335 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2336 s = force_version(s, TRUE);
2337 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2338 PL_nextval[PL_nexttoke].opval = Nullop;
2339 force_next(WORD);
2340 }
2341 else if (*s == 'v') {
2342 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2343 s = force_version(s, FALSE);
2344 }
2345 }
2346 else {
2347 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2348 s = force_version(s, FALSE);
2349 }
2350 yylval.ival = is_use;
2351 return s;
2352}
748a9306 2353#ifdef DEBUGGING
27da23d5 2354 static const char* const exp_name[] =
09bef843 2355 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2356 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2357 };
748a9306 2358#endif
463ee0b2 2359
02aa26ce
NT
2360/*
2361 yylex
2362
2363 Works out what to call the token just pulled out of the input
2364 stream. The yacc parser takes care of taking the ops we return and
2365 stitching them into a tree.
2366
2367 Returns:
2368 PRIVATEREF
2369
2370 Structure:
2371 if read an identifier
2372 if we're in a my declaration
2373 croak if they tried to say my($foo::bar)
2374 build the ops for a my() declaration
2375 if it's an access to a my() variable
2376 are we in a sort block?
2377 croak if my($a); $a <=> $b
2378 build ops for access to a my() variable
2379 if in a dq string, and they've said @foo and we can't find @foo
2380 croak
2381 build ops for a bareword
2382 if we already built the token before, use it.
2383*/
2384
20141f0e 2385
dba4d153
JH
2386#ifdef __SC__
2387#pragma segment Perl_yylex
2388#endif
dba4d153 2389int
dba4d153 2390Perl_yylex(pTHX)
20141f0e 2391{
3afc138a 2392 register char *s = PL_bufptr;
378cc40b 2393 register char *d;
79072805 2394 register I32 tmp;
463ee0b2 2395 STRLEN len;
161b471a
NIS
2396 GV *gv = Nullgv;
2397 GV **gvp = 0;
aa7440fb 2398 bool bof = FALSE;
1d239bbb 2399 I32 orig_keyword = 0;
a687059c 2400
bbf60fe6
DM
2401 DEBUG_T( {
2402 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2403 lex_state_names[PL_lex_state]);
2404 } );
02aa26ce 2405 /* check if there's an identifier for us to look at */
ba979b31 2406 if (PL_pending_ident)
bbf60fe6 2407 return REPORT(S_pending_ident(aTHX));
bbce6d69 2408
02aa26ce
NT
2409 /* no identifier pending identification */
2410
3280af22 2411 switch (PL_lex_state) {
79072805
LW
2412#ifdef COMMENTARY
2413 case LEX_NORMAL: /* Some compilers will produce faster */
2414 case LEX_INTERPNORMAL: /* code if we comment these out. */
2415 break;
2416#endif
2417
09bef843 2418 /* when we've already built the next token, just pull it out of the queue */
79072805 2419 case LEX_KNOWNEXT:
3280af22
NIS
2420 PL_nexttoke--;
2421 yylval = PL_nextval[PL_nexttoke];
2422 if (!PL_nexttoke) {
2423 PL_lex_state = PL_lex_defer;
2424 PL_expect = PL_lex_expect;
2425 PL_lex_defer = LEX_NORMAL;
463ee0b2 2426 }
607df283 2427 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2428 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2429 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2430
bbf60fe6 2431 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2432
02aa26ce 2433 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2434 when we get here, PL_bufptr is at the \
02aa26ce 2435 */
79072805
LW
2436 case LEX_INTERPCASEMOD:
2437#ifdef DEBUGGING
3280af22 2438 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2439 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2440#endif
02aa26ce 2441 /* handle \E or end of string */
3280af22 2442 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2443 /* if at a \E */
3280af22 2444 if (PL_lex_casemods) {
f54cb97a 2445 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2446 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2447
3792a11b
NC
2448 if (PL_bufptr != PL_bufend
2449 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2450 PL_bufptr += 2;
2451 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2452 }
bbf60fe6 2453 return REPORT(')');
79072805 2454 }
3280af22
NIS
2455 if (PL_bufptr != PL_bufend)
2456 PL_bufptr += 2;
2457 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2458 return yylex();
79072805
LW
2459 }
2460 else {
607df283 2461 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2462 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2463 s = PL_bufptr + 1;
6e909404
JH
2464 if (s[1] == '\\' && s[2] == 'E') {
2465 PL_bufptr = s + 3;
2466 PL_lex_state = LEX_INTERPCONCAT;
2467 return yylex();
a0d0e21e 2468 }
6e909404
JH
2469 else {
2470 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2471 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2472 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2473 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2474 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2475 return REPORT(')');
6e909404
JH
2476 }
2477 if (PL_lex_casemods > 10)
2478 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2479 PL_lex_casestack[PL_lex_casemods++] = *s;
2480 PL_lex_casestack[PL_lex_casemods] = '\0';
2481 PL_lex_state = LEX_INTERPCONCAT;
2482 PL_nextval[PL_nexttoke].ival = 0;
2483 force_next('(');
2484 if (*s == 'l')
2485 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2486 else if (*s == 'u')
2487 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2488 else if (*s == 'L')
2489 PL_nextval[PL_nexttoke].ival = OP_LC;
2490 else if (*s == 'U')
2491 PL_nextval[PL_nexttoke].ival = OP_UC;
2492 else if (*s == 'Q')
2493 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2494 else
2495 Perl_croak(aTHX_ "panic: yylex");
2496 PL_bufptr = s + 1;
a0d0e21e 2497 }
79072805 2498 force_next(FUNC);
3280af22
NIS
2499 if (PL_lex_starts) {
2500 s = PL_bufptr;
2501 PL_lex_starts = 0;
131b3ad0
DM
2502 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2503 if (PL_lex_casemods == 1 && PL_lex_inpat)
2504 OPERATOR(',');
2505 else
2506 Aop(OP_CONCAT);
79072805
LW
2507 }
2508 else
cea2e8a9 2509 return yylex();
79072805
LW
2510 }
2511
55497cff 2512 case LEX_INTERPPUSH:
bbf60fe6 2513 return REPORT(sublex_push());
55497cff 2514
79072805 2515 case LEX_INTERPSTART:
3280af22 2516 if (PL_bufptr == PL_bufend)
bbf60fe6 2517 return REPORT(sublex_done());
607df283 2518 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2519 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2520 PL_expect = XTERM;
2521 PL_lex_dojoin = (*PL_bufptr == '@');
2522 PL_lex_state = LEX_INTERPNORMAL;
2523 if (PL_lex_dojoin) {
2524 PL_nextval[PL_nexttoke].ival = 0;
79072805 2525 force_next(',');
a0d0e21e 2526 force_ident("\"", '$');
3280af22 2527 PL_nextval[PL_nexttoke].ival = 0;
79072805 2528 force_next('$');
3280af22 2529 PL_nextval[PL_nexttoke].ival = 0;
79072805 2530 force_next('(');
3280af22 2531 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2532 force_next(FUNC);
2533 }
3280af22
NIS
2534 if (PL_lex_starts++) {
2535 s = PL_bufptr;
131b3ad0
DM
2536 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2537 if (!PL_lex_casemods && PL_lex_inpat)
2538 OPERATOR(',');
2539 else
2540 Aop(OP_CONCAT);
79072805 2541 }
cea2e8a9 2542 return yylex();
79072805
LW
2543
2544 case LEX_INTERPENDMAYBE:
3280af22
NIS
2545 if (intuit_more(PL_bufptr)) {
2546 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2547 break;
2548 }
2549 /* FALL THROUGH */
2550
2551 case LEX_INTERPEND:
3280af22
NIS
2552 if (PL_lex_dojoin) {
2553 PL_lex_dojoin = FALSE;
2554 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2555 return REPORT(')');
79072805 2556 }
43a16006 2557 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2558 && SvEVALED(PL_lex_repl))
43a16006 2559 {
e9fa98b2 2560 if (PL_bufptr != PL_bufend)
cea2e8a9 2561 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2562 PL_lex_repl = Nullsv;
2563 }
79072805
LW
2564 /* FALLTHROUGH */
2565 case LEX_INTERPCONCAT:
2566#ifdef DEBUGGING
3280af22 2567 if (PL_lex_brackets)
cea2e8a9 2568 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2569#endif
3280af22 2570 if (PL_bufptr == PL_bufend)
bbf60fe6 2571 return REPORT(sublex_done());
79072805 2572
3280af22
NIS
2573 if (SvIVX(PL_linestr) == '\'') {
2574 SV *sv = newSVsv(PL_linestr);
2575 if (!PL_lex_inpat)
76e3520e 2576 sv = tokeq(sv);
3280af22 2577 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2578 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2579 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2580 s = PL_bufend;
79072805
LW
2581 }
2582 else {
3280af22 2583 s = scan_const(PL_bufptr);
79072805 2584 if (*s == '\\')
3280af22 2585 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2586 else
3280af22 2587 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2588 }
2589
3280af22
NIS
2590 if (s != PL_bufptr) {
2591 PL_nextval[PL_nexttoke] = yylval;
2592 PL_expect = XTERM;
79072805 2593 force_next(THING);
131b3ad0
DM
2594 if (PL_lex_starts++) {
2595 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2596 if (!PL_lex_casemods && PL_lex_inpat)
2597 OPERATOR(',');
2598 else
2599 Aop(OP_CONCAT);
2600 }
79072805 2601 else {
3280af22 2602 PL_bufptr = s;
cea2e8a9 2603 return yylex();
79072805
LW
2604 }
2605 }
2606
cea2e8a9 2607 return yylex();
a0d0e21e 2608 case LEX_FORMLINE:
3280af22
NIS
2609 PL_lex_state = LEX_NORMAL;
2610 s = scan_formline(PL_bufptr);
2611 if (!PL_lex_formbrack)
a0d0e21e
LW
2612 goto rightbracket;
2613 OPERATOR(';');
79072805
LW
2614 }
2615
3280af22
NIS
2616 s = PL_bufptr;
2617 PL_oldoldbufptr = PL_oldbufptr;
2618 PL_oldbufptr = s;
607df283 2619 DEBUG_T( {
bbf60fe6 2620 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2621 exp_name[PL_expect], s);
5f80b19c 2622 } );
463ee0b2
LW
2623
2624 retry:
378cc40b
LW
2625 switch (*s) {
2626 default:
7e2040f0 2627 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2628 goto keylookup;
cea2e8a9 2629 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2630 case 4:
2631 case 26:
2632 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2633 case 0:
3280af22
NIS
2634 if (!PL_rsfp) {
2635 PL_last_uni = 0;
2636 PL_last_lop = 0;
c5ee2135
WL
2637 if (PL_lex_brackets) {
2638 if (PL_lex_formbrack)
2639 yyerror("Format not terminated");
2640 else
2641 yyerror("Missing right curly or square bracket");
2642 }
4e553d73 2643 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2644 "### Tokener got EOF\n");
5f80b19c 2645 } );
79072805 2646 TOKEN(0);
463ee0b2 2647 }
3280af22 2648 if (s++ < PL_bufend)
a687059c 2649 goto retry; /* ignore stray nulls */
3280af22
NIS
2650 PL_last_uni = 0;
2651 PL_last_lop = 0;
2652 if (!PL_in_eval && !PL_preambled) {
2653 PL_preambled = TRUE;
2654 sv_setpv(PL_linestr,incl_perldb());
2655 if (SvCUR(PL_linestr))
4147a61b 2656 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2657 if (PL_preambleav){
2658 while(AvFILLp(PL_preambleav) >= 0) {
2659 SV *tmpsv = av_shift(PL_preambleav);
2660 sv_catsv(PL_linestr, tmpsv);
4147a61b 2661 sv_catpvn(PL_linestr, ";", 1);
91b7def8 2662 sv_free(tmpsv);
2663 }
3280af22
NIS
2664 sv_free((SV*)PL_preambleav);
2665 PL_preambleav = NULL;
91b7def8 2666 }
3280af22
NIS
2667 if (PL_minus_n || PL_minus_p) {
2668 sv_catpv(PL_linestr, "LINE: while (<>) {");
2669 if (PL_minus_l)
2670 sv_catpv(PL_linestr,"chomp;");
2671 if (PL_minus_a) {
3280af22 2672 if (PL_minus_F) {
3792a11b
NC
2673 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2674 || *PL_splitstr == '"')
3280af22 2675 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2676 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2677 else {
c8ef6a4b
NC
2678 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2679 bytes can be used as quoting characters. :-) */
48c4c863
NC
2680 /* The count here deliberately includes the NUL
2681 that terminates the C string constant. This
2682 embeds the opening NUL into the string. */
dd374669 2683 const char *splits = PL_splitstr;
9a3fb652 2684 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2685 do {
2686 /* Need to \ \s */
dd374669
AL
2687 if (*splits == '\\')
2688 sv_catpvn(PL_linestr, splits, 1);
2689 sv_catpvn(PL_linestr, splits, 1);
2690 } while (*splits++);
48c4c863
NC
2691 /* This loop will embed the trailing NUL of
2692 PL_linestr as the last thing it does before
2693 terminating. */
4fccd7c6 2694 sv_catpvn(PL_linestr, ");", 2);
54310121 2695 }
2304df62
AD
2696 }
2697 else
75c72d73 2698 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2699 }
79072805 2700 }
4147a61b 2701 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2702 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2703 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2704 PL_last_lop = PL_last_uni = Nullch;
3280af22 2705 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2706 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2707
2708 sv_upgrade(sv, SVt_PVMG);
3280af22 2709 sv_setsv(sv,PL_linestr);
0ac0412a 2710 (void)SvIOK_on(sv);
45977657 2711 SvIV_set(sv, 0);
57843af0 2712 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2713 }
79072805 2714 goto retry;
a687059c 2715 }
e929a76b 2716 do {
aa7440fb 2717 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2718 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2719 fake_eof:
2720 if (PL_rsfp) {
2721 if (PL_preprocess && !PL_in_eval)
2722 (void)PerlProc_pclose(PL_rsfp);
2723 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2724 PerlIO_clearerr(PL_rsfp);
2725 else
2726 (void)PerlIO_close(PL_rsfp);
2727 PL_rsfp = Nullfp;
2728 PL_doextract = FALSE;
2729 }
2730 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2731 sv_setpv(PL_linestr,PL_minus_p
2732 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2733 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2735 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2736 PL_minus_n = PL_minus_p = 0;
2737 goto retry;
2738 }
2739 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2740 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2741 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2742 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2743 }
7aa207d6
JH
2744 /* If it looks like the start of a BOM or raw UTF-16,
2745 * check if it in fact is. */
2746 else if (bof &&
2747 (*s == 0 ||
2748 *(U8*)s == 0xEF ||
2749 *(U8*)s >= 0xFE ||
2750 s[1] == 0)) {
226017aa 2751#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2752# ifdef __GNU_LIBRARY__
2753# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2754# define FTELL_FOR_PIPE_IS_BROKEN
2755# endif
e3f494f1
JH
2756# else
2757# ifdef __GLIBC__
2758# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2759# define FTELL_FOR_PIPE_IS_BROKEN
2760# endif
2761# endif
226017aa
DD
2762# endif
2763#endif
2764#ifdef FTELL_FOR_PIPE_IS_BROKEN
2765 /* This loses the possibility to detect the bof
2766 * situation on perl -P when the libc5 is being used.
2767 * Workaround? Maybe attach some extra state to PL_rsfp?
2768 */
2769 if (!PL_preprocess)
7e28d3af 2770 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2771#else
eb160463 2772 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2773#endif
7e28d3af 2774 if (bof) {
3280af22 2775 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2776 s = swallow_bom((U8*)s);
e929a76b 2777 }
378cc40b 2778 }
3280af22 2779 if (PL_doextract) {
a0d0e21e
LW
2780 /* Incest with pod. */
2781 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2782 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2783 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2785 PL_last_lop = PL_last_uni = Nullch;
3280af22 2786 PL_doextract = FALSE;
a0d0e21e 2787 }
4e553d73 2788 }
463ee0b2 2789 incline(s);
3280af22
NIS
2790 } while (PL_doextract);
2791 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2792 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2793 SV * const sv = NEWSV(85,0);
a687059c 2794
93a17b20 2795 sv_upgrade(sv, SVt_PVMG);
3280af22 2796 sv_setsv(sv,PL_linestr);
0ac0412a 2797 (void)SvIOK_on(sv);
45977657 2798 SvIV_set(sv, 0);
57843af0 2799 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2800 }
3280af22 2801 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2802 PL_last_lop = PL_last_uni = Nullch;
57843af0 2803 if (CopLINE(PL_curcop) == 1) {
3280af22 2804 while (s < PL_bufend && isSPACE(*s))
79072805 2805 s++;
a0d0e21e 2806 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2807 s++;
44a8e56a 2808 d = Nullch;
3280af22 2809 if (!PL_in_eval) {
44a8e56a 2810 if (*s == '#' && *(s+1) == '!')
2811 d = s + 2;
2812#ifdef ALTERNATE_SHEBANG
2813 else {
bfed75c6 2814 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 2815 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2816 d = s + (sizeof(as) - 1);
2817 }
2818#endif /* ALTERNATE_SHEBANG */
2819 }
2820 if (d) {
b8378b72 2821 char *ipath;
774d564b 2822 char *ipathend;
b8378b72 2823
774d564b 2824 while (isSPACE(*d))
b8378b72
CS
2825 d++;
2826 ipath = d;
774d564b 2827 while (*d && !isSPACE(*d))
2828 d++;
2829 ipathend = d;
2830
2831#ifdef ARG_ZERO_IS_SCRIPT
2832 if (ipathend > ipath) {
2833 /*
2834 * HP-UX (at least) sets argv[0] to the script name,
2835 * which makes $^X incorrect. And Digital UNIX and Linux,
2836 * at least, set argv[0] to the basename of the Perl
2837 * interpreter. So, having found "#!", we'll set it right.
2838 */
ee2f7564 2839 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2840 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2841 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2842 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2843 SvSETMAGIC(x);
2844 }
556c1dec
JH
2845 else {
2846 STRLEN blen;
2847 STRLEN llen;
cfd0369c 2848 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2849 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2850 if (llen < blen) {
2851 bstart += blen - llen;
2852 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2853 sv_setpvn(x, ipath, ipathend - ipath);
2854 SvSETMAGIC(x);
2855 }
2856 }
2857 }
774d564b 2858 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2859 }
774d564b 2860#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2861
2862 /*
2863 * Look for options.
2864 */
748a9306 2865 d = instr(s,"perl -");
84e30d1a 2866 if (!d) {
748a9306 2867 d = instr(s,"perl");
84e30d1a
GS
2868#if defined(DOSISH)
2869 /* avoid getting into infinite loops when shebang
2870 * line contains "Perl" rather than "perl" */
2871 if (!d) {
2872 for (d = ipathend-4; d >= ipath; --d) {
2873 if ((*d == 'p' || *d == 'P')
2874 && !ibcmp(d, "perl", 4))
2875 {
2876 break;
2877 }
2878 }
2879 if (d < ipath)
2880 d = Nullch;
2881 }
2882#endif
2883 }
44a8e56a 2884#ifdef ALTERNATE_SHEBANG
2885 /*
2886 * If the ALTERNATE_SHEBANG on this system starts with a
2887 * character that can be part of a Perl expression, then if
2888 * we see it but not "perl", we're probably looking at the
2889 * start of Perl code, not a request to hand off to some
2890 * other interpreter. Similarly, if "perl" is there, but
2891 * not in the first 'word' of the line, we assume the line
2892 * contains the start of the Perl program.
44a8e56a 2893 */
2894 if (d && *s != '#') {
f54cb97a 2895 const char *c = ipath;
44a8e56a 2896 while (*c && !strchr("; \t\r\n\f\v#", *c))
2897 c++;
2898 if (c < d)
2899 d = Nullch; /* "perl" not in first word; ignore */
2900 else
2901 *s = '#'; /* Don't try to parse shebang line */
2902 }
774d564b 2903#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2904#ifndef MACOS_TRADITIONAL
748a9306 2905 if (!d &&
44a8e56a 2906 *s == '#' &&
774d564b 2907 ipathend > ipath &&
3280af22 2908 !PL_minus_c &&
748a9306 2909 !instr(s,"indir") &&
3280af22 2910 instr(PL_origargv[0],"perl"))
748a9306 2911 {
27da23d5 2912 dVAR;
9f68db38 2913 char **newargv;
9f68db38 2914
774d564b 2915 *ipathend = '\0';
2916 s = ipathend + 1;
3280af22 2917 while (s < PL_bufend && isSPACE(*s))
9f68db38 2918 s++;
3280af22 2919 if (s < PL_bufend) {
a02a5408 2920 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2921 newargv[1] = s;
3280af22 2922 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2923 s++;
2924 *s = '\0';
3280af22 2925 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2926 }
2927 else
3280af22 2928 newargv = PL_origargv;
774d564b 2929 newargv[0] = ipath;
b35112e7 2930 PERL_FPU_PRE_EXEC
b4748376 2931 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2932 PERL_FPU_POST_EXEC
cea2e8a9 2933 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2934 }
bf4acbe4 2935#endif
748a9306 2936 if (d) {
f54cb97a
AL
2937 const U32 oldpdb = PL_perldb;
2938 const bool oldn = PL_minus_n;
2939 const bool oldp = PL_minus_p;
748a9306
LW
2940
2941 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2942 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2943
2944 if (*d++ == '-') {
f54cb97a 2945 const bool switches_done = PL_doswitches;
8cc95fdb 2946 do {
3ffe3ee4 2947 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2948 const char * const m = d;
8cc95fdb 2949 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2950 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2951 (int)(d - m), m);
2952 }
2953 d = moreswitches(d);
2954 } while (d);
f0b2cf55
YST
2955 if (PL_doswitches && !switches_done) {
2956 int argc = PL_origargc;
2957 char **argv = PL_origargv;
2958 do {
2959 argc--,argv++;
2960 } while (argc && argv[0][0] == '-' && argv[0][1]);
2961 init_argv_symbols(argc,argv);
2962 }
155aba94
GS
2963 if ((PERLDB_LINE && !oldpdb) ||
2964 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2965 /* if we have already added "LINE: while (<>) {",
2966 we must not do it again */
748a9306 2967 {
c69006e4 2968 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2969 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2970 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2971 PL_last_lop = PL_last_uni = Nullch;
3280af22 2972 PL_preambled = FALSE;
84902520 2973 if (PERLDB_LINE)
3280af22 2974 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2975 goto retry;
2976 }
a11ec5a9
RGS
2977 if (PL_doswitches && !switches_done) {
2978 int argc = PL_origargc;
2979 char **argv = PL_origargv;
2980 do {
2981 argc--,argv++;
2982 } while (argc && argv[0][0] == '-' && argv[0][1]);
2983 init_argv_symbols(argc,argv);
2984 }
a0d0e21e 2985 }
79072805 2986 }
9f68db38 2987 }
79072805 2988 }
3280af22
NIS
2989 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2990 PL_bufptr = s;
2991 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2992 return yylex();
ae986130 2993 }
378cc40b 2994 goto retry;
4fdae800 2995 case '\r':
6a27c188 2996#ifdef PERL_STRICT_CR
cea2e8a9 2997 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2998 Perl_croak(aTHX_
cc507455 2999 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3000#endif
4fdae800 3001 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3002#ifdef MACOS_TRADITIONAL
3003 case '\312':
3004#endif
378cc40b
LW
3005 s++;
3006 goto retry;
378cc40b 3007 case '#':
e929a76b 3008 case '\n':
3280af22 3009 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3010 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3011 /* handle eval qq[#line 1 "foo"\n ...] */
3012 CopLINE_dec(PL_curcop);
3013 incline(s);
3014 }
3280af22 3015 d = PL_bufend;
a687059c 3016 while (s < d && *s != '\n')
378cc40b 3017 s++;
0f85fab0 3018 if (s < d)
378cc40b 3019 s++;
78c267c1 3020 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3021 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3022 incline(s);
3280af22
NIS
3023 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3024 PL_bufptr = s;
3025 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3026 return yylex();
a687059c 3027 }
378cc40b 3028 }
a687059c 3029 else {
378cc40b 3030 *s = '\0';
3280af22 3031 PL_bufend = s;
a687059c 3032 }
378cc40b
LW
3033 goto retry;
3034 case '-':
79072805 3035 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
3036 I32 ftst = 0;
3037
378cc40b 3038 s++;
3280af22 3039 PL_bufptr = s;
748a9306
LW
3040 tmp = *s++;
3041
bf4acbe4 3042 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3043 s++;
3044
3045 if (strnEQ(s,"=>",2)) {
3280af22 3046 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 3047 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3048 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 3049 } );
748a9306
LW
3050 OPERATOR('-'); /* unary minus */
3051 }
3280af22 3052 PL_last_uni = PL_oldbufptr;
748a9306 3053 switch (tmp) {
e5edeb50
JH
3054 case 'r': ftst = OP_FTEREAD; break;
3055 case 'w': ftst = OP_FTEWRITE; break;
3056 case 'x': ftst = OP_FTEEXEC; break;
3057 case 'o': ftst = OP_FTEOWNED; break;
3058 case 'R': ftst = OP_FTRREAD; break;
3059 case 'W': ftst = OP_FTRWRITE; break;
3060 case 'X': ftst = OP_FTREXEC; break;
3061 case 'O': ftst = OP_FTROWNED; break;
3062 case 'e': ftst = OP_FTIS; break;
3063 case 'z': ftst = OP_FTZERO; break;
3064 case 's': ftst = OP_FTSIZE; break;
3065 case 'f': ftst = OP_FTFILE; break;
3066 case 'd': ftst = OP_FTDIR; break;
3067 case 'l': ftst = OP_FTLINK; break;
3068 case 'p': ftst = OP_FTPIPE; break;
3069 case 'S': ftst = OP_FTSOCK; break;
3070 case 'u': ftst = OP_FTSUID; break;
3071 case 'g': ftst = OP_FTSGID; break;
3072 case 'k': ftst = OP_FTSVTX; break;
3073 case 'b': ftst = OP_FTBLK; break;
3074 case 'c': ftst = OP_FTCHR; break;
3075 case 't': ftst = OP_FTTTY; break;
3076 case 'T': ftst = OP_FTTEXT; break;
3077 case 'B': ftst = OP_FTBINARY; break;
3078 case 'M': case 'A': case 'C':
3079 gv_fetchpv("\024",TRUE, SVt_PV);
3080 switch (tmp) {
3081 case 'M': ftst = OP_FTMTIME; break;
3082 case 'A': ftst = OP_FTATIME; break;
3083 case 'C': ftst = OP_FTCTIME; break;
3084 default: break;
3085 }
3086 break;
378cc40b 3087 default:
378cc40b
LW
3088 break;
3089 }
e5edeb50 3090 if (ftst) {
eb160463 3091 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3092 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3093 "### Saw file test %c\n", (int)ftst);
5f80b19c 3094 } );
e5edeb50
JH
3095 FTST(ftst);
3096 }
3097 else {
3098 /* Assume it was a minus followed by a one-letter named
3099 * subroutine call (or a -bareword), then. */
95c31fe3 3100 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3101 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3102 (int) tmp);
5f80b19c 3103 } );
3cf7b4c4 3104 s = --PL_bufptr;
e5edeb50 3105 }
378cc40b 3106 }
a687059c
LW
3107 tmp = *s++;
3108 if (*s == tmp) {
3109 s++;
3280af22 3110 if (PL_expect == XOPERATOR)
79072805
LW
3111 TERM(POSTDEC);
3112 else
3113 OPERATOR(PREDEC);
3114 }
3115 else if (*s == '>') {
3116 s++;
3117 s = skipspace(s);
7e2040f0 3118 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3119 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3120 TOKEN(ARROW);
79072805 3121 }
748a9306
LW
3122 else if (*s == '$')
3123 OPERATOR(ARROW);
463ee0b2 3124 else
748a9306 3125 TERM(ARROW);
a687059c 3126 }
3280af22 3127 if (PL_expect == XOPERATOR)
79072805
LW
3128 Aop(OP_SUBTRACT);
3129 else {
3280af22 3130 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3131 check_uni();
79072805 3132 OPERATOR('-'); /* unary minus */
2f3197b3 3133 }
79072805 3134
378cc40b 3135 case '+':
a687059c
LW
3136 tmp = *s++;
3137 if (*s == tmp) {
378cc40b 3138 s++;
3280af22 3139 if (PL_expect == XOPERATOR)
79072805
LW
3140 TERM(POSTINC);
3141 else
3142 OPERATOR(PREINC);
378cc40b 3143 }
3280af22 3144 if (PL_expect == XOPERATOR)
79072805
LW
3145 Aop(OP_ADD);
3146 else {
3280af22 3147 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3148 check_uni();
a687059c 3149 OPERATOR('+');
2f3197b3 3150 }
a687059c 3151
378cc40b 3152 case '*':
3280af22
NIS
3153 if (PL_expect != XOPERATOR) {
3154 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3155 PL_expect = XOPERATOR;
3156 force_ident(PL_tokenbuf, '*');
3157 if (!*PL_tokenbuf)
a0d0e21e 3158 PREREF('*');
79072805 3159 TERM('*');
a687059c 3160 }
79072805
LW
3161 s++;
3162 if (*s == '*') {
a687059c 3163 s++;
79072805 3164 PWop(OP_POW);
a687059c 3165 }
79072805
LW
3166 Mop(OP_MULTIPLY);
3167
378cc40b 3168 case '%':
3280af22 3169 if (PL_expect == XOPERATOR) {
bbce6d69 3170 ++s;
3171 Mop(OP_MODULO);
a687059c 3172 }
3280af22
NIS
3173 PL_tokenbuf[0] = '%';
3174 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3175 if (!PL_tokenbuf[1]) {
bbce6d69 3176 PREREF('%');
a687059c 3177 }
3280af22 3178 PL_pending_ident = '%';
bbce6d69 3179 TERM('%');
a687059c 3180
378cc40b 3181 case '^':
79072805 3182 s++;
a0d0e21e 3183 BOop(OP_BIT_XOR);
79072805 3184 case '[':
3280af22 3185 PL_lex_brackets++;
79072805 3186 /* FALL THROUGH */
378cc40b 3187 case '~':
378cc40b 3188 case ',':
378cc40b
LW
3189 tmp = *s++;
3190 OPERATOR(tmp);
a0d0e21e
LW
3191 case ':':
3192 if (s[1] == ':') {
3193 len = 0;
3194 goto just_a_word;
3195 }
3196 s++;
09bef843
SB
3197 switch (PL_expect) {
3198 OP *attrs;
3199 case XOPERATOR:
3200 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3201 break;
3202 PL_bufptr = s; /* update in case we back off */
3203 goto grabattrs;
3204 case XATTRBLOCK:
3205 PL_expect = XBLOCK;
3206 goto grabattrs;
3207 case XATTRTERM:
3208 PL_expect = XTERMBLOCK;
3209 grabattrs:
3210 s = skipspace(s);
3211 attrs = Nullop;
7e2040f0 3212 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3213 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3214 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3215 if (tmp < 0) tmp = -tmp;
3216 switch (tmp) {
3217 case KEY_or:
3218 case KEY_and:
c963b151 3219 case KEY_err:
f9829d6b
GS
3220 case KEY_for:
3221 case KEY_unless:
3222 case KEY_if:
3223 case KEY_while:
3224 case KEY_until:
3225 goto got_attrs;
3226 default:
3227 break;
3228 }
3229 }
09bef843
SB
3230 if (*d == '(') {
3231 d = scan_str(d,TRUE,TRUE);
3232 if (!d) {
09bef843
SB
3233 /* MUST advance bufptr here to avoid bogus
3234 "at end of line" context messages from yyerror().
3235 */
3236 PL_bufptr = s + len;
3237 yyerror("Unterminated attribute parameter in attribute list");
3238 if (attrs)
3239 op_free(attrs);
bbf60fe6 3240 return REPORT(0); /* EOF indicator */
09bef843
SB
3241 }
3242 }
3243 if (PL_lex_stuff) {
3244 SV *sv = newSVpvn(s, len);
3245 sv_catsv(sv, PL_lex_stuff);
3246 attrs = append_elem(OP_LIST, attrs,
3247 newSVOP(OP_CONST, 0, sv));
3248 SvREFCNT_dec(PL_lex_stuff);
3249 PL_lex_stuff = Nullsv;
3250 }
3251 else {
371fce9b
DM
3252 if (len == 6 && strnEQ(s, "unique", len)) {
3253 if (PL_in_my == KEY_our)
3254#ifdef USE_ITHREADS
3255 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3256#else
3257 ; /* skip to avoid loading attributes.pm */
3258#endif
bfed75c6 3259 else
371fce9b
DM
3260 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3261 }
3262
d3cea301
SB
3263 /* NOTE: any CV attrs applied here need to be part of
3264 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3265 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3266 CvLVALUE_on(PL_compcv);
3267 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3268 CvLOCKED_on(PL_compcv);
3269 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3270 CvMETHOD_on(PL_compcv);
06492da6
SF
3271 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3272 CvASSERTION_on(PL_compcv);
78f9721b
SM
3273 /* After we've set the flags, it could be argued that
3274 we don't need to do the attributes.pm-based setting
3275 process, and shouldn't bother appending recognized
d3cea301
SB
3276 flags. To experiment with that, uncomment the
3277 following "else". (Note that's already been
3278 uncommented. That keeps the above-applied built-in
3279 attributes from being intercepted (and possibly
3280 rejected) by a package's attribute routines, but is
3281 justified by the performance win for the common case
3282 of applying only built-in attributes.) */
0256094b 3283 else
78f9721b
SM
3284 attrs = append_elem(OP_LIST, attrs,
3285 newSVOP(OP_CONST, 0,
3286 newSVpvn(s, len)));
09bef843
SB
3287 }
3288 s = skipspace(d);
0120eecf 3289 if (*s == ':' && s[1] != ':')
09bef843 3290 s = skipspace(s+1);
0120eecf
GS
3291 else if (s == d)
3292 break; /* require real whitespace or :'s */
09bef843 3293 }
f9829d6b 3294 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3295 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3296 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3297 /* If here for an expression, and parsed no attrs, back off. */
3298 if (tmp == '=' && !attrs) {
3299 s = PL_bufptr;
3300 break;
3301 }
3302 /* MUST advance bufptr here to avoid bogus "at end of line"
3303 context messages from yyerror().
3304 */
3305 PL_bufptr = s;
3306 if (!*s)
3307 yyerror("Unterminated attribute list");
3308 else
3309 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3310 q, *s, q));
3311 if (attrs)
3312 op_free(attrs);
3313 OPERATOR(':');
3314 }
f9829d6b 3315 got_attrs:
09bef843
SB
3316 if (attrs) {
3317 PL_nextval[PL_nexttoke].opval = attrs;
3318 force_next(THING);
3319 }
3320 TOKEN(COLONATTR);
3321 }
a0d0e21e 3322 OPERATOR(':');
8990e307
LW
3323 case '(':
3324 s++;
3280af22
NIS
3325 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3326 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3327 else
3280af22 3328 PL_expect = XTERM;
4a202259 3329 s = skipspace(s);
a0d0e21e 3330 TOKEN('(');
378cc40b 3331 case ';':
f4dd75d9 3332 CLINE;
378cc40b
LW
3333 tmp = *s++;
3334 OPERATOR(tmp);
3335 case ')':
378cc40b 3336 tmp = *s++;
16d20bd9
AD
3337 s = skipspace(s);
3338 if (*s == '{')
3339 PREBLOCK(tmp);
378cc40b 3340 TERM(tmp);
79072805
LW
3341 case ']':
3342 s++;
3280af22 3343 if (PL_lex_brackets <= 0)
d98d5fff 3344 yyerror("Unmatched right square bracket");
463ee0b2 3345 else
3280af22
NIS
3346 --PL_lex_brackets;
3347 if (PL_lex_state == LEX_INTERPNORMAL) {
3348 if (PL_lex_brackets == 0) {
a0d0e21e 3349 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3350 PL_lex_state = LEX_INTERPEND;
79072805
LW
3351 }
3352 }
4633a7c4 3353 TERM(']');
79072805
LW
3354 case '{':
3355 leftbracket:
79072805 3356 s++;
3280af22 3357 if (PL_lex_brackets > 100) {
8edd5f42 3358 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3359 }
3280af22 3360 switch (PL_expect) {
a0d0e21e 3361 case XTERM:
3280af22 3362 if (PL_lex_formbrack) {
a0d0e21e
LW
3363 s--;
3364 PRETERMBLOCK(DO);
3365 }
3280af22
NIS
3366 if (PL_oldoldbufptr == PL_last_lop)
3367 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3368 else
3280af22 3369 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3370 OPERATOR(HASHBRACK);
a0d0e21e 3371 case XOPERATOR:
bf4acbe4 3372 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3373 s++;
44a8e56a 3374 d = s;
3280af22
NIS
3375 PL_tokenbuf[0] = '\0';
3376 if (d < PL_bufend && *d == '-') {
3377 PL_tokenbuf[0] = '-';
44a8e56a 3378 d++;
bf4acbe4 3379 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3380 d++;
3381 }
7e2040f0 3382 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3383 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3384 FALSE, &len);
bf4acbe4 3385 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3386 d++;
3387 if (*d == '}') {
f54cb97a 3388 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3389 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3390 if (minus)
3391 force_next('-');
748a9306
LW
3392 }
3393 }
3394 /* FALL THROUGH */
09bef843 3395 case XATTRBLOCK:
748a9306 3396 case XBLOCK:
3280af22
NIS
3397 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3398 PL_expect = XSTATE;
a0d0e21e 3399 break;
09bef843 3400 case XATTRTERM:
a0d0e21e 3401 case XTERMBLOCK:
3280af22
NIS
3402 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3403 PL_expect = XSTATE;
a0d0e21e
LW
3404 break;
3405 default: {
f54cb97a 3406 const char *t;
3280af22
NIS
3407 if (PL_oldoldbufptr == PL_last_lop)
3408 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3409 else
3280af22 3410 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3411 s = skipspace(s);
8452ff4b
SB
3412 if (*s == '}') {
3413 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3414 PL_expect = XTERM;
3415 /* This hack is to get the ${} in the message. */
3416 PL_bufptr = s+1;
3417 yyerror("syntax error");
3418 break;
3419 }
a0d0e21e 3420 OPERATOR(HASHBRACK);
8452ff4b 3421 }
b8a4b1be
GS
3422 /* This hack serves to disambiguate a pair of curlies
3423 * as being a block or an anon hash. Normally, expectation
3424 * determines that, but in cases where we're not in a
3425 * position to expect anything in particular (like inside
3426 * eval"") we have to resolve the ambiguity. This code
3427 * covers the case where the first term in the curlies is a
3428 * quoted string. Most other cases need to be explicitly
a0288114 3429 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3430 * curly in order to force resolution as an anon hash.
3431 *
3432 * XXX should probably propagate the outer expectation
3433 * into eval"" to rely less on this hack, but that could
3434 * potentially break current behavior of eval"".
3435 * GSAR 97-07-21
3436 */
3437 t = s;
3438 if (*s == '\'' || *s == '"' || *s == '`') {
3439 /* common case: get past first string, handling escapes */
3280af22 3440 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3441 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3442 t++;
3443 t++;
a0d0e21e 3444 }
b8a4b1be 3445 else if (*s == 'q') {
3280af22 3446 if (++t < PL_bufend
b8a4b1be 3447 && (!isALNUM(*t)
3280af22 3448 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3449 && !isALNUM(*t))))
3450 {
abc667d1 3451 /* skip q//-like construct */
f54cb97a 3452 const char *tmps;
b8a4b1be
GS
3453 char open, close, term;
3454 I32 brackets = 1;
3455
3280af22 3456 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3457 t++;
abc667d1
DM
3458 /* check for q => */
3459 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3460 OPERATOR(HASHBRACK);
3461 }
b8a4b1be
GS
3462 term = *t;
3463 open = term;
3464 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3465 term = tmps[5];
3466 close = term;
3467 if (open == close)
3280af22
NIS
3468 for (t++; t < PL_bufend; t++) {
3469 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3470 t++;
6d07e5e9 3471 else if (*t == open)
b8a4b1be
GS
3472 break;
3473 }
abc667d1 3474 else {
3280af22
NIS
3475 for (t++; t < PL_bufend; t++) {
3476 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3477 t++;
6d07e5e9 3478 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3479 break;
3480 else if (*t == open)
3481 brackets++;
3482 }
abc667d1
DM
3483 }
3484 t++;
b8a4b1be 3485 }
abc667d1
DM
3486 else
3487 /* skip plain q word */
3488 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3489 t += UTF8SKIP(t);
a0d0e21e 3490 }
7e2040f0 3491 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3492 t += UTF8SKIP(t);
7e2040f0 3493 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3494 t += UTF8SKIP(t);
a0d0e21e 3495 }
3280af22 3496 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3497 t++;
b8a4b1be
GS
3498 /* if comma follows first term, call it an anon hash */
3499 /* XXX it could be a comma expression with loop modifiers */
3280af22 3500 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3501 || (*t == '=' && t[1] == '>')))
a0d0e21e 3502 OPERATOR(HASHBRACK);
3280af22 3503 if (PL_expect == XREF)
4e4e412b 3504 PL_expect = XTERM;
a0d0e21e 3505 else {
3280af22
NIS
3506 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3507 PL_expect = XSTATE;
a0d0e21e 3508 }
8990e307 3509 }
a0d0e21e 3510 break;
463ee0b2 3511 }
57843af0 3512 yylval.ival = CopLINE(PL_curcop);
79072805 3513 if (isSPACE(*s) || *s == '#')
3280af22 3514 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3515 TOKEN('{');
378cc40b 3516 case '}':
79072805
LW
3517 rightbracket:
3518 s++;
3280af22 3519 if (PL_lex_brackets <= 0)
d98d5fff 3520 yyerror("Unmatched right curly bracket");
463ee0b2 3521 else
3280af22 3522 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3523 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3524 PL_lex_formbrack = 0;
3525 if (PL_lex_state == LEX_INTERPNORMAL) {
3526 if (PL_lex_brackets == 0) {
9059aa12
LW
3527 if (PL_expect & XFAKEBRACK) {
3528 PL_expect &= XENUMMA