This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [MAINT24637] fails "make test" on OSX 10.4 (lib/locale.t)
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
27da23d5
JH
29static const char ident_too_long[] =
30 "Identifier too long";
31static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
8903cb82 35
acfe0abc 36static void restore_rsfp(pTHX_ void *f);
6e3aabd6 37#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
38static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 40#endif
51371543 41
9059aa12
LW
42#define XFAKEBRACK 128
43#define XENUMMASK 127
44
39e02b42
JH
45#ifdef USE_UTF8_SCRIPTS
46# define UTF (!IN_BYTES)
2b9d42f0 47#else
746b446a 48# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 49#endif
a0ed51b3 50
61f0cdd9 51/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
52 * 1999-02-27 mjd-perl-patch@plover.com */
53#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
54
bf4acbe4
GS
55/* On MacOS, respect nonbreaking spaces */
56#ifdef MACOS_TRADITIONAL
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
58#else
59#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60#endif
61
ffb4593c
NT
62/* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
79072805
LW
64 * can get by with a single comparison (if the compiler is smart enough).
65 */
66
fb73857a 67/* #define LEX_NOTPARSING 11 is done in perl.h. */
68
55497cff 69#define LEX_NORMAL 10
70#define LEX_INTERPNORMAL 9
71#define LEX_INTERPCASEMOD 8
72#define LEX_INTERPPUSH 7
73#define LEX_INTERPSTART 6
74#define LEX_INTERPEND 5
75#define LEX_INTERPENDMAYBE 4
76#define LEX_INTERPCONCAT 3
77#define LEX_INTERPCONST 2
78#define LEX_FORMLINE 1
79#define LEX_KNOWNEXT 0
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 139# define REPORT(retval) tokereport(s,(int)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
376fcdbf
AL
170#define UNI2(f,x) { \
171 yylval.ival = f; \
172 PL_expect = x; \
173 PL_bufptr = s; \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
176 if (*s == '(') \
177 return REPORT( (int)FUNC1 ); \
178 s = skipspace(s); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
180 }
6f33ba73
RGS
181#define UNI(f) UNI2(f,XTERM)
182#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 183
376fcdbf
AL
184#define UNIBRACK(f) { \
185 yylval.ival = f; \
186 PL_bufptr = s; \
187 PL_last_uni = PL_oldbufptr; \
188 if (*s == '(') \
189 return REPORT( (int)FUNC1 ); \
190 s = skipspace(s); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
192 }
79072805 193
9f68db38 194/* grandfather return to old style */
3280af22 195#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 196
8fa7f367
JH
197#ifdef DEBUGGING
198
bbf60fe6
DM
199/* how to interpret the yylval associated with the token */
200enum token_type {
201 TOKENTYPE_NONE,
202 TOKENTYPE_IVAL,
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
204 TOKENTYPE_PVAL,
205 TOKENTYPE_OPVAL,
206 TOKENTYPE_GVVAL
207};
208
27da23d5
JH
209static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
9041c2e3 211{
bbf60fe6
DM
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DO, TOKENTYPE_NONE, "DO" },
223 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
224 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
225 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
226 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
227 { ELSE, TOKENTYPE_NONE, "ELSE" },
228 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
229 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
230 { FOR, TOKENTYPE_IVAL, "FOR" },
231 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
232 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
233 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
234 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
235 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
236 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
237 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
238 { IF, TOKENTYPE_IVAL, "IF" },
239 { LABEL, TOKENTYPE_PVAL, "LABEL" },
240 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
241 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
242 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
243 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
244 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
245 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
246 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
247 { MY, TOKENTYPE_IVAL, "MY" },
248 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
249 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
250 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
251 { OROP, TOKENTYPE_IVAL, "OROP" },
252 { OROR, TOKENTYPE_NONE, "OROR" },
253 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
254 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
255 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
256 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
257 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
258 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
259 { PREINC, TOKENTYPE_NONE, "PREINC" },
260 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
261 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
262 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
263 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
264 { SUB, TOKENTYPE_NONE, "SUB" },
265 { THING, TOKENTYPE_OPVAL, "THING" },
266 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
267 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
268 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
269 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
270 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
271 { USE, TOKENTYPE_IVAL, "USE" },
272 { WHILE, TOKENTYPE_IVAL, "WHILE" },
273 { WORD, TOKENTYPE_OPVAL, "WORD" },
274 { 0, TOKENTYPE_NONE, 0 }
275};
276
277/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 278
bbf60fe6 279STATIC int
bfed75c6 280S_tokereport(pTHX_ const char* s, I32 rv)
bbf60fe6
DM
281{
282 if (DEBUG_T_TEST) {
bfed75c6 283 const char *name = Nullch;
bbf60fe6 284 enum token_type type = TOKENTYPE_NONE;
f54cb97a 285 const struct debug_tokens *p;
9d4ba2ae 286 SV* const report = newSVpvn("<== ", 4);
bbf60fe6 287
f54cb97a 288 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
289 if (p->token == (int)rv) {
290 name = p->name;
291 type = p->type;
292 break;
293 }
294 }
295 if (name)
54667de8 296 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
297 else if ((char)rv > ' ' && (char)rv < '~')
298 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
299 else if (!rv)
54667de8 300 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
301 else
302 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
303 switch (type) {
304 case TOKENTYPE_NONE:
305 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
306 break;
307 case TOKENTYPE_IVAL:
e4584336 308 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
309 break;
310 case TOKENTYPE_OPNUM:
311 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
312 PL_op_name[yylval.ival]);
313 break;
314 case TOKENTYPE_PVAL:
315 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
316 break;
317 case TOKENTYPE_OPVAL:
401441c0
RGS
318 if (yylval.opval)
319 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 320 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
321 else
322 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
323 break;
324 }
e4584336 325 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
998054bd
SC
326 if (s - PL_bufptr > 0)
327 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
328 else {
329 if (PL_oldbufptr && *PL_oldbufptr)
330 sv_catpv(report, PL_tokenbuf);
331 }
cfd0369c 332 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen_const(report));
bbf60fe6
DM
333 };
334 return (int)rv;
998054bd
SC
335}
336
8fa7f367
JH
337#endif
338
ffb4593c
NT
339/*
340 * S_ao
341 *
c963b151
BD
342 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
343 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
344 */
345
76e3520e 346STATIC int
cea2e8a9 347S_ao(pTHX_ int toketype)
a0d0e21e 348{
3280af22
NIS
349 if (*PL_bufptr == '=') {
350 PL_bufptr++;
a0d0e21e
LW
351 if (toketype == ANDAND)
352 yylval.ival = OP_ANDASSIGN;
353 else if (toketype == OROR)
354 yylval.ival = OP_ORASSIGN;
c963b151
BD
355 else if (toketype == DORDOR)
356 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
357 toketype = ASSIGNOP;
358 }
359 return toketype;
360}
361
ffb4593c
NT
362/*
363 * S_no_op
364 * When Perl expects an operator and finds something else, no_op
365 * prints the warning. It always prints "<something> found where
366 * operator expected. It prints "Missing semicolon on previous line?"
367 * if the surprise occurs at the start of the line. "do you need to
368 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
369 * where the compiler doesn't know if foo is a method call or a function.
370 * It prints "Missing operator before end of line" if there's nothing
371 * after the missing operator, or "... before <...>" if there is something
372 * after the missing operator.
373 */
374
76e3520e 375STATIC void
bfed75c6 376S_no_op(pTHX_ const char *what, char *s)
463ee0b2 377{
9d4ba2ae
AL
378 char * const oldbp = PL_bufptr;
379 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 380
1189a94a
GS
381 if (!s)
382 s = oldbp;
07c798fb 383 else
1189a94a 384 PL_bufptr = s;
cea2e8a9 385 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
386 if (ckWARN_d(WARN_SYNTAX)) {
387 if (is_first)
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Missing semicolon on previous line?)\n");
390 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 391 const char *t;
56da5a46
RGS
392 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
393 if (t < PL_bufptr && isSPACE(*t))
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Do you need to predeclare %.*s?)\n",
396 t - PL_oldoldbufptr, PL_oldoldbufptr);
397 }
398 else {
399 assert(s >= oldbp);
400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
401 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
402 }
07c798fb 403 }
3280af22 404 PL_bufptr = oldbp;
8990e307
LW
405}
406
ffb4593c
NT
407/*
408 * S_missingterm
409 * Complain about missing quote/regexp/heredoc terminator.
410 * If it's called with (char *)NULL then it cauterizes the line buffer.
411 * If we're in a delimited string and the delimiter is a control
412 * character, it's reformatted into a two-char sequence like ^C.
413 * This is fatal.
414 */
415
76e3520e 416STATIC void
cea2e8a9 417S_missingterm(pTHX_ char *s)
8990e307
LW
418{
419 char tmpbuf[3];
420 char q;
421 if (s) {
9d4ba2ae 422 char * const nl = strrchr(s,'\n');
d2719217 423 if (nl)
8990e307
LW
424 *nl = '\0';
425 }
9d116dd7
JH
426 else if (
427#ifdef EBCDIC
428 iscntrl(PL_multi_close)
429#else
430 PL_multi_close < 32 || PL_multi_close == 127
431#endif
432 ) {
8990e307 433 *tmpbuf = '^';
3280af22 434 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
435 tmpbuf[2] = '\0';
436 s = tmpbuf;
437 }
438 else {
eb160463 439 *tmpbuf = (char)PL_multi_close;
8990e307
LW
440 tmpbuf[1] = '\0';
441 s = tmpbuf;
442 }
443 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 444 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 445}
79072805 446
ffb4593c
NT
447/*
448 * Perl_deprecate
ffb4593c
NT
449 */
450
79072805 451void
bfed75c6 452Perl_deprecate(pTHX_ const char *s)
a0d0e21e 453{
599cee73 454 if (ckWARN(WARN_DEPRECATED))
9014280d 455 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
456}
457
12bcd1a6 458void
bfed75c6 459Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
460{
461 /* This function should NOT be called for any new deprecated warnings */
462 /* Use Perl_deprecate instead */
463 /* */
464 /* It is here to maintain backward compatibility with the pre-5.8 */
465 /* warnings category hierarchy. The "deprecated" category used to */
466 /* live under the "syntax" category. It is now a top-level category */
467 /* in its own right. */
468
469 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 470 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
471 "Use of %s is deprecated", s);
472}
473
ffb4593c
NT
474/*
475 * depcom
9cbb5ea2 476 * Deprecate a comma-less variable list.
ffb4593c
NT
477 */
478
76e3520e 479STATIC void
cea2e8a9 480S_depcom(pTHX)
a0d0e21e 481{
12bcd1a6 482 deprecate_old("comma-less variable list");
a0d0e21e
LW
483}
484
ffb4593c 485/*
9cbb5ea2
GS
486 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
487 * utf16-to-utf8-reversed.
ffb4593c
NT
488 */
489
c39cd008
GS
490#ifdef PERL_CR_FILTER
491static void
492strip_return(SV *sv)
493{
95a20fc0 494 register const char *s = SvPVX_const(sv);
9d4ba2ae 495 register const char * const e = s + SvCUR(sv);
c39cd008
GS
496 /* outer loop optimized to do nothing if there are no CR-LFs */
497 while (s < e) {
498 if (*s++ == '\r' && *s == '\n') {
499 /* hit a CR-LF, need to copy the rest */
500 register char *d = s - 1;
501 *d++ = *s++;
502 while (s < e) {
503 if (*s == '\r' && s[1] == '\n')
504 s++;
505 *d++ = *s++;
506 }
507 SvCUR(sv) -= s - d;
508 return;
509 }
510 }
511}
a868473f 512
76e3520e 513STATIC I32
c39cd008 514S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 515{
f54cb97a 516 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
517 if (count > 0 && !maxlen)
518 strip_return(sv);
519 return count;
a868473f
NIS
520}
521#endif
522
ffb4593c
NT
523/*
524 * Perl_lex_start
9cbb5ea2
GS
525 * Initialize variables. Uses the Perl save_stack to save its state (for
526 * recursive calls to the parser).
ffb4593c
NT
527 */
528
a0d0e21e 529void
864dbfa3 530Perl_lex_start(pTHX_ SV *line)
79072805 531{
cfd0369c 532 const char *s;
8990e307
LW
533 STRLEN len;
534
3280af22
NIS
535 SAVEI32(PL_lex_dojoin);
536 SAVEI32(PL_lex_brackets);
3280af22
NIS
537 SAVEI32(PL_lex_casemods);
538 SAVEI32(PL_lex_starts);
539 SAVEI32(PL_lex_state);
7766f137 540 SAVEVPTR(PL_lex_inpat);
3280af22 541 SAVEI32(PL_lex_inwhat);
18b09519
GS
542 if (PL_lex_state == LEX_KNOWNEXT) {
543 I32 toke = PL_nexttoke;
544 while (--toke >= 0) {
545 SAVEI32(PL_nexttype[toke]);
546 SAVEVPTR(PL_nextval[toke]);
547 }
548 SAVEI32(PL_nexttoke);
18b09519 549 }
57843af0 550 SAVECOPLINE(PL_curcop);
3280af22
NIS
551 SAVEPPTR(PL_bufptr);
552 SAVEPPTR(PL_bufend);
553 SAVEPPTR(PL_oldbufptr);
554 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
555 SAVEPPTR(PL_last_lop);
556 SAVEPPTR(PL_last_uni);
3280af22
NIS
557 SAVEPPTR(PL_linestart);
558 SAVESPTR(PL_linestr);
8edd5f42
RGS
559 SAVEGENERICPV(PL_lex_brackstack);
560 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 561 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
562 SAVESPTR(PL_lex_stuff);
563 SAVEI32(PL_lex_defer);
09bef843 564 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 565 SAVESPTR(PL_lex_repl);
bebdddfc
GS
566 SAVEINT(PL_expect);
567 SAVEINT(PL_lex_expect);
3280af22
NIS
568
569 PL_lex_state = LEX_NORMAL;
570 PL_lex_defer = 0;
571 PL_expect = XSTATE;
572 PL_lex_brackets = 0;
a02a5408
JC
573 Newx(PL_lex_brackstack, 120, char);
574 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
575 PL_lex_casemods = 0;
576 *PL_lex_casestack = '\0';
577 PL_lex_dojoin = 0;
578 PL_lex_starts = 0;
579 PL_lex_stuff = Nullsv;
580 PL_lex_repl = Nullsv;
581 PL_lex_inpat = 0;
76be56bc 582 PL_nexttoke = 0;
3280af22 583 PL_lex_inwhat = 0;
09bef843 584 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
585 PL_linestr = line;
586 if (SvREADONLY(PL_linestr))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 588 s = SvPV_const(PL_linestr, len);
6f27f9a7 589 if (!len || s[len-1] != ';') {
3280af22
NIS
590 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
591 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
592 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 593 }
3280af22
NIS
594 SvTEMP_off(PL_linestr);
595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 597 PL_last_lop = PL_last_uni = Nullch;
3280af22 598 PL_rsfp = 0;
79072805 599}
a687059c 600
ffb4593c
NT
601/*
602 * Perl_lex_end
9cbb5ea2
GS
603 * Finalizer for lexing operations. Must be called when the parser is
604 * done with the lexer.
ffb4593c
NT
605 */
606
463ee0b2 607void
864dbfa3 608Perl_lex_end(pTHX)
463ee0b2 609{
3280af22 610 PL_doextract = FALSE;
463ee0b2
LW
611}
612
ffb4593c
NT
613/*
614 * S_incline
615 * This subroutine has nothing to do with tilting, whether at windmills
616 * or pinball tables. Its name is short for "increment line". It
57843af0 617 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 618 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
619 * # line 500 "foo.pm"
620 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
621 */
622
76e3520e 623STATIC void
cea2e8a9 624S_incline(pTHX_ char *s)
463ee0b2
LW
625{
626 char *t;
627 char *n;
73659bf1 628 char *e;
463ee0b2 629 char ch;
463ee0b2 630
57843af0 631 CopLINE_inc(PL_curcop);
463ee0b2
LW
632 if (*s++ != '#')
633 return;
bf4acbe4 634 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
635 if (strnEQ(s, "line", 4))
636 s += 4;
637 else
638 return;
084592ab 639 if (SPACE_OR_TAB(*s))
73659bf1 640 s++;
4e553d73 641 else
73659bf1 642 return;
bf4acbe4 643 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
644 if (!isDIGIT(*s))
645 return;
646 n = s;
647 while (isDIGIT(*s))
648 s++;
bf4acbe4 649 while (SPACE_OR_TAB(*s))
463ee0b2 650 s++;
73659bf1 651 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 652 s++;
73659bf1
GS
653 e = t + 1;
654 }
463ee0b2 655 else {
463ee0b2 656 for (t = s; !isSPACE(*t); t++) ;
73659bf1 657 e = t;
463ee0b2 658 }
bf4acbe4 659 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
660 e++;
661 if (*e != '\n' && *e != '\0')
662 return; /* false alarm */
663
463ee0b2
LW
664 ch = *t;
665 *t = '\0';
f4dd75d9 666 if (t - s > 0) {
05ec9bb3 667 CopFILE_free(PL_curcop);
57843af0 668 CopFILE_set(PL_curcop, s);
f4dd75d9 669 }
463ee0b2 670 *t = ch;
57843af0 671 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
672}
673
ffb4593c
NT
674/*
675 * S_skipspace
676 * Called to gobble the appropriate amount and type of whitespace.
677 * Skips comments as well.
678 */
679
76e3520e 680STATIC char *
cea2e8a9 681S_skipspace(pTHX_ register char *s)
a687059c 682{
3280af22 683 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 684 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
685 s++;
686 return s;
687 }
688 for (;;) {
fd049845 689 STRLEN prevlen;
09bef843 690 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 691 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
692 while (s < PL_bufend && isSPACE(*s)) {
693 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
694 incline(s);
695 }
ffb4593c
NT
696
697 /* comment */
3280af22
NIS
698 if (s < PL_bufend && *s == '#') {
699 while (s < PL_bufend && *s != '\n')
463ee0b2 700 s++;
60e6418e 701 if (s < PL_bufend) {
463ee0b2 702 s++;
60e6418e
GS
703 if (PL_in_eval && !PL_rsfp) {
704 incline(s);
705 continue;
706 }
707 }
463ee0b2 708 }
ffb4593c
NT
709
710 /* only continue to recharge the buffer if we're at the end
711 * of the buffer, we're not reading from a source filter, and
712 * we're in normal lexing mode
713 */
09bef843
SB
714 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
715 PL_lex_state == LEX_FORMLINE)
463ee0b2 716 return s;
ffb4593c
NT
717
718 /* try to recharge the buffer */
9cbb5ea2
GS
719 if ((s = filter_gets(PL_linestr, PL_rsfp,
720 (prevlen = SvCUR(PL_linestr)))) == Nullch)
721 {
722 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
723 if (PL_minus_p) {
724 sv_setpv(PL_linestr,
725 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 726 PL_minus_n = PL_minus_p = 0;
a0d0e21e 727 }
01a19ab0
NC
728 else if (PL_minus_n) {
729 sv_setpvn(PL_linestr, ";}", 2);
730 PL_minus_n = 0;
731 }
a0d0e21e 732 else
4147a61b 733 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
734
735 /* reset variables for next time we lex */
9cbb5ea2
GS
736 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
737 = SvPVX(PL_linestr);
3280af22 738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 739 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
740
741 /* Close the filehandle. Could be from -P preprocessor,
742 * STDIN, or a regular file. If we were reading code from
743 * STDIN (because the commandline held no -e or filename)
744 * then we don't close it, we reset it so the code can
745 * read from STDIN too.
746 */
747
3280af22
NIS
748 if (PL_preprocess && !PL_in_eval)
749 (void)PerlProc_pclose(PL_rsfp);
750 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
751 PerlIO_clearerr(PL_rsfp);
8990e307 752 else
3280af22
NIS
753 (void)PerlIO_close(PL_rsfp);
754 PL_rsfp = Nullfp;
463ee0b2
LW
755 return s;
756 }
ffb4593c
NT
757
758 /* not at end of file, so we only read another line */
09bef843
SB
759 /* make corresponding updates to old pointers, for yyerror() */
760 oldprevlen = PL_oldbufptr - PL_bufend;
761 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
762 if (PL_last_uni)
763 oldunilen = PL_last_uni - PL_bufend;
764 if (PL_last_lop)
765 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
766 PL_linestart = PL_bufptr = s + prevlen;
767 PL_bufend = s + SvCUR(PL_linestr);
768 s = PL_bufptr;
09bef843
SB
769 PL_oldbufptr = s + oldprevlen;
770 PL_oldoldbufptr = s + oldoldprevlen;
771 if (PL_last_uni)
772 PL_last_uni = s + oldunilen;
773 if (PL_last_lop)
774 PL_last_lop = s + oldloplen;
a0d0e21e 775 incline(s);
ffb4593c
NT
776
777 /* debugger active and we're not compiling the debugger code,
778 * so store the line into the debugger's array of lines
779 */
3280af22 780 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 781 SV * const sv = NEWSV(85,0);
8990e307
LW
782
783 sv_upgrade(sv, SVt_PVMG);
3280af22 784 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 785 (void)SvIOK_on(sv);
45977657 786 SvIV_set(sv, 0);
57843af0 787 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 788 }
463ee0b2 789 }
a687059c 790}
378cc40b 791
ffb4593c
NT
792/*
793 * S_check_uni
794 * Check the unary operators to ensure there's no ambiguity in how they're
795 * used. An ambiguous piece of code would be:
796 * rand + 5
797 * This doesn't mean rand() + 5. Because rand() is a unary operator,
798 * the +5 is its argument.
799 */
800
76e3520e 801STATIC void
cea2e8a9 802S_check_uni(pTHX)
ba106d47 803{
2f3197b3 804 char *s;
a0d0e21e 805 char *t;
2f3197b3 806
3280af22 807 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 808 return;
3280af22
NIS
809 while (isSPACE(*PL_last_uni))
810 PL_last_uni++;
7e2040f0 811 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 812 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 813 return;
0453d815 814 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 815 const char ch = *s;
0453d815 816 *s = '\0';
9014280d 817 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 818 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
819 PL_last_uni);
820 *s = ch;
821 }
2f3197b3
LW
822}
823
ffb4593c
NT
824/*
825 * LOP : macro to build a list operator. Its behaviour has been replaced
826 * with a subroutine, S_lop() for which LOP is just another name.
827 */
828
a0d0e21e
LW
829#define LOP(f,x) return lop(f,x,s)
830
ffb4593c
NT
831/*
832 * S_lop
833 * Build a list operator (or something that might be one). The rules:
834 * - if we have a next token, then it's a list operator [why?]
835 * - if the next thing is an opening paren, then it's a function
836 * - else it's a list operator
837 */
838
76e3520e 839STATIC I32
a0be28da 840S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 841{
79072805 842 yylval.ival = f;
35c8bce7 843 CLINE;
3280af22
NIS
844 PL_expect = x;
845 PL_bufptr = s;
846 PL_last_lop = PL_oldbufptr;
eb160463 847 PL_last_lop_op = (OPCODE)f;
3280af22 848 if (PL_nexttoke)
bbf60fe6 849 return REPORT(LSTOP);
79072805 850 if (*s == '(')
bbf60fe6 851 return REPORT(FUNC);
79072805
LW
852 s = skipspace(s);
853 if (*s == '(')
bbf60fe6 854 return REPORT(FUNC);
79072805 855 else
bbf60fe6 856 return REPORT(LSTOP);
79072805
LW
857}
858
ffb4593c
NT
859/*
860 * S_force_next
9cbb5ea2 861 * When the lexer realizes it knows the next token (for instance,
ffb4593c 862 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
863 * to know what token to return the next time the lexer is called. Caller
864 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
865 * handles the token correctly.
ffb4593c
NT
866 */
867
4e553d73 868STATIC void
cea2e8a9 869S_force_next(pTHX_ I32 type)
79072805 870{
3280af22
NIS
871 PL_nexttype[PL_nexttoke] = type;
872 PL_nexttoke++;
873 if (PL_lex_state != LEX_KNOWNEXT) {
874 PL_lex_defer = PL_lex_state;
875 PL_lex_expect = PL_expect;
876 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
877 }
878}
879
d0a148a6
NC
880STATIC SV *
881S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
882{
9d4ba2ae 883 SV * const sv = newSVpvn(start,len);
bfed75c6 884 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
885 SvUTF8_on(sv);
886 return sv;
887}
888
ffb4593c
NT
889/*
890 * S_force_word
891 * When the lexer knows the next thing is a word (for instance, it has
892 * just seen -> and it knows that the next char is a word char, then
893 * it calls S_force_word to stick the next word into the PL_next lookahead.
894 *
895 * Arguments:
b1b65b59 896 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
897 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
898 * int check_keyword : if true, Perl checks to make sure the word isn't
899 * a keyword (do this if the word is a label, e.g. goto FOO)
900 * int allow_pack : if true, : characters will also be allowed (require,
901 * use, etc. do this)
9cbb5ea2 902 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
903 */
904
76e3520e 905STATIC char *
cea2e8a9 906S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 907{
463ee0b2
LW
908 register char *s;
909 STRLEN len;
4e553d73 910
463ee0b2
LW
911 start = skipspace(start);
912 s = start;
7e2040f0 913 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 914 (allow_pack && *s == ':') ||
15f0808c 915 (allow_initial_tick && *s == '\'') )
a0d0e21e 916 {
3280af22
NIS
917 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
918 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
919 return start;
920 if (token == METHOD) {
921 s = skipspace(s);
922 if (*s == '(')
3280af22 923 PL_expect = XTERM;
463ee0b2 924 else {
3280af22 925 PL_expect = XOPERATOR;
463ee0b2 926 }
79072805 927 }
d0a148a6
NC
928 PL_nextval[PL_nexttoke].opval
929 = (OP*)newSVOP(OP_CONST,0,
930 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 931 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
932 force_next(token);
933 }
934 return s;
935}
936
ffb4593c
NT
937/*
938 * S_force_ident
9cbb5ea2 939 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
940 * text only contains the "foo" portion. The first argument is a pointer
941 * to the "foo", and the second argument is the type symbol to prefix.
942 * Forces the next token to be a "WORD".
9cbb5ea2 943 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
944 */
945
76e3520e 946STATIC void
bfed75c6 947S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
948{
949 if (s && *s) {
bfed75c6 950 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 951 PL_nextval[PL_nexttoke].opval = o;
79072805 952 force_next(WORD);
748a9306 953 if (kind) {
11343788 954 o->op_private = OPpCONST_ENTERED;
55497cff 955 /* XXX see note in pp_entereval() for why we forgo typo
956 warnings if the symbol must be introduced in an eval.
957 GSAR 96-10-12 */
3280af22 958 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
959 kind == '$' ? SVt_PV :
960 kind == '@' ? SVt_PVAV :
961 kind == '%' ? SVt_PVHV :
962 SVt_PVGV
963 );
748a9306 964 }
79072805
LW
965 }
966}
967
1571675a
GS
968NV
969Perl_str_to_version(pTHX_ SV *sv)
970{
971 NV retval = 0.0;
972 NV nshift = 1.0;
973 STRLEN len;
cfd0369c 974 const char *start = SvPV_const(sv,len);
9d4ba2ae 975 const char * const end = start + len;
504618e9 976 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 977 while (start < end) {
ba210ebe 978 STRLEN skip;
1571675a
GS
979 UV n;
980 if (utf)
9041c2e3 981 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
982 else {
983 n = *(U8*)start;
984 skip = 1;
985 }
986 retval += ((NV)n)/nshift;
987 start += skip;
988 nshift *= 1000;
989 }
990 return retval;
991}
992
4e553d73 993/*
ffb4593c
NT
994 * S_force_version
995 * Forces the next token to be a version number.
e759cc13
RGS
996 * If the next token appears to be an invalid version number, (e.g. "v2b"),
997 * and if "guessing" is TRUE, then no new token is created (and the caller
998 * must use an alternative parsing method).
ffb4593c
NT
999 */
1000
76e3520e 1001STATIC char *
e759cc13 1002S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1003{
1004 OP *version = Nullop;
44dcb63b 1005 char *d;
89bfa8cd 1006
1007 s = skipspace(s);
1008
44dcb63b 1009 d = s;
dd629d5b 1010 if (*d == 'v')
44dcb63b 1011 d++;
44dcb63b 1012 if (isDIGIT(*d)) {
e759cc13
RGS
1013 while (isDIGIT(*d) || *d == '_' || *d == '.')
1014 d++;
9f3d182e 1015 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1016 SV *ver;
b73d6f50 1017 s = scan_num(s, &yylval);
89bfa8cd 1018 version = yylval.opval;
dd629d5b
GS
1019 ver = cSVOPx(version)->op_sv;
1020 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1021 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1022 SvNV_set(ver, str_to_version(ver));
1571675a 1023 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1024 }
89bfa8cd 1025 }
e759cc13
RGS
1026 else if (guessing)
1027 return s;
89bfa8cd 1028 }
1029
1030 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1031 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1032 force_next(WORD);
89bfa8cd 1033
e759cc13 1034 return s;
89bfa8cd 1035}
1036
ffb4593c
NT
1037/*
1038 * S_tokeq
1039 * Tokenize a quoted string passed in as an SV. It finds the next
1040 * chunk, up to end of string or a backslash. It may make a new
1041 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1042 * turns \\ into \.
1043 */
1044
76e3520e 1045STATIC SV *
cea2e8a9 1046S_tokeq(pTHX_ SV *sv)
79072805
LW
1047{
1048 register char *s;
1049 register char *send;
1050 register char *d;
b3ac6de7
IZ
1051 STRLEN len = 0;
1052 SV *pv = sv;
79072805
LW
1053
1054 if (!SvLEN(sv))
b3ac6de7 1055 goto finish;
79072805 1056
a0d0e21e 1057 s = SvPV_force(sv, len);
21a311ee 1058 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1059 goto finish;
463ee0b2 1060 send = s + len;
79072805
LW
1061 while (s < send && *s != '\\')
1062 s++;
1063 if (s == send)
b3ac6de7 1064 goto finish;
79072805 1065 d = s;
be4731d2 1066 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1067 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1068 if (SvUTF8(sv))
1069 SvUTF8_on(pv);
1070 }
79072805
LW
1071 while (s < send) {
1072 if (*s == '\\') {
a0d0e21e 1073 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1074 s++; /* all that, just for this */
1075 }
1076 *d++ = *s++;
1077 }
1078 *d = '\0';
95a20fc0 1079 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1080 finish:
3280af22 1081 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1082 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1083 return sv;
1084}
1085
ffb4593c
NT
1086/*
1087 * Now come three functions related to double-quote context,
1088 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1089 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1090 * interact with PL_lex_state, and create fake ( ... ) argument lists
1091 * to handle functions and concatenation.
1092 * They assume that whoever calls them will be setting up a fake
1093 * join call, because each subthing puts a ',' after it. This lets
1094 * "lower \luPpEr"
1095 * become
1096 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1097 *
1098 * (I'm not sure whether the spurious commas at the end of lcfirst's
1099 * arguments and join's arguments are created or not).
1100 */
1101
1102/*
1103 * S_sublex_start
1104 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1105 *
1106 * Pattern matching will set PL_lex_op to the pattern-matching op to
1107 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1108 *
1109 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1110 *
1111 * Everything else becomes a FUNC.
1112 *
1113 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1114 * had an OP_CONST or OP_READLINE). This just sets us up for a
1115 * call to S_sublex_push().
1116 */
1117
76e3520e 1118STATIC I32
cea2e8a9 1119S_sublex_start(pTHX)
79072805 1120{
f54cb97a 1121 const register I32 op_type = yylval.ival;
79072805
LW
1122
1123 if (op_type == OP_NULL) {
3280af22
NIS
1124 yylval.opval = PL_lex_op;
1125 PL_lex_op = Nullop;
79072805
LW
1126 return THING;
1127 }
1128 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1129 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1130
1131 if (SvTYPE(sv) == SVt_PVIV) {
1132 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1133 STRLEN len;
cfd0369c 1134 const char *p = SvPV_const(sv, len);
f54cb97a 1135 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1136 if (SvUTF8(sv))
1137 SvUTF8_on(nsv);
b3ac6de7
IZ
1138 SvREFCNT_dec(sv);
1139 sv = nsv;
4e553d73 1140 }
b3ac6de7 1141 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1142 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1143 /* Allow <FH> // "foo" */
1144 if (op_type == OP_READLINE)
1145 PL_expect = XTERMORDORDOR;
79072805
LW
1146 return THING;
1147 }
1148
3280af22
NIS
1149 PL_sublex_info.super_state = PL_lex_state;
1150 PL_sublex_info.sub_inwhat = op_type;
1151 PL_sublex_info.sub_op = PL_lex_op;
1152 PL_lex_state = LEX_INTERPPUSH;
55497cff 1153
3280af22
NIS
1154 PL_expect = XTERM;
1155 if (PL_lex_op) {
1156 yylval.opval = PL_lex_op;
1157 PL_lex_op = Nullop;
55497cff 1158 return PMFUNC;
1159 }
1160 else
1161 return FUNC;
1162}
1163
ffb4593c
NT
1164/*
1165 * S_sublex_push
1166 * Create a new scope to save the lexing state. The scope will be
1167 * ended in S_sublex_done. Returns a '(', starting the function arguments
1168 * to the uc, lc, etc. found before.
1169 * Sets PL_lex_state to LEX_INTERPCONCAT.
1170 */
1171
76e3520e 1172STATIC I32
cea2e8a9 1173S_sublex_push(pTHX)
55497cff 1174{
27da23d5 1175 dVAR;
f46d017c 1176 ENTER;
55497cff 1177
3280af22
NIS
1178 PL_lex_state = PL_sublex_info.super_state;
1179 SAVEI32(PL_lex_dojoin);
1180 SAVEI32(PL_lex_brackets);
3280af22
NIS
1181 SAVEI32(PL_lex_casemods);
1182 SAVEI32(PL_lex_starts);
1183 SAVEI32(PL_lex_state);
7766f137 1184 SAVEVPTR(PL_lex_inpat);
3280af22 1185 SAVEI32(PL_lex_inwhat);
57843af0 1186 SAVECOPLINE(PL_curcop);
3280af22 1187 SAVEPPTR(PL_bufptr);
8452ff4b 1188 SAVEPPTR(PL_bufend);
3280af22
NIS
1189 SAVEPPTR(PL_oldbufptr);
1190 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1191 SAVEPPTR(PL_last_lop);
1192 SAVEPPTR(PL_last_uni);
3280af22
NIS
1193 SAVEPPTR(PL_linestart);
1194 SAVESPTR(PL_linestr);
8edd5f42
RGS
1195 SAVEGENERICPV(PL_lex_brackstack);
1196 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1197
1198 PL_linestr = PL_lex_stuff;
1199 PL_lex_stuff = Nullsv;
1200
9cbb5ea2
GS
1201 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1202 = SvPVX(PL_linestr);
3280af22 1203 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1204 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1205 SAVEFREESV(PL_linestr);
1206
1207 PL_lex_dojoin = FALSE;
1208 PL_lex_brackets = 0;
a02a5408
JC
1209 Newx(PL_lex_brackstack, 120, char);
1210 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1211 PL_lex_casemods = 0;
1212 *PL_lex_casestack = '\0';
1213 PL_lex_starts = 0;
1214 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1215 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1216
1217 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1218 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1219 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1220 else
3280af22 1221 PL_lex_inpat = Nullop;
79072805 1222
55497cff 1223 return '(';
79072805
LW
1224}
1225
ffb4593c
NT
1226/*
1227 * S_sublex_done
1228 * Restores lexer state after a S_sublex_push.
1229 */
1230
76e3520e 1231STATIC I32
cea2e8a9 1232S_sublex_done(pTHX)
79072805 1233{
27da23d5 1234 dVAR;
3280af22 1235 if (!PL_lex_starts++) {
9d4ba2ae 1236 SV * const sv = newSVpvn("",0);
9aa983d2
JH
1237 if (SvUTF8(PL_linestr))
1238 SvUTF8_on(sv);
3280af22 1239 PL_expect = XOPERATOR;
9aa983d2 1240 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1241 return THING;
1242 }
1243
3280af22
NIS
1244 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1245 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1246 return yylex();
79072805
LW
1247 }
1248
ffb4593c 1249 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1250 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1251 PL_linestr = PL_lex_repl;
1252 PL_lex_inpat = 0;
1253 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1254 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1255 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1256 SAVEFREESV(PL_linestr);
1257 PL_lex_dojoin = FALSE;
1258 PL_lex_brackets = 0;
3280af22
NIS
1259 PL_lex_casemods = 0;
1260 *PL_lex_casestack = '\0';
1261 PL_lex_starts = 0;
25da4f38 1262 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1263 PL_lex_state = LEX_INTERPNORMAL;
1264 PL_lex_starts++;
e9fa98b2
HS
1265 /* we don't clear PL_lex_repl here, so that we can check later
1266 whether this is an evalled subst; that means we rely on the
1267 logic to ensure sublex_done() is called again only via the
1268 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1269 }
e9fa98b2 1270 else {
3280af22 1271 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1272 PL_lex_repl = Nullsv;
1273 }
79072805 1274 return ',';
ffed7fef
LW
1275 }
1276 else {
f46d017c 1277 LEAVE;
3280af22
NIS
1278 PL_bufend = SvPVX(PL_linestr);
1279 PL_bufend += SvCUR(PL_linestr);
1280 PL_expect = XOPERATOR;
09bef843 1281 PL_sublex_info.sub_inwhat = 0;
79072805 1282 return ')';
ffed7fef
LW
1283 }
1284}
1285
02aa26ce
NT
1286/*
1287 scan_const
1288
1289 Extracts a pattern, double-quoted string, or transliteration. This
1290 is terrifying code.
1291
3280af22
NIS
1292 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1293 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1294 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1295
9b599b2a
GS
1296 Returns a pointer to the character scanned up to. Iff this is
1297 advanced from the start pointer supplied (ie if anything was
1298 successfully parsed), will leave an OP for the substring scanned
1299 in yylval. Caller must intuit reason for not parsing further
1300 by looking at the next characters herself.
1301
02aa26ce
NT
1302 In patterns:
1303 backslashes:
1304 double-quoted style: \r and \n
1305 regexp special ones: \D \s
1306 constants: \x3
1307 backrefs: \1 (deprecated in substitution replacements)
1308 case and quoting: \U \Q \E
1309 stops on @ and $, but not for $ as tail anchor
1310
1311 In transliterations:
1312 characters are VERY literal, except for - not at the start or end
1313 of the string, which indicates a range. scan_const expands the
1314 range to the full set of intermediate characters.
1315
1316 In double-quoted strings:
1317 backslashes:
1318 double-quoted style: \r and \n
1319 constants: \x3
1320 backrefs: \1 (deprecated)
1321 case and quoting: \U \Q \E
1322 stops on @ and $
1323
1324 scan_const does *not* construct ops to handle interpolated strings.
1325 It stops processing as soon as it finds an embedded $ or @ variable
1326 and leaves it to the caller to work out what's going on.
1327
da6eedaa 1328 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1329
1330 $ in pattern could be $foo or could be tail anchor. Assumption:
1331 it's a tail anchor if $ is the last thing in the string, or if it's
1332 followed by one of ")| \n\t"
1333
1334 \1 (backreferences) are turned into $1
1335
1336 The structure of the code is
1337 while (there's a character to process) {
1338 handle transliteration ranges
1339 skip regexp comments
1340 skip # initiated comments in //x patterns
1341 check for embedded @foo
1342 check for embedded scalars
1343 if (backslash) {
1344 leave intact backslashes from leave (below)
1345 deprecate \1 in strings and sub replacements
1346 handle string-changing backslashes \l \U \Q \E, etc.
1347 switch (what was escaped) {
1348 handle - in a transliteration (becomes a literal -)
1349 handle \132 octal characters
1350 handle 0x15 hex characters
1351 handle \cV (control V)
1352 handle printf backslashes (\f, \r, \n, etc)
1353 } (end switch)
1354 } (end if backslash)
1355 } (end while character to read)
4e553d73 1356
02aa26ce
NT
1357*/
1358
76e3520e 1359STATIC char *
cea2e8a9 1360S_scan_const(pTHX_ char *start)
79072805 1361{
3280af22 1362 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1363 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1364 register char *s = start; /* start of the constant */
1365 register char *d = SvPVX(sv); /* destination for copies */
1366 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1367 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1368 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1369 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1370 UV uv;
4c3a8340
TS
1371#ifdef EBCDIC
1372 UV literal_endpoint = 0;
1373#endif
012bcf8d 1374
dff6d3cd 1375 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1376 PL_lex_inpat
b6d5fef8 1377 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1378 : "";
79072805 1379
2b9d42f0
NIS
1380 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1381 /* If we are doing a trans and we know we want UTF8 set expectation */
1382 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1383 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1384 }
1385
1386
79072805 1387 while (s < send || dorange) {
02aa26ce 1388 /* get transliterations out of the way (they're most literal) */
3280af22 1389 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1390 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1391 if (dorange) {
1ba5c669
JH
1392 I32 i; /* current expanded character */
1393 I32 min; /* first character in range */
1394 I32 max; /* last character in range */
02aa26ce 1395
2b9d42f0 1396 if (has_utf8) {
9d4ba2ae 1397 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1398 char *e = d++;
1399 while (e-- > c)
1400 *(e + 1) = *e;
25716404 1401 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1402 /* mark the range as done, and continue */
1403 dorange = FALSE;
1404 didrange = TRUE;
1405 continue;
1406 }
2b9d42f0 1407
95a20fc0 1408 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1409 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1410 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1411 d -= 2; /* eat the first char and the - */
1412
8ada0baa
JH
1413 min = (U8)*d; /* first char in range */
1414 max = (U8)d[1]; /* last char in range */
1415
c2e66d9e 1416 if (min > max) {
01ec43d0 1417 Perl_croak(aTHX_
d1573ac7 1418 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1419 (char)min, (char)max);
c2e66d9e
GS
1420 }
1421
c7f1f016 1422#ifdef EBCDIC
4c3a8340
TS
1423 if (literal_endpoint == 2 &&
1424 ((isLOWER(min) && isLOWER(max)) ||
1425 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1426 if (isLOWER(min)) {
1427 for (i = min; i <= max; i++)
1428 if (isLOWER(i))
db42d148 1429 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1430 } else {
1431 for (i = min; i <= max; i++)
1432 if (isUPPER(i))
db42d148 1433 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1434 }
1435 }
1436 else
1437#endif
1438 for (i = min; i <= max; i++)
eb160463 1439 *d++ = (char)i;
02aa26ce
NT
1440
1441 /* mark the range as done, and continue */
79072805 1442 dorange = FALSE;
01ec43d0 1443 didrange = TRUE;
4c3a8340
TS
1444#ifdef EBCDIC
1445 literal_endpoint = 0;
1446#endif
79072805 1447 continue;
4e553d73 1448 }
02aa26ce
NT
1449
1450 /* range begins (ignore - as first or last char) */
79072805 1451 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1452 if (didrange) {
1fafa243 1453 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1454 }
2b9d42f0 1455 if (has_utf8) {
25716404 1456 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1457 s++;
1458 continue;
1459 }
79072805
LW
1460 dorange = TRUE;
1461 s++;
01ec43d0
GS
1462 }
1463 else {
1464 didrange = FALSE;
4c3a8340
TS
1465#ifdef EBCDIC
1466 literal_endpoint = 0;
1467#endif
01ec43d0 1468 }
79072805 1469 }
02aa26ce
NT
1470
1471 /* if we get here, we're not doing a transliteration */
1472
0f5d15d6
IZ
1473 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1474 except for the last char, which will be done separately. */
3280af22 1475 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1476 if (s[2] == '#') {
e994fd66 1477 while (s+1 < send && *s != ')')
db42d148 1478 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1479 }
1480 else if (s[2] == '{' /* This should match regcomp.c */
1481 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1482 {
cc6b7395 1483 I32 count = 1;
0f5d15d6 1484 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1485 char c;
1486
d9f97599
GS
1487 while (count && (c = *regparse)) {
1488 if (c == '\\' && regparse[1])
1489 regparse++;
4e553d73 1490 else if (c == '{')
cc6b7395 1491 count++;
4e553d73 1492 else if (c == '}')
cc6b7395 1493 count--;
d9f97599 1494 regparse++;
cc6b7395 1495 }
e994fd66 1496 if (*regparse != ')')
5bdf89e7 1497 regparse--; /* Leave one char for continuation. */
0f5d15d6 1498 while (s < regparse)
db42d148 1499 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1500 }
748a9306 1501 }
02aa26ce
NT
1502
1503 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1504 else if (*s == '#' && PL_lex_inpat &&
1505 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1506 while (s+1 < send && *s != '\n')
db42d148 1507 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1508 }
02aa26ce 1509
5d1d4326 1510 /* check for embedded arrays
da6eedaa 1511 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1512 */
7e2040f0 1513 else if (*s == '@' && s[1]
5d1d4326 1514 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1515 break;
02aa26ce
NT
1516
1517 /* check for embedded scalars. only stop if we're sure it's a
1518 variable.
1519 */
79072805 1520 else if (*s == '$') {
3280af22 1521 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1522 break;
6002328a 1523 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1524 break; /* in regexp, $ might be tail anchor */
1525 }
02aa26ce 1526
2b9d42f0
NIS
1527 /* End of else if chain - OP_TRANS rejoin rest */
1528
02aa26ce 1529 /* backslashes */
79072805
LW
1530 if (*s == '\\' && s+1 < send) {
1531 s++;
02aa26ce
NT
1532
1533 /* some backslashes we leave behind */
c9f97d15 1534 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1535 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1536 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1537 continue;
1538 }
02aa26ce
NT
1539
1540 /* deprecate \1 in strings and substitution replacements */
3280af22 1541 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1542 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1543 {
599cee73 1544 if (ckWARN(WARN_SYNTAX))
9014280d 1545 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1546 *--s = '$';
1547 break;
1548 }
02aa26ce
NT
1549
1550 /* string-change backslash escapes */
3280af22 1551 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1552 --s;
1553 break;
1554 }
02aa26ce
NT
1555
1556 /* if we get here, it's either a quoted -, or a digit */
79072805 1557 switch (*s) {
02aa26ce
NT
1558
1559 /* quoted - in transliterations */
79072805 1560 case '-':
3280af22 1561 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1562 *d++ = *s++;
1563 continue;
1564 }
1565 /* FALL THROUGH */
1566 default:
11b8faa4 1567 {
041457d9
DM
1568 if (isALNUM(*s) &&
1569 *s != '_' &&
1570 ckWARN(WARN_MISC))
9014280d 1571 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1572 "Unrecognized escape \\%c passed through",
1573 *s);
1574 /* default action is to copy the quoted character */
f9a63242 1575 goto default_action;
11b8faa4 1576 }
02aa26ce
NT
1577
1578 /* \132 indicates an octal constant */
79072805
LW
1579 case '0': case '1': case '2': case '3':
1580 case '4': case '5': case '6': case '7':
ba210ebe 1581 {
53305cf1
NC
1582 I32 flags = 0;
1583 STRLEN len = 3;
1584 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1585 s += len;
1586 }
012bcf8d 1587 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1588
1589 /* \x24 indicates a hex constant */
79072805 1590 case 'x':
a0ed51b3
LW
1591 ++s;
1592 if (*s == '{') {
9d4ba2ae 1593 char* const e = strchr(s, '}');
a4c04bdc
NC
1594 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1595 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1596 STRLEN len;
355860ce 1597
53305cf1 1598 ++s;
adaeee49 1599 if (!e) {
a0ed51b3 1600 yyerror("Missing right brace on \\x{}");
355860ce 1601 continue;
ba210ebe 1602 }
53305cf1
NC
1603 len = e - s;
1604 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1605 s = e + 1;
a0ed51b3
LW
1606 }
1607 else {
ba210ebe 1608 {
53305cf1 1609 STRLEN len = 2;
a4c04bdc 1610 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1611 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1612 s += len;
1613 }
012bcf8d
GS
1614 }
1615
1616 NUM_ESCAPE_INSERT:
1617 /* Insert oct or hex escaped character.
301d3d20 1618 * There will always enough room in sv since such
db42d148 1619 * escapes will be longer than any UTF-8 sequence
301d3d20 1620 * they can end up as. */
ba7cea30 1621
c7f1f016
NIS
1622 /* We need to map to chars to ASCII before doing the tests
1623 to cover EBCDIC
1624 */
c4d5f83a 1625 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1626 if (!has_utf8 && uv > 255) {
301d3d20
JH
1627 /* Might need to recode whatever we have
1628 * accumulated so far if it contains any
1629 * hibit chars.
1630 *
1631 * (Can't we keep track of that and avoid
1632 * this rescan? --jhi)
012bcf8d 1633 */
c7f1f016 1634 int hicount = 0;
63cd0674
NIS
1635 U8 *c;
1636 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1637 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1638 hicount++;
db42d148 1639 }
012bcf8d 1640 }
63cd0674 1641 if (hicount) {
9d4ba2ae 1642 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1643 U8 *src, *dst;
1644 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1645 src = (U8 *)d - 1;
1646 dst = src+hicount;
1647 d += hicount;
cfd0369c 1648 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1649 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1650 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1651 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1652 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1653 }
1654 else {
63cd0674 1655 *dst-- = *src;
012bcf8d 1656 }
c7f1f016 1657 src--;
012bcf8d
GS
1658 }
1659 }
1660 }
1661
9aa983d2 1662 if (has_utf8 || uv > 255) {
9041c2e3 1663 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1664 has_utf8 = TRUE;
f9a63242
JH
1665 if (PL_lex_inwhat == OP_TRANS &&
1666 PL_sublex_info.sub_op) {
1667 PL_sublex_info.sub_op->op_private |=
1668 (PL_lex_repl ? OPpTRANS_FROM_UTF
1669 : OPpTRANS_TO_UTF);
f9a63242 1670 }
012bcf8d 1671 }
a0ed51b3 1672 else {
012bcf8d 1673 *d++ = (char)uv;
a0ed51b3 1674 }
012bcf8d
GS
1675 }
1676 else {
c4d5f83a 1677 *d++ = (char) uv;
a0ed51b3 1678 }
79072805 1679 continue;
02aa26ce 1680
b239daa5 1681 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1682 case 'N':
55eda711 1683 ++s;
423cee85
JH
1684 if (*s == '{') {
1685 char* e = strchr(s, '}');
155aba94 1686 SV *res;
423cee85 1687 STRLEN len;
cfd0369c 1688 const char *str;
4e553d73 1689
423cee85 1690 if (!e) {
5777a3f7 1691 yyerror("Missing right brace on \\N{}");
423cee85
JH
1692 e = s - 1;
1693 goto cont_scan;
1694 }
dbc0d4f2
JH
1695 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1696 /* \N{U+...} */
1697 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1698 PERL_SCAN_DISALLOW_PREFIX;
1699 s += 3;
1700 len = e - s;
1701 uv = grok_hex(s, &len, &flags, NULL);
1702 s = e + 1;
1703 goto NUM_ESCAPE_INSERT;
1704 }
55eda711
JH
1705 res = newSVpvn(s + 1, e - s - 1);
1706 res = new_constant( Nullch, 0, "charnames",
1707 res, Nullsv, "\\N{...}" );
f9a63242
JH
1708 if (has_utf8)
1709 sv_utf8_upgrade(res);
cfd0369c 1710 str = SvPV_const(res,len);
1c47067b
JH
1711#ifdef EBCDIC_NEVER_MIND
1712 /* charnames uses pack U and that has been
1713 * recently changed to do the below uni->native
1714 * mapping, so this would be redundant (and wrong,
1715 * the code point would be doubly converted).
1716 * But leave this in just in case the pack U change
1717 * gets revoked, but the semantics is still
1718 * desireable for charnames. --jhi */
cddc7ef4 1719 {
cfd0369c 1720 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1721
1722 if (uv < 0x100) {
89ebb4a3 1723 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1724
1725 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1726 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1727 str = SvPV_const(res, len);
cddc7ef4
JH
1728 }
1729 }
1730#endif
89491803 1731 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1732 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1733 SvCUR_set(sv, d - ostart);
1734 SvPOK_on(sv);
e4f3eed8 1735 *d = '\0';
f08d6ad9 1736 sv_utf8_upgrade(sv);
d2f449dd 1737 /* this just broke our allocation above... */
eb160463 1738 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1739 d = SvPVX(sv) + SvCUR(sv);
89491803 1740 has_utf8 = TRUE;
f08d6ad9 1741 }
eb160463 1742 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1743 const char * const odest = SvPVX_const(sv);
423cee85 1744
8973db79 1745 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1746 d = SvPVX(sv) + (d - odest);
1747 }
1748 Copy(str, d, len, char);
1749 d += len;
1750 SvREFCNT_dec(res);
1751 cont_scan:
1752 s = e + 1;
1753 }
1754 else
5777a3f7 1755 yyerror("Missing braces on \\N{}");
423cee85
JH
1756 continue;
1757
02aa26ce 1758 /* \c is a control character */
79072805
LW
1759 case 'c':
1760 s++;
961ce445 1761 if (s < send) {
ba210ebe 1762 U8 c = *s++;
c7f1f016
NIS
1763#ifdef EBCDIC
1764 if (isLOWER(c))
1765 c = toUPPER(c);
1766#endif
db42d148 1767 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1768 }
961ce445
RGS
1769 else {
1770 yyerror("Missing control char name in \\c");
1771 }
79072805 1772 continue;
02aa26ce
NT
1773
1774 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1775 case 'b':
db42d148 1776 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1777 break;
1778 case 'n':
db42d148 1779 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1780 break;
1781 case 'r':
db42d148 1782 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1783 break;
1784 case 'f':
db42d148 1785 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1786 break;
1787 case 't':
db42d148 1788 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1789 break;
34a3fe2a 1790 case 'e':
db42d148 1791 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1792 break;
1793 case 'a':
db42d148 1794 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1795 break;
02aa26ce
NT
1796 } /* end switch */
1797
79072805
LW
1798 s++;
1799 continue;
02aa26ce 1800 } /* end if (backslash) */
4c3a8340
TS
1801#ifdef EBCDIC
1802 else
1803 literal_endpoint++;
1804#endif
02aa26ce 1805
f9a63242 1806 default_action:
2b9d42f0
NIS
1807 /* If we started with encoded form, or already know we want it
1808 and then encode the next character */
1809 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1810 STRLEN len = 1;
9d4ba2ae
AL
1811 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1812 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1813 s += len;
1814 if (need > len) {
1815 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1816 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1817 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1818 }
1819 d = (char*)uvchr_to_utf8((U8*)d, uv);
1820 has_utf8 = TRUE;
1821 }
1822 else {
1823 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1824 }
02aa26ce
NT
1825 } /* while loop to process each character */
1826
1827 /* terminate the string and set up the sv */
79072805 1828 *d = '\0';
95a20fc0 1829 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1830 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1831 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1832
79072805 1833 SvPOK_on(sv);
9f4817db 1834 if (PL_encoding && !has_utf8) {
d0063567
DK
1835 sv_recode_to_utf8(sv, PL_encoding);
1836 if (SvUTF8(sv))
1837 has_utf8 = TRUE;
9f4817db 1838 }
2b9d42f0 1839 if (has_utf8) {
7e2040f0 1840 SvUTF8_on(sv);
2b9d42f0 1841 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1842 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1843 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1844 }
1845 }
79072805 1846
02aa26ce 1847 /* shrink the sv if we allocated more than we used */
79072805 1848 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1849 SvPV_shrink_to_cur(sv);
79072805 1850 }
02aa26ce 1851
9b599b2a 1852 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1853 if (s > PL_bufptr) {
1854 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1855 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1856 sv, Nullsv,
4e553d73 1857 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1858 ? "tr"
3280af22 1859 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1860 ? "s"
1861 : "qq")));
79072805 1862 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1863 } else
8990e307 1864 SvREFCNT_dec(sv);
79072805
LW
1865 return s;
1866}
1867
ffb4593c
NT
1868/* S_intuit_more
1869 * Returns TRUE if there's more to the expression (e.g., a subscript),
1870 * FALSE otherwise.
ffb4593c
NT
1871 *
1872 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1873 *
1874 * ->[ and ->{ return TRUE
1875 * { and [ outside a pattern are always subscripts, so return TRUE
1876 * if we're outside a pattern and it's not { or [, then return FALSE
1877 * if we're in a pattern and the first char is a {
1878 * {4,5} (any digits around the comma) returns FALSE
1879 * if we're in a pattern and the first char is a [
1880 * [] returns FALSE
1881 * [SOMETHING] has a funky algorithm to decide whether it's a
1882 * character class or not. It has to deal with things like
1883 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1884 * anything else returns TRUE
1885 */
1886
9cbb5ea2
GS
1887/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1888
76e3520e 1889STATIC int
cea2e8a9 1890S_intuit_more(pTHX_ register char *s)
79072805 1891{
3280af22 1892 if (PL_lex_brackets)
79072805
LW
1893 return TRUE;
1894 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1895 return TRUE;
1896 if (*s != '{' && *s != '[')
1897 return FALSE;
3280af22 1898 if (!PL_lex_inpat)
79072805
LW
1899 return TRUE;
1900
1901 /* In a pattern, so maybe we have {n,m}. */
1902 if (*s == '{') {
1903 s++;
1904 if (!isDIGIT(*s))
1905 return TRUE;
1906 while (isDIGIT(*s))
1907 s++;
1908 if (*s == ',')
1909 s++;
1910 while (isDIGIT(*s))
1911 s++;
1912 if (*s == '}')
1913 return FALSE;
1914 return TRUE;
1915
1916 }
1917
1918 /* On the other hand, maybe we have a character class */
1919
1920 s++;
1921 if (*s == ']' || *s == '^')
1922 return FALSE;
1923 else {
ffb4593c 1924 /* this is terrifying, and it works */
79072805
LW
1925 int weight = 2; /* let's weigh the evidence */
1926 char seen[256];
f27ffc4a 1927 unsigned char un_char = 255, last_un_char;
9d4ba2ae 1928 const char * const send = strchr(s,']');
3280af22 1929 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1930
1931 if (!send) /* has to be an expression */
1932 return TRUE;
1933
1934 Zero(seen,256,char);
1935 if (*s == '$')
1936 weight -= 3;
1937 else if (isDIGIT(*s)) {
1938 if (s[1] != ']') {
1939 if (isDIGIT(s[1]) && s[2] == ']')
1940 weight -= 10;
1941 }
1942 else
1943 weight -= 100;
1944 }
1945 for (; s < send; s++) {
1946 last_un_char = un_char;
1947 un_char = (unsigned char)*s;
1948 switch (*s) {
1949 case '@':
1950 case '&':
1951 case '$':
1952 weight -= seen[un_char] * 10;
7e2040f0 1953 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1954 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1955 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1956 weight -= 100;
1957 else
1958 weight -= 10;
1959 }
1960 else if (*s == '$' && s[1] &&
93a17b20
LW
1961 strchr("[#!%*<>()-=",s[1])) {
1962 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1963 weight -= 10;
1964 else
1965 weight -= 1;
1966 }
1967 break;
1968 case '\\':
1969 un_char = 254;
1970 if (s[1]) {
93a17b20 1971 if (strchr("wds]",s[1]))
79072805
LW
1972 weight += 100;
1973 else if (seen['\''] || seen['"'])
1974 weight += 1;
93a17b20 1975 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1976 weight += 40;
1977 else if (isDIGIT(s[1])) {
1978 weight += 40;
1979 while (s[1] && isDIGIT(s[1]))
1980 s++;
1981 }
1982 }
1983 else
1984 weight += 100;
1985 break;
1986 case '-':
1987 if (s[1] == '\\')
1988 weight += 50;
93a17b20 1989 if (strchr("aA01! ",last_un_char))
79072805 1990 weight += 30;
93a17b20 1991 if (strchr("zZ79~",s[1]))
79072805 1992 weight += 30;
f27ffc4a
GS
1993 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1994 weight -= 5; /* cope with negative subscript */
79072805
LW
1995 break;
1996 default:
3792a11b
NC
1997 if (!isALNUM(last_un_char)
1998 && !(last_un_char == '$' || last_un_char == '@'
1999 || last_un_char == '&')
2000 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2001 char *d = tmpbuf;
2002 while (isALPHA(*s))
2003 *d++ = *s++;
2004 *d = '\0';
2005 if (keyword(tmpbuf, d - tmpbuf))
2006 weight -= 150;
2007 }
2008 if (un_char == last_un_char + 1)
2009 weight += 5;
2010 weight -= seen[un_char];
2011 break;
2012 }
2013 seen[un_char]++;
2014 }
2015 if (weight >= 0) /* probably a character class */
2016 return FALSE;
2017 }
2018
2019 return TRUE;
2020}
ffed7fef 2021
ffb4593c
NT
2022/*
2023 * S_intuit_method
2024 *
2025 * Does all the checking to disambiguate
2026 * foo bar
2027 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2028 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2029 *
2030 * First argument is the stuff after the first token, e.g. "bar".
2031 *
2032 * Not a method if bar is a filehandle.
2033 * Not a method if foo is a subroutine prototyped to take a filehandle.
2034 * Not a method if it's really "Foo $bar"
2035 * Method if it's "foo $bar"
2036 * Not a method if it's really "print foo $bar"
2037 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2038 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2039 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2040 * =>
2041 */
2042
76e3520e 2043STATIC int
cea2e8a9 2044S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2045{
2046 char *s = start + (*start == '$');
3280af22 2047 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2048 STRLEN len;
2049 GV* indirgv;
2050
2051 if (gv) {
b6c543e3 2052 CV *cv;
a0d0e21e
LW
2053 if (GvIO(gv))
2054 return 0;
b6c543e3 2055 if ((cv = GvCVu(gv))) {
95a20fc0 2056 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2057 if (proto) {
2058 if (*proto == ';')
2059 proto++;
2060 if (*proto == '*')
2061 return 0;
2062 }
2063 } else
a0d0e21e
LW
2064 gv = 0;
2065 }
8903cb82 2066 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2067 /* start is the beginning of the possible filehandle/object,
2068 * and s is the end of it
2069 * tmpbuf is a copy of it
2070 */
2071
a0d0e21e 2072 if (*start == '$') {
3280af22 2073 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2074 return 0;
2075 s = skipspace(s);
3280af22
NIS
2076 PL_bufptr = start;
2077 PL_expect = XREF;
a0d0e21e
LW
2078 return *s == '(' ? FUNCMETH : METHOD;
2079 }
2080 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2081 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2082 len -= 2;
2083 tmpbuf[len] = '\0';
2084 goto bare_package;
2085 }
2086 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2087 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2088 return 0;
2089 /* filehandle or package name makes it a method */
89bfa8cd 2090 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2091 s = skipspace(s);
3280af22 2092 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2093 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2094 bare_package:
3280af22 2095 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2096 newSVpvn(tmpbuf,len));
3280af22
NIS
2097 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2098 PL_expect = XTERM;
a0d0e21e 2099 force_next(WORD);
3280af22 2100 PL_bufptr = s;
a0d0e21e
LW
2101 return *s == '(' ? FUNCMETH : METHOD;
2102 }
2103 }
2104 return 0;
2105}
2106
ffb4593c
NT
2107/*
2108 * S_incl_perldb
2109 * Return a string of Perl code to load the debugger. If PERL5DB
2110 * is set, it will return the contents of that, otherwise a
2111 * compile-time require of perl5db.pl.
2112 */
2113
bfed75c6 2114STATIC const char*
cea2e8a9 2115S_incl_perldb(pTHX)
a0d0e21e 2116{
3280af22 2117 if (PL_perldb) {
9d4ba2ae 2118 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2119
2120 if (pdb)
2121 return pdb;
93189314 2122 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2123 return "BEGIN { require 'perl5db.pl' }";
2124 }
2125 return "";
2126}
2127
2128
16d20bd9 2129/* Encoded script support. filter_add() effectively inserts a
4e553d73 2130 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2131 * Note that the filter function only applies to the current source file
2132 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2133 *
2134 * The datasv parameter (which may be NULL) can be used to pass
2135 * private data to this instance of the filter. The filter function
2136 * can recover the SV using the FILTER_DATA macro and use it to
2137 * store private buffers and state information.
2138 *
2139 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2140 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2141 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2142 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2143 * private use must be set using malloc'd pointers.
2144 */
16d20bd9
AD
2145
2146SV *
864dbfa3 2147Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2148{
f4c556ac
GS
2149 if (!funcp)
2150 return Nullsv;
2151
3280af22
NIS
2152 if (!PL_rsfp_filters)
2153 PL_rsfp_filters = newAV();
16d20bd9 2154 if (!datasv)
8c52afec 2155 datasv = NEWSV(255,0);
862a34c6 2156 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2157 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2158 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2159 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2160 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2161 av_unshift(PL_rsfp_filters, 1);
2162 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2163 return(datasv);
2164}
4e553d73 2165
16d20bd9
AD
2166
2167/* Delete most recently added instance of this filter function. */
a0d0e21e 2168void
864dbfa3 2169Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2170{
e0c19803 2171 SV *datasv;
24801a4b 2172
33073adb 2173#ifdef DEBUGGING
8141890a 2174 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2175#endif
3280af22 2176 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2177 return;
2178 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2179 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2180 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2181 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2182 IoANY(datasv) = (void *)NULL;
3280af22 2183 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2184
16d20bd9
AD
2185 return;
2186 }
2187 /* we need to search for the correct entry and clear it */
cea2e8a9 2188 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2189}
2190
2191
1de9afcd
RGS
2192/* Invoke the idxth filter function for the current rsfp. */
2193/* maxlen 0 = read one text line */
16d20bd9 2194I32
864dbfa3 2195Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2196{
16d20bd9
AD
2197 filter_t funcp;
2198 SV *datasv = NULL;
e50aee73 2199
3280af22 2200 if (!PL_rsfp_filters)
16d20bd9 2201 return -1;
1de9afcd 2202 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2203 /* Provide a default input filter to make life easy. */
2204 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2205 DEBUG_P(PerlIO_printf(Perl_debug_log,
2206 "filter_read %d: from rsfp\n", idx));
4e553d73 2207 if (maxlen) {
16d20bd9
AD
2208 /* Want a block */
2209 int len ;
f54cb97a 2210 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2211
2212 /* ensure buf_sv is large enough */
eb160463 2213 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2214 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2215 if (PerlIO_error(PL_rsfp))
37120919
AD
2216 return -1; /* error */
2217 else
2218 return 0 ; /* end of file */
2219 }
16d20bd9
AD
2220 SvCUR_set(buf_sv, old_len + len) ;
2221 } else {
2222 /* Want a line */
3280af22
NIS
2223 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2224 if (PerlIO_error(PL_rsfp))
37120919
AD
2225 return -1; /* error */
2226 else
2227 return 0 ; /* end of file */
2228 }
16d20bd9
AD
2229 }
2230 return SvCUR(buf_sv);
2231 }
2232 /* Skip this filter slot if filter has been deleted */
1de9afcd 2233 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2234 DEBUG_P(PerlIO_printf(Perl_debug_log,
2235 "filter_read %d: skipped (filter deleted)\n",
2236 idx));
16d20bd9
AD
2237 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2238 }
2239 /* Get function pointer hidden within datasv */
8141890a 2240 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2241 DEBUG_P(PerlIO_printf(Perl_debug_log,
2242 "filter_read %d: via function %p (%s)\n",
cfd0369c 2243 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2244 /* Call function. The function is expected to */
2245 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2246 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2247 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2248}
2249
76e3520e 2250STATIC char *
cea2e8a9 2251S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2252{
c39cd008 2253#ifdef PERL_CR_FILTER
3280af22 2254 if (!PL_rsfp_filters) {
c39cd008 2255 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2256 }
2257#endif
3280af22 2258 if (PL_rsfp_filters) {
55497cff 2259 if (!append)
2260 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2261 if (FILTER_READ(0, sv, 0) > 0)
2262 return ( SvPVX(sv) ) ;
2263 else
2264 return Nullch ;
2265 }
9d116dd7 2266 else
fd049845 2267 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2268}
2269
01ec43d0 2270STATIC HV *
7fc63493 2271S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2272{
2273 GV *gv;
2274
01ec43d0 2275 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2276 return PL_curstash;
2277
2278 if (len > 2 &&
2279 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2280 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2281 {
2282 return GvHV(gv); /* Foo:: */
def3634b
GS
2283 }
2284
2285 /* use constant CLASS => 'MyClass' */
2286 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2287 SV *sv;
2288 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2289 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2290 }
2291 }
2292
2293 return gv_stashpv(pkgname, FALSE);
2294}
a0d0e21e 2295
468aa647 2296STATIC char *
cc6ed77d 2297S_tokenize_use(pTHX_ int is_use, char *s) {
468aa647
RGS
2298 if (PL_expect != XSTATE)
2299 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2300 is_use ? "use" : "no"));
2301 s = skipspace(s);
2302 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2303 s = force_version(s, TRUE);
2304 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2305 PL_nextval[PL_nexttoke].opval = Nullop;
2306 force_next(WORD);
2307 }
2308 else if (*s == 'v') {
2309 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2310 s = force_version(s, FALSE);
2311 }
2312 }
2313 else {
2314 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2315 s = force_version(s, FALSE);
2316 }
2317 yylval.ival = is_use;
2318 return s;
2319}
748a9306 2320#ifdef DEBUGGING
27da23d5 2321 static const char* const exp_name[] =
09bef843 2322 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2323 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2324 };
748a9306 2325#endif
463ee0b2 2326
02aa26ce
NT
2327/*
2328 yylex
2329
2330 Works out what to call the token just pulled out of the input
2331 stream. The yacc parser takes care of taking the ops we return and
2332 stitching them into a tree.
2333
2334 Returns:
2335 PRIVATEREF
2336
2337 Structure:
2338 if read an identifier
2339 if we're in a my declaration
2340 croak if they tried to say my($foo::bar)
2341 build the ops for a my() declaration
2342 if it's an access to a my() variable
2343 are we in a sort block?
2344 croak if my($a); $a <=> $b
2345 build ops for access to a my() variable
2346 if in a dq string, and they've said @foo and we can't find @foo
2347 croak
2348 build ops for a bareword
2349 if we already built the token before, use it.
2350*/
2351
20141f0e 2352
dba4d153
JH
2353#ifdef __SC__
2354#pragma segment Perl_yylex
2355#endif
dba4d153 2356int
dba4d153 2357Perl_yylex(pTHX)
20141f0e 2358{
3afc138a 2359 register char *s = PL_bufptr;
378cc40b 2360 register char *d;
79072805 2361 register I32 tmp;
463ee0b2 2362 STRLEN len;
161b471a
NIS
2363 GV *gv = Nullgv;
2364 GV **gvp = 0;
aa7440fb 2365 bool bof = FALSE;
1d239bbb 2366 I32 orig_keyword = 0;
a687059c 2367
bbf60fe6
DM
2368 DEBUG_T( {
2369 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2370 lex_state_names[PL_lex_state]);
2371 } );
02aa26ce 2372 /* check if there's an identifier for us to look at */
ba979b31 2373 if (PL_pending_ident)
bbf60fe6 2374 return REPORT(S_pending_ident(aTHX));
bbce6d69 2375
02aa26ce
NT
2376 /* no identifier pending identification */
2377
3280af22 2378 switch (PL_lex_state) {
79072805
LW
2379#ifdef COMMENTARY
2380 case LEX_NORMAL: /* Some compilers will produce faster */
2381 case LEX_INTERPNORMAL: /* code if we comment these out. */
2382 break;
2383#endif
2384
09bef843 2385 /* when we've already built the next token, just pull it out of the queue */
79072805 2386 case LEX_KNOWNEXT:
3280af22
NIS
2387 PL_nexttoke--;
2388 yylval = PL_nextval[PL_nexttoke];
2389 if (!PL_nexttoke) {
2390 PL_lex_state = PL_lex_defer;
2391 PL_expect = PL_lex_expect;
2392 PL_lex_defer = LEX_NORMAL;
463ee0b2 2393 }
607df283 2394 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2395 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2396 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2397
bbf60fe6 2398 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2399
02aa26ce 2400 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2401 when we get here, PL_bufptr is at the \
02aa26ce 2402 */
79072805
LW
2403 case LEX_INTERPCASEMOD:
2404#ifdef DEBUGGING
3280af22 2405 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2406 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2407#endif
02aa26ce 2408 /* handle \E or end of string */
3280af22 2409 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2410 /* if at a \E */
3280af22 2411 if (PL_lex_casemods) {
f54cb97a 2412 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2413 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2414
3792a11b
NC
2415 if (PL_bufptr != PL_bufend
2416 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2417 PL_bufptr += 2;
2418 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2419 }
bbf60fe6 2420 return REPORT(')');
79072805 2421 }
3280af22
NIS
2422 if (PL_bufptr != PL_bufend)
2423 PL_bufptr += 2;
2424 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2425 return yylex();
79072805
LW
2426 }
2427 else {
607df283 2428 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2429 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2430 s = PL_bufptr + 1;
6e909404
JH
2431 if (s[1] == '\\' && s[2] == 'E') {
2432 PL_bufptr = s + 3;
2433 PL_lex_state = LEX_INTERPCONCAT;
2434 return yylex();
a0d0e21e 2435 }
6e909404
JH
2436 else {
2437 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2438 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2439 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2440 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2441 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2442 return REPORT(')');
6e909404
JH
2443 }
2444 if (PL_lex_casemods > 10)
2445 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2446 PL_lex_casestack[PL_lex_casemods++] = *s;
2447 PL_lex_casestack[PL_lex_casemods] = '\0';
2448 PL_lex_state = LEX_INTERPCONCAT;
2449 PL_nextval[PL_nexttoke].ival = 0;
2450 force_next('(');
2451 if (*s == 'l')
2452 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2453 else if (*s == 'u')
2454 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2455 else if (*s == 'L')
2456 PL_nextval[PL_nexttoke].ival = OP_LC;
2457 else if (*s == 'U')
2458 PL_nextval[PL_nexttoke].ival = OP_UC;
2459 else if (*s == 'Q')
2460 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2461 else
2462 Perl_croak(aTHX_ "panic: yylex");
2463 PL_bufptr = s + 1;
a0d0e21e 2464 }
79072805 2465 force_next(FUNC);
3280af22
NIS
2466 if (PL_lex_starts) {
2467 s = PL_bufptr;
2468 PL_lex_starts = 0;
131b3ad0
DM
2469 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2470 if (PL_lex_casemods == 1 && PL_lex_inpat)
2471 OPERATOR(',');
2472 else
2473 Aop(OP_CONCAT);
79072805
LW
2474 }
2475 else
cea2e8a9 2476 return yylex();
79072805
LW
2477 }
2478
55497cff 2479 case LEX_INTERPPUSH:
bbf60fe6 2480 return REPORT(sublex_push());
55497cff 2481
79072805 2482 case LEX_INTERPSTART:
3280af22 2483 if (PL_bufptr == PL_bufend)
bbf60fe6 2484 return REPORT(sublex_done());
607df283 2485 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2486 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2487 PL_expect = XTERM;
2488 PL_lex_dojoin = (*PL_bufptr == '@');
2489 PL_lex_state = LEX_INTERPNORMAL;
2490 if (PL_lex_dojoin) {
2491 PL_nextval[PL_nexttoke].ival = 0;
79072805 2492 force_next(',');
a0d0e21e 2493 force_ident("\"", '$');
3280af22 2494 PL_nextval[PL_nexttoke].ival = 0;
79072805 2495 force_next('$');
3280af22 2496 PL_nextval[PL_nexttoke].ival = 0;
79072805 2497 force_next('(');
3280af22 2498 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2499 force_next(FUNC);
2500 }
3280af22
NIS
2501 if (PL_lex_starts++) {
2502 s = PL_bufptr;
131b3ad0
DM
2503 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2504 if (!PL_lex_casemods && PL_lex_inpat)
2505 OPERATOR(',');
2506 else
2507 Aop(OP_CONCAT);
79072805 2508 }
cea2e8a9 2509 return yylex();
79072805
LW
2510
2511 case LEX_INTERPENDMAYBE:
3280af22
NIS
2512 if (intuit_more(PL_bufptr)) {
2513 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2514 break;
2515 }
2516 /* FALL THROUGH */
2517
2518 case LEX_INTERPEND:
3280af22
NIS
2519 if (PL_lex_dojoin) {
2520 PL_lex_dojoin = FALSE;
2521 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2522 return REPORT(')');
79072805 2523 }
43a16006 2524 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2525 && SvEVALED(PL_lex_repl))
43a16006 2526 {
e9fa98b2 2527 if (PL_bufptr != PL_bufend)
cea2e8a9 2528 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2529 PL_lex_repl = Nullsv;
2530 }
79072805
LW
2531 /* FALLTHROUGH */
2532 case LEX_INTERPCONCAT:
2533#ifdef DEBUGGING
3280af22 2534 if (PL_lex_brackets)
cea2e8a9 2535 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2536#endif
3280af22 2537 if (PL_bufptr == PL_bufend)
bbf60fe6 2538 return REPORT(sublex_done());
79072805 2539
3280af22
NIS
2540 if (SvIVX(PL_linestr) == '\'') {
2541 SV *sv = newSVsv(PL_linestr);
2542 if (!PL_lex_inpat)
76e3520e 2543 sv = tokeq(sv);
3280af22 2544 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2545 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2546 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2547 s = PL_bufend;
79072805
LW
2548 }
2549 else {
3280af22 2550 s = scan_const(PL_bufptr);
79072805 2551 if (*s == '\\')
3280af22 2552 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2553 else
3280af22 2554 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2555 }
2556
3280af22
NIS
2557 if (s != PL_bufptr) {
2558 PL_nextval[PL_nexttoke] = yylval;
2559 PL_expect = XTERM;
79072805 2560 force_next(THING);
131b3ad0
DM
2561 if (PL_lex_starts++) {
2562 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2563 if (!PL_lex_casemods && PL_lex_inpat)
2564 OPERATOR(',');
2565 else
2566 Aop(OP_CONCAT);
2567 }
79072805 2568 else {
3280af22 2569 PL_bufptr = s;
cea2e8a9 2570 return yylex();
79072805
LW
2571 }
2572 }
2573
cea2e8a9 2574 return yylex();
a0d0e21e 2575 case LEX_FORMLINE:
3280af22
NIS
2576 PL_lex_state = LEX_NORMAL;
2577 s = scan_formline(PL_bufptr);
2578 if (!PL_lex_formbrack)
a0d0e21e
LW
2579 goto rightbracket;
2580 OPERATOR(';');
79072805
LW
2581 }
2582
3280af22
NIS
2583 s = PL_bufptr;
2584 PL_oldoldbufptr = PL_oldbufptr;
2585 PL_oldbufptr = s;
607df283 2586 DEBUG_T( {
bbf60fe6 2587 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2588 exp_name[PL_expect], s);
5f80b19c 2589 } );
463ee0b2
LW
2590
2591 retry:
378cc40b
LW
2592 switch (*s) {
2593 default:
7e2040f0 2594 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2595 goto keylookup;
cea2e8a9 2596 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2597 case 4:
2598 case 26:
2599 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2600 case 0:
3280af22
NIS
2601 if (!PL_rsfp) {
2602 PL_last_uni = 0;
2603 PL_last_lop = 0;
c5ee2135
WL
2604 if (PL_lex_brackets) {
2605 if (PL_lex_formbrack)
2606 yyerror("Format not terminated");
2607 else
2608 yyerror("Missing right curly or square bracket");
2609 }
4e553d73 2610 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2611 "### Tokener got EOF\n");
5f80b19c 2612 } );
79072805 2613 TOKEN(0);
463ee0b2 2614 }
3280af22 2615 if (s++ < PL_bufend)
a687059c 2616 goto retry; /* ignore stray nulls */
3280af22
NIS
2617 PL_last_uni = 0;
2618 PL_last_lop = 0;
2619 if (!PL_in_eval && !PL_preambled) {
2620 PL_preambled = TRUE;
2621 sv_setpv(PL_linestr,incl_perldb());
2622 if (SvCUR(PL_linestr))
4147a61b 2623 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2624 if (PL_preambleav){
2625 while(AvFILLp(PL_preambleav) >= 0) {
2626 SV *tmpsv = av_shift(PL_preambleav);
2627 sv_catsv(PL_linestr, tmpsv);
4147a61b 2628 sv_catpvn(PL_linestr, ";", 1);
91b7def8 2629 sv_free(tmpsv);
2630 }
3280af22
NIS
2631 sv_free((SV*)PL_preambleav);
2632 PL_preambleav = NULL;
91b7def8 2633 }
3280af22
NIS
2634 if (PL_minus_n || PL_minus_p) {
2635 sv_catpv(PL_linestr, "LINE: while (<>) {");
2636 if (PL_minus_l)
2637 sv_catpv(PL_linestr,"chomp;");
2638 if (PL_minus_a) {
3280af22 2639 if (PL_minus_F) {
3792a11b
NC
2640 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2641 || *PL_splitstr == '"')
3280af22 2642 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2643 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2644 else {
c8ef6a4b
NC
2645 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2646 bytes can be used as quoting characters. :-) */
48c4c863
NC
2647 /* The count here deliberately includes the NUL
2648 that terminates the C string constant. This
2649 embeds the opening NUL into the string. */
dd374669 2650 const char *splits = PL_splitstr;
9a3fb652 2651 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2652 do {
2653 /* Need to \ \s */
dd374669
AL
2654 if (*splits == '\\')
2655 sv_catpvn(PL_linestr, splits, 1);
2656 sv_catpvn(PL_linestr, splits, 1);
2657 } while (*splits++);
48c4c863
NC
2658 /* This loop will embed the trailing NUL of
2659 PL_linestr as the last thing it does before
2660 terminating. */
4fccd7c6 2661 sv_catpvn(PL_linestr, ");", 2);
54310121 2662 }
2304df62
AD
2663 }
2664 else
75c72d73 2665 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2666 }
79072805 2667 }
4147a61b 2668 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2669 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2670 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2671 PL_last_lop = PL_last_uni = Nullch;
3280af22 2672 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2673 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2674
2675 sv_upgrade(sv, SVt_PVMG);
3280af22 2676 sv_setsv(sv,PL_linestr);
0ac0412a 2677 (void)SvIOK_on(sv);
45977657 2678 SvIV_set(sv, 0);
57843af0 2679 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2680 }
79072805 2681 goto retry;
a687059c 2682 }
e929a76b 2683 do {
aa7440fb 2684 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2685 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2686 fake_eof:
2687 if (PL_rsfp) {
2688 if (PL_preprocess && !PL_in_eval)
2689 (void)PerlProc_pclose(PL_rsfp);
2690 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2691 PerlIO_clearerr(PL_rsfp);
2692 else
2693 (void)PerlIO_close(PL_rsfp);
2694 PL_rsfp = Nullfp;
2695 PL_doextract = FALSE;
2696 }
2697 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2698 sv_setpv(PL_linestr,PL_minus_p
2699 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2700 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2701 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2702 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2703 PL_minus_n = PL_minus_p = 0;
2704 goto retry;
2705 }
2706 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2707 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2708 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2709 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2710 }
7aa207d6
JH
2711 /* If it looks like the start of a BOM or raw UTF-16,
2712 * check if it in fact is. */
2713 else if (bof &&
2714 (*s == 0 ||
2715 *(U8*)s == 0xEF ||
2716 *(U8*)s >= 0xFE ||
2717 s[1] == 0)) {
226017aa 2718#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2719# ifdef __GNU_LIBRARY__
2720# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2721# define FTELL_FOR_PIPE_IS_BROKEN
2722# endif
e3f494f1
JH
2723# else
2724# ifdef __GLIBC__
2725# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2726# define FTELL_FOR_PIPE_IS_BROKEN
2727# endif
2728# endif
226017aa
DD
2729# endif
2730#endif
2731#ifdef FTELL_FOR_PIPE_IS_BROKEN
2732 /* This loses the possibility to detect the bof
2733 * situation on perl -P when the libc5 is being used.
2734 * Workaround? Maybe attach some extra state to PL_rsfp?
2735 */
2736 if (!PL_preprocess)
7e28d3af 2737 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2738#else
eb160463 2739 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2740#endif
7e28d3af 2741 if (bof) {
3280af22 2742 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2743 s = swallow_bom((U8*)s);
e929a76b 2744 }
378cc40b 2745 }
3280af22 2746 if (PL_doextract) {
a0d0e21e
LW
2747 /* Incest with pod. */
2748 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2749 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2750 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2751 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2752 PL_last_lop = PL_last_uni = Nullch;
3280af22 2753 PL_doextract = FALSE;
a0d0e21e 2754 }
4e553d73 2755 }
463ee0b2 2756 incline(s);
3280af22
NIS
2757 } while (PL_doextract);
2758 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2759 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2760 SV * const sv = NEWSV(85,0);
a687059c 2761
93a17b20 2762 sv_upgrade(sv, SVt_PVMG);
3280af22 2763 sv_setsv(sv,PL_linestr);
0ac0412a 2764 (void)SvIOK_on(sv);
45977657 2765 SvIV_set(sv, 0);
57843af0 2766 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2767 }
3280af22 2768 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2769 PL_last_lop = PL_last_uni = Nullch;
57843af0 2770 if (CopLINE(PL_curcop) == 1) {
3280af22 2771 while (s < PL_bufend && isSPACE(*s))
79072805 2772 s++;
a0d0e21e 2773 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2774 s++;
44a8e56a 2775 d = Nullch;
3280af22 2776 if (!PL_in_eval) {
44a8e56a 2777 if (*s == '#' && *(s+1) == '!')
2778 d = s + 2;
2779#ifdef ALTERNATE_SHEBANG
2780 else {
bfed75c6 2781 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 2782 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2783 d = s + (sizeof(as) - 1);
2784 }
2785#endif /* ALTERNATE_SHEBANG */
2786 }
2787 if (d) {
b8378b72 2788 char *ipath;
774d564b 2789 char *ipathend;
b8378b72 2790
774d564b 2791 while (isSPACE(*d))
b8378b72
CS
2792 d++;
2793 ipath = d;
774d564b 2794 while (*d && !isSPACE(*d))
2795 d++;
2796 ipathend = d;
2797
2798#ifdef ARG_ZERO_IS_SCRIPT
2799 if (ipathend > ipath) {
2800 /*
2801 * HP-UX (at least) sets argv[0] to the script name,
2802 * which makes $^X incorrect. And Digital UNIX and Linux,
2803 * at least, set argv[0] to the basename of the Perl
2804 * interpreter. So, having found "#!", we'll set it right.
2805 */
ee2f7564 2806 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2807 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2808 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2809 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2810 SvSETMAGIC(x);
2811 }
556c1dec
JH
2812 else {
2813 STRLEN blen;
2814 STRLEN llen;
cfd0369c 2815 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2816 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2817 if (llen < blen) {
2818 bstart += blen - llen;
2819 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2820 sv_setpvn(x, ipath, ipathend - ipath);
2821 SvSETMAGIC(x);
2822 }
2823 }
2824 }
774d564b 2825 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2826 }
774d564b 2827#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2828
2829 /*
2830 * Look for options.
2831 */
748a9306 2832 d = instr(s,"perl -");
84e30d1a 2833 if (!d) {
748a9306 2834 d = instr(s,"perl");
84e30d1a
GS
2835#if defined(DOSISH)
2836 /* avoid getting into infinite loops when shebang
2837 * line contains "Perl" rather than "perl" */
2838 if (!d) {
2839 for (d = ipathend-4; d >= ipath; --d) {
2840 if ((*d == 'p' || *d == 'P')
2841 && !ibcmp(d, "perl", 4))
2842 {
2843 break;
2844 }
2845 }
2846 if (d < ipath)
2847 d = Nullch;
2848 }
2849#endif
2850 }
44a8e56a 2851#ifdef ALTERNATE_SHEBANG
2852 /*
2853 * If the ALTERNATE_SHEBANG on this system starts with a
2854 * character that can be part of a Perl expression, then if
2855 * we see it but not "perl", we're probably looking at the
2856 * start of Perl code, not a request to hand off to some
2857 * other interpreter. Similarly, if "perl" is there, but
2858 * not in the first 'word' of the line, we assume the line
2859 * contains the start of the Perl program.
44a8e56a 2860 */
2861 if (d && *s != '#') {
f54cb97a 2862 const char *c = ipath;
44a8e56a 2863 while (*c && !strchr("; \t\r\n\f\v#", *c))
2864 c++;
2865 if (c < d)
2866 d = Nullch; /* "perl" not in first word; ignore */
2867 else
2868 *s = '#'; /* Don't try to parse shebang line */
2869 }
774d564b 2870#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2871#ifndef MACOS_TRADITIONAL
748a9306 2872 if (!d &&
44a8e56a 2873 *s == '#' &&
774d564b 2874 ipathend > ipath &&
3280af22 2875 !PL_minus_c &&
748a9306 2876 !instr(s,"indir") &&
3280af22 2877 instr(PL_origargv[0],"perl"))
748a9306 2878 {
27da23d5 2879 dVAR;
9f68db38 2880 char **newargv;
9f68db38 2881
774d564b 2882 *ipathend = '\0';
2883 s = ipathend + 1;
3280af22 2884 while (s < PL_bufend && isSPACE(*s))
9f68db38 2885 s++;
3280af22 2886 if (s < PL_bufend) {
a02a5408 2887 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2888 newargv[1] = s;
3280af22 2889 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2890 s++;
2891 *s = '\0';
3280af22 2892 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2893 }
2894 else
3280af22 2895 newargv = PL_origargv;
774d564b 2896 newargv[0] = ipath;
b35112e7 2897 PERL_FPU_PRE_EXEC
b4748376 2898 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2899 PERL_FPU_POST_EXEC
cea2e8a9 2900 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2901 }
bf4acbe4 2902#endif
748a9306 2903 if (d) {
f54cb97a
AL
2904 const U32 oldpdb = PL_perldb;
2905 const bool oldn = PL_minus_n;
2906 const bool oldp = PL_minus_p;
748a9306
LW
2907
2908 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2909 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2910
2911 if (*d++ == '-') {
f54cb97a 2912 const bool switches_done = PL_doswitches;
8cc95fdb 2913 do {
3ffe3ee4 2914 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2915 const char * const m = d;
8cc95fdb 2916 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2917 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2918 (int)(d - m), m);
2919 }
2920 d = moreswitches(d);
2921 } while (d);
f0b2cf55
YST
2922 if (PL_doswitches && !switches_done) {
2923 int argc = PL_origargc;
2924 char **argv = PL_origargv;
2925 do {
2926 argc--,argv++;
2927 } while (argc && argv[0][0] == '-' && argv[0][1]);
2928 init_argv_symbols(argc,argv);
2929 }
155aba94
GS
2930 if ((PERLDB_LINE && !oldpdb) ||
2931 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2932 /* if we have already added "LINE: while (<>) {",
2933 we must not do it again */
748a9306 2934 {
c69006e4 2935 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2936 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2937 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2938 PL_last_lop = PL_last_uni = Nullch;
3280af22 2939 PL_preambled = FALSE;
84902520 2940 if (PERLDB_LINE)
3280af22 2941 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2942 goto retry;
2943 }
a11ec5a9
RGS
2944 if (PL_doswitches && !switches_done) {
2945 int argc = PL_origargc;
2946 char **argv = PL_origargv;
2947 do {
2948 argc--,argv++;
2949 } while (argc && argv[0][0] == '-' && argv[0][1]);
2950 init_argv_symbols(argc,argv);
2951 }
a0d0e21e 2952 }
79072805 2953 }
9f68db38 2954 }
79072805 2955 }
3280af22
NIS
2956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2957 PL_bufptr = s;
2958 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2959 return yylex();
ae986130 2960 }
378cc40b 2961 goto retry;
4fdae800 2962 case '\r':
6a27c188 2963#ifdef PERL_STRICT_CR
cea2e8a9 2964 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2965 Perl_croak(aTHX_
cc507455 2966 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2967#endif
4fdae800 2968 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2969#ifdef MACOS_TRADITIONAL
2970 case '\312':
2971#endif
378cc40b
LW
2972 s++;
2973 goto retry;
378cc40b 2974 case '#':
e929a76b 2975 case '\n':
3280af22 2976 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2977 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2978 /* handle eval qq[#line 1 "foo"\n ...] */
2979 CopLINE_dec(PL_curcop);
2980 incline(s);
2981 }
3280af22 2982 d = PL_bufend;
a687059c 2983 while (s < d && *s != '\n')
378cc40b 2984 s++;
0f85fab0 2985 if (s < d)
378cc40b 2986 s++;
78c267c1 2987 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2988 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2989 incline(s);
3280af22
NIS
2990 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2991 PL_bufptr = s;
2992 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2993 return yylex();
a687059c 2994 }
378cc40b 2995 }
a687059c 2996 else {
378cc40b 2997 *s = '\0';
3280af22 2998 PL_bufend = s;
a687059c 2999 }
378cc40b
LW
3000 goto retry;
3001 case '-':
79072805 3002 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
3003 I32 ftst = 0;
3004
378cc40b 3005 s++;
3280af22 3006 PL_bufptr = s;
748a9306
LW
3007 tmp = *s++;
3008
bf4acbe4 3009 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3010 s++;
3011
3012 if (strnEQ(s,"=>",2)) {
3280af22 3013 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 3014 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3015 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 3016 } );
748a9306
LW
3017 OPERATOR('-'); /* unary minus */
3018 }
3280af22 3019 PL_last_uni = PL_oldbufptr;
748a9306 3020 switch (tmp) {
e5edeb50
JH
3021 case 'r': ftst = OP_FTEREAD; break;
3022 case 'w': ftst = OP_FTEWRITE; break;
3023 case 'x': ftst = OP_FTEEXEC; break;
3024 case 'o': ftst = OP_FTEOWNED; break;
3025 case 'R': ftst = OP_FTRREAD; break;
3026 case 'W': ftst = OP_FTRWRITE; break;
3027 case 'X': ftst = OP_FTREXEC; break;
3028 case 'O': ftst = OP_FTROWNED; break;
3029 case 'e': ftst = OP_FTIS; break;
3030 case 'z': ftst = OP_FTZERO; break;
3031 case 's': ftst = OP_FTSIZE; break;
3032 case 'f': ftst = OP_FTFILE; break;
3033 case 'd': ftst = OP_FTDIR; break;
3034 case 'l': ftst = OP_FTLINK; break;
3035 case 'p': ftst = OP_FTPIPE; break;
3036 case 'S': ftst = OP_FTSOCK; break;
3037 case 'u': ftst = OP_FTSUID; break;
3038 case 'g': ftst = OP_FTSGID; break;
3039 case 'k': ftst = OP_FTSVTX; break;
3040 case 'b': ftst = OP_FTBLK; break;
3041 case 'c': ftst = OP_FTCHR; break;
3042 case 't': ftst = OP_FTTTY; break;
3043 case 'T': ftst = OP_FTTEXT; break;
3044 case 'B': ftst = OP_FTBINARY; break;
3045 case 'M': case 'A': case 'C':
3046 gv_fetchpv("\024",TRUE, SVt_PV);
3047 switch (tmp) {
3048 case 'M': ftst = OP_FTMTIME; break;
3049 case 'A': ftst = OP_FTATIME; break;
3050 case 'C': ftst = OP_FTCTIME; break;
3051 default: break;
3052 }
3053 break;
378cc40b 3054 default:
378cc40b
LW
3055 break;
3056 }
e5edeb50 3057 if (ftst) {
eb160463 3058 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3059 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3060 "### Saw file test %c\n", (int)ftst);
5f80b19c 3061 } );
e5edeb50
JH
3062 FTST(ftst);
3063 }
3064 else {
3065 /* Assume it was a minus followed by a one-letter named
3066 * subroutine call (or a -bareword), then. */
95c31fe3 3067 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3068 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3069 (int) tmp);
5f80b19c 3070 } );
3cf7b4c4 3071 s = --PL_bufptr;
e5edeb50 3072 }
378cc40b 3073 }
a687059c
LW
3074 tmp = *s++;
3075 if (*s == tmp) {
3076 s++;
3280af22 3077 if (PL_expect == XOPERATOR)
79072805
LW
3078 TERM(POSTDEC);
3079 else
3080 OPERATOR(PREDEC);
3081 }
3082 else if (*s == '>') {
3083 s++;
3084 s = skipspace(s);
7e2040f0 3085 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3086 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3087 TOKEN(ARROW);
79072805 3088 }
748a9306
LW
3089 else if (*s == '$')
3090 OPERATOR(ARROW);
463ee0b2 3091 else
748a9306 3092 TERM(ARROW);
a687059c 3093 }
3280af22 3094 if (PL_expect == XOPERATOR)
79072805
LW
3095 Aop(OP_SUBTRACT);
3096 else {
3280af22 3097 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3098 check_uni();
79072805 3099 OPERATOR('-'); /* unary minus */
2f3197b3 3100 }
79072805 3101
378cc40b 3102 case '+':
a687059c
LW
3103 tmp = *s++;
3104 if (*s == tmp) {
378cc40b 3105 s++;
3280af22 3106 if (PL_expect == XOPERATOR)
79072805
LW
3107 TERM(POSTINC);
3108 else
3109 OPERATOR(PREINC);
378cc40b 3110 }
3280af22 3111 if (PL_expect == XOPERATOR)
79072805
LW
3112 Aop(OP_ADD);
3113 else {
3280af22 3114 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3115 check_uni();
a687059c 3116 OPERATOR('+');
2f3197b3 3117 }
a687059c 3118
378cc40b 3119 case '*':
3280af22
NIS
3120 if (PL_expect != XOPERATOR) {
3121 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3122 PL_expect = XOPERATOR;
3123 force_ident(PL_tokenbuf, '*');
3124 if (!*PL_tokenbuf)
a0d0e21e 3125 PREREF('*');
79072805 3126 TERM('*');
a687059c 3127 }
79072805
LW
3128 s++;
3129 if (*s == '*') {
a687059c 3130 s++;
79072805 3131 PWop(OP_POW);
a687059c 3132 }
79072805
LW
3133 Mop(OP_MULTIPLY);
3134
378cc40b 3135 case '%':
3280af22 3136 if (PL_expect == XOPERATOR) {
bbce6d69 3137 ++s;
3138 Mop(OP_MODULO);
a687059c 3139 }
3280af22
NIS
3140 PL_tokenbuf[0] = '%';
3141 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3142 if (!PL_tokenbuf[1]) {
bbce6d69 3143 PREREF('%');
a687059c 3144 }
3280af22 3145 PL_pending_ident = '%';
bbce6d69 3146 TERM('%');
a687059c 3147
378cc40b 3148 case '^':
79072805 3149 s++;
a0d0e21e 3150 BOop(OP_BIT_XOR);
79072805 3151 case '[':
3280af22 3152 PL_lex_brackets++;
79072805 3153 /* FALL THROUGH */
378cc40b 3154 case '~':
378cc40b 3155 case ',':
378cc40b
LW
3156 tmp = *s++;
3157 OPERATOR(tmp);
a0d0e21e
LW
3158 case ':':
3159 if (s[1] == ':') {
3160 len = 0;
3161 goto just_a_word;
3162 }
3163 s++;
09bef843
SB
3164 switch (PL_expect) {
3165 OP *attrs;
3166 case XOPERATOR:
3167 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3168 break;
3169 PL_bufptr = s; /* update in case we back off */
3170 goto grabattrs;
3171 case XATTRBLOCK:
3172 PL_expect = XBLOCK;
3173 goto grabattrs;
3174 case XATTRTERM:
3175 PL_expect = XTERMBLOCK;
3176 grabattrs:
3177 s = skipspace(s);
3178 attrs = Nullop;
7e2040f0 3179 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3180 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3181 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3182 if (tmp < 0) tmp = -tmp;
3183 switch (tmp) {
3184 case KEY_or:
3185 case KEY_and:
c963b151 3186 case KEY_err:
f9829d6b
GS
3187 case KEY_for:
3188 case KEY_unless:
3189 case KEY_if:
3190 case KEY_while:
3191 case KEY_until:
3192 goto got_attrs;
3193 default:
3194 break;
3195 }
3196 }
09bef843
SB
3197 if (*d == '(') {
3198 d = scan_str(d,TRUE,TRUE);
3199 if (!d) {
09bef843
SB
3200 /* MUST advance bufptr here to avoid bogus
3201 "at end of line" context messages from yyerror().
3202 */
3203 PL_bufptr = s + len;
3204 yyerror("Unterminated attribute parameter in attribute list");
3205 if (attrs)
3206 op_free(attrs);
bbf60fe6 3207 return REPORT(0); /* EOF indicator */
09bef843
SB
3208 }
3209 }
3210 if (PL_lex_stuff) {
3211 SV *sv = newSVpvn(s, len);
3212 sv_catsv(sv, PL_lex_stuff);
3213 attrs = append_elem(OP_LIST, attrs,
3214 newSVOP(OP_CONST, 0, sv));
3215 SvREFCNT_dec(PL_lex_stuff);
3216 PL_lex_stuff = Nullsv;
3217 }
3218 else {
371fce9b
DM
3219 if (len == 6 && strnEQ(s, "unique", len)) {
3220 if (PL_in_my == KEY_our)
3221#ifdef USE_ITHREADS
3222 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3223#else
3224 ; /* skip to avoid loading attributes.pm */
3225#endif
bfed75c6 3226 else
371fce9b
DM
3227 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3228 }
3229
d3cea301
SB
3230 /* NOTE: any CV attrs applied here need to be part of
3231 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3232 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3233 CvLVALUE_on(PL_compcv);
3234 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3235 CvLOCKED_on(PL_compcv);
3236 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3237 CvMETHOD_on(PL_compcv);
06492da6
SF
3238 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3239 CvASSERTION_on(PL_compcv);
78f9721b
SM
3240 /* After we've set the flags, it could be argued that
3241 we don't need to do the attributes.pm-based setting
3242 process, and shouldn't bother appending recognized
d3cea301
SB
3243 flags. To experiment with that, uncomment the
3244 following "else". (Note that's already been
3245 uncommented. That keeps the above-applied built-in
3246 attributes from being intercepted (and possibly
3247 rejected) by a package's attribute routines, but is
3248 justified by the performance win for the common case
3249 of applying only built-in attributes.) */
0256094b 3250 else
78f9721b
SM
3251 attrs = append_elem(OP_LIST, attrs,
3252 newSVOP(OP_CONST, 0,
3253 newSVpvn(s, len)));
09bef843
SB
3254 }
3255 s = skipspace(d);
0120eecf 3256 if (*s == ':' && s[1] != ':')
09bef843 3257 s = skipspace(s+1);
0120eecf
GS
3258 else if (s == d)
3259 break; /* require real whitespace or :'s */
09bef843 3260 }
f9829d6b 3261 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3262 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3263 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3264 /* If here for an expression, and parsed no attrs, back off. */
3265 if (tmp == '=' && !attrs) {
3266 s = PL_bufptr;
3267 break;
3268 }
3269 /* MUST advance bufptr here to avoid bogus "at end of line"
3270 context messages from yyerror().
3271 */
3272 PL_bufptr = s;
3273 if (!*s)
3274 yyerror("Unterminated attribute list");
3275 else
3276 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3277 q, *s, q));
3278 if (attrs)
3279 op_free(attrs);
3280 OPERATOR(':');
3281 }
f9829d6b 3282 got_attrs:
09bef843
SB
3283 if (attrs) {
3284 PL_nextval[PL_nexttoke].opval = attrs;
3285 force_next(THING);
3286 }
3287 TOKEN(COLONATTR);
3288 }
a0d0e21e 3289 OPERATOR(':');
8990e307
LW
3290 case '(':
3291 s++;
3280af22
NIS
3292 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3293 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3294 else
3280af22 3295 PL_expect = XTERM;
4a202259 3296 s = skipspace(s);
a0d0e21e 3297 TOKEN('(');
378cc40b 3298 case ';':
f4dd75d9 3299 CLINE;
378cc40b
LW
3300 tmp = *s++;
3301 OPERATOR(tmp);
3302 case ')':
378cc40b 3303 tmp = *s++;
16d20bd9
AD
3304 s = skipspace(s);
3305 if (*s == '{')
3306 PREBLOCK(tmp);
378cc40b 3307 TERM(tmp);
79072805
LW
3308 case ']':
3309 s++;
3280af22 3310 if (PL_lex_brackets <= 0)
d98d5fff 3311 yyerror("Unmatched right square bracket");
463ee0b2 3312 else
3280af22
NIS
3313 --PL_lex_brackets;
3314 if (PL_lex_state == LEX_INTERPNORMAL) {
3315 if (PL_lex_brackets == 0) {
a0d0e21e 3316 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3317 PL_lex_state = LEX_INTERPEND;
79072805
LW
3318 }
3319 }
4633a7c4 3320 TERM(']');
79072805
LW
3321 case '{':
3322 leftbracket:
79072805 3323 s++;
3280af22 3324 if (PL_lex_brackets > 100) {
8edd5f42 3325 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3326 }
3280af22 3327 switch (PL_expect) {
a0d0e21e 3328 case XTERM:
3280af22 3329 if (PL_lex_formbrack) {
a0d0e21e
LW
3330 s--;
3331 PRETERMBLOCK(DO);
3332 }
3280af22
NIS
3333 if (PL_oldoldbufptr == PL_last_lop)
3334 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3335 else
3280af22 3336 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3337 OPERATOR(HASHBRACK);
a0d0e21e 3338 case XOPERATOR:
bf4acbe4 3339 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3340 s++;
44a8e56a 3341 d = s;
3280af22
NIS
3342 PL_tokenbuf[0] = '\0';
3343 if (d < PL_bufend && *d == '-') {
3344 PL_tokenbuf[0] = '-';
44a8e56a 3345 d++;
bf4acbe4 3346 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3347 d++;
3348 }
7e2040f0 3349 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3350 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3351 FALSE, &len);
bf4acbe4 3352 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3353 d++;
3354 if (*d == '}') {
f54cb97a 3355 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3356 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3357 if (minus)
3358 force_next('-');
748a9306
LW
3359 }
3360 }
3361 /* FALL THROUGH */
09bef843 3362 case XATTRBLOCK:
748a9306 3363 case XBLOCK:
3280af22
NIS
3364 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3365 PL_expect = XSTATE;
a0d0e21e 3366 break;
09bef843 3367 case XATTRTERM:
a0d0e21e 3368 case XTERMBLOCK:
3280af22
NIS
3369 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3370 PL_expect = XSTATE;
a0d0e21e
LW
3371 break;
3372 default: {
f54cb97a 3373 const char *t;
3280af22
NIS
3374 if (PL_oldoldbufptr == PL_last_lop)
3375 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3376 else
3280af22 3377 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3378 s = skipspace(s);
8452ff4b
SB
3379 if (*s == '}') {
3380 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3381 PL_expect = XTERM;
3382 /* This hack is to get the ${} in the message. */
3383 PL_bufptr = s+1;
3384 yyerror("syntax error");
3385 break;
3386 }
a0d0e21e 3387 OPERATOR(HASHBRACK);
8452ff4b 3388 }
b8a4b1be
GS
3389 /* This hack serves to disambiguate a pair of curlies
3390 * as being a block or an anon hash. Normally, expectation
3391 * determines that, but in cases where we're not in a
3392 * position to expect anything in particular (like inside
3393 * eval"") we have to resolve the ambiguity. This code
3394 * covers the case where the first term in the curlies is a
3395 * quoted string. Most other cases need to be explicitly
a0288114 3396 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3397 * curly in order to force resolution as an anon hash.
3398 *
3399 * XXX should probably propagate the outer expectation
3400 * into eval"" to rely less on this hack, but that could
3401 * potentially break current behavior of eval"".
3402 * GSAR 97-07-21
3403 */
3404 t = s;
3405 if (*s == '\'' || *s == '"' || *s == '`') {
3406 /* common case: get past first string, handling escapes */
3280af22 3407 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3408 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3409 t++;
3410 t++;
a0d0e21e 3411 }
b8a4b1be 3412 else if (*s == 'q') {
3280af22 3413 if (++t < PL_bufend
b8a4b1be 3414 && (!isALNUM(*t)
3280af22 3415 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3416 && !isALNUM(*t))))
3417 {
abc667d1 3418 /* skip q//-like construct */
f54cb97a 3419 const char *tmps;
b8a4b1be
GS
3420 char open, close, term;
3421 I32 brackets = 1;
3422
3280af22 3423 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3424 t++;
abc667d1
DM
3425 /* check for q => */
3426 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3427 OPERATOR(HASHBRACK);
3428 }
b8a4b1be
GS
3429 term = *t;
3430 open = term;
3431 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3432 term = tmps[5];
3433 close = term;
3434 if (open == close)
3280af22
NIS
3435 for (t++; t < PL_bufend; t++) {
3436 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3437 t++;
6d07e5e9 3438 else if (*t == open)
b8a4b1be
GS
3439 break;
3440 }
abc667d1 3441 else {
3280af22
NIS
3442 for (t++; t < PL_bufend; t++) {
3443 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3444 t++;
6d07e5e9 3445 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3446 break;
3447 else if (*t == open)
3448 brackets++;
3449 }
abc667d1
DM
3450 }
3451 t++;
b8a4b1be 3452 }
abc667d1
DM
3453 else
3454 /* skip plain q word */
3455 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3456 t += UTF8SKIP(t);
a0d0e21e 3457 }
7e2040f0 3458 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3459 t += UTF8SKIP(t);
7e2040f0 3460 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3461 t += UTF8SKIP(t);
a0d0e21e 3462 }
3280af22 3463 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3464 t++;
b8a4b1be
GS
3465 /* if comma follows first term, call it an anon hash */
3466 /* XXX it could be a comma expression with loop modifiers */
3280af22 3467 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3468 || (*t == '=' && t[1] == '>')))
a0d0e21e 3469 OPERATOR(HASHBRACK);
3280af22 3470 if (PL_expect == XREF)
4e4e412b 3471 PL_expect = XTERM;
a0d0e21e 3472 else {
3280af22
NIS
3473 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3474 PL_expect = XSTATE;
a0d0e21e 3475 }
8990e307 3476 }
a0d0e21e 3477 break;
463ee0b2 3478 }
57843af0 3479 yylval.ival = CopLINE(PL_curcop);
79072805 3480 if (isSPACE(*s) || *s == '#')
3280af22 3481 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3482 TOKEN('{');
378cc40b 3483 case '}':
79072805
LW
3484 rightbracket:
3485 s++;
3280af22 3486 if (PL_lex_brackets <= 0)
d98d5fff 3487 yyerror("Unmatched right curly bracket");
463ee0b2 3488 else
3280af22 3489 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3490 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3491 PL_lex_formbrack = 0;
3492 if (PL_lex_state == LEX_INTERPNORMAL) {
3493 if (PL_lex_brackets == 0) {
9059aa12
LW
3494 if (PL_expect & XFAKEBRACK) {
3495 PL_expect &= XENUMMASK;
3280af22
NIS
3496 PL_lex_state = LEX_INTERPEND;
3497 PL_bufptr = s;
cea2e8a9 3498 return yylex(); /* ignore fake brackets */
79072805 3499 }
fa83b5b6 3500 if (*s == '-' && s[1] == '>')
3280af22 3501 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3502 else if (*s != '[' && *s != '{')
3280af22 3503 PL_lex_state = LEX_INTERPEND;
79072805
LW
3504 }
3505 }
9059aa12
LW
3506 if (PL_expect & XFAKEBRACK) {
3507 PL_expect &= XENUMMASK;
3280af22 3508 PL_bufptr = s;
cea2e8a9 3509 return yylex(); /* ignore fake brackets */
748a9306 3510 }
79072805
LW
3511 force_next('}');
3512 TOKEN(';');
378cc40b
LW
3513 case '&':
3514 s++;
3515 tmp = *s++;
3516 if (tmp == '&')
a0d0e21e 3517 AOPERATOR(ANDAND);
378cc40b