This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
More SvPV consting
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
27da23d5
JH
29static const char ident_too_long[] =
30 "Identifier too long";
31static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
8903cb82 35
acfe0abc 36static void restore_rsfp(pTHX_ void *f);
6e3aabd6 37#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
38static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 40#endif
51371543 41
9059aa12
LW
42#define XFAKEBRACK 128
43#define XENUMMASK 127
44
39e02b42
JH
45#ifdef USE_UTF8_SCRIPTS
46# define UTF (!IN_BYTES)
2b9d42f0 47#else
746b446a 48# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 49#endif
a0ed51b3 50
61f0cdd9 51/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
52 * 1999-02-27 mjd-perl-patch@plover.com */
53#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
54
bf4acbe4
GS
55/* On MacOS, respect nonbreaking spaces */
56#ifdef MACOS_TRADITIONAL
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
58#else
59#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60#endif
61
ffb4593c
NT
62/* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
79072805
LW
64 * can get by with a single comparison (if the compiler is smart enough).
65 */
66
fb73857a
PP
67/* #define LEX_NOTPARSING 11 is done in perl.h. */
68
55497cff
PP
69#define LEX_NORMAL 10
70#define LEX_INTERPNORMAL 9
71#define LEX_INTERPCASEMOD 8
72#define LEX_INTERPPUSH 7
73#define LEX_INTERPSTART 6
74#define LEX_INTERPEND 5
75#define LEX_INTERPENDMAYBE 4
76#define LEX_INTERPCONCAT 3
77#define LEX_INTERPCONST 2
78#define LEX_FORMLINE 1
79#define LEX_KNOWNEXT 0
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
24801a4b
JH
110/* According to some strict interpretations of ANSI C89 one cannot
111 * cast void pointers to code pointers or vice versa (as filter_add(),
112 * filter_del(), and filter_read() will want to do). We should still
113 * be able to use a union for sneaky "casting". */
114typedef union {
115 XPVIO* iop;
116 filter_t filter;
117} xpvio_filter_u;
118
ffb4593c
NT
119/*
120 * Convenience functions to return different tokens and prime the
9cbb5ea2 121 * lexer for the next token. They all take an argument.
ffb4593c
NT
122 *
123 * TOKEN : generic token (used for '(', DOLSHARP, etc)
124 * OPERATOR : generic operator
125 * AOPERATOR : assignment operator
126 * PREBLOCK : beginning the block after an if, while, foreach, ...
127 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
128 * PREREF : *EXPR where EXPR is not a simple identifier
129 * TERM : expression term
130 * LOOPX : loop exiting command (goto, last, dump, etc)
131 * FTST : file test operator
132 * FUN0 : zero-argument function
2d2e263d 133 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
134 * BOop : bitwise or or xor
135 * BAop : bitwise and
136 * SHop : shift operator
137 * PWop : power operator
9cbb5ea2 138 * PMop : pattern-matching operator
ffb4593c
NT
139 * Aop : addition-level operator
140 * Mop : multiplication-level operator
141 * Eop : equality-testing operator
e5edeb50 142 * Rop : relational operator <= != gt
ffb4593c
NT
143 *
144 * Also see LOP and lop() below.
145 */
146
998054bd 147#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 148# define REPORT(retval) tokereport(s,(int)retval)
998054bd 149#else
bbf60fe6 150# define REPORT(retval) (retval)
998054bd
SC
151#endif
152
bbf60fe6
DM
153#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
154#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
155#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
156#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
157#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
158#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
159#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
160#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
161#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
162#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
163#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
164#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
165#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
166#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
167#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
168#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
169#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
170#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
171#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
172#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 173
a687059c
LW
174/* This bit of chicanery makes a unary function followed by
175 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
176 * The UNIDOR macro is for unary functions that can be followed by the //
177 * operator (such as C<shift // 0>).
a687059c 178 */
376fcdbf
AL
179#define UNI2(f,x) { \
180 yylval.ival = f; \
181 PL_expect = x; \
182 PL_bufptr = s; \
183 PL_last_uni = PL_oldbufptr; \
184 PL_last_lop_op = f; \
185 if (*s == '(') \
186 return REPORT( (int)FUNC1 ); \
187 s = skipspace(s); \
188 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
189 }
6f33ba73
RGS
190#define UNI(f) UNI2(f,XTERM)
191#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 192
376fcdbf
AL
193#define UNIBRACK(f) { \
194 yylval.ival = f; \
195 PL_bufptr = s; \
196 PL_last_uni = PL_oldbufptr; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
199 s = skipspace(s); \
200 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
201 }
79072805 202
9f68db38 203/* grandfather return to old style */
3280af22 204#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 205
8fa7f367
JH
206#ifdef DEBUGGING
207
bbf60fe6
DM
208/* how to interpret the yylval associated with the token */
209enum token_type {
210 TOKENTYPE_NONE,
211 TOKENTYPE_IVAL,
212 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
213 TOKENTYPE_PVAL,
214 TOKENTYPE_OPVAL,
215 TOKENTYPE_GVVAL
216};
217
27da23d5
JH
218static struct debug_tokens { const int token, type; const char *name; }
219 const debug_tokens[] =
9041c2e3 220{
bbf60fe6
DM
221 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
222 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
223 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
224 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
225 { ARROW, TOKENTYPE_NONE, "ARROW" },
226 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
227 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
228 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
229 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
230 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
231 { DO, TOKENTYPE_NONE, "DO" },
232 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
233 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
234 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
235 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
236 { ELSE, TOKENTYPE_NONE, "ELSE" },
237 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
238 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
239 { FOR, TOKENTYPE_IVAL, "FOR" },
240 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
241 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
242 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
243 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
244 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
245 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
246 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
247 { IF, TOKENTYPE_IVAL, "IF" },
248 { LABEL, TOKENTYPE_PVAL, "LABEL" },
249 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
250 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
251 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
252 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
253 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
254 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
255 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
256 { MY, TOKENTYPE_IVAL, "MY" },
257 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
258 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
259 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
260 { OROP, TOKENTYPE_IVAL, "OROP" },
261 { OROR, TOKENTYPE_NONE, "OROR" },
262 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
263 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
264 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
265 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
266 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
267 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
268 { PREINC, TOKENTYPE_NONE, "PREINC" },
269 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
270 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
271 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
272 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
273 { SUB, TOKENTYPE_NONE, "SUB" },
274 { THING, TOKENTYPE_OPVAL, "THING" },
275 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
276 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
277 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
278 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
279 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
280 { USE, TOKENTYPE_IVAL, "USE" },
281 { WHILE, TOKENTYPE_IVAL, "WHILE" },
282 { WORD, TOKENTYPE_OPVAL, "WORD" },
283 { 0, TOKENTYPE_NONE, 0 }
284};
285
286/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 287
bbf60fe6 288STATIC int
bfed75c6 289S_tokereport(pTHX_ const char* s, I32 rv)
bbf60fe6
DM
290{
291 if (DEBUG_T_TEST) {
bfed75c6 292 const char *name = Nullch;
bbf60fe6 293 enum token_type type = TOKENTYPE_NONE;
f54cb97a 294 const struct debug_tokens *p;
bfed75c6 295 SV* report = newSVpvn("<== ", 4);
bbf60fe6 296
f54cb97a 297 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
298 if (p->token == (int)rv) {
299 name = p->name;
300 type = p->type;
301 break;
302 }
303 }
304 if (name)
54667de8 305 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
306 else if ((char)rv > ' ' && (char)rv < '~')
307 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
308 else if (!rv)
54667de8 309 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
310 else
311 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
312 switch (type) {
313 case TOKENTYPE_NONE:
314 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
315 break;
316 case TOKENTYPE_IVAL:
e4584336 317 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
318 break;
319 case TOKENTYPE_OPNUM:
320 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
321 PL_op_name[yylval.ival]);
322 break;
323 case TOKENTYPE_PVAL:
324 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
325 break;
326 case TOKENTYPE_OPVAL:
401441c0
RGS
327 if (yylval.opval)
328 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 329 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
330 else
331 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
332 break;
333 }
e4584336 334 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
998054bd
SC
335 if (s - PL_bufptr > 0)
336 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
337 else {
338 if (PL_oldbufptr && *PL_oldbufptr)
339 sv_catpv(report, PL_tokenbuf);
340 }
bbf60fe6
DM
341 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
342 };
343 return (int)rv;
998054bd
SC
344}
345
8fa7f367
JH
346#endif
347
ffb4593c
NT
348/*
349 * S_ao
350 *
c963b151
BD
351 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
352 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
353 */
354
76e3520e 355STATIC int
cea2e8a9 356S_ao(pTHX_ int toketype)
a0d0e21e 357{
3280af22
NIS
358 if (*PL_bufptr == '=') {
359 PL_bufptr++;
a0d0e21e
LW
360 if (toketype == ANDAND)
361 yylval.ival = OP_ANDASSIGN;
362 else if (toketype == OROR)
363 yylval.ival = OP_ORASSIGN;
c963b151
BD
364 else if (toketype == DORDOR)
365 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
366 toketype = ASSIGNOP;
367 }
368 return toketype;
369}
370
ffb4593c
NT
371/*
372 * S_no_op
373 * When Perl expects an operator and finds something else, no_op
374 * prints the warning. It always prints "<something> found where
375 * operator expected. It prints "Missing semicolon on previous line?"
376 * if the surprise occurs at the start of the line. "do you need to
377 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
378 * where the compiler doesn't know if foo is a method call or a function.
379 * It prints "Missing operator before end of line" if there's nothing
380 * after the missing operator, or "... before <...>" if there is something
381 * after the missing operator.
382 */
383
76e3520e 384STATIC void
bfed75c6 385S_no_op(pTHX_ const char *what, char *s)
463ee0b2 386{
3280af22
NIS
387 char *oldbp = PL_bufptr;
388 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 389
1189a94a
GS
390 if (!s)
391 s = oldbp;
07c798fb 392 else
1189a94a 393 PL_bufptr = s;
cea2e8a9 394 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
395 if (ckWARN_d(WARN_SYNTAX)) {
396 if (is_first)
397 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
398 "\t(Missing semicolon on previous line?)\n");
399 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 400 const char *t;
56da5a46
RGS
401 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
402 if (t < PL_bufptr && isSPACE(*t))
403 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
404 "\t(Do you need to predeclare %.*s?)\n",
405 t - PL_oldoldbufptr, PL_oldoldbufptr);
406 }
407 else {
408 assert(s >= oldbp);
409 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
410 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
411 }
07c798fb 412 }
3280af22 413 PL_bufptr = oldbp;
8990e307
LW
414}
415
ffb4593c
NT
416/*
417 * S_missingterm
418 * Complain about missing quote/regexp/heredoc terminator.
419 * If it's called with (char *)NULL then it cauterizes the line buffer.
420 * If we're in a delimited string and the delimiter is a control
421 * character, it's reformatted into a two-char sequence like ^C.
422 * This is fatal.
423 */
424
76e3520e 425STATIC void
cea2e8a9 426S_missingterm(pTHX_ char *s)
8990e307
LW
427{
428 char tmpbuf[3];
429 char q;
430 if (s) {
431 char *nl = strrchr(s,'\n');
d2719217 432 if (nl)
8990e307
LW
433 *nl = '\0';
434 }
9d116dd7
JH
435 else if (
436#ifdef EBCDIC
437 iscntrl(PL_multi_close)
438#else
439 PL_multi_close < 32 || PL_multi_close == 127
440#endif
441 ) {
8990e307 442 *tmpbuf = '^';
3280af22 443 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
444 tmpbuf[2] = '\0';
445 s = tmpbuf;
446 }
447 else {
eb160463 448 *tmpbuf = (char)PL_multi_close;
8990e307
LW
449 tmpbuf[1] = '\0';
450 s = tmpbuf;
451 }
452 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 453 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 454}
79072805 455
ffb4593c
NT
456/*
457 * Perl_deprecate
ffb4593c
NT
458 */
459
79072805 460void
bfed75c6 461Perl_deprecate(pTHX_ const char *s)
a0d0e21e 462{
599cee73 463 if (ckWARN(WARN_DEPRECATED))
9014280d 464 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
465}
466
12bcd1a6 467void
bfed75c6 468Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
469{
470 /* This function should NOT be called for any new deprecated warnings */
471 /* Use Perl_deprecate instead */
472 /* */
473 /* It is here to maintain backward compatibility with the pre-5.8 */
474 /* warnings category hierarchy. The "deprecated" category used to */
475 /* live under the "syntax" category. It is now a top-level category */
476 /* in its own right. */
477
478 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 479 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
480 "Use of %s is deprecated", s);
481}
482
ffb4593c
NT
483/*
484 * depcom
9cbb5ea2 485 * Deprecate a comma-less variable list.
ffb4593c
NT
486 */
487
76e3520e 488STATIC void
cea2e8a9 489S_depcom(pTHX)
a0d0e21e 490{
12bcd1a6 491 deprecate_old("comma-less variable list");
a0d0e21e
LW
492}
493
ffb4593c 494/*
9cbb5ea2
GS
495 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
496 * utf16-to-utf8-reversed.
ffb4593c
NT
497 */
498
c39cd008
GS
499#ifdef PERL_CR_FILTER
500static void
501strip_return(SV *sv)
502{
95a20fc0 503 register const char *s = SvPVX_const(sv);
f54cb97a 504 register const char *e = s + SvCUR(sv);
c39cd008
GS
505 /* outer loop optimized to do nothing if there are no CR-LFs */
506 while (s < e) {
507 if (*s++ == '\r' && *s == '\n') {
508 /* hit a CR-LF, need to copy the rest */
509 register char *d = s - 1;
510 *d++ = *s++;
511 while (s < e) {
512 if (*s == '\r' && s[1] == '\n')
513 s++;
514 *d++ = *s++;
515 }
516 SvCUR(sv) -= s - d;
517 return;
518 }
519 }
520}
a868473f 521
76e3520e 522STATIC I32
c39cd008 523S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 524{
f54cb97a 525 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
526 if (count > 0 && !maxlen)
527 strip_return(sv);
528 return count;
a868473f
NIS
529}
530#endif
531
ffb4593c
NT
532/*
533 * Perl_lex_start
9cbb5ea2
GS
534 * Initialize variables. Uses the Perl save_stack to save its state (for
535 * recursive calls to the parser).
ffb4593c
NT
536 */
537
a0d0e21e 538void
864dbfa3 539Perl_lex_start(pTHX_ SV *line)
79072805 540{
8990e307
LW
541 char *s;
542 STRLEN len;
543
3280af22
NIS
544 SAVEI32(PL_lex_dojoin);
545 SAVEI32(PL_lex_brackets);
3280af22
NIS
546 SAVEI32(PL_lex_casemods);
547 SAVEI32(PL_lex_starts);
548 SAVEI32(PL_lex_state);
7766f137 549 SAVEVPTR(PL_lex_inpat);
3280af22 550 SAVEI32(PL_lex_inwhat);
18b09519
GS
551 if (PL_lex_state == LEX_KNOWNEXT) {
552 I32 toke = PL_nexttoke;
553 while (--toke >= 0) {
554 SAVEI32(PL_nexttype[toke]);
555 SAVEVPTR(PL_nextval[toke]);
556 }
557 SAVEI32(PL_nexttoke);
18b09519 558 }
57843af0 559 SAVECOPLINE(PL_curcop);
3280af22
NIS
560 SAVEPPTR(PL_bufptr);
561 SAVEPPTR(PL_bufend);
562 SAVEPPTR(PL_oldbufptr);
563 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
564 SAVEPPTR(PL_last_lop);
565 SAVEPPTR(PL_last_uni);
3280af22
NIS
566 SAVEPPTR(PL_linestart);
567 SAVESPTR(PL_linestr);
8edd5f42
RGS
568 SAVEGENERICPV(PL_lex_brackstack);
569 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 570 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
571 SAVESPTR(PL_lex_stuff);
572 SAVEI32(PL_lex_defer);
09bef843 573 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 574 SAVESPTR(PL_lex_repl);
bebdddfc
GS
575 SAVEINT(PL_expect);
576 SAVEINT(PL_lex_expect);
3280af22
NIS
577
578 PL_lex_state = LEX_NORMAL;
579 PL_lex_defer = 0;
580 PL_expect = XSTATE;
581 PL_lex_brackets = 0;
3280af22
NIS
582 New(899, PL_lex_brackstack, 120, char);
583 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
584 PL_lex_casemods = 0;
585 *PL_lex_casestack = '\0';
586 PL_lex_dojoin = 0;
587 PL_lex_starts = 0;
588 PL_lex_stuff = Nullsv;
589 PL_lex_repl = Nullsv;
590 PL_lex_inpat = 0;
76be56bc 591 PL_nexttoke = 0;
3280af22 592 PL_lex_inwhat = 0;
09bef843 593 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
594 PL_linestr = line;
595 if (SvREADONLY(PL_linestr))
596 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
597 s = SvPV(PL_linestr, len);
6f27f9a7 598 if (!len || s[len-1] != ';') {
3280af22
NIS
599 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
600 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
601 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 602 }
3280af22
NIS
603 SvTEMP_off(PL_linestr);
604 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
605 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 606 PL_last_lop = PL_last_uni = Nullch;
3280af22 607 PL_rsfp = 0;
79072805 608}
a687059c 609
ffb4593c
NT
610/*
611 * Perl_lex_end
9cbb5ea2
GS
612 * Finalizer for lexing operations. Must be called when the parser is
613 * done with the lexer.
ffb4593c
NT
614 */
615
463ee0b2 616void
864dbfa3 617Perl_lex_end(pTHX)
463ee0b2 618{
3280af22 619 PL_doextract = FALSE;
463ee0b2
LW
620}
621
ffb4593c
NT
622/*
623 * S_incline
624 * This subroutine has nothing to do with tilting, whether at windmills
625 * or pinball tables. Its name is short for "increment line". It
57843af0 626 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 627 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
628 * # line 500 "foo.pm"
629 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
630 */
631
76e3520e 632STATIC void
cea2e8a9 633S_incline(pTHX_ char *s)
463ee0b2
LW
634{
635 char *t;
636 char *n;
73659bf1 637 char *e;
463ee0b2 638 char ch;
463ee0b2 639
57843af0 640 CopLINE_inc(PL_curcop);
463ee0b2
LW
641 if (*s++ != '#')
642 return;
bf4acbe4 643 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
644 if (strnEQ(s, "line", 4))
645 s += 4;
646 else
647 return;
084592ab 648 if (SPACE_OR_TAB(*s))
73659bf1 649 s++;
4e553d73 650 else
73659bf1 651 return;
bf4acbe4 652 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
653 if (!isDIGIT(*s))
654 return;
655 n = s;
656 while (isDIGIT(*s))
657 s++;
bf4acbe4 658 while (SPACE_OR_TAB(*s))
463ee0b2 659 s++;
73659bf1 660 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 661 s++;
73659bf1
GS
662 e = t + 1;
663 }
463ee0b2 664 else {
463ee0b2 665 for (t = s; !isSPACE(*t); t++) ;
73659bf1 666 e = t;
463ee0b2 667 }
bf4acbe4 668 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
669 e++;
670 if (*e != '\n' && *e != '\0')
671 return; /* false alarm */
672
463ee0b2
LW
673 ch = *t;
674 *t = '\0';
f4dd75d9 675 if (t - s > 0) {
05ec9bb3 676 CopFILE_free(PL_curcop);
57843af0 677 CopFILE_set(PL_curcop, s);
f4dd75d9 678 }
463ee0b2 679 *t = ch;
57843af0 680 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
681}
682
ffb4593c
NT
683/*
684 * S_skipspace
685 * Called to gobble the appropriate amount and type of whitespace.
686 * Skips comments as well.
687 */
688
76e3520e 689STATIC char *
cea2e8a9 690S_skipspace(pTHX_ register char *s)
a687059c 691{
3280af22 692 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 693 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
694 s++;
695 return s;
696 }
697 for (;;) {
fd049845 698 STRLEN prevlen;
09bef843 699 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 700 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
701 while (s < PL_bufend && isSPACE(*s)) {
702 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
703 incline(s);
704 }
ffb4593c
NT
705
706 /* comment */
3280af22
NIS
707 if (s < PL_bufend && *s == '#') {
708 while (s < PL_bufend && *s != '\n')
463ee0b2 709 s++;
60e6418e 710 if (s < PL_bufend) {
463ee0b2 711 s++;
60e6418e
GS
712 if (PL_in_eval && !PL_rsfp) {
713 incline(s);
714 continue;
715 }
716 }
463ee0b2 717 }
ffb4593c
NT
718
719 /* only continue to recharge the buffer if we're at the end
720 * of the buffer, we're not reading from a source filter, and
721 * we're in normal lexing mode
722 */
09bef843
SB
723 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
724 PL_lex_state == LEX_FORMLINE)
463ee0b2 725 return s;
ffb4593c
NT
726
727 /* try to recharge the buffer */
9cbb5ea2
GS
728 if ((s = filter_gets(PL_linestr, PL_rsfp,
729 (prevlen = SvCUR(PL_linestr)))) == Nullch)
730 {
731 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
732 if (PL_minus_p) {
733 sv_setpv(PL_linestr,
734 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 735 PL_minus_n = PL_minus_p = 0;
a0d0e21e 736 }
01a19ab0
NC
737 else if (PL_minus_n) {
738 sv_setpvn(PL_linestr, ";}", 2);
739 PL_minus_n = 0;
740 }
a0d0e21e 741 else
4147a61b 742 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
743
744 /* reset variables for next time we lex */
9cbb5ea2
GS
745 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
746 = SvPVX(PL_linestr);
3280af22 747 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 748 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
749
750 /* Close the filehandle. Could be from -P preprocessor,
751 * STDIN, or a regular file. If we were reading code from
752 * STDIN (because the commandline held no -e or filename)
753 * then we don't close it, we reset it so the code can
754 * read from STDIN too.
755 */
756
3280af22
NIS
757 if (PL_preprocess && !PL_in_eval)
758 (void)PerlProc_pclose(PL_rsfp);
759 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
760 PerlIO_clearerr(PL_rsfp);
8990e307 761 else
3280af22
NIS
762 (void)PerlIO_close(PL_rsfp);
763 PL_rsfp = Nullfp;
463ee0b2
LW
764 return s;
765 }
ffb4593c
NT
766
767 /* not at end of file, so we only read another line */
09bef843
SB
768 /* make corresponding updates to old pointers, for yyerror() */
769 oldprevlen = PL_oldbufptr - PL_bufend;
770 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
771 if (PL_last_uni)
772 oldunilen = PL_last_uni - PL_bufend;
773 if (PL_last_lop)
774 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
775 PL_linestart = PL_bufptr = s + prevlen;
776 PL_bufend = s + SvCUR(PL_linestr);
777 s = PL_bufptr;
09bef843
SB
778 PL_oldbufptr = s + oldprevlen;
779 PL_oldoldbufptr = s + oldoldprevlen;
780 if (PL_last_uni)
781 PL_last_uni = s + oldunilen;
782 if (PL_last_lop)
783 PL_last_lop = s + oldloplen;
a0d0e21e 784 incline(s);
ffb4593c
NT
785
786 /* debugger active and we're not compiling the debugger code,
787 * so store the line into the debugger's array of lines
788 */
3280af22 789 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
790 SV *sv = NEWSV(85,0);
791
792 sv_upgrade(sv, SVt_PVMG);
3280af22 793 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 794 (void)SvIOK_on(sv);
45977657 795 SvIV_set(sv, 0);
57843af0 796 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 797 }
463ee0b2 798 }
a687059c 799}
378cc40b 800
ffb4593c
NT
801/*
802 * S_check_uni
803 * Check the unary operators to ensure there's no ambiguity in how they're
804 * used. An ambiguous piece of code would be:
805 * rand + 5
806 * This doesn't mean rand() + 5. Because rand() is a unary operator,
807 * the +5 is its argument.
808 */
809
76e3520e 810STATIC void
cea2e8a9 811S_check_uni(pTHX)
ba106d47 812{
2f3197b3 813 char *s;
a0d0e21e 814 char *t;
2f3197b3 815
3280af22 816 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 817 return;
3280af22
NIS
818 while (isSPACE(*PL_last_uni))
819 PL_last_uni++;
7e2040f0 820 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 821 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 822 return;
0453d815 823 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 824 char ch = *s;
0453d815 825 *s = '\0';
9014280d 826 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 827 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
828 PL_last_uni);
829 *s = ch;
830 }
2f3197b3
LW
831}
832
ffb4593c
NT
833/*
834 * LOP : macro to build a list operator. Its behaviour has been replaced
835 * with a subroutine, S_lop() for which LOP is just another name.
836 */
837
a0d0e21e
LW
838#define LOP(f,x) return lop(f,x,s)
839
ffb4593c
NT
840/*
841 * S_lop
842 * Build a list operator (or something that might be one). The rules:
843 * - if we have a next token, then it's a list operator [why?]
844 * - if the next thing is an opening paren, then it's a function
845 * - else it's a list operator
846 */
847
76e3520e 848STATIC I32
a0be28da 849S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 850{
79072805 851 yylval.ival = f;
35c8bce7 852 CLINE;
3280af22
NIS
853 PL_expect = x;
854 PL_bufptr = s;
855 PL_last_lop = PL_oldbufptr;
eb160463 856 PL_last_lop_op = (OPCODE)f;
3280af22 857 if (PL_nexttoke)
bbf60fe6 858 return REPORT(LSTOP);
79072805 859 if (*s == '(')
bbf60fe6 860 return REPORT(FUNC);
79072805
LW
861 s = skipspace(s);
862 if (*s == '(')
bbf60fe6 863 return REPORT(FUNC);
79072805 864 else
bbf60fe6 865 return REPORT(LSTOP);
79072805
LW
866}
867
ffb4593c
NT
868/*
869 * S_force_next
9cbb5ea2 870 * When the lexer realizes it knows the next token (for instance,
ffb4593c 871 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
872 * to know what token to return the next time the lexer is called. Caller
873 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
874 * handles the token correctly.
ffb4593c
NT
875 */
876
4e553d73 877STATIC void
cea2e8a9 878S_force_next(pTHX_ I32 type)
79072805 879{
3280af22
NIS
880 PL_nexttype[PL_nexttoke] = type;
881 PL_nexttoke++;
882 if (PL_lex_state != LEX_KNOWNEXT) {
883 PL_lex_defer = PL_lex_state;
884 PL_lex_expect = PL_expect;
885 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
886 }
887}
888
d0a148a6
NC
889STATIC SV *
890S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
891{
892 SV *sv = newSVpvn(start,len);
bfed75c6 893 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
894 SvUTF8_on(sv);
895 return sv;
896}
897
ffb4593c
NT
898/*
899 * S_force_word
900 * When the lexer knows the next thing is a word (for instance, it has
901 * just seen -> and it knows that the next char is a word char, then
902 * it calls S_force_word to stick the next word into the PL_next lookahead.
903 *
904 * Arguments:
b1b65b59 905 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
906 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
907 * int check_keyword : if true, Perl checks to make sure the word isn't
908 * a keyword (do this if the word is a label, e.g. goto FOO)
909 * int allow_pack : if true, : characters will also be allowed (require,
910 * use, etc. do this)
9cbb5ea2 911 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
912 */
913
76e3520e 914STATIC char *
cea2e8a9 915S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 916{
463ee0b2
LW
917 register char *s;
918 STRLEN len;
4e553d73 919
463ee0b2
LW
920 start = skipspace(start);
921 s = start;
7e2040f0 922 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 923 (allow_pack && *s == ':') ||
15f0808c 924 (allow_initial_tick && *s == '\'') )
a0d0e21e 925 {
3280af22
NIS
926 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
927 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
928 return start;
929 if (token == METHOD) {
930 s = skipspace(s);
931 if (*s == '(')
3280af22 932 PL_expect = XTERM;
463ee0b2 933 else {
3280af22 934 PL_expect = XOPERATOR;
463ee0b2 935 }
79072805 936 }
d0a148a6
NC
937 PL_nextval[PL_nexttoke].opval
938 = (OP*)newSVOP(OP_CONST,0,
939 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 940 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
941 force_next(token);
942 }
943 return s;
944}
945
ffb4593c
NT
946/*
947 * S_force_ident
9cbb5ea2 948 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
949 * text only contains the "foo" portion. The first argument is a pointer
950 * to the "foo", and the second argument is the type symbol to prefix.
951 * Forces the next token to be a "WORD".
9cbb5ea2 952 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
953 */
954
76e3520e 955STATIC void
bfed75c6 956S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
957{
958 if (s && *s) {
bfed75c6 959 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 960 PL_nextval[PL_nexttoke].opval = o;
79072805 961 force_next(WORD);
748a9306 962 if (kind) {
11343788 963 o->op_private = OPpCONST_ENTERED;
55497cff
PP
964 /* XXX see note in pp_entereval() for why we forgo typo
965 warnings if the symbol must be introduced in an eval.
966 GSAR 96-10-12 */
3280af22 967 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
968 kind == '$' ? SVt_PV :
969 kind == '@' ? SVt_PVAV :
970 kind == '%' ? SVt_PVHV :
971 SVt_PVGV
972 );
748a9306 973 }
79072805
LW
974 }
975}
976
1571675a
GS
977NV
978Perl_str_to_version(pTHX_ SV *sv)
979{
980 NV retval = 0.0;
981 NV nshift = 1.0;
982 STRLEN len;
e62f0680 983 const char *start = SvPVx_const(sv,len);
f54cb97a 984 const char *end = start + len;
504618e9 985 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 986 while (start < end) {
ba210ebe 987 STRLEN skip;
1571675a
GS
988 UV n;
989 if (utf)
9041c2e3 990 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
991 else {
992 n = *(U8*)start;
993 skip = 1;
994 }
995 retval += ((NV)n)/nshift;
996 start += skip;
997 nshift *= 1000;
998 }
999 return retval;
1000}
1001
4e553d73 1002/*
ffb4593c
NT
1003 * S_force_version
1004 * Forces the next token to be a version number.
e759cc13
RGS
1005 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1006 * and if "guessing" is TRUE, then no new token is created (and the caller
1007 * must use an alternative parsing method).
ffb4593c
NT
1008 */
1009
76e3520e 1010STATIC char *
e759cc13 1011S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
1012{
1013 OP *version = Nullop;
44dcb63b 1014 char *d;
89bfa8cd
PP
1015
1016 s = skipspace(s);
1017
44dcb63b 1018 d = s;
dd629d5b 1019 if (*d == 'v')
44dcb63b 1020 d++;
44dcb63b 1021 if (isDIGIT(*d)) {
e759cc13
RGS
1022 while (isDIGIT(*d) || *d == '_' || *d == '.')
1023 d++;
9f3d182e 1024 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1025 SV *ver;
b73d6f50 1026 s = scan_num(s, &yylval);
89bfa8cd 1027 version = yylval.opval;
dd629d5b
GS
1028 ver = cSVOPx(version)->op_sv;
1029 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1030 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1031 SvNV_set(ver, str_to_version(ver));
1571675a 1032 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1033 }
89bfa8cd 1034 }
e759cc13
RGS
1035 else if (guessing)
1036 return s;
89bfa8cd
PP
1037 }
1038
1039 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1040 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1041 force_next(WORD);
89bfa8cd 1042
e759cc13 1043 return s;
89bfa8cd
PP
1044}
1045
ffb4593c
NT
1046/*
1047 * S_tokeq
1048 * Tokenize a quoted string passed in as an SV. It finds the next
1049 * chunk, up to end of string or a backslash. It may make a new
1050 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1051 * turns \\ into \.
1052 */
1053
76e3520e 1054STATIC SV *
cea2e8a9 1055S_tokeq(pTHX_ SV *sv)
79072805
LW
1056{
1057 register char *s;
1058 register char *send;
1059 register char *d;
b3ac6de7
IZ
1060 STRLEN len = 0;
1061 SV *pv = sv;
79072805
LW
1062
1063 if (!SvLEN(sv))
b3ac6de7 1064 goto finish;
79072805 1065
a0d0e21e 1066 s = SvPV_force(sv, len);
21a311ee 1067 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1068 goto finish;
463ee0b2 1069 send = s + len;
79072805
LW
1070 while (s < send && *s != '\\')
1071 s++;
1072 if (s == send)
b3ac6de7 1073 goto finish;
79072805 1074 d = s;
be4731d2 1075 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1076 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1077 if (SvUTF8(sv))
1078 SvUTF8_on(pv);
1079 }
79072805
LW
1080 while (s < send) {
1081 if (*s == '\\') {
a0d0e21e 1082 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1083 s++; /* all that, just for this */
1084 }
1085 *d++ = *s++;
1086 }
1087 *d = '\0';
95a20fc0 1088 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1089 finish:
3280af22 1090 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1091 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1092 return sv;
1093}
1094
ffb4593c
NT
1095/*
1096 * Now come three functions related to double-quote context,
1097 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1098 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1099 * interact with PL_lex_state, and create fake ( ... ) argument lists
1100 * to handle functions and concatenation.
1101 * They assume that whoever calls them will be setting up a fake
1102 * join call, because each subthing puts a ',' after it. This lets
1103 * "lower \luPpEr"
1104 * become
1105 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1106 *
1107 * (I'm not sure whether the spurious commas at the end of lcfirst's
1108 * arguments and join's arguments are created or not).
1109 */
1110
1111/*
1112 * S_sublex_start
1113 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1114 *
1115 * Pattern matching will set PL_lex_op to the pattern-matching op to
1116 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1117 *
1118 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1119 *
1120 * Everything else becomes a FUNC.
1121 *
1122 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1123 * had an OP_CONST or OP_READLINE). This just sets us up for a
1124 * call to S_sublex_push().
1125 */
1126
76e3520e 1127STATIC I32
cea2e8a9 1128S_sublex_start(pTHX)
79072805 1129{
f54cb97a 1130 const register I32 op_type = yylval.ival;
79072805
LW
1131
1132 if (op_type == OP_NULL) {
3280af22
NIS
1133 yylval.opval = PL_lex_op;
1134 PL_lex_op = Nullop;
79072805
LW
1135 return THING;
1136 }
1137 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1138 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1139
1140 if (SvTYPE(sv) == SVt_PVIV) {
1141 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1142 STRLEN len;
f54cb97a
AL
1143 const char *p = SvPV(sv, len);
1144 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1145 if (SvUTF8(sv))
1146 SvUTF8_on(nsv);
b3ac6de7
IZ
1147 SvREFCNT_dec(sv);
1148 sv = nsv;
4e553d73 1149 }
b3ac6de7 1150 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1151 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1152 /* Allow <FH> // "foo" */
1153 if (op_type == OP_READLINE)
1154 PL_expect = XTERMORDORDOR;
79072805
LW
1155 return THING;
1156 }
1157
3280af22
NIS
1158 PL_sublex_info.super_state = PL_lex_state;
1159 PL_sublex_info.sub_inwhat = op_type;
1160 PL_sublex_info.sub_op = PL_lex_op;
1161 PL_lex_state = LEX_INTERPPUSH;
55497cff 1162
3280af22
NIS
1163 PL_expect = XTERM;
1164 if (PL_lex_op) {
1165 yylval.opval = PL_lex_op;
1166 PL_lex_op = Nullop;
55497cff
PP
1167 return PMFUNC;
1168 }
1169 else
1170 return FUNC;
1171}
1172
ffb4593c
NT
1173/*
1174 * S_sublex_push
1175 * Create a new scope to save the lexing state. The scope will be
1176 * ended in S_sublex_done. Returns a '(', starting the function arguments
1177 * to the uc, lc, etc. found before.
1178 * Sets PL_lex_state to LEX_INTERPCONCAT.
1179 */
1180
76e3520e 1181STATIC I32
cea2e8a9 1182S_sublex_push(pTHX)
55497cff 1183{
27da23d5 1184 dVAR;
f46d017c 1185 ENTER;
55497cff 1186
3280af22
NIS
1187 PL_lex_state = PL_sublex_info.super_state;
1188 SAVEI32(PL_lex_dojoin);
1189 SAVEI32(PL_lex_brackets);
3280af22
NIS
1190 SAVEI32(PL_lex_casemods);
1191 SAVEI32(PL_lex_starts);
1192 SAVEI32(PL_lex_state);
7766f137 1193 SAVEVPTR(PL_lex_inpat);
3280af22 1194 SAVEI32(PL_lex_inwhat);
57843af0 1195 SAVECOPLINE(PL_curcop);
3280af22 1196 SAVEPPTR(PL_bufptr);
8452ff4b 1197 SAVEPPTR(PL_bufend);
3280af22
NIS
1198 SAVEPPTR(PL_oldbufptr);
1199 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1200 SAVEPPTR(PL_last_lop);
1201 SAVEPPTR(PL_last_uni);
3280af22
NIS
1202 SAVEPPTR(PL_linestart);
1203 SAVESPTR(PL_linestr);
8edd5f42
RGS
1204 SAVEGENERICPV(PL_lex_brackstack);
1205 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1206
1207 PL_linestr = PL_lex_stuff;
1208 PL_lex_stuff = Nullsv;
1209
9cbb5ea2
GS
1210 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1211 = SvPVX(PL_linestr);
3280af22 1212 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1213 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1214 SAVEFREESV(PL_linestr);
1215
1216 PL_lex_dojoin = FALSE;
1217 PL_lex_brackets = 0;
3280af22
NIS
1218 New(899, PL_lex_brackstack, 120, char);
1219 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1220 PL_lex_casemods = 0;
1221 *PL_lex_casestack = '\0';
1222 PL_lex_starts = 0;
1223 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1224 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1225
1226 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1227 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1228 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1229 else
3280af22 1230 PL_lex_inpat = Nullop;
79072805 1231
55497cff 1232 return '(';
79072805
LW
1233}
1234
ffb4593c
NT
1235/*
1236 * S_sublex_done
1237 * Restores lexer state after a S_sublex_push.
1238 */
1239
76e3520e 1240STATIC I32
cea2e8a9 1241S_sublex_done(pTHX)
79072805 1242{
27da23d5 1243 dVAR;
3280af22 1244 if (!PL_lex_starts++) {
9aa983d2
JH
1245 SV *sv = newSVpvn("",0);
1246 if (SvUTF8(PL_linestr))
1247 SvUTF8_on(sv);
3280af22 1248 PL_expect = XOPERATOR;
9aa983d2 1249 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1250 return THING;
1251 }
1252
3280af22
NIS
1253 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1254 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1255 return yylex();
79072805
LW
1256 }
1257
ffb4593c 1258 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1259 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1260 PL_linestr = PL_lex_repl;
1261 PL_lex_inpat = 0;
1262 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1263 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1264 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1265 SAVEFREESV(PL_linestr);
1266 PL_lex_dojoin = FALSE;
1267 PL_lex_brackets = 0;
3280af22
NIS
1268 PL_lex_casemods = 0;
1269 *PL_lex_casestack = '\0';
1270 PL_lex_starts = 0;
25da4f38 1271 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1272 PL_lex_state = LEX_INTERPNORMAL;
1273 PL_lex_starts++;
e9fa98b2
HS
1274 /* we don't clear PL_lex_repl here, so that we can check later
1275 whether this is an evalled subst; that means we rely on the
1276 logic to ensure sublex_done() is called again only via the
1277 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1278 }
e9fa98b2 1279 else {
3280af22 1280 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1281 PL_lex_repl = Nullsv;
1282 }
79072805 1283 return ',';
ffed7fef
LW
1284 }
1285 else {
f46d017c 1286 LEAVE;
3280af22
NIS
1287 PL_bufend = SvPVX(PL_linestr);
1288 PL_bufend += SvCUR(PL_linestr);
1289 PL_expect = XOPERATOR;
09bef843 1290 PL_sublex_info.sub_inwhat = 0;
79072805 1291 return ')';
ffed7fef
LW
1292 }
1293}
1294
02aa26ce
NT
1295/*
1296 scan_const
1297
1298 Extracts a pattern, double-quoted string, or transliteration. This
1299 is terrifying code.
1300
3280af22
NIS
1301 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1302 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1303 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1304
9b599b2a
GS
1305 Returns a pointer to the character scanned up to. Iff this is
1306 advanced from the start pointer supplied (ie if anything was
1307 successfully parsed), will leave an OP for the substring scanned
1308 in yylval. Caller must intuit reason for not parsing further
1309 by looking at the next characters herself.
1310
02aa26ce
NT
1311 In patterns:
1312 backslashes:
1313 double-quoted style: \r and \n
1314 regexp special ones: \D \s
1315 constants: \x3
1316 backrefs: \1 (deprecated in substitution replacements)
1317 case and quoting: \U \Q \E
1318 stops on @ and $, but not for $ as tail anchor
1319
1320 In transliterations:
1321 characters are VERY literal, except for - not at the start or end
1322 of the string, which indicates a range. scan_const expands the
1323 range to the full set of intermediate characters.
1324
1325 In double-quoted strings:
1326 backslashes:
1327 double-quoted style: \r and \n
1328 constants: \x3
1329 backrefs: \1 (deprecated)
1330 case and quoting: \U \Q \E
1331 stops on @ and $
1332
1333 scan_const does *not* construct ops to handle interpolated strings.
1334 It stops processing as soon as it finds an embedded $ or @ variable
1335 and leaves it to the caller to work out what's going on.
1336
da6eedaa 1337 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1338
1339 $ in pattern could be $foo or could be tail anchor. Assumption:
1340 it's a tail anchor if $ is the last thing in the string, or if it's
1341 followed by one of ")| \n\t"
1342
1343 \1 (backreferences) are turned into $1
1344
1345 The structure of the code is
1346 while (there's a character to process) {
1347 handle transliteration ranges
1348 skip regexp comments
1349 skip # initiated comments in //x patterns
1350 check for embedded @foo
1351 check for embedded scalars
1352 if (backslash) {
1353 leave intact backslashes from leave (below)
1354 deprecate \1 in strings and sub replacements
1355 handle string-changing backslashes \l \U \Q \E, etc.
1356 switch (what was escaped) {
1357 handle - in a transliteration (becomes a literal -)
1358 handle \132 octal characters
1359 handle 0x15 hex characters
1360 handle \cV (control V)
1361 handle printf backslashes (\f, \r, \n, etc)
1362 } (end switch)
1363 } (end if backslash)
1364 } (end while character to read)
4e553d73 1365
02aa26ce
NT
1366*/
1367
76e3520e 1368STATIC char *
cea2e8a9 1369S_scan_const(pTHX_ char *start)
79072805 1370{
3280af22 1371 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1372 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1373 register char *s = start; /* start of the constant */
1374 register char *d = SvPVX(sv); /* destination for copies */
1375 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1376 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1377 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1378 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1379 UV uv;
1380
dff6d3cd 1381 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1382 PL_lex_inpat
b6d5fef8 1383 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1384 : "";
79072805 1385
2b9d42f0
NIS
1386 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1387 /* If we are doing a trans and we know we want UTF8 set expectation */
1388 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1389 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1390 }
1391
1392
79072805 1393 while (s < send || dorange) {
02aa26ce 1394 /* get transliterations out of the way (they're most literal) */
3280af22 1395 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1396 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1397 if (dorange) {
1ba5c669
JH
1398 I32 i; /* current expanded character */
1399 I32 min; /* first character in range */
1400 I32 max; /* last character in range */
02aa26ce 1401
2b9d42f0 1402 if (has_utf8) {
8973db79
JH
1403 char *c = (char*)utf8_hop((U8*)d, -1);
1404 char *e = d++;
1405 while (e-- > c)
1406 *(e + 1) = *e;
25716404 1407 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1408 /* mark the range as done, and continue */
1409 dorange = FALSE;
1410 didrange = TRUE;
1411 continue;
1412 }
2b9d42f0 1413
95a20fc0 1414 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1415 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1416 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1417 d -= 2; /* eat the first char and the - */
1418
8ada0baa
JH
1419 min = (U8)*d; /* first char in range */
1420 max = (U8)d[1]; /* last char in range */
1421
c2e66d9e 1422 if (min > max) {
01ec43d0 1423 Perl_croak(aTHX_
d1573ac7 1424 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1425 (char)min, (char)max);
c2e66d9e
GS
1426 }
1427
c7f1f016 1428#ifdef EBCDIC
8ada0baa
JH
1429 if ((isLOWER(min) && isLOWER(max)) ||
1430 (isUPPER(min) && isUPPER(max))) {
1431 if (isLOWER(min)) {
1432 for (i = min; i <= max; i++)
1433 if (isLOWER(i))
db42d148 1434 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1435 } else {
1436 for (i = min; i <= max; i++)
1437 if (isUPPER(i))
db42d148 1438 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1439 }
1440 }
1441 else
1442#endif
1443 for (i = min; i <= max; i++)
eb160463 1444 *d++ = (char)i;
02aa26ce
NT
1445
1446 /* mark the range as done, and continue */
79072805 1447 dorange = FALSE;
01ec43d0 1448 didrange = TRUE;
79072805 1449 continue;
4e553d73 1450 }
02aa26ce
NT
1451
1452 /* range begins (ignore - as first or last char) */
79072805 1453 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1454 if (didrange) {
1fafa243 1455 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1456 }
2b9d42f0 1457 if (has_utf8) {
25716404 1458 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1459 s++;
1460 continue;
1461 }
79072805
LW
1462 dorange = TRUE;
1463 s++;
01ec43d0
GS
1464 }
1465 else {
1466 didrange = FALSE;
1467 }
79072805 1468 }
02aa26ce
NT
1469
1470 /* if we get here, we're not doing a transliteration */
1471
0f5d15d6
IZ
1472 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1473 except for the last char, which will be done separately. */
3280af22 1474 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1475 if (s[2] == '#') {
e994fd66 1476 while (s+1 < send && *s != ')')
db42d148 1477 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1478 }
1479 else if (s[2] == '{' /* This should match regcomp.c */
1480 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1481 {
cc6b7395 1482 I32 count = 1;
0f5d15d6 1483 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1484 char c;
1485
d9f97599
GS
1486 while (count && (c = *regparse)) {
1487 if (c == '\\' && regparse[1])
1488 regparse++;
4e553d73 1489 else if (c == '{')
cc6b7395 1490 count++;
4e553d73 1491 else if (c == '}')
cc6b7395 1492 count--;
d9f97599 1493 regparse++;
cc6b7395 1494 }
e994fd66 1495 if (*regparse != ')')
5bdf89e7 1496 regparse--; /* Leave one char for continuation. */
0f5d15d6 1497 while (s < regparse)
db42d148 1498 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1499 }
748a9306 1500 }
02aa26ce
NT
1501
1502 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1503 else if (*s == '#' && PL_lex_inpat &&
1504 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1505 while (s+1 < send && *s != '\n')
db42d148 1506 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1507 }
02aa26ce 1508
5d1d4326 1509 /* check for embedded arrays
da6eedaa 1510 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1511 */
7e2040f0 1512 else if (*s == '@' && s[1]
5d1d4326 1513 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1514 break;
02aa26ce
NT
1515
1516 /* check for embedded scalars. only stop if we're sure it's a
1517 variable.
1518 */
79072805 1519 else if (*s == '$') {
3280af22 1520 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1521 break;
6002328a 1522 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1523 break; /* in regexp, $ might be tail anchor */
1524 }
02aa26ce 1525
2b9d42f0
NIS
1526 /* End of else if chain - OP_TRANS rejoin rest */
1527
02aa26ce 1528 /* backslashes */
79072805
LW
1529 if (*s == '\\' && s+1 < send) {
1530 s++;
02aa26ce
NT
1531
1532 /* some backslashes we leave behind */
c9f97d15 1533 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1534 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1535 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1536 continue;
1537 }
02aa26ce
NT
1538
1539 /* deprecate \1 in strings and substitution replacements */
3280af22 1540 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1541 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1542 {
599cee73 1543 if (ckWARN(WARN_SYNTAX))
9014280d 1544 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1545 *--s = '$';
1546 break;
1547 }
02aa26ce
NT
1548
1549 /* string-change backslash escapes */
3280af22 1550 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1551 --s;
1552 break;
1553 }
02aa26ce
NT
1554
1555 /* if we get here, it's either a quoted -, or a digit */
79072805 1556 switch (*s) {
02aa26ce
NT
1557
1558 /* quoted - in transliterations */
79072805 1559 case '-':
3280af22 1560 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1561 *d++ = *s++;
1562 continue;
1563 }
1564 /* FALL THROUGH */
1565 default:
11b8faa4 1566 {
707afd92 1567 if (ckWARN(WARN_MISC) &&
bfed75c6 1568 isALNUM(*s) &&
707afd92 1569 *s != '_')
9014280d 1570 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1571 "Unrecognized escape \\%c passed through",
1572 *s);
1573 /* default action is to copy the quoted character */
f9a63242 1574 goto default_action;
11b8faa4 1575 }
02aa26ce
NT
1576
1577 /* \132 indicates an octal constant */
79072805
LW
1578 case '0': case '1': case '2': case '3':
1579 case '4': case '5': case '6': case '7':
ba210ebe 1580 {
53305cf1
NC
1581 I32 flags = 0;
1582 STRLEN len = 3;
1583 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1584 s += len;
1585 }
012bcf8d 1586 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1587
1588 /* \x24 indicates a hex constant */
79072805 1589 case 'x':
a0ed51b3
LW
1590 ++s;
1591 if (*s == '{') {
1592 char* e = strchr(s, '}');
a4c04bdc
NC
1593 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1594 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1595 STRLEN len;
355860ce 1596
53305cf1 1597 ++s;
adaeee49 1598 if (!e) {
a0ed51b3 1599 yyerror("Missing right brace on \\x{}");
355860ce 1600 continue;
ba210ebe 1601 }
53305cf1
NC
1602 len = e - s;
1603 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1604 s = e + 1;
a0ed51b3
LW
1605 }
1606 else {
ba210ebe 1607 {
53305cf1 1608 STRLEN len = 2;
a4c04bdc 1609 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1610 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1611 s += len;
1612 }
012bcf8d
GS
1613 }
1614
1615 NUM_ESCAPE_INSERT:
1616 /* Insert oct or hex escaped character.
301d3d20 1617 * There will always enough room in sv since such
db42d148 1618 * escapes will be longer than any UTF-8 sequence
301d3d20 1619 * they can end up as. */
ba7cea30 1620
c7f1f016
NIS
1621 /* We need to map to chars to ASCII before doing the tests
1622 to cover EBCDIC
1623 */
c4d5f83a 1624 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1625 if (!has_utf8 && uv > 255) {
301d3d20
JH
1626 /* Might need to recode whatever we have
1627 * accumulated so far if it contains any
1628 * hibit chars.
1629 *
1630 * (Can't we keep track of that and avoid
1631 * this rescan? --jhi)
012bcf8d 1632 */
c7f1f016 1633 int hicount = 0;
63cd0674
NIS
1634 U8 *c;
1635 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1636 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1637 hicount++;
db42d148 1638 }
012bcf8d 1639 }
63cd0674 1640 if (hicount) {
95a20fc0 1641 STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1642 U8 *src, *dst;
1643 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1644 src = (U8 *)d - 1;
1645 dst = src+hicount;
1646 d += hicount;
1647 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1648 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1649 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1650 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1651 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1652 }
1653 else {
63cd0674 1654 *dst-- = *src;
012bcf8d 1655 }
c7f1f016 1656 src--;
012bcf8d
GS
1657 }
1658 }
1659 }
1660
9aa983d2 1661 if (has_utf8 || uv > 255) {
9041c2e3 1662 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1663 has_utf8 = TRUE;
f9a63242
JH
1664 if (PL_lex_inwhat == OP_TRANS &&
1665 PL_sublex_info.sub_op) {
1666 PL_sublex_info.sub_op->op_private |=
1667 (PL_lex_repl ? OPpTRANS_FROM_UTF
1668 : OPpTRANS_TO_UTF);
f9a63242 1669 }
012bcf8d 1670 }
a0ed51b3 1671 else {
012bcf8d 1672 *d++ = (char)uv;
a0ed51b3 1673 }
012bcf8d
GS
1674 }
1675 else {
c4d5f83a 1676 *d++ = (char) uv;
a0ed51b3 1677 }
79072805 1678 continue;
02aa26ce 1679
b239daa5 1680 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1681 case 'N':
55eda711 1682 ++s;
423cee85
JH
1683 if (*s == '{') {
1684 char* e = strchr(s, '}');
155aba94 1685 SV *res;
423cee85
JH
1686 STRLEN len;
1687 char *str;
4e553d73 1688
423cee85 1689 if (!e) {
5777a3f7 1690 yyerror("Missing right brace on \\N{}");
423cee85
JH
1691 e = s - 1;
1692 goto cont_scan;
1693 }
dbc0d4f2
JH
1694 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1695 /* \N{U+...} */
1696 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1697 PERL_SCAN_DISALLOW_PREFIX;
1698 s += 3;
1699 len = e - s;
1700 uv = grok_hex(s, &len, &flags, NULL);
1701 s = e + 1;
1702 goto NUM_ESCAPE_INSERT;
1703 }
55eda711
JH
1704 res = newSVpvn(s + 1, e - s - 1);
1705 res = new_constant( Nullch, 0, "charnames",
1706 res, Nullsv, "\\N{...}" );
f9a63242
JH
1707 if (has_utf8)
1708 sv_utf8_upgrade(res);
423cee85 1709 str = SvPV(res,len);
1c47067b
JH
1710#ifdef EBCDIC_NEVER_MIND
1711 /* charnames uses pack U and that has been
1712 * recently changed to do the below uni->native
1713 * mapping, so this would be redundant (and wrong,
1714 * the code point would be doubly converted).
1715 * But leave this in just in case the pack U change
1716 * gets revoked, but the semantics is still
1717 * desireable for charnames. --jhi */
cddc7ef4
JH
1718 {
1719 UV uv = utf8_to_uvchr((U8*)str, 0);
1720
1721 if (uv < 0x100) {
89ebb4a3 1722 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1723
1724 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1725 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1726 str = SvPV(res, len);
1727 }
1728 }
1729#endif
89491803 1730 if (!has_utf8 && SvUTF8(res)) {
95a20fc0 1731 const char *ostart = SvPVX_const(sv);
f08d6ad9
GS
1732 SvCUR_set(sv, d - ostart);
1733 SvPOK_on(sv);
e4f3eed8 1734 *d = '\0';
f08d6ad9 1735 sv_utf8_upgrade(sv);
d2f449dd 1736 /* this just broke our allocation above... */
eb160463 1737 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1738 d = SvPVX(sv) + SvCUR(sv);
89491803 1739 has_utf8 = TRUE;
f08d6ad9 1740 }
eb160463 1741 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
95a20fc0 1742 const char *odest = SvPVX_const(sv);
423cee85 1743
8973db79 1744 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1745 d = SvPVX(sv) + (d - odest);
1746 }
1747 Copy(str, d, len, char);
1748 d += len;
1749 SvREFCNT_dec(res);
1750 cont_scan:
1751 s = e + 1;
1752 }
1753 else
5777a3f7 1754 yyerror("Missing braces on \\N{}");
423cee85
JH
1755 continue;
1756
02aa26ce 1757 /* \c is a control character */
79072805
LW
1758 case 'c':
1759 s++;
961ce445 1760 if (s < send) {
ba210ebe 1761 U8 c = *s++;
c7f1f016
NIS
1762#ifdef EBCDIC
1763 if (isLOWER(c))
1764 c = toUPPER(c);
1765#endif
db42d148 1766 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1767 }
961ce445
RGS
1768 else {
1769 yyerror("Missing control char name in \\c");
1770 }
79072805 1771 continue;
02aa26ce
NT
1772
1773 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1774 case 'b':
db42d148 1775 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1776 break;
1777 case 'n':
db42d148 1778 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1779 break;
1780 case 'r':
db42d148 1781 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1782 break;
1783 case 'f':
db42d148 1784 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1785 break;
1786 case 't':
db42d148 1787 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1788 break;
34a3fe2a 1789 case 'e':
db42d148 1790 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1791 break;
1792 case 'a':
db42d148 1793 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1794 break;
02aa26ce
NT
1795 } /* end switch */
1796
79072805
LW
1797 s++;
1798 continue;
02aa26ce
NT
1799 } /* end if (backslash) */
1800
f9a63242 1801 default_action:
2b9d42f0
NIS
1802 /* If we started with encoded form, or already know we want it
1803 and then encode the next character */
1804 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1805 STRLEN len = 1;
1806 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1807 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1808 s += len;
1809 if (need > len) {
1810 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
95a20fc0 1811 STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1812 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1813 }
1814 d = (char*)uvchr_to_utf8((U8*)d, uv);
1815 has_utf8 = TRUE;
1816 }
1817 else {
1818 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1819 }
02aa26ce
NT
1820 } /* while loop to process each character */
1821
1822 /* terminate the string and set up the sv */
79072805 1823 *d = '\0';
95a20fc0 1824 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1825 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1826 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1827
79072805 1828 SvPOK_on(sv);
9f4817db 1829 if (PL_encoding && !has_utf8) {
d0063567
DK
1830 sv_recode_to_utf8(sv, PL_encoding);
1831 if (SvUTF8(sv))
1832 has_utf8 = TRUE;
9f4817db 1833 }
2b9d42f0 1834 if (has_utf8) {
7e2040f0 1835 SvUTF8_on(sv);
2b9d42f0 1836 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1837 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1838 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1839 }
1840 }
79072805 1841
02aa26ce 1842 /* shrink the sv if we allocated more than we used */
79072805 1843 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1844 SvPV_shrink_to_cur(sv);
79072805 1845 }
02aa26ce 1846
9b599b2a 1847 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1848 if (s > PL_bufptr) {
1849 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1850 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1851 sv, Nullsv,
4e553d73 1852 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1853 ? "tr"
3280af22 1854 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1855 ? "s"
1856 : "qq")));
79072805 1857 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1858 } else
8990e307 1859 SvREFCNT_dec(sv);
79072805
LW
1860 return s;
1861}
1862
ffb4593c
NT
1863/* S_intuit_more
1864 * Returns TRUE if there's more to the expression (e.g., a subscript),
1865 * FALSE otherwise.
ffb4593c
NT
1866 *
1867 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1868 *
1869 * ->[ and ->{ return TRUE
1870 * { and [ outside a pattern are always subscripts, so return TRUE
1871 * if we're outside a pattern and it's not { or [, then return FALSE
1872 * if we're in a pattern and the first char is a {
1873 * {4,5} (any digits around the comma) returns FALSE
1874 * if we're in a pattern and the first char is a [
1875 * [] returns FALSE
1876 * [SOMETHING] has a funky algorithm to decide whether it's a
1877 * character class or not. It has to deal with things like
1878 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1879 * anything else returns TRUE
1880 */
1881
9cbb5ea2
GS
1882/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1883
76e3520e 1884STATIC int
cea2e8a9 1885S_intuit_more(pTHX_ register char *s)
79072805 1886{
3280af22 1887 if (PL_lex_brackets)
79072805
LW
1888 return TRUE;
1889 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1890 return TRUE;
1891 if (*s != '{' && *s != '[')
1892 return FALSE;
3280af22 1893 if (!PL_lex_inpat)
79072805
LW
1894 return TRUE;
1895
1896 /* In a pattern, so maybe we have {n,m}. */
1897 if (*s == '{') {
1898 s++;
1899 if (!isDIGIT(*s))
1900 return TRUE;
1901 while (isDIGIT(*s))
1902 s++;
1903 if (*s == ',')
1904 s++;
1905 while (isDIGIT(*s))
1906 s++;
1907 if (*s == '}')
1908 return FALSE;
1909 return TRUE;
1910
1911 }
1912
1913 /* On the other hand, maybe we have a character class */
1914
1915 s++;
1916 if (*s == ']' || *s == '^')
1917 return FALSE;
1918 else {
ffb4593c 1919 /* this is terrifying, and it works */
79072805
LW
1920 int weight = 2; /* let's weigh the evidence */
1921 char seen[256];
f27ffc4a 1922 unsigned char un_char = 255, last_un_char;
f54cb97a 1923 const char *send = strchr(s,']');
3280af22 1924 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1925
1926 if (!send) /* has to be an expression */
1927 return TRUE;
1928
1929 Zero(seen,256,char);
1930 if (*s == '$')
1931 weight -= 3;
1932 else if (isDIGIT(*s)) {
1933 if (s[1] != ']') {
1934 if (isDIGIT(s[1]) && s[2] == ']')
1935 weight -= 10;
1936 }
1937 else
1938 weight -= 100;
1939 }
1940 for (; s < send; s++) {
1941 last_un_char = un_char;
1942 un_char = (unsigned char)*s;
1943 switch (*s) {
1944 case '@':
1945 case '&':
1946 case '$':
1947 weight -= seen[un_char] * 10;
7e2040f0 1948 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1949 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1950 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1951 weight -= 100;
1952 else
1953 weight -= 10;
1954 }
1955 else if (*s == '$' && s[1] &&
93a17b20
LW
1956 strchr("[#!%*<>()-=",s[1])) {
1957 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1958 weight -= 10;
1959 else
1960 weight -= 1;
1961 }
1962 break;
1963 case '\\':
1964 un_char = 254;
1965 if (s[1]) {
93a17b20 1966 if (strchr("wds]",s[1]))
79072805
LW
1967 weight += 100;
1968 else if (seen['\''] || seen['"'])
1969 weight += 1;
93a17b20 1970 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1971 weight += 40;
1972 else if (isDIGIT(s[1])) {
1973 weight += 40;
1974 while (s[1] && isDIGIT(s[1]))
1975 s++;
1976 }
1977 }
1978 else
1979 weight += 100;
1980 break;
1981 case '-':
1982 if (s[1] == '\\')
1983 weight += 50;
93a17b20 1984 if (strchr("aA01! ",last_un_char))
79072805 1985 weight += 30;
93a17b20 1986 if (strchr("zZ79~",s[1]))
79072805 1987 weight += 30;
f27ffc4a
GS
1988 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1989 weight -= 5; /* cope with negative subscript */
79072805
LW
1990 break;
1991 default:
3792a11b
NC
1992 if (!isALNUM(last_un_char)
1993 && !(last_un_char == '$' || last_un_char == '@'
1994 || last_un_char == '&')
1995 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1996 char *d = tmpbuf;
1997 while (isALPHA(*s))
1998 *d++ = *s++;
1999 *d = '\0';
2000 if (keyword(tmpbuf, d - tmpbuf))
2001 weight -= 150;
2002 }
2003 if (un_char == last_un_char + 1)
2004 weight += 5;
2005 weight -= seen[un_char];
2006 break;
2007 }
2008 seen[un_char]++;
2009 }
2010 if (weight >= 0) /* probably a character class */
2011 return FALSE;
2012 }
2013
2014 return TRUE;
2015}
ffed7fef 2016
ffb4593c
NT
2017/*
2018 * S_intuit_method
2019 *
2020 * Does all the checking to disambiguate
2021 * foo bar
2022 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2023 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2024 *
2025 * First argument is the stuff after the first token, e.g. "bar".
2026 *
2027 * Not a method if bar is a filehandle.
2028 * Not a method if foo is a subroutine prototyped to take a filehandle.
2029 * Not a method if it's really "Foo $bar"
2030 * Method if it's "foo $bar"
2031 * Not a method if it's really "print foo $bar"
2032 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2033 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2034 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2035 * =>
2036 */
2037
76e3520e 2038STATIC int
cea2e8a9 2039S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2040{
2041 char *s = start + (*start == '$');
3280af22 2042 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2043 STRLEN len;
2044 GV* indirgv;
2045
2046 if (gv) {
b6c543e3 2047 CV *cv;
a0d0e21e
LW
2048 if (GvIO(gv))
2049 return 0;
b6c543e3 2050 if ((cv = GvCVu(gv))) {
95a20fc0 2051 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2052 if (proto) {
2053 if (*proto == ';')
2054 proto++;
2055 if (*proto == '*')
2056 return 0;
2057 }
2058 } else
a0d0e21e
LW
2059 gv = 0;
2060 }
8903cb82 2061 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2062 /* start is the beginning of the possible filehandle/object,
2063 * and s is the end of it
2064 * tmpbuf is a copy of it
2065 */
2066
a0d0e21e 2067 if (*start == '$') {
3280af22 2068 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2069 return 0;
2070 s = skipspace(s);
3280af22
NIS
2071 PL_bufptr = start;
2072 PL_expect = XREF;
a0d0e21e
LW
2073 return *s == '(' ? FUNCMETH : METHOD;
2074 }
2075 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2076 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2077 len -= 2;
2078 tmpbuf[len] = '\0';
2079 goto bare_package;
2080 }
2081 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2082 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2083 return 0;
2084 /* filehandle or package name makes it a method */
89bfa8cd 2085 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2086 s = skipspace(s);
3280af22 2087 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2088 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2089 bare_package:
3280af22 2090 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2091 newSVpvn(tmpbuf,len));
3280af22
NIS
2092 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2093 PL_expect = XTERM;
a0d0e21e 2094 force_next(WORD);
3280af22 2095 PL_bufptr = s;
a0d0e21e
LW
2096 return *s == '(' ? FUNCMETH : METHOD;
2097 }
2098 }
2099 return 0;
2100}
2101
ffb4593c
NT
2102/*
2103 * S_incl_perldb
2104 * Return a string of Perl code to load the debugger. If PERL5DB
2105 * is set, it will return the contents of that, otherwise a
2106 * compile-time require of perl5db.pl.
2107 */
2108
bfed75c6 2109STATIC const char*
cea2e8a9 2110S_incl_perldb(pTHX)
a0d0e21e 2111{
3280af22 2112 if (PL_perldb) {
bfed75c6 2113 const char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2114
2115 if (pdb)
2116 return pdb;
93189314 2117 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2118 return "BEGIN { require 'perl5db.pl' }";
2119 }
2120 return "";
2121}
2122
2123
16d20bd9 2124/* Encoded script support. filter_add() effectively inserts a
4e553d73 2125 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2126 * Note that the filter function only applies to the current source file
2127 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2128 *
2129 * The datasv parameter (which may be NULL) can be used to pass
2130 * private data to this instance of the filter. The filter function
2131 * can recover the SV using the FILTER_DATA macro and use it to
2132 * store private buffers and state information.
2133 *
2134 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2135 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2136 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2137 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2138 * private use must be set using malloc'd pointers.
2139 */
16d20bd9
AD
2140
2141SV *
864dbfa3 2142Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2143{
24801a4b
JH
2144 xpvio_filter_u u;
2145
f4c556ac
GS
2146 if (!funcp)
2147 return Nullsv;
2148
3280af22
NIS
2149 if (!PL_rsfp_filters)
2150 PL_rsfp_filters = newAV();
16d20bd9 2151 if (!datasv)
8c52afec 2152 datasv = NEWSV(255,0);
862a34c6 2153 SvUPGRADE(datasv, SVt_PVIO);
24801a4b
JH
2154 u.filter = funcp;
2155 IoANY(datasv) = u.iop; /* stash funcp into spare field */
e0c19803 2156 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2157 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
33073adb 2158 (void*)u.iop, SvPV_nolen(datasv)));
3280af22
NIS
2159 av_unshift(PL_rsfp_filters, 1);
2160 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2161 return(datasv);
2162}
4e553d73 2163
16d20bd9
AD
2164
2165/* Delete most recently added instance of this filter function. */
a0d0e21e 2166void
864dbfa3 2167Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2168{
e0c19803 2169 SV *datasv;
24801a4b
JH
2170 xpvio_filter_u u;
2171
33073adb
RGS
2172#ifdef DEBUGGING
2173 u.filter = funcp;
2174 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)u.iop));
2175#endif
3280af22 2176 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2177 return;
2178 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2179 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
24801a4b
JH
2180 u.iop = IoANY(datasv);
2181 if (u.filter == funcp) {
e0c19803 2182 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2183 IoANY(datasv) = (void *)NULL;
3280af22 2184 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2185
16d20bd9
AD
2186 return;
2187 }
2188 /* we need to search for the correct entry and clear it */
cea2e8a9 2189 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2190}
2191
2192
1de9afcd
RGS
2193/* Invoke the idxth filter function for the current rsfp. */
2194/* maxlen 0 = read one text line */
16d20bd9 2195I32
864dbfa3 2196Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2197{
16d20bd9
AD
2198 filter_t funcp;
2199 SV *datasv = NULL;
24801a4b 2200 xpvio_filter_u u;
e50aee73 2201
3280af22 2202 if (!PL_rsfp_filters)
16d20bd9 2203 return -1;
1de9afcd 2204 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2205 /* Provide a default input filter to make life easy. */
2206 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2207 DEBUG_P(PerlIO_printf(Perl_debug_log,
2208 "filter_read %d: from rsfp\n", idx));
4e553d73 2209 if (maxlen) {
16d20bd9
AD
2210 /* Want a block */
2211 int len ;
f54cb97a 2212 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2213
2214 /* ensure buf_sv is large enough */
eb160463 2215 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2216 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2217 if (PerlIO_error(PL_rsfp))
37120919
AD
2218 return -1; /* error */
2219 else
2220 return 0 ; /* end of file */
2221 }
16d20bd9
AD
2222 SvCUR_set(buf_sv, old_len + len) ;
2223 } else {
2224 /* Want a line */
3280af22
NIS
2225 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2226 if (PerlIO_error(PL_rsfp))
37120919
AD
2227 return -1; /* error */
2228 else
2229 return 0 ; /* end of file */
2230 }
16d20bd9
AD
2231 }
2232 return SvCUR(buf_sv);
2233 }
2234 /* Skip this filter slot if filter has been deleted */
1de9afcd 2235 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2236 DEBUG_P(PerlIO_printf(Perl_debug_log,
2237 "filter_read %d: skipped (filter deleted)\n",
2238 idx));
16d20bd9
AD
2239 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2240 }
2241 /* Get function pointer hidden within datasv */
24801a4b
JH
2242 u.iop = IoANY(datasv);
2243 funcp = u.filter;
f4c556ac
GS
2244 DEBUG_P(PerlIO_printf(Perl_debug_log,
2245 "filter_read %d: via function %p (%s)\n",
33073adb 2246 idx, (void*)u.iop, SvPV_nolen(datasv)));
16d20bd9
AD
2247 /* Call function. The function is expected to */
2248 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2249 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2250 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2251}
2252
76e3520e 2253STATIC char *
cea2e8a9 2254S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2255{
c39cd008 2256#ifdef PERL_CR_FILTER
3280af22 2257 if (!PL_rsfp_filters) {
c39cd008 2258 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2259 }
2260#endif
3280af22 2261 if (PL_rsfp_filters) {
55497cff
PP
2262 if (!append)
2263 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2264 if (FILTER_READ(0, sv, 0) > 0)
2265 return ( SvPVX(sv) ) ;
2266 else
2267 return Nullch ;
2268 }
9d116dd7 2269 else
fd049845 2270 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2271}
2272
01ec43d0 2273STATIC HV *
7fc63493 2274S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2275{
2276 GV *gv;
2277
01ec43d0 2278 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2279 return PL_curstash;
2280
2281 if (len > 2 &&
2282 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2283 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2284 {
2285 return GvHV(gv); /* Foo:: */
def3634b
GS
2286 }
2287
2288 /* use constant CLASS => 'MyClass' */
2289 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2290 SV *sv;
2291 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2292 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2293 }
2294 }
2295
2296 return gv_stashpv(pkgname, FALSE);
2297}
a0d0e21e 2298
748a9306 2299#ifdef DEBUGGING
27da23d5 2300 static const char* const exp_name[] =
09bef843 2301 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2302 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2303 };
748a9306 2304#endif
463ee0b2 2305
02aa26ce
NT
2306/*
2307 yylex
2308
2309 Works out what to call the token just pulled out of the input
2310 stream. The yacc parser takes care of taking the ops we return and
2311 stitching them into a tree.
2312
2313 Returns:
2314 PRIVATEREF
2315
2316 Structure:
2317 if read an identifier
2318 if we're in a my declaration
2319 croak if they tried to say my($foo::bar)
2320 build the ops for a my() declaration
2321 if it's an access to a my() variable
2322 are we in a sort block?
2323 croak if my($a); $a <=> $b
2324 build ops for access to a my() variable
2325 if in a dq string, and they've said @foo and we can't find @foo
2326 croak
2327 build ops for a bareword
2328 if we already built the token before, use it.
2329*/
2330
20141f0e 2331
dba4d153
JH
2332#ifdef __SC__
2333#pragma segment Perl_yylex
2334#endif
dba4d153 2335int
dba4d153 2336Perl_yylex(pTHX)
20141f0e 2337{
3afc138a 2338 register char *s = PL_bufptr;
378cc40b 2339 register char *d;
79072805 2340 register I32 tmp;
463ee0b2 2341 STRLEN len;
161b471a
NIS
2342 GV *gv = Nullgv;
2343 GV **gvp = 0;
aa7440fb 2344 bool bof = FALSE;
1d239bbb 2345 I32 orig_keyword = 0;
a687059c 2346
bbf60fe6
DM
2347 DEBUG_T( {
2348 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2349 lex_state_names[PL_lex_state]);
2350 } );
02aa26ce 2351 /* check if there's an identifier for us to look at */
ba979b31 2352 if (PL_pending_ident)
bbf60fe6 2353 return REPORT(S_pending_ident(aTHX));
bbce6d69 2354
02aa26ce
NT
2355 /* no identifier pending identification */
2356
3280af22 2357 switch (PL_lex_state) {
79072805
LW
2358#ifdef COMMENTARY
2359 case LEX_NORMAL: /* Some compilers will produce faster */
2360 case LEX_INTERPNORMAL: /* code if we comment these out. */
2361 break;
2362#endif
2363
09bef843 2364 /* when we've already built the next token, just pull it out of the queue */
79072805 2365 case LEX_KNOWNEXT:
3280af22
NIS
2366 PL_nexttoke--;
2367 yylval = PL_nextval[PL_nexttoke];
2368 if (!PL_nexttoke) {
2369 PL_lex_state = PL_lex_defer;
2370 PL_expect = PL_lex_expect;
2371 PL_lex_defer = LEX_NORMAL;
463ee0b2 2372 }
607df283 2373 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2374 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2375 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2376
bbf60fe6 2377 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2378
02aa26ce 2379 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2380 when we get here, PL_bufptr is at the \
02aa26ce 2381 */
79072805
LW
2382 case LEX_INTERPCASEMOD:
2383#ifdef DEBUGGING
3280af22 2384 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2385 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2386#endif
02aa26ce 2387 /* handle \E or end of string */
3280af22 2388 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2389 /* if at a \E */
3280af22 2390 if (PL_lex_casemods) {
f54cb97a 2391 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2392 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2393
3792a11b
NC
2394 if (PL_bufptr != PL_bufend
2395 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2396 PL_bufptr += 2;
2397 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2398 }
bbf60fe6 2399 return REPORT(')');
79072805 2400 }
3280af22
NIS
2401 if (PL_bufptr != PL_bufend)
2402 PL_bufptr += 2;
2403 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2404 return yylex();
79072805
LW
2405 }
2406 else {
607df283 2407 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2408 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2409 s = PL_bufptr + 1;
6e909404
JH
2410 if (s[1] == '\\' && s[2] == 'E') {
2411 PL_bufptr = s + 3;
2412 PL_lex_state = LEX_INTERPCONCAT;
2413 return yylex();
a0d0e21e 2414 }
6e909404
JH
2415 else {
2416 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2417 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2418 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2419 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2420 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2421 return REPORT(')');
6e909404
JH
2422 }
2423 if (PL_lex_casemods > 10)
2424 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2425 PL_lex_casestack[PL_lex_casemods++] = *s;
2426 PL_lex_casestack[PL_lex_casemods] = '\0';
2427 PL_lex_state = LEX_INTERPCONCAT;
2428 PL_nextval[PL_nexttoke].ival = 0;
2429 force_next('(');
2430 if (*s == 'l')
2431 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2432 else if (*s == 'u')
2433 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2434 else if (*s == 'L')
2435 PL_nextval[PL_nexttoke].ival = OP_LC;
2436 else if (*s == 'U')
2437 PL_nextval[PL_nexttoke].ival = OP_UC;
2438 else if (*s == 'Q')
2439 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2440 else
2441 Perl_croak(aTHX_ "panic: yylex");
2442 PL_bufptr = s + 1;
a0d0e21e 2443 }
79072805 2444 force_next(FUNC);
3280af22
NIS
2445 if (PL_lex_starts) {
2446 s = PL_bufptr;
2447 PL_lex_starts = 0;
131b3ad0
DM
2448 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2449 if (PL_lex_casemods == 1 && PL_lex_inpat)
2450 OPERATOR(',');
2451 else
2452 Aop(OP_CONCAT);
79072805
LW
2453 }
2454 else
cea2e8a9 2455 return yylex();
79072805
LW
2456 }
2457
55497cff 2458 case LEX_INTERPPUSH:
bbf60fe6 2459 return REPORT(sublex_push());
55497cff 2460
79072805 2461 case LEX_INTERPSTART:
3280af22 2462 if (PL_bufptr == PL_bufend)
bbf60fe6 2463 return REPORT(sublex_done());
607df283 2464 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2465 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2466 PL_expect = XTERM;
2467 PL_lex_dojoin = (*PL_bufptr == '@');
2468 PL_lex_state = LEX_INTERPNORMAL;
2469 if (PL_lex_dojoin) {
2470 PL_nextval[PL_nexttoke].ival = 0;
79072805 2471 force_next(',');
a0d0e21e 2472 force_ident("\"", '$');
3280af22 2473 PL_nextval[PL_nexttoke].ival = 0;
79072805 2474 force_next('$');
3280af22 2475 PL_nextval[PL_nexttoke].ival = 0;
79072805 2476 force_next('(');
3280af22 2477 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2478 force_next(FUNC);
2479 }
3280af22
NIS
2480 if (PL_lex_starts++) {
2481 s = PL_bufptr;
131b3ad0
DM
2482 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2483 if (!PL_lex_casemods && PL_lex_inpat)
2484 OPERATOR(',');
2485 else
2486 Aop(OP_CONCAT);
79072805 2487 }
cea2e8a9 2488 return yylex();
79072805
LW
2489
2490 case LEX_INTERPENDMAYBE:
3280af22
NIS
2491 if (intuit_more(PL_bufptr)) {
2492 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2493 break;
2494 }
2495 /* FALL THROUGH */
2496
2497 case LEX_INTERPEND:
3280af22
NIS
2498 if (PL_lex_dojoin) {
2499 PL_lex_dojoin = FALSE;
2500 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2501 return REPORT(')');
79072805 2502 }
43a16006 2503 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2504 && SvEVALED(PL_lex_repl))
43a16006 2505 {
e9fa98b2 2506 if (PL_bufptr != PL_bufend)
cea2e8a9 2507 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2508 PL_lex_repl = Nullsv;
2509 }
79072805
LW
2510 /* FALLTHROUGH */
2511 case LEX_INTERPCONCAT:
2512#ifdef DEBUGGING
3280af22 2513 if (PL_lex_brackets)
cea2e8a9 2514 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2515#endif
3280af22 2516 if (PL_bufptr == PL_bufend)
bbf60fe6 2517 return REPORT(sublex_done());
79072805 2518
3280af22
NIS
2519 if (SvIVX(PL_linestr) == '\'') {
2520 SV *sv = newSVsv(PL_linestr);
2521 if (!PL_lex_inpat)
76e3520e 2522 sv = tokeq(sv);
3280af22 2523 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2524 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2525 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2526 s = PL_bufend;
79072805
LW
2527 }
2528 else {
3280af22 2529 s = scan_const(PL_bufptr);
79072805 2530 if (*s == '\\')
3280af22 2531 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2532 else
3280af22 2533 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2534 }
2535
3280af22
NIS
2536 if (s != PL_bufptr) {
2537 PL_nextval[PL_nexttoke] = yylval;
2538 PL_expect = XTERM;
79072805 2539 force_next(THING);
131b3ad0
DM
2540 if (PL_lex_starts++) {
2541 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2542 if (!PL_lex_casemods && PL_lex_inpat)
2543 OPERATOR(',');
2544 else
2545 Aop(OP_CONCAT);
2546 }
79072805 2547 else {
3280af22 2548 PL_bufptr = s;
cea2e8a9 2549 return yylex();
79072805
LW
2550 }
2551 }
2552
cea2e8a9 2553 return yylex();
a0d0e21e 2554 case LEX_FORMLINE:
3280af22
NIS
2555 PL_lex_state = LEX_NORMAL;
2556 s = scan_formline(PL_bufptr);
2557 if (!PL_lex_formbrack)
a0d0e21e
LW
2558 goto rightbracket;
2559 OPERATOR(';');
79072805
LW
2560 }
2561
3280af22
NIS
2562 s = PL_bufptr;
2563 PL_oldoldbufptr = PL_oldbufptr;
2564 PL_oldbufptr = s;
607df283 2565 DEBUG_T( {
bbf60fe6 2566 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2567 exp_name[PL_expect], s);
5f80b19c 2568 } );
463ee0b2
LW
2569
2570 retry:
378cc40b
LW
2571 switch (*s) {
2572 default:
7e2040f0 2573 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2574 goto keylookup;
cea2e8a9 2575 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2576 case 4:
2577 case 26:
2578 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2579 case 0:
3280af22
NIS
2580 if (!PL_rsfp) {
2581 PL_last_uni = 0;
2582 PL_last_lop = 0;
c5ee2135
LW
2583 if (PL_lex_brackets) {
2584 if (PL_lex_formbrack)
2585 yyerror("Format not terminated");
2586 else
2587 yyerror("Missing right curly or square bracket");
2588 }
4e553d73 2589 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2590 "### Tokener got EOF\n");
5f80b19c 2591 } );
79072805 2592 TOKEN(0);
463ee0b2 2593 }
3280af22 2594 if (s++ < PL_bufend)
a687059c 2595 goto retry; /* ignore stray nulls */
3280af22
NIS
2596 PL_last_uni = 0;
2597 PL_last_lop = 0;
2598 if (!PL_in_eval && !PL_preambled) {
2599 PL_preambled = TRUE;
2600 sv_setpv(PL_linestr,incl_perldb());
2601 if (SvCUR(PL_linestr))
4147a61b 2602 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2603 if (PL_preambleav){
2604 while(AvFILLp(PL_preambleav) >= 0) {
2605 SV *tmpsv = av_shift(PL_preambleav);
2606 sv_catsv(PL_linestr, tmpsv);
4147a61b 2607 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2608 sv_free(tmpsv);
2609 }
3280af22
NIS
2610 sv_free((SV*)PL_preambleav);
2611 PL_preambleav = NULL;
91b7def8 2612 }
3280af22
NIS
2613 if (PL_minus_n || PL_minus_p) {
2614 sv_catpv(PL_linestr, "LINE: while (<>) {");
2615 if (PL_minus_l)
2616 sv_catpv(PL_linestr,"chomp;");
2617 if (PL_minus_a) {
3280af22 2618 if (PL_minus_F) {
3792a11b
NC
2619 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2620 || *PL_splitstr == '"')
3280af22 2621 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2622 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2623 else {
c8ef6a4b
NC
2624 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2625 bytes can be used as quoting characters. :-) */
48c4c863
NC
2626 /* The count here deliberately includes the NUL
2627 that terminates the C string constant. This
2628 embeds the opening NUL into the string. */
dd374669 2629 const char *splits = PL_splitstr;
9a3fb652 2630 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2631 do {
2632 /* Need to \ \s */
dd374669
AL
2633 if (*splits == '\\')
2634 sv_catpvn(PL_linestr, splits, 1);
2635 sv_catpvn(PL_linestr, splits, 1);
2636 } while (*splits++);
48c4c863
NC
2637 /* This loop will embed the trailing NUL of
2638 PL_linestr as the last thing it does before
2639 terminating. */
4fccd7c6 2640 sv_catpvn(PL_linestr, ");", 2);
54310121 2641 }
2304df62
AD
2642 }
2643 else
75c72d73 2644 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2645 }
79072805 2646 }
4147a61b 2647 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2648 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2649 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2650 PL_last_lop = PL_last_uni = Nullch;
3280af22 2651 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2652 SV *sv = NEWSV(85,0);
2653
2654 sv_upgrade(sv, SVt_PVMG);
3280af22 2655 sv_setsv(sv,PL_linestr);
0ac0412a 2656 (void)SvIOK_on(sv);
45977657 2657 SvIV_set(sv, 0);
57843af0 2658 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2659 }
79072805 2660 goto retry;
a687059c 2661 }
e929a76b 2662 do {
aa7440fb 2663 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2664 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2665 fake_eof:
2666 if (PL_rsfp) {
2667 if (PL_preprocess && !PL_in_eval)
2668 (void)PerlProc_pclose(PL_rsfp);
2669 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2670 PerlIO_clearerr(PL_rsfp);
2671 else
2672 (void)PerlIO_close(PL_rsfp);
2673 PL_rsfp = Nullfp;
2674 PL_doextract = FALSE;
2675 }
2676 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2677 sv_setpv(PL_linestr,PL_minus_p
2678 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2679 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2680 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2681 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2682 PL_minus_n = PL_minus_p = 0;
2683 goto retry;
2684 }
2685 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2686 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2687 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2688 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2689 }
7aa207d6
JH
2690 /* If it looks like the start of a BOM or raw UTF-16,
2691 * check if it in fact is. */
2692 else if (bof &&
2693 (*s == 0 ||
2694 *(U8*)s == 0xEF ||
2695 *(U8*)s >= 0xFE ||
2696 s[1] == 0)) {
226017aa 2697#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2698# ifdef __GNU_LIBRARY__
2699# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2700# define FTELL_FOR_PIPE_IS_BROKEN
2701# endif
e3f494f1
JH
2702# else
2703# ifdef __GLIBC__
2704# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2705# define FTELL_FOR_PIPE_IS_BROKEN
2706# endif
2707# endif
226017aa
DD
2708# endif
2709#endif
2710#ifdef FTELL_FOR_PIPE_IS_BROKEN
2711 /* This loses the possibility to detect the bof
2712 * situation on perl -P when the libc5 is being used.
2713 * Workaround? Maybe attach some extra state to PL_rsfp?
2714 */
2715 if (!PL_preprocess)
7e28d3af 2716 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2717#else
eb160463 2718 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2719#endif
7e28d3af 2720 if (bof) {
3280af22 2721 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2722 s = swallow_bom((U8*)s);
e929a76b 2723 }
378cc40b 2724 }
3280af22 2725 if (PL_doextract) {
a0d0e21e
LW
2726 /* Incest with pod. */
2727 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2728 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2729 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2731 PL_last_lop = PL_last_uni = Nullch;
3280af22 2732 PL_doextract = FALSE;
a0d0e21e 2733 }
4e553d73 2734 }
463ee0b2 2735 incline(s);
3280af22
NIS
2736 } while (PL_doextract);
2737 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2738 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2739 SV *sv = NEWSV(85,0);
a687059c 2740
93a17b20 2741 sv_upgrade(sv, SVt_PVMG);
3280af22 2742 sv_setsv(sv,PL_linestr);
0ac0412a 2743 (void)SvIOK_on(sv);
45977657 2744 SvIV_set(sv, 0);
57843af0 2745 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2746 }
3280af22 2747 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2748 PL_last_lop = PL_last_uni = Nullch;
57843af0 2749 if (CopLINE(PL_curcop) == 1) {
3280af22 2750 while (s < PL_bufend && isSPACE(*s))
79072805 2751 s++;
a0d0e21e 2752 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2753 s++;
44a8e56a 2754 d = Nullch;
3280af22 2755 if (!PL_in_eval) {
44a8e56a
PP
2756 if (*s == '#' && *(s+1) == '!')
2757 d = s + 2;
2758#ifdef ALTERNATE_SHEBANG
2759 else {
bfed75c6 2760 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2761 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2762 d = s + (sizeof(as) - 1);
2763 }
2764#endif /* ALTERNATE_SHEBANG */
2765 }
2766 if (d) {
b8378b72 2767 char *ipath;
774d564b 2768 char *ipathend;
b8378b72 2769
774d564b 2770 while (isSPACE(*d))
b8378b72
CS
2771 d++;
2772 ipath = d;
774d564b
PP
2773 while (*d && !isSPACE(*d))
2774 d++;
2775 ipathend = d;
2776
2777#ifdef ARG_ZERO_IS_SCRIPT
2778 if (ipathend > ipath) {
2779 /*
2780 * HP-UX (at least) sets argv[0] to the script name,
2781 * which makes $^X incorrect. And Digital UNIX and Linux,
2782 * at least, set argv[0] to the basename of the Perl
2783 * interpreter. So, having found "#!", we'll set it right.
2784 */
ee2f7564 2785 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2786 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2787 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2788 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2789 SvSETMAGIC(x);
2790 }
556c1dec
JH
2791 else {
2792 STRLEN blen;
2793 STRLEN llen;
f54cb97a
AL
2794 const char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2795 const char *lstart = SvPV(x,llen);
556c1dec
JH
2796 if (llen < blen) {
2797 bstart += blen - llen;
2798 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2799 sv_setpvn(x, ipath, ipathend - ipath);
2800 SvSETMAGIC(x);
2801 }
2802 }
2803 }
774d564b 2804 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2805 }
774d564b 2806#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2807
2808 /*
2809 * Look for options.
2810 */
748a9306 2811 d = instr(s,"perl -");
84e30d1a 2812 if (!d) {
748a9306 2813 d = instr(s,"perl");
84e30d1a
GS
2814#if defined(DOSISH)
2815 /* avoid getting into infinite loops when shebang
2816 * line contains "Perl" rather than "perl" */
2817 if (!d) {
2818 for (d = ipathend-4; d >= ipath; --d) {
2819 if ((*d == 'p' || *d == 'P')
2820 && !ibcmp(d, "perl", 4))
2821 {
2822 break;
2823 }
2824 }
2825 if (d < ipath)
2826 d = Nullch;
2827 }
2828#endif
2829 }
44a8e56a
PP
2830#ifdef ALTERNATE_SHEBANG
2831 /*
2832 * If the ALTERNATE_SHEBANG on this system starts with a
2833 * character that can be part of a Perl expression, then if
2834 * we see it but not "perl", we're probably looking at the
2835 * start of Perl code, not a request to hand off to some
2836 * other interpreter. Similarly, if "perl" is there, but
2837 * not in the first 'word' of the line, we assume the line
2838 * contains the start of the Perl program.
44a8e56a
PP
2839 */
2840 if (d && *s != '#') {
f54cb97a 2841 const char *c = ipath;
44a8e56a
PP
2842 while (*c && !strchr("; \t\r\n\f\v#", *c))
2843 c++;
2844 if (c < d)
2845 d = Nullch; /* "perl" not in first word; ignore */
2846 else
2847 *s = '#'; /* Don't try to parse shebang line */
2848 }
774d564b 2849#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2850#ifndef MACOS_TRADITIONAL
748a9306 2851 if (!d &&
44a8e56a 2852 *s == '#' &&
774d564b 2853 ipathend > ipath &&
3280af22 2854 !PL_minus_c &&
748a9306 2855 !instr(s,"indir") &&
3280af22 2856 instr(PL_origargv[0],"perl"))
748a9306 2857 {
27da23d5 2858 dVAR;
9f68db38 2859 char **newargv;
9f68db38 2860
774d564b
PP
2861 *ipathend = '\0';
2862 s = ipathend + 1;
3280af22 2863 while (s < PL_bufend && isSPACE(*s))
9f68db38 2864 s++;
3280af22
NIS
2865 if (s < PL_bufend) {
2866 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2867 newargv[1] = s;
3280af22 2868 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2869 s++;
2870 *s = '\0';
3280af22 2871 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2872 }
2873 else
3280af22 2874 newargv = PL_origargv;
774d564b 2875 newargv[0] = ipath;
b35112e7 2876 PERL_FPU_PRE_EXEC
b4748376 2877 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2878 PERL_FPU_POST_EXEC
cea2e8a9 2879 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2880 }
bf4acbe4 2881#endif
748a9306 2882 if (d) {
f54cb97a
AL
2883 const U32 oldpdb = PL_perldb;
2884 const bool oldn = PL_minus_n;
2885 const bool oldp = PL_minus_p;
748a9306
LW
2886
2887 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2888 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2889
2890 if (*d++ == '-') {
f54cb97a 2891 const bool switches_done = PL_doswitches;
8cc95fdb 2892 do {
3ffe3ee4 2893 if (*d == 'M' || *d == 'm' || *d == 'C') {
f54cb97a 2894 const char *m = d;
8cc95fdb 2895 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2896 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2897 (int)(d - m), m);
2898 }
2899 d = moreswitches(d);
2900 } while (d);
f0b2cf55
YST
2901 if (PL_doswitches && !switches_done) {
2902 int argc = PL_origargc;
2903 char **argv = PL_origargv;
2904 do {
2905 argc--,argv++;
2906 } while (argc && argv[0][0] == '-' && argv[0][1]);
2907 init_argv_symbols(argc,argv);
2908 }
155aba94
GS
2909 if ((PERLDB_LINE && !oldpdb) ||
2910 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2911 /* if we have already added "LINE: while (<>) {",
2912 we must not do it again */
748a9306 2913 {
c69006e4 2914 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2915 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2916 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2917 PL_last_lop = PL_last_uni = Nullch;
3280af22 2918 PL_preambled = FALSE;
84902520 2919 if (PERLDB_LINE)
3280af22 2920 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2921 goto retry;
2922 }
a11ec5a9
RGS
2923 if (PL_doswitches && !switches_done) {
2924 int argc = PL_origargc;
2925 char **argv = PL_origargv;
2926 do {
2927 argc--,argv++;
2928 } while (argc && argv[0][0] == '-' && argv[0][1]);
2929 init_argv_symbols(argc,argv);
2930 }
a0d0e21e 2931 }
79072805 2932 }
9f68db38 2933 }
79072805 2934 }
3280af22
NIS
2935 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2936 PL_bufptr = s;
2937 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2938 return yylex();
ae986130 2939 }
378cc40b 2940 goto retry;
4fdae800 2941 case '\r':
6a27c188 2942#ifdef PERL_STRICT_CR
cea2e8a9 2943 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2944 Perl_croak(aTHX_
cc507455 2945 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2946#endif
4fdae800 2947 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2948#ifdef MACOS_TRADITIONAL
2949 case '\312':
2950#endif
378cc40b
LW
2951 s++;
2952 goto retry;
378cc40b 2953 case '#':
e929a76b 2954 case '\n':
3280af22 2955 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2956 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2957 /* handle eval qq[#line 1 "foo"\n ...] */
2958 CopLINE_dec(PL_curcop);
2959 incline(s);
2960 }
3280af22 2961 d = PL_bufend;
a687059c 2962 while (s < d && *s != '\n')
378cc40b 2963 s++;
0f85fab0 2964 if (s < d)
378cc40b 2965 s++;
78c267c1 2966 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2967 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2968 incline(s);
3280af22
NIS
2969 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2970 PL_bufptr = s;
2971 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2972 return yylex();
a687059c 2973 }
378cc40b 2974 }
a687059c 2975 else {
378cc40b 2976 *s = '\0';
3280af22 2977 PL_bufend = s;
a687059c 2978 }
378cc40b
LW
2979 goto retry;
2980 case '-':
79072805 2981 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2982 I32 ftst = 0;
2983
378cc40b 2984 s++;
3280af22 2985 PL_bufptr = s;
748a9306
LW
2986 tmp = *s++;
2987
bf4acbe4 2988 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2989 s++;
2990
2991 if (strnEQ(s,"=>",2)) {
3280af22 2992 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2993 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2994 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2995 } );
748a9306
LW
2996 OPERATOR('-'); /* unary minus */
2997 }
3280af22 2998 PL_last_uni = PL_oldbufptr;
748a9306 2999 switch (tmp) {
e5edeb50
JH
3000 case 'r': ftst = OP_FTEREAD; break;
3001 case 'w': ftst = OP_FTEWRITE; break;
3002 case 'x': ftst = OP_FTEEXEC; break;
3003 case 'o': ftst = OP_FTEOWNED; break;
3004 case 'R': ftst = OP_FTRREAD; break;
3005 case 'W': ftst = OP_FTRWRITE; break;
3006 case 'X': ftst = OP_FTREXEC; break;
3007 case 'O': ftst = OP_FTROWNED; break;
3008 case 'e': ftst = OP_FTIS; break;
3009 case 'z': ftst = OP_FTZERO; break;
3010 case 's': ftst = OP_FTSIZE; break;
3011 case 'f': ftst = OP_FTFILE; break;
3012 case 'd': ftst = OP_FTDIR; break;
3013 case 'l': ftst = OP_FTLINK; break;
3014 case 'p': ftst = OP_FTPIPE; break;
3015 case 'S': ftst = OP_FTSOCK; break;
3016 case 'u': ftst = OP_FTSUID; break;
3017 case 'g': ftst = OP_FTSGID; break;
3018 case 'k': ftst = OP_FTSVTX; break;
3019 case 'b': ftst = OP_FTBLK; break;
3020 case 'c': ftst = OP_FTCHR; break;
3021 case 't': ftst = OP_FTTTY; break;
3022 case 'T': ftst = OP_FTTEXT; break;
3023 case 'B': ftst = OP_FTBINARY; break;
3024 case 'M': case 'A': case 'C':
3025 gv_fetchpv("\024",TRUE, SVt_PV);
3026 switch (tmp) {
3027 case 'M': ftst = OP_FTMTIME; break;
3028 case 'A': ftst = OP_FTATIME; break;
3029 case 'C': ftst = OP_FTCTIME; break;
3030 default: break;
3031 }
3032 break;
378cc40b 3033 default:
378cc40b
LW
3034 break;
3035 }
e5edeb50 3036 if (ftst) {
eb160463 3037 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3038 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3039 "### Saw file test %c\n", (int)ftst);
5f80b19c 3040 } );
e5edeb50
JH
3041 FTST(ftst);
3042 }
3043 else {
3044 /* Assume it was a minus followed by a one-letter named
3045 * subroutine call (or a -bareword), then. */
95c31fe3 3046 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3047 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3048 (int) tmp);
5f80b19c 3049 } );
3cf7b4c4 3050 s = --PL_bufptr;
e5edeb50 3051 }
378cc40b 3052 }
a687059c
LW
3053 tmp = *s++;
3054 if (*s == tmp) {
3055 s++;
3280af22 3056 if (PL_expect == XOPERATOR)
79072805
LW
3057 TERM(POSTDEC);
3058 else
3059 OPERATOR(PREDEC);
3060 }
3061 else if (*s == '>') {
3062 s++;
3063 s = skipspace(s);
7e2040f0 3064 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3065 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3066 TOKEN(ARROW);
79072805 3067 }
748a9306
LW
3068 else if (*s == '$')
3069 OPERATOR(ARROW);
463ee0b2 3070 else
748a9306 3071 TERM(ARROW);
a687059c 3072 }
3280af22 3073 if (PL_expect == XOPERATOR)
79072805
LW
3074 Aop(OP_SUBTRACT);
3075 else {
3280af22 3076 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3077 check_uni();
79072805 3078 OPERATOR('-'); /* unary minus */
2f3197b3 3079 }
79072805 3080
378cc40b 3081 case '+':
a687059c
LW
3082 tmp = *s++;
3083 if (*s == tmp) {
378cc40b 3084 s++;
3280af22 3085 if (PL_expect == XOPERATOR)
79072805
LW
3086 TERM(POSTINC);
3087 else
3088 OPERATOR(PREINC);
378cc40b 3089 }
3280af22 3090 if (PL_expect == XOPERATOR)
79072805
LW
3091 Aop(OP_ADD);
3092 else {
3280af22 3093 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3094 check_uni();
a687059c 3095 OPERATOR('+');
2f3197b3 3096 }
a687059c 3097
378cc40b 3098 case '*':
3280af22
NIS
3099 if (PL_expect != XOPERATOR) {
3100 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3101 PL_expect = XOPERATOR;
3102 force_ident(PL_tokenbuf, '*');
3103 if (!*PL_tokenbuf)
a0d0e21e 3104 PREREF('*');
79072805 3105 TERM('*');
a687059c 3106 }
79072805
LW
3107 s++;
3108 if (*s == '*') {
a687059c 3109 s++;
79072805 3110 PWop(OP_POW);
a687059c 3111 }
79072805
LW
3112 Mop(OP_MULTIPLY);
3113
378cc40b 3114 case '%':
3280af22 3115 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3116 ++s;
3117 Mop(OP_MODULO);
a687059c 3118 }
3280af22
NIS
3119 PL_tokenbuf[0] = '%';
3120 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3121 if (!PL_tokenbuf[1]) {
bbce6d69 3122 PREREF('%');
a687059c 3123 }
3280af22 3124 PL_pending_ident = '%';
bbce6d69 3125 TERM('%');
a687059c 3126
378cc40b 3127 case '^':
79072805 3128 s++;
a0d0e21e 3129 BOop(OP_BIT_XOR);
79072805 3130 case '[':
3280af22 3131 PL_lex_brackets++;
79072805 3132 /* FALL THROUGH */
378cc40b 3133 case '~':
378cc40b 3134 case ',':
378cc40b
LW
3135 tmp = *s++;
3136 OPERATOR(tmp);
a0d0e21e
LW
3137 case ':':
3138 if (s[1] == ':') {
3139 len = 0;
3140 goto just_a_word;
3141 }
3142 s++;
09bef843
SB
3143 switch (PL_expect) {
3144 OP *attrs;
3145 case XOPERATOR:
3146 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3147 break;
3148 PL_bufptr = s; /* update in case we back off */
3149 goto grabattrs;
3150 case XATTRBLOCK:
3151 PL_expect = XBLOCK;
3152 goto grabattrs;
3153 case XATTRTERM:
3154 PL_expect = XTERMBLOCK;
3155 grabattrs:
3156 s = skipspace(s);
3157 attrs = Nullop;
7e2040f0 3158 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3159 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3160 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3161 if (tmp < 0) tmp = -tmp;
3162 switch (tmp) {
3163 case KEY_or:
3164 case KEY_and:
c963b151 3165 case KEY_err:
f9829d6b
GS
3166 case KEY_for:
3167 case KEY_unless:
3168 case KEY_if:
3169 case KEY_while:
3170 case KEY_until:
3171 goto got_attrs;
3172 default:
3173 break;
3174 }
3175 }
09bef843
SB
3176 if (*d == '(') {
3177 d = scan_str(d,TRUE,TRUE);
3178 if (!d) {
09bef843
SB
3179 /* MUST advance bufptr here to avoid bogus
3180 "at end of line" context messages from yyerror().
3181 */
3182 PL_bufptr = s + len;
3183 yyerror("Unterminated attribute parameter in attribute list");
3184 if (attrs)
3185 op_free(attrs);
bbf60fe6 3186 return REPORT(0); /* EOF indicator */
09bef843
SB
3187 }
3188 }
3189 if (PL_lex_stuff) {
3190 SV *sv = newSVpvn(s, len);
3191 sv_catsv(sv, PL_lex_stuff);
3192 attrs = append_elem(OP_LIST, attrs,
3193 newSVOP(OP_CONST, 0, sv));
3194 SvREFCNT_dec(PL_lex_stuff);
3195 PL_lex_stuff = Nullsv;
3196 }
3197 else {
371fce9b
DM
3198 if (len == 6 && strnEQ(s, "unique", len)) {
3199 if (PL_in_my == KEY_our)
3200#ifdef USE_ITHREADS
3201 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3202#else
3203 ; /* skip to avoid loading attributes.pm */
3204#endif
bfed75c6 3205 else
371fce9b
DM
3206 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3207 }
3208
d3cea301
SB
3209 /* NOTE: any CV attrs applied here need to be part of
3210 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3211 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3212 CvLVALUE_on(PL_compcv);
3213 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3214 CvLOCKED_on(PL_compcv);
3215 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3216 CvMETHOD_on(PL_compcv);
06492da6
SF
3217 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3218 CvASSERTION_on(PL_compcv);
78f9721b
SM
3219 /* After we've set the flags, it could be argued that
3220 we don't need to do the attributes.pm-based setting
3221 process, and shouldn't bother appending recognized
d3cea301
SB
3222 flags. To experiment with that, uncomment the
3223 following "else". (Note that's already been
3224 uncommented. That keeps the above-applied built-in
3225 attributes from being intercepted (and possibly
3226 rejected) by a package's attribute routines, but is
3227 justified by the performance win for the common case
3228 of applying only built-in attributes.) */
0256094b 3229 else
78f9721b
SM
3230 attrs = append_elem(OP_LIST, attrs,
3231 newSVOP(OP_CONST, 0,
3232 newSVpvn(s, len)));
09bef843
SB
3233 }
3234 s = skipspace(d);
0120eecf 3235 if (*s == ':' && s[1] != ':')
09bef843 3236 s = skipspace(s+1);
0120eecf
GS
3237 else if (s == d)
3238 break; /* require real whitespace or :'s */
09bef843 3239 }
f9829d6b 3240 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3241 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3242 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3243 /* If here for an expression, and parsed no attrs, back off. */
3244 if (tmp == '=' && !attrs) {
3245 s = PL_bufptr;
3246 break;
3247 }
3248 /* MUST advance bufptr here to avoid bogus "at end of line"
3249 context messages from yyerror().
3250 */
3251 PL_bufptr = s;
3252 if (!*s)
3253 yyerror("Unterminated attribute list");
3254 else
3255 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3256 q, *s, q));
3257 if (attrs)
3258 op_free(attrs);
3259 OPERATOR(':');
3260 }
f9829d6b 3261 got_attrs:
09bef843
SB
3262 if (attrs) {
3263 PL_nextval[PL_nexttoke].opval = attrs;
3264 force_next(THING);
3265 }
3266 TOKEN(COLONATTR);
3267 }
a0d0e21e 3268 OPERATOR(':');
8990e307
LW
3269 case '(':
3270 s++;
3280af22
NIS
3271 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3272 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3273 else
3280af22 3274 PL_expect = XTERM;
4a202259 3275 s = skipspace(s);
a0d0e21e 3276 TOKEN('(');
378cc40b 3277 case ';':
f4dd75d9 3278 CLINE;
378cc40b
LW
3279 tmp = *s++;
3280 OPERATOR(tmp);
3281 case ')':
378cc40b 3282 tmp = *s++;
16d20bd9
AD
3283 s = skipspace(s);
3284 if (*s == '{')
3285 PREBLOCK(tmp);
378cc40b 3286 TERM(tmp);
79072805
LW
3287 case ']':
3288 s++;
3280af22 3289 if (PL_lex_brackets <= 0)
d98d5fff 3290 yyerror("Unmatched right square bracket");
463ee0b2 3291 else
3280af22
NIS
3292 --PL_lex_brackets;
3293 if (PL_lex_state == LEX_INTERPNORMAL) {
3294 if (PL_lex_brackets == 0) {
a0d0e21e 3295 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3296 PL_lex_state = LEX_INTERPEND;
79072805
LW
3297 }
3298 }
4633a7c4 3299 TERM(']');
79072805
LW
3300 case '{':
3301 leftbracket:
79072805 3302 s++;
3280af22 3303 if (PL_lex_brackets > 100) {
8edd5f42 3304 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3305 }
3280af22 3306 switch (PL_expect) {
a0d0e21e 3307 case XTERM:
3280af22 3308 if (PL_lex_formbrack) {
a0d0e21e
LW
3309 s--;
3310 PRETERMBLOCK(DO);
3311 }
3280af22
NIS
3312 if (PL_oldoldbufptr == PL_last_lop)
3313 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3314 else
3280af22 3315 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3316 OPERATOR(HASHBRACK);
a0d0e21e 3317 case XOPERATOR:
bf4acbe4 3318 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3319 s++;
44a8e56a 3320 d = s;
3280af22
NIS
3321 PL_tokenbuf[0] = '\0';
3322 if (d < PL_bufend && *d == '-') {
3323 PL_tokenbuf[0] = '-';
44a8e56a 3324 d++;
bf4acbe4 3325 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3326 d++;
3327 }
7e2040f0 3328 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3329 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3330 FALSE, &len);
bf4acbe4 3331 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3332 d++;
3333 if (*d == '}') {
f54cb97a 3334 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3335 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3336 if (minus)
3337 force_next('-');
748a9306
LW
3338 }
3339 }
3340 /* FALL THROUGH */
09bef843 3341 case XATTRBLOCK:
748a9306 3342 case XBLOCK:
3280af22
NIS
3343 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3344 PL_expect = XSTATE;
a0d0e21e 3345 break;
09bef843 3346 case XATTRTERM:
a0d0e21e 3347 case XTERMBLOCK:
3280af22
NIS
3348 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3349 PL_expect = XSTATE;
a0d0e21e
LW
3350 break;
3351 default: {
f54cb97a 3352 const char *t;
3280af22
NIS
3353 if (PL_oldoldbufptr == PL_last_lop)
3354 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3355 else
3280af22 3356 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3357 s = skipspace(s);
8452ff4b
SB
3358 if (*s == '}') {
3359 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3360 PL_expect = XTERM;
3361 /* This hack is to get the ${} in the message. */
3362 PL_bufptr = s+1;
3363 yyerror("syntax error");
3364 break;
3365 }
a0d0e21e 3366 OPERATOR(HASHBRACK);
8452ff4b 3367 }
b8a4b1be
GS
3368 /* This hack serves to disambiguate a pair of curlies
3369 * as being a block or an anon hash. Normally, expectation
3370 * determines that, but in cases where we're not in a
3371 * position to expect anything in particular (like inside
3372 * eval"") we have to resolve the ambiguity. This code
3373 * covers the case where the first term in the curlies is a
3374 * quoted string. Most other cases need to be explicitly
a0288114 3375 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3376 * curly in order to force resolution as an anon hash.
3377 *
3378 * XXX should probably propagate the outer expectation
3379 * into eval"" to rely less on this hack, but that could
3380 * potentially break current behavior of eval"".
3381 * GSAR 97-07-21
3382 */
3383 t = s;
3384 if (*s == '\'' || *s == '"' || *s == '`') {
3385 /* common case: get past first string, handling escapes */
3280af22 3386 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3387 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3388 t++;
3389 t++;
a0d0e21e 3390 }
b8a4b1be 3391 else if (*s == 'q') {
3280af22 3392 if (++t < PL_bufend
b8a4b1be 3393 && (!isALNUM(*t)
3280af22 3394 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3395 && !isALNUM(*t))))
3396 {
abc667d1 3397 /* skip q//-like construct */
f54cb97a 3398 const char *tmps;
b8a4b1be
GS
3399 char open, close, term;
3400 I32 brackets = 1;
3401
3280af22 3402 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3403 t++;
abc667d1
DM
3404 /* check for q => */
3405 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3406 OPERATOR(HASHBRACK);
3407 }
b8a4b1be
GS
3408 term = *t;
3409 open = term;
3410 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3411 term = tmps[5];
3412 close = term;
3413 if (open == close)
3280af22
NIS
3414 for (t++; t < PL_bufend; t++) {
3415 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3416 t++;
6d07e5e9 3417 else if (*t == open)
b8a4b1be
GS
3418 break;
3419 }
abc667d1 3420 else {
3280af22
NIS
3421 for (t++; t < PL_bufend; t++) {
3422 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3423 t++;
6d07e5e9 3424 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3425 break;
3426 else if (*t == open)
3427 brackets++;
3428 }
abc667d1
DM
3429 }
3430 t++;
b8a4b1be 3431 }
abc667d1
DM
3432 else
3433 /* skip plain q word */
3434 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3435 t += UTF8SKIP(t);
a0d0e21e 3436 }
7e2040f0 3437 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3438 t += UTF8SKIP(t);
7e2040f0 3439 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3440 t += UTF8SKIP(t);
a0d0e21e 3441 }
3280af22 3442 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3443 t++;
b8a4b1be
GS
3444 /* if comma follows first term, call it an anon hash */
3445 /* XXX it could be a comma expression with loop modifiers */
3280af22 3446 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3447 || (*t == '=' && t[1] == '>')))
a0d0e21e 3448 OPERATOR(HASHBRACK);
3280af22 3449 if (PL_expect == XREF)
4e4e412b 3450 PL_expect = XTERM;
a0d0e21e 3451 else {
3280af22
NIS
3452 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3453 PL_expect = XSTATE;
a0d0e21e 3454 }
8990e307 3455 }
a0d0e21e 3456 break;
463ee0b2 3457 }
57843af0 3458 yylval.ival = CopLINE(PL_curcop);
79072805 3459 if (isSPACE(*s) || *s == '#')
3280af22 3460 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3461 TOKEN('{');
378cc40b 3462 case '}':
79072805
LW
3463 rightbracket:
3464 s++;
3280af22 3465 if (PL_lex_brackets <= 0)
d98d5fff 3466 yyerror("Unmatched right curly bracket");
463ee0b2 3467 else
3280af22 3468 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3469 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3470 PL_lex_formbrack = 0;
3471 if (PL_lex_state == LEX_INTERPNORMAL) {
3472 if (PL_lex_brackets == 0) {
9059aa12
LW
3473 if (PL_expect & XFAKEBRACK) {
3474 PL_expect &= XENUMMASK;
3280af22
NIS
3475 PL_lex_state = LEX_INTERPEND;
3476 PL_bufptr = s;
cea2e8a9 3477 return yylex(); /* ignore fake brackets */
79072805 3478 }
fa83b5b6 3479 if (*s == '-' && s[1] == '>')
3280af22 3480 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3481 else if (*s != '[' && *s != '{')
3280af22 3482 PL_lex_state = LEX_INTERPEND;
79072805
LW
3483 }
3484 }
9059aa12
LW
3485 if (PL_expect & XFAKEBRACK) {
3486 PL_expect &= XENUMMASK;
3280af22 3487 PL_bufptr = s;
cea2e8a9 3488 return yylex(); /* ignore fake brackets */
748a9306 3489 }
79072805
LW
3490 force_next('}');
3491 TOKEN(';');
378cc40b
LW
3492 case '&':
3493 s++;
3494 tmp = *s++;
3495 if (tmp == '&')
a0d0e21e 3496 AOPERATOR(ANDAND);
378cc40b 3497 s--;
3280af22 3498 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3499 if (ckWARN(WARN_SEMICOLON)
3500 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3501 {
57843af0 3502 CopLINE_dec(PL_curcop);
9014280d 3503 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3504 CopLINE_inc(PL_curcop);
463ee0b2 3505 }
79072805 3506 BAop(OP_BIT_AND);
463ee0b2 3507 }
79072805 3508
3280af22
NIS
3509 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3510 if (*PL_tokenbuf) {
3511 PL_expect = XOPERATOR;
3512 force_ident(PL_tokenbuf, '&');
463ee0b2 3513 }
79072805
LW
3514 else
3515 PREREF('&');
c07a80fd 3516 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3517 TERM('&');
3518
378cc40b
LW
3519 case '|':
3520 s++;
3521 tmp = *s++;
3522 if (tmp == '|')
a0d0e21e 3523 AOPERATOR(OROR);
378cc40b 3524 s--;