This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make building perl on Win32 with the free VC++ toolkit easier
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
27da23d5
JH
29static const char ident_too_long[] =
30 "Identifier too long";
31static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
8903cb82 35
acfe0abc 36static void restore_rsfp(pTHX_ void *f);
6e3aabd6 37#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
38static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 40#endif
51371543 41
9059aa12
LW
42#define XFAKEBRACK 128
43#define XENUMMASK 127
44
39e02b42
JH
45#ifdef USE_UTF8_SCRIPTS
46# define UTF (!IN_BYTES)
2b9d42f0 47#else
746b446a 48# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 49#endif
a0ed51b3 50
61f0cdd9 51/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
52 * 1999-02-27 mjd-perl-patch@plover.com */
53#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
54
bf4acbe4
GS
55/* On MacOS, respect nonbreaking spaces */
56#ifdef MACOS_TRADITIONAL
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
58#else
59#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60#endif
61
ffb4593c
NT
62/* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
79072805
LW
64 * can get by with a single comparison (if the compiler is smart enough).
65 */
66
fb73857a
PP
67/* #define LEX_NOTPARSING 11 is done in perl.h. */
68
55497cff
PP
69#define LEX_NORMAL 10
70#define LEX_INTERPNORMAL 9
71#define LEX_INTERPCASEMOD 8
72#define LEX_INTERPPUSH 7
73#define LEX_INTERPSTART 6
74#define LEX_INTERPEND 5
75#define LEX_INTERPENDMAYBE 4
76#define LEX_INTERPCONCAT 3
77#define LEX_INTERPCONST 2
78#define LEX_FORMLINE 1
79#define LEX_KNOWNEXT 0
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 139# define REPORT(retval) tokereport(s,(int)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
376fcdbf
AL
170#define UNI2(f,x) { \
171 yylval.ival = f; \
172 PL_expect = x; \
173 PL_bufptr = s; \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
176 if (*s == '(') \
177 return REPORT( (int)FUNC1 ); \
178 s = skipspace(s); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
180 }
6f33ba73
RGS
181#define UNI(f) UNI2(f,XTERM)
182#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 183
376fcdbf
AL
184#define UNIBRACK(f) { \
185 yylval.ival = f; \
186 PL_bufptr = s; \
187 PL_last_uni = PL_oldbufptr; \
188 if (*s == '(') \
189 return REPORT( (int)FUNC1 ); \
190 s = skipspace(s); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
192 }
79072805 193
9f68db38 194/* grandfather return to old style */
3280af22 195#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 196
8fa7f367
JH
197#ifdef DEBUGGING
198
bbf60fe6
DM
199/* how to interpret the yylval associated with the token */
200enum token_type {
201 TOKENTYPE_NONE,
202 TOKENTYPE_IVAL,
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
204 TOKENTYPE_PVAL,
205 TOKENTYPE_OPVAL,
206 TOKENTYPE_GVVAL
207};
208
27da23d5
JH
209static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
9041c2e3 211{
bbf60fe6
DM
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DO, TOKENTYPE_NONE, "DO" },
223 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
224 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
225 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
226 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
227 { ELSE, TOKENTYPE_NONE, "ELSE" },
228 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
229 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
230 { FOR, TOKENTYPE_IVAL, "FOR" },
231 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
232 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
233 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
234 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
235 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
236 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
237 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
238 { IF, TOKENTYPE_IVAL, "IF" },
239 { LABEL, TOKENTYPE_PVAL, "LABEL" },
240 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
241 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
242 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
243 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
244 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
245 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
246 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
247 { MY, TOKENTYPE_IVAL, "MY" },
248 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
249 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
250 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
251 { OROP, TOKENTYPE_IVAL, "OROP" },
252 { OROR, TOKENTYPE_NONE, "OROR" },
253 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
254 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
255 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
256 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
257 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
258 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
259 { PREINC, TOKENTYPE_NONE, "PREINC" },
260 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
261 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
262 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
263 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
264 { SUB, TOKENTYPE_NONE, "SUB" },
265 { THING, TOKENTYPE_OPVAL, "THING" },
266 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
267 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
268 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
269 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
270 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
271 { USE, TOKENTYPE_IVAL, "USE" },
272 { WHILE, TOKENTYPE_IVAL, "WHILE" },
273 { WORD, TOKENTYPE_OPVAL, "WORD" },
274 { 0, TOKENTYPE_NONE, 0 }
275};
276
277/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 278
bbf60fe6 279STATIC int
bfed75c6 280S_tokereport(pTHX_ const char* s, I32 rv)
bbf60fe6
DM
281{
282 if (DEBUG_T_TEST) {
bfed75c6 283 const char *name = Nullch;
bbf60fe6 284 enum token_type type = TOKENTYPE_NONE;
f54cb97a 285 const struct debug_tokens *p;
9d4ba2ae 286 SV* const report = newSVpvn("<== ", 4);
bbf60fe6 287
f54cb97a 288 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
289 if (p->token == (int)rv) {
290 name = p->name;
291 type = p->type;
292 break;
293 }
294 }
295 if (name)
54667de8 296 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
297 else if ((char)rv > ' ' && (char)rv < '~')
298 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
299 else if (!rv)
54667de8 300 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
301 else
302 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
303 switch (type) {
304 case TOKENTYPE_NONE:
305 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
306 break;
307 case TOKENTYPE_IVAL:
e4584336 308 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
309 break;
310 case TOKENTYPE_OPNUM:
311 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
312 PL_op_name[yylval.ival]);
313 break;
314 case TOKENTYPE_PVAL:
315 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
316 break;
317 case TOKENTYPE_OPVAL:
401441c0
RGS
318 if (yylval.opval)
319 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 320 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
321 else
322 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
323 break;
324 }
e4584336 325 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
998054bd
SC
326 if (s - PL_bufptr > 0)
327 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
328 else {
329 if (PL_oldbufptr && *PL_oldbufptr)
330 sv_catpv(report, PL_tokenbuf);
331 }
cfd0369c 332 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen_const(report));
bbf60fe6
DM
333 };
334 return (int)rv;
998054bd
SC
335}
336
8fa7f367
JH
337#endif
338
ffb4593c
NT
339/*
340 * S_ao
341 *
c963b151
BD
342 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
343 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
344 */
345
76e3520e 346STATIC int
cea2e8a9 347S_ao(pTHX_ int toketype)
a0d0e21e 348{
3280af22
NIS
349 if (*PL_bufptr == '=') {
350 PL_bufptr++;
a0d0e21e
LW
351 if (toketype == ANDAND)
352 yylval.ival = OP_ANDASSIGN;
353 else if (toketype == OROR)
354 yylval.ival = OP_ORASSIGN;
c963b151
BD
355 else if (toketype == DORDOR)
356 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
357 toketype = ASSIGNOP;
358 }
359 return toketype;
360}
361
ffb4593c
NT
362/*
363 * S_no_op
364 * When Perl expects an operator and finds something else, no_op
365 * prints the warning. It always prints "<something> found where
366 * operator expected. It prints "Missing semicolon on previous line?"
367 * if the surprise occurs at the start of the line. "do you need to
368 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
369 * where the compiler doesn't know if foo is a method call or a function.
370 * It prints "Missing operator before end of line" if there's nothing
371 * after the missing operator, or "... before <...>" if there is something
372 * after the missing operator.
373 */
374
76e3520e 375STATIC void
bfed75c6 376S_no_op(pTHX_ const char *what, char *s)
463ee0b2 377{
9d4ba2ae
AL
378 char * const oldbp = PL_bufptr;
379 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 380
1189a94a
GS
381 if (!s)
382 s = oldbp;
07c798fb 383 else
1189a94a 384 PL_bufptr = s;
cea2e8a9 385 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
386 if (ckWARN_d(WARN_SYNTAX)) {
387 if (is_first)
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Missing semicolon on previous line?)\n");
390 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 391 const char *t;
56da5a46
RGS
392 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
393 if (t < PL_bufptr && isSPACE(*t))
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Do you need to predeclare %.*s?)\n",
396 t - PL_oldoldbufptr, PL_oldoldbufptr);
397 }
398 else {
399 assert(s >= oldbp);
400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
401 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
402 }
07c798fb 403 }
3280af22 404 PL_bufptr = oldbp;
8990e307
LW
405}
406
ffb4593c
NT
407/*
408 * S_missingterm
409 * Complain about missing quote/regexp/heredoc terminator.
410 * If it's called with (char *)NULL then it cauterizes the line buffer.
411 * If we're in a delimited string and the delimiter is a control
412 * character, it's reformatted into a two-char sequence like ^C.
413 * This is fatal.
414 */
415
76e3520e 416STATIC void
cea2e8a9 417S_missingterm(pTHX_ char *s)
8990e307
LW
418{
419 char tmpbuf[3];
420 char q;
421 if (s) {
9d4ba2ae 422 char * const nl = strrchr(s,'\n');
d2719217 423 if (nl)
8990e307
LW
424 *nl = '\0';
425 }
9d116dd7
JH
426 else if (
427#ifdef EBCDIC
428 iscntrl(PL_multi_close)
429#else
430 PL_multi_close < 32 || PL_multi_close == 127
431#endif
432 ) {
8990e307 433 *tmpbuf = '^';
3280af22 434 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
435 tmpbuf[2] = '\0';
436 s = tmpbuf;
437 }
438 else {
eb160463 439 *tmpbuf = (char)PL_multi_close;
8990e307
LW
440 tmpbuf[1] = '\0';
441 s = tmpbuf;
442 }
443 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 444 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 445}
79072805 446
ffb4593c
NT
447/*
448 * Perl_deprecate
ffb4593c
NT
449 */
450
79072805 451void
bfed75c6 452Perl_deprecate(pTHX_ const char *s)
a0d0e21e 453{
599cee73 454 if (ckWARN(WARN_DEPRECATED))
9014280d 455 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
456}
457
12bcd1a6 458void
bfed75c6 459Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
460{
461 /* This function should NOT be called for any new deprecated warnings */
462 /* Use Perl_deprecate instead */
463 /* */
464 /* It is here to maintain backward compatibility with the pre-5.8 */
465 /* warnings category hierarchy. The "deprecated" category used to */
466 /* live under the "syntax" category. It is now a top-level category */
467 /* in its own right. */
468
469 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 470 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
471 "Use of %s is deprecated", s);
472}
473
ffb4593c
NT
474/*
475 * depcom
9cbb5ea2 476 * Deprecate a comma-less variable list.
ffb4593c
NT
477 */
478
76e3520e 479STATIC void
cea2e8a9 480S_depcom(pTHX)
a0d0e21e 481{
12bcd1a6 482 deprecate_old("comma-less variable list");
a0d0e21e
LW
483}
484
ffb4593c 485/*
9cbb5ea2
GS
486 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
487 * utf16-to-utf8-reversed.
ffb4593c
NT
488 */
489
c39cd008
GS
490#ifdef PERL_CR_FILTER
491static void
492strip_return(SV *sv)
493{
95a20fc0 494 register const char *s = SvPVX_const(sv);
9d4ba2ae 495 register const char * const e = s + SvCUR(sv);
c39cd008
GS
496 /* outer loop optimized to do nothing if there are no CR-LFs */
497 while (s < e) {
498 if (*s++ == '\r' && *s == '\n') {
499 /* hit a CR-LF, need to copy the rest */
500 register char *d = s - 1;
501 *d++ = *s++;
502 while (s < e) {
503 if (*s == '\r' && s[1] == '\n')
504 s++;
505 *d++ = *s++;
506 }
507 SvCUR(sv) -= s - d;
508 return;
509 }
510 }
511}
a868473f 512
76e3520e 513STATIC I32
c39cd008 514S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 515{
f54cb97a 516 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
517 if (count > 0 && !maxlen)
518 strip_return(sv);
519 return count;
a868473f
NIS
520}
521#endif
522
ffb4593c
NT
523/*
524 * Perl_lex_start
9cbb5ea2
GS
525 * Initialize variables. Uses the Perl save_stack to save its state (for
526 * recursive calls to the parser).
ffb4593c
NT
527 */
528
a0d0e21e 529void
864dbfa3 530Perl_lex_start(pTHX_ SV *line)
79072805 531{
cfd0369c 532 const char *s;
8990e307
LW
533 STRLEN len;
534
3280af22
NIS
535 SAVEI32(PL_lex_dojoin);
536 SAVEI32(PL_lex_brackets);
3280af22
NIS
537 SAVEI32(PL_lex_casemods);
538 SAVEI32(PL_lex_starts);
539 SAVEI32(PL_lex_state);
7766f137 540 SAVEVPTR(PL_lex_inpat);
3280af22 541 SAVEI32(PL_lex_inwhat);
18b09519
GS
542 if (PL_lex_state == LEX_KNOWNEXT) {
543 I32 toke = PL_nexttoke;
544 while (--toke >= 0) {
545 SAVEI32(PL_nexttype[toke]);
546 SAVEVPTR(PL_nextval[toke]);
547 }
548 SAVEI32(PL_nexttoke);
18b09519 549 }
57843af0 550 SAVECOPLINE(PL_curcop);
3280af22
NIS
551 SAVEPPTR(PL_bufptr);
552 SAVEPPTR(PL_bufend);
553 SAVEPPTR(PL_oldbufptr);
554 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
555 SAVEPPTR(PL_last_lop);
556 SAVEPPTR(PL_last_uni);
3280af22
NIS
557 SAVEPPTR(PL_linestart);
558 SAVESPTR(PL_linestr);
8edd5f42
RGS
559 SAVEGENERICPV(PL_lex_brackstack);
560 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 561 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
562 SAVESPTR(PL_lex_stuff);
563 SAVEI32(PL_lex_defer);
09bef843 564 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 565 SAVESPTR(PL_lex_repl);
bebdddfc
GS
566 SAVEINT(PL_expect);
567 SAVEINT(PL_lex_expect);
3280af22
NIS
568
569 PL_lex_state = LEX_NORMAL;
570 PL_lex_defer = 0;
571 PL_expect = XSTATE;
572 PL_lex_brackets = 0;
a02a5408
JC
573 Newx(PL_lex_brackstack, 120, char);
574 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
575 PL_lex_casemods = 0;
576 *PL_lex_casestack = '\0';
577 PL_lex_dojoin = 0;
578 PL_lex_starts = 0;
579 PL_lex_stuff = Nullsv;
580 PL_lex_repl = Nullsv;
581 PL_lex_inpat = 0;
76be56bc 582 PL_nexttoke = 0;
3280af22 583 PL_lex_inwhat = 0;
09bef843 584 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
585 PL_linestr = line;
586 if (SvREADONLY(PL_linestr))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 588 s = SvPV_const(PL_linestr, len);
6f27f9a7 589 if (!len || s[len-1] != ';') {
3280af22
NIS
590 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
591 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
592 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 593 }
3280af22
NIS
594 SvTEMP_off(PL_linestr);
595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 597 PL_last_lop = PL_last_uni = Nullch;
3280af22 598 PL_rsfp = 0;
79072805 599}
a687059c 600
ffb4593c
NT
601/*
602 * Perl_lex_end
9cbb5ea2
GS
603 * Finalizer for lexing operations. Must be called when the parser is
604 * done with the lexer.
ffb4593c
NT
605 */
606
463ee0b2 607void
864dbfa3 608Perl_lex_end(pTHX)
463ee0b2 609{
3280af22 610 PL_doextract = FALSE;
463ee0b2
LW
611}
612
ffb4593c
NT
613/*
614 * S_incline
615 * This subroutine has nothing to do with tilting, whether at windmills
616 * or pinball tables. Its name is short for "increment line". It
57843af0 617 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 618 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
619 * # line 500 "foo.pm"
620 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
621 */
622
76e3520e 623STATIC void
cea2e8a9 624S_incline(pTHX_ char *s)
463ee0b2
LW
625{
626 char *t;
627 char *n;
73659bf1 628 char *e;
463ee0b2 629 char ch;
463ee0b2 630
57843af0 631 CopLINE_inc(PL_curcop);
463ee0b2
LW
632 if (*s++ != '#')
633 return;
bf4acbe4 634 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
635 if (strnEQ(s, "line", 4))
636 s += 4;
637 else
638 return;
084592ab 639 if (SPACE_OR_TAB(*s))
73659bf1 640 s++;
4e553d73 641 else
73659bf1 642 return;
bf4acbe4 643 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
644 if (!isDIGIT(*s))
645 return;
646 n = s;
647 while (isDIGIT(*s))
648 s++;
bf4acbe4 649 while (SPACE_OR_TAB(*s))
463ee0b2 650 s++;
73659bf1 651 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 652 s++;
73659bf1
GS
653 e = t + 1;
654 }
463ee0b2 655 else {
463ee0b2 656 for (t = s; !isSPACE(*t); t++) ;
73659bf1 657 e = t;
463ee0b2 658 }
bf4acbe4 659 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
660 e++;
661 if (*e != '\n' && *e != '\0')
662 return; /* false alarm */
663
463ee0b2
LW
664 ch = *t;
665 *t = '\0';
f4dd75d9 666 if (t - s > 0) {
8a5ee598 667#ifndef USE_ITHREADS
e66cf94c
RGS
668 const char *cf = CopFILE(PL_curcop);
669 if (cf && strlen(cf) > 7 && strnEQ(cf, "(eval ", 6)) {
670 /* must copy *{"::_<(eval N)[oldfilename:L]"}
671 * to *{"::_<newfilename"} */
672 char smallbuf[256], smallbuf2[256];
673 char *tmpbuf, *tmpbuf2;
8a5ee598 674 GV **gvp, *gv2;
e66cf94c
RGS
675 STRLEN tmplen = strlen(cf);
676 STRLEN tmplen2 = strlen(s);
677 if (tmplen + 3 < sizeof smallbuf)
678 tmpbuf = smallbuf;
679 else
680 Newx(tmpbuf, tmplen + 3, char);
681 if (tmplen2 + 3 < sizeof smallbuf2)
682 tmpbuf2 = smallbuf2;
683 else
684 Newx(tmpbuf2, tmplen2 + 3, char);
685 tmpbuf[0] = tmpbuf2[0] = '_';
686 tmpbuf[1] = tmpbuf2[1] = '<';
687 memcpy(tmpbuf + 2, cf, ++tmplen);
688 memcpy(tmpbuf2 + 2, s, ++tmplen2);
689 ++tmplen; ++tmplen2;
8a5ee598
RGS
690 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
691 if (gvp) {
692 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
693 if (!isGV(gv2))
694 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
695 /* adjust ${"::_<newfilename"} to store the new file name */
696 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
697 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
698 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
699 }
e66cf94c
RGS
700 if (tmpbuf != smallbuf) Safefree(tmpbuf);
701 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
702 }
8a5ee598 703#endif
05ec9bb3 704 CopFILE_free(PL_curcop);
57843af0 705 CopFILE_set(PL_curcop, s);
f4dd75d9 706 }
463ee0b2 707 *t = ch;
57843af0 708 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
709}
710
ffb4593c
NT
711/*
712 * S_skipspace
713 * Called to gobble the appropriate amount and type of whitespace.
714 * Skips comments as well.
715 */
716
76e3520e 717STATIC char *
cea2e8a9 718S_skipspace(pTHX_ register char *s)
a687059c 719{
3280af22 720 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 721 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
722 s++;
723 return s;
724 }
725 for (;;) {
fd049845 726 STRLEN prevlen;
09bef843 727 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 728 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
729 while (s < PL_bufend && isSPACE(*s)) {
730 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
731 incline(s);
732 }
ffb4593c
NT
733
734 /* comment */
3280af22
NIS
735 if (s < PL_bufend && *s == '#') {
736 while (s < PL_bufend && *s != '\n')
463ee0b2 737 s++;
60e6418e 738 if (s < PL_bufend) {
463ee0b2 739 s++;
60e6418e
GS
740 if (PL_in_eval && !PL_rsfp) {
741 incline(s);
742 continue;
743 }
744 }
463ee0b2 745 }
ffb4593c
NT
746
747 /* only continue to recharge the buffer if we're at the end
748 * of the buffer, we're not reading from a source filter, and
749 * we're in normal lexing mode
750 */
09bef843
SB
751 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
752 PL_lex_state == LEX_FORMLINE)
463ee0b2 753 return s;
ffb4593c
NT
754
755 /* try to recharge the buffer */
9cbb5ea2
GS
756 if ((s = filter_gets(PL_linestr, PL_rsfp,
757 (prevlen = SvCUR(PL_linestr)))) == Nullch)
758 {
759 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
760 if (PL_minus_p) {
761 sv_setpv(PL_linestr,
762 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 763 PL_minus_n = PL_minus_p = 0;
a0d0e21e 764 }
01a19ab0
NC
765 else if (PL_minus_n) {
766 sv_setpvn(PL_linestr, ";}", 2);
767 PL_minus_n = 0;
768 }
a0d0e21e 769 else
4147a61b 770 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
771
772 /* reset variables for next time we lex */
9cbb5ea2
GS
773 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
774 = SvPVX(PL_linestr);
3280af22 775 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 776 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
777
778 /* Close the filehandle. Could be from -P preprocessor,
779 * STDIN, or a regular file. If we were reading code from
780 * STDIN (because the commandline held no -e or filename)
781 * then we don't close it, we reset it so the code can
782 * read from STDIN too.
783 */
784
3280af22
NIS
785 if (PL_preprocess && !PL_in_eval)
786 (void)PerlProc_pclose(PL_rsfp);
787 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
788 PerlIO_clearerr(PL_rsfp);
8990e307 789 else
3280af22
NIS
790 (void)PerlIO_close(PL_rsfp);
791 PL_rsfp = Nullfp;
463ee0b2
LW
792 return s;
793 }
ffb4593c
NT
794
795 /* not at end of file, so we only read another line */
09bef843
SB
796 /* make corresponding updates to old pointers, for yyerror() */
797 oldprevlen = PL_oldbufptr - PL_bufend;
798 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
799 if (PL_last_uni)
800 oldunilen = PL_last_uni - PL_bufend;
801 if (PL_last_lop)
802 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
803 PL_linestart = PL_bufptr = s + prevlen;
804 PL_bufend = s + SvCUR(PL_linestr);
805 s = PL_bufptr;
09bef843
SB
806 PL_oldbufptr = s + oldprevlen;
807 PL_oldoldbufptr = s + oldoldprevlen;
808 if (PL_last_uni)
809 PL_last_uni = s + oldunilen;
810 if (PL_last_lop)
811 PL_last_lop = s + oldloplen;
a0d0e21e 812 incline(s);
ffb4593c
NT
813
814 /* debugger active and we're not compiling the debugger code,
815 * so store the line into the debugger's array of lines
816 */
3280af22 817 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 818 SV * const sv = NEWSV(85,0);
8990e307
LW
819
820 sv_upgrade(sv, SVt_PVMG);
3280af22 821 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 822 (void)SvIOK_on(sv);
45977657 823 SvIV_set(sv, 0);
57843af0 824 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 825 }
463ee0b2 826 }
a687059c 827}
378cc40b 828
ffb4593c
NT
829/*
830 * S_check_uni
831 * Check the unary operators to ensure there's no ambiguity in how they're
832 * used. An ambiguous piece of code would be:
833 * rand + 5
834 * This doesn't mean rand() + 5. Because rand() is a unary operator,
835 * the +5 is its argument.
836 */
837
76e3520e 838STATIC void
cea2e8a9 839S_check_uni(pTHX)
ba106d47 840{
2f3197b3 841 char *s;
a0d0e21e 842 char *t;
2f3197b3 843
3280af22 844 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 845 return;
3280af22
NIS
846 while (isSPACE(*PL_last_uni))
847 PL_last_uni++;
7e2040f0 848 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 849 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 850 return;
0453d815 851 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 852 const char ch = *s;
0453d815 853 *s = '\0';
9014280d 854 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 855 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
856 PL_last_uni);
857 *s = ch;
858 }
2f3197b3
LW
859}
860
ffb4593c
NT
861/*
862 * LOP : macro to build a list operator. Its behaviour has been replaced
863 * with a subroutine, S_lop() for which LOP is just another name.
864 */
865
a0d0e21e
LW
866#define LOP(f,x) return lop(f,x,s)
867
ffb4593c
NT
868/*
869 * S_lop
870 * Build a list operator (or something that might be one). The rules:
871 * - if we have a next token, then it's a list operator [why?]
872 * - if the next thing is an opening paren, then it's a function
873 * - else it's a list operator
874 */
875
76e3520e 876STATIC I32
a0be28da 877S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 878{
79072805 879 yylval.ival = f;
35c8bce7 880 CLINE;
3280af22
NIS
881 PL_expect = x;
882 PL_bufptr = s;
883 PL_last_lop = PL_oldbufptr;
eb160463 884 PL_last_lop_op = (OPCODE)f;
3280af22 885 if (PL_nexttoke)
bbf60fe6 886 return REPORT(LSTOP);
79072805 887 if (*s == '(')
bbf60fe6 888 return REPORT(FUNC);
79072805
LW
889 s = skipspace(s);
890 if (*s == '(')
bbf60fe6 891 return REPORT(FUNC);
79072805 892 else
bbf60fe6 893 return REPORT(LSTOP);
79072805
LW
894}
895
ffb4593c
NT
896/*
897 * S_force_next
9cbb5ea2 898 * When the lexer realizes it knows the next token (for instance,
ffb4593c 899 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
900 * to know what token to return the next time the lexer is called. Caller
901 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
902 * handles the token correctly.
ffb4593c
NT
903 */
904
4e553d73 905STATIC void
cea2e8a9 906S_force_next(pTHX_ I32 type)
79072805 907{
3280af22
NIS
908 PL_nexttype[PL_nexttoke] = type;
909 PL_nexttoke++;
910 if (PL_lex_state != LEX_KNOWNEXT) {
911 PL_lex_defer = PL_lex_state;
912 PL_lex_expect = PL_expect;
913 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
914 }
915}
916
d0a148a6
NC
917STATIC SV *
918S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
919{
9d4ba2ae 920 SV * const sv = newSVpvn(start,len);
bfed75c6 921 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
922 SvUTF8_on(sv);
923 return sv;
924}
925
ffb4593c
NT
926/*
927 * S_force_word
928 * When the lexer knows the next thing is a word (for instance, it has
929 * just seen -> and it knows that the next char is a word char, then
930 * it calls S_force_word to stick the next word into the PL_next lookahead.
931 *
932 * Arguments:
b1b65b59 933 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
934 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
935 * int check_keyword : if true, Perl checks to make sure the word isn't
936 * a keyword (do this if the word is a label, e.g. goto FOO)
937 * int allow_pack : if true, : characters will also be allowed (require,
938 * use, etc. do this)
9cbb5ea2 939 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
940 */
941
76e3520e 942STATIC char *
cea2e8a9 943S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 944{
463ee0b2
LW
945 register char *s;
946 STRLEN len;
4e553d73 947
463ee0b2
LW
948 start = skipspace(start);
949 s = start;
7e2040f0 950 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 951 (allow_pack && *s == ':') ||
15f0808c 952 (allow_initial_tick && *s == '\'') )
a0d0e21e 953 {
3280af22
NIS
954 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
955 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
956 return start;
957 if (token == METHOD) {
958 s = skipspace(s);
959 if (*s == '(')
3280af22 960 PL_expect = XTERM;
463ee0b2 961 else {
3280af22 962 PL_expect = XOPERATOR;
463ee0b2 963 }
79072805 964 }
d0a148a6
NC
965 PL_nextval[PL_nexttoke].opval
966 = (OP*)newSVOP(OP_CONST,0,
967 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 968 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
969 force_next(token);
970 }
971 return s;
972}
973
ffb4593c
NT
974/*
975 * S_force_ident
9cbb5ea2 976 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
977 * text only contains the "foo" portion. The first argument is a pointer
978 * to the "foo", and the second argument is the type symbol to prefix.
979 * Forces the next token to be a "WORD".
9cbb5ea2 980 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
981 */
982
76e3520e 983STATIC void
bfed75c6 984S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
985{
986 if (s && *s) {
bfed75c6 987 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 988 PL_nextval[PL_nexttoke].opval = o;
79072805 989 force_next(WORD);
748a9306 990 if (kind) {
11343788 991 o->op_private = OPpCONST_ENTERED;
55497cff
PP
992 /* XXX see note in pp_entereval() for why we forgo typo
993 warnings if the symbol must be introduced in an eval.
994 GSAR 96-10-12 */
3280af22 995 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
996 kind == '$' ? SVt_PV :
997 kind == '@' ? SVt_PVAV :
998 kind == '%' ? SVt_PVHV :
999 SVt_PVGV
1000 );
748a9306 1001 }
79072805
LW
1002 }
1003}
1004
1571675a
GS
1005NV
1006Perl_str_to_version(pTHX_ SV *sv)
1007{
1008 NV retval = 0.0;
1009 NV nshift = 1.0;
1010 STRLEN len;
cfd0369c 1011 const char *start = SvPV_const(sv,len);
9d4ba2ae 1012 const char * const end = start + len;
504618e9 1013 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1014 while (start < end) {
ba210ebe 1015 STRLEN skip;
1571675a
GS
1016 UV n;
1017 if (utf)
9041c2e3 1018 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1019 else {
1020 n = *(U8*)start;
1021 skip = 1;
1022 }
1023 retval += ((NV)n)/nshift;
1024 start += skip;
1025 nshift *= 1000;
1026 }
1027 return retval;
1028}
1029
4e553d73 1030/*
ffb4593c
NT
1031 * S_force_version
1032 * Forces the next token to be a version number.
e759cc13
RGS
1033 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1034 * and if "guessing" is TRUE, then no new token is created (and the caller
1035 * must use an alternative parsing method).
ffb4593c
NT
1036 */
1037
76e3520e 1038STATIC char *
e759cc13 1039S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
1040{
1041 OP *version = Nullop;
44dcb63b 1042 char *d;
89bfa8cd
PP
1043
1044 s = skipspace(s);
1045
44dcb63b 1046 d = s;
dd629d5b 1047 if (*d == 'v')
44dcb63b 1048 d++;
44dcb63b 1049 if (isDIGIT(*d)) {
e759cc13
RGS
1050 while (isDIGIT(*d) || *d == '_' || *d == '.')
1051 d++;
9f3d182e 1052 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1053 SV *ver;
b73d6f50 1054 s = scan_num(s, &yylval);
89bfa8cd 1055 version = yylval.opval;
dd629d5b
GS
1056 ver = cSVOPx(version)->op_sv;
1057 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1058 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1059 SvNV_set(ver, str_to_version(ver));
1571675a 1060 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1061 }
89bfa8cd 1062 }
e759cc13
RGS
1063 else if (guessing)
1064 return s;
89bfa8cd
PP
1065 }
1066
1067 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1068 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1069 force_next(WORD);
89bfa8cd 1070
e759cc13 1071 return s;
89bfa8cd
PP
1072}
1073
ffb4593c
NT
1074/*
1075 * S_tokeq
1076 * Tokenize a quoted string passed in as an SV. It finds the next
1077 * chunk, up to end of string or a backslash. It may make a new
1078 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1079 * turns \\ into \.
1080 */
1081
76e3520e 1082STATIC SV *
cea2e8a9 1083S_tokeq(pTHX_ SV *sv)
79072805
LW
1084{
1085 register char *s;
1086 register char *send;
1087 register char *d;
b3ac6de7
IZ
1088 STRLEN len = 0;
1089 SV *pv = sv;
79072805
LW
1090
1091 if (!SvLEN(sv))
b3ac6de7 1092 goto finish;
79072805 1093
a0d0e21e 1094 s = SvPV_force(sv, len);
21a311ee 1095 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1096 goto finish;
463ee0b2 1097 send = s + len;
79072805
LW
1098 while (s < send && *s != '\\')
1099 s++;
1100 if (s == send)
b3ac6de7 1101 goto finish;
79072805 1102 d = s;
be4731d2 1103 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1104 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1105 if (SvUTF8(sv))
1106 SvUTF8_on(pv);
1107 }
79072805
LW
1108 while (s < send) {
1109 if (*s == '\\') {
a0d0e21e 1110 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1111 s++; /* all that, just for this */
1112 }
1113 *d++ = *s++;
1114 }
1115 *d = '\0';
95a20fc0 1116 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1117 finish:
3280af22 1118 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1119 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1120 return sv;
1121}
1122
ffb4593c
NT
1123/*
1124 * Now come three functions related to double-quote context,
1125 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1126 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1127 * interact with PL_lex_state, and create fake ( ... ) argument lists
1128 * to handle functions and concatenation.
1129 * They assume that whoever calls them will be setting up a fake
1130 * join call, because each subthing puts a ',' after it. This lets
1131 * "lower \luPpEr"
1132 * become
1133 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1134 *
1135 * (I'm not sure whether the spurious commas at the end of lcfirst's
1136 * arguments and join's arguments are created or not).
1137 */
1138
1139/*
1140 * S_sublex_start
1141 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1142 *
1143 * Pattern matching will set PL_lex_op to the pattern-matching op to
1144 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1145 *
1146 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1147 *
1148 * Everything else becomes a FUNC.
1149 *
1150 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1151 * had an OP_CONST or OP_READLINE). This just sets us up for a
1152 * call to S_sublex_push().
1153 */
1154
76e3520e 1155STATIC I32
cea2e8a9 1156S_sublex_start(pTHX)
79072805 1157{
f54cb97a 1158 const register I32 op_type = yylval.ival;
79072805
LW
1159
1160 if (op_type == OP_NULL) {
3280af22
NIS
1161 yylval.opval = PL_lex_op;
1162 PL_lex_op = Nullop;
79072805
LW
1163 return THING;
1164 }
1165 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1166 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1167
1168 if (SvTYPE(sv) == SVt_PVIV) {
1169 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1170 STRLEN len;
cfd0369c 1171 const char *p = SvPV_const(sv, len);
f54cb97a 1172 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1173 if (SvUTF8(sv))
1174 SvUTF8_on(nsv);
b3ac6de7
IZ
1175 SvREFCNT_dec(sv);
1176 sv = nsv;
4e553d73 1177 }
b3ac6de7 1178 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1179 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1180 /* Allow <FH> // "foo" */
1181 if (op_type == OP_READLINE)
1182 PL_expect = XTERMORDORDOR;
79072805
LW
1183 return THING;
1184 }
1185
3280af22
NIS
1186 PL_sublex_info.super_state = PL_lex_state;
1187 PL_sublex_info.sub_inwhat = op_type;
1188 PL_sublex_info.sub_op = PL_lex_op;
1189 PL_lex_state = LEX_INTERPPUSH;
55497cff 1190
3280af22
NIS
1191 PL_expect = XTERM;
1192 if (PL_lex_op) {
1193 yylval.opval = PL_lex_op;
1194 PL_lex_op = Nullop;
55497cff
PP
1195 return PMFUNC;
1196 }
1197 else
1198 return FUNC;
1199}
1200
ffb4593c
NT
1201/*
1202 * S_sublex_push
1203 * Create a new scope to save the lexing state. The scope will be
1204 * ended in S_sublex_done. Returns a '(', starting the function arguments
1205 * to the uc, lc, etc. found before.
1206 * Sets PL_lex_state to LEX_INTERPCONCAT.
1207 */
1208
76e3520e 1209STATIC I32
cea2e8a9 1210S_sublex_push(pTHX)
55497cff 1211{
27da23d5 1212 dVAR;
f46d017c 1213 ENTER;
55497cff 1214
3280af22
NIS
1215 PL_lex_state = PL_sublex_info.super_state;
1216 SAVEI32(PL_lex_dojoin);
1217 SAVEI32(PL_lex_brackets);
3280af22
NIS
1218 SAVEI32(PL_lex_casemods);
1219 SAVEI32(PL_lex_starts);
1220 SAVEI32(PL_lex_state);
7766f137 1221 SAVEVPTR(PL_lex_inpat);
3280af22 1222 SAVEI32(PL_lex_inwhat);
57843af0 1223 SAVECOPLINE(PL_curcop);
3280af22 1224 SAVEPPTR(PL_bufptr);
8452ff4b 1225 SAVEPPTR(PL_bufend);
3280af22
NIS
1226 SAVEPPTR(PL_oldbufptr);
1227 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1228 SAVEPPTR(PL_last_lop);
1229 SAVEPPTR(PL_last_uni);
3280af22
NIS
1230 SAVEPPTR(PL_linestart);
1231 SAVESPTR(PL_linestr);
8edd5f42
RGS
1232 SAVEGENERICPV(PL_lex_brackstack);
1233 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1234
1235 PL_linestr = PL_lex_stuff;
1236 PL_lex_stuff = Nullsv;
1237
9cbb5ea2
GS
1238 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1239 = SvPVX(PL_linestr);
3280af22 1240 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1241 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1242 SAVEFREESV(PL_linestr);
1243
1244 PL_lex_dojoin = FALSE;
1245 PL_lex_brackets = 0;
a02a5408
JC
1246 Newx(PL_lex_brackstack, 120, char);
1247 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1248 PL_lex_casemods = 0;
1249 *PL_lex_casestack = '\0';
1250 PL_lex_starts = 0;
1251 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1252 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1253
1254 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1255 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1256 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1257 else
3280af22 1258 PL_lex_inpat = Nullop;
79072805 1259
55497cff 1260 return '(';
79072805
LW
1261}
1262
ffb4593c
NT
1263/*
1264 * S_sublex_done
1265 * Restores lexer state after a S_sublex_push.
1266 */
1267
76e3520e 1268STATIC I32
cea2e8a9 1269S_sublex_done(pTHX)
79072805 1270{
27da23d5 1271 dVAR;
3280af22 1272 if (!PL_lex_starts++) {
9d4ba2ae 1273 SV * const sv = newSVpvn("",0);
9aa983d2
JH
1274 if (SvUTF8(PL_linestr))
1275 SvUTF8_on(sv);
3280af22 1276 PL_expect = XOPERATOR;
9aa983d2 1277 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1278 return THING;
1279 }
1280
3280af22
NIS
1281 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1282 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1283 return yylex();
79072805
LW
1284 }
1285
ffb4593c 1286 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1287 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1288 PL_linestr = PL_lex_repl;
1289 PL_lex_inpat = 0;
1290 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1291 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1292 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1293 SAVEFREESV(PL_linestr);
1294 PL_lex_dojoin = FALSE;
1295 PL_lex_brackets = 0;
3280af22
NIS
1296 PL_lex_casemods = 0;
1297 *PL_lex_casestack = '\0';
1298 PL_lex_starts = 0;
25da4f38 1299 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1300 PL_lex_state = LEX_INTERPNORMAL;
1301 PL_lex_starts++;
e9fa98b2
HS
1302 /* we don't clear PL_lex_repl here, so that we can check later
1303 whether this is an evalled subst; that means we rely on the
1304 logic to ensure sublex_done() is called again only via the
1305 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1306 }
e9fa98b2 1307 else {
3280af22 1308 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1309 PL_lex_repl = Nullsv;
1310 }
79072805 1311 return ',';
ffed7fef
LW
1312 }
1313 else {
f46d017c 1314 LEAVE;
3280af22
NIS
1315 PL_bufend = SvPVX(PL_linestr);
1316 PL_bufend += SvCUR(PL_linestr);
1317 PL_expect = XOPERATOR;
09bef843 1318 PL_sublex_info.sub_inwhat = 0;
79072805 1319 return ')';
ffed7fef
LW
1320 }
1321}
1322
02aa26ce
NT
1323/*
1324 scan_const
1325
1326 Extracts a pattern, double-quoted string, or transliteration. This
1327 is terrifying code.
1328
3280af22
NIS
1329 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1330 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1331 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1332
9b599b2a
GS
1333 Returns a pointer to the character scanned up to. Iff this is
1334 advanced from the start pointer supplied (ie if anything was
1335 successfully parsed), will leave an OP for the substring scanned
1336 in yylval. Caller must intuit reason for not parsing further
1337 by looking at the next characters herself.
1338
02aa26ce
NT
1339 In patterns:
1340 backslashes:
1341 double-quoted style: \r and \n
1342 regexp special ones: \D \s
1343 constants: \x3
1344 backrefs: \1 (deprecated in substitution replacements)
1345 case and quoting: \U \Q \E
1346 stops on @ and $, but not for $ as tail anchor
1347
1348 In transliterations:
1349 characters are VERY literal, except for - not at the start or end
1350 of the string, which indicates a range. scan_const expands the
1351 range to the full set of intermediate characters.
1352
1353 In double-quoted strings:
1354 backslashes:
1355 double-quoted style: \r and \n
1356 constants: \x3
1357 backrefs: \1 (deprecated)
1358 case and quoting: \U \Q \E
1359 stops on @ and $
1360
1361 scan_const does *not* construct ops to handle interpolated strings.
1362 It stops processing as soon as it finds an embedded $ or @ variable
1363 and leaves it to the caller to work out what's going on.
1364
da6eedaa 1365 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1366
1367 $ in pattern could be $foo or could be tail anchor. Assumption:
1368 it's a tail anchor if $ is the last thing in the string, or if it's
1369 followed by one of ")| \n\t"
1370
1371 \1 (backreferences) are turned into $1
1372
1373 The structure of the code is
1374 while (there's a character to process) {
1375 handle transliteration ranges
1376 skip regexp comments
1377 skip # initiated comments in //x patterns
1378 check for embedded @foo
1379 check for embedded scalars
1380 if (backslash) {
1381 leave intact backslashes from leave (below)
1382 deprecate \1 in strings and sub replacements
1383 handle string-changing backslashes \l \U \Q \E, etc.
1384 switch (what was escaped) {
1385 handle - in a transliteration (becomes a literal -)
1386 handle \132 octal characters
1387 handle 0x15 hex characters
1388 handle \cV (control V)
1389 handle printf backslashes (\f, \r, \n, etc)
1390 } (end switch)
1391 } (end if backslash)
1392 } (end while character to read)
4e553d73 1393
02aa26ce
NT
1394*/
1395
76e3520e 1396STATIC char *
cea2e8a9 1397S_scan_const(pTHX_ char *start)
79072805 1398{
3280af22 1399 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1400 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1401 register char *s = start; /* start of the constant */
1402 register char *d = SvPVX(sv); /* destination for copies */
1403 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1404 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1405 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1406 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1407 UV uv;
4c3a8340
ST
1408#ifdef EBCDIC
1409 UV literal_endpoint = 0;
1410#endif
012bcf8d 1411
dff6d3cd 1412 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1413 PL_lex_inpat
b6d5fef8 1414 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1415 : "";
79072805 1416
2b9d42f0
NIS
1417 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1418 /* If we are doing a trans and we know we want UTF8 set expectation */
1419 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1420 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1421 }
1422
1423
79072805 1424 while (s < send || dorange) {
02aa26ce 1425 /* get transliterations out of the way (they're most literal) */
3280af22 1426 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1427 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1428 if (dorange) {
1ba5c669
JH
1429 I32 i; /* current expanded character */
1430 I32 min; /* first character in range */
1431 I32 max; /* last character in range */
02aa26ce 1432
2b9d42f0 1433 if (has_utf8) {
9d4ba2ae 1434 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1435 char *e = d++;
1436 while (e-- > c)
1437 *(e + 1) = *e;
25716404 1438 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1439 /* mark the range as done, and continue */
1440 dorange = FALSE;
1441 didrange = TRUE;
1442 continue;
1443 }
2b9d42f0 1444
95a20fc0 1445 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1446 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1447 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1448 d -= 2; /* eat the first char and the - */
1449
8ada0baa
JH
1450 min = (U8)*d; /* first char in range */
1451 max = (U8)d[1]; /* last char in range */
1452
c2e66d9e 1453 if (min > max) {
01ec43d0 1454 Perl_croak(aTHX_
d1573ac7 1455 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1456 (char)min, (char)max);
c2e66d9e
GS
1457 }
1458
c7f1f016 1459#ifdef EBCDIC
4c3a8340
ST
1460 if (literal_endpoint == 2 &&
1461 ((isLOWER(min) && isLOWER(max)) ||
1462 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1463 if (isLOWER(min)) {
1464 for (i = min; i <= max; i++)
1465 if (isLOWER(i))
db42d148 1466 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1467 } else {
1468 for (i = min; i <= max; i++)
1469 if (isUPPER(i))
db42d148 1470 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1471 }
1472 }
1473 else
1474#endif
1475 for (i = min; i <= max; i++)
eb160463 1476 *d++ = (char)i;
02aa26ce
NT
1477
1478 /* mark the range as done, and continue */
79072805 1479 dorange = FALSE;
01ec43d0 1480 didrange = TRUE;
4c3a8340
ST
1481#ifdef EBCDIC
1482 literal_endpoint = 0;
1483#endif
79072805 1484 continue;
4e553d73 1485 }
02aa26ce
NT
1486
1487 /* range begins (ignore - as first or last char) */
79072805 1488 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1489 if (didrange) {
1fafa243 1490 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1491 }
2b9d42f0 1492 if (has_utf8) {
25716404 1493 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1494 s++;
1495 continue;
1496 }
79072805
LW
1497 dorange = TRUE;
1498 s++;
01ec43d0
GS
1499 }
1500 else {
1501 didrange = FALSE;
4c3a8340
ST
1502#ifdef EBCDIC
1503 literal_endpoint = 0;
1504#endif
01ec43d0 1505 }
79072805 1506 }
02aa26ce
NT
1507
1508 /* if we get here, we're not doing a transliteration */
1509
0f5d15d6
IZ
1510 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1511 except for the last char, which will be done separately. */
3280af22 1512 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1513 if (s[2] == '#') {
e994fd66 1514 while (s+1 < send && *s != ')')
db42d148 1515 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1516 }
1517 else if (s[2] == '{' /* This should match regcomp.c */
1518 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1519 {
cc6b7395 1520 I32 count = 1;
0f5d15d6 1521 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1522 char c;
1523
d9f97599
GS
1524 while (count && (c = *regparse)) {
1525 if (c == '\\' && regparse[1])
1526 regparse++;
4e553d73 1527 else if (c == '{')
cc6b7395 1528 count++;
4e553d73 1529 else if (c == '}')
cc6b7395 1530 count--;
d9f97599 1531 regparse++;
cc6b7395 1532 }
e994fd66 1533 if (*regparse != ')')
5bdf89e7 1534 regparse--; /* Leave one char for continuation. */
0f5d15d6 1535 while (s < regparse)
db42d148 1536 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1537 }
748a9306 1538 }
02aa26ce
NT
1539
1540 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1541 else if (*s == '#' && PL_lex_inpat &&
1542 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1543 while (s+1 < send && *s != '\n')
db42d148 1544 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1545 }
02aa26ce 1546
5d1d4326 1547 /* check for embedded arrays
da6eedaa 1548 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1549 */
7e2040f0 1550 else if (*s == '@' && s[1]
5d1d4326 1551 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1552 break;
02aa26ce
NT
1553
1554 /* check for embedded scalars. only stop if we're sure it's a
1555 variable.
1556 */
79072805 1557 else if (*s == '$') {
3280af22 1558 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1559 break;
6002328a 1560 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1561 break; /* in regexp, $ might be tail anchor */
1562 }
02aa26ce 1563
2b9d42f0
NIS
1564 /* End of else if chain - OP_TRANS rejoin rest */
1565
02aa26ce 1566 /* backslashes */
79072805
LW
1567 if (*s == '\\' && s+1 < send) {
1568 s++;
02aa26ce
NT
1569
1570 /* some backslashes we leave behind */
c9f97d15 1571 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1572 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1573 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1574 continue;
1575 }
02aa26ce
NT
1576
1577 /* deprecate \1 in strings and substitution replacements */
3280af22 1578 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1579 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1580 {
599cee73 1581 if (ckWARN(WARN_SYNTAX))
9014280d 1582 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1583 *--s = '$';
1584 break;
1585 }
02aa26ce
NT
1586
1587 /* string-change backslash escapes */
3280af22 1588 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1589 --s;
1590 break;
1591 }
02aa26ce
NT
1592
1593 /* if we get here, it's either a quoted -, or a digit */
79072805 1594 switch (*s) {
02aa26ce
NT
1595
1596 /* quoted - in transliterations */
79072805 1597 case '-':
3280af22 1598 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1599 *d++ = *s++;
1600 continue;
1601 }
1602 /* FALL THROUGH */
1603 default:
11b8faa4 1604 {
041457d9
DM
1605 if (isALNUM(*s) &&
1606 *s != '_' &&
1607 ckWARN(WARN_MISC))
9014280d 1608 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1609 "Unrecognized escape \\%c passed through",
1610 *s);
1611 /* default action is to copy the quoted character */
f9a63242 1612 goto default_action;
11b8faa4 1613 }
02aa26ce
NT
1614
1615 /* \132 indicates an octal constant */
79072805
LW
1616 case '0': case '1': case '2': case '3':
1617 case '4': case '5': case '6': case '7':
ba210ebe 1618 {
53305cf1
NC
1619 I32 flags = 0;
1620 STRLEN len = 3;
1621 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1622 s += len;
1623 }
012bcf8d 1624 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1625
1626 /* \x24 indicates a hex constant */
79072805 1627 case 'x':
a0ed51b3
LW
1628 ++s;
1629 if (*s == '{') {
9d4ba2ae 1630 char* const e = strchr(s, '}');
a4c04bdc
NC
1631 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1632 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1633 STRLEN len;
355860ce 1634
53305cf1 1635 ++s;
adaeee49 1636 if (!e) {
a0ed51b3 1637 yyerror("Missing right brace on \\x{}");
355860ce 1638 continue;
ba210ebe 1639 }
53305cf1
NC
1640 len = e - s;
1641 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1642 s = e + 1;
a0ed51b3
LW
1643 }
1644 else {
ba210ebe 1645 {
53305cf1 1646 STRLEN len = 2;
a4c04bdc 1647 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1648 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1649 s += len;
1650 }
012bcf8d
GS
1651 }
1652
1653 NUM_ESCAPE_INSERT:
1654 /* Insert oct or hex escaped character.
301d3d20 1655 * There will always enough room in sv since such
db42d148 1656 * escapes will be longer than any UTF-8 sequence
301d3d20 1657 * they can end up as. */
ba7cea30 1658
c7f1f016
NIS
1659 /* We need to map to chars to ASCII before doing the tests
1660 to cover EBCDIC
1661 */
c4d5f83a 1662 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1663 if (!has_utf8 && uv > 255) {
301d3d20
JH
1664 /* Might need to recode whatever we have
1665 * accumulated so far if it contains any
1666 * hibit chars.
1667 *
1668 * (Can't we keep track of that and avoid
1669 * this rescan? --jhi)
012bcf8d 1670 */
c7f1f016 1671 int hicount = 0;
63cd0674
NIS
1672 U8 *c;
1673 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1674 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1675 hicount++;
db42d148 1676 }
012bcf8d 1677 }
63cd0674 1678 if (hicount) {
9d4ba2ae 1679 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1680 U8 *src, *dst;
1681 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1682 src = (U8 *)d - 1;
1683 dst = src+hicount;
1684 d += hicount;
cfd0369c 1685 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1686 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1687 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1688 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1689 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1690 }
1691 else {
63cd0674 1692 *dst-- = *src;
012bcf8d 1693 }
c7f1f016 1694 src--;
012bcf8d
GS
1695 }
1696 }
1697 }
1698
9aa983d2 1699 if (has_utf8 || uv > 255) {
9041c2e3 1700 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1701 has_utf8 = TRUE;
f9a63242
JH
1702 if (PL_lex_inwhat == OP_TRANS &&
1703 PL_sublex_info.sub_op) {
1704 PL_sublex_info.sub_op->op_private |=
1705 (PL_lex_repl ? OPpTRANS_FROM_UTF
1706 : OPpTRANS_TO_UTF);
f9a63242 1707 }
012bcf8d 1708 }
a0ed51b3 1709 else {
012bcf8d 1710 *d++ = (char)uv;
a0ed51b3 1711 }
012bcf8d
GS
1712 }
1713 else {
c4d5f83a 1714 *d++ = (char) uv;
a0ed51b3 1715 }
79072805 1716 continue;
02aa26ce 1717
b239daa5 1718 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1719 case 'N':
55eda711 1720 ++s;
423cee85
JH
1721 if (*s == '{') {
1722 char* e = strchr(s, '}');
155aba94 1723 SV *res;
423cee85 1724 STRLEN len;
cfd0369c 1725 const char *str;
4e553d73 1726
423cee85 1727 if (!e) {
5777a3f7 1728 yyerror("Missing right brace on \\N{}");
423cee85
JH
1729 e = s - 1;
1730 goto cont_scan;
1731 }
dbc0d4f2
JH
1732 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1733 /* \N{U+...} */
1734 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1735 PERL_SCAN_DISALLOW_PREFIX;
1736 s += 3;
1737 len = e - s;
1738 uv = grok_hex(s, &len, &flags, NULL);
1739 s = e + 1;
1740 goto NUM_ESCAPE_INSERT;
1741 }
55eda711
JH
1742 res = newSVpvn(s + 1, e - s - 1);
1743 res = new_constant( Nullch, 0, "charnames",
1744 res, Nullsv, "\\N{...}" );
f9a63242
JH
1745 if (has_utf8)
1746 sv_utf8_upgrade(res);
cfd0369c 1747 str = SvPV_const(res,len);
1c47067b
JH
1748#ifdef EBCDIC_NEVER_MIND
1749 /* charnames uses pack U and that has been
1750 * recently changed to do the below uni->native
1751 * mapping, so this would be redundant (and wrong,
1752 * the code point would be doubly converted).
1753 * But leave this in just in case the pack U change
1754 * gets revoked, but the semantics is still
1755 * desireable for charnames. --jhi */
cddc7ef4 1756 {
cfd0369c 1757 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1758
1759 if (uv < 0x100) {
89ebb4a3 1760 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1761
1762 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1763 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1764 str = SvPV_const(res, len);
cddc7ef4
JH
1765 }
1766 }
1767#endif
89491803 1768 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1769 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1770 SvCUR_set(sv, d - ostart);
1771 SvPOK_on(sv);
e4f3eed8 1772 *d = '\0';
f08d6ad9 1773 sv_utf8_upgrade(sv);
d2f449dd 1774 /* this just broke our allocation above... */
eb160463 1775 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1776 d = SvPVX(sv) + SvCUR(sv);
89491803 1777 has_utf8 = TRUE;
f08d6ad9 1778 }
eb160463 1779 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1780 const char * const odest = SvPVX_const(sv);
423cee85 1781
8973db79 1782 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1783 d = SvPVX(sv) + (d - odest);
1784 }
1785 Copy(str, d, len, char);
1786 d += len;
1787 SvREFCNT_dec(res);
1788 cont_scan:
1789 s = e + 1;
1790 }
1791 else
5777a3f7 1792 yyerror("Missing braces on \\N{}");
423cee85
JH
1793 continue;
1794
02aa26ce 1795 /* \c is a control character */
79072805
LW
1796 case 'c':
1797 s++;
961ce445 1798 if (s < send) {
ba210ebe 1799 U8 c = *s++;
c7f1f016
NIS
1800#ifdef EBCDIC
1801 if (isLOWER(c))
1802 c = toUPPER(c);
1803#endif
db42d148 1804 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1805 }
961ce445
RGS
1806 else {
1807 yyerror("Missing control char name in \\c");
1808 }
79072805 1809 continue;
02aa26ce
NT
1810
1811 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1812 case 'b':
db42d148 1813 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1814 break;
1815 case 'n':
db42d148 1816 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1817 break;
1818 case 'r':
db42d148 1819 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1820 break;
1821 case 'f':
db42d148 1822 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1823 break;
1824 case 't':
db42d148 1825 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1826 break;
34a3fe2a 1827 case 'e':
db42d148 1828 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1829 break;
1830 case 'a':
db42d148 1831 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1832 break;
02aa26ce
NT
1833 } /* end switch */
1834
79072805
LW
1835 s++;
1836 continue;
02aa26ce 1837 } /* end if (backslash) */
4c3a8340
ST
1838#ifdef EBCDIC
1839 else
1840 literal_endpoint++;
1841#endif
02aa26ce 1842
f9a63242 1843 default_action:
2b9d42f0
NIS
1844 /* If we started with encoded form, or already know we want it
1845 and then encode the next character */
1846 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1847 STRLEN len = 1;
9d4ba2ae
AL
1848 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1849 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1850 s += len;
1851 if (need > len) {
1852 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1853 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1854 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1855 }
1856 d = (char*)uvchr_to_utf8((U8*)d, uv);
1857 has_utf8 = TRUE;
1858 }
1859 else {
1860 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1861 }
02aa26ce
NT
1862 } /* while loop to process each character */
1863
1864 /* terminate the string and set up the sv */
79072805 1865 *d = '\0';
95a20fc0 1866 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1867 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1868 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1869
79072805 1870 SvPOK_on(sv);
9f4817db 1871 if (PL_encoding && !has_utf8) {
d0063567
DK
1872 sv_recode_to_utf8(sv, PL_encoding);
1873 if (SvUTF8(sv))
1874 has_utf8 = TRUE;
9f4817db 1875 }
2b9d42f0 1876 if (has_utf8) {
7e2040f0 1877 SvUTF8_on(sv);
2b9d42f0 1878 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1879 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1880 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1881 }
1882 }
79072805 1883
02aa26ce 1884 /* shrink the sv if we allocated more than we used */
79072805 1885 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1886 SvPV_shrink_to_cur(sv);
79072805 1887 }
02aa26ce 1888
9b599b2a 1889 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1890 if (s > PL_bufptr) {
1891 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1892 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1893 sv, Nullsv,
4e553d73 1894 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1895 ? "tr"
3280af22 1896 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1897 ? "s"
1898 : "qq")));
79072805 1899 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1900 } else
8990e307 1901 SvREFCNT_dec(sv);
79072805
LW
1902 return s;
1903}
1904
ffb4593c
NT
1905/* S_intuit_more
1906 * Returns TRUE if there's more to the expression (e.g., a subscript),
1907 * FALSE otherwise.
ffb4593c
NT
1908 *
1909 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1910 *
1911 * ->[ and ->{ return TRUE
1912 * { and [ outside a pattern are always subscripts, so return TRUE
1913 * if we're outside a pattern and it's not { or [, then return FALSE
1914 * if we're in a pattern and the first char is a {
1915 * {4,5} (any digits around the comma) returns FALSE
1916 * if we're in a pattern and the first char is a [
1917 * [] returns FALSE
1918 * [SOMETHING] has a funky algorithm to decide whether it's a
1919 * character class or not. It has to deal with things like
1920 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1921 * anything else returns TRUE
1922 */
1923
9cbb5ea2
GS
1924/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1925
76e3520e 1926STATIC int
cea2e8a9 1927S_intuit_more(pTHX_ register char *s)
79072805 1928{
3280af22 1929 if (PL_lex_brackets)
79072805
LW
1930 return TRUE;
1931 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1932 return TRUE;
1933 if (*s != '{' && *s != '[')
1934 return FALSE;
3280af22 1935 if (!PL_lex_inpat)
79072805
LW
1936 return TRUE;
1937
1938 /* In a pattern, so maybe we have {n,m}. */
1939 if (*s == '{') {
1940 s++;
1941 if (!isDIGIT(*s))
1942 return TRUE;
1943 while (isDIGIT(*s))
1944 s++;
1945 if (*s == ',')
1946 s++;
1947 while (isDIGIT(*s))
1948 s++;
1949 if (*s == '}')
1950 return FALSE;
1951 return TRUE;
1952
1953 }
1954
1955 /* On the other hand, maybe we have a character class */
1956
1957 s++;
1958 if (*s == ']' || *s == '^')
1959 return FALSE;
1960 else {
ffb4593c 1961 /* this is terrifying, and it works */
79072805
LW
1962 int weight = 2; /* let's weigh the evidence */
1963 char seen[256];
f27ffc4a 1964 unsigned char un_char = 255, last_un_char;
9d4ba2ae 1965 const char * const send = strchr(s,']');
3280af22 1966 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1967
1968 if (!send) /* has to be an expression */
1969 return TRUE;
1970
1971 Zero(seen,256,char);
1972 if (*s == '$')
1973 weight -= 3;
1974 else if (isDIGIT(*s)) {
1975 if (s[1] != ']') {
1976 if (isDIGIT(s[1]) && s[2] == ']')
1977 weight -= 10;
1978 }
1979 else
1980 weight -= 100;
1981 }
1982 for (; s < send; s++) {
1983 last_un_char = un_char;
1984 un_char = (unsigned char)*s;
1985 switch (*s) {
1986 case '@':
1987 case '&':
1988 case '$':
1989 weight -= seen[un_char] * 10;
7e2040f0 1990 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1991 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1992 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1993 weight -= 100;
1994 else
1995 weight -= 10;
1996 }
1997 else if (*s == '$' && s[1] &&
93a17b20
LW
1998 strchr("[#!%*<>()-=",s[1])) {
1999 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2000 weight -= 10;
2001 else
2002 weight -= 1;
2003 }
2004 break;
2005 case '\\':
2006 un_char = 254;
2007 if (s[1]) {
93a17b20 2008 if (strchr("wds]",s[1]))
79072805
LW
2009 weight += 100;
2010 else if (seen['\''] || seen['"'])
2011 weight += 1;
93a17b20 2012 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2013 weight += 40;
2014 else if (isDIGIT(s[1])) {
2015 weight += 40;
2016 while (s[1] && isDIGIT(s[1]))
2017 s++;
2018 }
2019 }
2020 else
2021 weight += 100;
2022 break;
2023 case '-':
2024 if (s[1] == '\\')
2025 weight += 50;
93a17b20 2026 if (strchr("aA01! ",last_un_char))
79072805 2027 weight += 30;
93a17b20 2028 if (strchr("zZ79~",s[1]))
79072805 2029 weight += 30;
f27ffc4a
GS
2030 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2031 weight -= 5; /* cope with negative subscript */
79072805
LW
2032 break;
2033 default:
3792a11b
NC
2034 if (!isALNUM(last_un_char)
2035 && !(last_un_char == '$' || last_un_char == '@'
2036 || last_un_char == '&')
2037 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2038 char *d = tmpbuf;
2039 while (isALPHA(*s))
2040 *d++ = *s++;
2041 *d = '\0';
2042 if (keyword(tmpbuf, d - tmpbuf))
2043 weight -= 150;
2044 }
2045 if (un_char == last_un_char + 1)
2046 weight += 5;
2047 weight -= seen[un_char];
2048 break;
2049 }
2050 seen[un_char]++;
2051 }
2052 if (weight >= 0) /* probably a character class */
2053 return FALSE;
2054 }
2055
2056 return TRUE;
2057}
ffed7fef 2058
ffb4593c
NT
2059/*
2060 * S_intuit_method
2061 *
2062 * Does all the checking to disambiguate
2063 * foo bar
2064 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2065 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2066 *
2067 * First argument is the stuff after the first token, e.g. "bar".
2068 *
2069 * Not a method if bar is a filehandle.
2070 * Not a method if foo is a subroutine prototyped to take a filehandle.
2071 * Not a method if it's really "Foo $bar"
2072 * Method if it's "foo $bar"
2073 * Not a method if it's really "print foo $bar"
2074 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2075 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2076 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2077 * =>
2078 */
2079
76e3520e 2080STATIC int
cea2e8a9 2081S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2082{
2083 char *s = start + (*start == '$');
3280af22 2084 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2085 STRLEN len;
2086 GV* indirgv;
2087
2088 if (gv) {
b6c543e3 2089 CV *cv;
a0d0e21e
LW
2090 if (GvIO(gv))
2091 return 0;
b6c543e3 2092 if ((cv = GvCVu(gv))) {
95a20fc0 2093 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2094 if (proto) {
2095 if (*proto == ';')
2096 proto++;
2097 if (*proto == '*')
2098 return 0;
2099 }
2100 } else
a0d0e21e
LW
2101 gv = 0;
2102 }
8903cb82 2103 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2104 /* start is the beginning of the possible filehandle/object,
2105 * and s is the end of it
2106 * tmpbuf is a copy of it
2107 */
2108
a0d0e21e 2109 if (*start == '$') {
3280af22 2110 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2111 return 0;
2112 s = skipspace(s);
3280af22
NIS
2113 PL_bufptr = start;
2114 PL_expect = XREF;
a0d0e21e
LW
2115 return *s == '(' ? FUNCMETH : METHOD;
2116 }
2117 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2118 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2119 len -= 2;
2120 tmpbuf[len] = '\0';
2121 goto bare_package;
2122 }
2123 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2124 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2125 return 0;
2126 /* filehandle or package name makes it a method */
89bfa8cd 2127 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2128 s = skipspace(s);
3280af22 2129 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2130 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2131 bare_package:
3280af22 2132 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2133 newSVpvn(tmpbuf,len));
3280af22
NIS
2134 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2135 PL_expect = XTERM;
a0d0e21e 2136 force_next(WORD);
3280af22 2137 PL_bufptr = s;
a0d0e21e
LW
2138 return *s == '(' ? FUNCMETH : METHOD;
2139 }
2140 }
2141 return 0;
2142}
2143
ffb4593c
NT
2144/*
2145 * S_incl_perldb
2146 * Return a string of Perl code to load the debugger. If PERL5DB
2147 * is set, it will return the contents of that, otherwise a
2148 * compile-time require of perl5db.pl.
2149 */
2150
bfed75c6 2151STATIC const char*
cea2e8a9 2152S_incl_perldb(pTHX)
a0d0e21e 2153{
3280af22 2154 if (PL_perldb) {
9d4ba2ae 2155 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2156
2157 if (pdb)
2158 return pdb;
93189314 2159 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2160 return "BEGIN { require 'perl5db.pl' }";
2161 }
2162 return "";
2163}
2164
2165
16d20bd9 2166/* Encoded script support. filter_add() effectively inserts a
4e553d73 2167 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2168 * Note that the filter function only applies to the current source file
2169 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2170 *
2171 * The datasv parameter (which may be NULL) can be used to pass
2172 * private data to this instance of the filter. The filter function
2173 * can recover the SV using the FILTER_DATA macro and use it to
2174 * store private buffers and state information.
2175 *
2176 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2177 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2178 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2179 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2180 * private use must be set using malloc'd pointers.
2181 */
16d20bd9
AD
2182
2183SV *
864dbfa3 2184Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2185{
f4c556ac
GS
2186 if (!funcp)
2187 return Nullsv;
2188
3280af22
NIS
2189 if (!PL_rsfp_filters)
2190 PL_rsfp_filters = newAV();
16d20bd9 2191 if (!datasv)
8c52afec 2192 datasv = NEWSV(255,0);
862a34c6 2193 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2194 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2195 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2196 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2197 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2198 av_unshift(PL_rsfp_filters, 1);
2199 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2200 return(datasv);
2201}
4e553d73 2202
16d20bd9
AD
2203
2204/* Delete most recently added instance of this filter function. */
a0d0e21e 2205void
864dbfa3 2206Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2207{
e0c19803 2208 SV *datasv;
24801a4b 2209
33073adb 2210#ifdef DEBUGGING
8141890a 2211 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2212#endif
3280af22 2213 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2214 return;
2215 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2216 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2217 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2218 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2219 IoANY(datasv) = (void *)NULL;
3280af22 2220 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2221
16d20bd9
AD
2222 return;
2223 }
2224 /* we need to search for the correct entry and clear it */
cea2e8a9 2225 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2226}
2227
2228
1de9afcd
RGS
2229/* Invoke the idxth filter function for the current rsfp. */
2230/* maxlen 0 = read one text line */
16d20bd9 2231I32
864dbfa3 2232Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2233{
16d20bd9
AD
2234 filter_t funcp;
2235 SV *datasv = NULL;
e50aee73 2236
3280af22 2237 if (!PL_rsfp_filters)
16d20bd9 2238 return -1;
1de9afcd 2239 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2240 /* Provide a default input filter to make life easy. */
2241 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2242 DEBUG_P(PerlIO_printf(Perl_debug_log,
2243 "filter_read %d: from rsfp\n", idx));
4e553d73 2244 if (maxlen) {
16d20bd9
AD
2245 /* Want a block */
2246 int len ;
f54cb97a 2247 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2248
2249 /* ensure buf_sv is large enough */
eb160463 2250 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2251 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2252 if (PerlIO_error(PL_rsfp))
37120919
AD
2253 return -1; /* error */
2254 else
2255 return 0 ; /* end of file */
2256 }
16d20bd9
AD
2257 SvCUR_set(buf_sv, old_len + len) ;
2258 } else {
2259 /* Want a line */
3280af22
NIS
2260 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2261 if (PerlIO_error(PL_rsfp))
37120919
AD
2262 return -1; /* error */
2263 else
2264 return 0 ; /* end of file */
2265 }
16d20bd9
AD
2266 }
2267 return SvCUR(buf_sv);
2268 }
2269 /* Skip this filter slot if filter has been deleted */
1de9afcd 2270 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2271 DEBUG_P(PerlIO_printf(Perl_debug_log,
2272 "filter_read %d: skipped (filter deleted)\n",
2273 idx));
16d20bd9
AD
2274 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2275 }
2276 /* Get function pointer hidden within datasv */
8141890a 2277 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2278 DEBUG_P(PerlIO_printf(Perl_debug_log,
2279 "filter_read %d: via function %p (%s)\n",
cfd0369c 2280 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2281 /* Call function. The function is expected to */
2282 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2283 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2284 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2285}
2286
76e3520e 2287STATIC char *
cea2e8a9 2288S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2289{
c39cd008 2290#ifdef PERL_CR_FILTER
3280af22 2291 if (!PL_rsfp_filters) {
c39cd008 2292 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2293 }
2294#endif
3280af22 2295 if (PL_rsfp_filters) {
55497cff
PP
2296 if (!append)
2297 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2298 if (FILTER_READ(0, sv, 0) > 0)
2299 return ( SvPVX(sv) ) ;
2300 else
2301 return Nullch ;
2302 }
9d116dd7 2303 else
fd049845 2304 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2305}
2306
01ec43d0 2307STATIC HV *
7fc63493 2308S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2309{
2310 GV *gv;
2311
01ec43d0 2312 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2313 return PL_curstash;
2314
2315 if (len > 2 &&
2316 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2317 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2318 {
2319 return GvHV(gv); /* Foo:: */
def3634b
GS
2320 }
2321
2322 /* use constant CLASS => 'MyClass' */
2323 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2324 SV *sv;
2325 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2326 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2327 }
2328 }
2329
2330 return gv_stashpv(pkgname, FALSE);
2331}
a0d0e21e 2332
468aa647 2333STATIC char *
cc6ed77d 2334S_tokenize_use(pTHX_ int is_use, char *s) {
468aa647
RGS
2335 if (PL_expect != XSTATE)
2336 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2337 is_use ? "use" : "no"));
2338 s = skipspace(s);
2339 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2340 s = force_version(s, TRUE);
2341 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2342 PL_nextval[PL_nexttoke].opval = Nullop;
2343 force_next(WORD);
2344 }
2345 else if (*s == 'v') {
2346 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2347 s = force_version(s, FALSE);
2348 }
2349 }
2350 else {
2351 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2352 s = force_version(s, FALSE);
2353 }
2354 yylval.ival = is_use;
2355 return s;
2356}
748a9306 2357#ifdef DEBUGGING
27da23d5 2358 static const char* const exp_name[] =
09bef843 2359 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2360 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2361 };
748a9306 2362#endif
463ee0b2 2363
02aa26ce
NT
2364/*
2365 yylex
2366
2367 Works out what to call the token just pulled out of the input
2368 stream. The yacc parser takes care of taking the ops we return and
2369 stitching them into a tree.
2370
2371 Returns:
2372 PRIVATEREF
2373
2374 Structure:
2375 if read an identifier
2376 if we're in a my declaration
2377 croak if they tried to say my($foo::bar)
2378 build the ops for a my() declaration
2379 if it's an access to a my() variable
2380 are we in a sort block?
2381 croak if my($a); $a <=> $b
2382 build ops for access to a my() variable
2383 if in a dq string, and they've said @foo and we can't find @foo
2384 croak
2385 build ops for a bareword
2386 if we already built the token before, use it.
2387*/
2388
20141f0e 2389
dba4d153
JH
2390#ifdef __SC__
2391#pragma segment Perl_yylex
2392#endif
dba4d153 2393int
dba4d153 2394Perl_yylex(pTHX)
20141f0e 2395{
3afc138a 2396 register char *s = PL_bufptr;
378cc40b 2397 register char *d;
79072805 2398 register I32 tmp;
463ee0b2 2399 STRLEN len;
161b471a
NIS
2400 GV *gv = Nullgv;
2401 GV **gvp = 0;
aa7440fb 2402 bool bof = FALSE;
1d239bbb 2403 I32 orig_keyword = 0;
a687059c 2404
bbf60fe6
DM
2405 DEBUG_T( {
2406 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2407 lex_state_names[PL_lex_state]);
2408 } );
02aa26ce 2409 /* check if there's an identifier for us to look at */
ba979b31 2410 if (PL_pending_ident)
bbf60fe6 2411 return REPORT(S_pending_ident(aTHX));
bbce6d69 2412
02aa26ce
NT
2413 /* no identifier pending identification */
2414
3280af22 2415 switch (PL_lex_state) {
79072805
LW
2416#ifdef COMMENTARY
2417 case LEX_NORMAL: /* Some compilers will produce faster */
2418 case LEX_INTERPNORMAL: /* code if we comment these out. */
2419 break;
2420#endif
2421
09bef843 2422 /* when we've already built the next token, just pull it out of the queue */
79072805 2423 case LEX_KNOWNEXT:
3280af22
NIS
2424 PL_nexttoke--;
2425 yylval = PL_nextval[PL_nexttoke];
2426 if (!PL_nexttoke) {
2427 PL_lex_state = PL_lex_defer;
2428 PL_expect = PL_lex_expect;
2429 PL_lex_defer = LEX_NORMAL;
463ee0b2 2430 }
607df283 2431 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2432 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2433 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2434
bbf60fe6 2435 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2436
02aa26ce 2437 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2438 when we get here, PL_bufptr is at the \
02aa26ce 2439 */
79072805
LW
2440 case LEX_INTERPCASEMOD:
2441#ifdef DEBUGGING
3280af22 2442 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2443 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2444#endif
02aa26ce 2445 /* handle \E or end of string */
3280af22 2446 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2447 /* if at a \E */
3280af22 2448 if (PL_lex_casemods) {
f54cb97a 2449 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2450 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2451
3792a11b
NC
2452 if (PL_bufptr != PL_bufend
2453 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2454 PL_bufptr += 2;
2455 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2456 }
bbf60fe6 2457 return REPORT(')');
79072805 2458 }
3280af22
NIS
2459 if (PL_bufptr != PL_bufend)
2460 PL_bufptr += 2;
2461 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2462 return yylex();
79072805
LW
2463 }
2464 else {
607df283 2465 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2466 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2467 s = PL_bufptr + 1;
6e909404
JH
2468 if (s[1] == '\\' && s[2] == 'E') {
2469 PL_bufptr = s + 3;
2470 PL_lex_state = LEX_INTERPCONCAT;
2471 return yylex();
a0d0e21e 2472 }
6e909404
JH
2473 else {
2474 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2475 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2476 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2477 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2478 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2479 return REPORT(')');
6e909404
JH
2480 }
2481 if (PL_lex_casemods > 10)
2482 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2483 PL_lex_casestack[PL_lex_casemods++] = *s;
2484 PL_lex_casestack[PL_lex_casemods] = '\0';
2485 PL_lex_state = LEX_INTERPCONCAT;
2486 PL_nextval[PL_nexttoke].ival = 0;
2487 force_next('(');
2488 if (*s == 'l')
2489 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2490 else if (*s == 'u')
2491 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2492 else if (*s == 'L')
2493 PL_nextval[PL_nexttoke].ival = OP_LC;
2494 else if (*s == 'U')
2495 PL_nextval[PL_nexttoke].ival = OP_UC;
2496 else if (*s == 'Q')
2497 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2498 else
2499 Perl_croak(aTHX_ "panic: yylex");
2500 PL_bufptr = s + 1;
a0d0e21e 2501 }
79072805 2502 force_next(FUNC);
3280af22
NIS
2503 if (PL_lex_starts) {
2504 s = PL_bufptr;
2505 PL_lex_starts = 0;
131b3ad0
DM
2506 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2507 if (PL_lex_casemods == 1 && PL_lex_inpat)
2508 OPERATOR(',');
2509 else
2510 Aop(OP_CONCAT);
79072805
LW
2511 }
2512 else
cea2e8a9 2513 return yylex();
79072805
LW
2514 }
2515
55497cff 2516 case LEX_INTERPPUSH:
bbf60fe6 2517 return REPORT(sublex_push());
55497cff 2518
79072805 2519 case LEX_INTERPSTART:
3280af22 2520 if (PL_bufptr == PL_bufend)
bbf60fe6 2521 return REPORT(sublex_done());
607df283 2522 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2523 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2524 PL_expect = XTERM;
2525 PL_lex_dojoin = (*PL_bufptr == '@');
2526 PL_lex_state = LEX_INTERPNORMAL;
2527 if (PL_lex_dojoin) {
2528 PL_nextval[PL_nexttoke].ival = 0;
79072805 2529 force_next(',');
a0d0e21e 2530 force_ident("\"", '$');
3280af22 2531 PL_nextval[PL_nexttoke].ival = 0;
79072805 2532 force_next('$');
3280af22 2533 PL_nextval[PL_nexttoke].ival = 0;
79072805 2534 force_next('(');
3280af22 2535 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2536 force_next(FUNC);
2537 }
3280af22
NIS
2538 if (PL_lex_starts++) {
2539 s = PL_bufptr;
131b3ad0
DM
2540 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2541 if (!PL_lex_casemods && PL_lex_inpat)
2542 OPERATOR(',');
2543 else
2544 Aop(OP_CONCAT);
79072805 2545 }
cea2e8a9 2546 return yylex();
79072805
LW
2547
2548 case LEX_INTERPENDMAYBE:
3280af22
NIS
2549 if (intuit_more(PL_bufptr)) {
2550 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2551 break;
2552 }
2553 /* FALL THROUGH */
2554
2555 case LEX_INTERPEND:
3280af22
NIS
2556 if (PL_lex_dojoin) {
2557 PL_lex_dojoin = FALSE;
2558 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2559 return REPORT(')');
79072805 2560 }
43a16006 2561 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2562 && SvEVALED(PL_lex_repl))
43a16006 2563 {
e9fa98b2 2564 if (PL_bufptr != PL_bufend)
cea2e8a9 2565 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2566 PL_lex_repl = Nullsv;
2567 }
79072805
LW
2568 /* FALLTHROUGH */
2569 case LEX_INTERPCONCAT:
2570#ifdef DEBUGGING
3280af22 2571 if (PL_lex_brackets)
cea2e8a9 2572 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2573#endif
3280af22 2574 if (PL_bufptr == PL_bufend)
bbf60fe6 2575 return REPORT(sublex_done());
79072805 2576
3280af22
NIS
2577 if (SvIVX(PL_linestr) == '\'') {
2578 SV *sv = newSVsv(PL_linestr);
2579 if (!PL_lex_inpat)
76e3520e 2580 sv = tokeq(sv);
3280af22 2581 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2582 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2583 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2584 s = PL_bufend;
79072805
LW
2585 }
2586 else {
3280af22 2587 s = scan_const(PL_bufptr);
79072805 2588 if (*s == '\\')
3280af22 2589 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2590 else
3280af22 2591 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2592 }
2593
3280af22
NIS
2594 if (s != PL_bufptr) {
2595 PL_nextval[PL_nexttoke] = yylval;
2596 PL_expect = XTERM;
79072805 2597 force_next(THING);
131b3ad0
DM
2598 if (PL_lex_starts++) {
2599 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2600 if (!PL_lex_casemods && PL_lex_inpat)
2601 OPERATOR(',');
2602 else
2603 Aop(OP_CONCAT);
2604 }
79072805 2605 else {
3280af22 2606 PL_bufptr = s;
cea2e8a9 2607 return yylex();
79072805
LW
2608 }
2609 }
2610
cea2e8a9 2611 return yylex();
a0d0e21e 2612 case LEX_FORMLINE:
3280af22
NIS
2613 PL_lex_state = LEX_NORMAL;
2614 s = scan_formline(PL_bufptr);
2615 if (!PL_lex_formbrack)
a0d0e21e
LW
2616 goto rightbracket;
2617 OPERATOR(';');
79072805
LW
2618 }
2619
3280af22
NIS
2620 s = PL_bufptr;
2621 PL_oldoldbufptr = PL_oldbufptr;
2622 PL_oldbufptr = s;
607df283 2623 DEBUG_T( {
bbf60fe6 2624 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2625 exp_name[PL_expect], s);
5f80b19c 2626 } );
463ee0b2
LW
2627
2628 retry:
378cc40b
LW
2629 switch (*s) {
2630 default:
7e2040f0 2631 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2632 goto keylookup;
cea2e8a9 2633 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2634 case 4:
2635 case 26:
2636 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2637 case 0:
3280af22
NIS
2638 if (!PL_rsfp) {
2639 PL_last_uni = 0;
2640 PL_last_lop = 0;
c5ee2135
LW
2641 if (PL_lex_brackets) {
2642 if (PL_lex_formbrack)
2643 yyerror("Format not terminated");
2644 else
2645 yyerror("Missing right curly or square bracket");
2646 }
4e553d73 2647 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2648 "### Tokener got EOF\n");
5f80b19c 2649 } );
79072805 2650 TOKEN(0);
463ee0b2 2651 }
3280af22 2652 if (s++ < PL_bufend)
a687059c 2653 goto retry; /* ignore stray nulls */
3280af22
NIS
2654 PL_last_uni = 0;
2655 PL_last_lop = 0;
2656 if (!PL_in_eval && !PL_preambled) {
2657 PL_preambled = TRUE;
2658 sv_setpv(PL_linestr,incl_perldb());
2659 if (SvCUR(PL_linestr))
4147a61b 2660 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2661 if (PL_preambleav){
2662 while(AvFILLp(PL_preambleav) >= 0) {
2663 SV *tmpsv = av_shift(PL_preambleav);
2664 sv_catsv(PL_linestr, tmpsv);
4147a61b 2665 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2666 sv_free(tmpsv);
2667 }
3280af22
NIS
2668 sv_free((SV*)PL_preambleav);
2669 PL_preambleav = NULL;
91b7def8 2670 }
3280af22
NIS
2671 if (PL_minus_n || PL_minus_p) {
2672 sv_catpv(PL_linestr, "LINE: while (<>) {");
2673 if (PL_minus_l)
2674 sv_catpv(PL_linestr,"chomp;");
2675 if (PL_minus_a) {
3280af22 2676 if (PL_minus_F) {
3792a11b
NC
2677 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2678 || *PL_splitstr == '"')
3280af22 2679 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2680 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2681 else {
c8ef6a4b
NC
2682 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2683 bytes can be used as quoting characters. :-) */
48c4c863
NC
2684 /* The count here deliberately includes the NUL
2685 that terminates the C string constant. This
2686 embeds the opening NUL into the string. */
dd374669 2687 const char *splits = PL_splitstr;
9a3fb652 2688 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2689 do {
2690 /* Need to \ \s */
dd374669
AL
2691 if (*splits == '\\')
2692 sv_catpvn(PL_linestr, splits, 1);
2693 sv_catpvn(PL_linestr, splits, 1);
2694 } while (*splits++);
48c4c863
NC
2695 /* This loop will embed the trailing NUL of
2696 PL_linestr as the last thing it does before
2697 terminating. */
4fccd7c6 2698 sv_catpvn(PL_linestr, ");", 2);
54310121 2699 }
2304df62
AD
2700 }
2701 else
75c72d73 2702 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2703 }
79072805 2704 }
4147a61b 2705 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2706 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2707 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2708 PL_last_lop = PL_last_uni = Nullch;
3280af22 2709 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2710 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2711
2712 sv_upgrade(sv, SVt_PVMG);
3280af22 2713 sv_setsv(sv,PL_linestr);
0ac0412a 2714 (void)SvIOK_on(sv);
45977657 2715 SvIV_set(sv, 0);
57843af0 2716 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2717 }
79072805 2718 goto retry;
a687059c 2719 }
e929a76b 2720 do {
aa7440fb 2721 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2722 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2723 fake_eof:
2724 if (PL_rsfp) {
2725 if (PL_preprocess && !PL_in_eval)
2726 (void)PerlProc_pclose(PL_rsfp);
2727 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2728 PerlIO_clearerr(PL_rsfp);
2729 else
2730 (void)PerlIO_close(PL_rsfp);
2731 PL_rsfp = Nullfp;
2732 PL_doextract = FALSE;
2733 }
2734 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2735 sv_setpv(PL_linestr,PL_minus_p
2736 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2737 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2739 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2740 PL_minus_n = PL_minus_p = 0;
2741 goto retry;
2742 }
2743 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2744 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2745 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2746 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2747 }
7aa207d6
JH
2748 /* If it looks like the start of a BOM or raw UTF-16,
2749 * check if it in fact is. */
2750 else if (bof &&
2751 (*s == 0 ||
2752 *(U8*)s == 0xEF ||
2753 *(U8*)s >= 0xFE ||
2754 s[1] == 0)) {
226017aa 2755#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2756# ifdef __GNU_LIBRARY__
2757# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2758# define FTELL_FOR_PIPE_IS_BROKEN
2759# endif
e3f494f1
JH
2760# else
2761# ifdef __GLIBC__
2762# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2763# define FTELL_FOR_PIPE_IS_BROKEN
2764# endif
2765# endif
226017aa
DD
2766# endif
2767#endif
2768#ifdef FTELL_FOR_PIPE_IS_BROKEN
2769 /* This loses the possibility to detect the bof
2770 * situation on perl -P when the libc5 is being used.
2771 * Workaround? Maybe attach some extra state to PL_rsfp?
2772 */
2773 if (!PL_preprocess)
7e28d3af 2774 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2775#else
eb160463 2776 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2777#endif
7e28d3af 2778 if (bof) {
3280af22 2779 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2780 s = swallow_bom((U8*)s);
e929a76b 2781 }
378cc40b 2782 }
3280af22 2783 if (PL_doextract) {
a0d0e21e
LW
2784 /* Incest with pod. */
2785 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2786 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2787 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2788 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2789 PL_last_lop = PL_last_uni = Nullch;
3280af22 2790 PL_doextract = FALSE;
a0d0e21e 2791 }
4e553d73 2792 }
463ee0b2 2793 incline(s);
3280af22
NIS
2794 } while (PL_doextract);
2795 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2796 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2797 SV * const sv = NEWSV(85,0);
a687059c 2798
93a17b20 2799 sv_upgrade(sv, SVt_PVMG);
3280af22 2800 sv_setsv(sv,PL_linestr);
0ac0412a 2801 (void)SvIOK_on(sv);
45977657 2802 SvIV_set(sv, 0);
57843af0 2803 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2804 }
3280af22 2805 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2806 PL_last_lop = PL_last_uni = Nullch;
57843af0 2807 if (CopLINE(PL_curcop) == 1) {
3280af22 2808 while (s < PL_bufend && isSPACE(*s))
79072805 2809 s++;
a0d0e21e 2810 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2811 s++;
44a8e56a 2812 d = Nullch;
3280af22 2813 if (!PL_in_eval) {
44a8e56a
PP
2814 if (*s == '#' && *(s+1) == '!')
2815 d = s + 2;
2816#ifdef ALTERNATE_SHEBANG
2817 else {
bfed75c6 2818 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2819 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2820 d = s + (sizeof(as) - 1);
2821 }
2822#endif /* ALTERNATE_SHEBANG */
2823 }
2824 if (d) {
b8378b72 2825 char *ipath;
774d564b 2826 char *ipathend;
b8378b72 2827
774d564b 2828 while (isSPACE(*d))
b8378b72
CS
2829 d++;
2830 ipath = d;
774d564b
PP
2831 while (*d && !isSPACE(*d))
2832 d++;
2833 ipathend = d;
2834
2835#ifdef ARG_ZERO_IS_SCRIPT
2836 if (ipathend > ipath) {
2837 /*
2838 * HP-UX (at least) sets argv[0] to the script name,
2839 * which makes $^X incorrect. And Digital UNIX and Linux,
2840 * at least, set argv[0] to the basename of the Perl
2841 * interpreter. So, having found "#!", we'll set it right.
2842 */
ee2f7564 2843 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2844 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2845 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2846 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2847 SvSETMAGIC(x);
2848 }
556c1dec
JH
2849 else {
2850 STRLEN blen;
2851 STRLEN llen;
cfd0369c 2852 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2853 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2854 if (llen < blen) {
2855 bstart += blen - llen;
2856 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2857 sv_setpvn(x, ipath, ipathend - ipath);
2858 SvSETMAGIC(x);
2859 }
2860 }
2861 }
774d564b 2862 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2863 }
774d564b 2864#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2865
2866 /*
2867 * Look for options.
2868 */
748a9306 2869 d = instr(s,"perl -");
84e30d1a 2870 if (!d) {
748a9306 2871 d = instr(s,"perl");
84e30d1a
GS
2872#if defined(DOSISH)
2873 /* avoid getting into infinite loops when shebang
2874 * line contains "Perl" rather than "perl" */
2875 if (!d) {
2876 for (d = ipathend-4; d >= ipath; --d) {
2877 if ((*d == 'p' || *d == 'P')
2878 && !ibcmp(d, "perl", 4))
2879 {
2880 break;
2881 }
2882 }
2883 if (d < ipath)
2884 d = Nullch;
2885 }
2886#endif
2887 }
44a8e56a
PP
2888#ifdef ALTERNATE_SHEBANG
2889 /*
2890 * If the ALTERNATE_SHEBANG on this system starts with a
2891 * character that can be part of a Perl expression, then if
2892 * we see it but not "perl", we're probably looking at the
2893 * start of Perl code, not a request to hand off to some
2894 * other interpreter. Similarly, if "perl" is there, but
2895 * not in the first 'word' of the line, we assume the line
2896 * contains the start of the Perl program.
44a8e56a
PP
2897 */
2898 if (d && *s != '#') {
f54cb97a 2899 const char *c = ipath;
44a8e56a
PP
2900 while (*c && !strchr("; \t\r\n\f\v#", *c))
2901 c++;
2902 if (c < d)
2903 d = Nullch; /* "perl" not in first word; ignore */
2904 else
2905 *s = '#'; /* Don't try to parse shebang line */
2906 }
774d564b 2907#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2908#ifndef MACOS_TRADITIONAL
748a9306 2909 if (!d &&
44a8e56a 2910 *s == '#' &&
774d564b 2911 ipathend > ipath &&
3280af22 2912 !PL_minus_c &&
748a9306 2913 !instr(s,"indir") &&
3280af22 2914 instr(PL_origargv[0],"perl"))
748a9306 2915 {
27da23d5 2916 dVAR;
9f68db38 2917 char **newargv;
9f68db38 2918
774d564b
PP
2919 *ipathend = '\0';
2920 s = ipathend + 1;
3280af22 2921 while (s < PL_bufend && isSPACE(*s))
9f68db38 2922 s++;
3280af22 2923 if (s < PL_bufend) {
a02a5408 2924 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2925 newargv[1] = s;
3280af22 2926 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2927 s++;
2928 *s = '\0';
3280af22 2929 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2930 }
2931 else
3280af22 2932 newargv = PL_origargv;
774d564b 2933 newargv[0] = ipath;
b35112e7 2934 PERL_FPU_PRE_EXEC
b4748376 2935 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2936 PERL_FPU_POST_EXEC
cea2e8a9 2937 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2938 }
bf4acbe4 2939#endif
748a9306 2940 if (d) {
f54cb97a
AL
2941 const U32 oldpdb = PL_perldb;
2942 const bool oldn = PL_minus_n;
2943 const bool oldp = PL_minus_p;
748a9306
LW
2944
2945 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2946 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2947
2948 if (*d++ == '-') {
f54cb97a 2949 const bool switches_done = PL_doswitches;
8cc95fdb 2950 do {
3ffe3ee4 2951 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2952 const char * const m = d;
8cc95fdb 2953 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2954 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2955 (int)(d - m), m);
2956 }
2957 d = moreswitches(d);
2958 } while (d);
f0b2cf55
YST
2959 if (PL_doswitches && !switches_done) {
2960 int argc = PL_origargc;
2961 char **argv = PL_origargv;
2962 do {
2963 argc--,argv++;
2964 } while (argc && argv[0][0] == '-' && argv[0][1]);
2965 init_argv_symbols(argc,argv);
2966 }
155aba94
GS
2967 if ((PERLDB_LINE && !oldpdb) ||
2968 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2969 /* if we have already added "LINE: while (<>) {",
2970 we must not do it again */
748a9306 2971 {
c69006e4 2972 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2973 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2974 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2975 PL_last_lop = PL_last_uni = Nullch;
3280af22 2976 PL_preambled = FALSE;
84902520 2977 if (PERLDB_LINE)
3280af22 2978 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2979 goto retry;
2980 }
a11ec5a9
RGS
2981 if (PL_doswitches && !switches_done) {
2982 int argc = PL_origargc;
2983 char **argv = PL_origargv;
2984 do {
2985 argc--,argv++;
2986 } while (argc && argv[0][0] == '-' && argv[0][1]);
2987 init_argv_symbols(argc,argv);
2988 }
a0d0e21e 2989 }
79072805 2990 }
9f68db38 2991 }
79072805 2992 }
3280af22
NIS
2993 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2994 PL_bufptr = s;
2995 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2996 return yylex();
ae986130 2997 }
378cc40b 2998 goto retry;
4fdae800 2999 case '\r':
6a27c188 3000#ifdef PERL_STRICT_CR
cea2e8a9 3001 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3002 Perl_croak(aTHX_
cc507455 3003 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3004#endif
4fdae800 3005 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3006#ifdef MACOS_TRADITIONAL
3007 case '\312':
3008#endif
378cc40b
LW
3009 s++;
3010 goto retry;
378cc40b 3011 case '#':
e929a76b 3012 case '\n':
3280af22 3013 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3014 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3015 /* handle eval qq[#line 1 "foo"\n ...] */
3016 CopLINE_dec(PL_curcop);
3017 incline(s);
3018 }
3280af22 3019 d = PL_bufend;
a687059c 3020 while (s < d && *s != '\n')
378cc40b 3021 s++;
0f85fab0 3022 if (s < d)
378cc40b 3023 s++;
78c267c1 3024 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3025 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3026 incline(s);
3280af22
NIS
3027 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3028 PL_bufptr = s;
3029 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3030 return yylex();
a687059c 3031 }
378cc40b 3032 }
a687059c 3033 else {
378cc40b 3034 *s = '\0';
3280af22 3035 PL_bufend = s;
a687059c 3036 }
378cc40b
LW
3037 goto retry;
3038 case '-':
79072805 3039 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
3040 I32 ftst = 0;
3041
378cc40b 3042 s++;
3280af22 3043 PL_bufptr = s;
748a9306
LW
3044 tmp = *s++;
3045
bf4acbe4 3046 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3047 s++;
3048
3049 if (strnEQ(s,"=>",2)) {
3280af22 3050 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 3051 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3052 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 3053 } );
748a9306
LW
3054 OPERATOR('-'); /* unary minus */
3055 }
3280af22 3056 PL_last_uni = PL_oldbufptr;
748a9306 3057 switch (tmp) {
e5edeb50
JH
3058 case 'r': ftst = OP_FTEREAD; break;
3059 case 'w': ftst = OP_FTEWRITE; break;
3060 case 'x': ftst = OP_FTEEXEC; break;
3061 case 'o': ftst = OP_FTEOWNED; break;
3062 case 'R': ftst = OP_FTRREAD; break;
3063 case 'W': ftst = OP_FTRWRITE; break;
3064 case 'X': ftst = OP_FTREXEC; break;
3065 case 'O': ftst = OP_FTROWNED; break;
3066 case 'e': ftst = OP_FTIS; break;
3067 case 'z': ftst = OP_FTZERO; break;
3068 case 's': ftst = OP_FTSIZE; break;
3069 case 'f': ftst = OP_FTFILE; break;
3070 case 'd': ftst = OP_FTDIR; break;
3071 case 'l': ftst = OP_FTLINK; break;
3072 case 'p': ftst = OP_FTPIPE; break;
3073 case 'S': ftst = OP_FTSOCK; break;
3074 case 'u': ftst = OP_FTSUID; break;
3075 case 'g': ftst = OP_FTSGID; break;
3076 case 'k': ftst = OP_FTSVTX; break;
3077 case 'b': ftst = OP_FTBLK; break;
3078 case 'c': ftst = OP_FTCHR; break;
3079 case 't': ftst = OP_FTTTY; break;
3080 case 'T': ftst = OP_FTTEXT; break;
3081 case 'B': ftst = OP_FTBINARY; break;
3082 case 'M': case 'A': case 'C':
3083 gv_fetchpv("\024",TRUE, SVt_PV);
3084 switch (tmp) {
3085 case 'M': ftst = OP_FTMTIME; break;
3086 case 'A': ftst = OP_FTATIME; break;
3087 case 'C': ftst = OP_FTCTIME; break;
3088 default: break;
3089 }
3090 break;
378cc40b 3091 default:
378cc40b
LW
3092 break;
3093 }
e5edeb50 3094 if (ftst) {
eb160463 3095 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3096 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3097 "### Saw file test %c\n", (int)ftst);
5f80b19c 3098 } );
e5edeb50
JH
3099 FTST(ftst);
3100 }
3101 else {
3102 /* Assume it was a minus followed by a one-letter named
3103 * subroutine call (or a -bareword), then. */
95c31fe3 3104 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3105 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3106 (int) tmp);
5f80b19c 3107 } );
3cf7b4c4 3108 s = --PL_bufptr;
e5edeb50 3109 }
378cc40b 3110 }
a687059c
LW
3111 tmp = *s++;
3112 if (*s == tmp) {
3113 s++;
3280af22 3114 if (PL_expect == XOPERATOR)
79072805
LW
3115 TERM(POSTDEC);
3116 else
3117 OPERATOR(PREDEC);
3118 }
3119 else if (*s == '>') {
3120 s++;
3121 s = skipspace(s);
7e2040f0 3122 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3123 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3124 TOKEN(ARROW);
79072805 3125 }
748a9306
LW
3126 else if (*s == '$')
3127 OPERATOR(ARROW);
463ee0b2 3128 else
748a9306 3129 TERM(ARROW);
a687059c 3130 }
3280af22 3131 if (PL_expect == XOPERATOR)
79072805
LW
3132 Aop(OP_SUBTRACT);
3133 else {
3280af22 3134 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3135 check_uni();
79072805 3136 OPERATOR('-'); /* unary minus */
2f3197b3 3137 }
79072805 3138
378cc40b 3139 case '+':
a687059c
LW
3140 tmp = *s++;
3141 if (*s == tmp) {
378cc40b 3142 s++;
3280af22 3143 if (PL_expect == XOPERATOR)
79072805
LW
3144 TERM(POSTINC);
3145 else
3146 OPERATOR(PREINC);
378cc40b 3147 }
3280af22 3148 if (PL_expect == XOPERATOR)
79072805
LW
3149 Aop(OP_ADD);
3150 else {
3280af22 3151 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3152 check_uni();
a687059c 3153 OPERATOR('+');
2f3197b3 3154 }
a687059c 3155
378cc40b 3156 case '*':
3280af22
NIS
3157 if (PL_expect != XOPERATOR) {
3158 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3159 PL_expect = XOPERATOR;
3160 force_ident(PL_tokenbuf, '*');
3161 if (!*PL_tokenbuf)
a0d0e21e 3162 PREREF('*');
79072805 3163 TERM('*');
a687059c 3164 }
79072805
LW
3165 s++;
3166 if (*s == '*') {
a687059c 3167 s++;
79072805 3168 PWop(OP_POW);
a687059c 3169 }
79072805
LW
3170 Mop(OP_MULTIPLY);
3171
378cc40b 3172 case '%':
3280af22 3173 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3174 ++s;
3175 Mop(OP_MODULO);
a687059c 3176 }
3280af22
NIS
3177 PL_tokenbuf[0] = '%';
3178 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3179 if (!PL_tokenbuf[1]) {
bbce6d69 3180 PREREF('%');
a687059c 3181 }
3280af22 3182 PL_pending_ident = '%';
bbce6d69 3183 TERM('%');
a687059c 3184
378cc40b 3185 case '^':
79072805 3186 s++;
a0d0e21e 3187 BOop(OP_BIT_XOR);
79072805 3188 case '[':
3280af22 3189 PL_lex_brackets++;
79072805 3190 /* FALL THROUGH */
378cc40b 3191 case '~':
378cc40b 3192 case ',':
378cc40b
LW
3193 tmp = *s++;
3194 OPERATOR(tmp);
a0d0e21e
LW
3195 case ':':
3196 if (s[1] == ':') {
3197 len = 0;
3198 goto just_a_word;
3199 }
3200 s++;
09bef843
SB
3201 switch (PL_expect) {
3202 OP *attrs;
3203 case XOPERATOR:
3204 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3205 break;
3206 PL_bufptr = s; /* update in case we back off */
3207 goto grabattrs;
3208 case XATTRBLOCK:
3209 PL_expect = XBLOCK;
3210 goto grabattrs;
3211 case XATTRTERM:
3212 PL_expect = XTERMBLOCK;
3213 grabattrs:
3214 s = skipspace(s);
3215 attrs = Nullop;
7e2040f0 3216 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3217 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3218 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3219 if (tmp < 0) tmp = -tmp;
3220 switch (tmp) {
3221 case KEY_or:
3222 case KEY_and:
c963b151 3223 case KEY_err:
f9829d6b
GS
3224 case KEY_for:
3225 case KEY_unless:
3226 case KEY_if:
3227 case KEY_while:
3228 case KEY_until:
3229 goto got_attrs;
3230 default:
3231 break;
3232 }
3233 }
09bef843
SB
3234 if (*d == '(') {
3235 d = scan_str(d,TRUE,TRUE);
3236 if (!d) {
09bef843
SB
3237 /* MUST advance bufptr here to avoid bogus
3238 "at end of line" context messages from yyerror().
3239 */
3240 PL_bufptr = s + len;
3241 yyerror("Unterminated attribute parameter in attribute list");
3242 if (attrs)
3243 op_free(attrs);
bbf60fe6 3244 return REPORT(0); /* EOF indicator */
09bef843
SB
3245 }
3246 }
3247 if (PL_lex_stuff) {
3248 SV *sv = newSVpvn(s, len);
3249 sv_catsv(sv, PL_lex_stuff);
3250 attrs = append_elem(OP_LIST, attrs,
3251 newSVOP(OP_CONST, 0, sv));
3252 SvREFCNT_dec(PL_lex_stuff);
3253 PL_lex_stuff = Nullsv;
3254 }
3255 else {
371fce9b
DM
3256 if (len == 6 && strnEQ(s, "unique", len)) {
3257 if (PL_in_my == KEY_our)
3258#ifdef USE_ITHREADS
3259 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3260#else
3261 ; /* skip to avoid loading attributes.pm */
3262#endif
bfed75c6 3263 else
371fce9b
DM
3264 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3265 }
3266
d3cea301
SB
3267 /* NOTE: any CV attrs applied here need to be part of
3268 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3269 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3270 CvLVALUE_on(PL_compcv);
3271 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3272 CvLOCKED_on(PL_compcv);
3273 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3274 CvMETHOD_on(PL_compcv);
06492da6
SF
3275 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3276 CvASSERTION_on(PL_compcv);
78f9721b
SM
3277 /* After we've set the flags, it could be argued that
3278 we don't need to do the attributes.pm-based setting
3279 process, and shouldn't bother appending recognized
d3cea301
SB
3280 flags. To experiment with that, uncomment the
3281 following "else". (Note that's already been
3282 uncommented. That keeps the above-applied built-in
3283 attributes from being intercepted (and possibly
3284 rejected) by a package's attribute routines, but is
3285 justified by the performance win for the common case
3286 of applying only built-in attributes.) */
0256094b 3287 else
78f9721b
SM
3288 attrs = append_elem(OP_LIST, attrs,
3289 newSVOP(OP_CONST, 0,
3290 newSVpvn(s, len)));
09bef843
SB
3291 }
3292 s = skipspace(d);
0120eecf 3293 if (*s == ':' && s[1] != ':')
09bef843 3294 s = skipspace(s+1);
0120eecf
GS
3295 else if (s == d)
3296 break; /* require real whitespace or :'s */
09bef843 3297 }
f9829d6b 3298 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3299 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3300 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3301 /* If here for an expression, and parsed no attrs, back off. */
3302 if (tmp == '=' && !attrs) {
3303 s = PL_bufptr;
3304 break;
3305 }
3306 /* MUST advance bufptr here to avoid bogus "at end of line"
3307 context messages from yyerror().
3308 */
3309 PL_bufptr = s;
3310 if (!*s)
3311 yyerror("Unterminated attribute list");
3312 else
3313 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3314 q, *s, q));
3315 if (attrs)
3316 op_free(attrs);
3317 OPERATOR(':');
3318 }
f9829d6b 3319 got_attrs:
09bef843
SB
3320 if (attrs) {
3321 PL_nextval[PL_nexttoke].opval = attrs;
3322 force_next(THING);
3323 }
3324 TOKEN(COLONATTR);
3325 }
a0d0e21e 3326 OPERATOR(':');
8990e307
LW
3327 case '(':
3328 s++;
3280af22
NIS
3329 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3330 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3331 else
3280af22 3332 PL_expect = XTERM;
4a202259 3333 s = skipspace(s);
a0d0e21e 3334 TOKEN('(');
378cc40b 3335 case ';':
f4dd75d9 3336 CLINE;
378cc40b
LW
3337 tmp = *s++;
3338 OPERATOR(tmp);
3339 case ')':
378cc40b 3340 tmp = *s++;
16d20bd9
AD
3341 s = skipspace(s);
3342 if (*s == '{')
3343 PREBLOCK(tmp);
378cc40b 3344 TERM(tmp);
79072805
LW
3345 case ']':
3346 s++;
3280af22 3347 if (PL_lex_brackets <= 0)
d98d5fff 3348 yyerror("Unmatched right square bracket");
463ee0b2 3349 else
3280af22
NIS
3350 --PL_lex_brackets;
3351 if (PL_lex_state == LEX_INTERPNORMAL) {
3352 if (PL_lex_brackets == 0) {
a0d0e21e 3353 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3354 PL_lex_state = LEX_INTERPEND;
79072805
LW
3355 }
3356 }
4633a7c4 3357 TERM(']');
79072805
LW
3358 case '{':
3359 leftbracket:
79072805 3360 s++;
3280af22 3361 if (PL_lex_brackets > 100) {
8edd5f42 3362 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3363 }
3280af22 3364 switch (PL_expect) {
a0d0e21e 3365 case XTERM:
3280af22 3366 if (PL_lex_formbrack) {
a0d0e21e
LW
3367 s--;
3368 PRETERMBLOCK(DO);
3369 }
3280af22
NIS
3370 if (PL_oldoldbufptr == PL_last_lop)
3371 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3372 else
3280af22 3373 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3374 OPERATOR(HASHBRACK);
a0d0e21e 3375 case XOPERATOR:
bf4acbe4 3376 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3377 s++;
44a8e56a 3378 d = s;
3280af22
NIS
3379 PL_tokenbuf[0] = '\0';
3380 if (d < PL_bufend && *d == '-') {
3381 PL_tokenbuf[0] = '-';
44a8e56a 3382 d++;
bf4acbe4 3383 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3384 d++;
3385 }
7e2040f0 3386 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3387 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3388 FALSE, &len);
bf4acbe4 3389 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3390 d++;
3391 if (*d == '}') {
f54cb97a 3392 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3393 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3394 if (minus)
3395 force_next('-');
748a9306
LW
3396 }
3397 }
3398 /* FALL THROUGH */
09bef843 3399 case XATTRBLOCK:
748a9306 3400 case XBLOCK:
3280af22
NIS
3401 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3402 PL_expect = XSTATE;
a0d0e21e 3403 break;
09bef843 3404 case XATTRTERM:
a0d0e21e 3405 case XTERMBLOCK:
3280af22
NIS
3406 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3407 PL_expect = XSTATE;
a0d0e21e
LW
3408 break;
3409 default: {
f54cb97a 3410 const char *t;
3280af22
NIS
3411 if (PL_oldoldbufptr == PL_last_lop)
3412 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3413 else
3280af22 3414 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3415 s = skipspace(s);
8452ff4b
SB
3416 if (*s == '}') {
3417 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3418 PL_expect = XTERM;
3419 /* This hack is to get the ${} in the message. */
3420 PL_bufptr = s+1;
3421 yyerror("syntax error");
3422 break;
3423 }
a0d0e21e 3424 OPERATOR(HASHBRACK);
8452ff4b 3425 }
b8a4b1be
GS
3426 /* This hack serves to disambiguate a pair of curlies
3427 * as being a block or an anon hash. Normally, expectation
3428 * determines that, but in cases where we're not in a
3429 * position to expect anything in particular (like inside
3430 * eval"") we have to resolve the ambiguity. This code
3431 * covers the case where the first term in the curlies is a
3432 * quoted string. Most other cases need to be explicitly
a0288114 3433 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3434 * curly in order to force resolution as an anon hash.
3435 *
3436 * XXX should probably propagate the outer expectation
3437 * into eval"" to rely less on this hack, but that could
3438 * potentially break current behavior of eval"".
3439 * GSAR 97-07-21
3440 */
3441 t = s;
3442 if (*s == '\'' || *s == '"' || *s == '`') {
3443 /* common case: get past first string, handling escapes */
3280af22 3444 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3445 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3446 t++;
3447 t++;
a0d0e21e 3448 }
b8a4b1be 3449 else if (*s == 'q') {
3280af22 3450 if (++t < PL_bufend
b8a4b1be 3451 && (!isALNUM(*t)
3280af22 3452 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3453 && !isALNUM(*t))))
3454 {
abc667d1 3455 /* skip q//-like construct */
f54cb97a 3456 const char *tmps;
b8a4b1be
GS
3457 char open, close, term;
3458 I32 brackets = 1;
3459
3280af22 3460 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3461 t++;
abc667d1
DM
3462 /* check for q => */
3463 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3464 OPERATOR(HASHBRACK);
3465 }
b8a4b1be
GS
3466 term = *t;
3467 open = term;
3468 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3469 term = tmps[5];
3470 close = term;
3471 if (open == close)
3280af22
NIS
3472 for (t++; t < PL_bufend; t++) {
3473 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3474 t++;
6d07e5e9 3475 else if (*t == open)
b8a4b1be
GS
3476 break;
3477 }
abc667d1 3478 else {
3280af22
NIS
3479 for (t++; t < PL_bufend; t++) {
3480 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3481 t++;
6d07e5e9 3482 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3483 break;
3484 else if (*t == open)
3485 brackets++;
3486 }
abc667d1
DM
3487 }
3488 t++;
b8a4b1be 3489 }
abc667d1
DM
3490 else
3491 /* skip plain q word */
3492 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3493 t += UTF8SKIP(t);
a0d0e21e 3494 }
7e2040f0 3495 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3496 t += UTF8SKIP(t);
7e2040f0 3497 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3498 t += UTF8SKIP(t);
a0d0e21e 3499 }
3280af22 3500 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3501 t++;
b8a4b1be
GS
3502 /* if comma follows first term, call it an anon hash */
3503 /* XXX it could be a comma expression with loop modifiers */
3280af22 3504 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3505 || (*t == '=' && t[1] == '>')))
a0d0e21e 3506 OPERATOR(HASHBRACK);
3280af22 3507 if (PL_expect == XREF)
4e4e412b 3508 PL_expect = XTERM;
a0d0e21e 3509 else {
3280af22
NIS
3510 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3511 PL_expect = XSTATE;
a0d0e21e 3512 }
8990e307 3513 }
a0d0e21e 3514 break;
463ee0b2 3515 }
57843af0 3516 yylval.ival = CopLINE(PL_curcop);
79072805 3517 if (isSPACE(*s) || *s == '#')
3280af22 3518 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3519 TOKEN('{');
378cc40b 3520 case '}':
79072805
LW
3521 rightbracket:
3522 s++;
3280af22 3523 if (PL_lex_brackets <= 0)
d98d5fff 3524 yyerror("Unmatched right curly bracket");