This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
e6906430 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
2c351e65 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
d3b6f988
GS
26#define yychar PL_yychar
27#define yylval PL_yylval
28
8c89da26 29static const char ident_too_long[] = "Identifier too long";
f4362cdc 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a
PP
63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
3f3939a9
NC
65#define LEX_NORMAL 10 /* normal code (ie not within "...") */
66#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69#define LEX_INTERPSTART 6 /* expecting the start of a $var */
70
71 /* at end of code, eg "$x" followed by: */
72#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74
75#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77#define LEX_INTERPCONST 2 /* NOT USED */
78#define LEX_FORMLINE 1 /* expecting a format line */
79#define LEX_KNOWNEXT 0 /* next token known; just return it */
80
79072805 81
f31bfe61 82#ifdef DEBUGGING
fe20fd30 83static const char* const lex_state_names[] = {
f31bfe61
NC
84 "KNOWNEXT",
85 "FORMLINE",
86 "INTERPCONST",
87 "INTERPCONCAT",
88 "INTERPENDMAYBE",
89 "INTERPEND",
90 "INTERPSTART",
91 "INTERPPUSH",
92 "INTERPCASEMOD",
93 "INTERPNORMAL",
94 "NORMAL"
95};
96#endif
97
79072805
LW
98#ifdef ff_next
99#undef ff_next
d48672a2
LW
100#endif
101
a1a0e61e 102#ifdef USE_PURE_BISON
dba4d153
JH
103# ifndef YYMAXLEVEL
104# define YYMAXLEVEL 100
105# endif
20141f0e
RI
106YYSTYPE* yylval_pointer[YYMAXLEVEL];
107int* yychar_pointer[YYMAXLEVEL];
6f202aea 108int yyactlevel = -1;
22c35a8c
GS
109# undef yylval
110# undef yychar
20141f0e
RI
111# define yylval (*yylval_pointer[yyactlevel])
112# define yychar (*yychar_pointer[yyactlevel])
113# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 114# undef yylex
dba4d153 115# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
116#endif
117
79072805 118#include "keywords.h"
fe14fcc3 119
ffb4593c
NT
120/* CLINE is a macro that ensures PL_copline has a sane value */
121
ae986130
LW
122#ifdef CLINE
123#undef CLINE
124#endif
57843af0 125#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 126
ffb4593c
NT
127/*
128 * Convenience functions to return different tokens and prime the
9cbb5ea2 129 * lexer for the next token. They all take an argument.
ffb4593c
NT
130 *
131 * TOKEN : generic token (used for '(', DOLSHARP, etc)
132 * OPERATOR : generic operator
133 * AOPERATOR : assignment operator
134 * PREBLOCK : beginning the block after an if, while, foreach, ...
135 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
136 * PREREF : *EXPR where EXPR is not a simple identifier
137 * TERM : expression term
138 * LOOPX : loop exiting command (goto, last, dump, etc)
139 * FTST : file test operator
140 * FUN0 : zero-argument function
2d2e263d 141 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
142 * BOop : bitwise or or xor
143 * BAop : bitwise and
144 * SHop : shift operator
145 * PWop : power operator
9cbb5ea2 146 * PMop : pattern-matching operator
ffb4593c
NT
147 * Aop : addition-level operator
148 * Mop : multiplication-level operator
149 * Eop : equality-testing operator
e5edeb50 150 * Rop : relational operator <= != gt
ffb4593c
NT
151 *
152 * Also see LOP and lop() below.
153 */
154
998054bd 155#ifdef DEBUGGING /* Serve -DT. */
2746ec50 156# define REPORT(retval) tokereport((I32)retval)
998054bd 157#else
f31bfe61 158# define REPORT(retval) (retval)
998054bd
SC
159#endif
160
f31bfe61
NC
161#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
162#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
163#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
164#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
165#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
166#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
167#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
168#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
169#define FTST(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)UNIOP))
170#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
171#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
172#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
173#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
174#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
175#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
176#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
177#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
178#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
179#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
180#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 181
a687059c
LW
182/* This bit of chicanery makes a unary function followed by
183 * a parenthesis into a function with one argument, highest precedence.
184 */
23cd8e9f
AL
185#define UNI(f) { \
186 yylval.ival = f; \
187 PL_expect = XTERM; \
188 PL_bufptr = s; \
189 PL_last_uni = PL_oldbufptr; \
190 PL_last_lop_op = f; \
191 if (*s == '(') \
192 return REPORT( (int)FUNC1 ); \
193 s = skipspace(s); \
194 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
195 }
196
197#define UNIBRACK(f) { \
198 yylval.ival = f; \
199 PL_bufptr = s; \
200 PL_last_uni = PL_oldbufptr; \
201 if (*s == '(') \
202 return REPORT( (int)FUNC1 ); \
203 s = skipspace(s); \
204 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
205 }
79072805 206
9f68db38 207/* grandfather return to old style */
3280af22 208#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 209
8fa7f367
JH
210#ifdef DEBUGGING
211
f31bfe61
NC
212/* how to interpret the yylval associated with the token */
213enum token_type {
214 TOKENTYPE_NONE,
215 TOKENTYPE_IVAL,
216 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
217 TOKENTYPE_PVAL,
218 TOKENTYPE_OPVAL,
219 TOKENTYPE_GVVAL
220};
221
fe20fd30
JH
222static struct debug_tokens { const int token, type; const char *name; }
223 const debug_tokens[] =
9041c2e3 224{
f31bfe61
NC
225 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
226 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
227 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
228 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
229 { ARROW, TOKENTYPE_NONE, "ARROW" },
230 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
231 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
232 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
233 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
234 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
235 { DO, TOKENTYPE_NONE, "DO" },
236 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
237 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
238 { ELSE, TOKENTYPE_NONE, "ELSE" },
239 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
240 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
241 { FOR, TOKENTYPE_IVAL, "FOR" },
242 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
243 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
244 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
245 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
246 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
247 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
248 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
249 { IF, TOKENTYPE_IVAL, "IF" },
250 { LABEL, TOKENTYPE_PVAL, "LABEL" },
251 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
252 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
253 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
254 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
255 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
256 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
257 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
258 { MY, TOKENTYPE_IVAL, "MY" },
259 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
260 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
261 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
262 { OROP, TOKENTYPE_IVAL, "OROP" },
263 { OROR, TOKENTYPE_NONE, "OROR" },
264 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
265 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
266 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
267 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
268 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
269 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
270 { PREINC, TOKENTYPE_NONE, "PREINC" },
271 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
272 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
273 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
274 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
275 { SUB, TOKENTYPE_NONE, "SUB" },
276 { THING, TOKENTYPE_OPVAL, "THING" },
277 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
278 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
279 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
280 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
281 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
282 { USE, TOKENTYPE_IVAL, "USE" },
283 { WHILE, TOKENTYPE_IVAL, "WHILE" },
284 { WORD, TOKENTYPE_OPVAL, "WORD" },
285 { 0, TOKENTYPE_NONE, 0 }
286};
287
288/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 289
f31bfe61 290STATIC int
2746ec50 291S_tokereport(pTHX_ I32 rv)
f31bfe61
NC
292{
293 if (DEBUG_T_TEST) {
0e2d6244 294 const char *name = NULL;
f31bfe61 295 enum token_type type = TOKENTYPE_NONE;
24c2fff4 296 const struct debug_tokens *p;
d7559646 297 SV* const report = newSVpvs("<== ");
f31bfe61
NC
298
299 for (p = debug_tokens; p->token; p++) {
300 if (p->token == (int)rv) {
301 name = p->name;
302 type = p->type;
303 break;
304 }
305 }
306 if (name)
307 Perl_sv_catpvf(aTHX_ report, "%s", name);
308 else if ((char)rv > ' ' && (char)rv < '~')
309 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
310 else if (!rv)
d7559646 311 sv_catpvs(report, "EOF");
f31bfe61
NC
312 else
313 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
314 switch (type) {
315 case TOKENTYPE_NONE:
316 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
317 break;
318 case TOKENTYPE_IVAL:
319 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
320 break;
321 case TOKENTYPE_OPNUM:
322 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
323 PL_op_name[yylval.ival]);
324 break;
325 case TOKENTYPE_PVAL:
326 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
327 break;
328 case TOKENTYPE_OPVAL:
3f3939a9 329 if (yylval.opval) {
c870331e 330 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
f31bfe61 331 PL_op_name[yylval.opval->op_type]);
3f3939a9
NC
332 if (yylval.opval->op_type == OP_CONST) {
333 Perl_sv_catpvf(aTHX_ report, " %s",
334 SvPEEK(cSVOPx_sv(yylval.opval)));
335 }
336
337 }
c870331e 338 else
d7559646 339 sv_catpvs(report, "(opval=null)");
f31bfe61
NC
340 break;
341 }
3f3939a9 342 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
f31bfe61
NC
343 };
344 return (int)rv;
998054bd
SC
345}
346
3f3939a9
NC
347
348/* print the buffer with suitable escapes */
349
350STATIC void
351S_printbuf(pTHX_ const char* fmt, const char* s)
352{
d7559646 353 SV* const tmp = newSVpvs("");
ef9ec799
NC
354 PerlIO_printf(Perl_debug_log, fmt,
355 pv_display(tmp, (char *)s, strlen(s), 0, 60));
3f3939a9
NC
356 SvREFCNT_dec(tmp);
357}
358
8fa7f367
JH
359#endif
360
ffb4593c
NT
361/*
362 * S_ao
363 *
364 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
365 * into an OP_ANDASSIGN or OP_ORASSIGN
366 */
367
76e3520e 368STATIC int
cea2e8a9 369S_ao(pTHX_ int toketype)
a0d0e21e 370{
3280af22
NIS
371 if (*PL_bufptr == '=') {
372 PL_bufptr++;
a0d0e21e
LW
373 if (toketype == ANDAND)
374 yylval.ival = OP_ANDASSIGN;
375 else if (toketype == OROR)
376 yylval.ival = OP_ORASSIGN;
377 toketype = ASSIGNOP;
378 }
379 return toketype;
380}
381
ffb4593c
NT
382/*
383 * S_no_op
384 * When Perl expects an operator and finds something else, no_op
385 * prints the warning. It always prints "<something> found where
386 * operator expected. It prints "Missing semicolon on previous line?"
387 * if the surprise occurs at the start of the line. "do you need to
388 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
389 * where the compiler doesn't know if foo is a method call or a function.
390 * It prints "Missing operator before end of line" if there's nothing
391 * after the missing operator, or "... before <...>" if there is something
392 * after the missing operator.
393 */
394
76e3520e 395STATIC void
a00f3e00 396S_no_op(pTHX_ const char *what, char *s)
463ee0b2 397{
a2592645
NC
398 char * const oldbp = PL_bufptr;
399 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 400
1189a94a
GS
401 if (!s)
402 s = oldbp;
07c798fb 403 else
1189a94a 404 PL_bufptr = s;
cea2e8a9 405 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
c77beb56
NC
406 if (ckWARN_d(WARN_SYNTAX)) {
407 if (is_first)
408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
409 "\t(Missing semicolon on previous line?)\n");
410 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
24c2fff4 411 const char *t;
c77beb56
NC
412 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
413 if (t < PL_bufptr && isSPACE(*t))
414 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
415 "\t(Do you need to predeclare %.*s?)\n",
78fd745e 416 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
c77beb56
NC
417 }
418 else {
419 assert(s >= oldbp);
420 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
78fd745e 421 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
c77beb56 422 }
07c798fb 423 }
3280af22 424 PL_bufptr = oldbp;
8990e307
LW
425}
426
ffb4593c
NT
427/*
428 * S_missingterm
429 * Complain about missing quote/regexp/heredoc terminator.
430 * If it's called with (char *)NULL then it cauterizes the line buffer.
431 * If we're in a delimited string and the delimiter is a control
432 * character, it's reformatted into a two-char sequence like ^C.
433 * This is fatal.
434 */
435
76e3520e 436STATIC void
cea2e8a9 437S_missingterm(pTHX_ char *s)
8990e307
LW
438{
439 char tmpbuf[3];
440 char q;
441 if (s) {
a2592645 442 char * const nl = strrchr(s,'\n');
d2719217 443 if (nl)
8990e307
LW
444 *nl = '\0';
445 }
9d116dd7
JH
446 else if (
447#ifdef EBCDIC
448 iscntrl(PL_multi_close)
449#else
450 PL_multi_close < 32 || PL_multi_close == 127
451#endif
452 ) {
8990e307 453 *tmpbuf = '^';
3bad88ff 454 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
455 tmpbuf[2] = '\0';
456 s = tmpbuf;
457 }
458 else {
eb160463 459 *tmpbuf = (char)PL_multi_close;
8990e307
LW
460 tmpbuf[1] = '\0';
461 s = tmpbuf;
462 }
463 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 464 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 465}
79072805 466
ffb4593c
NT
467/*
468 * Perl_deprecate
ffb4593c
NT
469 */
470
79072805 471void
864dbfa3 472Perl_deprecate(pTHX_ char *s)
a0d0e21e 473{
599cee73 474 if (ckWARN(WARN_DEPRECATED))
9014280d 475 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
476}
477
12bcd1a6
PM
478void
479Perl_deprecate_old(pTHX_ char *s)
480{
481 /* This function should NOT be called for any new deprecated warnings */
482 /* Use Perl_deprecate instead */
483 /* */
484 /* It is here to maintain backward compatibility with the pre-5.8 */
485 /* warnings category hierarchy. The "deprecated" category used to */
486 /* live under the "syntax" category. It is now a top-level category */
487 /* in its own right. */
488
489 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
a00f3e00 490 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
491 "Use of %s is deprecated", s);
492}
493
ffb4593c 494/*
9cbb5ea2
GS
495 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
496 * utf16-to-utf8-reversed.
ffb4593c
NT
497 */
498
c39cd008
GS
499#ifdef PERL_CR_FILTER
500static void
501strip_return(SV *sv)
502{
5e7e76a3 503 register const char *s = SvPVX_const(sv);
a2592645 504 register const char * const e = s + SvCUR(sv);
c39cd008
GS
505 /* outer loop optimized to do nothing if there are no CR-LFs */
506 while (s < e) {
507 if (*s++ == '\r' && *s == '\n') {
508 /* hit a CR-LF, need to copy the rest */
509 register char *d = s - 1;
510 *d++ = *s++;
511 while (s < e) {
512 if (*s == '\r' && s[1] == '\n')
513 s++;
514 *d++ = *s++;
515 }
516 SvCUR(sv) -= s - d;
517 return;
518 }
519 }
520}
a868473f 521
76e3520e 522STATIC I32
c39cd008 523S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 524{
24c2fff4 525 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
526 if (count > 0 && !maxlen)
527 strip_return(sv);
528 return count;
a868473f
NIS
529}
530#endif
531
ffb4593c
NT
532/*
533 * Perl_lex_start
9cbb5ea2
GS
534 * Initialize variables. Uses the Perl save_stack to save its state (for
535 * recursive calls to the parser).
ffb4593c
NT
536 */
537
a0d0e21e 538void
864dbfa3 539Perl_lex_start(pTHX_ SV *line)
79072805 540{
71a0dd65 541 const char *s;
8990e307
LW
542 STRLEN len;
543
3280af22
NIS
544 SAVEI32(PL_lex_dojoin);
545 SAVEI32(PL_lex_brackets);
3280af22
NIS
546 SAVEI32(PL_lex_casemods);
547 SAVEI32(PL_lex_starts);
548 SAVEI32(PL_lex_state);
7766f137 549 SAVEVPTR(PL_lex_inpat);
3280af22 550 SAVEI32(PL_lex_inwhat);
18b09519
GS
551 if (PL_lex_state == LEX_KNOWNEXT) {
552 I32 toke = PL_nexttoke;
553 while (--toke >= 0) {
554 SAVEI32(PL_nexttype[toke]);
555 SAVEVPTR(PL_nextval[toke]);
556 }
557 SAVEI32(PL_nexttoke);
18b09519 558 }
57843af0 559 SAVECOPLINE(PL_curcop);
3280af22
NIS
560 SAVEPPTR(PL_bufptr);
561 SAVEPPTR(PL_bufend);
562 SAVEPPTR(PL_oldbufptr);
563 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
564 SAVEPPTR(PL_last_lop);
565 SAVEPPTR(PL_last_uni);
3280af22
NIS
566 SAVEPPTR(PL_linestart);
567 SAVESPTR(PL_linestr);
bda19f49
JH
568 SAVEGENERICPV(PL_lex_brackstack);
569 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 570 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
571 SAVESPTR(PL_lex_stuff);
572 SAVEI32(PL_lex_defer);
09bef843 573 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 574 SAVESPTR(PL_lex_repl);
bebdddfc
GS
575 SAVEINT(PL_expect);
576 SAVEINT(PL_lex_expect);
3280af22
NIS
577
578 PL_lex_state = LEX_NORMAL;
579 PL_lex_defer = 0;
580 PL_expect = XSTATE;
581 PL_lex_brackets = 0;
cd7a8267
JC
582 Newx(PL_lex_brackstack, 120, char);
583 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
584 PL_lex_casemods = 0;
585 *PL_lex_casestack = '\0';
586 PL_lex_dojoin = 0;
587 PL_lex_starts = 0;
0e2d6244
SS
588 PL_lex_stuff = NULL;
589 PL_lex_repl = NULL;
3280af22 590 PL_lex_inpat = 0;
76be56bc 591 PL_nexttoke = 0;
3280af22 592 PL_lex_inwhat = 0;
09bef843 593 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
594 PL_linestr = line;
595 if (SvREADONLY(PL_linestr))
596 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
71a0dd65 597 s = SvPV_const(PL_linestr, len);
6f27f9a7 598 if (!len || s[len-1] != ';') {
3280af22
NIS
599 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
600 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
d7559646 601 sv_catpvs(PL_linestr, "\n;");
8990e307 602 }
3280af22
NIS
603 SvTEMP_off(PL_linestr);
604 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
605 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
0e2d6244 606 PL_last_lop = PL_last_uni = NULL;
3280af22 607 PL_rsfp = 0;
79072805 608}
a687059c 609
ffb4593c
NT
610/*
611 * Perl_lex_end
9cbb5ea2
GS
612 * Finalizer for lexing operations. Must be called when the parser is
613 * done with the lexer.
ffb4593c
NT
614 */
615
463ee0b2 616void
864dbfa3 617Perl_lex_end(pTHX)
463ee0b2 618{
3280af22 619 PL_doextract = FALSE;
463ee0b2
LW
620}
621
ffb4593c
NT
622/*
623 * S_incline
624 * This subroutine has nothing to do with tilting, whether at windmills
625 * or pinball tables. Its name is short for "increment line". It
57843af0 626 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 627 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
628 * # line 500 "foo.pm"
629 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
630 */
631
76e3520e 632STATIC void
cea2e8a9 633S_incline(pTHX_ char *s)
463ee0b2
LW
634{
635 char *t;
636 char *n;
73659bf1 637 char *e;
463ee0b2 638 char ch;
463ee0b2 639
57843af0 640 CopLINE_inc(PL_curcop);
463ee0b2
LW
641 if (*s++ != '#')
642 return;
bf4acbe4 643 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
644 if (strnEQ(s, "line", 4))
645 s += 4;
646 else
647 return;
084592ab 648 if (SPACE_OR_TAB(*s))
73659bf1 649 s++;
4e553d73 650 else
73659bf1 651 return;
bf4acbe4 652 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
653 if (!isDIGIT(*s))
654 return;
655 n = s;
656 while (isDIGIT(*s))
657 s++;
bf4acbe4 658 while (SPACE_OR_TAB(*s))
463ee0b2 659 s++;
73659bf1 660 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 661 s++;
73659bf1
GS
662 e = t + 1;
663 }
463ee0b2 664 else {
463ee0b2 665 for (t = s; !isSPACE(*t); t++) ;
73659bf1 666 e = t;
463ee0b2 667 }
bf4acbe4 668 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
669 e++;
670 if (*e != '\n' && *e != '\0')
671 return; /* false alarm */
672
463ee0b2
LW
673 ch = *t;
674 *t = '\0';
f4dd75d9 675 if (t - s > 0) {
3f3939a9 676#ifndef USE_ITHREADS
1a9219e7 677 const char * const cf = CopFILE(PL_curcop);
aae6d3c0
NC
678 STRLEN tmplen = cf ? strlen(cf) : 0;
679 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
3f3939a9
NC
680 /* must copy *{"::_<(eval N)[oldfilename:L]"}
681 * to *{"::_<newfilename"} */
682 char smallbuf[256], smallbuf2[256];
683 char *tmpbuf, *tmpbuf2;
684 GV **gvp, *gv2;
3f3939a9
NC
685 STRLEN tmplen2 = strlen(s);
686 if (tmplen + 3 < sizeof smallbuf)
687 tmpbuf = smallbuf;
688 else
689 Newx(tmpbuf, tmplen + 3, char);
690 if (tmplen2 + 3 < sizeof smallbuf2)
691 tmpbuf2 = smallbuf2;
692 else
693 Newx(tmpbuf2, tmplen2 + 3, char);
694 tmpbuf[0] = tmpbuf2[0] = '_';
695 tmpbuf[1] = tmpbuf2[1] = '<';
696 memcpy(tmpbuf + 2, cf, ++tmplen);
697 memcpy(tmpbuf2 + 2, s, ++tmplen2);
698 ++tmplen; ++tmplen2;
699 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
700 if (gvp) {
701 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
702 if (!isGV(gv2))
703 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
704 /* adjust ${"::_<newfilename"} to store the new file name */
705 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
706 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
707 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
708 }
709 if (tmpbuf != smallbuf) Safefree(tmpbuf);
710 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
711 }
712#endif
05ec9bb3 713 CopFILE_free(PL_curcop);
57843af0 714 CopFILE_set(PL_curcop, s);
f4dd75d9 715 }
463ee0b2 716 *t = ch;
57843af0 717 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
718}
719
ffb4593c
NT
720/*
721 * S_skipspace
722 * Called to gobble the appropriate amount and type of whitespace.
723 * Skips comments as well.
724 */
725
76e3520e 726STATIC char *
cea2e8a9 727S_skipspace(pTHX_ register char *s)
a687059c 728{
3280af22 729 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 730 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
731 s++;
732 return s;
733 }
734 for (;;) {
fd049845 735 STRLEN prevlen;
09bef843 736 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 737 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
738 while (s < PL_bufend && isSPACE(*s)) {
739 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
740 incline(s);
741 }
ffb4593c
NT
742
743 /* comment */
3280af22
NIS
744 if (s < PL_bufend && *s == '#') {
745 while (s < PL_bufend && *s != '\n')
463ee0b2 746 s++;
60e6418e 747 if (s < PL_bufend) {
463ee0b2 748 s++;
60e6418e
GS
749 if (PL_in_eval && !PL_rsfp) {
750 incline(s);
751 continue;
752 }
753 }
463ee0b2 754 }
ffb4593c
NT
755
756 /* only continue to recharge the buffer if we're at the end
757 * of the buffer, we're not reading from a source filter, and
758 * we're in normal lexing mode
759 */
09bef843
SB
760 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
761 PL_lex_state == LEX_FORMLINE)
463ee0b2 762 return s;
ffb4593c
NT
763
764 /* try to recharge the buffer */
9cbb5ea2 765 if ((s = filter_gets(PL_linestr, PL_rsfp,
0e2d6244 766 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2
GS
767 {
768 /* end of file. Add on the -p or -n magic */
29652248
NC
769 if (PL_minus_p) {
770 sv_setpv(PL_linestr,
771 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 772 PL_minus_n = PL_minus_p = 0;
a0d0e21e 773 }
29652248
NC
774 else if (PL_minus_n) {
775 sv_setpvn(PL_linestr, ";}", 2);
776 PL_minus_n = 0;
777 }
a0d0e21e 778 else
29652248 779 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
780
781 /* reset variables for next time we lex */
9cbb5ea2
GS
782 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
783 = SvPVX(PL_linestr);
3280af22 784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
0e2d6244 785 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
786
787 /* Close the filehandle. Could be from -P preprocessor,
788 * STDIN, or a regular file. If we were reading code from
789 * STDIN (because the commandline held no -e or filename)
790 * then we don't close it, we reset it so the code can
791 * read from STDIN too.
792 */
793
3280af22
NIS
794 if (PL_preprocess && !PL_in_eval)
795 (void)PerlProc_pclose(PL_rsfp);
796 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
797 PerlIO_clearerr(PL_rsfp);
8990e307 798 else
3280af22 799 (void)PerlIO_close(PL_rsfp);
0e2d6244 800 PL_rsfp = NULL;
463ee0b2
LW
801 return s;
802 }
ffb4593c
NT
803
804 /* not at end of file, so we only read another line */
09bef843
SB
805 /* make corresponding updates to old pointers, for yyerror() */
806 oldprevlen = PL_oldbufptr - PL_bufend;
807 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
808 if (PL_last_uni)
809 oldunilen = PL_last_uni - PL_bufend;
810 if (PL_last_lop)
811 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
812 PL_linestart = PL_bufptr = s + prevlen;
813 PL_bufend = s + SvCUR(PL_linestr);
814 s = PL_bufptr;
09bef843
SB
815 PL_oldbufptr = s + oldprevlen;
816 PL_oldoldbufptr = s + oldoldprevlen;
817 if (PL_last_uni)
818 PL_last_uni = s + oldunilen;
819 if (PL_last_lop)
820 PL_last_lop = s + oldloplen;
a0d0e21e 821 incline(s);
ffb4593c
NT
822
823 /* debugger active and we're not compiling the debugger code,
824 * so store the line into the debugger's array of lines
825 */
3280af22 826 if (PERLDB_LINE && PL_curstash != PL_debstash) {
133cdda0 827 SV * const sv = newSV(0);
8990e307
LW
828
829 sv_upgrade(sv, SVt_PVMG);
3280af22 830 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 831 (void)SvIOK_on(sv);
0da6cfda 832 SvIV_set(sv, 0);
22d66770 833 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 834 }
463ee0b2 835 }
a687059c 836}
378cc40b 837
ffb4593c
NT
838/*
839 * S_check_uni
840 * Check the unary operators to ensure there's no ambiguity in how they're
841 * used. An ambiguous piece of code would be:
842 * rand + 5
843 * This doesn't mean rand() + 5. Because rand() is a unary operator,
844 * the +5 is its argument.
845 */
846
76e3520e 847STATIC void
cea2e8a9 848S_check_uni(pTHX)
ba106d47 849{
2f3197b3 850 char *s;
a0d0e21e 851 char *t;
2f3197b3 852
3280af22 853 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 854 return;
3280af22
NIS
855 while (isSPACE(*PL_last_uni))
856 PL_last_uni++;
7e2040f0 857 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 858 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 859 return;
8916b559
NC
860
861 /* XXX Things like this are just so nasty. We shouldn't be modifying
862 source code, even if we realquick set it back. */
0453d815 863 if (ckWARN_d(WARN_AMBIGUOUS)){
a2592645 864 const char ch = *s;
0453d815 865 *s = '\0';
9014280d 866 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
219ef941 867 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
868 PL_last_uni);
869 *s = ch;
870 }
2f3197b3
LW
871}
872
ffb4593c
NT
873/*
874 * LOP : macro to build a list operator. Its behaviour has been replaced
875 * with a subroutine, S_lop() for which LOP is just another name.
876 */
877
a0d0e21e
LW
878#define LOP(f,x) return lop(f,x,s)
879
ffb4593c
NT
880/*
881 * S_lop
882 * Build a list operator (or something that might be one). The rules:
883 * - if we have a next token, then it's a list operator [why?]
884 * - if the next thing is an opening paren, then it's a function
885 * - else it's a list operator
886 */
887
76e3520e 888STATIC I32
a0be28da 889S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 890{
79072805 891 yylval.ival = f;
35c8bce7 892 CLINE;
3280af22
NIS
893 PL_expect = x;
894 PL_bufptr = s;
895 PL_last_lop = PL_oldbufptr;
eb160463 896 PL_last_lop_op = (OPCODE)f;
3280af22 897 if (PL_nexttoke)
f31bfe61 898 return REPORT(LSTOP);
79072805 899 if (*s == '(')
f31bfe61 900 return REPORT(FUNC);
79072805
LW
901 s = skipspace(s);
902 if (*s == '(')
f31bfe61 903 return REPORT(FUNC);
79072805 904 else
f31bfe61 905 return REPORT(LSTOP);
79072805
LW
906}
907
ffb4593c
NT
908/*
909 * S_force_next
9cbb5ea2 910 * When the lexer realizes it knows the next token (for instance,
ffb4593c 911 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
912 * to know what token to return the next time the lexer is called. Caller
913 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
914 * handles the token correctly.
ffb4593c
NT
915 */
916
4e553d73 917STATIC void
cea2e8a9 918S_force_next(pTHX_ I32 type)
79072805 919{
3280af22
NIS
920 PL_nexttype[PL_nexttoke] = type;
921 PL_nexttoke++;
922 if (PL_lex_state != LEX_KNOWNEXT) {
923 PL_lex_defer = PL_lex_state;
924 PL_lex_expect = PL_expect;
925 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
926 }
927}
928
057b822e
NC
929STATIC SV *
930S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
931{
932 SV *sv = newSVpvn(start,len);
933 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
934 SvUTF8_on(sv);
935 return sv;
936}
937
ffb4593c
NT
938/*
939 * S_force_word
940 * When the lexer knows the next thing is a word (for instance, it has
941 * just seen -> and it knows that the next char is a word char, then
942 * it calls S_force_word to stick the next word into the PL_next lookahead.
943 *
944 * Arguments:
b1b65b59 945 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
946 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
947 * int check_keyword : if true, Perl checks to make sure the word isn't
948 * a keyword (do this if the word is a label, e.g. goto FOO)
949 * int allow_pack : if true, : characters will also be allowed (require,
950 * use, etc. do this)
9cbb5ea2 951 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
952 */
953
76e3520e 954STATIC char *
cea2e8a9 955S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 956{
463ee0b2
LW
957 register char *s;
958 STRLEN len;
4e553d73 959
463ee0b2
LW
960 start = skipspace(start);
961 s = start;
7e2040f0 962 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 963 (allow_pack && *s == ':') ||
15f0808c 964 (allow_initial_tick && *s == '\'') )
a0d0e21e 965 {
3280af22
NIS
966 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
967 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
968 return start;
969 if (token == METHOD) {
970 s = skipspace(s);
971 if (*s == '(')
3280af22 972 PL_expect = XTERM;
463ee0b2 973 else {
3280af22 974 PL_expect = XOPERATOR;
463ee0b2 975 }
79072805 976 }
057b822e
NC
977 PL_nextval[PL_nexttoke].opval
978 = (OP*)newSVOP(OP_CONST,0,
979 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 980 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
981 force_next(token);
982 }
983 return s;
984}
985
ffb4593c
NT
986/*
987 * S_force_ident
9cbb5ea2 988 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
989 * text only contains the "foo" portion. The first argument is a pointer
990 * to the "foo", and the second argument is the type symbol to prefix.
991 * Forces the next token to be a "WORD".
9cbb5ea2 992 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
993 */
994
76e3520e 995STATIC void
a00f3e00 996S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
997{
998 if (s && *s) {
a00f3e00 999 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 1000 PL_nextval[PL_nexttoke].opval = o;
79072805 1001 force_next(WORD);
748a9306 1002 if (kind) {
11343788 1003 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1004 /* XXX see note in pp_entereval() for why we forgo typo
1005 warnings if the symbol must be introduced in an eval.
1006 GSAR 96-10-12 */
057b822e 1007 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
a0d0e21e
LW
1008 kind == '$' ? SVt_PV :
1009 kind == '@' ? SVt_PVAV :
1010 kind == '%' ? SVt_PVHV :
1011 SVt_PVGV
1012 );
748a9306 1013 }
79072805
LW
1014 }
1015}
1016
1571675a
GS
1017NV
1018Perl_str_to_version(pTHX_ SV *sv)
1019{
1020 NV retval = 0.0;
1021 NV nshift = 1.0;
1022 STRLEN len;
71a0dd65 1023 const char *start = SvPV_const(sv,len);
a2592645 1024 const char * const end = start + len;
547d29e4 1025 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1026 while (start < end) {
ba210ebe 1027 STRLEN skip;
1571675a
GS
1028 UV n;
1029 if (utf)
9041c2e3 1030 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1031 else {
1032 n = *(U8*)start;
1033 skip = 1;
1034 }
1035 retval += ((NV)n)/nshift;
1036 start += skip;
1037 nshift *= 1000;
1038 }
1039 return retval;
1040}
1041
4e553d73 1042/*
ffb4593c
NT
1043 * S_force_version
1044 * Forces the next token to be a version number.
e759cc13
RGS
1045 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1046 * and if "guessing" is TRUE, then no new token is created (and the caller
1047 * must use an alternative parsing method).
ffb4593c
NT
1048 */
1049
76e3520e 1050STATIC char *
e759cc13 1051S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
1052{
1053 OP *version = Nullop;
44dcb63b 1054 char *d;
89bfa8cd
PP
1055
1056 s = skipspace(s);
1057
44dcb63b 1058 d = s;
dd629d5b 1059 if (*d == 'v')
44dcb63b 1060 d++;
44dcb63b 1061 if (isDIGIT(*d)) {
e759cc13
RGS
1062 while (isDIGIT(*d) || *d == '_' || *d == '.')
1063 d++;
9f3d182e 1064 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1065 SV *ver;
b73d6f50 1066 s = scan_num(s, &yylval);
89bfa8cd 1067 version = yylval.opval;
dd629d5b
GS
1068 ver = cSVOPx(version)->op_sv;
1069 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1070 (void)SvUPGRADE(ver, SVt_PVNV);
0da6cfda 1071 SvNV_set(ver, str_to_version(ver));
1571675a 1072 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1073 }
89bfa8cd 1074 }
e759cc13
RGS
1075 else if (guessing)
1076 return s;
89bfa8cd
PP
1077 }
1078
1079 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1080 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1081 force_next(WORD);
89bfa8cd 1082
e759cc13 1083 return s;
89bfa8cd
PP
1084}
1085
ffb4593c
NT
1086/*
1087 * S_tokeq
1088 * Tokenize a quoted string passed in as an SV. It finds the next
1089 * chunk, up to end of string or a backslash. It may make a new
1090 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1091 * turns \\ into \.
1092 */
1093
76e3520e 1094STATIC SV *
cea2e8a9 1095S_tokeq(pTHX_ SV *sv)
79072805
LW
1096{
1097 register char *s;
1098 register char *send;
1099 register char *d;
b3ac6de7
IZ
1100 STRLEN len = 0;
1101 SV *pv = sv;
79072805
LW
1102
1103 if (!SvLEN(sv))
b3ac6de7 1104 goto finish;
79072805 1105
a0d0e21e 1106 s = SvPV_force(sv, len);
21a311ee 1107 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1108 goto finish;
463ee0b2 1109 send = s + len;
79072805
LW
1110 while (s < send && *s != '\\')
1111 s++;
1112 if (s == send)
b3ac6de7 1113 goto finish;
79072805 1114 d = s;
be4731d2 1115 if ( PL_hints & HINT_NEW_STRING ) {
5e7e76a3 1116 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1117 if (SvUTF8(sv))
1118 SvUTF8_on(pv);
1119 }
79072805
LW
1120 while (s < send) {
1121 if (*s == '\\') {
a0d0e21e 1122 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1123 s++; /* all that, just for this */
1124 }
1125 *d++ = *s++;
1126 }
1127 *d = '\0';
5e7e76a3 1128 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1129 finish:
3280af22 1130 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1131 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1132 return sv;
1133}
1134
ffb4593c
NT
1135/*
1136 * Now come three functions related to double-quote context,
1137 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1138 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1139 * interact with PL_lex_state, and create fake ( ... ) argument lists
1140 * to handle functions and concatenation.
1141 * They assume that whoever calls them will be setting up a fake
1142 * join call, because each subthing puts a ',' after it. This lets
1143 * "lower \luPpEr"
1144 * become
1145 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1146 *
1147 * (I'm not sure whether the spurious commas at the end of lcfirst's
1148 * arguments and join's arguments are created or not).
1149 */
1150
1151/*
1152 * S_sublex_start
1153 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1154 *
1155 * Pattern matching will set PL_lex_op to the pattern-matching op to
1156 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1157 *
1158 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1159 *
1160 * Everything else becomes a FUNC.
1161 *
1162 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1163 * had an OP_CONST or OP_READLINE). This just sets us up for a
1164 * call to S_sublex_push().
1165 */
1166
76e3520e 1167STATIC I32
cea2e8a9 1168S_sublex_start(pTHX)
79072805 1169{
3bad88ff 1170 register const I32 op_type = yylval.ival;
79072805
LW
1171
1172 if (op_type == OP_NULL) {
3280af22
NIS
1173 yylval.opval = PL_lex_op;
1174 PL_lex_op = Nullop;
79072805
LW
1175 return THING;
1176 }
1177 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1178 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1179
1180 if (SvTYPE(sv) == SVt_PVIV) {
1181 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1182 STRLEN len;
71a0dd65 1183 const char *p = SvPV_const(sv, len);
24c2fff4 1184 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1185 if (SvUTF8(sv))
1186 SvUTF8_on(nsv);
b3ac6de7
IZ
1187 SvREFCNT_dec(sv);
1188 sv = nsv;
4e553d73 1189 }
b3ac6de7 1190 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
0e2d6244 1191 PL_lex_stuff = NULL;
79072805
LW
1192 return THING;
1193 }
1194
3280af22
NIS
1195 PL_sublex_info.super_state = PL_lex_state;
1196 PL_sublex_info.sub_inwhat = op_type;
1197 PL_sublex_info.sub_op = PL_lex_op;
1198 PL_lex_state = LEX_INTERPPUSH;
55497cff 1199
3280af22
NIS
1200 PL_expect = XTERM;
1201 if (PL_lex_op) {
1202 yylval.opval = PL_lex_op;
1203 PL_lex_op = Nullop;
55497cff
PP
1204 return PMFUNC;
1205 }
1206 else
1207 return FUNC;
1208}
1209
ffb4593c
NT
1210/*
1211 * S_sublex_push
1212 * Create a new scope to save the lexing state. The scope will be
1213 * ended in S_sublex_done. Returns a '(', starting the function arguments
1214 * to the uc, lc, etc. found before.
1215 * Sets PL_lex_state to LEX_INTERPCONCAT.
1216 */
1217
76e3520e 1218STATIC I32
cea2e8a9 1219S_sublex_push(pTHX)
55497cff 1220{
f46d017c 1221 ENTER;
55497cff 1222
3280af22
NIS
1223 PL_lex_state = PL_sublex_info.super_state;
1224 SAVEI32(PL_lex_dojoin);
1225 SAVEI32(PL_lex_brackets);
3280af22
NIS
1226 SAVEI32(PL_lex_casemods);
1227 SAVEI32(PL_lex_starts);
1228 SAVEI32(PL_lex_state);
7766f137 1229 SAVEVPTR(PL_lex_inpat);
3280af22 1230 SAVEI32(PL_lex_inwhat);
57843af0 1231 SAVECOPLINE(PL_curcop);
3280af22 1232 SAVEPPTR(PL_bufptr);
8452ff4b 1233 SAVEPPTR(PL_bufend);
3280af22
NIS
1234 SAVEPPTR(PL_oldbufptr);
1235 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1236 SAVEPPTR(PL_last_lop);
1237 SAVEPPTR(PL_last_uni);
3280af22
NIS
1238 SAVEPPTR(PL_linestart);
1239 SAVESPTR(PL_linestr);
bda19f49
JH
1240 SAVEGENERICPV(PL_lex_brackstack);
1241 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1242
1243 PL_linestr = PL_lex_stuff;
0e2d6244 1244 PL_lex_stuff = NULL;
3280af22 1245
9cbb5ea2
GS
1246 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1247 = SvPVX(PL_linestr);
3280af22 1248 PL_bufend += SvCUR(PL_linestr);
0e2d6244 1249 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1250 SAVEFREESV(PL_linestr);
1251
1252 PL_lex_dojoin = FALSE;
1253 PL_lex_brackets = 0;
cd7a8267
JC
1254 Newx(PL_lex_brackstack, 120, char);
1255 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1256 PL_lex_casemods = 0;
1257 *PL_lex_casestack = '\0';
1258 PL_lex_starts = 0;
1259 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1260 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1261
1262 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1263 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1264 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1265 else
3280af22 1266 PL_lex_inpat = Nullop;
79072805 1267
55497cff 1268 return '(';
79072805
LW
1269}
1270
ffb4593c
NT
1271/*
1272 * S_sublex_done
1273 * Restores lexer state after a S_sublex_push.
1274 */
1275
76e3520e 1276STATIC I32
cea2e8a9 1277S_sublex_done(pTHX)
79072805 1278{
3280af22 1279 if (!PL_lex_starts++) {
d7559646 1280 SV * const sv = newSVpvs("");
9aa983d2
JH
1281 if (SvUTF8(PL_linestr))
1282 SvUTF8_on(sv);
3280af22 1283 PL_expect = XOPERATOR;
9aa983d2 1284 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1285 return THING;
1286 }
1287
3280af22
NIS
1288 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1289 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1290 return yylex();
79072805
LW
1291 }
1292
ffb4593c 1293 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1294 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1295 PL_linestr = PL_lex_repl;
1296 PL_lex_inpat = 0;
1297 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1298 PL_bufend += SvCUR(PL_linestr);
0e2d6244 1299 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1300 SAVEFREESV(PL_linestr);
1301 PL_lex_dojoin = FALSE;
1302 PL_lex_brackets = 0;
3280af22
NIS
1303 PL_lex_casemods = 0;
1304 *PL_lex_casestack = '\0';
1305 PL_lex_starts = 0;
25da4f38 1306 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1307 PL_lex_state = LEX_INTERPNORMAL;
1308 PL_lex_starts++;
e9fa98b2
HS
1309 /* we don't clear PL_lex_repl here, so that we can check later
1310 whether this is an evalled subst; that means we rely on the
1311 logic to ensure sublex_done() is called again only via the
1312 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1313 }
e9fa98b2 1314 else {
3280af22 1315 PL_lex_state = LEX_INTERPCONCAT;
0e2d6244 1316 PL_lex_repl = NULL;
e9fa98b2 1317 }
79072805 1318 return ',';
ffed7fef
LW
1319 }
1320 else {
f46d017c 1321 LEAVE;
3280af22
NIS
1322 PL_bufend = SvPVX(PL_linestr);
1323 PL_bufend += SvCUR(PL_linestr);
1324 PL_expect = XOPERATOR;
09bef843 1325 PL_sublex_info.sub_inwhat = 0;
79072805 1326 return ')';
ffed7fef
LW
1327 }
1328}
1329
02aa26ce
NT
1330/*
1331 scan_const
1332
1333 Extracts a pattern, double-quoted string, or transliteration. This
1334 is terrifying code.
1335
3280af22
NIS
1336 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1337 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1338 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1339
9b599b2a
GS
1340 Returns a pointer to the character scanned up to. Iff this is
1341 advanced from the start pointer supplied (ie if anything was
1342 successfully parsed), will leave an OP for the substring scanned
1343 in yylval. Caller must intuit reason for not parsing further
1344 by looking at the next characters herself.
1345
02aa26ce
NT
1346 In patterns:
1347 backslashes:
1348 double-quoted style: \r and \n
1349 regexp special ones: \D \s
1350 constants: \x3
1351 backrefs: \1 (deprecated in substitution replacements)
1352 case and quoting: \U \Q \E
1353 stops on @ and $, but not for $ as tail anchor
1354
1355 In transliterations:
1356 characters are VERY literal, except for - not at the start or end
1357 of the string, which indicates a range. scan_const expands the
1358 range to the full set of intermediate characters.
1359
1360 In double-quoted strings:
1361 backslashes:
1362 double-quoted style: \r and \n
1363 constants: \x3
1364 backrefs: \1 (deprecated)
1365 case and quoting: \U \Q \E
1366 stops on @ and $
1367
1368 scan_const does *not* construct ops to handle interpolated strings.
1369 It stops processing as soon as it finds an embedded $ or @ variable
1370 and leaves it to the caller to work out what's going on.
1371
da6eedaa 1372 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1373
1374 $ in pattern could be $foo or could be tail anchor. Assumption:
1375 it's a tail anchor if $ is the last thing in the string, or if it's
1376 followed by one of ")| \n\t"
1377
1378 \1 (backreferences) are turned into $1
1379
1380 The structure of the code is
1381 while (there's a character to process) {
1382 handle transliteration ranges
1383 skip regexp comments
1384 skip # initiated comments in //x patterns
1385 check for embedded @foo
1386 check for embedded scalars
1387 if (backslash) {
1388 leave intact backslashes from leave (below)
1389 deprecate \1 in strings and sub replacements
1390 handle string-changing backslashes \l \U \Q \E, etc.
1391 switch (what was escaped) {
1392 handle - in a transliteration (becomes a literal -)
1393 handle \132 octal characters
1394 handle 0x15 hex characters
1395 handle \cV (control V)
1396 handle printf backslashes (\f, \r, \n, etc)
1397 } (end switch)
1398 } (end if backslash)
1399 } (end while character to read)
4e553d73 1400
02aa26ce
NT
1401*/
1402
76e3520e 1403STATIC char *
cea2e8a9 1404S_scan_const(pTHX_ char *start)
79072805 1405{
3280af22 1406 register char *send = PL_bufend; /* end of the constant */
133cdda0 1407 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1408 register char *s = start; /* start of the constant */
1409 register char *d = SvPVX(sv); /* destination for copies */
1410 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1411 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1412 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1413 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1414 UV uv;
48ea04fa
ST
1415#ifdef EBCDIC
1416 UV literal_endpoint = 0;
1417#endif
012bcf8d 1418
dff6d3cd 1419 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1420 PL_lex_inpat
b035a42e 1421 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1422 : "";
79072805 1423
2b9d42f0
NIS
1424 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1425 /* If we are doing a trans and we know we want UTF8 set expectation */
1426 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1427 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1428 }
1429
1430
79072805 1431 while (s < send || dorange) {
02aa26ce 1432 /* get transliterations out of the way (they're most literal) */
3280af22 1433 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1434 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1435 if (dorange) {
1ba5c669
JH
1436 I32 i; /* current expanded character */
1437 I32 min; /* first character in range */
1438 I32 max; /* last character in range */
02aa26ce 1439
2b9d42f0 1440 if (has_utf8) {
a2592645 1441 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1442 char *e = d++;
1443 while (e-- > c)
1444 *(e + 1) = *e;
25716404 1445 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1446 /* mark the range as done, and continue */
1447 dorange = FALSE;
1448 didrange = TRUE;
1449 continue;
1450 }
2b9d42f0 1451
5e7e76a3 1452 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1453 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1454 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1455 d -= 2; /* eat the first char and the - */
1456
8ada0baa
JH
1457 min = (U8)*d; /* first char in range */
1458 max = (U8)d[1]; /* last char in range */
1459
c2e66d9e 1460 if (min > max) {
01ec43d0 1461 Perl_croak(aTHX_
5b7ea690 1462 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1463 (char)min, (char)max);
c2e66d9e
GS
1464 }
1465
c7f1f016 1466#ifdef EBCDIC
48ea04fa
ST
1467 if (literal_endpoint == 2 &&
1468 ((isLOWER(min) && isLOWER(max)) ||
1469 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1470 if (isLOWER(min)) {
1471 for (i = min; i <= max; i++)
1472 if (isLOWER(i))
db42d148 1473 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1474 } else {
1475 for (i = min; i <= max; i++)
1476 if (isUPPER(i))
db42d148 1477 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1478 }
1479 }
1480 else
1481#endif
1482 for (i = min; i <= max; i++)
eb160463 1483 *d++ = (char)i;
02aa26ce
NT
1484
1485 /* mark the range as done, and continue */
79072805 1486 dorange = FALSE;
01ec43d0 1487 didrange = TRUE;
48ea04fa
ST
1488#ifdef EBCDIC
1489 literal_endpoint = 0;
1490#endif
79072805 1491 continue;
4e553d73 1492 }
02aa26ce
NT
1493
1494 /* range begins (ignore - as first or last char) */
79072805 1495 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1496 if (didrange) {
1fafa243 1497 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1498 }
2b9d42f0 1499 if (has_utf8) {
25716404 1500 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1501 s++;
1502 continue;
1503 }
79072805
LW
1504 dorange = TRUE;
1505 s++;
01ec43d0
GS
1506 }
1507 else {
1508 didrange = FALSE;
48ea04fa
ST
1509#ifdef EBCDIC
1510 literal_endpoint = 0;
1511#endif
01ec43d0 1512 }
79072805 1513 }
02aa26ce
NT
1514
1515 /* if we get here, we're not doing a transliteration */
1516
0f5d15d6
IZ
1517 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1518 except for the last char, which will be done separately. */
3280af22 1519 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1520 if (s[2] == '#') {
a00160b9 1521 while (s+1 < send && *s != ')')
db42d148 1522 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1523 }
1524 else if (s[2] == '{' /* This should match regcomp.c */
1525 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1526 {
cc6b7395 1527 I32 count = 1;
0f5d15d6 1528 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1529 char c;
1530
d9f97599
GS
1531 while (count && (c = *regparse)) {
1532 if (c == '\\' && regparse[1])
1533 regparse++;
4e553d73 1534 else if (c == '{')
cc6b7395 1535 count++;
4e553d73 1536 else if (c == '}')
cc6b7395 1537 count--;
d9f97599 1538 regparse++;
cc6b7395 1539 }
a00160b9 1540 if (*regparse != ')')
5bdf89e7 1541 regparse--; /* Leave one char for continuation. */
0f5d15d6 1542 while (s < regparse)
db42d148 1543 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1544 }
748a9306 1545 }
02aa26ce
NT
1546
1547 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1548 else if (*s == '#' && PL_lex_inpat &&
1549 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1550 while (s+1 < send && *s != '\n')
db42d148 1551 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1552 }
02aa26ce 1553
5d1d4326 1554 /* check for embedded arrays
da6eedaa 1555 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1556 */
7e2040f0 1557 else if (*s == '@' && s[1]
5d1d4326 1558 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1559 break;
02aa26ce
NT
1560
1561 /* check for embedded scalars. only stop if we're sure it's a
1562 variable.
1563 */
79072805 1564 else if (*s == '$') {
3280af22 1565 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1566 break;
6002328a 1567 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1568 break; /* in regexp, $ might be tail anchor */
1569 }
02aa26ce 1570
2b9d42f0
NIS
1571 /* End of else if chain - OP_TRANS rejoin rest */
1572
02aa26ce 1573 /* backslashes */
79072805
LW
1574 if (*s == '\\' && s+1 < send) {
1575 s++;
02aa26ce
NT
1576
1577 /* some backslashes we leave behind */
c9f97d15 1578 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1579 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1580 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1581 continue;
1582 }
02aa26ce
NT
1583
1584 /* deprecate \1 in strings and substitution replacements */
3280af22 1585 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1586 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1587 {
599cee73 1588 if (ckWARN(WARN_SYNTAX))
9014280d 1589 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1590 *--s = '$';
1591 break;
1592 }
02aa26ce
NT
1593
1594 /* string-change backslash escapes */
3280af22 1595 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1596 --s;
1597 break;
1598 }
02aa26ce
NT
1599
1600 /* if we get here, it's either a quoted -, or a digit */
79072805 1601 switch (*s) {
02aa26ce
NT
1602
1603 /* quoted - in transliterations */
79072805 1604 case '-':
3280af22 1605 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1606 *d++ = *s++;
1607 continue;
1608 }
1609 /* FALL THROUGH */
1610 default:
11b8faa4 1611 {
f5e9f069
NC
1612 if (isALNUM(*s) &&
1613 *s != '_' &&
1614 ckWARN(WARN_MISC))
9014280d 1615 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1616 "Unrecognized escape \\%c passed through",
1617 *s);
1618 /* default action is to copy the quoted character */
f9a63242 1619 goto default_action;
11b8faa4 1620 }
02aa26ce
NT
1621
1622 /* \132 indicates an octal constant */
79072805
LW
1623 case '0': case '1': case '2': case '3':
1624 case '4': case '5': case '6': case '7':
ba210ebe 1625 {
53305cf1
NC
1626 I32 flags = 0;
1627 STRLEN len = 3;
1628 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1629 s += len;
1630 }
012bcf8d 1631 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1632
1633 /* \x24 indicates a hex constant */
79072805 1634 case 'x':
a0ed51b3
LW
1635 ++s;
1636 if (*s == '{') {
a2592645 1637 char* const e = strchr(s, '}');
a4c04bdc
NC
1638 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1639 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1640 STRLEN len;
355860ce 1641
53305cf1 1642 ++s;
adaeee49 1643 if (!e) {
a0ed51b3 1644 yyerror("Missing right brace on \\x{}");
355860ce 1645 continue;
ba210ebe 1646 }
53305cf1
NC
1647 len = e - s;
1648 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1649 s = e + 1;
a0ed51b3
LW
1650 }
1651 else {
ba210ebe 1652 {
53305cf1 1653 STRLEN len = 2;
a4c04bdc 1654 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1655 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1656 s += len;
1657 }
012bcf8d
GS
1658 }
1659
1660 NUM_ESCAPE_INSERT:
1661 /* Insert oct or hex escaped character.
301d3d20 1662 * There will always enough room in sv since such
db42d148 1663 * escapes will be longer than any UTF-8 sequence
301d3d20 1664 * they can end up as. */
ba7cea30 1665
c7f1f016
NIS
1666 /* We need to map to chars to ASCII before doing the tests
1667 to cover EBCDIC
1668 */
c4d5f83a 1669 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1670 if (!has_utf8 && uv > 255) {
301d3d20
JH
1671 /* Might need to recode whatever we have
1672 * accumulated so far if it contains any
1673 * hibit chars.
1674 *
1675 * (Can't we keep track of that and avoid
1676 * this rescan? --jhi)
012bcf8d 1677 */
c7f1f016 1678 int hicount = 0;
63cd0674
NIS
1679 U8 *c;
1680 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1681 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1682 hicount++;
db42d148 1683 }
012bcf8d 1684 }
63cd0674 1685 if (hicount) {
a2592645 1686 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1687 U8 *src, *dst;
1688 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1689 src = (U8 *)d - 1;
1690 dst = src+hicount;
1691 d += hicount;
71a0dd65 1692 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1693 if (!NATIVE_IS_INVARIANT(*src)) {
a2592645 1694 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1695 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1696 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1697 }
1698 else {
63cd0674 1699 *dst-- = *src;
012bcf8d 1700 }
c7f1f016 1701 src--;
012bcf8d
GS
1702 }
1703 }
1704 }
1705
9aa983d2 1706 if (has_utf8 || uv > 255) {
9041c2e3 1707 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1708 has_utf8 = TRUE;
f9a63242
JH
1709 if (PL_lex_inwhat == OP_TRANS &&
1710 PL_sublex_info.sub_op) {
1711 PL_sublex_info.sub_op->op_private |=
1712 (PL_lex_repl ? OPpTRANS_FROM_UTF
1713 : OPpTRANS_TO_UTF);
f9a63242 1714 }
012bcf8d 1715 }
a0ed51b3 1716 else {
012bcf8d 1717 *d++ = (char)uv;
a0ed51b3 1718 }
012bcf8d
GS
1719 }
1720 else {
c4d5f83a 1721 *d++ = (char) uv;
a0ed51b3 1722 }
79072805 1723 continue;
02aa26ce 1724
b239daa5 1725 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1726 case 'N':
55eda711 1727 ++s;
423cee85
JH
1728 if (*s == '{') {
1729 char* e = strchr(s, '}');
155aba94 1730 SV *res;
423cee85 1731 STRLEN len;
71a0dd65 1732 const char *str;
4e553d73 1733
423cee85 1734 if (!e) {
5777a3f7 1735 yyerror("Missing right brace on \\N{}");
423cee85
JH
1736 e = s - 1;
1737 goto cont_scan;
1738 }
dbc0d4f2
JH
1739 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1740 /* \N{U+...} */
1741 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1742 PERL_SCAN_DISALLOW_PREFIX;
1743 s += 3;
1744 len = e - s;
1745 uv = grok_hex(s, &len, &flags, NULL);
1746 s = e + 1;
1747 goto NUM_ESCAPE_INSERT;
1748 }
55eda711 1749 res = newSVpvn(s + 1, e - s - 1);
0e2d6244
SS
1750 res = new_constant( NULL, 0, "charnames",
1751 res, NULL, "\\N{...}" );
f9a63242
JH
1752 if (has_utf8)
1753 sv_utf8_upgrade(res);
71a0dd65 1754 str = SvPV_const(res,len);
1c47067b
JH
1755#ifdef EBCDIC_NEVER_MIND
1756 /* charnames uses pack U and that has been
1757 * recently changed to do the below uni->native
1758 * mapping, so this would be redundant (and wrong,
1759 * the code point would be doubly converted).
1760 * But leave this in just in case the pack U change
1761 * gets revoked, but the semantics is still
1762 * desireable for charnames. --jhi */
cddc7ef4 1763 {
71a0dd65 1764 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1765
1766 if (uv < 0x100) {
a2a469f9 1767 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1768
1769 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1770 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
71a0dd65 1771 str = SvPV_const(res, len);
cddc7ef4
JH
1772 }
1773 }
1774#endif
89491803 1775 if (!has_utf8 && SvUTF8(res)) {
a2592645 1776 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1777 SvCUR_set(sv, d - ostart);
1778 SvPOK_on(sv);
e4f3eed8 1779 *d = '\0';
f08d6ad9 1780 sv_utf8_upgrade(sv);
d2f449dd 1781 /* this just broke our allocation above... */
eb160463 1782 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1783 d = SvPVX(sv) + SvCUR(sv);
89491803 1784 has_utf8 = TRUE;
f08d6ad9 1785 }
eb160463 1786 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
a2592645 1787 const char * const odest = SvPVX_const(sv);
423cee85 1788
8973db79 1789 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1790 d = SvPVX(sv) + (d - odest);
1791 }
1792 Copy(str, d, len, char);
1793 d += len;
1794 SvREFCNT_dec(res);
1795 cont_scan:
1796 s = e + 1;
1797 }
1798 else
5777a3f7 1799 yyerror("Missing braces on \\N{}");
423cee85
JH
1800 continue;
1801
02aa26ce 1802 /* \c is a control character */
79072805
LW
1803 case 'c':
1804 s++;
7223e9d8 1805 if (s < send) {
ba210ebe 1806 U8 c = *s++;
c7f1f016
NIS
1807#ifdef EBCDIC
1808 if (isLOWER(c))
1809 c = toUPPER(c);
1810#endif
db42d148 1811 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1812 }
7223e9d8
JH
1813 else {
1814 yyerror("Missing control char name in \\c");
1815 }
79072805 1816 continue;
02aa26ce
NT
1817
1818 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1819 case 'b':
db42d148 1820 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1821 break;
1822 case 'n':
db42d148 1823 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1824 break;
1825 case 'r':
db42d148 1826 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1827 break;
1828 case 'f':
db42d148 1829 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1830 break;
1831 case 't':
db42d148 1832 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1833 break;
34a3fe2a 1834 case 'e':
db42d148 1835 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1836 break;
1837 case 'a':
db42d148 1838 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1839 break;
02aa26ce
NT
1840 } /* end switch */
1841
79072805
LW
1842 s++;
1843 continue;
02aa26ce 1844 } /* end if (backslash) */
48ea04fa
ST
1845#ifdef EBCDIC
1846 else
1847 literal_endpoint++;
1848#endif
02aa26ce 1849
f9a63242 1850 default_action:
2b9d42f0
NIS
1851 /* If we started with encoded form, or already know we want it
1852 and then encode the next character */
1853 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1854 STRLEN len = 1;
a2592645
NC
1855 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1856 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1857 s += len;
1858 if (need > len) {
1859 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
a2592645 1860 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1861 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1862 }
1863 d = (char*)uvchr_to_utf8((U8*)d, uv);
1864 has_utf8 = TRUE;
1865 }
1866 else {
1867 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1868 }
02aa26ce
NT
1869 } /* while loop to process each character */
1870
1871 /* terminate the string and set up the sv */
79072805 1872 *d = '\0';
5e7e76a3 1873 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1874 if (SvCUR(sv) >= SvLEN(sv))
5b7ea690 1875 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1876
79072805 1877 SvPOK_on(sv);
9f4817db 1878 if (PL_encoding && !has_utf8) {
5b7ea690
JH
1879 sv_recode_to_utf8(sv, PL_encoding);
1880 if (SvUTF8(sv))
1881 has_utf8 = TRUE;
9f4817db 1882 }
2b9d42f0 1883 if (has_utf8) {
7e2040f0 1884 SvUTF8_on(sv);
2b9d42f0 1885 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
5b7ea690 1886 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1887 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1888 }
1889 }
79072805 1890
02aa26ce 1891 /* shrink the sv if we allocated more than we used */
79072805 1892 if (SvCUR(sv) + 5 < SvLEN(sv)) {
ea5389ca 1893 SvPV_shrink_to_cur(sv);
79072805 1894 }
02aa26ce 1895
9b599b2a 1896 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1897 if (s > PL_bufptr) {
1898 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1899 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
0e2d6244 1900 sv, NULL,
4e553d73 1901 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1902 ? "tr"
3280af22 1903 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1904 ? "s"
1905 : "qq")));
79072805 1906 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1907 } else
8990e307 1908 SvREFCNT_dec(sv);
79072805
LW
1909 return s;
1910}
1911
ffb4593c
NT
1912/* S_intuit_more
1913 * Returns TRUE if there's more to the expression (e.g., a subscript),
1914 * FALSE otherwise.
ffb4593c
NT
1915 *
1916 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1917 *
1918 * ->[ and ->{ return TRUE
1919 * { and [ outside a pattern are always subscripts, so return TRUE
1920 * if we're outside a pattern and it's not { or [, then return FALSE
1921 * if we're in a pattern and the first char is a {
1922 * {4,5} (any digits around the comma) returns FALSE
1923 * if we're in a pattern and the first char is a [
1924 * [] returns FALSE
1925 * [SOMETHING] has a funky algorithm to decide whether it's a
1926 * character class or not. It has to deal with things like
1927 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1928 * anything else returns TRUE
1929 */
1930
9cbb5ea2
GS
1931/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1932
76e3520e 1933STATIC int
cea2e8a9 1934S_intuit_more(pTHX_ register char *s)
79072805 1935{
3280af22 1936 if (PL_lex_brackets)
79072805
LW
1937 return TRUE;
1938 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1939 return TRUE;
1940 if (*s != '{' && *s != '[')
1941 return FALSE;
3280af22 1942 if (!PL_lex_inpat)
79072805
LW
1943 return TRUE;
1944
1945 /* In a pattern, so maybe we have {n,m}. */
1946 if (*s == '{') {
1947 s++;
1948 if (!isDIGIT(*s))
1949 return TRUE;
1950 while (isDIGIT(*s))
1951 s++;
1952 if (*s == ',')
1953 s++;
1954 while (isDIGIT(*s))
1955 s++;
1956 if (*s == '}')
1957 return FALSE;
1958 return TRUE;
1959
1960 }
1961
1962 /* On the other hand, maybe we have a character class */
1963
1964 s++;
1965 if (*s == ']' || *s == '^')
1966 return FALSE;
1967 else {
ffb4593c 1968 /* this is terrifying, and it works */
79072805
LW
1969 int weight = 2; /* let's weigh the evidence */
1970 char seen[256];
f27ffc4a 1971 unsigned char un_char = 255, last_un_char;
a2592645 1972 const char * const send = strchr(s,']');
3280af22 1973 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1974
1975 if (!send) /* has to be an expression */
1976 return TRUE;
1977
1978 Zero(seen,256,char);
1979 if (*s == '$')
1980 weight -= 3;
1981 else if (isDIGIT(*s)) {
1982 if (s[1] != ']') {
1983 if (isDIGIT(s[1]) && s[2] == ']')
1984 weight -= 10;
1985 }
1986 else
1987 weight -= 100;
1988 }
1989 for (; s < send; s++) {
1990 last_un_char = un_char;
1991 un_char = (unsigned char)*s;
1992 switch (*s) {
1993 case '@':
1994 case '&':
1995 case '$':
1996 weight -= seen[un_char] * 10;
7e2040f0 1997 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1998 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
057b822e
NC
1999 if ((int)strlen(tmpbuf) > 1
2000 && gv_fetchpv(tmpbuf, 0, SVt_PV))
79072805
LW
2001 weight -= 100;
2002 else
2003 weight -= 10;
2004 }
2005 else if (*s == '$' && s[1] &&
93a17b20
LW
2006 strchr("[#!%*<>()-=",s[1])) {
2007 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2008 weight -= 10;
2009 else
2010 weight -= 1;
2011 }
2012 break;
2013 case '\\':
2014 un_char = 254;
2015 if (s[1]) {
93a17b20 2016 if (strchr("wds]",s[1]))
79072805
LW
2017 weight += 100;
2018 else if (seen['\''] || seen['"'])
2019 weight += 1;
93a17b20 2020 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2021 weight += 40;
2022 else if (isDIGIT(s[1])) {
2023 weight += 40;
2024 while (s[1] && isDIGIT(s[1]))
2025 s++;
2026 }
2027 }
2028 else
2029 weight += 100;
2030 break;
2031 case '-':
2032 if (s[1] == '\\')
2033 weight += 50;
93a17b20 2034 if (strchr("aA01! ",last_un_char))
79072805 2035 weight += 30;
93a17b20 2036 if (strchr("zZ79~",s[1]))
79072805 2037 weight += 30;
f27ffc4a
GS
2038 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2039 weight -= 5; /* cope with negative subscript */
79072805
LW
2040 break;
2041 default:
3c71ae1e
NC
2042 if (!isALNUM(last_un_char)
2043 && !(last_un_char == '$' || last_un_char == '@'
2044 || last_un_char == '&')
2045 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2046 char *d = tmpbuf;
2047 while (isALPHA(*s))
2048 *d++ = *s++;
2049 *d = '\0';
2050 if (keyword(tmpbuf, d - tmpbuf))
2051 weight -= 150;
2052 }
2053 if (un_char == last_un_char + 1)
2054 weight += 5;
2055 weight -= seen[un_char];
2056 break;
2057 }
2058 seen[un_char]++;
2059 }
2060 if (weight >= 0) /* probably a character class */
2061 return FALSE;
2062 }
2063
2064 return TRUE;
2065}
ffed7fef 2066
ffb4593c
NT
2067/*
2068 * S_intuit_method
2069 *
2070 * Does all the checking to disambiguate
2071 * foo bar
2072 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2073 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2074 *
2075 * First argument is the stuff after the first token, e.g. "bar".
2076 *
2077 * Not a method if bar is a filehandle.
2078 * Not a method if foo is a subroutine prototyped to take a filehandle.
2079 * Not a method if it's really "Foo $bar"
2080 * Method if it's "foo $bar"
2081 * Not a method if it's really "print foo $bar"
2082 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2083 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2084 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2085 * =>
2086 */
2087
76e3520e 2088STATIC int
a92f283d 2089S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e
LW
2090{
2091 char *s = start + (*start == '$');
3280af22 2092 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2093 STRLEN len;
2094 GV* indirgv;
2095
2096 if (gv) {
a92f283d 2097 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2098 return 0;
a92f283d
NC
2099 if (cv) {
2100 if (SvPOK(cv)) {
2101 const char *proto = SvPVX_const(cv);
2102 if (proto) {
2103 if (*proto == ';')
2104 proto++;
2105 if (*proto == '*')
2106 return 0;
2107 }
b6c543e3
IZ
2108 }
2109 } else
a0d0e21e
LW
2110 gv = 0;
2111 }
8903cb82 2112 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2113 /* start is the beginning of the possible filehandle/object,
2114 * and s is the end of it
2115 * tmpbuf is a copy of it
2116 */
2117
a0d0e21e 2118 if (*start == '$') {
3280af22 2119 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2120 return 0;
2121 s = skipspace(s);
3280af22
NIS
2122 PL_bufptr = start;
2123 PL_expect = XREF;
a0d0e21e
LW
2124 return *s == '(' ? FUNCMETH : METHOD;
2125 }
2126 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2127 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2128 len -= 2;
2129 tmpbuf[len] = '\0';
2130 goto bare_package;
2131 }
057b822e 2132 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
8ebc5c01 2133 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2134 return 0;
2135 /* filehandle or package name makes it a method */
89bfa8cd 2136 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2137 s = skipspace(s);
3280af22 2138 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2139 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2140 bare_package:
3280af22 2141 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2142 newSVpvn(tmpbuf,len));
3280af22
NIS
2143 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2144 PL_expect = XTERM;
a0d0e21e 2145 force_next(WORD);
3280af22 2146 PL_bufptr = s;
a0d0e21e
LW
2147 return *s == '(' ? FUNCMETH : METHOD;
2148 }
2149 }
2150 return 0;
2151}
2152
ffb4593c
NT
2153/*
2154 * S_incl_perldb
2155 * Return a string of Perl code to load the debugger. If PERL5DB
2156 * is set, it will return the contents of that, otherwise a
2157 * compile-time require of perl5db.pl.
2158 */
2159
a00f3e00 2160STATIC const char*
cea2e8a9 2161S_incl_perldb(pTHX)
a0d0e21e 2162{
3280af22 2163 if (PL_perldb) {
a2592645 2164 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2165
2166 if (pdb)
2167 return pdb;
5b7ea690 2168 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2169 return "BEGIN { require 'perl5db.pl' }";
2170 }
2171 return "";
2172}
2173
2174
16d20bd9 2175/* Encoded script support. filter_add() effectively inserts a
4e553d73 2176 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2177 * Note that the filter function only applies to the current source file
2178 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2179 *
2180 * The datasv parameter (which may be NULL) can be used to pass
2181 * private data to this instance of the filter. The filter function
2182 * can recover the SV using the FILTER_DATA macro and use it to
2183 * store private buffers and state information.
2184 *
2185 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2186 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2187 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2188 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2189 * private use must be set using malloc'd pointers.
2190 */
16d20bd9
AD
2191
2192SV *
864dbfa3 2193Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2194{
f4c556ac 2195 if (!funcp)
0e2d6244 2196 return NULL;
f4c556ac 2197
3280af22
NIS
2198 if (!PL_rsfp_filters)
2199 PL_rsfp_filters = newAV();
16d20bd9 2200 if (!datasv)
133cdda0 2201 datasv = newSV(0);
d2e2df35 2202 (void)SvUPGRADE(datasv, SVt_PVIO);
1c7ab622 2203 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2204 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2205 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1c7ab622 2206 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2207 av_unshift(PL_rsfp_filters, 1);
2208 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2209 return(datasv);
2210}
4e553d73 2211
16d20bd9
AD
2212
2213/* Delete most recently added instance of this filter function. */
a0d0e21e 2214void
864dbfa3 2215Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2216{
e0c19803 2217 SV *datasv;
cf011bf2 2218
1c7ab622
NC
2219#ifdef DEBUGGING
2220 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2221#endif
3280af22 2222 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2223 return;
2224 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2225 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1c7ab622 2226 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2227 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2228 IoANY(datasv) = (void *)NULL;
3280af22 2229 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2230
16d20bd9
AD
2231 return;
2232 }
2233 /* we need to search for the correct entry and clear it */
cea2e8a9 2234 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2235}
2236
2237
6dc97886
NC
2238/* Invoke the idxth filter function for the current rsfp. */
2239/* maxlen 0 = read one text line */
16d20bd9 2240I32
864dbfa3 2241Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2242{
16d20bd9
AD
2243 filter_t funcp;
2244 SV *datasv = NULL;
e50aee73 2245
3280af22 2246 if (!PL_rsfp_filters)
16d20bd9 2247 return -1;
6dc97886 2248 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2249 /* Provide a default input filter to make life easy. */
2250 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2251 DEBUG_P(PerlIO_printf(Perl_debug_log,
2252 "filter_read %d: from rsfp\n", idx));
4e553d73 2253 if (maxlen) {
16d20bd9
AD
2254 /* Want a block */
2255 int len ;
24c2fff4 2256 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2257
2258 /* ensure buf_sv is large enough */
eb160463 2259 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2260 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2261 if (PerlIO_error(PL_rsfp))
37120919
AD
2262 return -1; /* error */
2263 else
2264 return 0 ; /* end of file */
2265 }
16d20bd9
AD
2266 SvCUR_set(buf_sv, old_len + len) ;
2267 } else {
2268 /* Want a line */
3280af22
NIS
2269 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2270 if (PerlIO_error(PL_rsfp))
37120919
AD
2271 return -1; /* error */
2272 else
2273 return 0 ; /* end of file */
2274 }
16d20bd9
AD
2275 }
2276 return SvCUR(buf_sv);
2277 }
2278 /* Skip this filter slot if filter has been deleted */
6dc97886 2279 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2280 DEBUG_P(PerlIO_printf(Perl_debug_log,
2281 "filter_read %d: skipped (filter deleted)\n",
2282 idx));
16d20bd9
AD
2283 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2284 }
2285 /* Get function pointer hidden within datasv */
1c7ab622 2286 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2287 DEBUG_P(PerlIO_printf(Perl_debug_log,
2288 "filter_read %d: via function %p (%s)\n",
71a0dd65 2289 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2290 /* Call function. The function is expected to */
2291 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2292 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2293 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2294}
2295
76e3520e 2296STATIC char *
cea2e8a9 2297S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2298{
c39cd008 2299#ifdef PERL_CR_FILTER
3280af22 2300 if (!PL_rsfp_filters) {
c39cd008 2301 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2302 }
2303#endif
3280af22 2304 if (PL_rsfp_filters) {
55497cff
PP
2305 if (!append)
2306 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2307 if (FILTER_READ(0, sv, 0) > 0)
2308 return ( SvPVX(sv) ) ;
2309 else
0e2d6244 2310 return NULL ;
16d20bd9 2311 }
9d116dd7 2312 else
fd049845 2313 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2314}
2315
01ec43d0 2316STATIC HV *
1f49be52 2317S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2318{
2319 GV *gv;
2320
01ec43d0 2321 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2322 return PL_curstash;
2323
2324 if (len > 2 &&
2325 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
057b822e 2326 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
01ec43d0
GS
2327 {
2328 return GvHV(gv); /* Foo:: */
def3634b
GS
2329 }
2330
2331 /* use constant CLASS => 'MyClass' */
057b822e 2332 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
def3634b
GS
2333 SV *sv;
2334 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
c06c673c 2335 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2336 }
2337 }
2338
2339 return gv_stashpv(pkgname, FALSE);
2340}
a0d0e21e 2341
d9e8c084
NC
2342STATIC char *
2343S_tokenize_use(pTHX_ int is_use, char *s) {
2344 if (PL_expect != XSTATE)
2345 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2346 is_use ? "use" : "no"));
2347 s = skipspace(s);
2348 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2349 s = force_version(s, TRUE);
2350 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2351 PL_nextval[PL_nexttoke].opval = Nullop;
2352 force_next(WORD);
2353 }
2354 else if (*s == 'v') {
2355 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2356 s = force_version(s, FALSE);
2357 }
2358 }
2359 else {
2360 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2361 s = force_version(s, FALSE);
2362 }
2363 yylval.ival = is_use;
2364 return s;
2365}
748a9306 2366#ifdef DEBUGGING
fe20fd30 2367 static const char* const exp_name[] =
09bef843
SB
2368 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2369 "ATTRTERM", "TERMBLOCK"
2370 };
748a9306 2371#endif
463ee0b2 2372
02aa26ce
NT
2373/*
2374 yylex
2375
2376 Works out what to call the token just pulled out of the input
2377 stream. The yacc parser takes care of taking the ops we return and
2378 stitching them into a tree.
2379
2380 Returns:
2381 PRIVATEREF
2382
2383 Structure:
2384 if read an identifier
2385 if we're in a my declaration
2386 croak if they tried to say my($foo::bar)
2387 build the ops for a my() declaration
2388 if it's an access to a my() variable
2389 are we in a sort block?
2390 croak if my($a); $a <=> $b
2391 build ops for access to a my() variable
2392 if in a dq string, and they've said @foo and we can't find @foo
2393 croak
2394 build ops for a bareword
2395 if we already built the token before, use it.
2396*/
2397
dba4d153 2398#ifdef USE_PURE_BISON
864dbfa3 2399int
dba4d153 2400Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2401{
20141f0e
RI
2402 int r;
2403
6f202aea 2404 yyactlevel++;
20141f0e
RI
2405 yylval_pointer[yyactlevel] = lvalp;
2406 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2407 if (yyactlevel >= YYMAXLEVEL)
2408 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2409
dba4d153 2410 r = Perl_yylex(aTHX);
20141f0e 2411
d8ae6756
RI
2412 if (yyactlevel > 0)
2413 yyactlevel--;
20141f0e
RI
2414
2415 return r;
2416}
dba4d153 2417#endif
20141f0e 2418
dba4d153
JH
2419#ifdef __SC__
2420#pragma segment Perl_yylex
2421#endif
dba4d153 2422int
dba4d153 2423Perl_yylex(pTHX)
20141f0e 2424{
c870331e 2425 register char *s = PL_bufptr;
378cc40b 2426 register char *d;
463ee0b2 2427 STRLEN len;
aa7440fb 2428 bool bof = FALSE;
a687059c 2429
f31bfe61 2430 DEBUG_T( {
d7559646 2431 SV* tmp = newSVpvs("");
3f3939a9
NC
2432 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2433 (IV)CopLINE(PL_curcop),
2434 lex_state_names[PL_lex_state],
2435 exp_name[PL_expect],
2436 pv_display(tmp, s, strlen(s), 0, 60));
2437 SvREFCNT_dec(tmp);
f31bfe61 2438 } );
02aa26ce 2439 /* check if there's an identifier for us to look at */
ba979b31 2440 if (PL_pending_ident)
f31bfe61 2441 return REPORT(S_pending_ident(aTHX));
bbce6d69 2442
02aa26ce
NT
2443 /* no identifier pending identification */
2444
3280af22 2445 switch (PL_lex_state) {
79072805
LW
2446#ifdef COMMENTARY
2447 case LEX_NORMAL: /* Some compilers will produce faster */
2448 case LEX_INTERPNORMAL: /* code if we comment these out. */
2449 break;
2450#endif
2451
09bef843 2452 /* when we've already built the next token, just pull it out of the queue */
79072805 2453 case LEX_KNOWNEXT:
3280af22
NIS
2454 PL_nexttoke--;
2455 yylval = PL_nextval[PL_nexttoke];
2456 if (!PL_nexttoke) {
2457 PL_lex_state = PL_lex_defer;
2458 PL_expect = PL_lex_expect;
2459 PL_lex_defer = LEX_NORMAL;
463ee0b2 2460 }
f31bfe61 2461 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2462
02aa26ce 2463 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2464 when we get here, PL_bufptr is at the \
02aa26ce 2465 */
79072805
LW
2466 case LEX_INTERPCASEMOD:
2467#ifdef DEBUGGING
3280af22 2468 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2469 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2470#endif
02aa26ce 2471 /* handle \E or end of string */
3280af22 2472 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2473 /* if at a \E */
3280af22 2474 if (PL_lex_casemods) {
24c2fff4 2475 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2476 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2477
3c71ae1e
NC
2478 if (PL_bufptr != PL_bufend
2479 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2480 PL_bufptr += 2;
2481 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2482 }
f31bfe61 2483 return REPORT(')');
79072805 2484 }
3280af22
NIS
2485 if (PL_bufptr != PL_bufend)
2486 PL_bufptr += 2;
2487 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2488 return yylex();
79072805
LW
2489 }
2490 else {
607df283 2491 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3f3939a9 2492 "### Saw case modifier\n"); });
3280af22 2493 s = PL_bufptr + 1;
2ea5368e
JH
2494 if (s[1] == '\\' && s[2] == 'E') {
2495 PL_bufptr = s + 3;
2496 PL_lex_state = LEX_INTERPCONCAT;
2497 return yylex();
a0d0e21e 2498 }
2ea5368e 2499 else {
0211776e 2500 I32 tmp;
2ea5368e
JH
2501 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2502 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3c71ae1e 2503 if ((*s == 'L' || *s == 'U') &&
2ea5368e
JH
2504 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2505 PL_lex_casestack[--PL_lex_casemods] = '\0';
f31bfe61 2506 return REPORT(')');
2ea5368e
JH
2507 }
2508 if (PL_lex_casemods > 10)
2509 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2510 PL_lex_casestack[PL_lex_casemods++] = *s;
2511 PL_lex_casestack[PL_lex_casemods] = '\0';
2512 PL_lex_state = LEX_INTERPCONCAT;
2513 PL_nextval[PL_nexttoke].ival = 0;
2514 force_next('(');
2515 if (*s == 'l')
2516 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2517 else if (*s == 'u')
2518 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2519 else if (*s == 'L')
2520 PL_nextval[PL_nexttoke].ival = OP_LC;
2521 else if (*s == 'U')
2522 PL_nextval[PL_nexttoke].ival = OP_UC;
2523 else if (*s == 'Q')
2524 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2525 else
2526 Perl_croak(aTHX_ "panic: yylex");
2527 PL_bufptr = s + 1;
a0d0e21e 2528 }
79072805 2529 force_next(FUNC);
3280af22
NIS
2530 if (PL_lex_starts) {
2531 s = PL_bufptr;
2532 PL_lex_starts = 0;
79072805
LW
2533 Aop(OP_CONCAT);
2534 }
2535 else
cea2e8a9 2536 return yylex();
79072805
LW
2537 }
2538
55497cff 2539 case LEX_INTERPPUSH:
f31bfe61 2540 return REPORT(sublex_push());
55497cff 2541
79072805 2542 case LEX_INTERPSTART:
3280af22 2543 if (PL_bufptr == PL_bufend)
f31bfe61 2544 return REPORT(sublex_done());
607df283 2545 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3f3939a9 2546 "### Interpolated variable\n"); });
3280af22
NIS
2547 PL_expect = XTERM;
2548 PL_lex_dojoin = (*PL_bufptr == '@');
2549 PL_lex_state = LEX_INTERPNORMAL;
2550 if (PL_lex_dojoin) {
2551 PL_nextval[PL_nexttoke].ival = 0;
79072805 2552 force_next(',');
4d1ff10f 2553#ifdef USE_5005THREADS
533c011a
NIS
2554 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2555 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2556 force_next(PRIVATEREF);
2557#else
a0d0e21e 2558 force_ident("\"", '$');
4d1ff10f 2559#endif /* USE_5005THREADS */
3280af22 2560 PL_nextval[PL_nexttoke].ival = 0;
79072805 2561 force_next('$');
3280af22 2562 PL_nextval[PL_nexttoke].ival = 0;
79072805 2563 force_next('(');
3280af22 2564 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2565 force_next(FUNC);
2566 }
3280af22
NIS
2567 if (PL_lex_starts++) {
2568 s = PL_bufptr;
79072805
LW
2569 Aop(OP_CONCAT);
2570 }
cea2e8a9 2571 return yylex();
79072805
LW
2572
2573 case LEX_INTERPENDMAYBE:
3280af22
NIS
2574 if (intuit_more(PL_bufptr)) {
2575 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2576 break;
2577 }
2578 /* FALL THROUGH */
2579
2580 case LEX_INTERPEND:
3280af22
NIS
2581 if (PL_lex_dojoin) {
2582 PL_lex_dojoin = FALSE;
2583 PL_lex_state = LEX_INTERPCONCAT;
f31bfe61 2584 return REPORT(')');
79072805 2585 }
43a16006 2586 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2587 && SvEVALED(PL_lex_repl))
43a16006 2588 {
e9fa98b2 2589 if (PL_bufptr != PL_bufend)
cea2e8a9 2590 Perl_croak(aTHX_ "Bad evalled substitution pattern");
0e2d6244 2591 PL_lex_repl = NULL;
e9fa98b2 2592 }
79072805
LW
2593 /* FALLTHROUGH */
2594 case LEX_INTERPCONCAT:
2595#ifdef DEBUGGING
3280af22 2596 if (PL_lex_brackets)
cea2e8a9 2597 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2598#endif
3280af22 2599 if (PL_bufptr == PL_bufend)
f31bfe61 2600 return REPORT(sublex_done());
79072805 2601
3280af22
NIS
2602 if (SvIVX(PL_linestr) == '\'') {
2603 SV *sv = newSVsv(PL_linestr);
2604 if (!PL_lex_inpat)
76e3520e 2605 sv = tokeq(sv);
3280af22 2606 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2607 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2608 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2609 s = PL_bufend;
79072805
LW
2610 }
2611 else {
3280af22 2612 s = scan_const(PL_bufptr);
79072805 2613 if (*s == '\\')
3280af22 2614 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2615 else
3280af22 2616 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2617 }
2618
3280af22
NIS
2619 if (s != PL_bufptr) {
2620 PL_nextval[PL_nexttoke] = yylval;
2621 PL_expect = XTERM;
79072805 2622 force_next(THING);
3280af22 2623 if (PL_lex_starts++)
79072805
LW
2624 Aop(OP_CONCAT);
2625 else {
3280af22 2626 PL_bufptr = s;
cea2e8a9 2627 return yylex();
79072805
LW
2628 }
2629 }
2630
cea2e8a9 2631 return yylex();
a0d0e21e 2632 case LEX_FORMLINE:
3280af22
NIS
2633 PL_lex_state = LEX_NORMAL;
2634 s = scan_formline(PL_bufptr);
2635 if (!PL_lex_formbrack)
a0d0e21e
LW
2636 goto rightbracket;
2637 OPERATOR(';');
79072805
LW
2638 }
2639
3280af22
NIS
2640 s = PL_bufptr;
2641 PL_oldoldbufptr = PL_oldbufptr;
2642 PL_oldbufptr = s;
463ee0b2
LW
2643
2644 retry:
378cc40b
LW
2645 switch (*s) {
2646 default:
7e2040f0 2647 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2648 goto keylookup;
cea2e8a9 2649 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2650 case 4:
2651 case 26:
2652 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2653 case 0:
3280af22
NIS
2654 if (!PL_rsfp) {
2655 PL_last_uni = 0;
2656 PL_last_lop = 0;
9dea3e30 2657 if (PL_lex_brackets) {
8c89da26
AL
2658 yyerror(PL_lex_formbrack
2659 ? "Format not terminated"
2660 : "Missing right curly or square bracket");
9dea3e30 2661 }
4e553d73 2662 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2663 "### Tokener got EOF\n");
5f80b19c 2664 } );
79072805 2665 TOKEN(0);
463ee0b2 2666 }
3280af22 2667 if (s++ < PL_bufend)
a687059c 2668 goto retry; /* ignore stray nulls */
3280af22
NIS
2669 PL_last_uni = 0;
2670 PL_last_lop = 0;
2671 if (!PL_in_eval && !PL_preambled) {
2672 PL_preambled = TRUE;
2673 sv_setpv(PL_linestr,incl_perldb());
2674 if (SvCUR(PL_linestr))
d7559646 2675 sv_catpvs(PL_linestr,";");
3280af22
NIS
2676 if (PL_preambleav){
2677 while(AvFILLp(PL_preambleav) >= 0) {
2678 SV *tmpsv = av_shift(PL_preambleav);
2679 sv_catsv(PL_linestr, tmpsv);
d7559646 2680 sv_catpvs(PL_linestr, ";");
91b7def8
PP
2681 sv_free(tmpsv);
2682 }
3280af22
NIS
2683 sv_free((SV*)PL_preambleav);
2684 PL_preambleav = NULL;
91b7def8 2685 }
3280af22 2686 if (PL_minus_n || PL_minus_p) {
d7559646 2687 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2688 if (PL_minus_l)
d7559646 2689 sv_catpvs(PL_linestr,"chomp;");
3280af22 2690 if (PL_minus_a) {
3280af22 2691 if (PL_minus_F) {
3c71ae1e
NC
2692 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2693 || *PL_splitstr == '"')
3280af22 2694 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2695 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2696 else {
6155262a
NC
2697 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2698 bytes can be used as quoting characters. :-) */
0473add9 2699 const char *splits = PL_splitstr;
d7559646 2700 sv_catpvs(PL_linestr, "our @F=split(q\0");
6155262a
NC
2701 do {
2702 /* Need to \ \s */
0473add9
AL
2703 if (*splits == '\\')
2704 sv_catpvn(PL_linestr, splits, 1);
2705 sv_catpvn(PL_linestr, splits, 1);
2706 } while (*splits++);
6155262a
NC
2707 /* This loop will embed the trailing NUL of
2708 PL_linestr as the last thing it does before
2709 terminating. */
d7559646 2710 sv_catpvs(PL_linestr, ");");
54310121 2711 }
2304df62
AD
2712 }
2713 else
d7559646 2714 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2715 }
79072805 2716 }
d7559646 2717 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2718 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2719 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
0e2d6244 2720 PL_last_lop = PL_last_uni = NULL;
3280af22 2721 if (PERLDB_LINE && PL_curstash != PL_debstash) {
133cdda0 2722 SV * const sv = newSV(0);
a0d0e21e
LW
2723
2724 sv_upgrade(sv, SVt_PVMG);
3280af22 2725 sv_setsv(sv,PL_linestr);
0ac0412a 2726 (void)SvIOK_on(sv);
0da6cfda 2727 SvIV_set(sv, 0);
22d66770 2728 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2729 }
79072805 2730 goto retry;
a687059c 2731 }
e929a76b 2732 do {
aa7440fb 2733 bof = PL_rsfp ? TRUE : FALSE;
0e2d6244 2734 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af
JH
2735 fake_eof:
2736 if (PL_rsfp) {
2737 if (PL_preprocess && !PL_in_eval)
2738 (void)PerlProc_pclose(PL_rsfp);
2739 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2740 PerlIO_clearerr(PL_rsfp);
2741 else
2742 (void)PerlIO_close(PL_rsfp);
0e2d6244 2743 PL_rsfp = NULL;
7e28d3af
JH
2744 PL_doextract = FALSE;
2745 }
2746 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
29652248
NC
2747 sv_setpv(PL_linestr,PL_minus_p
2748 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2749 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2750 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
0e2d6244 2751 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
2752 PL_minus_n = PL_minus_p = 0;
2753 goto retry;
2754 }
2755 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
0e2d6244 2756 PL_last_lop = PL_last_uni = NULL;
2a8de9e2 2757 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2758 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2759 }
92a78a40
JH
2760 /* If it looks like the start of a BOM or raw UTF-16,
2761 * check if it in fact is. */
2762 else if (bof &&
2763 (*s == 0 ||
2764 *(U8*)s == 0xEF ||
2765 *(U8*)s >= 0xFE ||
2766 s[1] == 0)) {
226017aa 2767#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2768# ifdef __GNU_LIBRARY__
2769# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2770# define FTELL_FOR_PIPE_IS_BROKEN
2771# endif
e3f494f1
JH
2772# else
2773# ifdef __GLIBC__
2774# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2775# define FTELL_FOR_PIPE_IS_BROKEN
2776# endif
2777# endif
226017aa
DD
2778# endif
2779#endif
2780#ifdef FTELL_FOR_PIPE_IS_BROKEN
2781 /* This loses the possibility to detect the bof
2782 * situation on perl -P when the libc5 is being used.
2783 * Workaround? Maybe attach some extra state to PL_rsfp?
2784 */
2785 if (!PL_preprocess)
7e28d3af 2786 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2787#else
eb160463 2788 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2789#endif
7e28d3af 2790 if (bof) {
3280af22 2791 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2792 s = swallow_bom((U8*)s);
e929a76b 2793 }
378cc40b 2794 }
3280af22 2795 if (PL_doextract) {
a0d0e21e
LW
2796 /* Incest with pod. */
2797 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2a8de9e2 2798 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2799 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2800 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
0e2d6244 2801 PL_last_lop = PL_last_uni = NULL;
3280af22 2802 PL_doextract = FALSE;
a0d0e21e 2803 }
4e553d73 2804 }
463ee0b2 2805 incline(s);
3280af22
NIS
2806 } while (PL_doextract);
2807 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2808 if (PERLDB_LINE && PL_curstash != PL_debstash) {
133cdda0 2809 SV * const sv = newSV(0);
a687059c 2810
93a17b20 2811 sv_upgrade(sv, SVt_PVMG);
3280af22 2812 sv_setsv(sv,PL_linestr);
0ac0412a 2813 (void)SvIOK_on(sv);
0da6cfda 2814 SvIV_set(sv, 0);
22d66770 2815 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2816 }
3280af22 2817 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
0e2d6244 2818 PL_last_lop = PL_last_uni = NULL;
57843af0 2819 if (CopLINE(PL_curcop) == 1) {
3280af22 2820 while (s < PL_bufend && isSPACE(*s))
79072805 2821 s++;
a0d0e21e 2822 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2823 s++;
0e2d6244 2824 d = NULL;
3280af22 2825 if (!PL_in_eval) {
44a8e56a
PP
2826 if (*s == '#' && *(s+1) == '!')
2827 d = s + 2;
2828#ifdef ALTERNATE_SHEBANG
2829 else {
a00f3e00 2830 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2831 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2832 d = s + (sizeof(as) - 1);
2833 }
2834#endif /* ALTERNATE_SHEBANG */
2835 }
2836 if (d) {
b8378b72 2837 char *ipath;
774d564b 2838 char *ipathend;
b8378b72 2839
774d564b 2840 while (isSPACE(*d))
b8378b72
CS
2841 d++;
2842 ipath = d;
774d564b
PP
2843 while (*d && !isSPACE(*d))
2844 d++;
2845 ipathend = d;
2846
2847#ifdef ARG_ZERO_IS_SCRIPT
2848 if (ipathend > ipath) {
2849 /*
2850 * HP-UX (at least) sets argv[0] to the script name,
2851 * which makes $^X incorrect. And Digital UNIX and Linux,
2852 * at least, set argv[0] to the basename of the Perl
2853 * interpreter. So, having found "#!", we'll set it right.
2854 */
b977d03a
NC
2855 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
2856 SVt_PV)); /* $^X */
774d564b 2857 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2858 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2859 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2860 SvSETMAGIC(x);
2861 }
556c1dec
JH
2862 else {
2863 STRLEN blen;
2864 STRLEN llen;
71a0dd65 2865 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
a2592645 2866 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2867 if (llen < blen) {
2868 bstart += blen - llen;
2869 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2870 sv_setpvn(x, ipath, ipathend - ipath);
2871 SvSETMAGIC(x);
2872 }
2873 }
2874 }
774d564b 2875 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2876 }
774d564b 2877#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2878
2879 /*
2880 * Look for options.
2881 */
748a9306 2882 d = instr(s,"perl -");
84e30d1a 2883 if (!d) {
748a9306 2884 d = instr(s,"perl");
84e30d1a
GS
2885#if defined(DOSISH)
2886 /* avoid getting into infinite loops when shebang
2887 * line contains "Perl" rather than "perl" */
2888 if (!d) {
2889 for (d = ipathend-4; d >= ipath; --d) {
2890 if ((*d == 'p' || *d == 'P')
2891 && !ibcmp(d, "perl", 4))
2892 {
2893 break;
2894 }
2895 }
2896 if (d < ipath)
0e2d6244 2897 d = NULL;
84e30d1a
GS
2898 }
2899#endif
2900 }
44a8e56a
PP
2901#ifdef ALTERNATE_SHEBANG
2902 /*
2903 * If the ALTERNATE_SHEBANG on this system starts with a
2904 * character that can be part of a Perl expression, then if
2905 * we see it but not "perl", we're probably looking at the
2906 * start of Perl code, not a request to hand off to some
2907 * other interpreter. Similarly, if "perl" is there, but
2908 * not in the first 'word' of the line, we assume the line
2909 * contains the start of the Perl program.
44a8e56a
PP
2910 */
2911 if (d && *s != '#') {
24c2fff4 2912 const char *c = ipath;
44a8e56a
PP
2913 while (*c && !strchr("; \t\r\n\f\v#", *c))
2914 c++;
2915 if (c < d)
0e2d6244 2916 d = NULL; /* "perl" not in first word; ignore */
44a8e56a
PP
2917 else
2918 *s = '#'; /* Don't try to parse shebang line */
2919 }
774d564b 2920#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2921#ifndef MACOS_TRADITIONAL
748a9306 2922 if (!d &&
44a8e56a 2923 *s == '#' &&
774d564b 2924 ipathend > ipath &&
3280af22 2925 !PL_minus_c &&
748a9306 2926 !instr(s,"indir") &&
3280af22 2927 instr(PL_origargv[0],"perl"))
748a9306 2928 {
fe20fd30 2929 char **newargv;
9f68db38 2930
774d564b
PP
2931 *ipathend = '\0';
2932 s = ipathend + 1;
3280af22 2933 while (s < PL_bufend && isSPACE(*s))
9f68db38 2934 s++;
3280af22 2935 if (s < PL_bufend) {
cd7a8267 2936 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2937 newargv[1] = s;
3280af22 2938 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2939 s++;
2940 *s = '\0';
3280af22 2941 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2942 }
2943 else
3280af22 2944 newargv = PL_origargv;
774d564b 2945 newargv[0] = ipath;
629185f5 2946 PERL_FPU_PRE_EXEC
b4748376 2947 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
629185f5 2948 PERL_FPU_POST_EXEC
cea2e8a9 2949 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2950 }
bf4acbe4 2951#endif
748a9306 2952 if (d) {
748a9306 2953 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2954 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2955
2956 if (*d++ == '-') {
24c2fff4 2957 const bool switches_done = PL_doswitches;
145ab0cc
NC
2958 const U32 oldpdb = PL_perldb;
2959 const bool oldn = PL_minus_n;
2960 const bool oldp = PL_minus_p;
2961
8cc95fdb 2962 do {
7f8d406d 2963 if (*d == 'M' || *d == 'm') {
a2592645 2964 const char * const m = d;
8cc95fdb 2965 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2966 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2967 (int)(d - m), m);
2968 }
2969 d = moreswitches(d);
2970 } while (d);
f824e39a
JH
2971 if (PL_doswitches && !switches_done) {
2972 int argc = PL_origargc;
2973 char **argv = PL_origargv;
2974 do {
2975 argc--,argv++;
2976 } while (argc && argv[0][0] == '-' && argv[0][1]);
2977 init_argv_symbols(argc,argv);
2978 }
155aba94
GS
2979 if ((PERLDB_LINE && !oldpdb) ||
2980 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2981 /* if we have already added "LINE: while (<>) {",
2982 we must not do it again */
748a9306 2983 {
2a8de9e2 2984 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2985 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2986 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
0e2d6244 2987 PL_last_lop = PL_last_uni = NULL;
3280af22 2988 PL_preambled = FALSE;
84902520 2989 if (PERLDB_LINE)
3280af22 2990 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2991 goto retry;
2992 }
a0d0e21e 2993 }
79072805 2994 }
9f68db38 2995 }
79072805 2996 }
3280af22
NIS
2997 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2998 PL_bufptr = s;
2999 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3000 return yylex();
ae986130 3001 }
378cc40b 3002 goto retry;
4fdae800 3003 case '\r':
6a27c188 3004#ifdef PERL_STRICT_CR
cea2e8a9 3005 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3006 Perl_croak(aTHX_
cc507455 3007 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3008#endif
4fdae800 3009 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3010#ifdef MACOS_TRADITIONAL
3011 case '\312':
3012#endif
378cc40b
LW
3013 s++;
3014 goto retry;
378cc40b 3015 case '#':
e929a76b 3016 case '\n':
3280af22 3017 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3018 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3019 /* handle eval qq[#line 1 "foo"\n ...] */
3020 CopLINE_dec(PL_curcop);
3021 incline(s);
3022 }
3280af22 3023 d = PL_bufend;
a687059c 3024 while (s < d && *s != '\n')
378cc40b 3025 s++;
0f85fab0 3026 if (s < d)
378cc40b 3027 s++;
78c267c1 3028 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3029 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3030 incline(s);
3280af22
NIS
3031 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3032 PL_bufptr = s;
3033 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3034 return yylex();
a687059c 3035 }
378cc40b 3036 }
a687059c 3037 else {
378cc40b 3038 *s = '\0';
3280af22 3039 PL_bufend = s;
a687059c 3040 }
378cc40b
LW
3041 goto retry;
3042 case '-':
79072805 3043 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3044 I32 ftst = 0;
0211776e 3045 char tmp;
e5edeb50 3046
378cc40b 3047 s++;
3280af22 3048 PL_bufptr = s;
748a9306
LW
3049 tmp = *s++;
3050
bf4acbe4 3051 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3052 s++;
3053
3054 if (strnEQ(s,"=>",2)) {
3280af22 3055 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3f3939a9
NC
3056 DEBUG_T( { S_printbuf(aTHX_
3057 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3058 } );
748a9306
LW
3059 OPERATOR('-'); /* unary minus */
3060 }
3280af22 3061 PL_last_uni = PL_oldbufptr;
748a9306 3062 switch (tmp) {
e5edeb50
JH
3063 case 'r': ftst = OP_FTEREAD; break;
3064 case 'w': ftst = OP_FTEWRITE; break;
3065 case 'x': ftst = OP_FTEEXEC; break;
3066 case 'o': ftst = OP_FTEOWNED; break;
3067 case 'R': ftst = OP_FTRREAD; break;
3068 case 'W': ftst = OP_FTRWRITE; break;
3069 case 'X': ftst = OP_FTREXEC; break;
3070 case 'O': ftst = OP_FTROWNED; break;
3071 case 'e': ftst = OP_FTIS; break;
3072 case 'z': ftst = OP_FTZERO; break;
3073 case 's': ftst = OP_FTSIZE; break;
3074 case 'f': ftst = OP_FTFILE; break;
3075 case 'd': ftst = OP_FTDIR; break;
3076 case 'l': ftst = OP_FTLINK; break;
3077 case 'p': ftst = OP_FTPIPE; break;
3078 case 'S': ftst = OP_FTSOCK; break;
3079 case 'u': ftst = OP_FTSUID; break;
3080 case 'g': ftst = OP_FTSGID; break;
3081 case 'k': ftst = OP_FTSVTX; break;
3082 case 'b': ftst = OP_FTBLK; break;
3083 case 'c': ftst = OP_FTCHR; break;
3084 case 't': ftst = OP_FTTTY; break;
3085 case 'T': ftst = OP_FTTEXT; break;
3086 case 'B': ftst = OP_FTBINARY; break;
3087 case 'M': case 'A': case 'C':
b977d03a 3088 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
3089 switch (tmp) {
3090 case 'M': ftst = OP_FTMTIME; break;
3091 case 'A': ftst = OP_FTATIME; break;
3092 case 'C': ftst = OP_FTCTIME; break;
3093 default: break;
3094 }
3095 break;
378cc40b 3096 default:
378cc40b
LW
3097 break;
3098 }
e5edeb50 3099 if (ftst) {
eb160463 3100 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3101 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3f3939a9 3102 "### Saw file test %c\n", (int)tmp);
5f80b19c 3103 } );
e5edeb50
JH
3104 FTST(ftst);
3105 }
3106 else {
3107 /* Assume it was a minus followed by a one-letter named
3108 * subroutine call (or a -bareword), then. */
95c31fe3 3109 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a8eb0fba 3110 "### '-%c' looked like a file test but was not\n",
6155262a 3111 (int) tmp);
5f80b19c 3112 } );
f050833e 3113 s = --PL_bufptr;
e5edeb50 3114 }
378cc40b 3115 }
0211776e
NC
3116 {
3117 const char tmp = *s++;
3118 if (*s == tmp) {
3119 s++;
3120 if (PL_expect == XOPERATOR)
3121 TERM(POSTDEC);
3122 else
3123 OPERATOR(PREDEC);
3124 }
3125 else if (*s == '>') {
3126 s++;
3127 s = skipspace(s);
3128 if (isIDFIRST_lazy_if(s,UTF)) {
3129 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3130 TOKEN(ARROW);
3131 }
3132 else if (*s == '$')
3133 OPERATOR(ARROW);
3134 else
3135 TERM(ARROW);
3136 }
3280af22 3137 if (PL_expect == XOPERATOR)
0211776e
NC
3138 Aop(OP_SUBTRACT);
3139 else {
3140 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3141 check_uni();
3142 OPERATOR('-'); /* unary minus */
79072805 3143 }
2f3197b3 3144 }
79072805 3145
378cc40b 3146 case '+':
0211776e
NC
3147 {
3148 const char tmp = *s++;
3149 if (*s == tmp) {
3150 s++;
3151 if (PL_expect == XOPERATOR)
3152 TERM(POSTINC);
3153 else
3154 OPERATOR(PREINC);
3155 }
3280af22 3156 if (PL_expect == XOPERATOR)
0211776e
NC
3157 Aop(OP_ADD);
3158 else {
3159 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3160 check_uni();
3161 OPERATOR('+');
3162 }
2f3197b3 3163 }
a687059c 3164
378cc40b 3165 case '*':
3280af22
NIS
3166 if (PL_expect != XOPERATOR) {
3167 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3168 PL_expect = XOPERATOR;
3169 force_ident(PL_tokenbuf, '*');
3170 if (!*PL_tokenbuf)
a0d0e21e 3171 PREREF('*');
79072805 3172 TERM('*');
a687059c 3173 }
79072805
LW
3174 s++;
3175 if (*s == '*') {
a687059c 3176 s++;
79072805 3177 PWop(OP_POW);
a687059c 3178 }
79072805
LW
3179 Mop(OP_MULTIPLY);
3180
378cc40b 3181 case '%':
3280af22 3182 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3183 ++s;
3184 Mop(OP_MODULO);
a687059c 3185 }
3280af22
NIS
3186 PL_tokenbuf[0] = '%';
3187 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3188 if (!PL_tokenbuf[1]) {
bbce6d69 3189 PREREF('%');
a687059c 3190 }
3280af22 3191 PL_pending_ident = '%';
bbce6d69 3192 TERM('%');
a687059c 3193
378cc40b 3194 case '^':
79072805 3195 s++;
a0d0e21e 3196 BOop(OP_BIT_XOR);
79072805 3197 case '[':
3280af22 3198 PL_lex_brackets++;
79072805 3199 /* FALL THROUGH */
378cc40b 3200 case '~':
378cc40b 3201 case ',':
0211776e
NC
3202 {
3203 const char tmp = *s++;
3204 OPERATOR(tmp);
3205 }
a0d0e21e
LW
3206 case ':':
3207 if (s[1] == ':') {
3208 len = 0;
0211776e 3209 goto just_a_word_zero_gv;
a0d0e21e
LW
3210 }
3211 s++;
09bef843
SB
3212 switch (PL_expect) {
3213 OP *attrs;
3214 case XOPERATOR:
3215 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3216 break;
3217 PL_bufptr = s; /* update in case we back off */
3218 goto grabattrs;
3219 case XATTRBLOCK:
3220 PL_expect = XBLOCK;
3221 goto grabattrs;
3222 case XATTRTERM:
3223 PL_expect = XTERMBLOCK;
3224 grabattrs:
3225 s = skipspace(s);
3226 attrs = Nullop;
7e2040f0 3227 while (isIDFIRST_lazy_if(s,UTF)) {
0211776e 3228 I32 tmp;
09bef843 3229 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3230 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3231 if (tmp < 0) tmp = -tmp;
3232 switch (tmp) {
3233 case KEY_or:
3234 case KEY_and:
3235 case KEY_for:
3236 case KEY_unless:
3237 case KEY_if:
3238 case KEY_while:
3239 case KEY_until:
3240 goto got_attrs;
3241 default:
3242 break;
3243 }
3244 }
09bef843
SB
3245 if (*d == '(') {
3246 d = scan_str(d,TRUE,TRUE);
3247 if (!d) {
09bef843
SB
3248 /* MUST advance bufptr here to avoid bogus
3249 "at end of line" context messages from yyerror().
3250 */
3251 PL_bufptr = s + len;
3252 yyerror("Unterminated attribute parameter in attribute list");
3253 if (attrs)
3254 op_free(attrs);
f31bfe61 3255 return REPORT(0); /* EOF indicator */
09bef843
SB
3256 }
3257 }
3258 if (PL_lex_stuff) {
3259 SV *sv = newSVpvn(s, len);
3260 sv_catsv(sv, PL_lex_stuff);
3261 attrs = append_elem(OP_LIST, attrs,
3262 newSVOP(OP_CONST, 0, sv));
3263 SvREFCNT_dec(PL_lex_stuff);
0e2d6244 3264 PL_lex_stuff = NULL;
09bef843
SB
3265 }
3266 else {
7a3458b7
NC
3267 if (len == 6 && strnEQ(s, "unique", len)) {
3268 if (PL_in_my == KEY_our)
3269#ifdef USE_ITHREADS
3270 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3271#else
3272 ; /* skip to avoid loading attributes.pm */
3273#endif
a00f3e00 3274 else
7a3458b7
NC
3275 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3276 }
3277
d3cea301
SB
3278 /* NOTE: any CV attrs applied here need to be part of
3279 the CVf_BUILTIN_ATTRS define in cv.h! */
7a3458b7 3280 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3281 CvLVALUE_on(PL_compcv);
3282 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3283 CvLOCKED_on(PL_compcv);
3284 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3285 CvMETHOD_on(PL_compcv);
3286 /* After we've set the flags, it could be argued that
3287 we don't need to do the attributes.pm-based setting
3288 process, and shouldn't bother appending recognized
d3cea301
SB
3289 flags. To experiment with that, uncomment the
3290 following "else". (Note that's already been
3291 uncommented. That keeps the above-applied built-in
3292 attributes from being intercepted (and possibly
3293 rejected) by a package's attribute routines, but is
3294 justified by the performance win for the common case
3295 of applying only built-in attributes.) */
0256094b 3296 else
78f9721b
SM
3297 attrs = append_elem(OP_LIST, attrs,
3298 newSVOP(OP_CONST, 0,
3299 newSVpvn(s, len)));
09bef843
SB
3300 }
3301 s = skipspace(d);
0120eecf 3302 if (*s == ':' && s[1] != ':')
09bef843 3303 s = skipspace(s+1);
0120eecf
GS
3304 else if (s == d)
3305 break; /* require real whitespace or :'s */
09bef843 3306 }
0211776e
NC
3307 {
3308 const char tmp
3309 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3310 if (*s != ';' && *s != '}' && *s != tmp
3311 && (tmp != '=' || *s != ')')) {
3312 const char q = ((*s == '\'') ? '"' : '\'');
3313 /* If here for an expression, and parsed no attrs, back
3314 off. */
3315 if (tmp == '=' && !attrs) {
3316 s = PL_bufptr;
3317 break;
3318 }
3319 /* MUST advance bufptr here to avoid bogus "at end of line"
3320 context messages from yyerror().
3321 */
3322 PL_bufptr = s;
3323 yyerror( *s
3324 ? Perl_form(aTHX_ "Invalid separator character "
3325 "%c%c%c in attribute list", q, *s, q)
3326 : "Unterminated attribute list" );
3327 if (attrs)
3328 op_free(attrs);
3329 OPERATOR(':');
09bef843 3330 }
09bef843 3331 }
f9829d6b 3332 got_attrs:
09bef843
SB
3333 if (attrs) {
3334 PL_nextval[PL_nexttoke].opval = attrs;
3335 force_next(THING);
3336 }
3337 TOKEN(COLONATTR);
3338 }
a0d0e21e 3339 OPERATOR(':');
8990e307
LW
3340 case '(':
3341 s++;
3280af22
NIS
3342 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3343 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3344 else
3280af22 3345 PL_expect = XTERM;
ec2e9529 3346 s = skipspace(s);
a0d0e21e 3347 TOKEN('(');
378cc40b 3348 case ';':
f4dd75d9 3349 CLINE;
0211776e
NC
3350 {
3351 const char tmp = *s++;
3352 OPERATOR(tmp);
3353 }
378cc40b 3354 case ')':
0211776e
NC
3355 {
3356 const char tmp = *s++;
3357 s = skipspace(s);
3358 if (*s == '{')
3359 PREBLOCK(tmp);
3360 TERM(tmp);
3361 }
79072805
LW
3362 case ']':
3363 s++;
3280af22 3364 if (PL_lex_brackets <= 0)
d98d5fff 3365 yyerror("Unmatched right square bracket");
463ee0b2 3366 else
3280af22
NIS
3367 --PL_lex_brackets;
3368 if (PL_lex_state == LEX_INTERPNORMAL) {
3369 if (PL_lex_brackets == 0) {
a0d0e21e 3370 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3371 PL_lex_state = LEX_INTERPEND;
79072805
LW
3372 }
3373 }
4633a7c4 3374 TERM(']');
79072805
LW
3375 case '{':
3376 leftbracket:
79072805 3377 s++;
3280af22 3378 if (PL_lex_brackets > 100) {
bda19f49 3379 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3380 }
3280af22 3381 switch (PL_expect) {
a0d0e21e 3382 case XTERM:
3280af22 3383 if (PL_lex_formbrack) {
a0d0e21e
LW
3384 s--;
3385 PRETERMBLOCK(DO);
3386 }
3280af22
NIS
3387 if (PL_oldoldbufptr == PL_last_lop)
3388 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3389 else
3280af22 3390 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3391 OPERATOR(HASHBRACK);
a0d0e21e 3392 case XOPERATOR:
bf4acbe4 3393 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3394 s++;
44a8e56a 3395 d = s;
3280af22
NIS
3396 PL_tokenbuf[0] = '\0';
3397 if (d < PL_bufend && *d == '-') {
3398 PL_tokenbuf[0] = '-';
44a8e56a 3399 d++;
bf4acbe4 3400 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3401 d++;
3402 }
7e2040f0 3403 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3404 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3405 FALSE, &len);
bf4acbe4 3406 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3407 d++;
3408 if (*d == '}') {
24c2fff4 3409 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3410 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3411 if (minus)
3412 force_next('-');
748a9306
LW
3413 }
3414 }
3415 /* FALL THROUGH */
09bef843 3416 case XATTRBLOCK:
748a9306 3417 case XBLOCK:
3280af22
NIS
3418 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3419 PL_expect = XSTATE;
a0d0e21e 3420 break;
09bef843 3421 case XATTRTERM:
a0d0e21e 3422 case XTERMBLOCK:
3280af22
NIS
3423 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3424 PL_expect = XSTATE;
a0d0e21e
LW
3425 break;
3426 default: {
24c2fff4 3427 const char *t;
3280af22
NIS
3428 if (PL_oldoldbufptr == PL_last_lop)
3429 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3430 else
3280af22 3431 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3432 s = skipspace(s);
8452ff4b
SB
3433 if (*s == '}') {
3434 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3435 PL_expect = XTERM;
3436 /* This hack is to get the ${} in the message. */
3437 PL_bufptr = s+1;
3438 yyerror("syntax error");
3439 break;
3440 }
a0d0e21e 3441 OPERATOR(HASHBRACK);
8452ff4b 3442 }
b8a4b1be
GS
3443 /* This hack serves to disambiguate a pair of curlies
3444 * as being a block or an anon hash. Normally, expectation
3445 * determines that, but in cases where we're not in a
3446 * position to expect anything in particular (like inside
3447 * eval"") we have to resolve the ambiguity. This code
3448 * covers the case where the first term in the curlies is a
3449 * quoted string. Most other cases need to be explicitly
5332c881 3450 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3451 * curly in order to force resolution as an anon hash.
3452 *
3453 * XXX should probably propagate the outer expectation
3454 * into eval"" to rely less on this hack, but that could
3455 * potentially break current behavior of eval"".
3456 * GSAR 97-07-21
3457 */
3458 t = s;
3459 if (*s == '\'' || *s == '"' || *s == '`') {
3460 /* common case: get past first string, handling escapes */
3280af22 3461 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3462 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3463 t++;
3464 t++;
a0d0e21e 3465 }
b8a4b1be 3466 else if (*s == 'q') {
3280af22 3467 if (++t < PL_bufend
b8a4b1be 3468 && (!isALNUM(*t)
3280af22 3469 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3470 && !isALNUM(*t))))
3471 {
e3b9cd39 3472 /* skip q//-like construct */
24c2fff4 3473 const char *tmps;
b8a4b1be
GS
3474 char open, close, term;
3475 I32 brackets = 1;
3476
3280af22 3477 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3478 t++;
e3b9cd39
JH
3479 /* check for q => */
3480 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3481 OPERATOR(HASHBRACK);
3482 }
b8a4b1be
GS
3483 term = *t;
3484 open = term;
3485 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3486 term = tmps[5];
3487 close = term;
3488 if (open == close)
3280af22
NIS
3489 for (t++; t < PL_bufend; t++) {
3490 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3491 t++;
6d07e5e9 3492 else if (*t == open)
b8a4b1be
GS
3493 break;
3494 }
e3b9cd39 3495 else {
3280af22
NIS
3496 for (t++; t < PL_bufend; t++) {
3497 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3498 t++;
6d07e5e9 3499 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3500 break;
3501 else if (*t == open)
3502 brackets++;
3503 }
e3b9cd39
JH
3504 }
3505 t++;
b8a4b1be 3506 }
e3b9cd39
JH
3507 else
3508 /* skip plain q word */
3509 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3510 t += UTF8SKIP(t);
a0d0e21e 3511 }
7e2040f0 3512 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3513 t += UTF8SKIP(t);
7e2040f0 3514 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3515 t += UTF8SKIP(t);
a0d0e21e 3516 }
3280af22 3517 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3518 t++;
b8a4b1be
GS
3519 /* if comma follows first term, call it an anon hash */
3520 /* XXX it could be a comma expression with loop modifiers */
3280af22 3521 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3522 || (*t == '=' && t[1] == '>')))
a0d0e21e 3523 OPERATOR(HASHBRACK);
3280af22 3524 if (PL_expect == XREF)
4e4e412b 3525 PL_expect = XTERM;
a0d0e21e 3526 else {
3280af22
NIS
3527 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3528 PL_expect = XSTATE;
a0d0e21e 3529 }
8990e307 3530 }
a0d0e21e 3531 break;
463ee0b2 3532 }
<