This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
performance tweaking op.c
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a
PP
63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
b6007c36
DM
65#define LEX_NORMAL 10 /* normal code (ie not within "...") */
66#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69#define LEX_INTERPSTART 6 /* expecting the start of a $var */
70
71 /* at end of code, eg "$x" followed by: */
72#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74
75#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77#define LEX_INTERPCONST 2 /* NOT USED */
78#define LEX_FORMLINE 1 /* expecting a format line */
79#define LEX_KNOWNEXT 0 /* next token known; just return it */
80
79072805 81
bbf60fe6 82#ifdef DEBUGGING
27da23d5 83static const char* const lex_state_names[] = {
bbf60fe6
DM
84 "KNOWNEXT",
85 "FORMLINE",
86 "INTERPCONST",
87 "INTERPCONCAT",
88 "INTERPENDMAYBE",
89 "INTERPEND",
90 "INTERPSTART",
91 "INTERPPUSH",
92 "INTERPCASEMOD",
93 "INTERPNORMAL",
94 "NORMAL"
95};
96#endif
97
79072805
LW
98#ifdef ff_next
99#undef ff_next
d48672a2
LW
100#endif
101
79072805 102#include "keywords.h"
fe14fcc3 103
ffb4593c
NT
104/* CLINE is a macro that ensures PL_copline has a sane value */
105
ae986130
LW
106#ifdef CLINE
107#undef CLINE
108#endif
57843af0 109#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 110
ffb4593c
NT
111/*
112 * Convenience functions to return different tokens and prime the
9cbb5ea2 113 * lexer for the next token. They all take an argument.
ffb4593c
NT
114 *
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
2d2e263d 125 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
126 * BOop : bitwise or or xor
127 * BAop : bitwise and
128 * SHop : shift operator
129 * PWop : power operator
9cbb5ea2 130 * PMop : pattern-matching operator
ffb4593c
NT
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
e5edeb50 134 * Rop : relational operator <= != gt
ffb4593c
NT
135 *
136 * Also see LOP and lop() below.
137 */
138
998054bd 139#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 140# define REPORT(retval) tokereport((I32)retval)
998054bd 141#else
bbf60fe6 142# define REPORT(retval) (retval)
998054bd
SC
143#endif
144
bbf60fe6
DM
145#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 165
a687059c
LW
166/* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
a687059c 170 */
376fcdbf
AL
171#define UNI2(f,x) { \
172 yylval.ival = f; \
173 PL_expect = x; \
174 PL_bufptr = s; \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
177 if (*s == '(') \
178 return REPORT( (int)FUNC1 ); \
179 s = skipspace(s); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 }
6f33ba73
RGS
182#define UNI(f) UNI2(f,XTERM)
183#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 184
376fcdbf
AL
185#define UNIBRACK(f) { \
186 yylval.ival = f; \
187 PL_bufptr = s; \
188 PL_last_uni = PL_oldbufptr; \
189 if (*s == '(') \
190 return REPORT( (int)FUNC1 ); \
191 s = skipspace(s); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
193 }
79072805 194
9f68db38 195/* grandfather return to old style */
3280af22 196#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 197
8fa7f367
JH
198#ifdef DEBUGGING
199
bbf60fe6
DM
200/* how to interpret the yylval associated with the token */
201enum token_type {
202 TOKENTYPE_NONE,
203 TOKENTYPE_IVAL,
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
205 TOKENTYPE_PVAL,
206 TOKENTYPE_OPVAL,
207 TOKENTYPE_GVVAL
208};
209
27da23d5
JH
210static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
9041c2e3 212{
bbf60fe6
DM
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
279};
280
281/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 282
bbf60fe6 283STATIC int
f5bd084c 284S_tokereport(pTHX_ I32 rv)
bbf60fe6
DM
285{
286 if (DEBUG_T_TEST) {
bfed75c6 287 const char *name = Nullch;
bbf60fe6 288 enum token_type type = TOKENTYPE_NONE;
f54cb97a 289 const struct debug_tokens *p;
396482e1 290 SV* const report = newSVpvs("<== ");
bbf60fe6 291
f54cb97a 292 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
293 if (p->token == (int)rv) {
294 name = p->name;
295 type = p->type;
296 break;
297 }
298 }
299 if (name)
54667de8 300 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
301 else if ((char)rv > ' ' && (char)rv < '~')
302 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
303 else if (!rv)
396482e1 304 sv_catpvs(report, "EOF");
bbf60fe6
DM
305 else
306 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
307 switch (type) {
308 case TOKENTYPE_NONE:
309 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
310 break;
311 case TOKENTYPE_IVAL:
e4584336 312 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
313 break;
314 case TOKENTYPE_OPNUM:
315 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
316 PL_op_name[yylval.ival]);
317 break;
318 case TOKENTYPE_PVAL:
319 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
320 break;
321 case TOKENTYPE_OPVAL:
b6007c36 322 if (yylval.opval) {
401441c0 323 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 324 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
325 if (yylval.opval->op_type == OP_CONST) {
326 Perl_sv_catpvf(aTHX_ report, " %s",
327 SvPEEK(cSVOPx_sv(yylval.opval)));
328 }
329
330 }
401441c0 331 else
396482e1 332 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
333 break;
334 }
b6007c36 335 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
336 };
337 return (int)rv;
998054bd
SC
338}
339
b6007c36
DM
340
341/* print the buffer with suitable escapes */
342
343STATIC void
344S_printbuf(pTHX_ const char* fmt, const char* s)
345{
396482e1 346 SV* const tmp = newSVpvs("");
b6007c36
DM
347 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
348 SvREFCNT_dec(tmp);
349}
350
8fa7f367
JH
351#endif
352
ffb4593c
NT
353/*
354 * S_ao
355 *
c963b151
BD
356 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
357 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
358 */
359
76e3520e 360STATIC int
cea2e8a9 361S_ao(pTHX_ int toketype)
a0d0e21e 362{
3280af22
NIS
363 if (*PL_bufptr == '=') {
364 PL_bufptr++;
a0d0e21e
LW
365 if (toketype == ANDAND)
366 yylval.ival = OP_ANDASSIGN;
367 else if (toketype == OROR)
368 yylval.ival = OP_ORASSIGN;
c963b151
BD
369 else if (toketype == DORDOR)
370 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
371 toketype = ASSIGNOP;
372 }
373 return toketype;
374}
375
ffb4593c
NT
376/*
377 * S_no_op
378 * When Perl expects an operator and finds something else, no_op
379 * prints the warning. It always prints "<something> found where
380 * operator expected. It prints "Missing semicolon on previous line?"
381 * if the surprise occurs at the start of the line. "do you need to
382 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
383 * where the compiler doesn't know if foo is a method call or a function.
384 * It prints "Missing operator before end of line" if there's nothing
385 * after the missing operator, or "... before <...>" if there is something
386 * after the missing operator.
387 */
388
76e3520e 389STATIC void
bfed75c6 390S_no_op(pTHX_ const char *what, char *s)
463ee0b2 391{
9d4ba2ae
AL
392 char * const oldbp = PL_bufptr;
393 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 394
1189a94a
GS
395 if (!s)
396 s = oldbp;
07c798fb 397 else
1189a94a 398 PL_bufptr = s;
cea2e8a9 399 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
400 if (ckWARN_d(WARN_SYNTAX)) {
401 if (is_first)
402 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
403 "\t(Missing semicolon on previous line?)\n");
404 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 405 const char *t;
56da5a46
RGS
406 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
407 if (t < PL_bufptr && isSPACE(*t))
408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
409 "\t(Do you need to predeclare %.*s?)\n",
551405c4 410 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
411 }
412 else {
413 assert(s >= oldbp);
414 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 415 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 416 }
07c798fb 417 }
3280af22 418 PL_bufptr = oldbp;
8990e307
LW
419}
420
ffb4593c
NT
421/*
422 * S_missingterm
423 * Complain about missing quote/regexp/heredoc terminator.
424 * If it's called with (char *)NULL then it cauterizes the line buffer.
425 * If we're in a delimited string and the delimiter is a control
426 * character, it's reformatted into a two-char sequence like ^C.
427 * This is fatal.
428 */
429
76e3520e 430STATIC void
cea2e8a9 431S_missingterm(pTHX_ char *s)
8990e307
LW
432{
433 char tmpbuf[3];
434 char q;
435 if (s) {
9d4ba2ae 436 char * const nl = strrchr(s,'\n');
d2719217 437 if (nl)
8990e307
LW
438 *nl = '\0';
439 }
9d116dd7
JH
440 else if (
441#ifdef EBCDIC
442 iscntrl(PL_multi_close)
443#else
444 PL_multi_close < 32 || PL_multi_close == 127
445#endif
446 ) {
8990e307 447 *tmpbuf = '^';
585ec06d 448 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
449 tmpbuf[2] = '\0';
450 s = tmpbuf;
451 }
452 else {
eb160463 453 *tmpbuf = (char)PL_multi_close;
8990e307
LW
454 tmpbuf[1] = '\0';
455 s = tmpbuf;
456 }
457 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 458 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 459}
79072805 460
ef89dcc3 461#define FEATURE_IS_ENABLED(name) \
0d863452 462 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 463 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
464/*
465 * S_feature_is_enabled
466 * Check whether the named feature is enabled.
467 */
468STATIC bool
469S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
470{
471 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
472 char he_name[32] = "feature_";
473 (void) strncpy(&he_name[8], name, 24);
474
475 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
476}
477
ffb4593c
NT
478/*
479 * Perl_deprecate
ffb4593c
NT
480 */
481
79072805 482void
bfed75c6 483Perl_deprecate(pTHX_ const char *s)
a0d0e21e 484{
599cee73 485 if (ckWARN(WARN_DEPRECATED))
9014280d 486 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
487}
488
12bcd1a6 489void
bfed75c6 490Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
491{
492 /* This function should NOT be called for any new deprecated warnings */
493 /* Use Perl_deprecate instead */
494 /* */
495 /* It is here to maintain backward compatibility with the pre-5.8 */
496 /* warnings category hierarchy. The "deprecated" category used to */
497 /* live under the "syntax" category. It is now a top-level category */
498 /* in its own right. */
499
500 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 501 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
502 "Use of %s is deprecated", s);
503}
504
ffb4593c 505/*
9cbb5ea2
GS
506 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
507 * utf16-to-utf8-reversed.
ffb4593c
NT
508 */
509
c39cd008
GS
510#ifdef PERL_CR_FILTER
511static void
512strip_return(SV *sv)
513{
95a20fc0 514 register const char *s = SvPVX_const(sv);
9d4ba2ae 515 register const char * const e = s + SvCUR(sv);
c39cd008
GS
516 /* outer loop optimized to do nothing if there are no CR-LFs */
517 while (s < e) {
518 if (*s++ == '\r' && *s == '\n') {
519 /* hit a CR-LF, need to copy the rest */
520 register char *d = s - 1;
521 *d++ = *s++;
522 while (s < e) {
523 if (*s == '\r' && s[1] == '\n')
524 s++;
525 *d++ = *s++;
526 }
527 SvCUR(sv) -= s - d;
528 return;
529 }
530 }
531}
a868473f 532
76e3520e 533STATIC I32
c39cd008 534S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 535{
f54cb97a 536 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
537 if (count > 0 && !maxlen)
538 strip_return(sv);
539 return count;
a868473f
NIS
540}
541#endif
542
ffb4593c
NT
543/*
544 * Perl_lex_start
9cbb5ea2
GS
545 * Initialize variables. Uses the Perl save_stack to save its state (for
546 * recursive calls to the parser).
ffb4593c
NT
547 */
548
a0d0e21e 549void
864dbfa3 550Perl_lex_start(pTHX_ SV *line)
79072805 551{
cfd0369c 552 const char *s;
8990e307
LW
553 STRLEN len;
554
3280af22
NIS
555 SAVEI32(PL_lex_dojoin);
556 SAVEI32(PL_lex_brackets);
3280af22
NIS
557 SAVEI32(PL_lex_casemods);
558 SAVEI32(PL_lex_starts);
559 SAVEI32(PL_lex_state);
7766f137 560 SAVEVPTR(PL_lex_inpat);
3280af22 561 SAVEI32(PL_lex_inwhat);
18b09519
GS
562 if (PL_lex_state == LEX_KNOWNEXT) {
563 I32 toke = PL_nexttoke;
564 while (--toke >= 0) {
565 SAVEI32(PL_nexttype[toke]);
566 SAVEVPTR(PL_nextval[toke]);
567 }
568 SAVEI32(PL_nexttoke);
18b09519 569 }
57843af0 570 SAVECOPLINE(PL_curcop);
3280af22
NIS
571 SAVEPPTR(PL_bufptr);
572 SAVEPPTR(PL_bufend);
573 SAVEPPTR(PL_oldbufptr);
574 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
575 SAVEPPTR(PL_last_lop);
576 SAVEPPTR(PL_last_uni);
3280af22
NIS
577 SAVEPPTR(PL_linestart);
578 SAVESPTR(PL_linestr);
8edd5f42
RGS
579 SAVEGENERICPV(PL_lex_brackstack);
580 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 581 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
582 SAVESPTR(PL_lex_stuff);
583 SAVEI32(PL_lex_defer);
09bef843 584 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 585 SAVESPTR(PL_lex_repl);
bebdddfc
GS
586 SAVEINT(PL_expect);
587 SAVEINT(PL_lex_expect);
3280af22
NIS
588
589 PL_lex_state = LEX_NORMAL;
590 PL_lex_defer = 0;
591 PL_expect = XSTATE;
592 PL_lex_brackets = 0;
a02a5408
JC
593 Newx(PL_lex_brackstack, 120, char);
594 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
595 PL_lex_casemods = 0;
596 *PL_lex_casestack = '\0';
597 PL_lex_dojoin = 0;
598 PL_lex_starts = 0;
599 PL_lex_stuff = Nullsv;
600 PL_lex_repl = Nullsv;
601 PL_lex_inpat = 0;
76be56bc 602 PL_nexttoke = 0;
3280af22 603 PL_lex_inwhat = 0;
09bef843 604 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
605 PL_linestr = line;
606 if (SvREADONLY(PL_linestr))
607 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 608 s = SvPV_const(PL_linestr, len);
6f27f9a7 609 if (!len || s[len-1] != ';') {
3280af22
NIS
610 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
611 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 612 sv_catpvs(PL_linestr, "\n;");
8990e307 613 }
3280af22
NIS
614 SvTEMP_off(PL_linestr);
615 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
616 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 617 PL_last_lop = PL_last_uni = Nullch;
3280af22 618 PL_rsfp = 0;
79072805 619}
a687059c 620
ffb4593c
NT
621/*
622 * Perl_lex_end
9cbb5ea2
GS
623 * Finalizer for lexing operations. Must be called when the parser is
624 * done with the lexer.
ffb4593c
NT
625 */
626
463ee0b2 627void
864dbfa3 628Perl_lex_end(pTHX)
463ee0b2 629{
3280af22 630 PL_doextract = FALSE;
463ee0b2
LW
631}
632
ffb4593c
NT
633/*
634 * S_incline
635 * This subroutine has nothing to do with tilting, whether at windmills
636 * or pinball tables. Its name is short for "increment line". It
57843af0 637 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 638 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
639 * # line 500 "foo.pm"
640 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
641 */
642
76e3520e 643STATIC void
cea2e8a9 644S_incline(pTHX_ char *s)
463ee0b2
LW
645{
646 char *t;
647 char *n;
73659bf1 648 char *e;
463ee0b2 649 char ch;
463ee0b2 650
57843af0 651 CopLINE_inc(PL_curcop);
463ee0b2
LW
652 if (*s++ != '#')
653 return;
bf4acbe4 654 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
655 if (strnEQ(s, "line", 4))
656 s += 4;
657 else
658 return;
084592ab 659 if (SPACE_OR_TAB(*s))
73659bf1 660 s++;
4e553d73 661 else
73659bf1 662 return;
bf4acbe4 663 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
664 if (!isDIGIT(*s))
665 return;
666 n = s;
667 while (isDIGIT(*s))
668 s++;
bf4acbe4 669 while (SPACE_OR_TAB(*s))
463ee0b2 670 s++;
73659bf1 671 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 672 s++;
73659bf1
GS
673 e = t + 1;
674 }
463ee0b2 675 else {
463ee0b2 676 for (t = s; !isSPACE(*t); t++) ;
73659bf1 677 e = t;
463ee0b2 678 }
bf4acbe4 679 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
680 e++;
681 if (*e != '\n' && *e != '\0')
682 return; /* false alarm */
683
463ee0b2
LW
684 ch = *t;
685 *t = '\0';
f4dd75d9 686 if (t - s > 0) {
8a5ee598 687#ifndef USE_ITHREADS
c4420975 688 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
689 STRLEN tmplen = cf ? strlen(cf) : 0;
690 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
691 /* must copy *{"::_<(eval N)[oldfilename:L]"}
692 * to *{"::_<newfilename"} */
693 char smallbuf[256], smallbuf2[256];
694 char *tmpbuf, *tmpbuf2;
8a5ee598 695 GV **gvp, *gv2;
e66cf94c
RGS
696 STRLEN tmplen2 = strlen(s);
697 if (tmplen + 3 < sizeof smallbuf)
698 tmpbuf = smallbuf;
699 else
700 Newx(tmpbuf, tmplen + 3, char);
701 if (tmplen2 + 3 < sizeof smallbuf2)
702 tmpbuf2 = smallbuf2;
703 else
704 Newx(tmpbuf2, tmplen2 + 3, char);
705 tmpbuf[0] = tmpbuf2[0] = '_';
706 tmpbuf[1] = tmpbuf2[1] = '<';
707 memcpy(tmpbuf + 2, cf, ++tmplen);
708 memcpy(tmpbuf2 + 2, s, ++tmplen2);
709 ++tmplen; ++tmplen2;
8a5ee598
RGS
710 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
711 if (gvp) {
712 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
713 if (!isGV(gv2))
714 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
715 /* adjust ${"::_<newfilename"} to store the new file name */
716 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
717 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
718 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
719 }
e66cf94c
RGS
720 if (tmpbuf != smallbuf) Safefree(tmpbuf);
721 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
722 }
8a5ee598 723#endif
05ec9bb3 724 CopFILE_free(PL_curcop);
57843af0 725 CopFILE_set(PL_curcop, s);
f4dd75d9 726 }
463ee0b2 727 *t = ch;
57843af0 728 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
729}
730
ffb4593c
NT
731/*
732 * S_skipspace
733 * Called to gobble the appropriate amount and type of whitespace.
734 * Skips comments as well.
735 */
736
76e3520e 737STATIC char *
cea2e8a9 738S_skipspace(pTHX_ register char *s)
a687059c 739{
3280af22 740 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 741 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
742 s++;
743 return s;
744 }
745 for (;;) {
fd049845 746 STRLEN prevlen;
09bef843 747 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 748 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
749 while (s < PL_bufend && isSPACE(*s)) {
750 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
751 incline(s);
752 }
ffb4593c
NT
753
754 /* comment */
3280af22
NIS
755 if (s < PL_bufend && *s == '#') {
756 while (s < PL_bufend && *s != '\n')
463ee0b2 757 s++;
60e6418e 758 if (s < PL_bufend) {
463ee0b2 759 s++;
60e6418e
GS
760 if (PL_in_eval && !PL_rsfp) {
761 incline(s);
762 continue;
763 }
764 }
463ee0b2 765 }
ffb4593c
NT
766
767 /* only continue to recharge the buffer if we're at the end
768 * of the buffer, we're not reading from a source filter, and
769 * we're in normal lexing mode
770 */
09bef843
SB
771 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
772 PL_lex_state == LEX_FORMLINE)
463ee0b2 773 return s;
ffb4593c
NT
774
775 /* try to recharge the buffer */
9cbb5ea2
GS
776 if ((s = filter_gets(PL_linestr, PL_rsfp,
777 (prevlen = SvCUR(PL_linestr)))) == Nullch)
778 {
779 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
780 if (PL_minus_p) {
781 sv_setpv(PL_linestr,
782 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 783 PL_minus_n = PL_minus_p = 0;
a0d0e21e 784 }
01a19ab0
NC
785 else if (PL_minus_n) {
786 sv_setpvn(PL_linestr, ";}", 2);
787 PL_minus_n = 0;
788 }
a0d0e21e 789 else
4147a61b 790 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
791
792 /* reset variables for next time we lex */
9cbb5ea2
GS
793 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
794 = SvPVX(PL_linestr);
3280af22 795 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 796 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
797
798 /* Close the filehandle. Could be from -P preprocessor,
799 * STDIN, or a regular file. If we were reading code from
800 * STDIN (because the commandline held no -e or filename)
801 * then we don't close it, we reset it so the code can
802 * read from STDIN too.
803 */
804
3280af22
NIS
805 if (PL_preprocess && !PL_in_eval)
806 (void)PerlProc_pclose(PL_rsfp);
807 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
808 PerlIO_clearerr(PL_rsfp);
8990e307 809 else
3280af22
NIS
810 (void)PerlIO_close(PL_rsfp);
811 PL_rsfp = Nullfp;
463ee0b2
LW
812 return s;
813 }
ffb4593c
NT
814
815 /* not at end of file, so we only read another line */
09bef843
SB
816 /* make corresponding updates to old pointers, for yyerror() */
817 oldprevlen = PL_oldbufptr - PL_bufend;
818 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
819 if (PL_last_uni)
820 oldunilen = PL_last_uni - PL_bufend;
821 if (PL_last_lop)
822 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
823 PL_linestart = PL_bufptr = s + prevlen;
824 PL_bufend = s + SvCUR(PL_linestr);
825 s = PL_bufptr;
09bef843
SB
826 PL_oldbufptr = s + oldprevlen;
827 PL_oldoldbufptr = s + oldoldprevlen;
828 if (PL_last_uni)
829 PL_last_uni = s + oldunilen;
830 if (PL_last_lop)
831 PL_last_lop = s + oldloplen;
a0d0e21e 832 incline(s);
ffb4593c
NT
833
834 /* debugger active and we're not compiling the debugger code,
835 * so store the line into the debugger's array of lines
836 */
3280af22 837 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 838 SV * const sv = NEWSV(85,0);
8990e307
LW
839
840 sv_upgrade(sv, SVt_PVMG);
3280af22 841 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 842 (void)SvIOK_on(sv);
45977657 843 SvIV_set(sv, 0);
36c7798d 844 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 845 }
463ee0b2 846 }
a687059c 847}
378cc40b 848
ffb4593c
NT
849/*
850 * S_check_uni
851 * Check the unary operators to ensure there's no ambiguity in how they're
852 * used. An ambiguous piece of code would be:
853 * rand + 5
854 * This doesn't mean rand() + 5. Because rand() is a unary operator,
855 * the +5 is its argument.
856 */
857
76e3520e 858STATIC void
cea2e8a9 859S_check_uni(pTHX)
ba106d47 860{
2f3197b3 861 char *s;
a0d0e21e 862 char *t;
2f3197b3 863
3280af22 864 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 865 return;
3280af22
NIS
866 while (isSPACE(*PL_last_uni))
867 PL_last_uni++;
7e2040f0 868 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 869 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 870 return;
0453d815 871 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 872 const char ch = *s;
0453d815 873 *s = '\0';
9014280d 874 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 875 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
876 PL_last_uni);
877 *s = ch;
878 }
2f3197b3
LW
879}
880
ffb4593c
NT
881/*
882 * LOP : macro to build a list operator. Its behaviour has been replaced
883 * with a subroutine, S_lop() for which LOP is just another name.
884 */
885
a0d0e21e
LW
886#define LOP(f,x) return lop(f,x,s)
887
ffb4593c
NT
888/*
889 * S_lop
890 * Build a list operator (or something that might be one). The rules:
891 * - if we have a next token, then it's a list operator [why?]
892 * - if the next thing is an opening paren, then it's a function
893 * - else it's a list operator
894 */
895
76e3520e 896STATIC I32
a0be28da 897S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 898{
79072805 899 yylval.ival = f;
35c8bce7 900 CLINE;
3280af22
NIS
901 PL_expect = x;
902 PL_bufptr = s;
903 PL_last_lop = PL_oldbufptr;
eb160463 904 PL_last_lop_op = (OPCODE)f;
3280af22 905 if (PL_nexttoke)
bbf60fe6 906 return REPORT(LSTOP);
79072805 907 if (*s == '(')
bbf60fe6 908 return REPORT(FUNC);
79072805
LW
909 s = skipspace(s);
910 if (*s == '(')
bbf60fe6 911 return REPORT(FUNC);
79072805 912 else
bbf60fe6 913 return REPORT(LSTOP);
79072805
LW
914}
915
ffb4593c
NT
916/*
917 * S_force_next
9cbb5ea2 918 * When the lexer realizes it knows the next token (for instance,
ffb4593c 919 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
920 * to know what token to return the next time the lexer is called. Caller
921 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
922 * handles the token correctly.
ffb4593c
NT
923 */
924
4e553d73 925STATIC void
cea2e8a9 926S_force_next(pTHX_ I32 type)
79072805 927{
3280af22
NIS
928 PL_nexttype[PL_nexttoke] = type;
929 PL_nexttoke++;
930 if (PL_lex_state != LEX_KNOWNEXT) {
931 PL_lex_defer = PL_lex_state;
932 PL_lex_expect = PL_expect;
933 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
934 }
935}
936
d0a148a6
NC
937STATIC SV *
938S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
939{
9d4ba2ae 940 SV * const sv = newSVpvn(start,len);
bfed75c6 941 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
942 SvUTF8_on(sv);
943 return sv;
944}
945
ffb4593c
NT
946/*
947 * S_force_word
948 * When the lexer knows the next thing is a word (for instance, it has
949 * just seen -> and it knows that the next char is a word char, then
950 * it calls S_force_word to stick the next word into the PL_next lookahead.
951 *
952 * Arguments:
b1b65b59 953 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
954 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
955 * int check_keyword : if true, Perl checks to make sure the word isn't
956 * a keyword (do this if the word is a label, e.g. goto FOO)
957 * int allow_pack : if true, : characters will also be allowed (require,
958 * use, etc. do this)
9cbb5ea2 959 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
960 */
961
76e3520e 962STATIC char *
cea2e8a9 963S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 964{
463ee0b2
LW
965 register char *s;
966 STRLEN len;
4e553d73 967
463ee0b2
LW
968 start = skipspace(start);
969 s = start;
7e2040f0 970 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 971 (allow_pack && *s == ':') ||
15f0808c 972 (allow_initial_tick && *s == '\'') )
a0d0e21e 973 {
3280af22
NIS
974 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
975 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
976 return start;
977 if (token == METHOD) {
978 s = skipspace(s);
979 if (*s == '(')
3280af22 980 PL_expect = XTERM;
463ee0b2 981 else {
3280af22 982 PL_expect = XOPERATOR;
463ee0b2 983 }
79072805 984 }
d0a148a6
NC
985 PL_nextval[PL_nexttoke].opval
986 = (OP*)newSVOP(OP_CONST,0,
987 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 988 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
989 force_next(token);
990 }
991 return s;
992}
993
ffb4593c
NT
994/*
995 * S_force_ident
9cbb5ea2 996 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
997 * text only contains the "foo" portion. The first argument is a pointer
998 * to the "foo", and the second argument is the type symbol to prefix.
999 * Forces the next token to be a "WORD".
9cbb5ea2 1000 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1001 */
1002
76e3520e 1003STATIC void
bfed75c6 1004S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
1005{
1006 if (s && *s) {
bfed75c6 1007 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 1008 PL_nextval[PL_nexttoke].opval = o;
79072805 1009 force_next(WORD);
748a9306 1010 if (kind) {
11343788 1011 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1012 /* XXX see note in pp_entereval() for why we forgo typo
1013 warnings if the symbol must be introduced in an eval.
1014 GSAR 96-10-12 */
f776e3cd 1015 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
a0d0e21e
LW
1016 kind == '$' ? SVt_PV :
1017 kind == '@' ? SVt_PVAV :
1018 kind == '%' ? SVt_PVHV :
1019 SVt_PVGV
1020 );
748a9306 1021 }
79072805
LW
1022 }
1023}
1024
1571675a
GS
1025NV
1026Perl_str_to_version(pTHX_ SV *sv)
1027{
1028 NV retval = 0.0;
1029 NV nshift = 1.0;
1030 STRLEN len;
cfd0369c 1031 const char *start = SvPV_const(sv,len);
9d4ba2ae 1032 const char * const end = start + len;
504618e9 1033 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1034 while (start < end) {
ba210ebe 1035 STRLEN skip;
1571675a
GS
1036 UV n;
1037 if (utf)
9041c2e3 1038 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1039 else {
1040 n = *(U8*)start;
1041 skip = 1;
1042 }
1043 retval += ((NV)n)/nshift;
1044 start += skip;
1045 nshift *= 1000;
1046 }
1047 return retval;
1048}
1049
4e553d73 1050/*
ffb4593c
NT
1051 * S_force_version
1052 * Forces the next token to be a version number.
e759cc13
RGS
1053 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1054 * and if "guessing" is TRUE, then no new token is created (and the caller
1055 * must use an alternative parsing method).
ffb4593c
NT
1056 */
1057
76e3520e 1058STATIC char *
e759cc13 1059S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
1060{
1061 OP *version = Nullop;
44dcb63b 1062 char *d;
89bfa8cd
PP
1063
1064 s = skipspace(s);
1065
44dcb63b 1066 d = s;
dd629d5b 1067 if (*d == 'v')
44dcb63b 1068 d++;
44dcb63b 1069 if (isDIGIT(*d)) {
e759cc13
RGS
1070 while (isDIGIT(*d) || *d == '_' || *d == '.')
1071 d++;
9f3d182e 1072 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1073 SV *ver;
b73d6f50 1074 s = scan_num(s, &yylval);
89bfa8cd 1075 version = yylval.opval;
dd629d5b
GS
1076 ver = cSVOPx(version)->op_sv;
1077 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1078 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1079 SvNV_set(ver, str_to_version(ver));
1571675a 1080 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1081 }
89bfa8cd 1082 }
e759cc13
RGS
1083 else if (guessing)
1084 return s;
89bfa8cd
PP
1085 }
1086
1087 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1088 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1089 force_next(WORD);
89bfa8cd 1090
e759cc13 1091 return s;
89bfa8cd
PP
1092}
1093
ffb4593c
NT
1094/*
1095 * S_tokeq
1096 * Tokenize a quoted string passed in as an SV. It finds the next
1097 * chunk, up to end of string or a backslash. It may make a new
1098 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1099 * turns \\ into \.
1100 */
1101
76e3520e 1102STATIC SV *
cea2e8a9 1103S_tokeq(pTHX_ SV *sv)
79072805
LW
1104{
1105 register char *s;
1106 register char *send;
1107 register char *d;
b3ac6de7
IZ
1108 STRLEN len = 0;
1109 SV *pv = sv;
79072805
LW
1110
1111 if (!SvLEN(sv))
b3ac6de7 1112 goto finish;
79072805 1113
a0d0e21e 1114 s = SvPV_force(sv, len);
21a311ee 1115 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1116 goto finish;
463ee0b2 1117 send = s + len;
79072805
LW
1118 while (s < send && *s != '\\')
1119 s++;
1120 if (s == send)
b3ac6de7 1121 goto finish;
79072805 1122 d = s;
be4731d2 1123 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1124 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1125 if (SvUTF8(sv))
1126 SvUTF8_on(pv);
1127 }
79072805
LW
1128 while (s < send) {
1129 if (*s == '\\') {
a0d0e21e 1130 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1131 s++; /* all that, just for this */
1132 }
1133 *d++ = *s++;
1134 }
1135 *d = '\0';
95a20fc0 1136 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1137 finish:
3280af22 1138 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1139 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1140 return sv;
1141}
1142
ffb4593c
NT
1143/*
1144 * Now come three functions related to double-quote context,
1145 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1146 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1147 * interact with PL_lex_state, and create fake ( ... ) argument lists
1148 * to handle functions and concatenation.
1149 * They assume that whoever calls them will be setting up a fake
1150 * join call, because each subthing puts a ',' after it. This lets
1151 * "lower \luPpEr"
1152 * become
1153 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1154 *
1155 * (I'm not sure whether the spurious commas at the end of lcfirst's
1156 * arguments and join's arguments are created or not).
1157 */
1158
1159/*
1160 * S_sublex_start
1161 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1162 *
1163 * Pattern matching will set PL_lex_op to the pattern-matching op to
1164 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1165 *
1166 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1167 *
1168 * Everything else becomes a FUNC.
1169 *
1170 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1171 * had an OP_CONST or OP_READLINE). This just sets us up for a
1172 * call to S_sublex_push().
1173 */
1174
76e3520e 1175STATIC I32
cea2e8a9 1176S_sublex_start(pTHX)
79072805 1177{
0d46e09a 1178 register const I32 op_type = yylval.ival;
79072805
LW
1179
1180 if (op_type == OP_NULL) {
3280af22
NIS
1181 yylval.opval = PL_lex_op;
1182 PL_lex_op = Nullop;
79072805
LW
1183 return THING;
1184 }
1185 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1186 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1187
1188 if (SvTYPE(sv) == SVt_PVIV) {
1189 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1190 STRLEN len;
cfd0369c 1191 const char *p = SvPV_const(sv, len);
f54cb97a 1192 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1193 if (SvUTF8(sv))
1194 SvUTF8_on(nsv);
b3ac6de7
IZ
1195 SvREFCNT_dec(sv);
1196 sv = nsv;
4e553d73 1197 }
b3ac6de7 1198 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1199 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1200 /* Allow <FH> // "foo" */
1201 if (op_type == OP_READLINE)
1202 PL_expect = XTERMORDORDOR;
79072805
LW
1203 return THING;
1204 }
1205
3280af22
NIS
1206 PL_sublex_info.super_state = PL_lex_state;
1207 PL_sublex_info.sub_inwhat = op_type;
1208 PL_sublex_info.sub_op = PL_lex_op;
1209 PL_lex_state = LEX_INTERPPUSH;
55497cff 1210
3280af22
NIS
1211 PL_expect = XTERM;
1212 if (PL_lex_op) {
1213 yylval.opval = PL_lex_op;
1214 PL_lex_op = Nullop;
55497cff
PP
1215 return PMFUNC;
1216 }
1217 else
1218 return FUNC;
1219}
1220
ffb4593c
NT
1221/*
1222 * S_sublex_push
1223 * Create a new scope to save the lexing state. The scope will be
1224 * ended in S_sublex_done. Returns a '(', starting the function arguments
1225 * to the uc, lc, etc. found before.
1226 * Sets PL_lex_state to LEX_INTERPCONCAT.
1227 */
1228
76e3520e 1229STATIC I32
cea2e8a9 1230S_sublex_push(pTHX)
55497cff 1231{
27da23d5 1232 dVAR;
f46d017c 1233 ENTER;
55497cff 1234
3280af22
NIS
1235 PL_lex_state = PL_sublex_info.super_state;
1236 SAVEI32(PL_lex_dojoin);
1237 SAVEI32(PL_lex_brackets);
3280af22
NIS
1238 SAVEI32(PL_lex_casemods);
1239 SAVEI32(PL_lex_starts);
1240 SAVEI32(PL_lex_state);
7766f137 1241 SAVEVPTR(PL_lex_inpat);
3280af22 1242 SAVEI32(PL_lex_inwhat);
57843af0 1243 SAVECOPLINE(PL_curcop);
3280af22 1244 SAVEPPTR(PL_bufptr);
8452ff4b 1245 SAVEPPTR(PL_bufend);
3280af22
NIS
1246 SAVEPPTR(PL_oldbufptr);
1247 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1248 SAVEPPTR(PL_last_lop);
1249 SAVEPPTR(PL_last_uni);
3280af22
NIS
1250 SAVEPPTR(PL_linestart);
1251 SAVESPTR(PL_linestr);
8edd5f42
RGS
1252 SAVEGENERICPV(PL_lex_brackstack);
1253 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1254
1255 PL_linestr = PL_lex_stuff;
1256 PL_lex_stuff = Nullsv;
1257
9cbb5ea2
GS
1258 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1259 = SvPVX(PL_linestr);
3280af22 1260 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1261 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1262 SAVEFREESV(PL_linestr);
1263
1264 PL_lex_dojoin = FALSE;
1265 PL_lex_brackets = 0;
a02a5408
JC
1266 Newx(PL_lex_brackstack, 120, char);
1267 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1268 PL_lex_casemods = 0;
1269 *PL_lex_casestack = '\0';
1270 PL_lex_starts = 0;
1271 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1272 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1273
1274 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1275 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1276 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1277 else
3280af22 1278 PL_lex_inpat = Nullop;
79072805 1279
55497cff 1280 return '(';
79072805
LW
1281}
1282
ffb4593c
NT
1283/*
1284 * S_sublex_done
1285 * Restores lexer state after a S_sublex_push.
1286 */
1287
76e3520e 1288STATIC I32
cea2e8a9 1289S_sublex_done(pTHX)
79072805 1290{
27da23d5 1291 dVAR;
3280af22 1292 if (!PL_lex_starts++) {
396482e1 1293 SV * const sv = newSVpvs("");
9aa983d2
JH
1294 if (SvUTF8(PL_linestr))
1295 SvUTF8_on(sv);
3280af22 1296 PL_expect = XOPERATOR;
9aa983d2 1297 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1298 return THING;
1299 }
1300
3280af22
NIS
1301 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1302 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1303 return yylex();
79072805
LW
1304 }
1305
ffb4593c 1306 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1307 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1308 PL_linestr = PL_lex_repl;
1309 PL_lex_inpat = 0;
1310 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1311 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1312 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1313 SAVEFREESV(PL_linestr);
1314 PL_lex_dojoin = FALSE;
1315 PL_lex_brackets = 0;
3280af22
NIS
1316 PL_lex_casemods = 0;
1317 *PL_lex_casestack = '\0';
1318 PL_lex_starts = 0;
25da4f38 1319 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1320 PL_lex_state = LEX_INTERPNORMAL;
1321 PL_lex_starts++;
e9fa98b2
HS
1322 /* we don't clear PL_lex_repl here, so that we can check later
1323 whether this is an evalled subst; that means we rely on the
1324 logic to ensure sublex_done() is called again only via the
1325 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1326 }
e9fa98b2 1327 else {
3280af22 1328 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1329 PL_lex_repl = Nullsv;
1330 }
79072805 1331 return ',';
ffed7fef
LW
1332 }
1333 else {
f46d017c 1334 LEAVE;
3280af22
NIS
1335 PL_bufend = SvPVX(PL_linestr);
1336 PL_bufend += SvCUR(PL_linestr);
1337 PL_expect = XOPERATOR;
09bef843 1338 PL_sublex_info.sub_inwhat = 0;
79072805 1339 return ')';
ffed7fef
LW
1340 }
1341}
1342
02aa26ce
NT
1343/*
1344 scan_const
1345
1346 Extracts a pattern, double-quoted string, or transliteration. This
1347 is terrifying code.
1348
3280af22
NIS
1349 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1350 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1351 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1352
9b599b2a
GS
1353 Returns a pointer to the character scanned up to. Iff this is
1354 advanced from the start pointer supplied (ie if anything was
1355 successfully parsed), will leave an OP for the substring scanned
1356 in yylval. Caller must intuit reason for not parsing further
1357 by looking at the next characters herself.
1358
02aa26ce
NT
1359 In patterns:
1360 backslashes:
1361 double-quoted style: \r and \n
1362 regexp special ones: \D \s
1363 constants: \x3
1364 backrefs: \1 (deprecated in substitution replacements)
1365 case and quoting: \U \Q \E
1366 stops on @ and $, but not for $ as tail anchor
1367
1368 In transliterations:
1369 characters are VERY literal, except for - not at the start or end
1370 of the string, which indicates a range. scan_const expands the
1371 range to the full set of intermediate characters.
1372
1373 In double-quoted strings:
1374 backslashes:
1375 double-quoted style: \r and \n
1376 constants: \x3
1377 backrefs: \1 (deprecated)
1378 case and quoting: \U \Q \E
1379 stops on @ and $
1380
1381 scan_const does *not* construct ops to handle interpolated strings.
1382 It stops processing as soon as it finds an embedded $ or @ variable
1383 and leaves it to the caller to work out what's going on.
1384
da6eedaa 1385 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1386
1387 $ in pattern could be $foo or could be tail anchor. Assumption:
1388 it's a tail anchor if $ is the last thing in the string, or if it's
1389 followed by one of ")| \n\t"
1390
1391 \1 (backreferences) are turned into $1
1392
1393 The structure of the code is
1394 while (there's a character to process) {
1395 handle transliteration ranges
1396 skip regexp comments
1397 skip # initiated comments in //x patterns
1398 check for embedded @foo
1399 check for embedded scalars
1400 if (backslash) {
1401 leave intact backslashes from leave (below)
1402 deprecate \1 in strings and sub replacements
1403 handle string-changing backslashes \l \U \Q \E, etc.
1404 switch (what was escaped) {
1405 handle - in a transliteration (becomes a literal -)
1406 handle \132 octal characters
1407 handle 0x15 hex characters
1408 handle \cV (control V)
1409 handle printf backslashes (\f, \r, \n, etc)
1410 } (end switch)
1411 } (end if backslash)
1412 } (end while character to read)
4e553d73 1413
02aa26ce
NT
1414*/
1415
76e3520e 1416STATIC char *
cea2e8a9 1417S_scan_const(pTHX_ char *start)
79072805 1418{
3280af22 1419 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1420 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1421 register char *s = start; /* start of the constant */
1422 register char *d = SvPVX(sv); /* destination for copies */
1423 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1424 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1425 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1426 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1427 UV uv;
4c3a8340
ST
1428#ifdef EBCDIC
1429 UV literal_endpoint = 0;
1430#endif
012bcf8d 1431
dff6d3cd 1432 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1433 PL_lex_inpat
b6d5fef8 1434 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1435 : "";
79072805 1436
2b9d42f0
NIS
1437 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1438 /* If we are doing a trans and we know we want UTF8 set expectation */
1439 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1440 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1441 }
1442
1443
79072805 1444 while (s < send || dorange) {
02aa26ce 1445 /* get transliterations out of the way (they're most literal) */
3280af22 1446 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1447 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1448 if (dorange) {
1ba5c669
JH
1449 I32 i; /* current expanded character */
1450 I32 min; /* first character in range */
1451 I32 max; /* last character in range */
02aa26ce 1452
2b9d42f0 1453 if (has_utf8) {
9d4ba2ae 1454 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1455 char *e = d++;
1456 while (e-- > c)
1457 *(e + 1) = *e;
25716404 1458 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1459 /* mark the range as done, and continue */
1460 dorange = FALSE;
1461 didrange = TRUE;
1462 continue;
1463 }
2b9d42f0 1464
95a20fc0 1465 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1466 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1467 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1468 d -= 2; /* eat the first char and the - */
1469
8ada0baa
JH
1470 min = (U8)*d; /* first char in range */
1471 max = (U8)d[1]; /* last char in range */
1472
c2e66d9e 1473 if (min > max) {
01ec43d0 1474 Perl_croak(aTHX_
d1573ac7 1475 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1476 (char)min, (char)max);
c2e66d9e
GS
1477 }
1478
c7f1f016 1479#ifdef EBCDIC
4c3a8340
ST
1480 if (literal_endpoint == 2 &&
1481 ((isLOWER(min) && isLOWER(max)) ||
1482 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1483 if (isLOWER(min)) {
1484 for (i = min; i <= max; i++)
1485 if (isLOWER(i))
db42d148 1486 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1487 } else {
1488 for (i = min; i <= max; i++)
1489 if (isUPPER(i))
db42d148 1490 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1491 }
1492 }
1493 else
1494#endif
1495 for (i = min; i <= max; i++)
eb160463 1496 *d++ = (char)i;
02aa26ce
NT
1497
1498 /* mark the range as done, and continue */
79072805 1499 dorange = FALSE;
01ec43d0 1500 didrange = TRUE;
4c3a8340
ST
1501#ifdef EBCDIC
1502 literal_endpoint = 0;
1503#endif
79072805 1504 continue;
4e553d73 1505 }
02aa26ce
NT
1506
1507 /* range begins (ignore - as first or last char) */
79072805 1508 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1509 if (didrange) {
1fafa243 1510 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1511 }
2b9d42f0 1512 if (has_utf8) {
25716404 1513 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1514 s++;
1515 continue;
1516 }
79072805
LW
1517 dorange = TRUE;
1518 s++;
01ec43d0
GS
1519 }
1520 else {
1521 didrange = FALSE;
4c3a8340
ST
1522#ifdef EBCDIC
1523 literal_endpoint = 0;
1524#endif
01ec43d0 1525 }
79072805 1526 }
02aa26ce
NT
1527
1528 /* if we get here, we're not doing a transliteration */
1529
0f5d15d6
IZ
1530 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1531 except for the last char, which will be done separately. */
3280af22 1532 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1533 if (s[2] == '#') {
e994fd66 1534 while (s+1 < send && *s != ')')
db42d148 1535 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1536 }
1537 else if (s[2] == '{' /* This should match regcomp.c */
1538 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1539 {
cc6b7395 1540 I32 count = 1;
0f5d15d6 1541 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1542 char c;
1543
d9f97599
GS
1544 while (count && (c = *regparse)) {
1545 if (c == '\\' && regparse[1])
1546 regparse++;
4e553d73 1547 else if (c == '{')
cc6b7395 1548 count++;
4e553d73 1549 else if (c == '}')
cc6b7395 1550 count--;
d9f97599 1551 regparse++;
cc6b7395 1552 }
e994fd66 1553 if (*regparse != ')')
5bdf89e7 1554 regparse--; /* Leave one char for continuation. */
0f5d15d6 1555 while (s < regparse)
db42d148 1556 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1557 }
748a9306 1558 }
02aa26ce
NT
1559
1560 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1561 else if (*s == '#' && PL_lex_inpat &&
1562 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1563 while (s+1 < send && *s != '\n')
db42d148 1564 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1565 }
02aa26ce 1566
5d1d4326 1567 /* check for embedded arrays
da6eedaa 1568 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1569 */
7e2040f0 1570 else if (*s == '@' && s[1]
5d1d4326 1571 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1572 break;
02aa26ce
NT
1573
1574 /* check for embedded scalars. only stop if we're sure it's a
1575 variable.
1576 */
79072805 1577 else if (*s == '$') {
3280af22 1578 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1579 break;
6002328a 1580 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1581 break; /* in regexp, $ might be tail anchor */
1582 }
02aa26ce 1583
2b9d42f0
NIS
1584 /* End of else if chain - OP_TRANS rejoin rest */
1585
02aa26ce 1586 /* backslashes */
79072805
LW
1587 if (*s == '\\' && s+1 < send) {
1588 s++;
02aa26ce
NT
1589
1590 /* some backslashes we leave behind */
c9f97d15 1591 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1592 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1593 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1594 continue;
1595 }
02aa26ce
NT
1596
1597 /* deprecate \1 in strings and substitution replacements */
3280af22 1598 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1599 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1600 {
599cee73 1601 if (ckWARN(WARN_SYNTAX))
9014280d 1602 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1603 *--s = '$';
1604 break;
1605 }
02aa26ce
NT
1606
1607 /* string-change backslash escapes */
3280af22 1608 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1609 --s;
1610 break;
1611 }
02aa26ce
NT
1612
1613 /* if we get here, it's either a quoted -, or a digit */
79072805 1614 switch (*s) {
02aa26ce
NT
1615
1616 /* quoted - in transliterations */
79072805 1617 case '-':
3280af22 1618 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1619 *d++ = *s++;
1620 continue;
1621 }
1622 /* FALL THROUGH */
1623 default:
11b8faa4 1624 {
041457d9
DM
1625 if (isALNUM(*s) &&
1626 *s != '_' &&
1627 ckWARN(WARN_MISC))
9014280d 1628 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1629 "Unrecognized escape \\%c passed through",
1630 *s);
1631 /* default action is to copy the quoted character */
f9a63242 1632 goto default_action;
11b8faa4 1633 }
02aa26ce
NT
1634
1635 /* \132 indicates an octal constant */
79072805
LW
1636 case '0': case '1': case '2': case '3':
1637 case '4': case '5': case '6': case '7':
ba210ebe 1638 {
53305cf1
NC
1639 I32 flags = 0;
1640 STRLEN len = 3;
1641 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1642 s += len;
1643 }
012bcf8d 1644 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1645
1646 /* \x24 indicates a hex constant */
79072805 1647 case 'x':
a0ed51b3
LW
1648 ++s;
1649 if (*s == '{') {
9d4ba2ae 1650 char* const e = strchr(s, '}');
a4c04bdc
NC
1651 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1652 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1653 STRLEN len;
355860ce 1654
53305cf1 1655 ++s;
adaeee49 1656 if (!e) {
a0ed51b3 1657 yyerror("Missing right brace on \\x{}");
355860ce 1658 continue;
ba210ebe 1659 }
53305cf1
NC
1660 len = e - s;
1661 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1662 s = e + 1;
a0ed51b3
LW
1663 }
1664 else {
ba210ebe 1665 {
53305cf1 1666 STRLEN len = 2;
a4c04bdc 1667 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1668 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1669 s += len;
1670 }
012bcf8d
GS
1671 }
1672
1673 NUM_ESCAPE_INSERT:
1674 /* Insert oct or hex escaped character.
301d3d20 1675 * There will always enough room in sv since such
db42d148 1676 * escapes will be longer than any UTF-8 sequence
301d3d20 1677 * they can end up as. */
ba7cea30 1678
c7f1f016
NIS
1679 /* We need to map to chars to ASCII before doing the tests
1680 to cover EBCDIC
1681 */
c4d5f83a 1682 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1683 if (!has_utf8 && uv > 255) {
301d3d20
JH
1684 /* Might need to recode whatever we have
1685 * accumulated so far if it contains any
1686 * hibit chars.
1687 *
1688 * (Can't we keep track of that and avoid
1689 * this rescan? --jhi)
012bcf8d 1690 */
c7f1f016 1691 int hicount = 0;
63cd0674
NIS
1692 U8 *c;
1693 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1694 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1695 hicount++;
db42d148 1696 }
012bcf8d 1697 }
63cd0674 1698 if (hicount) {
9d4ba2ae 1699 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1700 U8 *src, *dst;
1701 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1702 src = (U8 *)d - 1;
1703 dst = src+hicount;
1704 d += hicount;
cfd0369c 1705 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1706 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1707 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1708 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1709 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1710 }
1711 else {
63cd0674 1712 *dst-- = *src;
012bcf8d 1713 }
c7f1f016 1714 src--;
012bcf8d
GS
1715 }
1716 }
1717 }
1718
9aa983d2 1719 if (has_utf8 || uv > 255) {
9041c2e3 1720 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1721 has_utf8 = TRUE;
f9a63242
JH
1722 if (PL_lex_inwhat == OP_TRANS &&
1723 PL_sublex_info.sub_op) {
1724 PL_sublex_info.sub_op->op_private |=
1725 (PL_lex_repl ? OPpTRANS_FROM_UTF
1726 : OPpTRANS_TO_UTF);
f9a63242 1727 }
012bcf8d 1728 }
a0ed51b3 1729 else {
012bcf8d 1730 *d++ = (char)uv;
a0ed51b3 1731 }
012bcf8d
GS
1732 }
1733 else {
c4d5f83a 1734 *d++ = (char) uv;
a0ed51b3 1735 }
79072805 1736 continue;
02aa26ce 1737
b239daa5 1738 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1739 case 'N':
55eda711 1740 ++s;
423cee85
JH
1741 if (*s == '{') {
1742 char* e = strchr(s, '}');
155aba94 1743 SV *res;
423cee85 1744 STRLEN len;
cfd0369c 1745 const char *str;
4e553d73 1746
423cee85 1747 if (!e) {
5777a3f7 1748 yyerror("Missing right brace on \\N{}");
423cee85
JH
1749 e = s - 1;
1750 goto cont_scan;
1751 }
dbc0d4f2
JH
1752 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1753 /* \N{U+...} */
1754 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1755 PERL_SCAN_DISALLOW_PREFIX;
1756 s += 3;
1757 len = e - s;
1758 uv = grok_hex(s, &len, &flags, NULL);
1759 s = e + 1;
1760 goto NUM_ESCAPE_INSERT;
1761 }
55eda711
JH
1762 res = newSVpvn(s + 1, e - s - 1);
1763 res = new_constant( Nullch, 0, "charnames",
1764 res, Nullsv, "\\N{...}" );
f9a63242
JH
1765 if (has_utf8)
1766 sv_utf8_upgrade(res);
cfd0369c 1767 str = SvPV_const(res,len);
1c47067b
JH
1768#ifdef EBCDIC_NEVER_MIND
1769 /* charnames uses pack U and that has been
1770 * recently changed to do the below uni->native
1771 * mapping, so this would be redundant (and wrong,
1772 * the code point would be doubly converted).
1773 * But leave this in just in case the pack U change
1774 * gets revoked, but the semantics is still
1775 * desireable for charnames. --jhi */
cddc7ef4 1776 {
cfd0369c 1777 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1778
1779 if (uv < 0x100) {
89ebb4a3 1780 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1781
1782 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1783 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1784 str = SvPV_const(res, len);
cddc7ef4
JH
1785 }
1786 }
1787#endif
89491803 1788 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1789 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1790 SvCUR_set(sv, d - ostart);
1791 SvPOK_on(sv);
e4f3eed8 1792 *d = '\0';
f08d6ad9 1793 sv_utf8_upgrade(sv);
d2f449dd 1794 /* this just broke our allocation above... */
eb160463 1795 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1796 d = SvPVX(sv) + SvCUR(sv);
89491803 1797 has_utf8 = TRUE;
f08d6ad9 1798 }
eb160463 1799 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1800 const char * const odest = SvPVX_const(sv);
423cee85 1801
8973db79 1802 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1803 d = SvPVX(sv) + (d - odest);
1804 }
1805 Copy(str, d, len, char);
1806 d += len;
1807 SvREFCNT_dec(res);
1808 cont_scan:
1809 s = e + 1;
1810 }
1811 else
5777a3f7 1812 yyerror("Missing braces on \\N{}");
423cee85
JH
1813 continue;
1814
02aa26ce 1815 /* \c is a control character */
79072805
LW
1816 case 'c':
1817 s++;
961ce445 1818 if (s < send) {
ba210ebe 1819 U8 c = *s++;
c7f1f016
NIS
1820#ifdef EBCDIC
1821 if (isLOWER(c))
1822 c = toUPPER(c);
1823#endif
db42d148 1824 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1825 }
961ce445
RGS
1826 else {
1827 yyerror("Missing control char name in \\c");
1828 }
79072805 1829 continue;
02aa26ce
NT
1830
1831 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1832 case 'b':
db42d148 1833 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1834 break;
1835 case 'n':
db42d148 1836 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1837 break;
1838 case 'r':
db42d148 1839 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1840 break;
1841 case 'f':
db42d148 1842 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1843 break;
1844 case 't':
db42d148 1845 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1846 break;
34a3fe2a 1847 case 'e':
db42d148 1848 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1849 break;
1850 case 'a':
db42d148 1851 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1852 break;
02aa26ce
NT
1853 } /* end switch */
1854
79072805
LW
1855 s++;
1856 continue;
02aa26ce 1857 } /* end if (backslash) */
4c3a8340
ST
1858#ifdef EBCDIC
1859 else
1860 literal_endpoint++;
1861#endif
02aa26ce 1862
f9a63242 1863 default_action:
2b9d42f0
NIS
1864 /* If we started with encoded form, or already know we want it
1865 and then encode the next character */
1866 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1867 STRLEN len = 1;
9d4ba2ae
AL
1868 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1869 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1870 s += len;
1871 if (need > len) {
1872 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1873 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1874 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1875 }
1876 d = (char*)uvchr_to_utf8((U8*)d, uv);
1877 has_utf8 = TRUE;
1878 }
1879 else {
1880 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1881 }
02aa26ce
NT
1882 } /* while loop to process each character */
1883
1884 /* terminate the string and set up the sv */
79072805 1885 *d = '\0';
95a20fc0 1886 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1887 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1888 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1889
79072805 1890 SvPOK_on(sv);
9f4817db 1891 if (PL_encoding && !has_utf8) {
d0063567
DK
1892 sv_recode_to_utf8(sv, PL_encoding);
1893 if (SvUTF8(sv))
1894 has_utf8 = TRUE;
9f4817db 1895 }
2b9d42f0 1896 if (has_utf8) {
7e2040f0 1897 SvUTF8_on(sv);
2b9d42f0 1898 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1899 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1900 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1901 }
1902 }
79072805 1903
02aa26ce 1904 /* shrink the sv if we allocated more than we used */
79072805 1905 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1906 SvPV_shrink_to_cur(sv);
79072805 1907 }
02aa26ce 1908
9b599b2a 1909 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1910 if (s > PL_bufptr) {
1911 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1912 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1913 sv, Nullsv,
4e553d73 1914 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1915 ? "tr"
3280af22 1916 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1917 ? "s"
1918 : "qq")));
79072805 1919 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1920 } else
8990e307 1921 SvREFCNT_dec(sv);
79072805
LW
1922 return s;
1923}
1924
ffb4593c
NT
1925/* S_intuit_more
1926 * Returns TRUE if there's more to the expression (e.g., a subscript),
1927 * FALSE otherwise.
ffb4593c
NT
1928 *
1929 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1930 *
1931 * ->[ and ->{ return TRUE
1932 * { and [ outside a pattern are always subscripts, so return TRUE
1933 * if we're outside a pattern and it's not { or [, then return FALSE
1934 * if we're in a pattern and the first char is a {
1935 * {4,5} (any digits around the comma) returns FALSE
1936 * if we're in a pattern and the first char is a [
1937 * [] returns FALSE
1938 * [SOMETHING] has a funky algorithm to decide whether it's a
1939 * character class or not. It has to deal with things like
1940 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1941 * anything else returns TRUE
1942 */
1943
9cbb5ea2
GS
1944/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1945
76e3520e 1946STATIC int
cea2e8a9 1947S_intuit_more(pTHX_ register char *s)
79072805 1948{
3280af22 1949 if (PL_lex_brackets)
79072805
LW
1950 return TRUE;
1951 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1952 return TRUE;
1953 if (*s != '{' && *s != '[')
1954 return FALSE;
3280af22 1955 if (!PL_lex_inpat)
79072805
LW
1956 return TRUE;
1957
1958 /* In a pattern, so maybe we have {n,m}. */
1959 if (*s == '{') {
1960 s++;
1961 if (!isDIGIT(*s))
1962 return TRUE;
1963 while (isDIGIT(*s))
1964 s++;
1965 if (*s == ',')
1966 s++;
1967 while (isDIGIT(*s))
1968 s++;
1969 if (*s == '}')
1970 return FALSE;
1971 return TRUE;
1972
1973 }
1974
1975 /* On the other hand, maybe we have a character class */
1976
1977 s++;
1978 if (*s == ']' || *s == '^')
1979 return FALSE;
1980 else {
ffb4593c 1981 /* this is terrifying, and it works */
79072805
LW
1982 int weight = 2; /* let's weigh the evidence */
1983 char seen[256];
f27ffc4a 1984 unsigned char un_char = 255, last_un_char;
9d4ba2ae 1985 const char * const send = strchr(s,']');
3280af22 1986 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1987
1988 if (!send) /* has to be an expression */
1989 return TRUE;
1990
1991 Zero(seen,256,char);
1992 if (*s == '$')
1993 weight -= 3;
1994 else if (isDIGIT(*s)) {
1995 if (s[1] != ']') {
1996 if (isDIGIT(s[1]) && s[2] == ']')
1997 weight -= 10;
1998 }
1999 else
2000 weight -= 100;
2001 }
2002 for (; s < send; s++) {
2003 last_un_char = un_char;
2004 un_char = (unsigned char)*s;
2005 switch (*s) {
2006 case '@':
2007 case '&':
2008 case '$':
2009 weight -= seen[un_char] * 10;
7e2040f0 2010 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 2011 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
f776e3cd
NC
2012 if ((int)strlen(tmpbuf) > 1
2013 && gv_fetchpv(tmpbuf, 0, SVt_PV))
79072805
LW
2014 weight -= 100;
2015 else
2016 weight -= 10;
2017 }
2018 else if (*s == '$' && s[1] &&
93a17b20
LW
2019 strchr("[#!%*<>()-=",s[1])) {
2020 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2021 weight -= 10;
2022 else
2023 weight -= 1;
2024 }
2025 break;
2026 case '\\':
2027 un_char = 254;
2028 if (s[1]) {
93a17b20 2029 if (strchr("wds]",s[1]))
79072805
LW
2030 weight += 100;
2031 else if (seen['\''] || seen['"'])
2032 weight += 1;
93a17b20 2033 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2034 weight += 40;
2035 else if (isDIGIT(s[1])) {
2036 weight += 40;
2037 while (s[1] && isDIGIT(s[1]))
2038 s++;
2039 }
2040 }
2041 else
2042 weight += 100;
2043 break;
2044 case '-':
2045 if (s[1] == '\\')
2046 weight += 50;
93a17b20 2047 if (strchr("aA01! ",last_un_char))
79072805 2048 weight += 30;
93a17b20 2049 if (strchr("zZ79~",s[1]))
79072805 2050 weight += 30;
f27ffc4a
GS
2051 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2052 weight -= 5; /* cope with negative subscript */
79072805
LW
2053 break;
2054 default:
3792a11b
NC
2055 if (!isALNUM(last_un_char)
2056 && !(last_un_char == '$' || last_un_char == '@'
2057 || last_un_char == '&')
2058 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2059 char *d = tmpbuf;
2060 while (isALPHA(*s))
2061 *d++ = *s++;
2062 *d = '\0';
2063 if (keyword(tmpbuf, d - tmpbuf))
2064 weight -= 150;
2065 }
2066 if (un_char == last_un_char + 1)
2067 weight += 5;
2068 weight -= seen[un_char];
2069 break;
2070 }
2071 seen[un_char]++;
2072 }
2073 if (weight >= 0) /* probably a character class */
2074 return FALSE;
2075 }
2076
2077 return TRUE;
2078}
ffed7fef 2079
ffb4593c
NT
2080/*
2081 * S_intuit_method
2082 *
2083 * Does all the checking to disambiguate
2084 * foo bar
2085 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2086 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2087 *
2088 * First argument is the stuff after the first token, e.g. "bar".
2089 *
2090 * Not a method if bar is a filehandle.
2091 * Not a method if foo is a subroutine prototyped to take a filehandle.
2092 * Not a method if it's really "Foo $bar"
2093 * Method if it's "foo $bar"
2094 * Not a method if it's really "print foo $bar"
2095 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2096 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2097 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2098 * =>
2099 */
2100
76e3520e 2101STATIC int
62d55b22 2102S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e
LW
2103{
2104 char *s = start + (*start == '$');
3280af22 2105 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2106 STRLEN len;
2107 GV* indirgv;
2108
2109 if (gv) {
62d55b22 2110 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2111 return 0;
62d55b22
NC
2112 if (cv) {
2113 if (SvPOK(cv)) {
2114 const char *proto = SvPVX_const(cv);
2115 if (proto) {
2116 if (*proto == ';')
2117 proto++;
2118 if (*proto == '*')
2119 return 0;
2120 }
b6c543e3
IZ
2121 }
2122 } else
a0d0e21e
LW
2123 gv = 0;
2124 }
8903cb82 2125 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2126 /* start is the beginning of the possible filehandle/object,
2127 * and s is the end of it
2128 * tmpbuf is a copy of it
2129 */
2130
a0d0e21e 2131 if (*start == '$') {
3280af22 2132 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2133 return 0;
2134 s = skipspace(s);
3280af22
NIS
2135 PL_bufptr = start;
2136 PL_expect = XREF;
a0d0e21e
LW
2137 return *s == '(' ? FUNCMETH : METHOD;
2138 }
2139 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2140 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2141 len -= 2;
2142 tmpbuf[len] = '\0';
2143 goto bare_package;
2144 }
f776e3cd 2145 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
8ebc5c01 2146 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2147 return 0;
2148 /* filehandle or package name makes it a method */
89bfa8cd 2149 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2150 s = skipspace(s);
3280af22 2151 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2152 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2153 bare_package:
3280af22 2154 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2155 newSVpvn(tmpbuf,len));
3280af22
NIS
2156 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2157 PL_expect = XTERM;
a0d0e21e 2158 force_next(WORD);
3280af22 2159 PL_bufptr = s;
a0d0e21e
LW
2160 return *s == '(' ? FUNCMETH : METHOD;
2161 }
2162 }
2163 return 0;
2164}
2165
ffb4593c
NT
2166/*
2167 * S_incl_perldb
2168 * Return a string of Perl code to load the debugger. If PERL5DB
2169 * is set, it will return the contents of that, otherwise a
2170 * compile-time require of perl5db.pl.
2171 */
2172
bfed75c6 2173STATIC const char*
cea2e8a9 2174S_incl_perldb(pTHX)
a0d0e21e 2175{
3280af22 2176 if (PL_perldb) {
9d4ba2ae 2177 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2178
2179 if (pdb)
2180 return pdb;
93189314 2181 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2182 return "BEGIN { require 'perl5db.pl' }";
2183 }
2184 return "";
2185}
2186
2187
16d20bd9 2188/* Encoded script support. filter_add() effectively inserts a
4e553d73 2189 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2190 * Note that the filter function only applies to the current source file
2191 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2192 *
2193 * The datasv parameter (which may be NULL) can be used to pass
2194 * private data to this instance of the filter. The filter function
2195 * can recover the SV using the FILTER_DATA macro and use it to
2196 * store private buffers and state information.
2197 *
2198 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2199 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2200 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2201 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2202 * private use must be set using malloc'd pointers.
2203 */
16d20bd9
AD
2204
2205SV *
864dbfa3 2206Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2207{
f4c556ac
GS
2208 if (!funcp)
2209 return Nullsv;
2210
3280af22
NIS
2211 if (!PL_rsfp_filters)
2212 PL_rsfp_filters = newAV();
16d20bd9 2213 if (!datasv)
8c52afec 2214 datasv = NEWSV(255,0);
862a34c6 2215 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2216 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2217 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2218 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2219 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2220 av_unshift(PL_rsfp_filters, 1);
2221 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2222 return(datasv);
2223}
4e553d73 2224
16d20bd9
AD
2225
2226/* Delete most recently added instance of this filter function. */
a0d0e21e 2227void
864dbfa3 2228Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2229{
e0c19803 2230 SV *datasv;
24801a4b 2231
33073adb 2232#ifdef DEBUGGING
8141890a 2233 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2234#endif
3280af22 2235 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2236 return;
2237 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2238 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2239 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2240 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2241 IoANY(datasv) = (void *)NULL;
3280af22 2242 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2243
16d20bd9
AD
2244 return;
2245 }
2246 /* we need to search for the correct entry and clear it */
cea2e8a9 2247 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2248}
2249
2250
1de9afcd
RGS
2251/* Invoke the idxth filter function for the current rsfp. */
2252/* maxlen 0 = read one text line */
16d20bd9 2253I32
864dbfa3 2254Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2255{
16d20bd9
AD
2256 filter_t funcp;
2257 SV *datasv = NULL;
e50aee73 2258
3280af22 2259 if (!PL_rsfp_filters)
16d20bd9 2260 return -1;
1de9afcd 2261 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2262 /* Provide a default input filter to make life easy. */
2263 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2264 DEBUG_P(PerlIO_printf(Perl_debug_log,
2265 "filter_read %d: from rsfp\n", idx));
4e553d73 2266 if (maxlen) {
16d20bd9
AD
2267 /* Want a block */
2268 int len ;
f54cb97a 2269 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2270
2271 /* ensure buf_sv is large enough */
eb160463 2272 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2273 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2274 if (PerlIO_error(PL_rsfp))
37120919
AD
2275 return -1; /* error */
2276 else
2277 return 0 ; /* end of file */
2278 }
16d20bd9
AD
2279 SvCUR_set(buf_sv, old_len + len) ;
2280 } else {
2281 /* Want a line */
3280af22
NIS
2282 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2283 if (PerlIO_error(PL_rsfp))
37120919
AD
2284 return -1; /* error */
2285 else
2286 return 0 ; /* end of file */
2287 }
16d20bd9
AD
2288 }
2289 return SvCUR(buf_sv);
2290 }
2291 /* Skip this filter slot if filter has been deleted */
1de9afcd 2292 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2293 DEBUG_P(PerlIO_printf(Perl_debug_log,
2294 "filter_read %d: skipped (filter deleted)\n",
2295 idx));
16d20bd9
AD
2296 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2297 }
2298 /* Get function pointer hidden within datasv */
8141890a 2299 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2300 DEBUG_P(PerlIO_printf(Perl_debug_log,
2301 "filter_read %d: via function %p (%s)\n",
cfd0369c 2302 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2303 /* Call function. The function is expected to */
2304 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2305 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2306 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2307}
2308
76e3520e 2309STATIC char *
cea2e8a9 2310S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2311{
c39cd008 2312#ifdef PERL_CR_FILTER
3280af22 2313 if (!PL_rsfp_filters) {
c39cd008 2314 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2315 }
2316#endif
3280af22 2317 if (PL_rsfp_filters) {
55497cff
PP
2318 if (!append)
2319 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2320 if (FILTER_READ(0, sv, 0) > 0)
2321 return ( SvPVX(sv) ) ;
2322 else
2323 return Nullch ;
2324 }
9d116dd7 2325 else
fd049845 2326 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2327}
2328
01ec43d0 2329STATIC HV *
7fc63493 2330S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2331{
2332 GV *gv;
2333
01ec43d0 2334 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2335 return PL_curstash;
2336
2337 if (len > 2 &&
2338 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
f776e3cd 2339 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
01ec43d0
GS
2340 {
2341 return GvHV(gv); /* Foo:: */
def3634b
GS
2342 }
2343
2344 /* use constant CLASS => 'MyClass' */
f776e3cd 2345 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
def3634b
GS
2346 SV *sv;
2347 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2348 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2349 }
2350 }
2351
2352 return gv_stashpv(pkgname, FALSE);
2353}
a0d0e21e 2354
468aa647 2355STATIC char *
cc6ed77d 2356S_tokenize_use(pTHX_ int is_use, char *s) {
468aa647
RGS
2357 if (PL_expect != XSTATE)
2358 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2359 is_use ? "use" : "no"));
2360 s = skipspace(s);
2361 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2362 s = force_version(s, TRUE);
2363 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2364 PL_nextval[PL_nexttoke].opval = Nullop;
2365 force_next(WORD);
2366 }
2367 else if (*s == 'v') {
2368 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2369 s = force_version(s, FALSE);
2370 }
2371 }
2372 else {
2373 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2374 s = force_version(s, FALSE);
2375 }
2376 yylval.ival = is_use;
2377 return s;
2378}
748a9306 2379#ifdef DEBUGGING
27da23d5 2380 static const char* const exp_name[] =
09bef843 2381 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2382 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2383 };
748a9306 2384#endif
463ee0b2 2385
02aa26ce
NT
2386/*
2387 yylex
2388
2389 Works out what to call the token just pulled out of the input
2390 stream. The yacc parser takes care of taking the ops we return and
2391 stitching them into a tree.
2392
2393 Returns:
2394 PRIVATEREF
2395
2396 Structure:
2397 if read an identifier
2398 if we're in a my declaration
2399 croak if they tried to say my($foo::bar)
2400 build the ops for a my() declaration
2401 if it's an access to a my() variable
2402 are we in a sort block?
2403 croak if my($a); $a <=> $b
2404 build ops for access to a my() variable
2405 if in a dq string, and they've said @foo and we can't find @foo
2406 croak
2407 build ops for a bareword
2408 if we already built the token before, use it.
2409*/
2410
20141f0e 2411
dba4d153
JH
2412#ifdef __SC__
2413#pragma segment Perl_yylex
2414#endif
dba4d153 2415int
dba4d153 2416Perl_yylex(pTHX)
20141f0e 2417{
3afc138a 2418 register char *s = PL_bufptr;
378cc40b 2419 register char *d;
463ee0b2 2420 STRLEN len;
aa7440fb 2421 bool bof = FALSE;
a687059c 2422
bbf60fe6 2423 DEBUG_T( {
396482e1 2424 SV* tmp = newSVpvs("");
b6007c36
DM
2425 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2426 (IV)CopLINE(PL_curcop),
2427 lex_state_names[PL_lex_state],
2428 exp_name[PL_expect],
2429 pv_display(tmp, s, strlen(s), 0, 60));
2430 SvREFCNT_dec(tmp);
bbf60fe6 2431 } );
02aa26ce 2432 /* check if there's an identifier for us to look at */
ba979b31 2433 if (PL_pending_ident)
bbf60fe6 2434 return REPORT(S_pending_ident(aTHX));
bbce6d69 2435
02aa26ce
NT
2436 /* no identifier pending identification */
2437
3280af22 2438 switch (PL_lex_state) {
79072805
LW
2439#ifdef COMMENTARY
2440 case LEX_NORMAL: /* Some compilers will produce faster */
2441 case LEX_INTERPNORMAL: /* code if we comment these out. */
2442 break;
2443#endif
2444
09bef843 2445 /* when we've already built the next token, just pull it out of the queue */
79072805 2446 case LEX_KNOWNEXT:
3280af22
NIS
2447 PL_nexttoke--;
2448 yylval = PL_nextval[PL_nexttoke];
2449 if (!PL_nexttoke) {
2450 PL_lex_state = PL_lex_defer;
2451 PL_expect = PL_lex_expect;
2452 PL_lex_defer = LEX_NORMAL;
463ee0b2 2453 }
bbf60fe6 2454 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2455
02aa26ce 2456 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2457 when we get here, PL_bufptr is at the \
02aa26ce 2458 */
79072805
LW
2459 case LEX_INTERPCASEMOD:
2460#ifdef DEBUGGING
3280af22 2461 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2462 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2463#endif
02aa26ce 2464 /* handle \E or end of string */
3280af22 2465 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2466 /* if at a \E */
3280af22 2467 if (PL_lex_casemods) {
f54cb97a 2468 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2469 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2470
3792a11b
NC
2471 if (PL_bufptr != PL_bufend
2472 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2473 PL_bufptr += 2;
2474 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2475 }
bbf60fe6 2476 return REPORT(')');
79072805 2477 }
3280af22
NIS
2478 if (PL_bufptr != PL_bufend)
2479 PL_bufptr += 2;
2480 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2481 return yylex();
79072805
LW
2482 }
2483 else {
607df283 2484 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2485 "### Saw case modifier\n"); });
3280af22 2486 s = PL_bufptr + 1;
6e909404
JH
2487 if (s[1] == '\\' && s[2] == 'E') {
2488 PL_bufptr = s + 3;
2489 PL_lex_state = LEX_INTERPCONCAT;
2490 return yylex();
a0d0e21e 2491 }
6e909404 2492 else {
90771dc0 2493 I32 tmp;
6e909404
JH
2494 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2495 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2496 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2497 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2498 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2499 return REPORT(')');
6e909404
JH
2500 }
2501 if (PL_lex_casemods > 10)
2502 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2503 PL_lex_casestack[PL_lex_casemods++] = *s;
2504 PL_lex_casestack[PL_lex_casemods] = '\0';
2505 PL_lex_state = LEX_INTERPCONCAT;
2506 PL_nextval[PL_nexttoke].ival = 0;
2507 force_next('(');
2508 if (*s == 'l')
2509 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2510 else if (*s == 'u')
2511 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2512 else if (*s == 'L')
2513 PL_nextval[PL_nexttoke].ival = OP_LC;
2514 else if (*s == 'U')
2515 PL_nextval[PL_nexttoke].ival = OP_UC;
2516 else if (*s == 'Q')
2517 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2518 else
2519 Perl_croak(aTHX_ "panic: yylex");
2520 PL_bufptr = s + 1;
a0d0e21e 2521 }
79072805 2522 force_next(FUNC);
3280af22
NIS
2523 if (PL_lex_starts) {
2524 s = PL_bufptr;
2525 PL_lex_starts = 0;
131b3ad0
DM
2526 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2527 if (PL_lex_casemods == 1 && PL_lex_inpat)
2528 OPERATOR(',');
2529 else
2530 Aop(OP_CONCAT);
79072805
LW
2531 }
2532 else
cea2e8a9 2533 return yylex();
79072805
LW
2534 }
2535
55497cff 2536 case LEX_INTERPPUSH:
bbf60fe6 2537 return REPORT(sublex_push());
55497cff 2538
79072805 2539 case LEX_INTERPSTART:
3280af22 2540 if (PL_bufptr == PL_bufend)
bbf60fe6 2541 return REPORT(sublex_done());
607df283 2542 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2543 "### Interpolated variable\n"); });
3280af22
NIS
2544 PL_expect = XTERM;
2545 PL_lex_dojoin = (*PL_bufptr == '@');
2546 PL_lex_state = LEX_INTERPNORMAL;
2547 if (PL_lex_dojoin) {
2548 PL_nextval[PL_nexttoke].ival = 0;
79072805 2549 force_next(',');
a0d0e21e 2550 force_ident("\"", '$');
3280af22 2551 PL_nextval[PL_nexttoke].ival = 0;
79072805 2552 force_next('$');
3280af22 2553 PL_nextval[PL_nexttoke].ival = 0;
79072805 2554 force_next('(');
3280af22 2555 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2556 force_next(FUNC);
2557 }
3280af22
NIS
2558 if (PL_lex_starts++) {
2559 s = PL_bufptr;
131b3ad0
DM
2560 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2561 if (!PL_lex_casemods && PL_lex_inpat)
2562 OPERATOR(',');
2563 else
2564 Aop(OP_CONCAT);
79072805 2565 }
cea2e8a9 2566 return yylex();
79072805
LW
2567
2568 case LEX_INTERPENDMAYBE:
3280af22
NIS
2569 if (intuit_more(PL_bufptr)) {
2570 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2571 break;
2572 }
2573 /* FALL THROUGH */
2574
2575 case LEX_INTERPEND:
3280af22
NIS
2576 if (PL_lex_dojoin) {
2577 PL_lex_dojoin = FALSE;
2578 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2579 return REPORT(')');
79072805 2580 }
43a16006 2581 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2582 && SvEVALED(PL_lex_repl))
43a16006 2583 {
e9fa98b2 2584 if (PL_bufptr != PL_bufend)
cea2e8a9 2585 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2586 PL_lex_repl = Nullsv;
2587 }
79072805
LW
2588 /* FALLTHROUGH */
2589 case LEX_INTERPCONCAT:
2590#ifdef DEBUGGING
3280af22 2591 if (PL_lex_brackets)
cea2e8a9 2592 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2593#endif
3280af22 2594 if (PL_bufptr == PL_bufend)
bbf60fe6 2595 return REPORT(sublex_done());
79072805 2596
3280af22
NIS
2597 if (SvIVX(PL_linestr) == '\'') {
2598 SV *sv = newSVsv(PL_linestr);
2599 if (!PL_lex_inpat)
76e3520e 2600 sv = tokeq(sv);
3280af22 2601 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2602 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2603 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2604 s = PL_bufend;
79072805
LW
2605 }
2606 else {
3280af22 2607 s = scan_const(PL_bufptr);
79072805 2608 if (*s == '\\')
3280af22 2609 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2610 else
3280af22 2611 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2612 }
2613
3280af22
NIS
2614 if (s != PL_bufptr) {
2615 PL_nextval[PL_nexttoke] = yylval;
2616 PL_expect = XTERM;
79072805 2617 force_next(THING);
131b3ad0
DM
2618 if (PL_lex_starts++) {
2619 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2620 if (!PL_lex_casemods && PL_lex_inpat)
2621 OPERATOR(',');
2622 else
2623 Aop(OP_CONCAT);
2624 }
79072805 2625 else {
3280af22 2626 PL_bufptr = s;
cea2e8a9 2627 return yylex();
79072805
LW
2628 }
2629 }
2630
cea2e8a9 2631 return yylex();
a0d0e21e 2632 case LEX_FORMLINE:
3280af22
NIS
2633 PL_lex_state = LEX_NORMAL;
2634 s = scan_formline(PL_bufptr);
2635 if (!PL_lex_formbrack)
a0d0e21e
LW
2636 goto rightbracket;
2637 OPERATOR(';');
79072805
LW
2638 }
2639
3280af22
NIS
2640 s = PL_bufptr;
2641 PL_oldoldbufptr = PL_oldbufptr;
2642 PL_oldbufptr = s;
463ee0b2
LW
2643
2644 retry:
378cc40b
LW
2645 switch (*s) {
2646 default:
7e2040f0 2647 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2648 goto keylookup;
cea2e8a9 2649 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2650 case 4:
2651 case 26:
2652 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2653 case 0:
3280af22
NIS
2654 if (!PL_rsfp) {
2655 PL_last_uni = 0;
2656 PL_last_lop = 0;
c5ee2135 2657 if (PL_lex_brackets) {
0bd48802
AL
2658 yyerror(PL_lex_formbrack
2659 ? "Format not terminated"
2660 : "Missing right curly or square bracket");
c5ee2135 2661 }
4e553d73 2662 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2663 "### Tokener got EOF\n");
5f80b19c 2664 } );
79072805 2665 TOKEN(0);
463ee0b2 2666 }
3280af22 2667 if (s++ < PL_bufend)
a687059c 2668 goto retry; /* ignore stray nulls */
3280af22
NIS
2669 PL_last_uni = 0;
2670 PL_last_lop = 0;
2671 if (!PL_in_eval && !PL_preambled) {
2672 PL_preambled = TRUE;
2673 sv_setpv(PL_linestr,incl_perldb());
2674 if (SvCUR(PL_linestr))
396482e1 2675 sv_catpvs(PL_linestr,";");
3280af22
NIS
2676 if (PL_preambleav){
2677 while(AvFILLp(PL_preambleav) >= 0) {
2678 SV *tmpsv = av_shift(PL_preambleav);
2679 sv_catsv(PL_linestr, tmpsv);
396482e1 2680 sv_catpvs(PL_linestr, ";");
91b7def8
PP
2681 sv_free(tmpsv);
2682 }
3280af22
NIS
2683 sv_free((SV*)PL_preambleav);
2684 PL_preambleav = NULL;
91b7def8 2685 }
3280af22 2686 if (PL_minus_n || PL_minus_p) {
396482e1 2687 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2688 if (PL_minus_l)
396482e1 2689 sv_catpvs(PL_linestr,"chomp;");
3280af22 2690 if (PL_minus_a) {
3280af22 2691 if (PL_minus_F) {
3792a11b
NC
2692 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2693 || *PL_splitstr == '"')
3280af22 2694 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2695 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2696 else {
c8ef6a4b
NC
2697 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2698 bytes can be used as quoting characters. :-) */
dd374669 2699 const char *splits = PL_splitstr;
91d456ae 2700 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2701 do {
2702 /* Need to \ \s */
dd374669
AL
2703 if (*splits == '\\')
2704 sv_catpvn(PL_linestr, splits, 1);
2705 sv_catpvn(PL_linestr, splits, 1);
2706 } while (*splits++);
48c4c863
NC
2707 /* This loop will embed the trailing NUL of
2708 PL_linestr as the last thing it does before
2709 terminating. */
396482e1 2710 sv_catpvs(PL_linestr, ");");
54310121 2711 }
2304df62
AD
2712 }
2713 else
396482e1 2714 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2715 }
79072805 2716 }
bc9b29db 2717 if (PL_minus_E)
396482e1
GA
2718 sv_catpvs(PL_linestr,"use feature ':5.10';");
2719 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2720 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2721 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2722 PL_last_lop = PL_last_uni = Nullch;
3280af22 2723 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2724 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2725
2726 sv_upgrade(sv, SVt_PVMG);
3280af22 2727 sv_setsv(sv,PL_linestr);
0ac0412a 2728 (void)SvIOK_on(sv);
45977657 2729 SvIV_set(sv, 0);
36c7798d 2730 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2731 }
79072805 2732 goto retry;
a687059c 2733 }
e929a76b 2734 do {
aa7440fb 2735 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2736 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2737 fake_eof:
2738 if (PL_rsfp) {
2739 if (PL_preprocess && !PL_in_eval)
2740 (void)PerlProc_pclose(PL_rsfp);
2741 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2742 PerlIO_clearerr(PL_rsfp);
2743 else
2744 (void)PerlIO_close(PL_rsfp);
2745 PL_rsfp = Nullfp;
2746 PL_doextract = FALSE;
2747 }
2748 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2749 sv_setpv(PL_linestr,PL_minus_p
2750 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2751 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2752 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2753 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2754 PL_minus_n = PL_minus_p = 0;
2755 goto retry;
2756 }
2757 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2758 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2759 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2760 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2761 }
7aa207d6
JH
2762 /* If it looks like the start of a BOM or raw UTF-16,
2763 * check if it in fact is. */
2764 else if (bof &&
2765 (*s == 0 ||
2766 *(U8*)s == 0xEF ||
2767 *(U8*)s >= 0xFE ||
2768 s[1] == 0)) {
226017aa 2769#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2770# ifdef __GNU_LIBRARY__
2771# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2772# define FTELL_FOR_PIPE_IS_BROKEN
2773# endif
e3f494f1
JH
2774# else
2775# ifdef __GLIBC__
2776# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2777# define FTELL_FOR_PIPE_IS_BROKEN
2778# endif
2779# endif
226017aa
DD
2780# endif
2781#endif
2782#ifdef FTELL_FOR_PIPE_IS_BROKEN
2783 /* This loses the possibility to detect the bof
2784 * situation on perl -P when the libc5 is being used.
2785 * Workaround? Maybe attach some extra state to PL_rsfp?
2786 */
2787 if (!PL_preprocess)
7e28d3af 2788 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2789#else
eb160463 2790 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2791#endif
7e28d3af 2792 if (bof) {
3280af22 2793 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2794 s = swallow_bom((U8*)s);
e929a76b 2795 }
378cc40b 2796 }
3280af22 2797 if (PL_doextract) {
a0d0e21e
LW
2798 /* Incest with pod. */
2799 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2800 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2801 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2802 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2803 PL_last_lop = PL_last_uni = Nullch;
3280af22 2804 PL_doextract = FALSE;
a0d0e21e 2805 }
4e553d73 2806 }
463ee0b2 2807 incline(s);
3280af22
NIS
2808 } while (PL_doextract);
2809 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2810 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2811 SV * const sv = NEWSV(85,0);
a687059c 2812
93a17b20 2813 sv_upgrade(sv, SVt_PVMG);
3280af22 2814 sv_setsv(sv,PL_linestr);
0ac0412a 2815 (void)SvIOK_on(sv);
45977657 2816 SvIV_set(sv, 0);
36c7798d 2817 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2818 }
3280af22 2819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2820 PL_last_lop = PL_last_uni = Nullch;
57843af0 2821 if (CopLINE(PL_curcop) == 1) {
3280af22 2822 while (s < PL_bufend && isSPACE(*s))
79072805 2823 s++;
a0d0e21e 2824 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2825 s++;
44a8e56a 2826 d = Nullch;
3280af22 2827 if (!PL_in_eval) {
44a8e56a
PP
2828 if (*s == '#' && *(s+1) == '!')
2829 d = s + 2;
2830#ifdef ALTERNATE_SHEBANG
2831 else {
bfed75c6 2832 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2833 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2834 d = s + (sizeof(as) - 1);
2835 }
2836#endif /* ALTERNATE_SHEBANG */
2837 }
2838 if (d) {
b8378b72 2839 char *ipath;
774d564b 2840 char *ipathend;
b8378b72 2841
774d564b 2842 while (isSPACE(*d))
b8378b72
CS
2843 d++;
2844 ipath = d;
774d564b
PP
2845 while (*d && !isSPACE(*d))
2846 d++;
2847 ipathend = d;
2848
2849#ifdef ARG_ZERO_IS_SCRIPT
2850 if (ipathend > ipath) {
2851 /*
2852 * HP-UX (at least) sets argv[0] to the script name,
2853 * which makes $^X incorrect. And Digital UNIX and Linux,
2854 * at least, set argv[0] to the basename of the Perl
2855 * interpreter. So, having found "#!", we'll set it right.
2856 */
f776e3cd
NC
2857 SV * const x
2858 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
774d564b 2859 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2860 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2861 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2862 SvSETMAGIC(x);
2863 }
556c1dec
JH
2864 else {
2865 STRLEN blen;
2866 STRLEN llen;
cfd0369c 2867 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2868 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2869 if (llen < blen) {
2870 bstart += blen - llen;
2871 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2872 sv_setpvn(x, ipath, ipathend - ipath);
2873 SvSETMAGIC(x);
2874 }
2875 }
2876 }
774d564b 2877 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2878 }
774d564b 2879#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2880
2881 /*
2882 * Look for options.
2883 */
748a9306 2884 d = instr(s,"perl -");
84e30d1a 2885 if (!d) {
748a9306 2886 d = instr(s,"perl");
84e30d1a
GS
2887#if defined(DOSISH)
2888 /* avoid getting into infinite loops when shebang
2889 * line contains "Perl" rather than "perl" */
2890 if (!d) {
2891 for (d = ipathend-4; d >= ipath; --d) {
2892 if ((*d == 'p' || *d == 'P')
2893 && !ibcmp(d, "perl", 4))
2894 {
2895 break;
2896 }
2897 }
2898 if (d < ipath)
2899 d = Nullch;
2900 }
2901#endif
2902 }
44a8e56a
PP
2903#ifdef ALTERNATE_SHEBANG
2904 /*
2905 * If the ALTERNATE_SHEBANG on this system starts with a
2906 * character that can be part of a Perl expression, then if
2907 * we see it but not "perl", we're probably looking at the
2908 * start of Perl code, not a request to hand off to some
2909 * other interpreter. Similarly, if "perl" is there, but
2910 * not in the first 'word' of the line, we assume the line
2911 * contains the start of the Perl program.
44a8e56a
PP
2912 */
2913 if (d && *s != '#') {
f54cb97a 2914 const char *c = ipath;
44a8e56a
PP
2915 while (*c && !strchr("; \t\r\n\f\v#", *c))
2916 c++;
2917 if (c < d)
2918 d = Nullch; /* "perl" not in first word; ignore */
2919 else
2920 *s = '#'; /* Don't try to parse shebang line */
2921 }
774d564b 2922#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2923#ifndef MACOS_TRADITIONAL
748a9306 2924 if (!d &&
44a8e56a 2925 *s == '#' &&
774d564b 2926 ipathend > ipath &&
3280af22 2927 !PL_minus_c &&
748a9306 2928 !instr(s,"indir") &&
3280af22 2929 instr(PL_origargv[0],"perl"))
748a9306 2930 {
27da23d5 2931 dVAR;
9f68db38 2932 char **newargv;
9f68db38 2933
774d564b
PP
2934 *ipathend = '\0';
2935 s = ipathend + 1;
3280af22 2936 while (s < PL_bufend && isSPACE(*s))
9f68db38 2937 s++;
3280af22 2938 if (s < PL_bufend) {
a02a5408 2939 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2940 newargv[1] = s;
3280af22 2941 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2942 s++;
2943 *s = '\0';
3280af22 2944 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2945 }
2946 else
3280af22 2947 newargv = PL_origargv;
774d564b 2948 newargv[0] = ipath;
b35112e7 2949 PERL_FPU_PRE_EXEC
b4748376 2950 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2951 PERL_FPU_POST_EXEC
cea2e8a9 2952 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2953 }
bf4acbe4 2954#endif
748a9306 2955 if (d) {
f54cb97a
AL
2956 const U32 oldpdb = PL_perldb;
2957 const bool oldn = PL_minus_n;
2958 const bool oldp = PL_minus_p;
748a9306
LW
2959
2960 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2961 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2962
2963 if (*d++ == '-') {
f54cb97a 2964 const bool switches_done = PL_doswitches;
8cc95fdb 2965 do {
3ffe3ee4 2966 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2967 const char * const m = d;
8cc95fdb 2968 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2969 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2970 (int)(d - m), m);
2971 }
2972 d = moreswitches(d);
2973 } while (d);
f0b2cf55
YST
2974 if (PL_doswitches && !switches_done) {
2975 int argc = PL_origargc;
2976 char **argv = PL_origargv;
2977 do {
2978 argc--,argv++;
2979 } while (argc && argv[0][0] == '-' && argv[0][1]);
2980 init_argv_symbols(argc,argv);
2981 }
155aba94
GS
2982 if ((PERLDB_LINE && !oldpdb) ||
2983 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2984 /* if we have already added "LINE: while (<>) {",
2985 we must not do it again */
748a9306 2986 {
c69006e4 2987 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2988 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2989 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2990 PL_last_lop = PL_last_uni = Nullch;
3280af22 2991 PL_preambled = FALSE;
84902520 2992 if (PERLDB_LINE)
3280af22 2993 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2994 goto retry;
2995 }
a0d0e21e 2996 }
79072805 2997 }
9f68db38 2998 }
79072805 2999 }
3280af22
NIS
3000 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3001 PL_bufptr = s;
3002 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3003 return yylex();
ae986130 3004 }
378cc40b 3005 goto retry;
4fdae800 3006 case '\r':
6a27c188 3007#ifdef PERL_STRICT_CR
cea2e8a9 3008 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3009 Perl_croak(aTHX_
cc507455 3010 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3011#endif
4fdae800 3012 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3013#ifdef MACOS_TRADITIONAL
3014 case '\312':
3015#endif
378cc40b
LW
3016 s++;
3017 goto retry;
378cc40b 3018 case '#':
e929a76b 3019 case '\n':
3280af22 3020 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3021 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3022 /* handle eval qq[#line 1 "foo"\n ...] */
3023 CopLINE_dec(PL_curcop);
3024 incline(s);
3025 }
3280af22 3026 d = PL_bufend;
a687059c 3027 while (s < d && *s != '\n')
378cc40b 3028 s++;
0f85fab0 3029 if (s < d)
378cc40b 3030 s++;
78c267c1 3031 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3032 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3033 incline(s);
3280af22
NIS
3034 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3035 PL_bufptr = s;
3036 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3037 return yylex();
a687059c 3038 }
378cc40b 3039 }
a687059c 3040 else {
378cc40b 3041 *s = '\0';
3280af22 3042 PL_bufend = s;
a687059c 3043 }
378cc40b
LW
3044 goto retry;
3045 case '-':
79072805 3046 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3047 I32 ftst = 0;
90771dc0 3048 char tmp;
e5edeb50 3049
378cc40b 3050 s++;
3280af22 3051 PL_bufptr = s;
748a9306
LW
3052 tmp = *s++;
3053
bf4acbe4 3054 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3055 s++;
3056
3057 if (strnEQ(s,"=>",2)) {
3280af22 3058 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3059 DEBUG_T( { S_printbuf(aTHX_
3060 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3061 } );
748a9306
LW
3062 OPERATOR('-'); /* unary minus */
3063 }
3280af22 3064 PL_last_uni = PL_oldbufptr;
748a9306 3065 switch (tmp) {
e5edeb50
JH
3066 case 'r': ftst = OP_FTEREAD; break;
3067 case 'w': ftst = OP_FTEWRITE; break;
3068 case 'x': ftst = OP_FTEEXEC; break;
3069 case 'o': ftst = OP_FTEOWNED; break;
3070 case 'R': ftst = OP_FTRREAD; break;
3071 case 'W': ftst = OP_FTRWRITE; break;
3072 case 'X': ftst = OP_FTREXEC; break;
3073 case 'O': ftst = OP_FTROWNED; break;
3074 case 'e': ftst = OP_FTIS; break;
3075 case 'z': ftst = OP_FTZERO; break;
3076 case 's': ftst = OP_FTSIZE; break;
3077 case 'f': ftst = OP_FTFILE; break;
3078 case 'd': ftst = OP_FTDIR; break;
3079 case 'l': ftst = OP_FTLINK; break;
3080 case 'p': ftst = OP_FTPIPE; break;
3081 case 'S': ftst = OP_FTSOCK; break;
3082 case 'u': ftst = OP_FTSUID; break;
3083 case 'g': ftst = OP_FTSGID; break;
3084 case 'k': ftst = OP_FTSVTX; break;
3085 case 'b': ftst = OP_FTBLK; break;
3086 case 'c': ftst = OP_FTCHR; break;
3087 case 't': ftst = OP_FTTTY; break;
3088 case 'T': ftst = OP_FTTEXT; break;
3089 case 'B': ftst = OP_FTBINARY; break;
3090 case 'M': case 'A': case 'C':
f776e3cd 3091 gv_fetchpv("\024",GV_ADD, SVt_PV);
e5edeb50
JH
3092 switch (tmp) {
3093 case 'M': ftst = OP_FTMTIME; break;
3094 case 'A': ftst = OP_FTATIME; break;
3095 case 'C': ftst = OP_FTCTIME; break;
3096 default: break;
3097 }
3098 break;
378cc40b 3099 default:
378cc40b
LW
3100 break;
3101 }
e5edeb50 3102 if (ftst) {
eb160463 3103 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3104 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3105 "### Saw file test %c\n", (int)tmp);
5f80b19c 3106 } );
e5edeb50
JH
3107 FTST(ftst);
3108 }
3109 else {
3110 /* Assume it was a minus followed by a one-letter named
3111 * subroutine call (or a -bareword), then. */
95c31fe3 3112 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3113 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3114 (int) tmp);
5f80b19c 3115 } );
3cf7b4c4 3116 s = --PL_bufptr;
e5edeb50 3117 }
378cc40b 3118 }
90771dc0
NC
3119 {
3120 const char tmp = *s++;
3121 if (*s == tmp) {
3122 s++;
3123 if (PL_expect == XOPERATOR)
3124 TERM(POSTDEC);
3125 else
3126 OPERATOR(PREDEC);
3127 }
3128 else if (*s == '>') {
3129 s++;
3130 s = skipspace(s);
3131 if (isIDFIRST_lazy_if(s,UTF)) {
3132 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3133 TOKEN(ARROW);
3134 }
3135 else if (*s == '$')
3136 OPERATOR(ARROW);
3137 else
3138 TERM(ARROW);
3139 }
3280af22 3140 if (PL_expect == XOPERATOR)
90771dc0
NC
3141 Aop(OP_SUBTRACT);
3142 else {
3143 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3144 check_uni();
3145 OPERATOR('-'); /* unary minus */
79072805 3146 }
2f3197b3 3147 }
79072805 3148
378cc40b 3149 case '+':
90771dc0
NC
3150 {
3151 const char tmp = *s++;
3152 if (*s == tmp) {
3153 s++;
3154 if (PL_expect == XOPERATOR)
3155 TERM(POSTINC);
3156 else
3157 OPERATOR(PREINC);
3158 }
3280af22 3159 if (PL_expect == XOPERATOR)
90771dc0
NC
3160 Aop(OP_ADD);
3161 else {
3162 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3163 check_uni();
3164 OPERATOR('+');
3165 }
2f3197b3 3166 }
a687059c 3167
378cc40b 3168 case '*':
3280af22
NIS
3169 if (PL_expect != XOPERATOR) {
3170 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3171 PL_expect = XOPERATOR;
3172 force_ident(PL_tokenbuf, '*');
3173 if (!*PL_tokenbuf)
a0d0e21e 3174 PREREF('*');
79072805 3175 TERM('*');
a687059c 3176 }
79072805
LW
3177 s++;
3178 if (*s == '*') {
a687059c 3179 s++;
79072805 3180 PWop(OP_POW);
a687059c 3181 }
79072805
LW
3182 Mop(OP_MULTIPLY);
3183
378cc40b 3184 case '%':
3280af22 3185 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3186 ++s;
3187 Mop(OP_MODULO);
a687059c 3188 }
3280af22
NIS
3189 PL_tokenbuf[0] = '%';
3190 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3191 if (!PL_tokenbuf[1]) {
bbce6d69 3192 PREREF('%');
a687059c 3193 }
3280af22 3194 PL_pending_ident = '%';
bbce6d69 3195 TERM('%');
a687059c 3196
378cc40b 3197 case '^':
79072805 3198 s++;
a0d0e21e 3199 BOop(OP_BIT_XOR);
79072805 3200 case '[':
3280af22 3201 PL_lex_brackets++;
79072805 3202 /* FALL THROUGH */
378cc40b 3203 case '~':
0d863452
RH
3204 if (s[1] == '~'
3205 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3206 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3207 {
3208 s += 2;
3209 Eop(OP_SMARTMATCH);
3210 }
378cc40b 3211 case ',':
90771dc0
NC
3212 {
3213 const char tmp = *s++;
3214 OPERATOR(tmp);
3215 }
a0d0e21e
LW
3216 case ':':
3217 if (s[1] == ':') {
3218 len = 0;
0bfa2a8a 3219 goto just_a_word_zero_gv;
a0d0e21e
LW
3220 }
3221 s++;
09bef843
SB
3222 switch (PL_expect) {
3223 OP *attrs;
3224 case XOPERATOR:
3225 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3226 break;
3227 PL_bufptr = s; /* update in case we back off */
3228 goto grabattrs;
3229 case XATTRBLOCK:
3230 PL_expect = XBLOCK;
3231 goto grabattrs;
3232 case XATTRTERM:
3233 PL_expect = XTERMBLOCK;
3234 grabattrs:
3235 s = skipspace(s);
3236 attrs = Nullop;
7e2040f0 3237 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3238 I32 tmp;
09bef843 3239 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3240 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3241 if (tmp < 0) tmp = -tmp;
3242 switch (tmp) {
3243 case KEY_or:
3244 case KEY_and:
c963b151 3245 case KEY_err:
f9829d6b
GS
3246 case KEY_for:
3247 case KEY_unless:
3248 case KEY_if:
3249 case KEY_while:
3250 case KEY_until:
3251 goto got_attrs;
3252 default:
3253 break;
3254 }
3255 }
09bef843
SB
3256 if (*d == '(') {
3257 d = scan_str(d,TRUE,TRUE);
3258 if (!d) {
09bef843
SB
3259 /* MUST advance bufptr here to avoid bogus
3260 "at end of line" context messages from yyerror().
3261 */
3262 PL_bufptr = s + len;
3263 yyerror("Unterminated attribute parameter in attribute list");
3264 if (attrs)
3265 op_free(attrs);
bbf60fe6 3266 return REPORT(0); /* EOF indicator */
09bef843
SB
3267 }
3268 }
3269 if (PL_lex_stuff) {
3270 SV *sv = newSVpvn(s, len);
3271 sv_catsv(sv, PL_lex_stuff);
3272 attrs = append_elem(OP_LIST, attrs,
3273 newSVOP(OP_CONST, 0, sv));
3274 SvREFCNT_dec(PL_lex_stuff);
3275 PL_lex_stuff = Nullsv;
3276 }
3277 else {
371fce9b
DM
3278 if (len == 6 && strnEQ(s, "unique", len)) {
3279 if (PL_in_my == KEY_our)
3280#ifdef USE_ITHREADS
3281 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3282#else
3283 ; /* skip to avoid loading attributes.pm */
3284#endif
bfed75c6 3285 else
371fce9b
DM
3286 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3287 }
3288
d3cea301
SB
3289 /* NOTE: any CV attrs applied here need to be part of
3290 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3291 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3292 CvLVALUE_on(PL_compcv);
3293 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3294 CvLOCKED_on(PL_compcv);
3295 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3296 CvMETHOD_on(PL_compcv);
06492da6
SF
3297 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3298 CvASSERTION_on(PL_compcv);
78f9721b
SM
3299 /* After we've set the flags, it could be argued that
3300 we don't need to do the attributes.pm-based setting
3301 process, and shouldn't bother appending recognized
d3cea301
SB
3302 flags. To experiment with that, uncomment the
3303 following "else". (Note that's already been
3304 uncommented. That keeps the above-applied built-in
3305 attributes from being intercepted (and possibly
3306 rejected) by a package's attribute routines, but is
3307 justified by the performance win for the common case
3308 of applying only built-in attributes.) */
0256094b 3309 else
78f9721b
SM
3310 attrs = append_elem(OP_LIST, attrs,
3311 newSVOP(OP_CONST, 0,
3312 newSVpvn(s, len)));
09bef843
SB
3313 }
3314 s = skipspace(d);
0120eecf 3315 if (*s == ':' && s[1] != ':')
09bef843 3316 s = skipspace(s+1);
0120eecf
GS
3317 else if (s == d)
3318 break; /* require real whitespace or :'s */
09bef843 3319 }
90771dc0
NC
3320 {
3321 const char tmp
3322 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3323 if (*s != ';' && *s != '}' && *s != tmp
3324 && (tmp != '=' || *s != ')')) {
3325 const char q = ((*s == '\'') ? '"' : '\'');
3326 /* If here for an expression, and parsed no attrs, back
3327 off. */
3328 if (tmp == '=' && !attrs) {
3329 s = PL_bufptr;
3330 break;
3331 }
3332 /* MUST advance bufptr here to avoid bogus "at end of line"
3333 context messages from yyerror().
3334 */
3335 PL_bufptr = s;
3336 yyerror( *s
3337 ? Perl_form(aTHX_ "Invalid separator character "
3338 "%c%c%c in attribute list", q, *s, q)
3339 : "Unterminated attribute list" );
3340 if (attrs)
3341 op_free(attrs);
3342 OPERATOR(':');
09bef843 3343 }
09bef843 3344 }
f9829d6b 3345 got_attrs:
09bef843
SB
3346 if (attrs) {
3347 PL_nextval[PL_nexttoke].opval = attrs;
3348 force_next(THING);
3349 }
3350 TOKEN(COLONATTR);
3351 }
a0d0e21e 3352 OPERATOR(':');
8990e307
LW
3353 case '(':
3354 s++;
3280af22
NIS
3355 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3356 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3357 else
3280af22 3358 PL_expect = XTERM;
4a202259 3359 s = skipspace(s);
a0d0e21e 3360 TOKEN('(');
378cc40b 3361 case ';':
f4dd75d9 3362 CLINE;
90771dc0
NC
3363 {
3364 const char tmp = *s++;
3365 OPERATOR(tmp);
3366 }
378cc40b 3367 case ')':
90771dc0
NC
3368 {
3369 const char tmp = *s++;
3370 s = skipspace(s);
3371 if (*s == '{')
3372 PREBLOCK(tmp);
3373 TERM(tmp);
3374 }
79072805
LW
3375 case ']':
3376 s++;
3280af22 3377 if (PL_lex_brackets <= 0)
d98d5fff 3378 yyerror("Unmatched right square bracket");
463ee0b2 3379 else
3280af22
NIS
3380 --PL_lex_brackets;
3381 if (PL_lex_state == LEX_INTERPNORMAL) {
3382 if (PL_lex_brackets == 0) {
a0d0e21e 3383 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3384 PL_lex_state = LEX_INTERPEND;
79072805
LW
3385 }
3386 }
4633a7c4 3387 TERM(']');
79072805
LW
3388 case '{':
3389 leftbracket:
79072805 3390 s++;
3280af22 3391 if (PL_lex_brackets > 100) {
8edd5f42 3392 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3393 }
3280af22 3394 switch (PL_expect) {
a0d0e21e 3395 case XTERM:
3280af22 3396 if (PL_lex_formbrack) {
a0d0e21e
LW
3397 s--;
3398 PRETERMBLOCK(DO);
3399 }
3280af22
NIS
3400 if (PL_oldoldbufptr == PL_last_lop)
3401 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3402 else
3280af22 3403 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3404 OPERATOR(HASHBRACK);
a0d0e21e 3405 case XOPERATOR:
bf4acbe4 3406 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3407 s++;
44a8e56a 3408 d = s;
3280af22
NIS
3409 PL_tokenbuf[0] = '\0';
3410 if (d < PL_bufend && *d == '-') {
3411 PL_tokenbuf[0] = '-';
44a8e56a 3412 d++;
bf4acbe4 3413 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3414 d++;
3415 }
7e2040f0 3416 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3417 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3418 FALSE, &len);
bf4acbe4 3419 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3420 d++;
3421 if (*d == '}') {
f54cb97a 3422 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3423 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3424 if (minus)
3425 force_next('-');
748a9306
LW
3426 }
3427 }
3428 /* FALL THROUGH */
09bef843 3429 case XATTRBLOCK:
748a9306 3430 case XBLOCK:
3280af22
NIS
3431 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3432 PL_expect = XSTATE;
a0d0e21e 3433 break;
09bef843 3434 case XATTRTERM:
a0d0e21e 3435 case XTERMBLOCK:
3280af22
NIS
3436 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3437 PL_expect = XSTATE;
a0d0e21e
LW
3438 break;
3439 default: {
f54cb97a 3440 const char *t;
3280af22
NIS
3441 if (PL_oldoldbufptr == PL_last_lop)
3442 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3443 else
3280af22 3444 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3445 s = skipspace(s);
8452ff4b
SB
3446 if (*s == '}') {
3447 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3448 PL_expect = XTERM;
3449 /* This hack is to get the ${} in the message. */
3450 PL_bufptr = s+1;
3451 yyerror("syntax error");
3452 break;
3453 }
a0d0e21e 3454 OPERATOR(HASHBRACK);
8452ff4b 3455 }
b8a4b1be
GS
3456 /* This hack serves to disambiguate a pair of curlies
3457 * as being a block or an anon hash. Normally, expectation
3458 * determines that, but in cases where we're not in a
3459 * position to expect anything in particular (like inside
3460 * eval"") we have to resolve the ambiguity. This code
3461 * covers the case where the first term in the curlies is a
3462 * quoted string. Most other cases need to be explicitly
a0288114 3463 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3464 * curly in order to force resolution as an anon hash.
3465 *
3466 * XXX should probably propagate the outer expectation
3467 * into eval"" to rely less on this hack, but that could
3468 * potentially break current behavior of eval"".
3469 * GSAR 97-07-21
3470 */
3471 t = s;
3472 if (*s == '\'' || *s == '"' || *s == '`') {
3473 /* common case: get past first string, handling escapes */
3280af22 3474 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3475 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3476 t++;
3477 t++;
a0d0e21e 3478 }
b8a4b1be 3479 else if (*s == 'q') {
3280af22 3480 if (++t < PL_bufend
b8a4b1be 3481 && (!isALNUM(*t)
3280af22 3482 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3483 && !isALNUM(*t))))
3484 {
abc667d1 3485 /* skip q//-like construct */
f54cb97a 3486 const char *tmps;
b8a4b1be
GS
3487 char open, close, term;
3488 I32 brackets = 1;
3489
3280af22 3490 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3491 t++;
abc667d1
DM
3492 /* check for q => */
3493 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3494 OPERATOR(HASHBRACK);
3495 }
b8a4b1be
GS
3496 term = *t;
3497 open = term;
3498 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3499 term = tmps[5];
3500 close = term;
3501 if (open == close)
3280af22
NIS
3502 for (t++; t < PL_bufend; t++) {
3503 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3504 t++;
6d07e5e9 3505 else if (*t == open)
b8a4b1be
GS
3506 break;
3507 }
abc667d1 3508 else {
3280af22
NIS
3509 for (t++; t < PL_bufend; t++) {
3510 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3511 t++;
6d07e5e9 3512 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3513 break;
3514 else if (*t == open)
3515 brackets++;
3516 }
abc667d1
DM
3517 }
3518 t++;
b8a4b1be 3519 }
abc667d1
DM
3520 else
3521 /* skip plain q word */
3522 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3523 t += UTF8SKIP(t);
a0d0e21e 3524 }
7e2040f0 3525 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3526 t += UTF8SKIP(t);
7e2040f0 3527 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3528 t += UTF8SKIP(t);
a0d0e21e 3529 }
3280af22 3530 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3531 t++;
b8a4b1be
GS
3532 /* if comma follows first term, call it an anon hash */
3533 /* XXX it could be a comma expression with loop modifiers */
3280af22 3534 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))