This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to podlators-2.0.2
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a
PP
63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
b6007c36
DM
65#define LEX_NORMAL 10 /* normal code (ie not within "...") */
66#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69#define LEX_INTERPSTART 6 /* expecting the start of a $var */
70
71 /* at end of code, eg "$x" followed by: */
72#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74
75#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77#define LEX_INTERPCONST 2 /* NOT USED */
78#define LEX_FORMLINE 1 /* expecting a format line */
79#define LEX_KNOWNEXT 0 /* next token known; just return it */
80
79072805 81
bbf60fe6 82#ifdef DEBUGGING
27da23d5 83static const char* const lex_state_names[] = {
bbf60fe6
DM
84 "KNOWNEXT",
85 "FORMLINE",
86 "INTERPCONST",
87 "INTERPCONCAT",
88 "INTERPENDMAYBE",
89 "INTERPEND",
90 "INTERPSTART",
91 "INTERPPUSH",
92 "INTERPCASEMOD",
93 "INTERPNORMAL",
94 "NORMAL"
95};
96#endif
97
79072805
LW
98#ifdef ff_next
99#undef ff_next
d48672a2
LW
100#endif
101
79072805 102#include "keywords.h"
fe14fcc3 103
ffb4593c
NT
104/* CLINE is a macro that ensures PL_copline has a sane value */
105
ae986130
LW
106#ifdef CLINE
107#undef CLINE
108#endif
57843af0 109#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 110
ffb4593c
NT
111/*
112 * Convenience functions to return different tokens and prime the
9cbb5ea2 113 * lexer for the next token. They all take an argument.
ffb4593c
NT
114 *
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
2d2e263d 125 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
126 * BOop : bitwise or or xor
127 * BAop : bitwise and
128 * SHop : shift operator
129 * PWop : power operator
9cbb5ea2 130 * PMop : pattern-matching operator
ffb4593c
NT
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
e5edeb50 134 * Rop : relational operator <= != gt
ffb4593c
NT
135 *
136 * Also see LOP and lop() below.
137 */
138
998054bd 139#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 140# define REPORT(retval) tokereport((I32)retval)
998054bd 141#else
bbf60fe6 142# define REPORT(retval) (retval)
998054bd
SC
143#endif
144
bbf60fe6
DM
145#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 165
a687059c
LW
166/* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
a687059c 170 */
376fcdbf
AL
171#define UNI2(f,x) { \
172 yylval.ival = f; \
173 PL_expect = x; \
174 PL_bufptr = s; \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
177 if (*s == '(') \
178 return REPORT( (int)FUNC1 ); \
179 s = skipspace(s); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 }
6f33ba73
RGS
182#define UNI(f) UNI2(f,XTERM)
183#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 184
376fcdbf
AL
185#define UNIBRACK(f) { \
186 yylval.ival = f; \
187 PL_bufptr = s; \
188 PL_last_uni = PL_oldbufptr; \
189 if (*s == '(') \
190 return REPORT( (int)FUNC1 ); \
191 s = skipspace(s); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
193 }
79072805 194
9f68db38 195/* grandfather return to old style */
3280af22 196#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 197
8fa7f367
JH
198#ifdef DEBUGGING
199
bbf60fe6
DM
200/* how to interpret the yylval associated with the token */
201enum token_type {
202 TOKENTYPE_NONE,
203 TOKENTYPE_IVAL,
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
205 TOKENTYPE_PVAL,
206 TOKENTYPE_OPVAL,
207 TOKENTYPE_GVVAL
208};
209
27da23d5
JH
210static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
9041c2e3 212{
bbf60fe6
DM
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
279};
280
281/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 282
bbf60fe6 283STATIC int
f5bd084c 284S_tokereport(pTHX_ I32 rv)
bbf60fe6 285{
97aff369 286 dVAR;
bbf60fe6 287 if (DEBUG_T_TEST) {
bfed75c6 288 const char *name = Nullch;
bbf60fe6 289 enum token_type type = TOKENTYPE_NONE;
f54cb97a 290 const struct debug_tokens *p;
396482e1 291 SV* const report = newSVpvs("<== ");
bbf60fe6 292
f54cb97a 293 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
294 if (p->token == (int)rv) {
295 name = p->name;
296 type = p->type;
297 break;
298 }
299 }
300 if (name)
54667de8 301 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
304 else if (!rv)
396482e1 305 sv_catpvs(report, "EOF");
bbf60fe6
DM
306 else
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
308 switch (type) {
309 case TOKENTYPE_NONE:
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
311 break;
312 case TOKENTYPE_IVAL:
e4584336 313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
314 break;
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
318 break;
319 case TOKENTYPE_PVAL:
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
321 break;
322 case TOKENTYPE_OPVAL:
b6007c36 323 if (yylval.opval) {
401441c0 324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 325 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
329 }
330
331 }
401441c0 332 else
396482e1 333 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
334 break;
335 }
b6007c36 336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
337 };
338 return (int)rv;
998054bd
SC
339}
340
b6007c36
DM
341
342/* print the buffer with suitable escapes */
343
344STATIC void
345S_printbuf(pTHX_ const char* fmt, const char* s)
346{
396482e1 347 SV* const tmp = newSVpvs("");
b6007c36
DM
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
349 SvREFCNT_dec(tmp);
350}
351
8fa7f367
JH
352#endif
353
ffb4593c
NT
354/*
355 * S_ao
356 *
c963b151
BD
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
359 */
360
76e3520e 361STATIC int
cea2e8a9 362S_ao(pTHX_ int toketype)
a0d0e21e 363{
97aff369 364 dVAR;
3280af22
NIS
365 if (*PL_bufptr == '=') {
366 PL_bufptr++;
a0d0e21e
LW
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
c963b151
BD
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
373 toketype = ASSIGNOP;
374 }
375 return toketype;
376}
377
ffb4593c
NT
378/*
379 * S_no_op
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
389 */
390
76e3520e 391STATIC void
bfed75c6 392S_no_op(pTHX_ const char *what, char *s)
463ee0b2 393{
97aff369 394 dVAR;
9d4ba2ae
AL
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 397
1189a94a
GS
398 if (!s)
399 s = oldbp;
07c798fb 400 else
1189a94a 401 PL_bufptr = s;
cea2e8a9 402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
403 if (ckWARN_d(WARN_SYNTAX)) {
404 if (is_first)
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 408 const char *t;
56da5a46
RGS
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
551405c4 413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
414 }
415 else {
416 assert(s >= oldbp);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 419 }
07c798fb 420 }
3280af22 421 PL_bufptr = oldbp;
8990e307
LW
422}
423
ffb4593c
NT
424/*
425 * S_missingterm
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
430 * This is fatal.
431 */
432
76e3520e 433STATIC void
cea2e8a9 434S_missingterm(pTHX_ char *s)
8990e307 435{
97aff369 436 dVAR;
8990e307
LW
437 char tmpbuf[3];
438 char q;
439 if (s) {
9d4ba2ae 440 char * const nl = strrchr(s,'\n');
d2719217 441 if (nl)
8990e307
LW
442 *nl = '\0';
443 }
9d116dd7
JH
444 else if (
445#ifdef EBCDIC
446 iscntrl(PL_multi_close)
447#else
448 PL_multi_close < 32 || PL_multi_close == 127
449#endif
450 ) {
8990e307 451 *tmpbuf = '^';
585ec06d 452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
453 tmpbuf[2] = '\0';
454 s = tmpbuf;
455 }
456 else {
eb160463 457 *tmpbuf = (char)PL_multi_close;
8990e307
LW
458 tmpbuf[1] = '\0';
459 s = tmpbuf;
460 }
461 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 463}
79072805 464
ef89dcc3 465#define FEATURE_IS_ENABLED(name) \
0d863452 466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
468/*
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
471 */
472STATIC bool
473S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
474{
97aff369 475 dVAR;
0d863452 476 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
479
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
481}
482
ffb4593c
NT
483/*
484 * Perl_deprecate
ffb4593c
NT
485 */
486
79072805 487void
bfed75c6 488Perl_deprecate(pTHX_ const char *s)
a0d0e21e 489{
599cee73 490 if (ckWARN(WARN_DEPRECATED))
9014280d 491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
492}
493
12bcd1a6 494void
bfed75c6 495Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
496{
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
499 /* */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
504
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
507 "Use of %s is deprecated", s);
508}
509
ffb4593c 510/*
9cbb5ea2
GS
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
ffb4593c
NT
513 */
514
c39cd008
GS
515#ifdef PERL_CR_FILTER
516static void
517strip_return(SV *sv)
518{
95a20fc0 519 register const char *s = SvPVX_const(sv);
9d4ba2ae 520 register const char * const e = s + SvCUR(sv);
c39cd008
GS
521 /* outer loop optimized to do nothing if there are no CR-LFs */
522 while (s < e) {
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
526 *d++ = *s++;
527 while (s < e) {
528 if (*s == '\r' && s[1] == '\n')
529 s++;
530 *d++ = *s++;
531 }
532 SvCUR(sv) -= s - d;
533 return;
534 }
535 }
536}
a868473f 537
76e3520e 538STATIC I32
c39cd008 539S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 540{
f54cb97a 541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
542 if (count > 0 && !maxlen)
543 strip_return(sv);
544 return count;
a868473f
NIS
545}
546#endif
547
ffb4593c
NT
548/*
549 * Perl_lex_start
9cbb5ea2
GS
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
ffb4593c
NT
552 */
553
a0d0e21e 554void
864dbfa3 555Perl_lex_start(pTHX_ SV *line)
79072805 556{
97aff369 557 dVAR;
cfd0369c 558 const char *s;
8990e307
LW
559 STRLEN len;
560
3280af22
NIS
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
3280af22
NIS
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
7766f137 566 SAVEVPTR(PL_lex_inpat);
3280af22 567 SAVEI32(PL_lex_inwhat);
18b09519
GS
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
573 }
574 SAVEI32(PL_nexttoke);
18b09519 575 }
57843af0 576 SAVECOPLINE(PL_curcop);
3280af22
NIS
577 SAVEPPTR(PL_bufptr);
578 SAVEPPTR(PL_bufend);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
3280af22
NIS
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
8edd5f42
RGS
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
09bef843 590 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 591 SAVESPTR(PL_lex_repl);
bebdddfc
GS
592 SAVEINT(PL_expect);
593 SAVEINT(PL_lex_expect);
3280af22
NIS
594
595 PL_lex_state = LEX_NORMAL;
596 PL_lex_defer = 0;
597 PL_expect = XSTATE;
598 PL_lex_brackets = 0;
a02a5408
JC
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
601 PL_lex_casemods = 0;
602 *PL_lex_casestack = '\0';
603 PL_lex_dojoin = 0;
604 PL_lex_starts = 0;
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
607 PL_lex_inpat = 0;
76be56bc 608 PL_nexttoke = 0;
3280af22 609 PL_lex_inwhat = 0;
09bef843 610 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
611 PL_linestr = line;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 614 s = SvPV_const(PL_linestr, len);
6f27f9a7 615 if (!len || s[len-1] != ';') {
3280af22
NIS
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 618 sv_catpvs(PL_linestr, "\n;");
8990e307 619 }
3280af22
NIS
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 623 PL_last_lop = PL_last_uni = Nullch;
3280af22 624 PL_rsfp = 0;
79072805 625}
a687059c 626
ffb4593c
NT
627/*
628 * Perl_lex_end
9cbb5ea2
GS
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
ffb4593c
NT
631 */
632
463ee0b2 633void
864dbfa3 634Perl_lex_end(pTHX)
463ee0b2 635{
97aff369 636 dVAR;
3280af22 637 PL_doextract = FALSE;
463ee0b2
LW
638}
639
ffb4593c
NT
640/*
641 * S_incline
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
57843af0 644 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 645 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
648 */
649
76e3520e 650STATIC void
cea2e8a9 651S_incline(pTHX_ char *s)
463ee0b2 652{
97aff369 653 dVAR;
463ee0b2
LW
654 char *t;
655 char *n;
73659bf1 656 char *e;
463ee0b2 657 char ch;
463ee0b2 658
57843af0 659 CopLINE_inc(PL_curcop);
463ee0b2
LW
660 if (*s++ != '#')
661 return;
bf4acbe4 662 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
663 if (strnEQ(s, "line", 4))
664 s += 4;
665 else
666 return;
084592ab 667 if (SPACE_OR_TAB(*s))
73659bf1 668 s++;
4e553d73 669 else
73659bf1 670 return;
bf4acbe4 671 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
672 if (!isDIGIT(*s))
673 return;
674 n = s;
675 while (isDIGIT(*s))
676 s++;
bf4acbe4 677 while (SPACE_OR_TAB(*s))
463ee0b2 678 s++;
73659bf1 679 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 680 s++;
73659bf1
GS
681 e = t + 1;
682 }
463ee0b2 683 else {
463ee0b2 684 for (t = s; !isSPACE(*t); t++) ;
73659bf1 685 e = t;
463ee0b2 686 }
bf4acbe4 687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
688 e++;
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
691
463ee0b2
LW
692 ch = *t;
693 *t = '\0';
f4dd75d9 694 if (t - s > 0) {
8a5ee598 695#ifndef USE_ITHREADS
c4420975 696 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
8a5ee598 703 GV **gvp, *gv2;
e66cf94c
RGS
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
706 tmpbuf = smallbuf;
707 else
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
710 tmpbuf2 = smallbuf2;
711 else
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
717 ++tmplen; ++tmplen2;
8a5ee598
RGS
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
719 if (gvp) {
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
721 if (!isGV(gv2))
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
727 }
e66cf94c
RGS
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
730 }
8a5ee598 731#endif
05ec9bb3 732 CopFILE_free(PL_curcop);
57843af0 733 CopFILE_set(PL_curcop, s);
f4dd75d9 734 }
463ee0b2 735 *t = ch;
57843af0 736 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
737}
738
ffb4593c
NT
739/*
740 * S_skipspace
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
743 */
744
76e3520e 745STATIC char *
cea2e8a9 746S_skipspace(pTHX_ register char *s)
a687059c 747{
97aff369 748 dVAR;
3280af22 749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 750 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
751 s++;
752 return s;
753 }
754 for (;;) {
fd049845 755 STRLEN prevlen;
09bef843 756 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 757 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
760 incline(s);
761 }
ffb4593c
NT
762
763 /* comment */
3280af22
NIS
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
463ee0b2 766 s++;
60e6418e 767 if (s < PL_bufend) {
463ee0b2 768 s++;
60e6418e
GS
769 if (PL_in_eval && !PL_rsfp) {
770 incline(s);
771 continue;
772 }
773 }
463ee0b2 774 }
ffb4593c
NT
775
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
779 */
09bef843
SB
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
463ee0b2 782 return s;
ffb4593c
NT
783
784 /* try to recharge the buffer */
9cbb5ea2
GS
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
787 {
788 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
789 if (PL_minus_p) {
790 sv_setpv(PL_linestr,
791 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 792 PL_minus_n = PL_minus_p = 0;
a0d0e21e 793 }
01a19ab0
NC
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
796 PL_minus_n = 0;
797 }
a0d0e21e 798 else
4147a61b 799 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
800
801 /* reset variables for next time we lex */
9cbb5ea2
GS
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
803 = SvPVX(PL_linestr);
3280af22 804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 805 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
806
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
812 */
813
3280af22
NIS
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
8990e307 818 else
3280af22
NIS
819 (void)PerlIO_close(PL_rsfp);
820 PL_rsfp = Nullfp;
463ee0b2
LW
821 return s;
822 }
ffb4593c
NT
823
824 /* not at end of file, so we only read another line */
09bef843
SB
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
828 if (PL_last_uni)
829 oldunilen = PL_last_uni - PL_bufend;
830 if (PL_last_lop)
831 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
834 s = PL_bufptr;
09bef843
SB
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
837 if (PL_last_uni)
838 PL_last_uni = s + oldunilen;
839 if (PL_last_lop)
840 PL_last_lop = s + oldloplen;
a0d0e21e 841 incline(s);
ffb4593c
NT
842
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
845 */
3280af22 846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 847 SV * const sv = newSV(0);
8990e307
LW
848
849 sv_upgrade(sv, SVt_PVMG);
3280af22 850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 851 (void)SvIOK_on(sv);
45977657 852 SvIV_set(sv, 0);
36c7798d 853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 854 }
463ee0b2 855 }
a687059c 856}
378cc40b 857
ffb4593c
NT
858/*
859 * S_check_uni
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
862 * rand + 5
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
865 */
866
76e3520e 867STATIC void
cea2e8a9 868S_check_uni(pTHX)
ba106d47 869{
97aff369 870 dVAR;
2f3197b3 871 char *s;
a0d0e21e 872 char *t;
2f3197b3 873
3280af22 874 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 875 return;
3280af22
NIS
876 while (isSPACE(*PL_last_uni))
877 PL_last_uni++;
7e2040f0 878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 879 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 880 return;
6136c704
AL
881
882 /* XXX Things like this are just so nasty. We shouldn't be modifying
883 source code, even if we realquick set it back. */
0453d815 884 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 885 const char ch = *s;
0453d815 886 *s = '\0';
9014280d 887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 888 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
889 PL_last_uni);
890 *s = ch;
891 }
2f3197b3
LW
892}
893
ffb4593c
NT
894/*
895 * LOP : macro to build a list operator. Its behaviour has been replaced
896 * with a subroutine, S_lop() for which LOP is just another name.
897 */
898
a0d0e21e
LW
899#define LOP(f,x) return lop(f,x,s)
900
ffb4593c
NT
901/*
902 * S_lop
903 * Build a list operator (or something that might be one). The rules:
904 * - if we have a next token, then it's a list operator [why?]
905 * - if the next thing is an opening paren, then it's a function
906 * - else it's a list operator
907 */
908
76e3520e 909STATIC I32
a0be28da 910S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 911{
97aff369 912 dVAR;
79072805 913 yylval.ival = f;
35c8bce7 914 CLINE;
3280af22
NIS
915 PL_expect = x;
916 PL_bufptr = s;
917 PL_last_lop = PL_oldbufptr;
eb160463 918 PL_last_lop_op = (OPCODE)f;
3280af22 919 if (PL_nexttoke)
bbf60fe6 920 return REPORT(LSTOP);
79072805 921 if (*s == '(')
bbf60fe6 922 return REPORT(FUNC);
79072805
LW
923 s = skipspace(s);
924 if (*s == '(')
bbf60fe6 925 return REPORT(FUNC);
79072805 926 else
bbf60fe6 927 return REPORT(LSTOP);
79072805
LW
928}
929
ffb4593c
NT
930/*
931 * S_force_next
9cbb5ea2 932 * When the lexer realizes it knows the next token (for instance,
ffb4593c 933 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
934 * to know what token to return the next time the lexer is called. Caller
935 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
936 * handles the token correctly.
ffb4593c
NT
937 */
938
4e553d73 939STATIC void
cea2e8a9 940S_force_next(pTHX_ I32 type)
79072805 941{
97aff369 942 dVAR;
3280af22
NIS
943 PL_nexttype[PL_nexttoke] = type;
944 PL_nexttoke++;
945 if (PL_lex_state != LEX_KNOWNEXT) {
946 PL_lex_defer = PL_lex_state;
947 PL_lex_expect = PL_expect;
948 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
949 }
950}
951
d0a148a6
NC
952STATIC SV *
953S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
954{
97aff369 955 dVAR;
9d4ba2ae 956 SV * const sv = newSVpvn(start,len);
bfed75c6 957 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
958 SvUTF8_on(sv);
959 return sv;
960}
961
ffb4593c
NT
962/*
963 * S_force_word
964 * When the lexer knows the next thing is a word (for instance, it has
965 * just seen -> and it knows that the next char is a word char, then
966 * it calls S_force_word to stick the next word into the PL_next lookahead.
967 *
968 * Arguments:
b1b65b59 969 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
970 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
971 * int check_keyword : if true, Perl checks to make sure the word isn't
972 * a keyword (do this if the word is a label, e.g. goto FOO)
973 * int allow_pack : if true, : characters will also be allowed (require,
974 * use, etc. do this)
9cbb5ea2 975 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
976 */
977
76e3520e 978STATIC char *
cea2e8a9 979S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 980{
97aff369 981 dVAR;
463ee0b2
LW
982 register char *s;
983 STRLEN len;
4e553d73 984
463ee0b2
LW
985 start = skipspace(start);
986 s = start;
7e2040f0 987 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 988 (allow_pack && *s == ':') ||
15f0808c 989 (allow_initial_tick && *s == '\'') )
a0d0e21e 990 {
3280af22
NIS
991 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
992 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
993 return start;
994 if (token == METHOD) {
995 s = skipspace(s);
996 if (*s == '(')
3280af22 997 PL_expect = XTERM;
463ee0b2 998 else {
3280af22 999 PL_expect = XOPERATOR;
463ee0b2 1000 }
79072805 1001 }
d0a148a6
NC
1002 PL_nextval[PL_nexttoke].opval
1003 = (OP*)newSVOP(OP_CONST,0,
1004 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 1005 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
1006 force_next(token);
1007 }
1008 return s;
1009}
1010
ffb4593c
NT
1011/*
1012 * S_force_ident
9cbb5ea2 1013 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1014 * text only contains the "foo" portion. The first argument is a pointer
1015 * to the "foo", and the second argument is the type symbol to prefix.
1016 * Forces the next token to be a "WORD".
9cbb5ea2 1017 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1018 */
1019
76e3520e 1020STATIC void
bfed75c6 1021S_force_ident(pTHX_ register const char *s, int kind)
79072805 1022{
97aff369 1023 dVAR;
79072805 1024 if (s && *s) {
bfed75c6 1025 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 1026 PL_nextval[PL_nexttoke].opval = o;
79072805 1027 force_next(WORD);
748a9306 1028 if (kind) {
11343788 1029 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1030 /* XXX see note in pp_entereval() for why we forgo typo
1031 warnings if the symbol must be introduced in an eval.
1032 GSAR 96-10-12 */
f776e3cd 1033 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
a0d0e21e
LW
1034 kind == '$' ? SVt_PV :
1035 kind == '@' ? SVt_PVAV :
1036 kind == '%' ? SVt_PVHV :
1037 SVt_PVGV
1038 );
748a9306 1039 }
79072805
LW
1040 }
1041}
1042
1571675a
GS
1043NV
1044Perl_str_to_version(pTHX_ SV *sv)
1045{
1046 NV retval = 0.0;
1047 NV nshift = 1.0;
1048 STRLEN len;
cfd0369c 1049 const char *start = SvPV_const(sv,len);
9d4ba2ae 1050 const char * const end = start + len;
504618e9 1051 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1052 while (start < end) {
ba210ebe 1053 STRLEN skip;
1571675a
GS
1054 UV n;
1055 if (utf)
9041c2e3 1056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1057 else {
1058 n = *(U8*)start;
1059 skip = 1;
1060 }
1061 retval += ((NV)n)/nshift;
1062 start += skip;
1063 nshift *= 1000;
1064 }
1065 return retval;
1066}
1067
4e553d73 1068/*
ffb4593c
NT
1069 * S_force_version
1070 * Forces the next token to be a version number.
e759cc13
RGS
1071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1072 * and if "guessing" is TRUE, then no new token is created (and the caller
1073 * must use an alternative parsing method).
ffb4593c
NT
1074 */
1075
76e3520e 1076STATIC char *
e759cc13 1077S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1078{
97aff369 1079 dVAR;
89bfa8cd 1080 OP *version = Nullop;
44dcb63b 1081 char *d;
89bfa8cd
PP
1082
1083 s = skipspace(s);
1084
44dcb63b 1085 d = s;
dd629d5b 1086 if (*d == 'v')
44dcb63b 1087 d++;
44dcb63b 1088 if (isDIGIT(*d)) {
e759cc13
RGS
1089 while (isDIGIT(*d) || *d == '_' || *d == '.')
1090 d++;
9f3d182e 1091 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1092 SV *ver;
b73d6f50 1093 s = scan_num(s, &yylval);
89bfa8cd 1094 version = yylval.opval;
dd629d5b
GS
1095 ver = cSVOPx(version)->op_sv;
1096 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1097 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1098 SvNV_set(ver, str_to_version(ver));
1571675a 1099 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1100 }
89bfa8cd 1101 }
e759cc13
RGS
1102 else if (guessing)
1103 return s;
89bfa8cd
PP
1104 }
1105
1106 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1107 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1108 force_next(WORD);
89bfa8cd 1109
e759cc13 1110 return s;
89bfa8cd
PP
1111}
1112
ffb4593c
NT
1113/*
1114 * S_tokeq
1115 * Tokenize a quoted string passed in as an SV. It finds the next
1116 * chunk, up to end of string or a backslash. It may make a new
1117 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1118 * turns \\ into \.
1119 */
1120
76e3520e 1121STATIC SV *
cea2e8a9 1122S_tokeq(pTHX_ SV *sv)
79072805 1123{
97aff369 1124 dVAR;
79072805
LW
1125 register char *s;
1126 register char *send;
1127 register char *d;
b3ac6de7
IZ
1128 STRLEN len = 0;
1129 SV *pv = sv;
79072805
LW
1130
1131 if (!SvLEN(sv))
b3ac6de7 1132 goto finish;
79072805 1133
a0d0e21e 1134 s = SvPV_force(sv, len);
21a311ee 1135 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1136 goto finish;
463ee0b2 1137 send = s + len;
79072805
LW
1138 while (s < send && *s != '\\')
1139 s++;
1140 if (s == send)
b3ac6de7 1141 goto finish;
79072805 1142 d = s;
be4731d2 1143 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1144 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1145 if (SvUTF8(sv))
1146 SvUTF8_on(pv);
1147 }
79072805
LW
1148 while (s < send) {
1149 if (*s == '\\') {
a0d0e21e 1150 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1151 s++; /* all that, just for this */
1152 }
1153 *d++ = *s++;
1154 }
1155 *d = '\0';
95a20fc0 1156 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1157 finish:
3280af22 1158 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1159 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1160 return sv;
1161}
1162
ffb4593c
NT
1163/*
1164 * Now come three functions related to double-quote context,
1165 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1166 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1167 * interact with PL_lex_state, and create fake ( ... ) argument lists
1168 * to handle functions and concatenation.
1169 * They assume that whoever calls them will be setting up a fake
1170 * join call, because each subthing puts a ',' after it. This lets
1171 * "lower \luPpEr"
1172 * become
1173 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1174 *
1175 * (I'm not sure whether the spurious commas at the end of lcfirst's
1176 * arguments and join's arguments are created or not).
1177 */
1178
1179/*
1180 * S_sublex_start
1181 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1182 *
1183 * Pattern matching will set PL_lex_op to the pattern-matching op to
1184 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1185 *
1186 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1187 *
1188 * Everything else becomes a FUNC.
1189 *
1190 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1191 * had an OP_CONST or OP_READLINE). This just sets us up for a
1192 * call to S_sublex_push().
1193 */
1194
76e3520e 1195STATIC I32
cea2e8a9 1196S_sublex_start(pTHX)
79072805 1197{
97aff369 1198 dVAR;
0d46e09a 1199 register const I32 op_type = yylval.ival;
79072805
LW
1200
1201 if (op_type == OP_NULL) {
3280af22
NIS
1202 yylval.opval = PL_lex_op;
1203 PL_lex_op = Nullop;
79072805
LW
1204 return THING;
1205 }
1206 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1207 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1208
1209 if (SvTYPE(sv) == SVt_PVIV) {
1210 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1211 STRLEN len;
cfd0369c 1212 const char *p = SvPV_const(sv, len);
f54cb97a 1213 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1214 if (SvUTF8(sv))
1215 SvUTF8_on(nsv);
b3ac6de7
IZ
1216 SvREFCNT_dec(sv);
1217 sv = nsv;
4e553d73 1218 }
b3ac6de7 1219 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1220 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1221 /* Allow <FH> // "foo" */
1222 if (op_type == OP_READLINE)
1223 PL_expect = XTERMORDORDOR;
79072805
LW
1224 return THING;
1225 }
1226
3280af22
NIS
1227 PL_sublex_info.super_state = PL_lex_state;
1228 PL_sublex_info.sub_inwhat = op_type;
1229 PL_sublex_info.sub_op = PL_lex_op;
1230 PL_lex_state = LEX_INTERPPUSH;
55497cff 1231
3280af22
NIS
1232 PL_expect = XTERM;
1233 if (PL_lex_op) {
1234 yylval.opval = PL_lex_op;
1235 PL_lex_op = Nullop;
55497cff
PP
1236 return PMFUNC;
1237 }
1238 else
1239 return FUNC;
1240}
1241
ffb4593c
NT
1242/*
1243 * S_sublex_push
1244 * Create a new scope to save the lexing state. The scope will be
1245 * ended in S_sublex_done. Returns a '(', starting the function arguments
1246 * to the uc, lc, etc. found before.
1247 * Sets PL_lex_state to LEX_INTERPCONCAT.
1248 */
1249
76e3520e 1250STATIC I32
cea2e8a9 1251S_sublex_push(pTHX)
55497cff 1252{
27da23d5 1253 dVAR;
f46d017c 1254 ENTER;
55497cff 1255
3280af22
NIS
1256 PL_lex_state = PL_sublex_info.super_state;
1257 SAVEI32(PL_lex_dojoin);
1258 SAVEI32(PL_lex_brackets);
3280af22
NIS
1259 SAVEI32(PL_lex_casemods);
1260 SAVEI32(PL_lex_starts);
1261 SAVEI32(PL_lex_state);
7766f137 1262 SAVEVPTR(PL_lex_inpat);
3280af22 1263 SAVEI32(PL_lex_inwhat);
57843af0 1264 SAVECOPLINE(PL_curcop);
3280af22 1265 SAVEPPTR(PL_bufptr);
8452ff4b 1266 SAVEPPTR(PL_bufend);
3280af22
NIS
1267 SAVEPPTR(PL_oldbufptr);
1268 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1269 SAVEPPTR(PL_last_lop);
1270 SAVEPPTR(PL_last_uni);
3280af22
NIS
1271 SAVEPPTR(PL_linestart);
1272 SAVESPTR(PL_linestr);
8edd5f42
RGS
1273 SAVEGENERICPV(PL_lex_brackstack);
1274 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1275
1276 PL_linestr = PL_lex_stuff;
1277 PL_lex_stuff = Nullsv;
1278
9cbb5ea2
GS
1279 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1280 = SvPVX(PL_linestr);
3280af22 1281 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1282 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1283 SAVEFREESV(PL_linestr);
1284
1285 PL_lex_dojoin = FALSE;
1286 PL_lex_brackets = 0;
a02a5408
JC
1287 Newx(PL_lex_brackstack, 120, char);
1288 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1289 PL_lex_casemods = 0;
1290 *PL_lex_casestack = '\0';
1291 PL_lex_starts = 0;
1292 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1293 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1294
1295 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1296 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1297 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1298 else
3280af22 1299 PL_lex_inpat = Nullop;
79072805 1300
55497cff 1301 return '(';
79072805
LW
1302}
1303
ffb4593c
NT
1304/*
1305 * S_sublex_done
1306 * Restores lexer state after a S_sublex_push.
1307 */
1308
76e3520e 1309STATIC I32
cea2e8a9 1310S_sublex_done(pTHX)
79072805 1311{
27da23d5 1312 dVAR;
3280af22 1313 if (!PL_lex_starts++) {
396482e1 1314 SV * const sv = newSVpvs("");
9aa983d2
JH
1315 if (SvUTF8(PL_linestr))
1316 SvUTF8_on(sv);
3280af22 1317 PL_expect = XOPERATOR;
9aa983d2 1318 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1319 return THING;
1320 }
1321
3280af22
NIS
1322 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1323 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1324 return yylex();
79072805
LW
1325 }
1326
ffb4593c 1327 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1328 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1329 PL_linestr = PL_lex_repl;
1330 PL_lex_inpat = 0;
1331 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1332 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1333 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1334 SAVEFREESV(PL_linestr);
1335 PL_lex_dojoin = FALSE;
1336 PL_lex_brackets = 0;
3280af22
NIS
1337 PL_lex_casemods = 0;
1338 *PL_lex_casestack = '\0';
1339 PL_lex_starts = 0;
25da4f38 1340 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1341 PL_lex_state = LEX_INTERPNORMAL;
1342 PL_lex_starts++;
e9fa98b2
HS
1343 /* we don't clear PL_lex_repl here, so that we can check later
1344 whether this is an evalled subst; that means we rely on the
1345 logic to ensure sublex_done() is called again only via the
1346 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1347 }
e9fa98b2 1348 else {
3280af22 1349 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1350 PL_lex_repl = Nullsv;
1351 }
79072805 1352 return ',';
ffed7fef
LW
1353 }
1354 else {
f46d017c 1355 LEAVE;
3280af22
NIS
1356 PL_bufend = SvPVX(PL_linestr);
1357 PL_bufend += SvCUR(PL_linestr);
1358 PL_expect = XOPERATOR;
09bef843 1359 PL_sublex_info.sub_inwhat = 0;
79072805 1360 return ')';
ffed7fef
LW
1361 }
1362}
1363
02aa26ce
NT
1364/*
1365 scan_const
1366
1367 Extracts a pattern, double-quoted string, or transliteration. This
1368 is terrifying code.
1369
3280af22
NIS
1370 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1371 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1372 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1373
9b599b2a
GS
1374 Returns a pointer to the character scanned up to. Iff this is
1375 advanced from the start pointer supplied (ie if anything was
1376 successfully parsed), will leave an OP for the substring scanned
1377 in yylval. Caller must intuit reason for not parsing further
1378 by looking at the next characters herself.
1379
02aa26ce
NT
1380 In patterns:
1381 backslashes:
1382 double-quoted style: \r and \n
1383 regexp special ones: \D \s
1384 constants: \x3
1385 backrefs: \1 (deprecated in substitution replacements)
1386 case and quoting: \U \Q \E
1387 stops on @ and $, but not for $ as tail anchor
1388
1389 In transliterations:
1390 characters are VERY literal, except for - not at the start or end
1391 of the string, which indicates a range. scan_const expands the
1392 range to the full set of intermediate characters.
1393
1394 In double-quoted strings:
1395 backslashes:
1396 double-quoted style: \r and \n
1397 constants: \x3
1398 backrefs: \1 (deprecated)
1399 case and quoting: \U \Q \E
1400 stops on @ and $
1401
1402 scan_const does *not* construct ops to handle interpolated strings.
1403 It stops processing as soon as it finds an embedded $ or @ variable
1404 and leaves it to the caller to work out what's going on.
1405
da6eedaa 1406 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1407
1408 $ in pattern could be $foo or could be tail anchor. Assumption:
1409 it's a tail anchor if $ is the last thing in the string, or if it's
1410 followed by one of ")| \n\t"
1411
1412 \1 (backreferences) are turned into $1
1413
1414 The structure of the code is
1415 while (there's a character to process) {
1416 handle transliteration ranges
1417 skip regexp comments
1418 skip # initiated comments in //x patterns
1419 check for embedded @foo
1420 check for embedded scalars
1421 if (backslash) {
1422 leave intact backslashes from leave (below)
1423 deprecate \1 in strings and sub replacements
1424 handle string-changing backslashes \l \U \Q \E, etc.
1425 switch (what was escaped) {
1426 handle - in a transliteration (becomes a literal -)
1427 handle \132 octal characters
1428 handle 0x15 hex characters
1429 handle \cV (control V)
1430 handle printf backslashes (\f, \r, \n, etc)
1431 } (end switch)
1432 } (end if backslash)
1433 } (end while character to read)
4e553d73 1434
02aa26ce
NT
1435*/
1436
76e3520e 1437STATIC char *
cea2e8a9 1438S_scan_const(pTHX_ char *start)
79072805 1439{
97aff369 1440 dVAR;
3280af22 1441 register char *send = PL_bufend; /* end of the constant */
561b68a9 1442 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1443 register char *s = start; /* start of the constant */
1444 register char *d = SvPVX(sv); /* destination for copies */
1445 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1446 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1447 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1448 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1449 UV uv;
4c3a8340
ST
1450#ifdef EBCDIC
1451 UV literal_endpoint = 0;
1452#endif
012bcf8d 1453
dff6d3cd 1454 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1455 PL_lex_inpat
b6d5fef8 1456 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1457 : "";
79072805 1458
2b9d42f0
NIS
1459 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1460 /* If we are doing a trans and we know we want UTF8 set expectation */
1461 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1462 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1463 }
1464
1465
79072805 1466 while (s < send || dorange) {
02aa26ce 1467 /* get transliterations out of the way (they're most literal) */
3280af22 1468 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1469 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1470 if (dorange) {
1ba5c669
JH
1471 I32 i; /* current expanded character */
1472 I32 min; /* first character in range */
1473 I32 max; /* last character in range */
02aa26ce 1474
2b9d42f0 1475 if (has_utf8) {
9d4ba2ae 1476 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1477 char *e = d++;
1478 while (e-- > c)
1479 *(e + 1) = *e;
25716404 1480 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1481 /* mark the range as done, and continue */
1482 dorange = FALSE;
1483 didrange = TRUE;
1484 continue;
1485 }
2b9d42f0 1486
95a20fc0 1487 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1488 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1489 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1490 d -= 2; /* eat the first char and the - */
1491
8ada0baa
JH
1492 min = (U8)*d; /* first char in range */
1493 max = (U8)d[1]; /* last char in range */
1494
c2e66d9e 1495 if (min > max) {
01ec43d0 1496 Perl_croak(aTHX_
d1573ac7 1497 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1498 (char)min, (char)max);
c2e66d9e
GS
1499 }
1500
c7f1f016 1501#ifdef EBCDIC
4c3a8340
ST
1502 if (literal_endpoint == 2 &&
1503 ((isLOWER(min) && isLOWER(max)) ||
1504 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1505 if (isLOWER(min)) {
1506 for (i = min; i <= max; i++)
1507 if (isLOWER(i))
db42d148 1508 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1509 } else {
1510 for (i = min; i <= max; i++)
1511 if (isUPPER(i))
db42d148 1512 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1513 }
1514 }
1515 else
1516#endif
1517 for (i = min; i <= max; i++)
eb160463 1518 *d++ = (char)i;
02aa26ce
NT
1519
1520 /* mark the range as done, and continue */
79072805 1521 dorange = FALSE;
01ec43d0 1522 didrange = TRUE;
4c3a8340
ST
1523#ifdef EBCDIC
1524 literal_endpoint = 0;
1525#endif
79072805 1526 continue;
4e553d73 1527 }
02aa26ce
NT
1528
1529 /* range begins (ignore - as first or last char) */
79072805 1530 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1531 if (didrange) {
1fafa243 1532 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1533 }
2b9d42f0 1534 if (has_utf8) {
25716404 1535 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1536 s++;
1537 continue;
1538 }
79072805
LW
1539 dorange = TRUE;
1540 s++;
01ec43d0
GS
1541 }
1542 else {
1543 didrange = FALSE;
4c3a8340
ST
1544#ifdef EBCDIC
1545 literal_endpoint = 0;
1546#endif
01ec43d0 1547 }
79072805 1548 }
02aa26ce
NT
1549
1550 /* if we get here, we're not doing a transliteration */
1551
0f5d15d6
IZ
1552 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1553 except for the last char, which will be done separately. */
3280af22 1554 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1555 if (s[2] == '#') {
e994fd66 1556 while (s+1 < send && *s != ')')
db42d148 1557 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1558 }
1559 else if (s[2] == '{' /* This should match regcomp.c */
1560 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1561 {
cc6b7395 1562 I32 count = 1;
0f5d15d6 1563 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1564 char c;
1565
d9f97599
GS
1566 while (count && (c = *regparse)) {
1567 if (c == '\\' && regparse[1])
1568 regparse++;
4e553d73 1569 else if (c == '{')
cc6b7395 1570 count++;
4e553d73 1571 else if (c == '}')
cc6b7395 1572 count--;
d9f97599 1573 regparse++;
cc6b7395 1574 }
e994fd66 1575 if (*regparse != ')')
5bdf89e7 1576 regparse--; /* Leave one char for continuation. */
0f5d15d6 1577 while (s < regparse)
db42d148 1578 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1579 }
748a9306 1580 }
02aa26ce
NT
1581
1582 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1583 else if (*s == '#' && PL_lex_inpat &&
1584 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1585 while (s+1 < send && *s != '\n')
db42d148 1586 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1587 }
02aa26ce 1588
5d1d4326 1589 /* check for embedded arrays
da6eedaa 1590 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1591 */
7e2040f0 1592 else if (*s == '@' && s[1]
5d1d4326 1593 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1594 break;
02aa26ce
NT
1595
1596 /* check for embedded scalars. only stop if we're sure it's a
1597 variable.
1598 */
79072805 1599 else if (*s == '$') {
3280af22 1600 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1601 break;
6002328a 1602 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1603 break; /* in regexp, $ might be tail anchor */
1604 }
02aa26ce 1605
2b9d42f0
NIS
1606 /* End of else if chain - OP_TRANS rejoin rest */
1607
02aa26ce 1608 /* backslashes */
79072805
LW
1609 if (*s == '\\' && s+1 < send) {
1610 s++;
02aa26ce
NT
1611
1612 /* some backslashes we leave behind */
c9f97d15 1613 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1614 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1615 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1616 continue;
1617 }
02aa26ce
NT
1618
1619 /* deprecate \1 in strings and substitution replacements */
3280af22 1620 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1621 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1622 {
599cee73 1623 if (ckWARN(WARN_SYNTAX))
9014280d 1624 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1625 *--s = '$';
1626 break;
1627 }
02aa26ce
NT
1628
1629 /* string-change backslash escapes */
3280af22 1630 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1631 --s;
1632 break;
1633 }
02aa26ce
NT
1634
1635 /* if we get here, it's either a quoted -, or a digit */
79072805 1636 switch (*s) {
02aa26ce
NT
1637
1638 /* quoted - in transliterations */
79072805 1639 case '-':
3280af22 1640 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1641 *d++ = *s++;
1642 continue;
1643 }
1644 /* FALL THROUGH */
1645 default:
11b8faa4 1646 {
041457d9
DM
1647 if (isALNUM(*s) &&
1648 *s != '_' &&
1649 ckWARN(WARN_MISC))
9014280d 1650 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1651 "Unrecognized escape \\%c passed through",
1652 *s);
1653 /* default action is to copy the quoted character */
f9a63242 1654 goto default_action;
11b8faa4 1655 }
02aa26ce
NT
1656
1657 /* \132 indicates an octal constant */
79072805
LW
1658 case '0': case '1': case '2': case '3':
1659 case '4': case '5': case '6': case '7':
ba210ebe 1660 {
53305cf1
NC
1661 I32 flags = 0;
1662 STRLEN len = 3;
1663 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1664 s += len;
1665 }
012bcf8d 1666 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1667
1668 /* \x24 indicates a hex constant */
79072805 1669 case 'x':
a0ed51b3
LW
1670 ++s;
1671 if (*s == '{') {
9d4ba2ae 1672 char* const e = strchr(s, '}');
a4c04bdc
NC
1673 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1674 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1675 STRLEN len;
355860ce 1676
53305cf1 1677 ++s;
adaeee49 1678 if (!e) {
a0ed51b3 1679 yyerror("Missing right brace on \\x{}");
355860ce 1680 continue;
ba210ebe 1681 }
53305cf1
NC
1682 len = e - s;
1683 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1684 s = e + 1;
a0ed51b3
LW
1685 }
1686 else {
ba210ebe 1687 {
53305cf1 1688 STRLEN len = 2;
a4c04bdc 1689 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1690 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1691 s += len;
1692 }
012bcf8d
GS
1693 }
1694
1695 NUM_ESCAPE_INSERT:
1696 /* Insert oct or hex escaped character.
301d3d20 1697 * There will always enough room in sv since such
db42d148 1698 * escapes will be longer than any UTF-8 sequence
301d3d20 1699 * they can end up as. */
ba7cea30 1700
c7f1f016
NIS
1701 /* We need to map to chars to ASCII before doing the tests
1702 to cover EBCDIC
1703 */
c4d5f83a 1704 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1705 if (!has_utf8 && uv > 255) {
301d3d20
JH
1706 /* Might need to recode whatever we have
1707 * accumulated so far if it contains any
1708 * hibit chars.
1709 *
1710 * (Can't we keep track of that and avoid
1711 * this rescan? --jhi)
012bcf8d 1712 */
c7f1f016 1713 int hicount = 0;
63cd0674
NIS
1714 U8 *c;
1715 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1716 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1717 hicount++;
db42d148 1718 }
012bcf8d 1719 }
63cd0674 1720 if (hicount) {
9d4ba2ae 1721 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1722 U8 *src, *dst;
1723 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1724 src = (U8 *)d - 1;
1725 dst = src+hicount;
1726 d += hicount;
cfd0369c 1727 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1728 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1729 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1730 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1731 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1732 }
1733 else {
63cd0674 1734 *dst-- = *src;
012bcf8d 1735 }
c7f1f016 1736 src--;
012bcf8d
GS
1737 }
1738 }
1739 }
1740
9aa983d2 1741 if (has_utf8 || uv > 255) {
9041c2e3 1742 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1743 has_utf8 = TRUE;
f9a63242
JH
1744 if (PL_lex_inwhat == OP_TRANS &&
1745 PL_sublex_info.sub_op) {
1746 PL_sublex_info.sub_op->op_private |=
1747 (PL_lex_repl ? OPpTRANS_FROM_UTF
1748 : OPpTRANS_TO_UTF);
f9a63242 1749 }
012bcf8d 1750 }
a0ed51b3 1751 else {
012bcf8d 1752 *d++ = (char)uv;
a0ed51b3 1753 }
012bcf8d
GS
1754 }
1755 else {
c4d5f83a 1756 *d++ = (char) uv;
a0ed51b3 1757 }
79072805 1758 continue;
02aa26ce 1759
b239daa5 1760 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1761 case 'N':
55eda711 1762 ++s;
423cee85
JH
1763 if (*s == '{') {
1764 char* e = strchr(s, '}');
155aba94 1765 SV *res;
423cee85 1766 STRLEN len;
cfd0369c 1767 const char *str;
4e553d73 1768
423cee85 1769 if (!e) {
5777a3f7 1770 yyerror("Missing right brace on \\N{}");
423cee85
JH
1771 e = s - 1;
1772 goto cont_scan;
1773 }
dbc0d4f2
JH
1774 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1775 /* \N{U+...} */
1776 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1777 PERL_SCAN_DISALLOW_PREFIX;
1778 s += 3;
1779 len = e - s;
1780 uv = grok_hex(s, &len, &flags, NULL);
1781 s = e + 1;
1782 goto NUM_ESCAPE_INSERT;
1783 }
55eda711
JH
1784 res = newSVpvn(s + 1, e - s - 1);
1785 res = new_constant( Nullch, 0, "charnames",
1786 res, Nullsv, "\\N{...}" );
f9a63242
JH
1787 if (has_utf8)
1788 sv_utf8_upgrade(res);
cfd0369c 1789 str = SvPV_const(res,len);
1c47067b
JH
1790#ifdef EBCDIC_NEVER_MIND
1791 /* charnames uses pack U and that has been
1792 * recently changed to do the below uni->native
1793 * mapping, so this would be redundant (and wrong,
1794 * the code point would be doubly converted).
1795 * But leave this in just in case the pack U change
1796 * gets revoked, but the semantics is still
1797 * desireable for charnames. --jhi */
cddc7ef4 1798 {
cfd0369c 1799 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1800
1801 if (uv < 0x100) {
89ebb4a3 1802 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1803
1804 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1805 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1806 str = SvPV_const(res, len);
cddc7ef4
JH
1807 }
1808 }
1809#endif
89491803 1810 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1811 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1812 SvCUR_set(sv, d - ostart);
1813 SvPOK_on(sv);
e4f3eed8 1814 *d = '\0';
f08d6ad9 1815 sv_utf8_upgrade(sv);
d2f449dd 1816 /* this just broke our allocation above... */
eb160463 1817 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1818 d = SvPVX(sv) + SvCUR(sv);
89491803 1819 has_utf8 = TRUE;
f08d6ad9 1820 }
eb160463 1821 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1822 const char * const odest = SvPVX_const(sv);
423cee85 1823
8973db79 1824 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1825 d = SvPVX(sv) + (d - odest);
1826 }
1827 Copy(str, d, len, char);
1828 d += len;
1829 SvREFCNT_dec(res);
1830 cont_scan:
1831 s = e + 1;
1832 }
1833 else
5777a3f7 1834 yyerror("Missing braces on \\N{}");
423cee85
JH
1835 continue;
1836
02aa26ce 1837 /* \c is a control character */
79072805
LW
1838 case 'c':
1839 s++;
961ce445 1840 if (s < send) {
ba210ebe 1841 U8 c = *s++;
c7f1f016
NIS
1842#ifdef EBCDIC
1843 if (isLOWER(c))
1844 c = toUPPER(c);
1845#endif
db42d148 1846 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1847 }
961ce445
RGS
1848 else {
1849 yyerror("Missing control char name in \\c");
1850 }
79072805 1851 continue;
02aa26ce
NT
1852
1853 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1854 case 'b':
db42d148 1855 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1856 break;
1857 case 'n':
db42d148 1858 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1859 break;
1860 case 'r':
db42d148 1861 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1862 break;
1863 case 'f':
db42d148 1864 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1865 break;
1866 case 't':
db42d148 1867 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1868 break;
34a3fe2a 1869 case 'e':
db42d148 1870 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1871 break;
1872 case 'a':
db42d148 1873 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1874 break;
02aa26ce
NT
1875 } /* end switch */
1876
79072805
LW
1877 s++;
1878 continue;
02aa26ce 1879 } /* end if (backslash) */
4c3a8340
ST
1880#ifdef EBCDIC
1881 else
1882 literal_endpoint++;
1883#endif
02aa26ce 1884
f9a63242 1885 default_action:
2b9d42f0
NIS
1886 /* If we started with encoded form, or already know we want it
1887 and then encode the next character */
1888 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1889 STRLEN len = 1;
9d4ba2ae
AL
1890 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1891 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1892 s += len;
1893 if (need > len) {
1894 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1895 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1896 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1897 }
1898 d = (char*)uvchr_to_utf8((U8*)d, uv);
1899 has_utf8 = TRUE;
1900 }
1901 else {
1902 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1903 }
02aa26ce
NT
1904 } /* while loop to process each character */
1905
1906 /* terminate the string and set up the sv */
79072805 1907 *d = '\0';
95a20fc0 1908 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1909 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1910 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1911
79072805 1912 SvPOK_on(sv);
9f4817db 1913 if (PL_encoding && !has_utf8) {
d0063567
DK
1914 sv_recode_to_utf8(sv, PL_encoding);
1915 if (SvUTF8(sv))
1916 has_utf8 = TRUE;
9f4817db 1917 }
2b9d42f0 1918 if (has_utf8) {
7e2040f0 1919 SvUTF8_on(sv);
2b9d42f0 1920 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1921 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1922 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1923 }
1924 }
79072805 1925
02aa26ce 1926 /* shrink the sv if we allocated more than we used */
79072805 1927 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1928 SvPV_shrink_to_cur(sv);
79072805 1929 }
02aa26ce 1930
9b599b2a 1931 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1932 if (s > PL_bufptr) {
1933 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1934 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1935 sv, Nullsv,
4e553d73 1936 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1937 ? "tr"
3280af22 1938 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1939 ? "s"
1940 : "qq")));
79072805 1941 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1942 } else
8990e307 1943 SvREFCNT_dec(sv);
79072805
LW
1944 return s;
1945}
1946
ffb4593c
NT
1947/* S_intuit_more
1948 * Returns TRUE if there's more to the expression (e.g., a subscript),
1949 * FALSE otherwise.
ffb4593c
NT
1950 *
1951 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1952 *
1953 * ->[ and ->{ return TRUE
1954 * { and [ outside a pattern are always subscripts, so return TRUE
1955 * if we're outside a pattern and it's not { or [, then return FALSE
1956 * if we're in a pattern and the first char is a {
1957 * {4,5} (any digits around the comma) returns FALSE
1958 * if we're in a pattern and the first char is a [
1959 * [] returns FALSE
1960 * [SOMETHING] has a funky algorithm to decide whether it's a
1961 * character class or not. It has to deal with things like
1962 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1963 * anything else returns TRUE
1964 */
1965
9cbb5ea2
GS
1966/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1967
76e3520e 1968STATIC int
cea2e8a9 1969S_intuit_more(pTHX_ register char *s)
79072805 1970{
97aff369 1971 dVAR;
3280af22 1972 if (PL_lex_brackets)
79072805
LW
1973 return TRUE;
1974 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1975 return TRUE;
1976 if (*s != '{' && *s != '[')
1977 return FALSE;
3280af22 1978 if (!PL_lex_inpat)
79072805
LW
1979 return TRUE;
1980
1981 /* In a pattern, so maybe we have {n,m}. */
1982 if (*s == '{') {
1983 s++;
1984 if (!isDIGIT(*s))
1985 return TRUE;
1986 while (isDIGIT(*s))
1987 s++;
1988 if (*s == ',')
1989 s++;
1990 while (isDIGIT(*s))
1991 s++;
1992 if (*s == '}')
1993 return FALSE;
1994 return TRUE;
1995
1996 }
1997
1998 /* On the other hand, maybe we have a character class */
1999
2000 s++;
2001 if (*s == ']' || *s == '^')
2002 return FALSE;
2003 else {
ffb4593c 2004 /* this is terrifying, and it works */
79072805
LW
2005 int weight = 2; /* let's weigh the evidence */
2006 char seen[256];
f27ffc4a 2007 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2008 const char * const send = strchr(s,']');
3280af22 2009 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2010
2011 if (!send) /* has to be an expression */
2012 return TRUE;
2013
2014 Zero(seen,256,char);
2015 if (*s == '$')
2016 weight -= 3;
2017 else if (isDIGIT(*s)) {
2018 if (s[1] != ']') {
2019 if (isDIGIT(s[1]) && s[2] == ']')
2020 weight -= 10;
2021 }
2022 else
2023 weight -= 100;
2024 }
2025 for (; s < send; s++) {
2026 last_un_char = un_char;
2027 un_char = (unsigned char)*s;
2028 switch (*s) {
2029 case '@':
2030 case '&':
2031 case '$':
2032 weight -= seen[un_char] * 10;
7e2040f0 2033 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 2034 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
f776e3cd
NC
2035 if ((int)strlen(tmpbuf) > 1
2036 && gv_fetchpv(tmpbuf, 0, SVt_PV))
79072805
LW
2037 weight -= 100;
2038 else
2039 weight -= 10;
2040 }
2041 else if (*s == '$' && s[1] &&
93a17b20
LW
2042 strchr("[#!%*<>()-=",s[1])) {
2043 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2044 weight -= 10;
2045 else
2046 weight -= 1;
2047 }
2048 break;
2049 case '\\':
2050 un_char = 254;
2051 if (s[1]) {
93a17b20 2052 if (strchr("wds]",s[1]))
79072805
LW
2053 weight += 100;
2054 else if (seen['\''] || seen['"'])
2055 weight += 1;
93a17b20 2056 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2057 weight += 40;
2058 else if (isDIGIT(s[1])) {
2059 weight += 40;
2060 while (s[1] && isDIGIT(s[1]))
2061 s++;
2062 }
2063 }
2064 else
2065 weight += 100;
2066 break;
2067 case '-':
2068 if (s[1] == '\\')
2069 weight += 50;
93a17b20 2070 if (strchr("aA01! ",last_un_char))
79072805 2071 weight += 30;
93a17b20 2072 if (strchr("zZ79~",s[1]))
79072805 2073 weight += 30;
f27ffc4a
GS
2074 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2075 weight -= 5; /* cope with negative subscript */
79072805
LW
2076 break;
2077 default:
3792a11b
NC
2078 if (!isALNUM(last_un_char)
2079 && !(last_un_char == '$' || last_un_char == '@'
2080 || last_un_char == '&')
2081 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2082 char *d = tmpbuf;
2083 while (isALPHA(*s))
2084 *d++ = *s++;
2085 *d = '\0';
2086 if (keyword(tmpbuf, d - tmpbuf))
2087 weight -= 150;
2088 }
2089 if (un_char == last_un_char + 1)
2090 weight += 5;
2091 weight -= seen[un_char];
2092 break;
2093 }
2094 seen[un_char]++;
2095 }
2096 if (weight >= 0) /* probably a character class */
2097 return FALSE;
2098 }
2099
2100 return TRUE;
2101}
ffed7fef 2102
ffb4593c
NT
2103/*
2104 * S_intuit_method
2105 *
2106 * Does all the checking to disambiguate
2107 * foo bar
2108 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2109 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2110 *
2111 * First argument is the stuff after the first token, e.g. "bar".
2112 *
2113 * Not a method if bar is a filehandle.
2114 * Not a method if foo is a subroutine prototyped to take a filehandle.
2115 * Not a method if it's really "Foo $bar"
2116 * Method if it's "foo $bar"
2117 * Not a method if it's really "print foo $bar"
2118 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2119 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2120 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2121 * =>
2122 */
2123
76e3520e 2124STATIC int
62d55b22 2125S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2126{
97aff369 2127 dVAR;
a0d0e21e 2128 char *s = start + (*start == '$');
3280af22 2129 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2130 STRLEN len;
2131 GV* indirgv;
2132
2133 if (gv) {
62d55b22 2134 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2135 return 0;
62d55b22
NC
2136 if (cv) {
2137 if (SvPOK(cv)) {
2138 const char *proto = SvPVX_const(cv);
2139 if (proto) {
2140 if (*proto == ';')
2141 proto++;
2142 if (*proto == '*')
2143 return 0;
2144 }
b6c543e3
IZ
2145 }
2146 } else
a0d0e21e
LW
2147 gv = 0;
2148 }
8903cb82 2149 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2150 /* start is the beginning of the possible filehandle/object,
2151 * and s is the end of it
2152 * tmpbuf is a copy of it
2153 */
2154
a0d0e21e 2155 if (*start == '$') {
3280af22 2156 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2157 return 0;
2158 s = skipspace(s);
3280af22
NIS
2159 PL_bufptr = start;
2160 PL_expect = XREF;
a0d0e21e
LW
2161 return *s == '(' ? FUNCMETH : METHOD;
2162 }
2163 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2164 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2165 len -= 2;
2166 tmpbuf[len] = '\0';
2167 goto bare_package;
2168 }
f776e3cd 2169 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
8ebc5c01 2170 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2171 return 0;
2172 /* filehandle or package name makes it a method */
89bfa8cd 2173 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2174 s = skipspace(s);
3280af22 2175 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2176 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2177 bare_package:
3280af22 2178 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2179 newSVpvn(tmpbuf,len));
3280af22
NIS
2180 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2181 PL_expect = XTERM;
a0d0e21e 2182 force_next(WORD);
3280af22 2183 PL_bufptr = s;
a0d0e21e
LW
2184 return *s == '(' ? FUNCMETH : METHOD;
2185 }
2186 }
2187 return 0;
2188}
2189
ffb4593c
NT
2190/*
2191 * S_incl_perldb
2192 * Return a string of Perl code to load the debugger. If PERL5DB
2193 * is set, it will return the contents of that, otherwise a
2194 * compile-time require of perl5db.pl.
2195 */
2196
bfed75c6 2197STATIC const char*
cea2e8a9 2198S_incl_perldb(pTHX)
a0d0e21e 2199{
97aff369 2200 dVAR;
3280af22 2201 if (PL_perldb) {
9d4ba2ae 2202 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2203
2204 if (pdb)
2205 return pdb;
93189314 2206 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2207 return "BEGIN { require 'perl5db.pl' }";
2208 }
2209 return "";
2210}
2211
2212
16d20bd9 2213/* Encoded script support. filter_add() effectively inserts a
4e553d73 2214 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2215 * Note that the filter function only applies to the current source file
2216 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2217 *
2218 * The datasv parameter (which may be NULL) can be used to pass
2219 * private data to this instance of the filter. The filter function
2220 * can recover the SV using the FILTER_DATA macro and use it to
2221 * store private buffers and state information.
2222 *
2223 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2224 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2225 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2226 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2227 * private use must be set using malloc'd pointers.
2228 */
16d20bd9
AD
2229
2230SV *
864dbfa3 2231Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2232{
97aff369 2233 dVAR;
f4c556ac
GS
2234 if (!funcp)
2235 return Nullsv;
2236
3280af22
NIS
2237 if (!PL_rsfp_filters)
2238 PL_rsfp_filters = newAV();
16d20bd9 2239 if (!datasv)
561b68a9 2240 datasv = newSV(0);
862a34c6 2241 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2242 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2243 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2244 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2245 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2246 av_unshift(PL_rsfp_filters, 1);
2247 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2248 return(datasv);
2249}
4e553d73 2250
16d20bd9
AD
2251
2252/* Delete most recently added instance of this filter function. */
a0d0e21e 2253void
864dbfa3 2254Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2255{
97aff369 2256 dVAR;
e0c19803 2257 SV *datasv;
24801a4b 2258
33073adb 2259#ifdef DEBUGGING
8141890a 2260 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2261#endif
3280af22 2262 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2263 return;
2264 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2265 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2266 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2267 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2268 IoANY(datasv) = (void *)NULL;
3280af22 2269 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2270
16d20bd9
AD
2271 return;
2272 }
2273 /* we need to search for the correct entry and clear it */
cea2e8a9 2274 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2275}
2276
2277
1de9afcd
RGS
2278/* Invoke the idxth filter function for the current rsfp. */
2279/* maxlen 0 = read one text line */
16d20bd9 2280I32
864dbfa3 2281Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2282{
97aff369 2283 dVAR;
16d20bd9
AD
2284 filter_t funcp;
2285 SV *datasv = NULL;
e50aee73 2286
3280af22 2287 if (!PL_rsfp_filters)
16d20bd9 2288 return -1;
1de9afcd 2289 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2290 /* Provide a default input filter to make life easy. */
2291 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2292 DEBUG_P(PerlIO_printf(Perl_debug_log,
2293 "filter_read %d: from rsfp\n", idx));
4e553d73 2294 if (maxlen) {
16d20bd9
AD
2295 /* Want a block */
2296 int len ;
f54cb97a 2297 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2298
2299 /* ensure buf_sv is large enough */
eb160463 2300 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2301 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2302 if (PerlIO_error(PL_rsfp))
37120919
AD
2303 return -1; /* error */
2304 else
2305 return 0 ; /* end of file */
2306 }
16d20bd9
AD
2307 SvCUR_set(buf_sv, old_len + len) ;
2308 } else {
2309 /* Want a line */
3280af22
NIS
2310 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2311 if (PerlIO_error(PL_rsfp))
37120919
AD
2312 return -1; /* error */
2313 else
2314 return 0 ; /* end of file */
2315 }
16d20bd9
AD
2316 }
2317 return SvCUR(buf_sv);
2318 }
2319 /* Skip this filter slot if filter has been deleted */
1de9afcd 2320 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2321 DEBUG_P(PerlIO_printf(Perl_debug_log,
2322 "filter_read %d: skipped (filter deleted)\n",
2323 idx));
16d20bd9
AD
2324 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2325 }
2326 /* Get function pointer hidden within datasv */
8141890a 2327 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2328 DEBUG_P(PerlIO_printf(Perl_debug_log,
2329 "filter_read %d: via function %p (%s)\n",
cfd0369c 2330 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2331 /* Call function. The function is expected to */
2332 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2333 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2334 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2335}
2336
76e3520e 2337STATIC char *
cea2e8a9 2338S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2339{
97aff369 2340 dVAR;
c39cd008 2341#ifdef PERL_CR_FILTER
3280af22 2342 if (!PL_rsfp_filters) {
c39cd008 2343 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2344 }
2345#endif
3280af22 2346 if (PL_rsfp_filters) {
55497cff
PP
2347 if (!append)
2348 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2349 if (FILTER_READ(0, sv, 0) > 0)
2350 return ( SvPVX(sv) ) ;
2351 else
2352 return Nullch ;
2353 }
9d116dd7 2354 else
fd049845 2355 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2356}
2357
01ec43d0 2358STATIC HV *
7fc63493 2359S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2360{
97aff369 2361 dVAR;
def3634b
GS
2362 GV *gv;
2363
01ec43d0 2364 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2365 return PL_curstash;
2366
2367 if (len > 2 &&
2368 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
f776e3cd 2369 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
01ec43d0
GS
2370 {
2371 return GvHV(gv); /* Foo:: */
def3634b
GS
2372 }
2373
2374 /* use constant CLASS => 'MyClass' */
f776e3cd 2375 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
def3634b
GS
2376 SV *sv;
2377 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2378 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2379 }
2380 }
2381
2382 return gv_stashpv(pkgname, FALSE);
2383}
a0d0e21e 2384
468aa647 2385STATIC char *
cc6ed77d 2386S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2387 dVAR;
468aa647
RGS
2388 if (PL_expect != XSTATE)
2389 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2390 is_use ? "use" : "no"));
2391 s = skipspace(s);
2392 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2393 s = force_version(s, TRUE);
2394 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2395 PL_nextval[PL_nexttoke].opval = Nullop;
2396 force_next(WORD);
2397 }
2398 else if (*s == 'v') {
2399 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2400 s = force_version(s, FALSE);
2401 }
2402 }
2403 else {
2404 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2405 s = force_version(s, FALSE);
2406 }
2407 yylval.ival = is_use;
2408 return s;
2409}
748a9306 2410#ifdef DEBUGGING
27da23d5 2411 static const char* const exp_name[] =
09bef843 2412 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2413 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2414 };
748a9306 2415#endif
463ee0b2 2416
02aa26ce
NT
2417/*
2418 yylex
2419
2420 Works out what to call the token just pulled out of the input
2421 stream. The yacc parser takes care of taking the ops we return and
2422 stitching them into a tree.
2423
2424 Returns:
2425 PRIVATEREF
2426
2427 Structure:
2428 if read an identifier
2429 if we're in a my declaration
2430 croak if they tried to say my($foo::bar)
2431 build the ops for a my() declaration
2432 if it's an access to a my() variable
2433 are we in a sort block?
2434 croak if my($a); $a <=> $b
2435 build ops for access to a my() variable
2436 if in a dq string, and they've said @foo and we can't find @foo
2437 croak
2438 build ops for a bareword
2439 if we already built the token before, use it.
2440*/
2441
20141f0e 2442
dba4d153
JH
2443#ifdef __SC__
2444#pragma segment Perl_yylex
2445#endif
dba4d153 2446int
dba4d153 2447Perl_yylex(pTHX)
20141f0e 2448{
97aff369 2449 dVAR;
3afc138a 2450 register char *s = PL_bufptr;
378cc40b 2451 register char *d;
463ee0b2 2452 STRLEN len;
aa7440fb 2453 bool bof = FALSE;
a687059c 2454
bbf60fe6 2455 DEBUG_T( {
396482e1 2456 SV* tmp = newSVpvs("");
b6007c36
DM
2457 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2458 (IV)CopLINE(PL_curcop),
2459 lex_state_names[PL_lex_state],
2460 exp_name[PL_expect],
2461 pv_display(tmp, s, strlen(s), 0, 60));
2462 SvREFCNT_dec(tmp);
bbf60fe6 2463 } );
02aa26ce 2464 /* check if there's an identifier for us to look at */
ba979b31 2465 if (PL_pending_ident)
bbf60fe6 2466 return REPORT(S_pending_ident(aTHX));
bbce6d69 2467
02aa26ce
NT
2468 /* no identifier pending identification */
2469
3280af22 2470 switch (PL_lex_state) {
79072805
LW
2471#ifdef COMMENTARY
2472 case LEX_NORMAL: /* Some compilers will produce faster */
2473 case LEX_INTERPNORMAL: /* code if we comment these out. */
2474 break;
2475#endif
2476
09bef843 2477 /* when we've already built the next token, just pull it out of the queue */
79072805 2478 case LEX_KNOWNEXT:
3280af22
NIS
2479 PL_nexttoke--;
2480 yylval = PL_nextval[PL_nexttoke];
2481 if (!PL_nexttoke) {
2482 PL_lex_state = PL_lex_defer;
2483 PL_expect = PL_lex_expect;
2484 PL_lex_defer = LEX_NORMAL;
463ee0b2 2485 }
bbf60fe6 2486 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2487
02aa26ce 2488 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2489 when we get here, PL_bufptr is at the \
02aa26ce 2490 */
79072805
LW
2491 case LEX_INTERPCASEMOD:
2492#ifdef DEBUGGING
3280af22 2493 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2494 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2495#endif
02aa26ce 2496 /* handle \E or end of string */
3280af22 2497 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2498 /* if at a \E */
3280af22 2499 if (PL_lex_casemods) {
f54cb97a 2500 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2501 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2502
3792a11b
NC
2503 if (PL_bufptr != PL_bufend
2504 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2505 PL_bufptr += 2;
2506 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2507 }
bbf60fe6 2508 return REPORT(')');
79072805 2509 }
3280af22
NIS
2510 if (PL_bufptr != PL_bufend)
2511 PL_bufptr += 2;
2512 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2513 return yylex();
79072805
LW
2514 }
2515 else {
607df283 2516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2517 "### Saw case modifier\n"); });
3280af22 2518 s = PL_bufptr + 1;
6e909404
JH
2519 if (s[1] == '\\' && s[2] == 'E') {
2520 PL_bufptr = s + 3;
2521 PL_lex_state = LEX_INTERPCONCAT;
2522 return yylex();
a0d0e21e 2523 }
6e909404 2524 else {
90771dc0 2525 I32 tmp;
6e909404
JH
2526 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2527 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2528 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2529 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2530 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2531 return REPORT(')');
6e909404
JH
2532 }
2533 if (PL_lex_casemods > 10)
2534 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2535 PL_lex_casestack[PL_lex_casemods++] = *s;
2536 PL_lex_casestack[PL_lex_casemods] = '\0';
2537 PL_lex_state = LEX_INTERPCONCAT;
2538 PL_nextval[PL_nexttoke].ival = 0;
2539 force_next('(');
2540 if (*s == 'l')
2541 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2542 else if (*s == 'u')
2543 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2544 else if (*s == 'L')
2545 PL_nextval[PL_nexttoke].ival = OP_LC;
2546 else if (*s == 'U')
2547 PL_nextval[PL_nexttoke].ival = OP_UC;
2548 else if (*s == 'Q')
2549 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2550 else
2551 Perl_croak(aTHX_ "panic: yylex");
2552 PL_bufptr = s + 1;
a0d0e21e 2553 }
79072805 2554 force_next(FUNC);
3280af22
NIS
2555 if (PL_lex_starts) {
2556 s = PL_bufptr;
2557 PL_lex_starts = 0;
131b3ad0
DM
2558 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2559 if (PL_lex_casemods == 1 && PL_lex_inpat)
2560 OPERATOR(',');
2561 else
2562 Aop(OP_CONCAT);
79072805
LW
2563 }
2564 else
cea2e8a9 2565 return yylex();
79072805
LW
2566 }
2567
55497cff 2568 case LEX_INTERPPUSH:
bbf60fe6 2569 return REPORT(sublex_push());
55497cff 2570
79072805 2571 case LEX_INTERPSTART:
3280af22 2572 if (PL_bufptr == PL_bufend)
bbf60fe6 2573 return REPORT(sublex_done());
607df283 2574 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2575 "### Interpolated variable\n"); });
3280af22
NIS
2576 PL_expect = XTERM;
2577 PL_lex_dojoin = (*PL_bufptr == '@');
2578 PL_lex_state = LEX_INTERPNORMAL;
2579 if (PL_lex_dojoin) {
2580 PL_nextval[PL_nexttoke].ival = 0;
79072805 2581 force_next(',');
a0d0e21e 2582 force_ident("\"", '$');
3280af22 2583 PL_nextval[PL_nexttoke].ival = 0;
79072805 2584 force_next('$');
3280af22 2585 PL_nextval[PL_nexttoke].ival = 0;
79072805 2586 force_next('(');
3280af22 2587 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2588 force_next(FUNC);
2589 }
3280af22
NIS
2590 if (PL_lex_starts++) {
2591 s = PL_bufptr;
131b3ad0
DM
2592 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2593 if (!PL_lex_casemods && PL_lex_inpat)
2594 OPERATOR(',');
2595 else
2596 Aop(OP_CONCAT);
79072805 2597 }
cea2e8a9 2598 return yylex();
79072805
LW
2599
2600 case LEX_INTERPENDMAYBE:
3280af22
NIS
2601 if (intuit_more(PL_bufptr)) {
2602 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2603 break;
2604 }
2605 /* FALL THROUGH */
2606
2607 case LEX_INTERPEND:
3280af22
NIS
2608 if (PL_lex_dojoin) {
2609 PL_lex_dojoin = FALSE;
2610 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2611 return REPORT(')');
79072805 2612 }
43a16006 2613 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2614 && SvEVALED(PL_lex_repl))
43a16006 2615 {
e9fa98b2 2616 if (PL_bufptr != PL_bufend)
cea2e8a9 2617 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2618 PL_lex_repl = Nullsv;
2619 }
79072805
LW
2620 /* FALLTHROUGH */
2621 case LEX_INTERPCONCAT:
2622#ifdef DEBUGGING
3280af22 2623 if (PL_lex_brackets)
cea2e8a9 2624 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2625#endif
3280af22 2626 if (PL_bufptr == PL_bufend)
bbf60fe6 2627 return REPORT(sublex_done());
79072805 2628
3280af22
NIS
2629 if (SvIVX(PL_linestr) == '\'') {
2630 SV *sv = newSVsv(PL_linestr);
2631 if (!PL_lex_inpat)
76e3520e 2632 sv = tokeq(sv);
3280af22 2633 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2634 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2635 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2636 s = PL_bufend;
79072805
LW
2637 }
2638 else {
3280af22 2639 s = scan_const(PL_bufptr);
79072805 2640 if (*s == '\\')
3280af22 2641 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2642 else
3280af22 2643 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2644 }
2645
3280af22
NIS
2646 if (s != PL_bufptr) {
2647 PL_nextval[PL_nexttoke] = yylval;
2648 PL_expect = XTERM;
79072805 2649 force_next(THING);
131b3ad0
DM
2650 if (PL_lex_starts++) {
2651 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2652 if (!PL_lex_casemods && PL_lex_inpat)
2653 OPERATOR(',');
2654 else
2655 Aop(OP_CONCAT);
2656 }
79072805 2657 else {
3280af22 2658 PL_bufptr = s;
cea2e8a9 2659 return yylex();
79072805
LW
2660 }
2661 }
2662
cea2e8a9 2663 return yylex();
a0d0e21e 2664 case LEX_FORMLINE:
3280af22
NIS
2665 PL_lex_state = LEX_NORMAL;
2666 s = scan_formline(PL_bufptr);
2667 if (!PL_lex_formbrack)
a0d0e21e
LW
2668 goto rightbracket;
2669 OPERATOR(';');
79072805
LW
2670 }
2671
3280af22
NIS
2672 s = PL_bufptr;
2673 PL_oldoldbufptr = PL_oldbufptr;
2674 PL_oldbufptr = s;
463ee0b2
LW
2675
2676 retry:
378cc40b
LW
2677 switch (*s) {
2678 default:
7e2040f0 2679 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2680 goto keylookup;
cea2e8a9 2681 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2682 case 4:
2683 case 26:
2684 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2685 case 0:
3280af22
NIS
2686 if (!PL_rsfp) {
2687 PL_last_uni = 0;
2688 PL_last_lop = 0;
c5ee2135 2689 if (PL_lex_brackets) {
0bd48802
AL
2690 yyerror(PL_lex_formbrack
2691 ? "Format not terminated"
2692 : "Missing right curly or square bracket");
c5ee2135 2693 }
4e553d73 2694 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2695 "### Tokener got EOF\n");
5f80b19c 2696 } );
79072805 2697 TOKEN(0);
463ee0b2 2698 }
3280af22 2699 if (s++ < PL_bufend)
a687059c 2700 goto retry; /* ignore stray nulls */
3280af22
NIS
2701 PL_last_uni = 0;
2702 PL_last_lop = 0;
2703 if (!PL_in_eval && !PL_preambled) {
2704 PL_preambled = TRUE;
2705 sv_setpv(PL_linestr,incl_perldb());
2706 if (SvCUR(PL_linestr))
396482e1 2707 sv_catpvs(PL_linestr,";");
3280af22
NIS
2708 if (PL_preambleav){
2709 while(AvFILLp(PL_preambleav) >= 0) {
2710 SV *tmpsv = av_shift(PL_preambleav);
2711 sv_catsv(PL_linestr, tmpsv);
396482e1 2712 sv_catpvs(PL_linestr, ";");
91b7def8
PP
2713 sv_free(tmpsv);
2714 }
3280af22
NIS
2715 sv_free((SV*)PL_preambleav);
2716 PL_preambleav = NULL;
91b7def8 2717 }
3280af22 2718 if (PL_minus_n || PL_minus_p) {
396482e1 2719 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2720 if (PL_minus_l)
396482e1 2721 sv_catpvs(PL_linestr,"chomp;");
3280af22 2722 if (PL_minus_a) {
3280af22 2723 if (PL_minus_F) {
3792a11b
NC
2724 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2725 || *PL_splitstr == '"')
3280af22 2726 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2727 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2728 else {
c8ef6a4b
NC
2729 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2730 bytes can be used as quoting characters. :-) */
dd374669 2731 const char *splits = PL_splitstr;
91d456ae 2732 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2733 do {
2734 /* Need to \ \s */
dd374669
AL
2735 if (*splits == '\\')
2736 sv_catpvn(PL_linestr, splits, 1);
2737 sv_catpvn(PL_linestr, splits, 1);
2738 } while (*splits++);
48c4c863
NC
2739 /* This loop will embed the trailing NUL of
2740 PL_linestr as the last thing it does before
2741 terminating. */
396482e1 2742 sv_catpvs(PL_linestr, ");");
54310121 2743 }
2304df62
AD
2744 }
2745 else
396482e1 2746 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2747 }
79072805 2748 }
bc9b29db 2749 if (PL_minus_E)
396482e1
GA
2750 sv_catpvs(PL_linestr,"use feature ':5.10';");
2751 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2752 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2753 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2754 PL_last_lop = PL_last_uni = Nullch;
3280af22 2755 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2756 SV * const sv = newSV(0);
a0d0e21e
LW
2757
2758 sv_upgrade(sv, SVt_PVMG);
3280af22 2759 sv_setsv(sv,PL_linestr);
0ac0412a 2760 (void)SvIOK_on(sv);
45977657 2761 SvIV_set(sv, 0);
36c7798d 2762 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2763 }
79072805 2764 goto retry;
a687059c 2765 }
e929a76b 2766 do {
aa7440fb 2767 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2768 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2769 fake_eof:
2770 if (PL_rsfp) {
2771 if (PL_preprocess && !PL_in_eval)
2772 (void)PerlProc_pclose(PL_rsfp);
2773 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2774 PerlIO_clearerr(PL_rsfp);
2775 else
2776 (void)PerlIO_close(PL_rsfp);
2777 PL_rsfp = Nullfp;
2778 PL_doextract = FALSE;
2779 }
2780 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2781 sv_setpv(PL_linestr,PL_minus_p
2782 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2783 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2785 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2786 PL_minus_n = PL_minus_p = 0;
2787 goto retry;
2788 }
2789 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2790 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2791 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2792 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2793 }
7aa207d6
JH
2794 /* If it looks like the start of a BOM or raw UTF-16,
2795 * check if it in fact is. */
2796 else if (bof &&
2797 (*s == 0 ||
2798 *(U8*)s == 0xEF ||
2799 *(U8*)s >= 0xFE ||
2800 s[1] == 0)) {
226017aa 2801#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2802# ifdef __GNU_LIBRARY__
2803# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2804# define FTELL_FOR_PIPE_IS_BROKEN
2805# endif
e3f494f1
JH
2806# else
2807# ifdef __GLIBC__
2808# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2809# define FTELL_FOR_PIPE_IS_BROKEN
2810# endif
2811# endif
226017aa
DD
2812# endif
2813#endif
2814#ifdef FTELL_FOR_PIPE_IS_BROKEN
2815 /* This loses the possibility to detect the bof
2816 * situation on perl -P when the libc5 is being used.
2817 * Workaround? Maybe attach some extra state to PL_rsfp?
2818 */
2819 if (!PL_preprocess)
7e28d3af 2820 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2821#else
eb160463 2822 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2823#endif
7e28d3af 2824 if (bof) {
3280af22 2825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2826 s = swallow_bom((U8*)s);
e929a76b 2827 }
378cc40b 2828 }
3280af22 2829 if (PL_doextract) {
a0d0e21e
LW
2830 /* Incest with pod. */
2831 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2832 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2833 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2834 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2835 PL_last_lop = PL_last_uni = Nullch;
3280af22 2836 PL_doextract = FALSE;
a0d0e21e 2837 }
4e553d73 2838 }
463ee0b2 2839 incline(s);
3280af22
NIS
2840 } while (PL_doextract);
2841 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2842 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2843 SV * const sv = newSV(0);
a687059c 2844
93a17b20 2845 sv_upgrade(sv, SVt_PVMG);
3280af22 2846 sv_setsv(sv,PL_linestr);
0ac0412a 2847 (void)SvIOK_on(sv);
45977657 2848 SvIV_set(sv, 0);
36c7798d 2849 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2850 }
3280af22 2851 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2852 PL_last_lop = PL_last_uni = Nullch;
57843af0 2853 if (CopLINE(PL_curcop) == 1) {
3280af22 2854 while (s < PL_bufend && isSPACE(*s))
79072805 2855 s++;
a0d0e21e 2856 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2857 s++;
44a8e56a 2858 d = Nullch;
3280af22 2859 if (!PL_in_eval) {
44a8e56a
PP
2860 if (*s == '#' && *(s+1) == '!')
2861 d = s + 2;
2862#ifdef ALTERNATE_SHEBANG
2863 else {
bfed75c6 2864 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2865 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2866 d = s + (sizeof(as) - 1);
2867 }
2868#endif /* ALTERNATE_SHEBANG */
2869 }
2870 if (d) {
b8378b72 2871 char *ipath;
774d564b 2872 char *ipathend;
b8378b72 2873
774d564b 2874 while (isSPACE(*d))
b8378b72
CS
2875 d++;
2876 ipath = d;
774d564b
PP
2877 while (*d && !isSPACE(*d))
2878 d++;
2879 ipathend = d;
2880
2881#ifdef ARG_ZERO_IS_SCRIPT
2882 if (ipathend > ipath) {
2883 /*
2884 * HP-UX (at least) sets argv[0] to the script name,
2885 * which makes $^X incorrect. And Digital UNIX and Linux,
2886 * at least, set argv[0] to the basename of the Perl
2887 * interpreter. So, having found "#!", we'll set it right.
2888 */
f776e3cd 2889 SV * const x
5c1737d1 2890 = GvSV(gv_fetchpvs("\030", GV_ADD, SVt_PV)); /* $^X */
774d564b 2891 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2892 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2893 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2894 SvSETMAGIC(x);
2895 }
556c1dec
JH
2896 else {
2897 STRLEN blen;
2898 STRLEN llen;
cfd0369c 2899 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2900 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2901 if (llen < blen) {
2902 bstart += blen - llen;
2903 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2904 sv_setpvn(x, ipath, ipathend - ipath);
2905 SvSETMAGIC(x);
2906 }
2907 }
2908 }
774d564b 2909 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2910 }
774d564b 2911#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2912
2913 /*
2914 * Look for options.
2915 */
748a9306 2916 d = instr(s,"perl -");
84e30d1a 2917 if (!d) {
748a9306 2918 d = instr(s,"perl");
84e30d1a
GS
2919#if defined(DOSISH)
2920 /* avoid getting into infinite loops when shebang
2921 * line contains "Perl" rather than "perl" */
2922 if (!d) {
2923 for (d = ipathend-4; d >= ipath; --d) {
2924 if ((*d == 'p' || *d == 'P')
2925 && !ibcmp(d, "perl", 4))
2926 {
2927 break;
2928 }
2929 }
2930 if (d < ipath)
2931 d = Nullch;
2932 }
2933#endif
2934 }
44a8e56a
PP
2935#ifdef ALTERNATE_SHEBANG
2936 /*
2937 * If the ALTERNATE_SHEBANG on this system starts with a
2938 * character that can be part of a Perl expression, then if
2939 * we see it but not "perl", we're probably looking at the
2940 * start of Perl code, not a request to hand off to some
2941 * other interpreter. Similarly, if "perl" is there, but
2942 * not in the first 'word' of the line, we assume the line
2943 * contains the start of the Perl program.
44a8e56a
PP
2944 */
2945 if (d && *s != '#') {
f54cb97a 2946 const char *c = ipath;
44a8e56a
PP
2947 while (*c && !strchr("; \t\r\n\f\v#", *c))
2948 c++;
2949 if (c < d)
2950 d = Nullch; /* "perl" not in first word; ignore */
2951 else
2952 *s = '#'; /* Don't try to parse shebang line */
2953 }
774d564b 2954#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2955#ifndef MACOS_TRADITIONAL
748a9306 2956 if (!d &&
44a8e56a 2957 *s == '#' &&
774d564b 2958 ipathend > ipath &&
3280af22 2959 !PL_minus_c &&
748a9306 2960 !instr(s,"indir") &&
3280af22 2961 instr(PL_origargv[0],"perl"))
748a9306 2962 {
27da23d5 2963 dVAR;
9f68db38 2964 char **newargv;
9f68db38 2965
774d564b
PP
2966 *ipathend = '\0';
2967 s = ipathend + 1;
3280af22 2968 while (s < PL_bufend && isSPACE(*s))
9f68db38 2969 s++;
3280af22 2970 if (s < PL_bufend) {
a02a5408 2971 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2972 newargv[1] = s;
3280af22 2973 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2974 s++;
2975 *s = '\0';
3280af22 2976 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2977 }
2978 else
3280af22 2979 newargv = PL_origargv;
774d564b 2980 newargv[0] = ipath;
b35112e7 2981 PERL_FPU_PRE_EXEC
b4748376 2982 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2983 PERL_FPU_POST_EXEC
cea2e8a9 2984 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2985 }
bf4acbe4 2986#endif
748a9306 2987 if (d) {
748a9306 2988 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2989 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2990
2991 if (*d++ == '-') {
f54cb97a 2992 const bool switches_done = PL_doswitches;
fb993905
GA
2993 const U32 oldpdb = PL_perldb;
2994 const bool oldn = PL_minus_n;
2995 const bool oldp = PL_minus_p;
2996
8cc95fdb 2997 do {
3ffe3ee4 2998 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2999 const char * const m = d;
8cc95fdb 3000 while (*d && !isSPACE(*d)) d++;
cea2e8a9 3001 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
3002 (int)(d - m), m);
3003 }
3004 d = moreswitches(d);
3005 } while (d);
f0b2cf55
YST
3006 if (PL_doswitches && !switches_done) {
3007 int argc = PL_origargc;
3008 char **argv = PL_origargv;
3009 do {
3010 argc--,argv++;
3011 } while (argc && argv[0][0] == '-' && argv[0][1]);
3012 init_argv_symbols(argc,argv);
3013 }
155aba94
GS
3014 if ((PERLDB_LINE && !oldpdb) ||
3015 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
3016 /* if we have already added "LINE: while (<>) {",
3017 we must not do it again */
748a9306 3018 {
c69006e4 3019 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3020 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3021 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 3022 PL_last_lop = PL_last_uni = Nullch;
3280af22 3023 PL_preambled = FALSE;
84902520 3024 if (PERLDB_LINE)
3280af22 3025 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3026 goto retry;
3027 }
a0d0e21e 3028 }
79072805 3029 }
9f68db38 3030 }
79072805 3031 }
3280af22
NIS
3032 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3033 PL_bufptr = s;
3034 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3035 return yylex();
ae986130 3036 }
378cc40b 3037 goto retry;
4fdae800 3038 case '\r':
6a27c188 3039#ifdef PERL_STRICT_CR
cea2e8a9 3040 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3041 Perl_croak(aTHX_
cc507455 3042 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3043#endif
4fdae800 3044 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3045#ifdef MACOS_TRADITIONAL
3046 case '\312':
3047#endif
378cc40b
LW
3048 s++;
3049 goto retry;
378cc40b 3050 case '#':
e929a76b 3051 case '\n':
3280af22 3052 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3053 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3054 /* handle eval qq[#line 1 "foo"\n ...] */
3055 CopLINE_dec(PL_curcop);
3056 incline(s);
3057 }
3280af22 3058 d = PL_bufend;
a687059c 3059 while (s < d && *s != '\n')
378cc40b 3060 s++;
0f85fab0 3061 if (s < d)
378cc40b 3062 s++;
78c267c1 3063 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3064 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3065 incline(s);
3280af22
NIS
3066 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3067 PL_bufptr = s;
3068 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3069 return yylex();
a687059c 3070 }
378cc40b 3071 }
a687059c 3072 else {
378cc40b 3073 *s = '\0';
3280af22 3074 PL_bufend = s;
a687059c 3075 }
378cc40b
LW
3076 goto retry;
3077 case '-':
79072805 3078 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3079 I32 ftst = 0;
90771dc0 3080 char tmp;
e5edeb50 3081
378cc40b 3082 s++;
3280af22 3083 PL_bufptr = s;
748a9306
LW
3084 tmp = *s++;
3085
bf4acbe4 3086 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3087 s++;
3088
3089 if (strnEQ(s,"=>",2)) {
3280af22 3090 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3091 DEBUG_T( { S_printbuf(aTHX_
3092 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3093 } );
748a9306
LW
3094 OPERATOR('-'); /* unary minus */
3095 }
3280af22 3096 PL_last_uni = PL_oldbufptr;
748a9306 3097 switch (tmp) {
e5edeb50
JH
3098 case 'r': ftst = OP_FTEREAD; break;
3099 case 'w': ftst = OP_FTEWRITE; break;
3100 case 'x': ftst = OP_FTEEXEC; break;
3101 case 'o': ftst = OP_FTEOWNED; break;
3102 case 'R': ftst = OP_FTRREAD; break;
3103 case 'W': ftst = OP_FTRWRITE; break;
3104 case 'X': ftst = OP_FTREXEC; break;
3105 case 'O': ftst = OP_FTROWNED; break;
3106 case 'e': ftst = OP_FTIS; break;
3107 case 'z': ftst = OP_FTZERO; break;
3108 case 's': ftst = OP_FTSIZE; break;
3109 case 'f': ftst = OP_FTFILE; break;
3110 case 'd': ftst = OP_FTDIR; break;
3111 case 'l': ftst = OP_FTLINK; break;
3112 case 'p': ftst = OP_FTPIPE; break;
3113 case 'S': ftst = OP_FTSOCK; break;
3114 case 'u': ftst = OP_FTSUID; break;
3115 case 'g': ftst = OP_FTSGID; break;
3116 case 'k': ftst = OP_FTSVTX; break;
3117 case 'b': ftst = OP_FTBLK; break;
3118 case 'c': ftst = OP_FTCHR; break;
3119 case 't': ftst = OP_FTTTY; break;
3120 case 'T': ftst = OP_FTTEXT; break;
3121 case 'B': ftst = OP_FTBINARY; break;
3122 case 'M': case 'A': case 'C':
5c1737d1 3123 gv_fetchpvs("\024",GV_ADD, SVt_PV);
e5edeb50
JH
3124 switch (tmp) {
3125 case 'M': ftst = OP_FTMTIME; break;
3126 case 'A': ftst = OP_FTATIME; break;
3127 case 'C': ftst = OP_FTCTIME; break;
3128 default: break;
3129 }
3130 break;
378cc40b 3131 default:
378cc40b
LW
3132 break;
3133 }
e5edeb50 3134 if (ftst) {
eb160463 3135 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3136 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3137 "### Saw file test %c\n", (int)tmp);
5f80b19c 3138 } );
e5edeb50
JH
3139 FTST(ftst);
3140 }
3141 else {
3142 /* Assume it was a minus followed by a one-letter named
3143 * subroutine call (or a -bareword), then. */
95c31fe3 3144 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3145 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3146 (int) tmp);
5f80b19c 3147 } );
3cf7b4c4 3148 s = --PL_bufptr;
e5edeb50 3149 }
378cc40b 3150 }
90771dc0
NC
3151 {
3152 const char tmp = *s++;
3153 if (*s == tmp) {
3154 s++;
3155 if (PL_expect == XOPERATOR)
3156 TERM(POSTDEC);
3157 else
3158 OPERATOR(PREDEC);
3159 }
3160 else if (*s == '>') {
3161 s++;
3162 s = skipspace(s);
3163 if (isIDFIRST_lazy_if(s,UTF)) {
3164 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3165 TOKEN(ARROW);
3166 }
3167 else if (*s == '$')
3168 OPERATOR(ARROW);
3169 else
3170 TERM(ARROW);
3171 }
3280af22 3172 if (PL_expect == XOPERATOR)
90771dc0
NC
3173 Aop(OP_SUBTRACT);
3174 else {
3175 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3176 check_uni();
3177 OPERATOR('-'); /* unary minus */
79072805 3178 }
2f3197b3 3179 }
79072805 3180
378cc40b 3181 case '+':
90771dc0
NC
3182 {
3183 const char tmp = *s++;
3184 if (*s == tmp) {
3185 s++;
3186 if (PL_expect == XOPERATOR)
3187 TERM(POSTINC);
3188 else
3189 OPERATOR(PREINC);
3190 }
3280af22 3191 if (PL_expect == XOPERATOR)
90771dc0
NC
3192 Aop(OP_ADD);
3193 else {
3194 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3195 check_uni();
3196 OPERATOR('+');
3197 }
2f3197b3 3198 }
a687059c 3199
378cc40b 3200 case '*':
3280af22
NIS
3201 if (PL_expect != XOPERATOR) {
3202 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3203 PL_expect = XOPERATOR;
3204 force_ident(PL_tokenbuf, '*');
3205 if (!*PL_tokenbuf)
a0d0e21e 3206 PREREF('*');
79072805 3207 TERM('*');
a687059c 3208 }
79072805
LW
3209 s++;
3210 if (*s == '*') {
a687059c 3211 s++;
79072805 3212 PWop(OP_POW);
a687059c 3213 }
79072805
LW
3214 Mop(OP_MULTIPLY);
3215
378cc40b 3216 case '%':
3280af22 3217 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3218 ++s;
3219 Mop(OP_MODULO);
a687059c 3220 }
3280af22
NIS
3221 PL_tokenbuf[0] = '%';
3222 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3223 if (!PL_tokenbuf[1]) {
bbce6d69 3224 PREREF('%');
a687059c 3225 }
3280af22 3226 PL_pending_ident = '%';
bbce6d69 3227 TERM('%');
a687059c 3228
378cc40b 3229 case '^':
79072805 3230 s++;
a0d0e21e 3231 BOop(OP_BIT_XOR);
79072805 3232 case '[':
3280af22 3233 PL_lex_brackets++;
79072805 3234 /* FALL THROUGH */
378cc40b 3235 case '~':
0d863452
RH
3236 if (s[1] == '~'
3237 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3238 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3239 {
3240 s += 2;
3241 Eop(OP_SMARTMATCH);
3242 }
378cc40b 3243 case ',':
90771dc0
NC
3244 {
3245 const char tmp = *s++;
3246 OPERATOR(tmp);
3247 }
a0d0e21e
LW
3248 case ':':
3249 if (s[1] == ':') {
3250 len = 0;
0bfa2a8a 3251 goto just_a_word_zero_gv;
a0d0e21e
LW
3252 }
3253 s++;
09bef843
SB
3254 switch (PL_expect) {
3255 OP *attrs;
3256 case XOPERATOR:
3257 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3258 break;
3259 PL_bufptr = s; /* update in case we back off */
3260 goto grabattrs;
3261 case XATTRBLOCK:
3262 PL_expect = XBLOCK;
3263 goto grabattrs;
3264 case XATTRTERM:
3265 PL_expect = XTERMBLOCK;
3266 grabattrs:
3267 s = skipspace(s);
3268 attrs = Nullop;
7e2040f0 3269 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3270 I32 tmp;
09bef843 3271 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3272 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3273 if (tmp < 0) tmp = -tmp;
3274 switch (tmp) {
3275 case KEY_or:
3276 case KEY_and:
c963b151 3277 case KEY_err:
f9829d6b
GS
3278 case KEY_for:
3279 case KEY_unless:
3280 case KEY_if:
3281 case KEY_while:
3282 case KEY_until:
3283 goto got_attrs;
3284 default:
3285 break;
3286 }
3287 }
09bef843
SB
3288 if (*d == '(') {
3289 d = scan_str(d,TRUE,TRUE);
3290 if (!d) {
09bef843
SB
3291 /* MUST advance bufptr here to avoid bogus
3292 "at end of line" context messages from yyerror().
3293 */
3294 PL_bufptr = s + len;
3295 yyerror("Unterminated attribute parameter in attribute list");
3296 if (attrs)
3297 op_free(attrs);
bbf60fe6 3298 return REPORT(0); /* EOF indicator */
09bef843
SB
3299 }
3300 }
3301 if (PL_lex_stuff) {
3302 SV *sv = newSVpvn(s, len);
3303 sv_catsv(sv, PL_lex_stuff);
3304 attrs = append_elem(OP_LIST, attrs,
3305 newSVOP(OP_CONST, 0, sv));
3306 SvREFCNT_dec(PL_lex_stuff);
3307 PL_lex_stuff = Nullsv;
3308 }
3309 else {
371fce9b
DM
3310 if (len == 6 && strnEQ(s, "unique", len)) {
3311 if (PL_in_my == KEY_our)
3312#ifdef USE_ITHREADS
3313 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3314#else
3315 ; /* skip to avoid loading attributes.pm */
3316#endif
bfed75c6 3317 else
371fce9b
DM
3318 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3319 }
3320
d3cea301
SB
3321 /* NOTE: any CV attrs applied here need to be part of
3322 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3323 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3324 CvLVALUE_on(PL_compcv);
3325 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3326 CvLOCKED_on(PL_compcv);
3327 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3328 CvMETHOD_on(PL_compcv);
06492da6
SF
3329 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3330 CvASSERTION_on(PL_compcv);
78f9721b
SM
3331 /* After we've set the flags, it could be argued that
3332 we don't need to do the attributes.pm-based setting
3333 process, and shouldn't bother appending recognized
d3cea301
SB
3334 flags. To experiment with that, uncomment the
3335 following "else". (Note that's already been
3336 uncommented. That keeps the above-applied built-in
3337 attributes from being intercepted (and possibly
3338 rejected) by a package's attribute routines, but is
3339 justified by the performance win for the common case
3340 of applying only built-in attributes.) */
0256094b 3341 else
78f9721b
SM
3342 attrs = append_elem(OP_LIST, attrs,
3343 newSVOP(OP_CONST, 0,
3344 newSVpvn(s, len)));
09bef843
SB
3345 }
3346 s = skipspace(d);
0120eecf 3347 if (*s == ':' && s[1] != ':')
09bef843 3348 s = skipspace(s+1);
0120eecf
GS
3349 else if (s == d)
3350 break; /* require real whitespace or :'s */
09bef843 3351 }
90771dc0
NC
3352 {
3353 const char tmp
3354 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3355 if (*s != ';' && *s != '}' && *s != tmp
3356 && (tmp != '=' || *s != ')')) {
3357 const char q = ((*s == '\'') ? '"' : '\'');
3358 /* If here for an expression, and parsed no attrs, back
3359 off. */
3360 if (tmp == '=' && !attrs) {
3361 s = PL_bufptr;
3362 break;
3363 }
3364 /* MUST advance bufptr here to avoid bogus "at end of line"
3365 context messages from yyerror().
3366 */
3367 PL_bufptr = s;
3368 yyerror( *s
3369 ? Perl_form(aTHX_ "Invalid separator character "
3370 "%c%c%c in attribute list", q, *s, q)
3371 : "Unterminated attribute list" );
3372 if (attrs)
3373 op_free(attrs);
3374 OPERATOR(':');
09bef843 3375 }
09bef843 3376 }
f9829d6b 3377 got_attrs:
09bef843
SB
3378 if (attrs) {
3379 PL_nextval[PL_nexttoke].opval = attrs;
3380 force_next(THING);
3381 }
3382 TOKEN(COLONATTR);
3383 }
a0d0e21e 3384 OPERATOR(':');
8990e307
LW
3385 case '(':
3386 s++;
3280af22
NIS
3387 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3388 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3389 else
3280af22 3390 PL_expect = XTERM;
4a202259 3391 s = skipspace(s);
a0d0e21e 3392 TOKEN('(');
378cc40b 3393 case ';':
f4dd75d9 3394 CLINE;
90771dc0
NC
3395 {
3396 const char tmp = *s++;
3397 OPERATOR(tmp);
3398 }
378cc40b 3399 case ')':
90771dc0
NC
3400 {
3401 const char tmp = *s++;
3402 s = skipspace(s);
3403 if (*s == '{')
3404 PREBLOCK(tmp);
3405 TERM(tmp);
3406 }
79072805
LW
3407 case ']':
3408 s++;
3280af22 3409 if (PL_lex_brackets <= 0)
d98d5fff 3410 yyerror("Unmatched right square bracket");
463ee0b2 3411 else
3280af22
NIS
3412 --PL_lex_brackets;
3413 if (PL_lex_state == LEX_INTERPNORMAL) {
3414 if (PL_lex_brackets == 0) {
a0d0e21e 3415 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3416 PL_lex_state = LEX_INTERPEND;
79072805
LW
3417 }
3418 }
4633a7c4 3419 TERM(']');
79072805
LW
3420 case '{':
3421 leftbracket:
79072805 3422 s++;
3280af22 3423 if (PL_lex_brackets > 100) {
8edd5f42 3424 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3425 }
3280af22 3426 switch (PL_expect) {
a0d0e21e 3427 case XTERM:
3280af22 3428 if (PL_lex_formbrack) {
a0d0e21e
LW
3429 s--;
3430 PRETERMBLOCK(DO);
3431 }
3280af22
NIS
3432 if (PL_oldoldbufptr == PL_last_lop)
3433 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3434 else
3280af22 3435 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3436 OPERATOR(HASHBRACK);
a0d0e21e 3437 case XOPERATOR:
bf4acbe4 3438 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3439 s++;
44a8e56a 3440 d = s;
3280af22
NIS
3441 PL_tokenbuf[0] = '\0';
3442 if (d < PL_bufend && *d == '-') {
3443 PL_tokenbuf[0] = '-';
44a8e56a 3444 d++;
bf4acbe4 3445 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3446 d++;
3447 }
7e2040f0 3448 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3449 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3450 FALSE, &len);
bf4acbe4 3451 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3452 d++;
3453 if (*d == '}') {
f54cb97a 3454 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3455 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3456 if (minus)
3457 force_next('-');
748a9306
LW
3458 }
3459 }
3460 /* FALL THROUGH */
09bef843 3461 case XATTRBLOCK:
748a9306 3462 case XBLOCK:
3280af22
NIS
3463 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3464 PL_expect = XSTATE;
a0d0e21e 3465 break;
09bef843 3466 case XATTRTERM:
a0d0e21e 3467 case XTERMBLOCK:
3280af22
NIS
3468 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3469 PL_expect = XSTATE;
a0d0e21e
LW
3470 break;
3471 default: {
f54cb97a 3472 const char *t;
3280af22
NIS
3473 if (PL_oldoldbufptr == PL_last_lop)
3474 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3475 else
3280af22 3476 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3477 s = skipspace(s);
8452ff4b
SB
3478 if (*s == '}') {
3479 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3480 PL_expect = XTERM;
3481 /* This hack is to get the ${} in the message. */
3482 PL_bufptr = s+1;
3483 yyerror("syntax error");
3484 break;
3485 }
a0d0e21e 3486 OPERATOR(HASHBRACK);
8452ff4b 3487 }
b8a4b1be
GS
3488 /* This hack serves to disambiguate a pair of curlies
3489 * as being a block or an anon hash. Normally, expectation
3490 * determines that, but in cases where we're not in a
3491 * position to expect anything in particular (like inside
3492 * eval"") we have to resolve the ambiguity. This code
3493 * covers the case where the first term in the curlies is a
3494 * quoted string. Most other cases need to be explicitly
a0288114 3495 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3496 * curly in order to force resolution as an anon hash.
3497 *
3498 * XXX should probably propagate the outer expectation
3499 * into eval"" to rely less on this hack, but that could
3500 * potentially break current behavior of eval"".
3501 * GSAR 97-07-21
3502 */
3503 t = s;
3504 if (*s == '\'' || *s == '"' || *s == '`') {
3505 /* common case: get past first string, handling escapes */
3280af22 3506 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3507 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3508 t++;
3509 t++;
a0d0e21e 3510 }
b8a4b1be 3511 else if (*s == 'q') {
3280af22 3512 if (++t < PL_bufend
b8a4b1be 3513 && (!isALNUM(*t)
3280af22 3514 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3515 && !isALNUM(*t))))
3516 {
abc667d1 3517 /* skip q//-like construct */
f54cb97a 3518 const char *tmps;
b8a4b1be
GS
3519 char open, close, term;
3520 I32 brackets = 1;
3521
3280af22 3522 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3523 t++;
abc667d1
DM
3524 /* check for q => */