This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Missing file from last change
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
8903cb82 30
acfe0abc 31static void restore_rsfp(pTHX_ void *f);
6e3aabd6 32#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
33static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 35#endif
51371543 36
9059aa12
LW
37#define XFAKEBRACK 128
38#define XENUMMASK 127
39
39e02b42
JH
40#ifdef USE_UTF8_SCRIPTS
41# define UTF (!IN_BYTES)
2b9d42f0 42#else
746b446a 43# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 44#endif
a0ed51b3 45
61f0cdd9 46/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
47 * 1999-02-27 mjd-perl-patch@plover.com */
48#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
49
bf4acbe4
GS
50/* On MacOS, respect nonbreaking spaces */
51#ifdef MACOS_TRADITIONAL
52#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
53#else
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
55#endif
56
ffb4593c
NT
57/* LEX_* are values for PL_lex_state, the state of the lexer.
58 * They are arranged oddly so that the guard on the switch statement
79072805
LW
59 * can get by with a single comparison (if the compiler is smart enough).
60 */
61
fb73857a
PP
62/* #define LEX_NOTPARSING 11 is done in perl.h. */
63
b6007c36
DM
64#define LEX_NORMAL 10 /* normal code (ie not within "...") */
65#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
66#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
67#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
68#define LEX_INTERPSTART 6 /* expecting the start of a $var */
69
70 /* at end of code, eg "$x" followed by: */
71#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
72#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
73
74#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
75 string or after \E, $foo, etc */
76#define LEX_INTERPCONST 2 /* NOT USED */
77#define LEX_FORMLINE 1 /* expecting a format line */
78#define LEX_KNOWNEXT 0 /* next token known; just return it */
79
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 139# define REPORT(retval) tokereport((I32)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
376fcdbf
AL
170#define UNI2(f,x) { \
171 yylval.ival = f; \
172 PL_expect = x; \
173 PL_bufptr = s; \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
176 if (*s == '(') \
177 return REPORT( (int)FUNC1 ); \
178 s = skipspace(s); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
180 }
6f33ba73
RGS
181#define UNI(f) UNI2(f,XTERM)
182#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 183
376fcdbf
AL
184#define UNIBRACK(f) { \
185 yylval.ival = f; \
186 PL_bufptr = s; \
187 PL_last_uni = PL_oldbufptr; \
188 if (*s == '(') \
189 return REPORT( (int)FUNC1 ); \
190 s = skipspace(s); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
192 }
79072805 193
9f68db38 194/* grandfather return to old style */
3280af22 195#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 196
8fa7f367
JH
197#ifdef DEBUGGING
198
bbf60fe6
DM
199/* how to interpret the yylval associated with the token */
200enum token_type {
201 TOKENTYPE_NONE,
202 TOKENTYPE_IVAL,
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
204 TOKENTYPE_PVAL,
205 TOKENTYPE_OPVAL,
206 TOKENTYPE_GVVAL
207};
208
27da23d5
JH
209static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
9041c2e3 211{
bbf60fe6
DM
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 222 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
223 { DO, TOKENTYPE_NONE, "DO" },
224 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
225 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
226 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
227 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
228 { ELSE, TOKENTYPE_NONE, "ELSE" },
229 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
230 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
231 { FOR, TOKENTYPE_IVAL, "FOR" },
232 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
233 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
234 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
235 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
236 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
237 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 238 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
239 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
240 { IF, TOKENTYPE_IVAL, "IF" },
241 { LABEL, TOKENTYPE_PVAL, "LABEL" },
242 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
243 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
244 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
245 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
246 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
247 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
248 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
249 { MY, TOKENTYPE_IVAL, "MY" },
250 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
251 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
252 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
253 { OROP, TOKENTYPE_IVAL, "OROP" },
254 { OROR, TOKENTYPE_NONE, "OROR" },
255 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
256 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
257 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
258 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
259 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
260 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
261 { PREINC, TOKENTYPE_NONE, "PREINC" },
262 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
263 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
264 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
265 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
266 { SUB, TOKENTYPE_NONE, "SUB" },
267 { THING, TOKENTYPE_OPVAL, "THING" },
268 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
269 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
270 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
271 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
272 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
273 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 274 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
275 { WHILE, TOKENTYPE_IVAL, "WHILE" },
276 { WORD, TOKENTYPE_OPVAL, "WORD" },
277 { 0, TOKENTYPE_NONE, 0 }
278};
279
280/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 281
bbf60fe6 282STATIC int
f5bd084c 283S_tokereport(pTHX_ I32 rv)
bbf60fe6
DM
284{
285 if (DEBUG_T_TEST) {
bfed75c6 286 const char *name = Nullch;
bbf60fe6 287 enum token_type type = TOKENTYPE_NONE;
f54cb97a 288 const struct debug_tokens *p;
9d4ba2ae 289 SV* const report = newSVpvn("<== ", 4);
bbf60fe6 290
f54cb97a 291 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
292 if (p->token == (int)rv) {
293 name = p->name;
294 type = p->type;
295 break;
296 }
297 }
298 if (name)
54667de8 299 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
300 else if ((char)rv > ' ' && (char)rv < '~')
301 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
302 else if (!rv)
54667de8 303 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
304 else
305 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
306 switch (type) {
307 case TOKENTYPE_NONE:
308 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
309 break;
310 case TOKENTYPE_IVAL:
e4584336 311 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
312 break;
313 case TOKENTYPE_OPNUM:
314 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
315 PL_op_name[yylval.ival]);
316 break;
317 case TOKENTYPE_PVAL:
318 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
319 break;
320 case TOKENTYPE_OPVAL:
b6007c36 321 if (yylval.opval) {
401441c0 322 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 323 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
324 if (yylval.opval->op_type == OP_CONST) {
325 Perl_sv_catpvf(aTHX_ report, " %s",
326 SvPEEK(cSVOPx_sv(yylval.opval)));
327 }
328
329 }
401441c0
RGS
330 else
331 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
332 break;
333 }
b6007c36 334 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
335 };
336 return (int)rv;
998054bd
SC
337}
338
b6007c36
DM
339
340/* print the buffer with suitable escapes */
341
342STATIC void
343S_printbuf(pTHX_ const char* fmt, const char* s)
344{
c4420975 345 SV* const tmp = newSVpvn("", 0);
b6007c36
DM
346 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
347 SvREFCNT_dec(tmp);
348}
349
8fa7f367
JH
350#endif
351
ffb4593c
NT
352/*
353 * S_ao
354 *
c963b151
BD
355 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
356 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
357 */
358
76e3520e 359STATIC int
cea2e8a9 360S_ao(pTHX_ int toketype)
a0d0e21e 361{
3280af22
NIS
362 if (*PL_bufptr == '=') {
363 PL_bufptr++;
a0d0e21e
LW
364 if (toketype == ANDAND)
365 yylval.ival = OP_ANDASSIGN;
366 else if (toketype == OROR)
367 yylval.ival = OP_ORASSIGN;
c963b151
BD
368 else if (toketype == DORDOR)
369 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
370 toketype = ASSIGNOP;
371 }
372 return toketype;
373}
374
ffb4593c
NT
375/*
376 * S_no_op
377 * When Perl expects an operator and finds something else, no_op
378 * prints the warning. It always prints "<something> found where
379 * operator expected. It prints "Missing semicolon on previous line?"
380 * if the surprise occurs at the start of the line. "do you need to
381 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
382 * where the compiler doesn't know if foo is a method call or a function.
383 * It prints "Missing operator before end of line" if there's nothing
384 * after the missing operator, or "... before <...>" if there is something
385 * after the missing operator.
386 */
387
76e3520e 388STATIC void
bfed75c6 389S_no_op(pTHX_ const char *what, char *s)
463ee0b2 390{
9d4ba2ae
AL
391 char * const oldbp = PL_bufptr;
392 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 393
1189a94a
GS
394 if (!s)
395 s = oldbp;
07c798fb 396 else
1189a94a 397 PL_bufptr = s;
cea2e8a9 398 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
399 if (ckWARN_d(WARN_SYNTAX)) {
400 if (is_first)
401 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
402 "\t(Missing semicolon on previous line?)\n");
403 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 404 const char *t;
56da5a46
RGS
405 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
406 if (t < PL_bufptr && isSPACE(*t))
407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
408 "\t(Do you need to predeclare %.*s?)\n",
551405c4 409 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
410 }
411 else {
412 assert(s >= oldbp);
413 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 414 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 415 }
07c798fb 416 }
3280af22 417 PL_bufptr = oldbp;
8990e307
LW
418}
419
ffb4593c
NT
420/*
421 * S_missingterm
422 * Complain about missing quote/regexp/heredoc terminator.
423 * If it's called with (char *)NULL then it cauterizes the line buffer.
424 * If we're in a delimited string and the delimiter is a control
425 * character, it's reformatted into a two-char sequence like ^C.
426 * This is fatal.
427 */
428
76e3520e 429STATIC void
cea2e8a9 430S_missingterm(pTHX_ char *s)
8990e307
LW
431{
432 char tmpbuf[3];
433 char q;
434 if (s) {
9d4ba2ae 435 char * const nl = strrchr(s,'\n');
d2719217 436 if (nl)
8990e307
LW
437 *nl = '\0';
438 }
9d116dd7
JH
439 else if (
440#ifdef EBCDIC
441 iscntrl(PL_multi_close)
442#else
443 PL_multi_close < 32 || PL_multi_close == 127
444#endif
445 ) {
8990e307 446 *tmpbuf = '^';
585ec06d 447 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
448 tmpbuf[2] = '\0';
449 s = tmpbuf;
450 }
451 else {
eb160463 452 *tmpbuf = (char)PL_multi_close;
8990e307
LW
453 tmpbuf[1] = '\0';
454 s = tmpbuf;
455 }
456 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 457 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 458}
79072805 459
0d863452
RH
460#define FEATURE_IS_ENABLED(name, namelen) \
461 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
462 && feature_is_enabled(name, namelen))
463/*
464 * S_feature_is_enabled
465 * Check whether the named feature is enabled.
466 */
467STATIC bool
468S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
469{
470 HV * const hinthv = GvHV(PL_hintgv);
471 return (hinthv && hv_exists(hinthv, name, namelen));
472}
473
ffb4593c
NT
474/*
475 * Perl_deprecate
ffb4593c
NT
476 */
477
79072805 478void
bfed75c6 479Perl_deprecate(pTHX_ const char *s)
a0d0e21e 480{
599cee73 481 if (ckWARN(WARN_DEPRECATED))
9014280d 482 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
483}
484
12bcd1a6 485void
bfed75c6 486Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
487{
488 /* This function should NOT be called for any new deprecated warnings */
489 /* Use Perl_deprecate instead */
490 /* */
491 /* It is here to maintain backward compatibility with the pre-5.8 */
492 /* warnings category hierarchy. The "deprecated" category used to */
493 /* live under the "syntax" category. It is now a top-level category */
494 /* in its own right. */
495
496 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 497 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
498 "Use of %s is deprecated", s);
499}
500
ffb4593c
NT
501/*
502 * depcom
9cbb5ea2 503 * Deprecate a comma-less variable list.
ffb4593c
NT
504 */
505
76e3520e 506STATIC void
cea2e8a9 507S_depcom(pTHX)
a0d0e21e 508{
12bcd1a6 509 deprecate_old("comma-less variable list");
a0d0e21e
LW
510}
511
ffb4593c 512/*
9cbb5ea2
GS
513 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
514 * utf16-to-utf8-reversed.
ffb4593c
NT
515 */
516
c39cd008
GS
517#ifdef PERL_CR_FILTER
518static void
519strip_return(SV *sv)
520{
95a20fc0 521 register const char *s = SvPVX_const(sv);
9d4ba2ae 522 register const char * const e = s + SvCUR(sv);
c39cd008
GS
523 /* outer loop optimized to do nothing if there are no CR-LFs */
524 while (s < e) {
525 if (*s++ == '\r' && *s == '\n') {
526 /* hit a CR-LF, need to copy the rest */
527 register char *d = s - 1;
528 *d++ = *s++;
529 while (s < e) {
530 if (*s == '\r' && s[1] == '\n')
531 s++;
532 *d++ = *s++;
533 }
534 SvCUR(sv) -= s - d;
535 return;
536 }
537 }
538}
a868473f 539
76e3520e 540STATIC I32
c39cd008 541S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 542{
f54cb97a 543 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
544 if (count > 0 && !maxlen)
545 strip_return(sv);
546 return count;
a868473f
NIS
547}
548#endif
549
ffb4593c
NT
550/*
551 * Perl_lex_start
9cbb5ea2
GS
552 * Initialize variables. Uses the Perl save_stack to save its state (for
553 * recursive calls to the parser).
ffb4593c
NT
554 */
555
a0d0e21e 556void
864dbfa3 557Perl_lex_start(pTHX_ SV *line)
79072805 558{
cfd0369c 559 const char *s;
8990e307
LW
560 STRLEN len;
561
3280af22
NIS
562 SAVEI32(PL_lex_dojoin);
563 SAVEI32(PL_lex_brackets);
3280af22
NIS
564 SAVEI32(PL_lex_casemods);
565 SAVEI32(PL_lex_starts);
566 SAVEI32(PL_lex_state);
7766f137 567 SAVEVPTR(PL_lex_inpat);
3280af22 568 SAVEI32(PL_lex_inwhat);
18b09519
GS
569 if (PL_lex_state == LEX_KNOWNEXT) {
570 I32 toke = PL_nexttoke;
571 while (--toke >= 0) {
572 SAVEI32(PL_nexttype[toke]);
573 SAVEVPTR(PL_nextval[toke]);
574 }
575 SAVEI32(PL_nexttoke);
18b09519 576 }
57843af0 577 SAVECOPLINE(PL_curcop);
3280af22
NIS
578 SAVEPPTR(PL_bufptr);
579 SAVEPPTR(PL_bufend);
580 SAVEPPTR(PL_oldbufptr);
581 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
582 SAVEPPTR(PL_last_lop);
583 SAVEPPTR(PL_last_uni);
3280af22
NIS
584 SAVEPPTR(PL_linestart);
585 SAVESPTR(PL_linestr);
8edd5f42
RGS
586 SAVEGENERICPV(PL_lex_brackstack);
587 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 588 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
589 SAVESPTR(PL_lex_stuff);
590 SAVEI32(PL_lex_defer);
09bef843 591 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 592 SAVESPTR(PL_lex_repl);
bebdddfc
GS
593 SAVEINT(PL_expect);
594 SAVEINT(PL_lex_expect);
3280af22
NIS
595
596 PL_lex_state = LEX_NORMAL;
597 PL_lex_defer = 0;
598 PL_expect = XSTATE;
599 PL_lex_brackets = 0;
a02a5408
JC
600 Newx(PL_lex_brackstack, 120, char);
601 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
602 PL_lex_casemods = 0;
603 *PL_lex_casestack = '\0';
604 PL_lex_dojoin = 0;
605 PL_lex_starts = 0;
606 PL_lex_stuff = Nullsv;
607 PL_lex_repl = Nullsv;
608 PL_lex_inpat = 0;
76be56bc 609 PL_nexttoke = 0;
3280af22 610 PL_lex_inwhat = 0;
09bef843 611 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
612 PL_linestr = line;
613 if (SvREADONLY(PL_linestr))
614 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 615 s = SvPV_const(PL_linestr, len);
6f27f9a7 616 if (!len || s[len-1] != ';') {
3280af22
NIS
617 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
618 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
619 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 620 }
3280af22
NIS
621 SvTEMP_off(PL_linestr);
622 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
623 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 624 PL_last_lop = PL_last_uni = Nullch;
3280af22 625 PL_rsfp = 0;
79072805 626}
a687059c 627
ffb4593c
NT
628/*
629 * Perl_lex_end
9cbb5ea2
GS
630 * Finalizer for lexing operations. Must be called when the parser is
631 * done with the lexer.
ffb4593c
NT
632 */
633
463ee0b2 634void
864dbfa3 635Perl_lex_end(pTHX)
463ee0b2 636{
3280af22 637 PL_doextract = FALSE;
463ee0b2
LW
638}
639
ffb4593c
NT
640/*
641 * S_incline
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
57843af0 644 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 645 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
648 */
649
76e3520e 650STATIC void
cea2e8a9 651S_incline(pTHX_ char *s)
463ee0b2
LW
652{
653 char *t;
654 char *n;
73659bf1 655 char *e;
463ee0b2 656 char ch;
463ee0b2 657
57843af0 658 CopLINE_inc(PL_curcop);
463ee0b2
LW
659 if (*s++ != '#')
660 return;
bf4acbe4 661 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
662 if (strnEQ(s, "line", 4))
663 s += 4;
664 else
665 return;
084592ab 666 if (SPACE_OR_TAB(*s))
73659bf1 667 s++;
4e553d73 668 else
73659bf1 669 return;
bf4acbe4 670 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
671 if (!isDIGIT(*s))
672 return;
673 n = s;
674 while (isDIGIT(*s))
675 s++;
bf4acbe4 676 while (SPACE_OR_TAB(*s))
463ee0b2 677 s++;
73659bf1 678 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 679 s++;
73659bf1
GS
680 e = t + 1;
681 }
463ee0b2 682 else {
463ee0b2 683 for (t = s; !isSPACE(*t); t++) ;
73659bf1 684 e = t;
463ee0b2 685 }
bf4acbe4 686 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
687 e++;
688 if (*e != '\n' && *e != '\0')
689 return; /* false alarm */
690
463ee0b2
LW
691 ch = *t;
692 *t = '\0';
f4dd75d9 693 if (t - s > 0) {
8a5ee598 694#ifndef USE_ITHREADS
c4420975 695 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
696 STRLEN tmplen = cf ? strlen(cf) : 0;
697 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
698 /* must copy *{"::_<(eval N)[oldfilename:L]"}
699 * to *{"::_<newfilename"} */
700 char smallbuf[256], smallbuf2[256];
701 char *tmpbuf, *tmpbuf2;
8a5ee598 702 GV **gvp, *gv2;
e66cf94c
RGS
703 STRLEN tmplen2 = strlen(s);
704 if (tmplen + 3 < sizeof smallbuf)
705 tmpbuf = smallbuf;
706 else
707 Newx(tmpbuf, tmplen + 3, char);
708 if (tmplen2 + 3 < sizeof smallbuf2)
709 tmpbuf2 = smallbuf2;
710 else
711 Newx(tmpbuf2, tmplen2 + 3, char);
712 tmpbuf[0] = tmpbuf2[0] = '_';
713 tmpbuf[1] = tmpbuf2[1] = '<';
714 memcpy(tmpbuf + 2, cf, ++tmplen);
715 memcpy(tmpbuf2 + 2, s, ++tmplen2);
716 ++tmplen; ++tmplen2;
8a5ee598
RGS
717 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
718 if (gvp) {
719 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
720 if (!isGV(gv2))
721 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
722 /* adjust ${"::_<newfilename"} to store the new file name */
723 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
724 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
725 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
726 }
e66cf94c
RGS
727 if (tmpbuf != smallbuf) Safefree(tmpbuf);
728 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
729 }
8a5ee598 730#endif
05ec9bb3 731 CopFILE_free(PL_curcop);
57843af0 732 CopFILE_set(PL_curcop, s);
f4dd75d9 733 }
463ee0b2 734 *t = ch;
57843af0 735 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
736}
737
ffb4593c
NT
738/*
739 * S_skipspace
740 * Called to gobble the appropriate amount and type of whitespace.
741 * Skips comments as well.
742 */
743
76e3520e 744STATIC char *
cea2e8a9 745S_skipspace(pTHX_ register char *s)
a687059c 746{
3280af22 747 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 748 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
749 s++;
750 return s;
751 }
752 for (;;) {
fd049845 753 STRLEN prevlen;
09bef843 754 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 755 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
756 while (s < PL_bufend && isSPACE(*s)) {
757 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
758 incline(s);
759 }
ffb4593c
NT
760
761 /* comment */
3280af22
NIS
762 if (s < PL_bufend && *s == '#') {
763 while (s < PL_bufend && *s != '\n')
463ee0b2 764 s++;
60e6418e 765 if (s < PL_bufend) {
463ee0b2 766 s++;
60e6418e
GS
767 if (PL_in_eval && !PL_rsfp) {
768 incline(s);
769 continue;
770 }
771 }
463ee0b2 772 }
ffb4593c
NT
773
774 /* only continue to recharge the buffer if we're at the end
775 * of the buffer, we're not reading from a source filter, and
776 * we're in normal lexing mode
777 */
09bef843
SB
778 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
779 PL_lex_state == LEX_FORMLINE)
463ee0b2 780 return s;
ffb4593c
NT
781
782 /* try to recharge the buffer */
9cbb5ea2
GS
783 if ((s = filter_gets(PL_linestr, PL_rsfp,
784 (prevlen = SvCUR(PL_linestr)))) == Nullch)
785 {
786 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
787 if (PL_minus_p) {
788 sv_setpv(PL_linestr,
789 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 790 PL_minus_n = PL_minus_p = 0;
a0d0e21e 791 }
01a19ab0
NC
792 else if (PL_minus_n) {
793 sv_setpvn(PL_linestr, ";}", 2);
794 PL_minus_n = 0;
795 }
a0d0e21e 796 else
4147a61b 797 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
798
799 /* reset variables for next time we lex */
9cbb5ea2
GS
800 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
801 = SvPVX(PL_linestr);
3280af22 802 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 803 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
804
805 /* Close the filehandle. Could be from -P preprocessor,
806 * STDIN, or a regular file. If we were reading code from
807 * STDIN (because the commandline held no -e or filename)
808 * then we don't close it, we reset it so the code can
809 * read from STDIN too.
810 */
811
3280af22
NIS
812 if (PL_preprocess && !PL_in_eval)
813 (void)PerlProc_pclose(PL_rsfp);
814 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
815 PerlIO_clearerr(PL_rsfp);
8990e307 816 else
3280af22
NIS
817 (void)PerlIO_close(PL_rsfp);
818 PL_rsfp = Nullfp;
463ee0b2
LW
819 return s;
820 }
ffb4593c
NT
821
822 /* not at end of file, so we only read another line */
09bef843
SB
823 /* make corresponding updates to old pointers, for yyerror() */
824 oldprevlen = PL_oldbufptr - PL_bufend;
825 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
826 if (PL_last_uni)
827 oldunilen = PL_last_uni - PL_bufend;
828 if (PL_last_lop)
829 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
830 PL_linestart = PL_bufptr = s + prevlen;
831 PL_bufend = s + SvCUR(PL_linestr);
832 s = PL_bufptr;
09bef843
SB
833 PL_oldbufptr = s + oldprevlen;
834 PL_oldoldbufptr = s + oldoldprevlen;
835 if (PL_last_uni)
836 PL_last_uni = s + oldunilen;
837 if (PL_last_lop)
838 PL_last_lop = s + oldloplen;
a0d0e21e 839 incline(s);
ffb4593c
NT
840
841 /* debugger active and we're not compiling the debugger code,
842 * so store the line into the debugger's array of lines
843 */
3280af22 844 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 845 SV * const sv = NEWSV(85,0);
8990e307
LW
846
847 sv_upgrade(sv, SVt_PVMG);
3280af22 848 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 849 (void)SvIOK_on(sv);
45977657 850 SvIV_set(sv, 0);
57843af0 851 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 852 }
463ee0b2 853 }
a687059c 854}
378cc40b 855
ffb4593c
NT
856/*
857 * S_check_uni
858 * Check the unary operators to ensure there's no ambiguity in how they're
859 * used. An ambiguous piece of code would be:
860 * rand + 5
861 * This doesn't mean rand() + 5. Because rand() is a unary operator,
862 * the +5 is its argument.
863 */
864
76e3520e 865STATIC void
cea2e8a9 866S_check_uni(pTHX)
ba106d47 867{
2f3197b3 868 char *s;
a0d0e21e 869 char *t;
2f3197b3 870
3280af22 871 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 872 return;
3280af22
NIS
873 while (isSPACE(*PL_last_uni))
874 PL_last_uni++;
7e2040f0 875 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 876 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 877 return;
0453d815 878 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 879 const char ch = *s;
0453d815 880 *s = '\0';
9014280d 881 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 882 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
883 PL_last_uni);
884 *s = ch;
885 }
2f3197b3
LW
886}
887
ffb4593c
NT
888/*
889 * LOP : macro to build a list operator. Its behaviour has been replaced
890 * with a subroutine, S_lop() for which LOP is just another name.
891 */
892
a0d0e21e
LW
893#define LOP(f,x) return lop(f,x,s)
894
ffb4593c
NT
895/*
896 * S_lop
897 * Build a list operator (or something that might be one). The rules:
898 * - if we have a next token, then it's a list operator [why?]
899 * - if the next thing is an opening paren, then it's a function
900 * - else it's a list operator
901 */
902
76e3520e 903STATIC I32
a0be28da 904S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 905{
79072805 906 yylval.ival = f;
35c8bce7 907 CLINE;
3280af22
NIS
908 PL_expect = x;
909 PL_bufptr = s;
910 PL_last_lop = PL_oldbufptr;
eb160463 911 PL_last_lop_op = (OPCODE)f;
3280af22 912 if (PL_nexttoke)
bbf60fe6 913 return REPORT(LSTOP);
79072805 914 if (*s == '(')
bbf60fe6 915 return REPORT(FUNC);
79072805
LW
916 s = skipspace(s);
917 if (*s == '(')
bbf60fe6 918 return REPORT(FUNC);
79072805 919 else
bbf60fe6 920 return REPORT(LSTOP);
79072805
LW
921}
922
ffb4593c
NT
923/*
924 * S_force_next
9cbb5ea2 925 * When the lexer realizes it knows the next token (for instance,
ffb4593c 926 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
927 * to know what token to return the next time the lexer is called. Caller
928 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
929 * handles the token correctly.
ffb4593c
NT
930 */
931
4e553d73 932STATIC void
cea2e8a9 933S_force_next(pTHX_ I32 type)
79072805 934{
3280af22
NIS
935 PL_nexttype[PL_nexttoke] = type;
936 PL_nexttoke++;
937 if (PL_lex_state != LEX_KNOWNEXT) {
938 PL_lex_defer = PL_lex_state;
939 PL_lex_expect = PL_expect;
940 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
941 }
942}
943
d0a148a6
NC
944STATIC SV *
945S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
946{
9d4ba2ae 947 SV * const sv = newSVpvn(start,len);
bfed75c6 948 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
949 SvUTF8_on(sv);
950 return sv;
951}
952
ffb4593c
NT
953/*
954 * S_force_word
955 * When the lexer knows the next thing is a word (for instance, it has
956 * just seen -> and it knows that the next char is a word char, then
957 * it calls S_force_word to stick the next word into the PL_next lookahead.
958 *
959 * Arguments:
b1b65b59 960 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
961 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
962 * int check_keyword : if true, Perl checks to make sure the word isn't
963 * a keyword (do this if the word is a label, e.g. goto FOO)
964 * int allow_pack : if true, : characters will also be allowed (require,
965 * use, etc. do this)
9cbb5ea2 966 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
967 */
968
76e3520e 969STATIC char *
cea2e8a9 970S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 971{
463ee0b2
LW
972 register char *s;
973 STRLEN len;
4e553d73 974
463ee0b2
LW
975 start = skipspace(start);
976 s = start;
7e2040f0 977 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 978 (allow_pack && *s == ':') ||
15f0808c 979 (allow_initial_tick && *s == '\'') )
a0d0e21e 980 {
3280af22
NIS
981 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
982 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
983 return start;
984 if (token == METHOD) {
985 s = skipspace(s);
986 if (*s == '(')
3280af22 987 PL_expect = XTERM;
463ee0b2 988 else {
3280af22 989 PL_expect = XOPERATOR;
463ee0b2 990 }
79072805 991 }
d0a148a6
NC
992 PL_nextval[PL_nexttoke].opval
993 = (OP*)newSVOP(OP_CONST,0,
994 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 995 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
996 force_next(token);
997 }
998 return s;
999}
1000
ffb4593c
NT
1001/*
1002 * S_force_ident
9cbb5ea2 1003 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1004 * text only contains the "foo" portion. The first argument is a pointer
1005 * to the "foo", and the second argument is the type symbol to prefix.
1006 * Forces the next token to be a "WORD".
9cbb5ea2 1007 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1008 */
1009
76e3520e 1010STATIC void
bfed75c6 1011S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
1012{
1013 if (s && *s) {
bfed75c6 1014 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 1015 PL_nextval[PL_nexttoke].opval = o;
79072805 1016 force_next(WORD);
748a9306 1017 if (kind) {
11343788 1018 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1019 /* XXX see note in pp_entereval() for why we forgo typo
1020 warnings if the symbol must be introduced in an eval.
1021 GSAR 96-10-12 */
3280af22 1022 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
1023 kind == '$' ? SVt_PV :
1024 kind == '@' ? SVt_PVAV :
1025 kind == '%' ? SVt_PVHV :
1026 SVt_PVGV
1027 );
748a9306 1028 }
79072805
LW
1029 }
1030}
1031
1571675a
GS
1032NV
1033Perl_str_to_version(pTHX_ SV *sv)
1034{
1035 NV retval = 0.0;
1036 NV nshift = 1.0;
1037 STRLEN len;
cfd0369c 1038 const char *start = SvPV_const(sv,len);
9d4ba2ae 1039 const char * const end = start + len;
504618e9 1040 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1041 while (start < end) {
ba210ebe 1042 STRLEN skip;
1571675a
GS
1043 UV n;
1044 if (utf)
9041c2e3 1045 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1046 else {
1047 n = *(U8*)start;
1048 skip = 1;
1049 }
1050 retval += ((NV)n)/nshift;
1051 start += skip;
1052 nshift *= 1000;
1053 }
1054 return retval;
1055}
1056
4e553d73 1057/*
ffb4593c
NT
1058 * S_force_version
1059 * Forces the next token to be a version number.
e759cc13
RGS
1060 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1061 * and if "guessing" is TRUE, then no new token is created (and the caller
1062 * must use an alternative parsing method).
ffb4593c
NT
1063 */
1064
76e3520e 1065STATIC char *
e759cc13 1066S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
1067{
1068 OP *version = Nullop;
44dcb63b 1069 char *d;
89bfa8cd
PP
1070
1071 s = skipspace(s);
1072
44dcb63b 1073 d = s;
dd629d5b 1074 if (*d == 'v')
44dcb63b 1075 d++;
44dcb63b 1076 if (isDIGIT(*d)) {
e759cc13
RGS
1077 while (isDIGIT(*d) || *d == '_' || *d == '.')
1078 d++;
9f3d182e 1079 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1080 SV *ver;
b73d6f50 1081 s = scan_num(s, &yylval);
89bfa8cd 1082 version = yylval.opval;
dd629d5b
GS
1083 ver = cSVOPx(version)->op_sv;
1084 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1085 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1086 SvNV_set(ver, str_to_version(ver));
1571675a 1087 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1088 }
89bfa8cd 1089 }
e759cc13
RGS
1090 else if (guessing)
1091 return s;
89bfa8cd
PP
1092 }
1093
1094 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1095 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1096 force_next(WORD);
89bfa8cd 1097
e759cc13 1098 return s;
89bfa8cd
PP
1099}
1100
ffb4593c
NT
1101/*
1102 * S_tokeq
1103 * Tokenize a quoted string passed in as an SV. It finds the next
1104 * chunk, up to end of string or a backslash. It may make a new
1105 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1106 * turns \\ into \.
1107 */
1108
76e3520e 1109STATIC SV *
cea2e8a9 1110S_tokeq(pTHX_ SV *sv)
79072805
LW
1111{
1112 register char *s;
1113 register char *send;
1114 register char *d;
b3ac6de7
IZ
1115 STRLEN len = 0;
1116 SV *pv = sv;
79072805
LW
1117
1118 if (!SvLEN(sv))
b3ac6de7 1119 goto finish;
79072805 1120
a0d0e21e 1121 s = SvPV_force(sv, len);
21a311ee 1122 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1123 goto finish;
463ee0b2 1124 send = s + len;
79072805
LW
1125 while (s < send && *s != '\\')
1126 s++;
1127 if (s == send)
b3ac6de7 1128 goto finish;
79072805 1129 d = s;
be4731d2 1130 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1131 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1132 if (SvUTF8(sv))
1133 SvUTF8_on(pv);
1134 }
79072805
LW
1135 while (s < send) {
1136 if (*s == '\\') {
a0d0e21e 1137 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1138 s++; /* all that, just for this */
1139 }
1140 *d++ = *s++;
1141 }
1142 *d = '\0';
95a20fc0 1143 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1144 finish:
3280af22 1145 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1146 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1147 return sv;
1148}
1149
ffb4593c
NT
1150/*
1151 * Now come three functions related to double-quote context,
1152 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1153 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1154 * interact with PL_lex_state, and create fake ( ... ) argument lists
1155 * to handle functions and concatenation.
1156 * They assume that whoever calls them will be setting up a fake
1157 * join call, because each subthing puts a ',' after it. This lets
1158 * "lower \luPpEr"
1159 * become
1160 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1161 *
1162 * (I'm not sure whether the spurious commas at the end of lcfirst's
1163 * arguments and join's arguments are created or not).
1164 */
1165
1166/*
1167 * S_sublex_start
1168 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1169 *
1170 * Pattern matching will set PL_lex_op to the pattern-matching op to
1171 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1172 *
1173 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1174 *
1175 * Everything else becomes a FUNC.
1176 *
1177 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1178 * had an OP_CONST or OP_READLINE). This just sets us up for a
1179 * call to S_sublex_push().
1180 */
1181
76e3520e 1182STATIC I32
cea2e8a9 1183S_sublex_start(pTHX)
79072805 1184{
0d46e09a 1185 register const I32 op_type = yylval.ival;
79072805
LW
1186
1187 if (op_type == OP_NULL) {
3280af22
NIS
1188 yylval.opval = PL_lex_op;
1189 PL_lex_op = Nullop;
79072805
LW
1190 return THING;
1191 }
1192 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1193 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1194
1195 if (SvTYPE(sv) == SVt_PVIV) {
1196 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1197 STRLEN len;
cfd0369c 1198 const char *p = SvPV_const(sv, len);
f54cb97a 1199 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1200 if (SvUTF8(sv))
1201 SvUTF8_on(nsv);
b3ac6de7
IZ
1202 SvREFCNT_dec(sv);
1203 sv = nsv;
4e553d73 1204 }
b3ac6de7 1205 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1206 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1207 /* Allow <FH> // "foo" */
1208 if (op_type == OP_READLINE)
1209 PL_expect = XTERMORDORDOR;
79072805
LW
1210 return THING;
1211 }
1212
3280af22
NIS
1213 PL_sublex_info.super_state = PL_lex_state;
1214 PL_sublex_info.sub_inwhat = op_type;
1215 PL_sublex_info.sub_op = PL_lex_op;
1216 PL_lex_state = LEX_INTERPPUSH;
55497cff 1217
3280af22
NIS
1218 PL_expect = XTERM;
1219 if (PL_lex_op) {
1220 yylval.opval = PL_lex_op;
1221 PL_lex_op = Nullop;
55497cff
PP
1222 return PMFUNC;
1223 }
1224 else
1225 return FUNC;
1226}
1227
ffb4593c
NT
1228/*
1229 * S_sublex_push
1230 * Create a new scope to save the lexing state. The scope will be
1231 * ended in S_sublex_done. Returns a '(', starting the function arguments
1232 * to the uc, lc, etc. found before.
1233 * Sets PL_lex_state to LEX_INTERPCONCAT.
1234 */
1235
76e3520e 1236STATIC I32
cea2e8a9 1237S_sublex_push(pTHX)
55497cff 1238{
27da23d5 1239 dVAR;
f46d017c 1240 ENTER;
55497cff 1241
3280af22
NIS
1242 PL_lex_state = PL_sublex_info.super_state;
1243 SAVEI32(PL_lex_dojoin);
1244 SAVEI32(PL_lex_brackets);
3280af22
NIS
1245 SAVEI32(PL_lex_casemods);
1246 SAVEI32(PL_lex_starts);
1247 SAVEI32(PL_lex_state);
7766f137 1248 SAVEVPTR(PL_lex_inpat);
3280af22 1249 SAVEI32(PL_lex_inwhat);
57843af0 1250 SAVECOPLINE(PL_curcop);
3280af22 1251 SAVEPPTR(PL_bufptr);
8452ff4b 1252 SAVEPPTR(PL_bufend);
3280af22
NIS
1253 SAVEPPTR(PL_oldbufptr);
1254 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1255 SAVEPPTR(PL_last_lop);
1256 SAVEPPTR(PL_last_uni);
3280af22
NIS
1257 SAVEPPTR(PL_linestart);
1258 SAVESPTR(PL_linestr);
8edd5f42
RGS
1259 SAVEGENERICPV(PL_lex_brackstack);
1260 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1261
1262 PL_linestr = PL_lex_stuff;
1263 PL_lex_stuff = Nullsv;
1264
9cbb5ea2
GS
1265 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1266 = SvPVX(PL_linestr);
3280af22 1267 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1268 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1269 SAVEFREESV(PL_linestr);
1270
1271 PL_lex_dojoin = FALSE;
1272 PL_lex_brackets = 0;
a02a5408
JC
1273 Newx(PL_lex_brackstack, 120, char);
1274 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1275 PL_lex_casemods = 0;
1276 *PL_lex_casestack = '\0';
1277 PL_lex_starts = 0;
1278 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1279 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1280
1281 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1282 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1283 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1284 else
3280af22 1285 PL_lex_inpat = Nullop;
79072805 1286
55497cff 1287 return '(';
79072805
LW
1288}
1289
ffb4593c
NT
1290/*
1291 * S_sublex_done
1292 * Restores lexer state after a S_sublex_push.
1293 */
1294
76e3520e 1295STATIC I32
cea2e8a9 1296S_sublex_done(pTHX)
79072805 1297{
27da23d5 1298 dVAR;
3280af22 1299 if (!PL_lex_starts++) {
9d4ba2ae 1300 SV * const sv = newSVpvn("",0);
9aa983d2
JH
1301 if (SvUTF8(PL_linestr))
1302 SvUTF8_on(sv);
3280af22 1303 PL_expect = XOPERATOR;
9aa983d2 1304 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1305 return THING;
1306 }
1307
3280af22
NIS
1308 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1309 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1310 return yylex();
79072805
LW
1311 }
1312
ffb4593c 1313 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1314 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1315 PL_linestr = PL_lex_repl;
1316 PL_lex_inpat = 0;
1317 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1318 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1319 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1320 SAVEFREESV(PL_linestr);
1321 PL_lex_dojoin = FALSE;
1322 PL_lex_brackets = 0;
3280af22
NIS
1323 PL_lex_casemods = 0;
1324 *PL_lex_casestack = '\0';
1325 PL_lex_starts = 0;
25da4f38 1326 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1327 PL_lex_state = LEX_INTERPNORMAL;
1328 PL_lex_starts++;
e9fa98b2
HS
1329 /* we don't clear PL_lex_repl here, so that we can check later
1330 whether this is an evalled subst; that means we rely on the
1331 logic to ensure sublex_done() is called again only via the
1332 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1333 }
e9fa98b2 1334 else {
3280af22 1335 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1336 PL_lex_repl = Nullsv;
1337 }
79072805 1338 return ',';
ffed7fef
LW
1339 }
1340 else {
f46d017c 1341 LEAVE;
3280af22
NIS
1342 PL_bufend = SvPVX(PL_linestr);
1343 PL_bufend += SvCUR(PL_linestr);
1344 PL_expect = XOPERATOR;
09bef843 1345 PL_sublex_info.sub_inwhat = 0;
79072805 1346 return ')';
ffed7fef
LW
1347 }
1348}
1349
02aa26ce
NT
1350/*
1351 scan_const
1352
1353 Extracts a pattern, double-quoted string, or transliteration. This
1354 is terrifying code.
1355
3280af22
NIS
1356 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1357 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1358 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1359
9b599b2a
GS
1360 Returns a pointer to the character scanned up to. Iff this is
1361 advanced from the start pointer supplied (ie if anything was
1362 successfully parsed), will leave an OP for the substring scanned
1363 in yylval. Caller must intuit reason for not parsing further
1364 by looking at the next characters herself.
1365
02aa26ce
NT
1366 In patterns:
1367 backslashes:
1368 double-quoted style: \r and \n
1369 regexp special ones: \D \s
1370 constants: \x3
1371 backrefs: \1 (deprecated in substitution replacements)
1372 case and quoting: \U \Q \E
1373 stops on @ and $, but not for $ as tail anchor
1374
1375 In transliterations:
1376 characters are VERY literal, except for - not at the start or end
1377 of the string, which indicates a range. scan_const expands the
1378 range to the full set of intermediate characters.
1379
1380 In double-quoted strings:
1381 backslashes:
1382 double-quoted style: \r and \n
1383 constants: \x3
1384 backrefs: \1 (deprecated)
1385 case and quoting: \U \Q \E
1386 stops on @ and $
1387
1388 scan_const does *not* construct ops to handle interpolated strings.
1389 It stops processing as soon as it finds an embedded $ or @ variable
1390 and leaves it to the caller to work out what's going on.
1391
da6eedaa 1392 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1393
1394 $ in pattern could be $foo or could be tail anchor. Assumption:
1395 it's a tail anchor if $ is the last thing in the string, or if it's
1396 followed by one of ")| \n\t"
1397
1398 \1 (backreferences) are turned into $1
1399
1400 The structure of the code is
1401 while (there's a character to process) {
1402 handle transliteration ranges
1403 skip regexp comments
1404 skip # initiated comments in //x patterns
1405 check for embedded @foo
1406 check for embedded scalars
1407 if (backslash) {
1408 leave intact backslashes from leave (below)
1409 deprecate \1 in strings and sub replacements
1410 handle string-changing backslashes \l \U \Q \E, etc.
1411 switch (what was escaped) {
1412 handle - in a transliteration (becomes a literal -)
1413 handle \132 octal characters
1414 handle 0x15 hex characters
1415 handle \cV (control V)
1416 handle printf backslashes (\f, \r, \n, etc)
1417 } (end switch)
1418 } (end if backslash)
1419 } (end while character to read)
4e553d73 1420
02aa26ce
NT
1421*/
1422
76e3520e 1423STATIC char *
cea2e8a9 1424S_scan_const(pTHX_ char *start)
79072805 1425{
3280af22 1426 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1427 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1428 register char *s = start; /* start of the constant */
1429 register char *d = SvPVX(sv); /* destination for copies */
1430 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1431 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1432 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1433 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1434 UV uv;
4c3a8340
ST
1435#ifdef EBCDIC
1436 UV literal_endpoint = 0;
1437#endif
012bcf8d 1438
dff6d3cd 1439 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1440 PL_lex_inpat
b6d5fef8 1441 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1442 : "";
79072805 1443
2b9d42f0
NIS
1444 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1445 /* If we are doing a trans and we know we want UTF8 set expectation */
1446 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1447 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1448 }
1449
1450
79072805 1451 while (s < send || dorange) {
02aa26ce 1452 /* get transliterations out of the way (they're most literal) */
3280af22 1453 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1454 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1455 if (dorange) {
1ba5c669
JH
1456 I32 i; /* current expanded character */
1457 I32 min; /* first character in range */
1458 I32 max; /* last character in range */
02aa26ce 1459
2b9d42f0 1460 if (has_utf8) {
9d4ba2ae 1461 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1462 char *e = d++;
1463 while (e-- > c)
1464 *(e + 1) = *e;
25716404 1465 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1466 /* mark the range as done, and continue */
1467 dorange = FALSE;
1468 didrange = TRUE;
1469 continue;
1470 }
2b9d42f0 1471
95a20fc0 1472 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1473 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1474 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1475 d -= 2; /* eat the first char and the - */
1476
8ada0baa
JH
1477 min = (U8)*d; /* first char in range */
1478 max = (U8)d[1]; /* last char in range */
1479
c2e66d9e 1480 if (min > max) {
01ec43d0 1481 Perl_croak(aTHX_
d1573ac7 1482 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1483 (char)min, (char)max);
c2e66d9e
GS
1484 }
1485
c7f1f016 1486#ifdef EBCDIC
4c3a8340
ST
1487 if (literal_endpoint == 2 &&
1488 ((isLOWER(min) && isLOWER(max)) ||
1489 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1490 if (isLOWER(min)) {
1491 for (i = min; i <= max; i++)
1492 if (isLOWER(i))
db42d148 1493 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1494 } else {
1495 for (i = min; i <= max; i++)
1496 if (isUPPER(i))
db42d148 1497 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1498 }
1499 }
1500 else
1501#endif
1502 for (i = min; i <= max; i++)
eb160463 1503 *d++ = (char)i;
02aa26ce
NT
1504
1505 /* mark the range as done, and continue */
79072805 1506 dorange = FALSE;
01ec43d0 1507 didrange = TRUE;
4c3a8340
ST
1508#ifdef EBCDIC
1509 literal_endpoint = 0;
1510#endif
79072805 1511 continue;
4e553d73 1512 }
02aa26ce
NT
1513
1514 /* range begins (ignore - as first or last char) */
79072805 1515 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1516 if (didrange) {
1fafa243 1517 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1518 }
2b9d42f0 1519 if (has_utf8) {
25716404 1520 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1521 s++;
1522 continue;
1523 }
79072805
LW
1524 dorange = TRUE;
1525 s++;
01ec43d0
GS
1526 }
1527 else {
1528 didrange = FALSE;
4c3a8340
ST
1529#ifdef EBCDIC
1530 literal_endpoint = 0;
1531#endif
01ec43d0 1532 }
79072805 1533 }
02aa26ce
NT
1534
1535 /* if we get here, we're not doing a transliteration */
1536
0f5d15d6
IZ
1537 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1538 except for the last char, which will be done separately. */
3280af22 1539 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1540 if (s[2] == '#') {
e994fd66 1541 while (s+1 < send && *s != ')')
db42d148 1542 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1543 }
1544 else if (s[2] == '{' /* This should match regcomp.c */
1545 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1546 {
cc6b7395 1547 I32 count = 1;
0f5d15d6 1548 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1549 char c;
1550
d9f97599
GS
1551 while (count && (c = *regparse)) {
1552 if (c == '\\' && regparse[1])
1553 regparse++;
4e553d73 1554 else if (c == '{')
cc6b7395 1555 count++;
4e553d73 1556 else if (c == '}')
cc6b7395 1557 count--;
d9f97599 1558 regparse++;
cc6b7395 1559 }
e994fd66 1560 if (*regparse != ')')
5bdf89e7 1561 regparse--; /* Leave one char for continuation. */
0f5d15d6 1562 while (s < regparse)
db42d148 1563 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1564 }
748a9306 1565 }
02aa26ce
NT
1566
1567 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1568 else if (*s == '#' && PL_lex_inpat &&
1569 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1570 while (s+1 < send && *s != '\n')
db42d148 1571 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1572 }
02aa26ce 1573
5d1d4326 1574 /* check for embedded arrays
da6eedaa 1575 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1576 */
7e2040f0 1577 else if (*s == '@' && s[1]
5d1d4326 1578 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1579 break;
02aa26ce
NT
1580
1581 /* check for embedded scalars. only stop if we're sure it's a
1582 variable.
1583 */
79072805 1584 else if (*s == '$') {
3280af22 1585 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1586 break;
6002328a 1587 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1588 break; /* in regexp, $ might be tail anchor */
1589 }
02aa26ce 1590
2b9d42f0
NIS
1591 /* End of else if chain - OP_TRANS rejoin rest */
1592
02aa26ce 1593 /* backslashes */
79072805
LW
1594 if (*s == '\\' && s+1 < send) {
1595 s++;
02aa26ce
NT
1596
1597 /* some backslashes we leave behind */
c9f97d15 1598 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1599 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1600 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1601 continue;
1602 }
02aa26ce
NT
1603
1604 /* deprecate \1 in strings and substitution replacements */
3280af22 1605 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1606 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1607 {
599cee73 1608 if (ckWARN(WARN_SYNTAX))
9014280d 1609 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1610 *--s = '$';
1611 break;
1612 }
02aa26ce
NT
1613
1614 /* string-change backslash escapes */
3280af22 1615 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1616 --s;
1617 break;
1618 }
02aa26ce
NT
1619
1620 /* if we get here, it's either a quoted -, or a digit */
79072805 1621 switch (*s) {
02aa26ce
NT
1622
1623 /* quoted - in transliterations */
79072805 1624 case '-':
3280af22 1625 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1626 *d++ = *s++;
1627 continue;
1628 }
1629 /* FALL THROUGH */
1630 default:
11b8faa4 1631 {
041457d9
DM
1632 if (isALNUM(*s) &&
1633 *s != '_' &&
1634 ckWARN(WARN_MISC))
9014280d 1635 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1636 "Unrecognized escape \\%c passed through",
1637 *s);
1638 /* default action is to copy the quoted character */
f9a63242 1639 goto default_action;
11b8faa4 1640 }
02aa26ce
NT
1641
1642 /* \132 indicates an octal constant */
79072805
LW
1643 case '0': case '1': case '2': case '3':
1644 case '4': case '5': case '6': case '7':
ba210ebe 1645 {
53305cf1
NC
1646 I32 flags = 0;
1647 STRLEN len = 3;
1648 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1649 s += len;
1650 }
012bcf8d 1651 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1652
1653 /* \x24 indicates a hex constant */
79072805 1654 case 'x':
a0ed51b3
LW
1655 ++s;
1656 if (*s == '{') {
9d4ba2ae 1657 char* const e = strchr(s, '}');
a4c04bdc
NC
1658 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1659 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1660 STRLEN len;
355860ce 1661
53305cf1 1662 ++s;
adaeee49 1663 if (!e) {
a0ed51b3 1664 yyerror("Missing right brace on \\x{}");
355860ce 1665 continue;
ba210ebe 1666 }
53305cf1
NC
1667 len = e - s;
1668 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1669 s = e + 1;
a0ed51b3
LW
1670 }
1671 else {
ba210ebe 1672 {
53305cf1 1673 STRLEN len = 2;
a4c04bdc 1674 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1675 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1676 s += len;
1677 }
012bcf8d
GS
1678 }
1679
1680 NUM_ESCAPE_INSERT:
1681 /* Insert oct or hex escaped character.
301d3d20 1682 * There will always enough room in sv since such
db42d148 1683 * escapes will be longer than any UTF-8 sequence
301d3d20 1684 * they can end up as. */
ba7cea30 1685
c7f1f016
NIS
1686 /* We need to map to chars to ASCII before doing the tests
1687 to cover EBCDIC
1688 */
c4d5f83a 1689 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1690 if (!has_utf8 && uv > 255) {
301d3d20
JH
1691 /* Might need to recode whatever we have
1692 * accumulated so far if it contains any
1693 * hibit chars.
1694 *
1695 * (Can't we keep track of that and avoid
1696 * this rescan? --jhi)
012bcf8d 1697 */
c7f1f016 1698 int hicount = 0;
63cd0674
NIS
1699 U8 *c;
1700 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1701 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1702 hicount++;
db42d148 1703 }
012bcf8d 1704 }
63cd0674 1705 if (hicount) {
9d4ba2ae 1706 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1707 U8 *src, *dst;
1708 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1709 src = (U8 *)d - 1;
1710 dst = src+hicount;
1711 d += hicount;
cfd0369c 1712 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1713 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1714 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1715 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1716 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1717 }
1718 else {
63cd0674 1719 *dst-- = *src;
012bcf8d 1720 }
c7f1f016 1721 src--;
012bcf8d
GS
1722 }
1723 }
1724 }
1725
9aa983d2 1726 if (has_utf8 || uv > 255) {
9041c2e3 1727 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1728 has_utf8 = TRUE;
f9a63242
JH
1729 if (PL_lex_inwhat == OP_TRANS &&
1730 PL_sublex_info.sub_op) {
1731 PL_sublex_info.sub_op->op_private |=
1732 (PL_lex_repl ? OPpTRANS_FROM_UTF
1733 : OPpTRANS_TO_UTF);
f9a63242 1734 }
012bcf8d 1735 }
a0ed51b3 1736 else {
012bcf8d 1737 *d++ = (char)uv;
a0ed51b3 1738 }
012bcf8d
GS
1739 }
1740 else {
c4d5f83a 1741 *d++ = (char) uv;
a0ed51b3 1742 }
79072805 1743 continue;
02aa26ce 1744
b239daa5 1745 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1746 case 'N':
55eda711 1747 ++s;
423cee85
JH
1748 if (*s == '{') {
1749 char* e = strchr(s, '}');
155aba94 1750 SV *res;
423cee85 1751 STRLEN len;
cfd0369c 1752 const char *str;
4e553d73 1753
423cee85 1754 if (!e) {
5777a3f7 1755 yyerror("Missing right brace on \\N{}");
423cee85
JH
1756 e = s - 1;
1757 goto cont_scan;
1758 }
dbc0d4f2
JH
1759 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1760 /* \N{U+...} */
1761 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1762 PERL_SCAN_DISALLOW_PREFIX;
1763 s += 3;
1764 len = e - s;
1765 uv = grok_hex(s, &len, &flags, NULL);
1766 s = e + 1;
1767 goto NUM_ESCAPE_INSERT;
1768 }
55eda711
JH
1769 res = newSVpvn(s + 1, e - s - 1);
1770 res = new_constant( Nullch, 0, "charnames",
1771 res, Nullsv, "\\N{...}" );
f9a63242
JH
1772 if (has_utf8)
1773 sv_utf8_upgrade(res);
cfd0369c 1774 str = SvPV_const(res,len);
1c47067b
JH
1775#ifdef EBCDIC_NEVER_MIND
1776 /* charnames uses pack U and that has been
1777 * recently changed to do the below uni->native
1778 * mapping, so this would be redundant (and wrong,
1779 * the code point would be doubly converted).
1780 * But leave this in just in case the pack U change
1781 * gets revoked, but the semantics is still
1782 * desireable for charnames. --jhi */
cddc7ef4 1783 {
cfd0369c 1784 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1785
1786 if (uv < 0x100) {
89ebb4a3 1787 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1788
1789 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1790 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1791 str = SvPV_const(res, len);
cddc7ef4
JH
1792 }
1793 }
1794#endif
89491803 1795 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1796 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1797 SvCUR_set(sv, d - ostart);
1798 SvPOK_on(sv);
e4f3eed8 1799 *d = '\0';
f08d6ad9 1800 sv_utf8_upgrade(sv);
d2f449dd 1801 /* this just broke our allocation above... */
eb160463 1802 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1803 d = SvPVX(sv) + SvCUR(sv);
89491803 1804 has_utf8 = TRUE;
f08d6ad9 1805 }
eb160463 1806 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1807 const char * const odest = SvPVX_const(sv);
423cee85 1808
8973db79 1809 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1810 d = SvPVX(sv) + (d - odest);
1811 }
1812 Copy(str, d, len, char);
1813 d += len;
1814 SvREFCNT_dec(res);
1815 cont_scan:
1816 s = e + 1;
1817 }
1818 else
5777a3f7 1819 yyerror("Missing braces on \\N{}");
423cee85
JH
1820 continue;
1821
02aa26ce 1822 /* \c is a control character */
79072805
LW
1823 case 'c':
1824 s++;
961ce445 1825 if (s < send) {
ba210ebe 1826 U8 c = *s++;
c7f1f016
NIS
1827#ifdef EBCDIC
1828 if (isLOWER(c))
1829 c = toUPPER(c);
1830#endif
db42d148 1831 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1832 }
961ce445
RGS
1833 else {
1834 yyerror("Missing control char name in \\c");
1835 }
79072805 1836 continue;
02aa26ce
NT
1837
1838 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1839 case 'b':
db42d148 1840 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1841 break;
1842 case 'n':
db42d148 1843 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1844 break;
1845 case 'r':
db42d148 1846 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1847 break;
1848 case 'f':
db42d148 1849 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1850 break;
1851 case 't':
db42d148 1852 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1853 break;
34a3fe2a 1854 case 'e':
db42d148 1855 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1856 break;
1857 case 'a':
db42d148 1858 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1859 break;
02aa26ce
NT
1860 } /* end switch */
1861
79072805
LW
1862 s++;
1863 continue;
02aa26ce 1864 } /* end if (backslash) */
4c3a8340
ST
1865#ifdef EBCDIC
1866 else
1867 literal_endpoint++;
1868#endif
02aa26ce 1869
f9a63242 1870 default_action:
2b9d42f0
NIS
1871 /* If we started with encoded form, or already know we want it
1872 and then encode the next character */
1873 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1874 STRLEN len = 1;
9d4ba2ae
AL
1875 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1876 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1877 s += len;
1878 if (need > len) {
1879 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1880 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1881 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1882 }
1883 d = (char*)uvchr_to_utf8((U8*)d, uv);
1884 has_utf8 = TRUE;
1885 }
1886 else {
1887 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1888 }
02aa26ce
NT
1889 } /* while loop to process each character */
1890
1891 /* terminate the string and set up the sv */
79072805 1892 *d = '\0';
95a20fc0 1893 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1894 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1895 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1896
79072805 1897 SvPOK_on(sv);
9f4817db 1898 if (PL_encoding && !has_utf8) {
d0063567
DK
1899 sv_recode_to_utf8(sv, PL_encoding);
1900 if (SvUTF8(sv))
1901 has_utf8 = TRUE;
9f4817db 1902 }
2b9d42f0 1903 if (has_utf8) {
7e2040f0 1904 SvUTF8_on(sv);
2b9d42f0 1905 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1906 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1907 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1908 }
1909 }
79072805 1910
02aa26ce 1911 /* shrink the sv if we allocated more than we used */
79072805 1912 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1913 SvPV_shrink_to_cur(sv);
79072805 1914 }
02aa26ce 1915
9b599b2a 1916 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1917 if (s > PL_bufptr) {
1918 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1919 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1920 sv, Nullsv,
4e553d73 1921 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1922 ? "tr"
3280af22 1923 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1924 ? "s"
1925 : "qq")));
79072805 1926 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1927 } else
8990e307 1928 SvREFCNT_dec(sv);
79072805
LW
1929 return s;
1930}
1931
ffb4593c
NT
1932/* S_intuit_more
1933 * Returns TRUE if there's more to the expression (e.g., a subscript),
1934 * FALSE otherwise.
ffb4593c
NT
1935 *
1936 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1937 *
1938 * ->[ and ->{ return TRUE
1939 * { and [ outside a pattern are always subscripts, so return TRUE
1940 * if we're outside a pattern and it's not { or [, then return FALSE
1941 * if we're in a pattern and the first char is a {
1942 * {4,5} (any digits around the comma) returns FALSE
1943 * if we're in a pattern and the first char is a [
1944 * [] returns FALSE
1945 * [SOMETHING] has a funky algorithm to decide whether it's a
1946 * character class or not. It has to deal with things like
1947 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1948 * anything else returns TRUE
1949 */
1950
9cbb5ea2
GS
1951/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1952
76e3520e 1953STATIC int
cea2e8a9 1954S_intuit_more(pTHX_ register char *s)
79072805 1955{
3280af22 1956 if (PL_lex_brackets)
79072805
LW
1957 return TRUE;
1958 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1959 return TRUE;
1960 if (*s != '{' && *s != '[')
1961 return FALSE;
3280af22 1962 if (!PL_lex_inpat)
79072805
LW
1963 return TRUE;
1964
1965 /* In a pattern, so maybe we have {n,m}. */
1966 if (*s == '{') {
1967 s++;
1968 if (!isDIGIT(*s))
1969 return TRUE;
1970 while (isDIGIT(*s))
1971 s++;
1972 if (*s == ',')
1973 s++;
1974 while (isDIGIT(*s))
1975 s++;
1976 if (*s == '}')
1977 return FALSE;
1978 return TRUE;
1979
1980 }
1981
1982 /* On the other hand, maybe we have a character class */
1983
1984 s++;
1985 if (*s == ']' || *s == '^')
1986 return FALSE;
1987 else {
ffb4593c 1988 /* this is terrifying, and it works */
79072805
LW
1989 int weight = 2; /* let's weigh the evidence */
1990 char seen[256];
f27ffc4a 1991 unsigned char un_char = 255, last_un_char;
9d4ba2ae 1992 const char * const send = strchr(s,']');
3280af22 1993 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1994
1995 if (!send) /* has to be an expression */
1996 return TRUE;
1997
1998 Zero(seen,256,char);
1999 if (*s == '$')
2000 weight -= 3;
2001 else if (isDIGIT(*s)) {
2002 if (s[1] != ']') {
2003 if (isDIGIT(s[1]) && s[2] == ']')
2004 weight -= 10;
2005 }
2006 else
2007 weight -= 100;
2008 }
2009 for (; s < send; s++) {
2010 last_un_char = un_char;
2011 un_char = (unsigned char)*s;
2012 switch (*s) {
2013 case '@':
2014 case '&':
2015 case '$':
2016 weight -= seen[un_char] * 10;
7e2040f0 2017 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 2018 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 2019 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
2020 weight -= 100;
2021 else
2022 weight -= 10;
2023 }
2024 else if (*s == '$' && s[1] &&
93a17b20
LW
2025 strchr("[#!%*<>()-=",s[1])) {
2026 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2027 weight -= 10;
2028 else
2029 weight -= 1;
2030 }
2031 break;
2032 case '\\':
2033 un_char = 254;
2034 if (s[1]) {
93a17b20 2035 if (strchr("wds]",s[1]))
79072805
LW
2036 weight += 100;
2037 else if (seen['\''] || seen['"'])
2038 weight += 1;
93a17b20 2039 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2040 weight += 40;
2041 else if (isDIGIT(s[1])) {
2042 weight += 40;
2043 while (s[1] && isDIGIT(s[1]))
2044 s++;
2045 }
2046 }
2047 else
2048 weight += 100;
2049 break;
2050 case '-':
2051 if (s[1] == '\\')
2052 weight += 50;
93a17b20 2053 if (strchr("aA01! ",last_un_char))
79072805 2054 weight += 30;
93a17b20 2055 if (strchr("zZ79~",s[1]))
79072805 2056 weight += 30;
f27ffc4a
GS
2057 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2058 weight -= 5; /* cope with negative subscript */
79072805
LW
2059 break;
2060 default:
3792a11b
NC
2061 if (!isALNUM(last_un_char)
2062 && !(last_un_char == '$' || last_un_char == '@'
2063 || last_un_char == '&')
2064 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2065 char *d = tmpbuf;
2066 while (isALPHA(*s))
2067 *d++ = *s++;
2068 *d = '\0';
2069 if (keyword(tmpbuf, d - tmpbuf))
2070 weight -= 150;
2071 }
2072 if (un_char == last_un_char + 1)
2073 weight += 5;
2074 weight -= seen[un_char];
2075 break;
2076 }
2077 seen[un_char]++;
2078 }
2079 if (weight >= 0) /* probably a character class */
2080 return FALSE;
2081 }
2082
2083 return TRUE;
2084}
ffed7fef 2085
ffb4593c
NT
2086/*
2087 * S_intuit_method
2088 *
2089 * Does all the checking to disambiguate
2090 * foo bar
2091 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2092 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2093 *
2094 * First argument is the stuff after the first token, e.g. "bar".
2095 *
2096 * Not a method if bar is a filehandle.
2097 * Not a method if foo is a subroutine prototyped to take a filehandle.
2098 * Not a method if it's really "Foo $bar"
2099 * Method if it's "foo $bar"
2100 * Not a method if it's really "print foo $bar"
2101 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2102 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2103 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2104 * =>
2105 */
2106
76e3520e 2107STATIC int
cea2e8a9 2108S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2109{
2110 char *s = start + (*start == '$');
3280af22 2111 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2112 STRLEN len;
2113 GV* indirgv;
2114
2115 if (gv) {
b6c543e3 2116 CV *cv;
a0d0e21e
LW
2117 if (GvIO(gv))
2118 return 0;
b6c543e3 2119 if ((cv = GvCVu(gv))) {
95a20fc0 2120 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2121 if (proto) {
2122 if (*proto == ';')
2123 proto++;
2124 if (*proto == '*')
2125 return 0;
2126 }
2127 } else
a0d0e21e
LW
2128 gv = 0;
2129 }
8903cb82 2130 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2131 /* start is the beginning of the possible filehandle/object,
2132 * and s is the end of it
2133 * tmpbuf is a copy of it
2134 */
2135
a0d0e21e 2136 if (*start == '$') {
3280af22 2137 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2138 return 0;
2139 s = skipspace(s);
3280af22
NIS
2140 PL_bufptr = start;
2141 PL_expect = XREF;
a0d0e21e
LW
2142 return *s == '(' ? FUNCMETH : METHOD;
2143 }
2144 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2145 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2146 len -= 2;
2147 tmpbuf[len] = '\0';
2148 goto bare_package;
2149 }
2150 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2151 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2152 return 0;
2153 /* filehandle or package name makes it a method */
89bfa8cd 2154 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2155 s = skipspace(s);
3280af22 2156 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2157 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2158 bare_package:
3280af22 2159 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2160 newSVpvn(tmpbuf,len));
3280af22
NIS
2161 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2162 PL_expect = XTERM;
a0d0e21e 2163 force_next(WORD);
3280af22 2164 PL_bufptr = s;
a0d0e21e
LW
2165 return *s == '(' ? FUNCMETH : METHOD;
2166 }
2167 }
2168 return 0;
2169}
2170
ffb4593c
NT
2171/*
2172 * S_incl_perldb
2173 * Return a string of Perl code to load the debugger. If PERL5DB
2174 * is set, it will return the contents of that, otherwise a
2175 * compile-time require of perl5db.pl.
2176 */
2177
bfed75c6 2178STATIC const char*
cea2e8a9 2179S_incl_perldb(pTHX)
a0d0e21e 2180{
3280af22 2181 if (PL_perldb) {
9d4ba2ae 2182 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2183
2184 if (pdb)
2185 return pdb;
93189314 2186 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2187 return "BEGIN { require 'perl5db.pl' }";
2188 }
2189 return "";
2190}
2191
2192
16d20bd9 2193/* Encoded script support. filter_add() effectively inserts a
4e553d73 2194 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2195 * Note that the filter function only applies to the current source file
2196 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2197 *
2198 * The datasv parameter (which may be NULL) can be used to pass
2199 * private data to this instance of the filter. The filter function
2200 * can recover the SV using the FILTER_DATA macro and use it to
2201 * store private buffers and state information.
2202 *
2203 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2204 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2205 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2206 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2207 * private use must be set using malloc'd pointers.
2208 */
16d20bd9
AD
2209
2210SV *
864dbfa3 2211Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2212{
f4c556ac
GS
2213 if (!funcp)
2214 return Nullsv;
2215
3280af22
NIS
2216 if (!PL_rsfp_filters)
2217 PL_rsfp_filters = newAV();
16d20bd9 2218 if (!datasv)
8c52afec 2219 datasv = NEWSV(255,0);
862a34c6 2220 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2221 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2222 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2223 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2224 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2225 av_unshift(PL_rsfp_filters, 1);
2226 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2227 return(datasv);
2228}
4e553d73 2229
16d20bd9
AD
2230
2231/* Delete most recently added instance of this filter function. */
a0d0e21e 2232void
864dbfa3 2233Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2234{
e0c19803 2235 SV *datasv;
24801a4b 2236
33073adb 2237#ifdef DEBUGGING
8141890a 2238 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2239#endif
3280af22 2240 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2241 return;
2242 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2243 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2244 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2245 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2246 IoANY(datasv) = (void *)NULL;
3280af22 2247 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2248
16d20bd9
AD
2249 return;
2250 }
2251 /* we need to search for the correct entry and clear it */
cea2e8a9 2252 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2253}
2254
2255
1de9afcd
RGS
2256/* Invoke the idxth filter function for the current rsfp. */
2257/* maxlen 0 = read one text line */
16d20bd9 2258I32
864dbfa3 2259Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2260{
16d20bd9
AD
2261 filter_t funcp;
2262 SV *datasv = NULL;
e50aee73 2263
3280af22 2264 if (!PL_rsfp_filters)
16d20bd9 2265 return -1;
1de9afcd 2266 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2267 /* Provide a default input filter to make life easy. */
2268 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2269 DEBUG_P(PerlIO_printf(Perl_debug_log,
2270 "filter_read %d: from rsfp\n", idx));
4e553d73 2271 if (maxlen) {
16d20bd9
AD
2272 /* Want a block */
2273 int len ;
f54cb97a 2274 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2275
2276 /* ensure buf_sv is large enough */
eb160463 2277 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2278 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2279 if (PerlIO_error(PL_rsfp))
37120919
AD
2280 return -1; /* error */
2281 else
2282 return 0 ; /* end of file */
2283 }
16d20bd9
AD
2284 SvCUR_set(buf_sv, old_len + len) ;
2285 } else {
2286 /* Want a line */
3280af22
NIS
2287 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2288 if (PerlIO_error(PL_rsfp))
37120919
AD
2289 return -1; /* error */
2290 else
2291 return 0 ; /* end of file */
2292 }
16d20bd9
AD
2293 }
2294 return SvCUR(buf_sv);
2295 }
2296 /* Skip this filter slot if filter has been deleted */
1de9afcd 2297 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2298 DEBUG_P(PerlIO_printf(Perl_debug_log,
2299 "filter_read %d: skipped (filter deleted)\n",
2300 idx));
16d20bd9
AD
2301 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2302 }
2303 /* Get function pointer hidden within datasv */
8141890a 2304 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2305 DEBUG_P(PerlIO_printf(Perl_debug_log,
2306 "filter_read %d: via function %p (%s)\n",
cfd0369c 2307 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2308 /* Call function. The function is expected to */
2309 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2310 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2311 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2312}
2313
76e3520e 2314STATIC char *
cea2e8a9 2315S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2316{
c39cd008 2317#ifdef PERL_CR_FILTER
3280af22 2318 if (!PL_rsfp_filters) {
c39cd008 2319 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2320 }
2321#endif
3280af22 2322 if (PL_rsfp_filters) {
55497cff
PP
2323 if (!append)
2324 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2325 if (FILTER_READ(0, sv, 0) > 0)
2326 return ( SvPVX(sv) ) ;
2327 else
2328 return Nullch ;
2329 }
9d116dd7 2330 else
fd049845 2331 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2332}
2333
01ec43d0 2334STATIC HV *
7fc63493 2335S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2336{
2337 GV *gv;
2338
01ec43d0 2339 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2340 return PL_curstash;
2341
2342 if (len > 2 &&
2343 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2344 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2345 {
2346 return GvHV(gv); /* Foo:: */
def3634b
GS
2347 }
2348
2349 /* use constant CLASS => 'MyClass' */
2350 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2351 SV *sv;
2352 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2353 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2354 }
2355 }
2356
2357 return gv_stashpv(pkgname, FALSE);
2358}
a0d0e21e 2359
468aa647 2360STATIC char *
cc6ed77d 2361S_tokenize_use(pTHX_ int is_use, char *s) {
468aa647
RGS
2362 if (PL_expect != XSTATE)
2363 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2364 is_use ? "use" : "no"));
2365 s = skipspace(s);
2366 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2367 s = force_version(s, TRUE);
2368 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2369 PL_nextval[PL_nexttoke].opval = Nullop;
2370 force_next(WORD);
2371 }
2372 else if (*s == 'v') {
2373 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2374 s = force_version(s, FALSE);
2375 }
2376 }
2377 else {
2378 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2379 s = force_version(s, FALSE);
2380 }
2381 yylval.ival = is_use;
2382 return s;
2383}
748a9306 2384#ifdef DEBUGGING
27da23d5 2385 static const char* const exp_name[] =
09bef843 2386 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2387 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2388 };
748a9306 2389#endif
463ee0b2 2390
02aa26ce
NT
2391/*
2392 yylex
2393
2394 Works out what to call the token just pulled out of the input
2395 stream. The yacc parser takes care of taking the ops we return and
2396 stitching them into a tree.
2397
2398 Returns:
2399 PRIVATEREF
2400
2401 Structure:
2402 if read an identifier
2403 if we're in a my declaration
2404 croak if they tried to say my($foo::bar)
2405 build the ops for a my() declaration
2406 if it's an access to a my() variable
2407 are we in a sort block?
2408 croak if my($a); $a <=> $b
2409 build ops for access to a my() variable
2410 if in a dq string, and they've said @foo and we can't find @foo
2411 croak
2412 build ops for a bareword
2413 if we already built the token before, use it.
2414*/
2415
20141f0e 2416
dba4d153
JH
2417#ifdef __SC__
2418#pragma segment Perl_yylex
2419#endif
dba4d153 2420int
dba4d153 2421Perl_yylex(pTHX)
20141f0e 2422{
3afc138a 2423 register char *s = PL_bufptr;
378cc40b 2424 register char *d;
79072805 2425 register I32 tmp;
463ee0b2 2426 STRLEN len;
161b471a
NIS
2427 GV *gv = Nullgv;
2428 GV **gvp = 0;
aa7440fb 2429 bool bof = FALSE;
1d239bbb 2430 I32 orig_keyword = 0;
a687059c 2431
bbf60fe6 2432 DEBUG_T( {
b6007c36
DM
2433 SV* tmp = newSVpvn("", 0);
2434 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2435 (IV)CopLINE(PL_curcop),
2436 lex_state_names[PL_lex_state],
2437 exp_name[PL_expect],
2438 pv_display(tmp, s, strlen(s), 0, 60));
2439 SvREFCNT_dec(tmp);
bbf60fe6 2440 } );
02aa26ce 2441 /* check if there's an identifier for us to look at */
ba979b31 2442 if (PL_pending_ident)
bbf60fe6 2443 return REPORT(S_pending_ident(aTHX));
bbce6d69 2444
02aa26ce
NT
2445 /* no identifier pending identification */
2446
3280af22 2447 switch (PL_lex_state) {
79072805
LW
2448#ifdef COMMENTARY
2449 case LEX_NORMAL: /* Some compilers will produce faster */
2450 case LEX_INTERPNORMAL: /* code if we comment these out. */
2451 break;
2452#endif
2453
09bef843 2454 /* when we've already built the next token, just pull it out of the queue */
79072805 2455 case LEX_KNOWNEXT:
3280af22
NIS
2456 PL_nexttoke--;
2457 yylval = PL_nextval[PL_nexttoke];
2458 if (!PL_nexttoke) {
2459 PL_lex_state = PL_lex_defer;
2460 PL_expect = PL_lex_expect;
2461 PL_lex_defer = LEX_NORMAL;
463ee0b2 2462 }
bbf60fe6 2463 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2464
02aa26ce 2465 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2466 when we get here, PL_bufptr is at the \
02aa26ce 2467 */
79072805
LW
2468 case LEX_INTERPCASEMOD:
2469#ifdef DEBUGGING
3280af22 2470 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2471 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2472#endif
02aa26ce 2473 /* handle \E or end of string */
3280af22 2474 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2475 /* if at a \E */
3280af22 2476 if (PL_lex_casemods) {
f54cb97a 2477 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2478 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2479
3792a11b
NC
2480 if (PL_bufptr != PL_bufend
2481 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2482 PL_bufptr += 2;
2483 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2484 }
bbf60fe6 2485 return REPORT(')');
79072805 2486 }
3280af22
NIS
2487 if (PL_bufptr != PL_bufend)
2488 PL_bufptr += 2;
2489 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2490 return yylex();
79072805
LW
2491 }
2492 else {
607df283 2493 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2494 "### Saw case modifier\n"); });
3280af22 2495 s = PL_bufptr + 1;
6e909404
JH
2496 if (s[1] == '\\' && s[2] == 'E') {
2497 PL_bufptr = s + 3;
2498 PL_lex_state = LEX_INTERPCONCAT;
2499 return yylex();
a0d0e21e 2500 }
6e909404
JH
2501 else {
2502 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2503 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2504 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2505 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2506 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2507 return REPORT(')');
6e909404
JH
2508 }
2509 if (PL_lex_casemods > 10)
2510 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2511 PL_lex_casestack[PL_lex_casemods++] = *s;
2512 PL_lex_casestack[PL_lex_casemods] = '\0';
2513 PL_lex_state = LEX_INTERPCONCAT;
2514 PL_nextval[PL_nexttoke].ival = 0;
2515 force_next('(');
2516 if (*s == 'l')
2517 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2518 else if (*s == 'u')
2519 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2520 else if (*s == 'L')
2521 PL_nextval[PL_nexttoke].ival = OP_LC;
2522 else if (*s == 'U')
2523 PL_nextval[PL_nexttoke].ival = OP_UC;
2524 else if (*s == 'Q')
2525 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2526 else
2527 Perl_croak(aTHX_ "panic: yylex");
2528 PL_bufptr = s + 1;
a0d0e21e 2529 }
79072805 2530 force_next(FUNC);
3280af22
NIS
2531 if (PL_lex_starts) {
2532 s = PL_bufptr;
2533 PL_lex_starts = 0;
131b3ad0
DM
2534 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2535 if (PL_lex_casemods == 1 && PL_lex_inpat)
2536 OPERATOR(',');
2537 else
2538 Aop(OP_CONCAT);
79072805
LW
2539 }
2540 else
cea2e8a9 2541 return yylex();
79072805
LW
2542 }
2543
55497cff 2544 case LEX_INTERPPUSH:
bbf60fe6 2545 return REPORT(sublex_push());
55497cff 2546
79072805 2547 case LEX_INTERPSTART:
3280af22 2548 if (PL_bufptr == PL_bufend)
bbf60fe6 2549 return REPORT(sublex_done());
607df283 2550 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2551 "### Interpolated variable\n"); });
3280af22
NIS
2552 PL_expect = XTERM;
2553 PL_lex_dojoin = (*PL_bufptr == '@');
2554 PL_lex_state = LEX_INTERPNORMAL;
2555 if (PL_lex_dojoin) {
2556 PL_nextval[PL_nexttoke].ival = 0;
79072805 2557 force_next(',');
a0d0e21e 2558 force_ident("\"", '$');
3280af22 2559 PL_nextval[PL_nexttoke].ival = 0;
79072805 2560 force_next('$');
3280af22 2561 PL_nextval[PL_nexttoke].ival = 0;
79072805 2562 force_next('(');
3280af22 2563 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2564 force_next(FUNC);
2565 }
3280af22
NIS
2566 if (PL_lex_starts++) {
2567 s = PL_bufptr;
131b3ad0
DM
2568 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2569 if (!PL_lex_casemods && PL_lex_inpat)
2570 OPERATOR(',');
2571 else
2572 Aop(OP_CONCAT);
79072805 2573 }
cea2e8a9 2574 return yylex();
79072805
LW
2575
2576 case LEX_INTERPENDMAYBE:
3280af22
NIS
2577 if (intuit_more(PL_bufptr)) {
2578 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2579 break;
2580 }
2581 /* FALL THROUGH */
2582
2583 case LEX_INTERPEND:
3280af22
NIS
2584 if (PL_lex_dojoin) {
2585 PL_lex_dojoin = FALSE;
2586 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2587 return REPORT(')');
79072805 2588 }
43a16006 2589 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2590 && SvEVALED(PL_lex_repl))
43a16006 2591 {
e9fa98b2 2592 if (PL_bufptr != PL_bufend)
cea2e8a9 2593 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2594 PL_lex_repl = Nullsv;
2595 }
79072805
LW
2596 /* FALLTHROUGH */
2597 case LEX_INTERPCONCAT:
2598#ifdef DEBUGGING
3280af22 2599 if (PL_lex_brackets)
cea2e8a9 2600 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2601#endif
3280af22 2602 if (PL_bufptr == PL_bufend)
bbf60fe6 2603 return REPORT(sublex_done());
79072805 2604
3280af22
NIS
2605 if (SvIVX(PL_linestr) == '\'') {
2606 SV *sv = newSVsv(PL_linestr);
2607 if (!PL_lex_inpat)
76e3520e 2608 sv = tokeq(sv);
3280af22 2609 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2610 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2611 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2612 s = PL_bufend;
79072805
LW
2613 }
2614 else {
3280af22 2615 s = scan_const(PL_bufptr);
79072805 2616 if (*s == '\\')
3280af22 2617 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2618 else
3280af22 2619 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2620 }
2621
3280af22
NIS
2622 if (s != PL_bufptr) {
2623 PL_nextval[PL_nexttoke] = yylval;
2624 PL_expect = XTERM;
79072805 2625 force_next(THING);
131b3ad0
DM
2626 if (PL_lex_starts++) {
2627 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2628 if (!PL_lex_casemods && PL_lex_inpat)
2629 OPERATOR(',');
2630 else
2631 Aop(OP_CONCAT);
2632 }
79072805 2633 else {
3280af22 2634 PL_bufptr = s;
cea2e8a9 2635 return yylex();
79072805
LW
2636 }
2637 }
2638
cea2e8a9 2639 return yylex();
a0d0e21e 2640 case LEX_FORMLINE:
3280af22
NIS
2641 PL_lex_state = LEX_NORMAL;
2642 s = scan_formline(PL_bufptr);
2643 if (!PL_lex_formbrack)
a0d0e21e
LW
2644 goto rightbracket;
2645 OPERATOR(';');
79072805
LW
2646 }
2647
3280af22
NIS
2648 s = PL_bufptr;
2649 PL_oldoldbufptr = PL_oldbufptr;
2650 PL_oldbufptr = s;
463ee0b2
LW
2651
2652 retry:
378cc40b
LW
2653 switch (*s) {
2654 default:
7e2040f0 2655 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2656 goto keylookup;
cea2e8a9 2657 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2658 case 4:
2659 case 26:
2660 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2661 case 0:
3280af22
NIS
2662 if (!PL_rsfp) {
2663 PL_last_uni = 0;
2664 PL_last_lop = 0;
c5ee2135 2665 if (PL_lex_brackets) {
0bd48802
AL
2666 yyerror(PL_lex_formbrack
2667 ? "Format not terminated"
2668 : "Missing right curly or square bracket");
c5ee2135 2669 }
4e553d73 2670 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2671 "### Tokener got EOF\n");
5f80b19c 2672 } );
79072805 2673 TOKEN(0);
463ee0b2 2674 }
3280af22 2675 if (s++ < PL_bufend)
a687059c 2676 goto retry; /* ignore stray nulls */
3280af22
NIS
2677 PL_last_uni = 0;
2678 PL_last_lop = 0;
2679 if (!PL_in_eval && !PL_preambled) {
2680 PL_preambled = TRUE;
2681 sv_setpv(PL_linestr,incl_perldb());
2682 if (SvCUR(PL_linestr))
4147a61b 2683 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2684 if (PL_preambleav){
2685 while(AvFILLp(PL_preambleav) >= 0) {
2686 SV *tmpsv = av_shift(PL_preambleav);
2687 sv_catsv(PL_linestr, tmpsv);
4147a61b 2688 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2689 sv_free(tmpsv);
2690 }
3280af22
NIS
2691 sv_free((SV*)PL_preambleav);
2692 PL_preambleav = NULL;
91b7def8 2693 }
3280af22
NIS
2694 if (PL_minus_n || PL_minus_p) {
2695 sv_catpv(PL_linestr, "LINE: while (<>) {");
2696 if (PL_minus_l)
2697 sv_catpv(PL_linestr,"chomp;");
2698 if (PL_minus_a) {
3280af22 2699 if (PL_minus_F) {
3792a11b
NC
2700 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2701 || *PL_splitstr == '"')
3280af22 2702 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2703 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2704 else {
c8ef6a4b
NC
2705 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2706 bytes can be used as quoting characters. :-) */
48c4c863
NC
2707 /* The count here deliberately includes the NUL
2708 that terminates the C string constant. This
2709 embeds the opening NUL into the string. */
dd374669 2710 const char *splits = PL_splitstr;
9a3fb652 2711 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2712 do {
2713 /* Need to \ \s */
dd374669
AL
2714 if (*splits == '\\')
2715 sv_catpvn(PL_linestr, splits, 1);
2716 sv_catpvn(PL_linestr, splits, 1);
2717 } while (*splits++);
48c4c863
NC
2718 /* This loop will embed the trailing NUL of
2719 PL_linestr as the last thing it does before
2720 terminating. */
4fccd7c6 2721 sv_catpvn(PL_linestr, ");", 2);
54310121 2722 }
2304df62
AD
2723 }
2724 else
75c72d73 2725 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2726 }
79072805 2727 }
4147a61b 2728 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2729 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2731 PL_last_lop = PL_last_uni = Nullch;
3280af22 2732 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2733 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2734
2735 sv_upgrade(sv, SVt_PVMG);
3280af22 2736 sv_setsv(sv,PL_linestr);
0ac0412a 2737 (void)SvIOK_on(sv);
45977657 2738 SvIV_set(sv, 0);
57843af0 2739 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2740 }
79072805 2741 goto retry;
a687059c 2742 }
e929a76b 2743 do {
aa7440fb 2744 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2745 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2746 fake_eof:
2747 if (PL_rsfp) {
2748 if (PL_preprocess && !PL_in_eval)
2749 (void)PerlProc_pclose(PL_rsfp);
2750 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2751 PerlIO_clearerr(PL_rsfp);
2752 else
2753 (void)PerlIO_close(PL_rsfp);
2754 PL_rsfp = Nullfp;
2755 PL_doextract = FALSE;
2756 }
2757 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2758 sv_setpv(PL_linestr,PL_minus_p
2759 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2760 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2761 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2762 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2763 PL_minus_n = PL_minus_p = 0;
2764 goto retry;
2765 }
2766 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2767 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2768 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2769 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2770 }
7aa207d6
JH
2771 /* If it looks like the start of a BOM or raw UTF-16,
2772 * check if it in fact is. */
2773 else if (bof &&
2774 (*s == 0 ||
2775 *(U8*)s == 0xEF ||
2776 *(U8*)s >= 0xFE ||
2777 s[1] == 0)) {
226017aa 2778#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2779# ifdef __GNU_LIBRARY__
2780# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2781# define FTELL_FOR_PIPE_IS_BROKEN
2782# endif
e3f494f1
JH
2783# else
2784# ifdef __GLIBC__
2785# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2786# define FTELL_FOR_PIPE_IS_BROKEN
2787# endif
2788# endif
226017aa
DD
2789# endif
2790#endif
2791#ifdef FTELL_FOR_PIPE_IS_BROKEN
2792 /* This loses the possibility to detect the bof
2793 * situation on perl -P when the libc5 is being used.
2794 * Workaround? Maybe attach some extra state to PL_rsfp?
2795 */
2796 if (!PL_preprocess)
7e28d3af 2797 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2798#else
eb160463 2799 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2800#endif
7e28d3af 2801 if (bof) {
3280af22 2802 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2803 s = swallow_bom((U8*)s);
e929a76b 2804 }
378cc40b 2805 }
3280af22 2806 if (PL_doextract) {
a0d0e21e
LW
2807 /* Incest with pod. */
2808 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2809 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2810 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2811 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2812 PL_last_lop = PL_last_uni = Nullch;
3280af22 2813 PL_doextract = FALSE;
a0d0e21e 2814 }
4e553d73 2815 }
463ee0b2 2816 incline(s);
3280af22
NIS
2817 } while (PL_doextract);
2818 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2819 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2820 SV * const sv = NEWSV(85,0);
a687059c 2821
93a17b20 2822 sv_upgrade(sv, SVt_PVMG);
3280af22 2823 sv_setsv(sv,PL_linestr);
0ac0412a 2824 (void)SvIOK_on(sv);
45977657 2825 SvIV_set(sv, 0);
57843af0 2826 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2827 }
3280af22 2828 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2829 PL_last_lop = PL_last_uni = Nullch;
57843af0 2830 if (CopLINE(PL_curcop) == 1) {
3280af22 2831 while (s < PL_bufend && isSPACE(*s))
79072805 2832 s++;
a0d0e21e 2833 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2834 s++;
44a8e56a 2835 d = Nullch;
3280af22 2836 if (!PL_in_eval) {
44a8e56a
PP
2837 if (*s == '#' && *(s+1) == '!')
2838 d = s + 2;
2839#ifdef ALTERNATE_SHEBANG
2840 else {
bfed75c6 2841 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2842 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2843 d = s + (sizeof(as) - 1);
2844 }
2845#endif /* ALTERNATE_SHEBANG */
2846 }
2847 if (d) {
b8378b72 2848 char *ipath;
774d564b 2849 char *ipathend;
b8378b72 2850
774d564b 2851 while (isSPACE(*d))
b8378b72
CS
2852 d++;
2853 ipath = d;
774d564b
PP
2854 while (*d && !isSPACE(*d))
2855 d++;
2856 ipathend = d;
2857
2858#ifdef ARG_ZERO_IS_SCRIPT
2859 if (ipathend > ipath) {
2860 /*
2861 * HP-UX (at least) sets argv[0] to the script name,
2862 * which makes $^X incorrect. And Digital UNIX and Linux,
2863 * at least, set argv[0] to the basename of the Perl
2864 * interpreter. So, having found "#!", we'll set it right.
2865 */
551405c4 2866 SV * const x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2867 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2868 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2869 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2870 SvSETMAGIC(x);
2871 }
556c1dec
JH
2872 else {
2873 STRLEN blen;
2874 STRLEN llen;
cfd0369c 2875 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2876 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2877 if (llen < blen) {
2878 bstart += blen - llen;
2879 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2880 sv_setpvn(x, ipath, ipathend - ipath);
2881 SvSETMAGIC(x);
2882 }
2883 }
2884 }
774d564b 2885 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2886 }
774d564b 2887#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2888
2889 /*
2890 * Look for options.
2891 */
748a9306 2892 d = instr(s,"perl -");
84e30d1a 2893 if (!d) {
748a9306 2894 d = instr(s,"perl");
84e30d1a
GS
2895#if defined(DOSISH)
2896 /* avoid getting into infinite loops when shebang
2897 * line contains "Perl" rather than "perl" */
2898 if (!d) {
2899 for (d = ipathend-4; d >= ipath; --d) {
2900 if ((*d == 'p' || *d == 'P')
2901 && !ibcmp(d, "perl", 4))
2902 {
2903 break;
2904 }
2905 }
2906 if (d < ipath)
2907 d = Nullch;
2908 }
2909#endif
2910 }
44a8e56a
PP
2911#ifdef ALTERNATE_SHEBANG
2912 /*
2913 * If the ALTERNATE_SHEBANG on this system starts with a
2914 * character that can be part of a Perl expression, then if
2915 * we see it but not "perl", we're probably looking at the
2916 * start of Perl code, not a request to hand off to some
2917 * other interpreter. Similarly, if "perl" is there, but
2918 * not in the first 'word' of the line, we assume the line
2919 * contains the start of the Perl program.
44a8e56a
PP
2920 */
2921 if (d && *s != '#') {
f54cb97a 2922 const char *c = ipath;
44a8e56a
PP
2923 while (*c && !strchr("; \t\r\n\f\v#", *c))
2924 c++;
2925 if (c < d)
2926 d = Nullch; /* "perl" not in first word; ignore */
2927 else
2928 *s = '#'; /* Don't try to parse shebang line */
2929 }
774d564b 2930#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2931#ifndef MACOS_TRADITIONAL
748a9306 2932 if (!d &&
44a8e56a 2933 *s == '#' &&
774d564b 2934 ipathend > ipath &&
3280af22 2935 !PL_minus_c &&
748a9306 2936 !instr(s,"indir") &&
3280af22 2937 instr(PL_origargv[0],"perl"))
748a9306 2938 {
27da23d5 2939 dVAR;
9f68db38 2940 char **newargv;
9f68db38 2941
774d564b
PP
2942 *ipathend = '\0';
2943 s = ipathend + 1;
3280af22 2944 while (s < PL_bufend && isSPACE(*s))
9f68db38 2945 s++;
3280af22 2946 if (s < PL_bufend) {
a02a5408 2947 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2948 newargv[1] = s;
3280af22 2949 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2950 s++;
2951 *s = '\0';
3280af22 2952 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2953 }
2954 else
3280af22 2955 newargv = PL_origargv;
774d564b 2956 newargv[0] = ipath;
b35112e7 2957 PERL_FPU_PRE_EXEC
b4748376 2958 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2959 PERL_FPU_POST_EXEC
cea2e8a9 2960 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2961 }
bf4acbe4 2962#endif
748a9306 2963 if (d) {
f54cb97a
AL
2964 const U32 oldpdb = PL_perldb;
2965 const bool oldn = PL_minus_n;
2966 const bool oldp = PL_minus_p;
748a9306
LW
2967
2968 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2969 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2970
2971 if (*d++ == '-') {
f54cb97a 2972 const bool switches_done = PL_doswitches;
8cc95fdb 2973 do {
3ffe3ee4 2974 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2975 const char * const m = d;
8cc95fdb 2976 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2977 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2978 (int)(d - m), m);
2979 }
2980 d = moreswitches(d);
2981 } while (d);
f0b2cf55
YST
2982 if (PL_doswitches && !switches_done) {
2983 int argc = PL_origargc;
2984 char **argv = PL_origargv;
2985 do {
2986 argc--,argv++;
2987 } while (argc && argv[0][0] == '-' && argv[0][1]);
2988 init_argv_symbols(argc,argv);
2989 }
155aba94
GS
2990 if ((PERLDB_LINE && !oldpdb) ||
2991 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2992 /* if we have already added "LINE: while (<>) {",
2993 we must not do it again */
748a9306 2994 {
c69006e4 2995 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2996 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2997 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2998 PL_last_lop = PL_last_uni = Nullch;
3280af22 2999 PL_preambled = FALSE;
84902520 3000 if (PERLDB_LINE)
3280af22 3001 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3002 goto retry;
3003 }
a11ec5a9
RGS
3004 if (PL_doswitches && !switches_done) {
3005 int argc = PL_origargc;
3006 char **argv = PL_origargv;
3007 do {
3008 argc--,argv++;
3009 } while (argc && argv[0][0] == '-' && argv[0][1]);
3010 init_argv_symbols(argc,argv);
3011 }
a0d0e21e 3012 }
79072805 3013 }
9f68db38 3014 }
79072805 3015 }
3280af22
NIS
3016 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3017 PL_bufptr = s;
3018 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3019 return yylex();
ae986130 3020 }
378cc40b 3021 goto retry;
4fdae800 3022 case '\r':
6a27c188 3023#ifdef PERL_STRICT_CR
cea2e8a9 3024 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3025 Perl_croak(aTHX_
cc507455 3026 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3027#endif
4fdae800 3028 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3029#ifdef MACOS_TRADITIONAL
3030 case '\312':
3031#endif
378cc40b
LW
3032 s++;
3033 goto retry;
378cc40b 3034 case '#':
e929a76b 3035 case '\n':
3280af22 3036 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3037 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3038 /* handle eval qq[#line 1 "foo"\n ...] */
3039 CopLINE_dec(PL_curcop);
3040 incline(s);
3041 }
3280af22 3042 d = PL_bufend;
a687059c 3043 while (s < d && *s != '\n')
378cc40b 3044 s++;
0f85fab0 3045 if (s < d)
378cc40b 3046 s++;
78c267c1 3047 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3048 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3049 incline(s);
3280af22
NIS
3050 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3051 PL_bufptr = s;
3052 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3053 return yylex();
a687059c 3054 }
378cc40b 3055 }
a687059c 3056 else {
378cc40b 3057 *s = '\0';
3280af22 3058 PL_bufend = s;
a687059c 3059 }
378cc40b
LW
3060 goto retry;
3061 case '-':
79072805 3062 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
3063 I32 ftst = 0;
3064
378cc40b 3065 s++;
3280af22 3066 PL_bufptr = s;
748a9306
LW
3067 tmp = *s++;
3068
bf4acbe4 3069 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3070 s++;
3071
3072 if (strnEQ(s,"=>",2)) {
3280af22 3073 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3074 DEBUG_T( { S_printbuf(aTHX_
3075 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3076 } );
748a9306
LW
3077 OPERATOR('-'); /* unary minus */
3078 }
3280af22 3079 PL_last_uni = PL_oldbufptr;
748a9306 3080 switch (tmp) {
e5edeb50
JH
3081 case 'r': ftst = OP_FTEREAD; break;
3082 case 'w': ftst = OP_FTEWRITE; break;
3083 case 'x': ftst = OP_FTEEXEC; break;
3084 case 'o': ftst = OP_FTEOWNED; break;
3085 case 'R': ftst = OP_FTRREAD; break;
3086 case 'W': ftst = OP_FTRWRITE; break;
3087 case 'X': ftst = OP_FTREXEC; break;
3088 case 'O': ftst = OP_FTROWNED; break;
3089 case 'e': ftst = OP_FTIS; break;
3090 case 'z': ftst = OP_FTZERO; break;
3091 case 's': ftst = OP_FTSIZE; break;
3092 case 'f': ftst = OP_FTFILE; break;
3093 case 'd': ftst = OP_FTDIR; break;
3094 case 'l': ftst = OP_FTLINK; break;
3095 case 'p': ftst = OP_FTPIPE; break;
3096 case 'S': ftst = OP_FTSOCK; break;
3097 case 'u': ftst = OP_FTSUID; break;
3098 case 'g': ftst = OP_FTSGID; break;
3099 case 'k': ftst = OP_FTSVTX; break;
3100 case 'b': ftst = OP_FTBLK; break;
3101 case 'c': ftst = OP_FTCHR; break;
3102 case 't': ftst = OP_FTTTY; break;
3103 case 'T': ftst = OP_FTTEXT; break;
3104 case 'B': ftst = OP_FTBINARY; break;
3105 case 'M': case 'A': case 'C':
3106 gv_fetchpv("\024",TRUE, SVt_PV);
3107 switch (tmp) {
3108 case 'M': ftst = OP_FTMTIME; break;
3109 case 'A': ftst = OP_FTATIME; break;
3110 case 'C': ftst = OP_FTCTIME; break;
3111 default: break;
3112 }
3113 break;
378cc40b 3114 default:
378cc40b
LW
3115 break;
3116 }
e5edeb50 3117 if (ftst) {
eb160463 3118 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3119 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3120 "### Saw file test %c\n", (int)tmp);
5f80b19c 3121 } );
e5edeb50
JH
3122 FTST(ftst);
3123 }
3124 else {
3125 /* Assume it was a minus followed by a one-letter named
3126 * subroutine call (or a -bareword), then. */
95c31fe3 3127 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3128 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3129 (int) tmp);
5f80b19c 3130 } );
3cf7b4c4 3131 s = --PL_bufptr;
e5edeb50 3132 }
378cc40b 3133 }
a687059c
LW
3134 tmp = *s++;
3135 if (*s == tmp) {
3136 s++;
3280af22 3137 if (PL_expect == XOPERATOR)
79072805
LW
3138 TERM(POSTDEC);
3139 else
3140 OPERATOR(PREDEC);
3141 }
3142 else if (*s == '>') {
3143 s++;
3144 s = skipspace(s);
7e2040f0 3145 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3146 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3147 TOKEN(ARROW);
79072805 3148 }
748a9306
LW
3149 else if (*s == '$')
3150 OPERATOR(ARROW);
463ee0b2 3151 else
748a9306 3152 TERM(ARROW);
a687059c 3153 }
3280af22 3154 if (PL_expect == XOPERATOR)
79072805
LW
3155 Aop(OP_SUBTRACT);
3156 else {
3280af22 3157 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3158 check_uni();
79072805 3159 OPERATOR('-'); /* unary minus */
2f3197b3 3160 }
79072805 3161
378cc40b 3162 case '+':
a687059c
LW
3163 tmp = *s++;
3164 if (*s == tmp) {
378cc40b 3165 s++;
3280af22 3166 if (PL_expect == XOPERATOR)
79072805
LW
3167 TERM(POSTINC);
3168 else
3169 OPERATOR(PREINC);
378cc40b 3170 }
3280af22 3171 if (PL_expect == XOPERATOR)
79072805
LW
3172 Aop(OP_ADD);
3173 else {
3280af22 3174 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3175 check_uni();
a687059c 3176 OPERATOR('+');
2f3197b3 3177 }
a687059c 3178
378cc40b 3179 case '*':
3280af22
NIS
3180 if (PL_expect != XOPERATOR) {
3181 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3182 PL_expect = XOPERATOR;
3183 force_ident(PL_tokenbuf, '*');
3184 if (!*PL_tokenbuf)
a0d0e21e 3185 PREREF('*');
79072805 3186 TERM('*');
a687059c 3187 }
79072805
LW
3188 s++;
3189 if (*s == '*') {
a687059c 3190 s++;
79072805 3191 PWop(OP_POW);
a687059c 3192 }
79072805
LW
3193 Mop(OP_MULTIPLY);
3194
378cc40b 3195 case '%':
3280af22 3196 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3197 ++s;
3198 Mop(OP_MODULO);
a687059c 3199 }
3280af22
NIS
3200 PL_tokenbuf[0] = '%';
3201 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3202 if (!PL_tokenbuf[1]) {
bbce6d69 3203 PREREF('%');
a687059c 3204 }
3280af22 3205 PL_pending_ident = '%';
bbce6d69 3206 TERM('%');
a687059c 3207
378cc40b 3208 case '^':
79072805 3209 s++;
a0d0e21e 3210 BOop(OP_BIT_XOR);
79072805 3211 case '[':
3280af22 3212 PL_lex_brackets++;
79072805 3213 /* FALL THROUGH */
378cc40b 3214 case '~':
0d863452
RH
3215 if (s[1] == '~'
3216 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3217 && FEATURE_IS_ENABLED("~~", 2))
3218 {
3219 s += 2;
3220 Eop(OP_SMARTMATCH);
3221 }
378cc40b 3222 case ',':
378cc40b
LW
3223 tmp = *s++;
3224 OPERATOR(tmp);
a0d0e21e
LW
3225 case ':':
3226 if (s[1] == ':') {
3227 len = 0;
3228 goto just_a_word;
3229 }
3230 s++;
09bef843
SB
3231 switch (PL_expect) {
3232 OP *attrs;
3233 case XOPERATOR:
3234 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3235 break;
3236 PL_bufptr = s; /* update in case we back off */
3237 goto grabattrs;
3238 case XATTRBLOCK:
3239 PL_expect = XBLOCK;
3240 goto grabattrs;
3241 case XATTRTERM:
3242 PL_expect = XTERMBLOCK;
3243 grabattrs:
3244 s = skipspace(s);
3245 attrs = Nullop;
7e2040f0 3246 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3247 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3248 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3249 if (tmp < 0) tmp = -tmp;
3250 switch (tmp) {
3251 case KEY_or:
3252 case KEY_and:
c963b151 3253 case KEY_err:
f9829d6b
GS
3254 case KEY_for:
3255 case KEY_unless:
3256 case KEY_if:
3257 case KEY_while:
3258 case KEY_until:
3259 goto got_attrs;
3260 default:
3261 break;
3262 }
3263 }
09bef843
SB
3264 if (*d == '(') {
3265 d = scan_str(d,TRUE,TRUE);
3266 if (!d) {
09bef843
SB
3267 /* MUST advance bufptr here to avoid bogus
3268 "at end of line" context messages from yyerror().
3269 */
3270 PL_bufptr = s + len;
3271 yyerror("Unterminated attribute parameter in attribute list");
3272 if (attrs)
3273 op_free(attrs);
bbf60fe6 3274 return REPORT(0); /* EOF indicator */
09bef843
SB
3275 }
3276 }
3277 if (PL_lex_stuff) {
3278 SV *sv = newSVpvn(s, len);
3279 sv_catsv(sv, PL_lex_stuff);
3280 attrs = append_elem(OP_LIST, attrs,
3281 newSVOP(OP_CONST, 0, sv));
3282 SvREFCNT_dec(PL_lex_stuff);
3283 PL_lex_stuff = Nullsv;
3284 }
3285 else {
371fce9b
DM
3286 if (len == 6 && strnEQ(s, "unique", len)) {
3287 if (PL_in_my == KEY_our)
3288#ifdef USE_ITHREADS
3289 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3290#else
3291 ; /* skip to avoid loading attributes.pm */
3292#endif
bfed75c6 3293 else
371fce9b
DM
3294 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3295 }
3296
d3cea301
SB
3297 /* NOTE: any CV attrs applied here need to be part of
3298 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3299 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3300 CvLVALUE_on(PL_compcv);
3301 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3302 CvLOCKED_on(PL_compcv);
3303 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3304 CvMETHOD_on(PL_compcv);
06492da6
SF
3305 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3306 CvASSERTION_on(PL_compcv);
78f9721b
SM
3307 /* After we've set the flags, it could be argued that
3308 we don't need to do the attributes.pm-based setting
3309 process, and shouldn't bother appending recognized
d3cea301
SB
3310 flags. To experiment with that, uncomment the
3311 following "else". (Note that's already been
3312 uncommented. That keeps the above-applied built-in
3313 attributes from being intercepted (and possibly
3314 rejected) by a package's attribute routines, but is
3315 justified by the performance win for the common case
3316 of applying only built-in attributes.) */
0256094b 3317 else
78f9721b
SM
3318 attrs = append_elem(OP_LIST, attrs,
3319 newSVOP(OP_CONST, 0,
3320 newSVpvn(s, len)));
09bef843
SB
3321 }
3322 s = skipspace(d);
0120eecf 3323 if (*s == ':' && s[1] != ':')
09bef843 3324 s = skipspace(s+1);
0120eecf
GS
3325 else if (s == d)
3326 break; /* require real whitespace or :'s */
09bef843 3327 }
f9829d6b 3328 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3329 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3330 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3331 /* If here for an expression, and parsed no attrs, back off. */
3332 if (tmp == '=' && !attrs) {
3333 s = PL_bufptr;
3334 break;
3335 }
3336 /* MUST advance bufptr here to avoid bogus "at end of line"
3337 context messages from yyerror().
3338 */
3339 PL_bufptr = s;
0bd48802
AL
3340 yyerror( *s
3341 ? Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list", q, *s, q)
3342 : "Unterminated attribute list" );
09bef843
SB
3343 if (attrs)
3344 op_free(attrs);
3345 OPERATOR(':');
3346 }
f9829d6b 3347 got_attrs:
09bef843
SB
3348 if (attrs) {
3349 PL_nextval[PL_nexttoke].opval = attrs;
3350 force_next(THING);
3351 }
3352 TOKEN(COLONATTR);
3353 }
a0d0e21e 3354 OPERATOR(':');
8990e307
LW
3355 case '(':
3356 s++;
3280af22
NIS
3357 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3358 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3359 else
3280af22 3360 PL_expect = XTERM;
4a202259 3361 s = skipspace(s);
a0d0e21e 3362 TOKEN('(');
378cc40b 3363 case ';':
f4dd75d9 3364 CLINE;
378cc40b
LW
3365 tmp = *s++;
3366 OPERATOR(tmp);
3367 case ')':
378cc40b 3368 tmp = *s++;
16d20bd9
AD
3369 s = skipspace(s);
3370 if (*s == '{')
3371 PREBLOCK(tmp);
378cc40b 3372 TERM(tmp);
79072805
LW
3373 case ']':
3374 s++;
3280af22 3375 if (PL_lex_brackets <= 0)
d98d5fff 3376 yyerror("Unmatched right square bracket");
463ee0b2 3377 else
3280af22
NIS
3378 --PL_lex_brackets;
3379 if (PL_lex_state == LEX_INTERPNORMAL) {
3380 if (PL_lex_brackets == 0) {
a0d0e21e 3381 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3382 PL_lex_state = LEX_INTERPEND;
79072805
LW
3383 }
3384 }
4633a7c4 3385 TERM(']');
79072805
LW
3386 case '{':
3387 leftbracket:
79072805 3388 s++;
3280af22 3389 if (PL_lex_brackets > 100) {
8edd5f42 3390 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3391 }
3280af22 3392 switch (PL_expect) {
a0d0e21e 3393 case XTERM:
3280af22 3394 if (PL_lex_formbrack) {
a0d0e21e
LW
3395 s--;
3396 PRETERMBLOCK(DO);
3397 }
3280af22
NIS
3398 if (PL_oldoldbufptr == PL_last_lop)
3399 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3400 else
3280af22 3401 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3402 OPERATOR(HASHBRACK);
a0d0e21e 3403 case XOPERATOR:
bf4acbe4 3404 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3405 s++;
44a8e56a 3406 d = s;
3280af22
NIS
3407 PL_tokenbuf[0] = '\0';
3408 if (d < PL_bufend && *d == '-') {
3409 PL_tokenbuf[0] = '-';
44a8e56a 3410 d++;
bf4acbe4 3411 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3412 d++;
3413 }
7e2040f0 3414 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3415 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3416 FALSE, &len);
bf4acbe4 3417 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3418 d++;
3419 if (*d == '}') {
f54cb97a 3420 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3421 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3422 if (minus)
3423 force_next('-');
748a9306
LW
3424 }
3425 }
3426 /* FALL THROUGH */
09bef843 3427 case XATTRBLOCK:
748a9306 3428 case XBLOCK:
3280af22
NIS
3429 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3430 PL_expect = XSTATE;
a0d0e21e 3431 break;
09bef843 3432 case XATTRTERM:
a0d0e21e 3433 case XTERMBLOCK:
3280af22
NIS
3434 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3435 PL_expect = XSTATE;
a0d0e21e
LW
3436 break;
3437 default: {
f54cb97a 3438 const char *t;
3280af22
NIS
3439 if (PL_oldoldbufptr == PL_last_lop)
3440 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3441 else
3280af22 3442 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3443 s = skipspace(s);
8452ff4b
SB
3444 if (*s == '}') {
3445 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3446 PL_expect = XTERM;
3447 /* This hack is to get the ${} in the message. */
3448 PL_bufptr = s+1;
3449 yyerror("syntax error");
3450 break;
3451 }
a0d0e21e 3452 OPERATOR(HASHBRACK);
8452ff4b 3453 }
b8a4b1be
GS
3454 /* This hack serves to disambiguate a pair of curlies
3455 * as being a block or an anon hash. Normally, expectation
3456 * determines that, but in cases where we're not in a
3457 * position to expect anything in particular (like inside
3458 * eval"") we have to resolve the ambiguity. This code
3459 * covers the case where the first term in the curlies is a
3460 * quoted string. Most other cases need to be explicitly
a0288114 3461 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3462 * curly in order to force resolution as an anon hash.
3463 *
3464 * XXX should probably propagate the outer expectation
3465 * into eval"" to rely less on this hack, but that could
3466 * potentially break current behavior of eval"".
3467 * GSAR 97-07-21
3468 */
3469 t = s;
3470 if (*s == '\'' || *s == '"' || *s == '`') {
3471 /* common case: get past first string, handling escapes */
3280af22 3472 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3473 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3474 t++;
3475 t++;
a0d0e21e 3476 }
b8a4b1be 3477 else if (*s == 'q') {
3280af22 3478 if (++t < PL_bufend
b8a4b1be 3479 && (!isALNUM(*t)
3280af22 3480 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3481 && !isALNUM(*t))))
3482 {
abc667d1 3483 /* skip q//-like construct */
f54cb97a 3484 const char *tmps;
b8a4b1be
GS
3485 char open, close, term;
3486 I32 brackets = 1;
3487
3280af22 3488 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3489 t++;
abc667d1
DM
3490 /* check for q => */
3491 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3492 OPERATOR(HASHBRACK);
3493 }
b8a4b1be
GS
3494 term = *t;
3495 open = term;
3496 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3497 term = tmps[5];
3498 close = term;
3499 if (open == close)
3280af22
NIS
3500 for (t++; t < PL_bufend; t++) {
3501 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3502 t++;
6d07e5e9 3503 else if (*t == open)
b8a4b1be
GS
3504 break;
3505 }
abc667d1 3506 else {
3280af22
NIS
3507 for (t++; t < PL_bufend; t++) {
3508 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3509 t++;
6d07e5e9 3510 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3511 break;
3512 else if (*t == open)
3513 brackets++;
3514 }
abc667d1
DM
3515 }
3516 t++;
b8a4b1be 3517 }
abc667d1
DM
3518 else
3519 /* skip plain q word */
3520 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3521 t += UTF8SKIP(t);
a0d0e21e 3522 }
7e2040f0 3523 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3524 t += UTF8SKIP(t);
7e2040f0 3525 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3526 t += UTF8SKIP(t);
a0d0e21e 3527 }
3280af22 3528 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3529 t++;