This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Assert that orig_keyword, gv and gvp aren't used before the main
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
8903cb82 30
acfe0abc 31static void restore_rsfp(pTHX_ void *f);
6e3aabd6 32#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
33static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 35#endif
51371543 36
9059aa12
LW
37#define XFAKEBRACK 128
38#define XENUMMASK 127
39
39e02b42
JH
40#ifdef USE_UTF8_SCRIPTS
41# define UTF (!IN_BYTES)
2b9d42f0 42#else
746b446a 43# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 44#endif
a0ed51b3 45
61f0cdd9 46/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
47 * 1999-02-27 mjd-perl-patch@plover.com */
48#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
49
bf4acbe4
GS
50/* On MacOS, respect nonbreaking spaces */
51#ifdef MACOS_TRADITIONAL
52#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
53#else
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
55#endif
56
ffb4593c
NT
57/* LEX_* are values for PL_lex_state, the state of the lexer.
58 * They are arranged oddly so that the guard on the switch statement
79072805
LW
59 * can get by with a single comparison (if the compiler is smart enough).
60 */
61
fb73857a
PP
62/* #define LEX_NOTPARSING 11 is done in perl.h. */
63
b6007c36
DM
64#define LEX_NORMAL 10 /* normal code (ie not within "...") */
65#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
66#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
67#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
68#define LEX_INTERPSTART 6 /* expecting the start of a $var */
69
70 /* at end of code, eg "$x" followed by: */
71#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
72#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
73
74#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
75 string or after \E, $foo, etc */
76#define LEX_INTERPCONST 2 /* NOT USED */
77#define LEX_FORMLINE 1 /* expecting a format line */
78#define LEX_KNOWNEXT 0 /* next token known; just return it */
79
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 139# define REPORT(retval) tokereport((I32)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
376fcdbf
AL
170#define UNI2(f,x) { \
171 yylval.ival = f; \
172 PL_expect = x; \
173 PL_bufptr = s; \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
176 if (*s == '(') \
177 return REPORT( (int)FUNC1 ); \
178 s = skipspace(s); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
180 }
6f33ba73
RGS
181#define UNI(f) UNI2(f,XTERM)
182#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 183
376fcdbf
AL
184#define UNIBRACK(f) { \
185 yylval.ival = f; \
186 PL_bufptr = s; \
187 PL_last_uni = PL_oldbufptr; \
188 if (*s == '(') \
189 return REPORT( (int)FUNC1 ); \
190 s = skipspace(s); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
192 }
79072805 193
9f68db38 194/* grandfather return to old style */
3280af22 195#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 196
8fa7f367
JH
197#ifdef DEBUGGING
198
bbf60fe6
DM
199/* how to interpret the yylval associated with the token */
200enum token_type {
201 TOKENTYPE_NONE,
202 TOKENTYPE_IVAL,
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
204 TOKENTYPE_PVAL,
205 TOKENTYPE_OPVAL,
206 TOKENTYPE_GVVAL
207};
208
27da23d5
JH
209static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
9041c2e3 211{
bbf60fe6
DM
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 222 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
223 { DO, TOKENTYPE_NONE, "DO" },
224 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
225 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
226 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
227 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
228 { ELSE, TOKENTYPE_NONE, "ELSE" },
229 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
230 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
231 { FOR, TOKENTYPE_IVAL, "FOR" },
232 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
233 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
234 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
235 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
236 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
237 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 238 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
239 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
240 { IF, TOKENTYPE_IVAL, "IF" },
241 { LABEL, TOKENTYPE_PVAL, "LABEL" },
242 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
243 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
244 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
245 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
246 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
247 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
248 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
249 { MY, TOKENTYPE_IVAL, "MY" },
250 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
251 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
252 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
253 { OROP, TOKENTYPE_IVAL, "OROP" },
254 { OROR, TOKENTYPE_NONE, "OROR" },
255 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
256 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
257 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
258 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
259 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
260 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
261 { PREINC, TOKENTYPE_NONE, "PREINC" },
262 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
263 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
264 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
265 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
266 { SUB, TOKENTYPE_NONE, "SUB" },
267 { THING, TOKENTYPE_OPVAL, "THING" },
268 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
269 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
270 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
271 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
272 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
273 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 274 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
275 { WHILE, TOKENTYPE_IVAL, "WHILE" },
276 { WORD, TOKENTYPE_OPVAL, "WORD" },
277 { 0, TOKENTYPE_NONE, 0 }
278};
279
280/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 281
bbf60fe6 282STATIC int
f5bd084c 283S_tokereport(pTHX_ I32 rv)
bbf60fe6
DM
284{
285 if (DEBUG_T_TEST) {
bfed75c6 286 const char *name = Nullch;
bbf60fe6 287 enum token_type type = TOKENTYPE_NONE;
f54cb97a 288 const struct debug_tokens *p;
9d4ba2ae 289 SV* const report = newSVpvn("<== ", 4);
bbf60fe6 290
f54cb97a 291 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
292 if (p->token == (int)rv) {
293 name = p->name;
294 type = p->type;
295 break;
296 }
297 }
298 if (name)
54667de8 299 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
300 else if ((char)rv > ' ' && (char)rv < '~')
301 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
302 else if (!rv)
54667de8 303 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
304 else
305 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
306 switch (type) {
307 case TOKENTYPE_NONE:
308 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
309 break;
310 case TOKENTYPE_IVAL:
e4584336 311 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
312 break;
313 case TOKENTYPE_OPNUM:
314 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
315 PL_op_name[yylval.ival]);
316 break;
317 case TOKENTYPE_PVAL:
318 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
319 break;
320 case TOKENTYPE_OPVAL:
b6007c36 321 if (yylval.opval) {
401441c0 322 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 323 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
324 if (yylval.opval->op_type == OP_CONST) {
325 Perl_sv_catpvf(aTHX_ report, " %s",
326 SvPEEK(cSVOPx_sv(yylval.opval)));
327 }
328
329 }
401441c0
RGS
330 else
331 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
332 break;
333 }
b6007c36 334 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
335 };
336 return (int)rv;
998054bd
SC
337}
338
b6007c36
DM
339
340/* print the buffer with suitable escapes */
341
342STATIC void
343S_printbuf(pTHX_ const char* fmt, const char* s)
344{
c4420975 345 SV* const tmp = newSVpvn("", 0);
b6007c36
DM
346 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
347 SvREFCNT_dec(tmp);
348}
349
8fa7f367
JH
350#endif
351
ffb4593c
NT
352/*
353 * S_ao
354 *
c963b151
BD
355 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
356 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
357 */
358
76e3520e 359STATIC int
cea2e8a9 360S_ao(pTHX_ int toketype)
a0d0e21e 361{
3280af22
NIS
362 if (*PL_bufptr == '=') {
363 PL_bufptr++;
a0d0e21e
LW
364 if (toketype == ANDAND)
365 yylval.ival = OP_ANDASSIGN;
366 else if (toketype == OROR)
367 yylval.ival = OP_ORASSIGN;
c963b151
BD
368 else if (toketype == DORDOR)
369 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
370 toketype = ASSIGNOP;
371 }
372 return toketype;
373}
374
ffb4593c
NT
375/*
376 * S_no_op
377 * When Perl expects an operator and finds something else, no_op
378 * prints the warning. It always prints "<something> found where
379 * operator expected. It prints "Missing semicolon on previous line?"
380 * if the surprise occurs at the start of the line. "do you need to
381 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
382 * where the compiler doesn't know if foo is a method call or a function.
383 * It prints "Missing operator before end of line" if there's nothing
384 * after the missing operator, or "... before <...>" if there is something
385 * after the missing operator.
386 */
387
76e3520e 388STATIC void
bfed75c6 389S_no_op(pTHX_ const char *what, char *s)
463ee0b2 390{
9d4ba2ae
AL
391 char * const oldbp = PL_bufptr;
392 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 393
1189a94a
GS
394 if (!s)
395 s = oldbp;
07c798fb 396 else
1189a94a 397 PL_bufptr = s;
cea2e8a9 398 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
399 if (ckWARN_d(WARN_SYNTAX)) {
400 if (is_first)
401 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
402 "\t(Missing semicolon on previous line?)\n");
403 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 404 const char *t;
56da5a46
RGS
405 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
406 if (t < PL_bufptr && isSPACE(*t))
407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
408 "\t(Do you need to predeclare %.*s?)\n",
551405c4 409 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
410 }
411 else {
412 assert(s >= oldbp);
413 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 414 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 415 }
07c798fb 416 }
3280af22 417 PL_bufptr = oldbp;
8990e307
LW
418}
419
ffb4593c
NT
420/*
421 * S_missingterm
422 * Complain about missing quote/regexp/heredoc terminator.
423 * If it's called with (char *)NULL then it cauterizes the line buffer.
424 * If we're in a delimited string and the delimiter is a control
425 * character, it's reformatted into a two-char sequence like ^C.
426 * This is fatal.
427 */
428
76e3520e 429STATIC void
cea2e8a9 430S_missingterm(pTHX_ char *s)
8990e307
LW
431{
432 char tmpbuf[3];
433 char q;
434 if (s) {
9d4ba2ae 435 char * const nl = strrchr(s,'\n');
d2719217 436 if (nl)
8990e307
LW
437 *nl = '\0';
438 }
9d116dd7
JH
439 else if (
440#ifdef EBCDIC
441 iscntrl(PL_multi_close)
442#else
443 PL_multi_close < 32 || PL_multi_close == 127
444#endif
445 ) {
8990e307 446 *tmpbuf = '^';
585ec06d 447 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
448 tmpbuf[2] = '\0';
449 s = tmpbuf;
450 }
451 else {
eb160463 452 *tmpbuf = (char)PL_multi_close;
8990e307
LW
453 tmpbuf[1] = '\0';
454 s = tmpbuf;
455 }
456 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 457 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 458}
79072805 459
0d863452
RH
460#define FEATURE_IS_ENABLED(name, namelen) \
461 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
462 && feature_is_enabled(name, namelen))
463/*
464 * S_feature_is_enabled
465 * Check whether the named feature is enabled.
466 */
467STATIC bool
468S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
469{
470 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
471 char he_name[32] = "feature_";
472 (void) strncpy(&he_name[8], name, 24);
473
474 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
475}
476
ffb4593c
NT
477/*
478 * Perl_deprecate
ffb4593c
NT
479 */
480
79072805 481void
bfed75c6 482Perl_deprecate(pTHX_ const char *s)
a0d0e21e 483{
599cee73 484 if (ckWARN(WARN_DEPRECATED))
9014280d 485 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
486}
487
12bcd1a6 488void
bfed75c6 489Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
490{
491 /* This function should NOT be called for any new deprecated warnings */
492 /* Use Perl_deprecate instead */
493 /* */
494 /* It is here to maintain backward compatibility with the pre-5.8 */
495 /* warnings category hierarchy. The "deprecated" category used to */
496 /* live under the "syntax" category. It is now a top-level category */
497 /* in its own right. */
498
499 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 500 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
501 "Use of %s is deprecated", s);
502}
503
ffb4593c
NT
504/*
505 * depcom
9cbb5ea2 506 * Deprecate a comma-less variable list.
ffb4593c
NT
507 */
508
76e3520e 509STATIC void
cea2e8a9 510S_depcom(pTHX)
a0d0e21e 511{
12bcd1a6 512 deprecate_old("comma-less variable list");
a0d0e21e
LW
513}
514
ffb4593c 515/*
9cbb5ea2
GS
516 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
517 * utf16-to-utf8-reversed.
ffb4593c
NT
518 */
519
c39cd008
GS
520#ifdef PERL_CR_FILTER
521static void
522strip_return(SV *sv)
523{
95a20fc0 524 register const char *s = SvPVX_const(sv);
9d4ba2ae 525 register const char * const e = s + SvCUR(sv);
c39cd008
GS
526 /* outer loop optimized to do nothing if there are no CR-LFs */
527 while (s < e) {
528 if (*s++ == '\r' && *s == '\n') {
529 /* hit a CR-LF, need to copy the rest */
530 register char *d = s - 1;
531 *d++ = *s++;
532 while (s < e) {
533 if (*s == '\r' && s[1] == '\n')
534 s++;
535 *d++ = *s++;
536 }
537 SvCUR(sv) -= s - d;
538 return;
539 }
540 }
541}
a868473f 542
76e3520e 543STATIC I32
c39cd008 544S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 545{
f54cb97a 546 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
547 if (count > 0 && !maxlen)
548 strip_return(sv);
549 return count;
a868473f
NIS
550}
551#endif
552
ffb4593c
NT
553/*
554 * Perl_lex_start
9cbb5ea2
GS
555 * Initialize variables. Uses the Perl save_stack to save its state (for
556 * recursive calls to the parser).
ffb4593c
NT
557 */
558
a0d0e21e 559void
864dbfa3 560Perl_lex_start(pTHX_ SV *line)
79072805 561{
cfd0369c 562 const char *s;
8990e307
LW
563 STRLEN len;
564
3280af22
NIS
565 SAVEI32(PL_lex_dojoin);
566 SAVEI32(PL_lex_brackets);
3280af22
NIS
567 SAVEI32(PL_lex_casemods);
568 SAVEI32(PL_lex_starts);
569 SAVEI32(PL_lex_state);
7766f137 570 SAVEVPTR(PL_lex_inpat);
3280af22 571 SAVEI32(PL_lex_inwhat);
18b09519
GS
572 if (PL_lex_state == LEX_KNOWNEXT) {
573 I32 toke = PL_nexttoke;
574 while (--toke >= 0) {
575 SAVEI32(PL_nexttype[toke]);
576 SAVEVPTR(PL_nextval[toke]);
577 }
578 SAVEI32(PL_nexttoke);
18b09519 579 }
57843af0 580 SAVECOPLINE(PL_curcop);
3280af22
NIS
581 SAVEPPTR(PL_bufptr);
582 SAVEPPTR(PL_bufend);
583 SAVEPPTR(PL_oldbufptr);
584 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
585 SAVEPPTR(PL_last_lop);
586 SAVEPPTR(PL_last_uni);
3280af22
NIS
587 SAVEPPTR(PL_linestart);
588 SAVESPTR(PL_linestr);
8edd5f42
RGS
589 SAVEGENERICPV(PL_lex_brackstack);
590 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 591 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
592 SAVESPTR(PL_lex_stuff);
593 SAVEI32(PL_lex_defer);
09bef843 594 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 595 SAVESPTR(PL_lex_repl);
bebdddfc
GS
596 SAVEINT(PL_expect);
597 SAVEINT(PL_lex_expect);
3280af22
NIS
598
599 PL_lex_state = LEX_NORMAL;
600 PL_lex_defer = 0;
601 PL_expect = XSTATE;
602 PL_lex_brackets = 0;
a02a5408
JC
603 Newx(PL_lex_brackstack, 120, char);
604 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
605 PL_lex_casemods = 0;
606 *PL_lex_casestack = '\0';
607 PL_lex_dojoin = 0;
608 PL_lex_starts = 0;
609 PL_lex_stuff = Nullsv;
610 PL_lex_repl = Nullsv;
611 PL_lex_inpat = 0;
76be56bc 612 PL_nexttoke = 0;
3280af22 613 PL_lex_inwhat = 0;
09bef843 614 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
615 PL_linestr = line;
616 if (SvREADONLY(PL_linestr))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 618 s = SvPV_const(PL_linestr, len);
6f27f9a7 619 if (!len || s[len-1] != ';') {
3280af22
NIS
620 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
621 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
622 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 623 }
3280af22
NIS
624 SvTEMP_off(PL_linestr);
625 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
626 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 627 PL_last_lop = PL_last_uni = Nullch;
3280af22 628 PL_rsfp = 0;
79072805 629}
a687059c 630
ffb4593c
NT
631/*
632 * Perl_lex_end
9cbb5ea2
GS
633 * Finalizer for lexing operations. Must be called when the parser is
634 * done with the lexer.
ffb4593c
NT
635 */
636
463ee0b2 637void
864dbfa3 638Perl_lex_end(pTHX)
463ee0b2 639{
3280af22 640 PL_doextract = FALSE;
463ee0b2
LW
641}
642
ffb4593c
NT
643/*
644 * S_incline
645 * This subroutine has nothing to do with tilting, whether at windmills
646 * or pinball tables. Its name is short for "increment line". It
57843af0 647 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 648 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
649 * # line 500 "foo.pm"
650 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
651 */
652
76e3520e 653STATIC void
cea2e8a9 654S_incline(pTHX_ char *s)
463ee0b2
LW
655{
656 char *t;
657 char *n;
73659bf1 658 char *e;
463ee0b2 659 char ch;
463ee0b2 660
57843af0 661 CopLINE_inc(PL_curcop);
463ee0b2
LW
662 if (*s++ != '#')
663 return;
bf4acbe4 664 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
665 if (strnEQ(s, "line", 4))
666 s += 4;
667 else
668 return;
084592ab 669 if (SPACE_OR_TAB(*s))
73659bf1 670 s++;
4e553d73 671 else
73659bf1 672 return;
bf4acbe4 673 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
674 if (!isDIGIT(*s))
675 return;
676 n = s;
677 while (isDIGIT(*s))
678 s++;
bf4acbe4 679 while (SPACE_OR_TAB(*s))
463ee0b2 680 s++;
73659bf1 681 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 682 s++;
73659bf1
GS
683 e = t + 1;
684 }
463ee0b2 685 else {
463ee0b2 686 for (t = s; !isSPACE(*t); t++) ;
73659bf1 687 e = t;
463ee0b2 688 }
bf4acbe4 689 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
690 e++;
691 if (*e != '\n' && *e != '\0')
692 return; /* false alarm */
693
463ee0b2
LW
694 ch = *t;
695 *t = '\0';
f4dd75d9 696 if (t - s > 0) {
8a5ee598 697#ifndef USE_ITHREADS
c4420975 698 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
699 STRLEN tmplen = cf ? strlen(cf) : 0;
700 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
701 /* must copy *{"::_<(eval N)[oldfilename:L]"}
702 * to *{"::_<newfilename"} */
703 char smallbuf[256], smallbuf2[256];
704 char *tmpbuf, *tmpbuf2;
8a5ee598 705 GV **gvp, *gv2;
e66cf94c
RGS
706 STRLEN tmplen2 = strlen(s);
707 if (tmplen + 3 < sizeof smallbuf)
708 tmpbuf = smallbuf;
709 else
710 Newx(tmpbuf, tmplen + 3, char);
711 if (tmplen2 + 3 < sizeof smallbuf2)
712 tmpbuf2 = smallbuf2;
713 else
714 Newx(tmpbuf2, tmplen2 + 3, char);
715 tmpbuf[0] = tmpbuf2[0] = '_';
716 tmpbuf[1] = tmpbuf2[1] = '<';
717 memcpy(tmpbuf + 2, cf, ++tmplen);
718 memcpy(tmpbuf2 + 2, s, ++tmplen2);
719 ++tmplen; ++tmplen2;
8a5ee598
RGS
720 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
721 if (gvp) {
722 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
723 if (!isGV(gv2))
724 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
725 /* adjust ${"::_<newfilename"} to store the new file name */
726 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
727 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
728 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
729 }
e66cf94c
RGS
730 if (tmpbuf != smallbuf) Safefree(tmpbuf);
731 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
732 }
8a5ee598 733#endif
05ec9bb3 734 CopFILE_free(PL_curcop);
57843af0 735 CopFILE_set(PL_curcop, s);
f4dd75d9 736 }
463ee0b2 737 *t = ch;
57843af0 738 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
739}
740
ffb4593c
NT
741/*
742 * S_skipspace
743 * Called to gobble the appropriate amount and type of whitespace.
744 * Skips comments as well.
745 */
746
76e3520e 747STATIC char *
cea2e8a9 748S_skipspace(pTHX_ register char *s)
a687059c 749{
3280af22 750 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 751 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
752 s++;
753 return s;
754 }
755 for (;;) {
fd049845 756 STRLEN prevlen;
09bef843 757 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 758 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
759 while (s < PL_bufend && isSPACE(*s)) {
760 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
761 incline(s);
762 }
ffb4593c
NT
763
764 /* comment */
3280af22
NIS
765 if (s < PL_bufend && *s == '#') {
766 while (s < PL_bufend && *s != '\n')
463ee0b2 767 s++;
60e6418e 768 if (s < PL_bufend) {
463ee0b2 769 s++;
60e6418e
GS
770 if (PL_in_eval && !PL_rsfp) {
771 incline(s);
772 continue;
773 }
774 }
463ee0b2 775 }
ffb4593c
NT
776
777 /* only continue to recharge the buffer if we're at the end
778 * of the buffer, we're not reading from a source filter, and
779 * we're in normal lexing mode
780 */
09bef843
SB
781 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
782 PL_lex_state == LEX_FORMLINE)
463ee0b2 783 return s;
ffb4593c
NT
784
785 /* try to recharge the buffer */
9cbb5ea2
GS
786 if ((s = filter_gets(PL_linestr, PL_rsfp,
787 (prevlen = SvCUR(PL_linestr)))) == Nullch)
788 {
789 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
790 if (PL_minus_p) {
791 sv_setpv(PL_linestr,
792 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 793 PL_minus_n = PL_minus_p = 0;
a0d0e21e 794 }
01a19ab0
NC
795 else if (PL_minus_n) {
796 sv_setpvn(PL_linestr, ";}", 2);
797 PL_minus_n = 0;
798 }
a0d0e21e 799 else
4147a61b 800 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
801
802 /* reset variables for next time we lex */
9cbb5ea2
GS
803 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
804 = SvPVX(PL_linestr);
3280af22 805 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 806 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
807
808 /* Close the filehandle. Could be from -P preprocessor,
809 * STDIN, or a regular file. If we were reading code from
810 * STDIN (because the commandline held no -e or filename)
811 * then we don't close it, we reset it so the code can
812 * read from STDIN too.
813 */
814
3280af22
NIS
815 if (PL_preprocess && !PL_in_eval)
816 (void)PerlProc_pclose(PL_rsfp);
817 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
818 PerlIO_clearerr(PL_rsfp);
8990e307 819 else
3280af22
NIS
820 (void)PerlIO_close(PL_rsfp);
821 PL_rsfp = Nullfp;
463ee0b2
LW
822 return s;
823 }
ffb4593c
NT
824
825 /* not at end of file, so we only read another line */
09bef843
SB
826 /* make corresponding updates to old pointers, for yyerror() */
827 oldprevlen = PL_oldbufptr - PL_bufend;
828 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
829 if (PL_last_uni)
830 oldunilen = PL_last_uni - PL_bufend;
831 if (PL_last_lop)
832 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
833 PL_linestart = PL_bufptr = s + prevlen;
834 PL_bufend = s + SvCUR(PL_linestr);
835 s = PL_bufptr;
09bef843
SB
836 PL_oldbufptr = s + oldprevlen;
837 PL_oldoldbufptr = s + oldoldprevlen;
838 if (PL_last_uni)
839 PL_last_uni = s + oldunilen;
840 if (PL_last_lop)
841 PL_last_lop = s + oldloplen;
a0d0e21e 842 incline(s);
ffb4593c
NT
843
844 /* debugger active and we're not compiling the debugger code,
845 * so store the line into the debugger's array of lines
846 */
3280af22 847 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 848 SV * const sv = NEWSV(85,0);
8990e307
LW
849
850 sv_upgrade(sv, SVt_PVMG);
3280af22 851 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 852 (void)SvIOK_on(sv);
45977657 853 SvIV_set(sv, 0);
57843af0 854 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 855 }
463ee0b2 856 }
a687059c 857}
378cc40b 858
ffb4593c
NT
859/*
860 * S_check_uni
861 * Check the unary operators to ensure there's no ambiguity in how they're
862 * used. An ambiguous piece of code would be:
863 * rand + 5
864 * This doesn't mean rand() + 5. Because rand() is a unary operator,
865 * the +5 is its argument.
866 */
867
76e3520e 868STATIC void
cea2e8a9 869S_check_uni(pTHX)
ba106d47 870{
2f3197b3 871 char *s;
a0d0e21e 872 char *t;
2f3197b3 873
3280af22 874 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 875 return;
3280af22
NIS
876 while (isSPACE(*PL_last_uni))
877 PL_last_uni++;
7e2040f0 878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 879 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 880 return;
0453d815 881 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 882 const char ch = *s;
0453d815 883 *s = '\0';
9014280d 884 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 885 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
886 PL_last_uni);
887 *s = ch;
888 }
2f3197b3
LW
889}
890
ffb4593c
NT
891/*
892 * LOP : macro to build a list operator. Its behaviour has been replaced
893 * with a subroutine, S_lop() for which LOP is just another name.
894 */
895
a0d0e21e
LW
896#define LOP(f,x) return lop(f,x,s)
897
ffb4593c
NT
898/*
899 * S_lop
900 * Build a list operator (or something that might be one). The rules:
901 * - if we have a next token, then it's a list operator [why?]
902 * - if the next thing is an opening paren, then it's a function
903 * - else it's a list operator
904 */
905
76e3520e 906STATIC I32
a0be28da 907S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 908{
79072805 909 yylval.ival = f;
35c8bce7 910 CLINE;
3280af22
NIS
911 PL_expect = x;
912 PL_bufptr = s;
913 PL_last_lop = PL_oldbufptr;
eb160463 914 PL_last_lop_op = (OPCODE)f;
3280af22 915 if (PL_nexttoke)
bbf60fe6 916 return REPORT(LSTOP);
79072805 917 if (*s == '(')
bbf60fe6 918 return REPORT(FUNC);
79072805
LW
919 s = skipspace(s);
920 if (*s == '(')
bbf60fe6 921 return REPORT(FUNC);
79072805 922 else
bbf60fe6 923 return REPORT(LSTOP);
79072805
LW
924}
925
ffb4593c
NT
926/*
927 * S_force_next
9cbb5ea2 928 * When the lexer realizes it knows the next token (for instance,
ffb4593c 929 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
930 * to know what token to return the next time the lexer is called. Caller
931 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
932 * handles the token correctly.
ffb4593c
NT
933 */
934
4e553d73 935STATIC void
cea2e8a9 936S_force_next(pTHX_ I32 type)
79072805 937{
3280af22
NIS
938 PL_nexttype[PL_nexttoke] = type;
939 PL_nexttoke++;
940 if (PL_lex_state != LEX_KNOWNEXT) {
941 PL_lex_defer = PL_lex_state;
942 PL_lex_expect = PL_expect;
943 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
944 }
945}
946
d0a148a6
NC
947STATIC SV *
948S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
949{
9d4ba2ae 950 SV * const sv = newSVpvn(start,len);
bfed75c6 951 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
952 SvUTF8_on(sv);
953 return sv;
954}
955
ffb4593c
NT
956/*
957 * S_force_word
958 * When the lexer knows the next thing is a word (for instance, it has
959 * just seen -> and it knows that the next char is a word char, then
960 * it calls S_force_word to stick the next word into the PL_next lookahead.
961 *
962 * Arguments:
b1b65b59 963 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
964 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
965 * int check_keyword : if true, Perl checks to make sure the word isn't
966 * a keyword (do this if the word is a label, e.g. goto FOO)
967 * int allow_pack : if true, : characters will also be allowed (require,
968 * use, etc. do this)
9cbb5ea2 969 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
970 */
971
76e3520e 972STATIC char *
cea2e8a9 973S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 974{
463ee0b2
LW
975 register char *s;
976 STRLEN len;
4e553d73 977
463ee0b2
LW
978 start = skipspace(start);
979 s = start;
7e2040f0 980 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 981 (allow_pack && *s == ':') ||
15f0808c 982 (allow_initial_tick && *s == '\'') )
a0d0e21e 983 {
3280af22
NIS
984 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
985 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
986 return start;
987 if (token == METHOD) {
988 s = skipspace(s);
989 if (*s == '(')
3280af22 990 PL_expect = XTERM;
463ee0b2 991 else {
3280af22 992 PL_expect = XOPERATOR;
463ee0b2 993 }
79072805 994 }
d0a148a6
NC
995 PL_nextval[PL_nexttoke].opval
996 = (OP*)newSVOP(OP_CONST,0,
997 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 998 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
999 force_next(token);
1000 }
1001 return s;
1002}
1003
ffb4593c
NT
1004/*
1005 * S_force_ident
9cbb5ea2 1006 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1007 * text only contains the "foo" portion. The first argument is a pointer
1008 * to the "foo", and the second argument is the type symbol to prefix.
1009 * Forces the next token to be a "WORD".
9cbb5ea2 1010 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1011 */
1012
76e3520e 1013STATIC void
bfed75c6 1014S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
1015{
1016 if (s && *s) {
bfed75c6 1017 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 1018 PL_nextval[PL_nexttoke].opval = o;
79072805 1019 force_next(WORD);
748a9306 1020 if (kind) {
11343788 1021 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1022 /* XXX see note in pp_entereval() for why we forgo typo
1023 warnings if the symbol must be introduced in an eval.
1024 GSAR 96-10-12 */
3280af22 1025 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
1026 kind == '$' ? SVt_PV :
1027 kind == '@' ? SVt_PVAV :
1028 kind == '%' ? SVt_PVHV :
1029 SVt_PVGV
1030 );
748a9306 1031 }
79072805
LW
1032 }
1033}
1034
1571675a
GS
1035NV
1036Perl_str_to_version(pTHX_ SV *sv)
1037{
1038 NV retval = 0.0;
1039 NV nshift = 1.0;
1040 STRLEN len;
cfd0369c 1041 const char *start = SvPV_const(sv,len);
9d4ba2ae 1042 const char * const end = start + len;
504618e9 1043 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1044 while (start < end) {
ba210ebe 1045 STRLEN skip;
1571675a
GS
1046 UV n;
1047 if (utf)
9041c2e3 1048 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1049 else {
1050 n = *(U8*)start;
1051 skip = 1;
1052 }
1053 retval += ((NV)n)/nshift;
1054 start += skip;
1055 nshift *= 1000;
1056 }
1057 return retval;
1058}
1059
4e553d73 1060/*
ffb4593c
NT
1061 * S_force_version
1062 * Forces the next token to be a version number.
e759cc13
RGS
1063 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1064 * and if "guessing" is TRUE, then no new token is created (and the caller
1065 * must use an alternative parsing method).
ffb4593c
NT
1066 */
1067
76e3520e 1068STATIC char *
e759cc13 1069S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
1070{
1071 OP *version = Nullop;
44dcb63b 1072 char *d;
89bfa8cd
PP
1073
1074 s = skipspace(s);
1075
44dcb63b 1076 d = s;
dd629d5b 1077 if (*d == 'v')
44dcb63b 1078 d++;
44dcb63b 1079 if (isDIGIT(*d)) {
e759cc13
RGS
1080 while (isDIGIT(*d) || *d == '_' || *d == '.')
1081 d++;
9f3d182e 1082 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1083 SV *ver;
b73d6f50 1084 s = scan_num(s, &yylval);
89bfa8cd 1085 version = yylval.opval;
dd629d5b
GS
1086 ver = cSVOPx(version)->op_sv;
1087 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1088 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1089 SvNV_set(ver, str_to_version(ver));
1571675a 1090 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1091 }
89bfa8cd 1092 }
e759cc13
RGS
1093 else if (guessing)
1094 return s;
89bfa8cd
PP
1095 }
1096
1097 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1098 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1099 force_next(WORD);
89bfa8cd 1100
e759cc13 1101 return s;
89bfa8cd
PP
1102}
1103
ffb4593c
NT
1104/*
1105 * S_tokeq
1106 * Tokenize a quoted string passed in as an SV. It finds the next
1107 * chunk, up to end of string or a backslash. It may make a new
1108 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1109 * turns \\ into \.
1110 */
1111
76e3520e 1112STATIC SV *
cea2e8a9 1113S_tokeq(pTHX_ SV *sv)
79072805
LW
1114{
1115 register char *s;
1116 register char *send;
1117 register char *d;
b3ac6de7
IZ
1118 STRLEN len = 0;
1119 SV *pv = sv;
79072805
LW
1120
1121 if (!SvLEN(sv))
b3ac6de7 1122 goto finish;
79072805 1123
a0d0e21e 1124 s = SvPV_force(sv, len);
21a311ee 1125 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1126 goto finish;
463ee0b2 1127 send = s + len;
79072805
LW
1128 while (s < send && *s != '\\')
1129 s++;
1130 if (s == send)
b3ac6de7 1131 goto finish;
79072805 1132 d = s;
be4731d2 1133 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1134 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1135 if (SvUTF8(sv))
1136 SvUTF8_on(pv);
1137 }
79072805
LW
1138 while (s < send) {
1139 if (*s == '\\') {
a0d0e21e 1140 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1141 s++; /* all that, just for this */
1142 }
1143 *d++ = *s++;
1144 }
1145 *d = '\0';
95a20fc0 1146 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1147 finish:
3280af22 1148 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1149 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1150 return sv;
1151}
1152
ffb4593c
NT
1153/*
1154 * Now come three functions related to double-quote context,
1155 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1156 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1157 * interact with PL_lex_state, and create fake ( ... ) argument lists
1158 * to handle functions and concatenation.
1159 * They assume that whoever calls them will be setting up a fake
1160 * join call, because each subthing puts a ',' after it. This lets
1161 * "lower \luPpEr"
1162 * become
1163 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1164 *
1165 * (I'm not sure whether the spurious commas at the end of lcfirst's
1166 * arguments and join's arguments are created or not).
1167 */
1168
1169/*
1170 * S_sublex_start
1171 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1172 *
1173 * Pattern matching will set PL_lex_op to the pattern-matching op to
1174 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1175 *
1176 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1177 *
1178 * Everything else becomes a FUNC.
1179 *
1180 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1181 * had an OP_CONST or OP_READLINE). This just sets us up for a
1182 * call to S_sublex_push().
1183 */
1184
76e3520e 1185STATIC I32
cea2e8a9 1186S_sublex_start(pTHX)
79072805 1187{
0d46e09a 1188 register const I32 op_type = yylval.ival;
79072805
LW
1189
1190 if (op_type == OP_NULL) {
3280af22
NIS
1191 yylval.opval = PL_lex_op;
1192 PL_lex_op = Nullop;
79072805
LW
1193 return THING;
1194 }
1195 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1196 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1197
1198 if (SvTYPE(sv) == SVt_PVIV) {
1199 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1200 STRLEN len;
cfd0369c 1201 const char *p = SvPV_const(sv, len);
f54cb97a 1202 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1203 if (SvUTF8(sv))
1204 SvUTF8_on(nsv);
b3ac6de7
IZ
1205 SvREFCNT_dec(sv);
1206 sv = nsv;
4e553d73 1207 }
b3ac6de7 1208 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1209 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1210 /* Allow <FH> // "foo" */
1211 if (op_type == OP_READLINE)
1212 PL_expect = XTERMORDORDOR;
79072805
LW
1213 return THING;
1214 }
1215
3280af22
NIS
1216 PL_sublex_info.super_state = PL_lex_state;
1217 PL_sublex_info.sub_inwhat = op_type;
1218 PL_sublex_info.sub_op = PL_lex_op;
1219 PL_lex_state = LEX_INTERPPUSH;
55497cff 1220
3280af22
NIS
1221 PL_expect = XTERM;
1222 if (PL_lex_op) {
1223 yylval.opval = PL_lex_op;
1224 PL_lex_op = Nullop;
55497cff
PP
1225 return PMFUNC;
1226 }
1227 else
1228 return FUNC;
1229}
1230
ffb4593c
NT
1231/*
1232 * S_sublex_push
1233 * Create a new scope to save the lexing state. The scope will be
1234 * ended in S_sublex_done. Returns a '(', starting the function arguments
1235 * to the uc, lc, etc. found before.
1236 * Sets PL_lex_state to LEX_INTERPCONCAT.
1237 */
1238
76e3520e 1239STATIC I32
cea2e8a9 1240S_sublex_push(pTHX)
55497cff 1241{
27da23d5 1242 dVAR;
f46d017c 1243 ENTER;
55497cff 1244
3280af22
NIS
1245 PL_lex_state = PL_sublex_info.super_state;
1246 SAVEI32(PL_lex_dojoin);
1247 SAVEI32(PL_lex_brackets);
3280af22
NIS
1248 SAVEI32(PL_lex_casemods);
1249 SAVEI32(PL_lex_starts);
1250 SAVEI32(PL_lex_state);
7766f137 1251 SAVEVPTR(PL_lex_inpat);
3280af22 1252 SAVEI32(PL_lex_inwhat);
57843af0 1253 SAVECOPLINE(PL_curcop);
3280af22 1254 SAVEPPTR(PL_bufptr);
8452ff4b 1255 SAVEPPTR(PL_bufend);
3280af22
NIS
1256 SAVEPPTR(PL_oldbufptr);
1257 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1258 SAVEPPTR(PL_last_lop);
1259 SAVEPPTR(PL_last_uni);
3280af22
NIS
1260 SAVEPPTR(PL_linestart);
1261 SAVESPTR(PL_linestr);
8edd5f42
RGS
1262 SAVEGENERICPV(PL_lex_brackstack);
1263 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1264
1265 PL_linestr = PL_lex_stuff;
1266 PL_lex_stuff = Nullsv;
1267
9cbb5ea2
GS
1268 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1269 = SvPVX(PL_linestr);
3280af22 1270 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1271 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1272 SAVEFREESV(PL_linestr);
1273
1274 PL_lex_dojoin = FALSE;
1275 PL_lex_brackets = 0;
a02a5408
JC
1276 Newx(PL_lex_brackstack, 120, char);
1277 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1278 PL_lex_casemods = 0;
1279 *PL_lex_casestack = '\0';
1280 PL_lex_starts = 0;
1281 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1282 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1283
1284 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1285 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1286 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1287 else
3280af22 1288 PL_lex_inpat = Nullop;
79072805 1289
55497cff 1290 return '(';
79072805
LW
1291}
1292
ffb4593c
NT
1293/*
1294 * S_sublex_done
1295 * Restores lexer state after a S_sublex_push.
1296 */
1297
76e3520e 1298STATIC I32
cea2e8a9 1299S_sublex_done(pTHX)
79072805 1300{
27da23d5 1301 dVAR;
3280af22 1302 if (!PL_lex_starts++) {
9d4ba2ae 1303 SV * const sv = newSVpvn("",0);
9aa983d2
JH
1304 if (SvUTF8(PL_linestr))
1305 SvUTF8_on(sv);
3280af22 1306 PL_expect = XOPERATOR;
9aa983d2 1307 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1308 return THING;
1309 }
1310
3280af22
NIS
1311 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1312 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1313 return yylex();
79072805
LW
1314 }
1315
ffb4593c 1316 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1317 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1318 PL_linestr = PL_lex_repl;
1319 PL_lex_inpat = 0;
1320 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1321 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1322 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1323 SAVEFREESV(PL_linestr);
1324 PL_lex_dojoin = FALSE;
1325 PL_lex_brackets = 0;
3280af22
NIS
1326 PL_lex_casemods = 0;
1327 *PL_lex_casestack = '\0';
1328 PL_lex_starts = 0;
25da4f38 1329 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1330 PL_lex_state = LEX_INTERPNORMAL;
1331 PL_lex_starts++;
e9fa98b2
HS
1332 /* we don't clear PL_lex_repl here, so that we can check later
1333 whether this is an evalled subst; that means we rely on the
1334 logic to ensure sublex_done() is called again only via the
1335 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1336 }
e9fa98b2 1337 else {
3280af22 1338 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1339 PL_lex_repl = Nullsv;
1340 }
79072805 1341 return ',';
ffed7fef
LW
1342 }
1343 else {
f46d017c 1344 LEAVE;
3280af22
NIS
1345 PL_bufend = SvPVX(PL_linestr);
1346 PL_bufend += SvCUR(PL_linestr);
1347 PL_expect = XOPERATOR;
09bef843 1348 PL_sublex_info.sub_inwhat = 0;
79072805 1349 return ')';
ffed7fef
LW
1350 }
1351}
1352
02aa26ce
NT
1353/*
1354 scan_const
1355
1356 Extracts a pattern, double-quoted string, or transliteration. This
1357 is terrifying code.
1358
3280af22
NIS
1359 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1360 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1361 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1362
9b599b2a
GS
1363 Returns a pointer to the character scanned up to. Iff this is
1364 advanced from the start pointer supplied (ie if anything was
1365 successfully parsed), will leave an OP for the substring scanned
1366 in yylval. Caller must intuit reason for not parsing further
1367 by looking at the next characters herself.
1368
02aa26ce
NT
1369 In patterns:
1370 backslashes:
1371 double-quoted style: \r and \n
1372 regexp special ones: \D \s
1373 constants: \x3
1374 backrefs: \1 (deprecated in substitution replacements)
1375 case and quoting: \U \Q \E
1376 stops on @ and $, but not for $ as tail anchor
1377
1378 In transliterations:
1379 characters are VERY literal, except for - not at the start or end
1380 of the string, which indicates a range. scan_const expands the
1381 range to the full set of intermediate characters.
1382
1383 In double-quoted strings:
1384 backslashes:
1385 double-quoted style: \r and \n
1386 constants: \x3
1387 backrefs: \1 (deprecated)
1388 case and quoting: \U \Q \E
1389 stops on @ and $
1390
1391 scan_const does *not* construct ops to handle interpolated strings.
1392 It stops processing as soon as it finds an embedded $ or @ variable
1393 and leaves it to the caller to work out what's going on.
1394
da6eedaa 1395 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1396
1397 $ in pattern could be $foo or could be tail anchor. Assumption:
1398 it's a tail anchor if $ is the last thing in the string, or if it's
1399 followed by one of ")| \n\t"
1400
1401 \1 (backreferences) are turned into $1
1402
1403 The structure of the code is
1404 while (there's a character to process) {
1405 handle transliteration ranges
1406 skip regexp comments
1407 skip # initiated comments in //x patterns
1408 check for embedded @foo
1409 check for embedded scalars
1410 if (backslash) {
1411 leave intact backslashes from leave (below)
1412 deprecate \1 in strings and sub replacements
1413 handle string-changing backslashes \l \U \Q \E, etc.
1414 switch (what was escaped) {
1415 handle - in a transliteration (becomes a literal -)
1416 handle \132 octal characters
1417 handle 0x15 hex characters
1418 handle \cV (control V)
1419 handle printf backslashes (\f, \r, \n, etc)
1420 } (end switch)
1421 } (end if backslash)
1422 } (end while character to read)
4e553d73 1423
02aa26ce
NT
1424*/
1425
76e3520e 1426STATIC char *
cea2e8a9 1427S_scan_const(pTHX_ char *start)
79072805 1428{
3280af22 1429 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1430 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1431 register char *s = start; /* start of the constant */
1432 register char *d = SvPVX(sv); /* destination for copies */
1433 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1434 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1435 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1436 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1437 UV uv;
4c3a8340
ST
1438#ifdef EBCDIC
1439 UV literal_endpoint = 0;
1440#endif
012bcf8d 1441
dff6d3cd 1442 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1443 PL_lex_inpat
b6d5fef8 1444 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1445 : "";
79072805 1446
2b9d42f0
NIS
1447 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1448 /* If we are doing a trans and we know we want UTF8 set expectation */
1449 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1450 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1451 }
1452
1453
79072805 1454 while (s < send || dorange) {
02aa26ce 1455 /* get transliterations out of the way (they're most literal) */
3280af22 1456 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1457 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1458 if (dorange) {
1ba5c669
JH
1459 I32 i; /* current expanded character */
1460 I32 min; /* first character in range */
1461 I32 max; /* last character in range */
02aa26ce 1462
2b9d42f0 1463 if (has_utf8) {
9d4ba2ae 1464 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1465 char *e = d++;
1466 while (e-- > c)
1467 *(e + 1) = *e;
25716404 1468 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1469 /* mark the range as done, and continue */
1470 dorange = FALSE;
1471 didrange = TRUE;
1472 continue;
1473 }
2b9d42f0 1474
95a20fc0 1475 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1476 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1477 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1478 d -= 2; /* eat the first char and the - */
1479
8ada0baa
JH
1480 min = (U8)*d; /* first char in range */
1481 max = (U8)d[1]; /* last char in range */
1482
c2e66d9e 1483 if (min > max) {
01ec43d0 1484 Perl_croak(aTHX_
d1573ac7 1485 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1486 (char)min, (char)max);
c2e66d9e
GS
1487 }
1488
c7f1f016 1489#ifdef EBCDIC
4c3a8340
ST
1490 if (literal_endpoint == 2 &&
1491 ((isLOWER(min) && isLOWER(max)) ||
1492 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1493 if (isLOWER(min)) {
1494 for (i = min; i <= max; i++)
1495 if (isLOWER(i))
db42d148 1496 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1497 } else {
1498 for (i = min; i <= max; i++)
1499 if (isUPPER(i))
db42d148 1500 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1501 }
1502 }
1503 else
1504#endif
1505 for (i = min; i <= max; i++)
eb160463 1506 *d++ = (char)i;
02aa26ce
NT
1507
1508 /* mark the range as done, and continue */
79072805 1509 dorange = FALSE;
01ec43d0 1510 didrange = TRUE;
4c3a8340
ST
1511#ifdef EBCDIC
1512 literal_endpoint = 0;
1513#endif
79072805 1514 continue;
4e553d73 1515 }
02aa26ce
NT
1516
1517 /* range begins (ignore - as first or last char) */
79072805 1518 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1519 if (didrange) {
1fafa243 1520 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1521 }
2b9d42f0 1522 if (has_utf8) {
25716404 1523 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1524 s++;
1525 continue;
1526 }
79072805
LW
1527 dorange = TRUE;
1528 s++;
01ec43d0
GS
1529 }
1530 else {
1531 didrange = FALSE;
4c3a8340
ST
1532#ifdef EBCDIC
1533 literal_endpoint = 0;
1534#endif
01ec43d0 1535 }
79072805 1536 }
02aa26ce
NT
1537
1538 /* if we get here, we're not doing a transliteration */
1539
0f5d15d6
IZ
1540 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1541 except for the last char, which will be done separately. */
3280af22 1542 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1543 if (s[2] == '#') {
e994fd66 1544 while (s+1 < send && *s != ')')
db42d148 1545 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1546 }
1547 else if (s[2] == '{' /* This should match regcomp.c */
1548 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1549 {
cc6b7395 1550 I32 count = 1;
0f5d15d6 1551 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1552 char c;
1553
d9f97599
GS
1554 while (count && (c = *regparse)) {
1555 if (c == '\\' && regparse[1])
1556 regparse++;
4e553d73 1557 else if (c == '{')
cc6b7395 1558 count++;
4e553d73 1559 else if (c == '}')
cc6b7395 1560 count--;
d9f97599 1561 regparse++;
cc6b7395 1562 }
e994fd66 1563 if (*regparse != ')')
5bdf89e7 1564 regparse--; /* Leave one char for continuation. */
0f5d15d6 1565 while (s < regparse)
db42d148 1566 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1567 }
748a9306 1568 }
02aa26ce
NT
1569
1570 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1571 else if (*s == '#' && PL_lex_inpat &&
1572 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1573 while (s+1 < send && *s != '\n')
db42d148 1574 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1575 }
02aa26ce 1576
5d1d4326 1577 /* check for embedded arrays
da6eedaa 1578 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1579 */
7e2040f0 1580 else if (*s == '@' && s[1]
5d1d4326 1581 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1582 break;
02aa26ce
NT
1583
1584 /* check for embedded scalars. only stop if we're sure it's a
1585 variable.
1586 */
79072805 1587 else if (*s == '$') {
3280af22 1588 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1589 break;
6002328a 1590 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1591 break; /* in regexp, $ might be tail anchor */
1592 }
02aa26ce 1593
2b9d42f0
NIS
1594 /* End of else if chain - OP_TRANS rejoin rest */
1595
02aa26ce 1596 /* backslashes */
79072805
LW
1597 if (*s == '\\' && s+1 < send) {
1598 s++;
02aa26ce
NT
1599
1600 /* some backslashes we leave behind */
c9f97d15 1601 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1602 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1603 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1604 continue;
1605 }
02aa26ce
NT
1606
1607 /* deprecate \1 in strings and substitution replacements */
3280af22 1608 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1609 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1610 {
599cee73 1611 if (ckWARN(WARN_SYNTAX))
9014280d 1612 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1613 *--s = '$';
1614 break;
1615 }
02aa26ce
NT
1616
1617 /* string-change backslash escapes */
3280af22 1618 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1619 --s;
1620 break;
1621 }
02aa26ce
NT
1622
1623 /* if we get here, it's either a quoted -, or a digit */
79072805 1624 switch (*s) {
02aa26ce
NT
1625
1626 /* quoted - in transliterations */
79072805 1627 case '-':
3280af22 1628 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1629 *d++ = *s++;
1630 continue;
1631 }
1632 /* FALL THROUGH */
1633 default:
11b8faa4 1634 {
041457d9
DM
1635 if (isALNUM(*s) &&
1636 *s != '_' &&
1637 ckWARN(WARN_MISC))
9014280d 1638 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1639 "Unrecognized escape \\%c passed through",
1640 *s);
1641 /* default action is to copy the quoted character */
f9a63242 1642 goto default_action;
11b8faa4 1643 }
02aa26ce
NT
1644
1645 /* \132 indicates an octal constant */
79072805
LW
1646 case '0': case '1': case '2': case '3':
1647 case '4': case '5': case '6': case '7':
ba210ebe 1648 {
53305cf1
NC
1649 I32 flags = 0;
1650 STRLEN len = 3;
1651 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1652 s += len;
1653 }
012bcf8d 1654 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1655
1656 /* \x24 indicates a hex constant */
79072805 1657 case 'x':
a0ed51b3
LW
1658 ++s;
1659 if (*s == '{') {
9d4ba2ae 1660 char* const e = strchr(s, '}');
a4c04bdc
NC
1661 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1662 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1663 STRLEN len;
355860ce 1664
53305cf1 1665 ++s;
adaeee49 1666 if (!e) {
a0ed51b3 1667 yyerror("Missing right brace on \\x{}");
355860ce 1668 continue;
ba210ebe 1669 }
53305cf1
NC
1670 len = e - s;
1671 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1672 s = e + 1;
a0ed51b3
LW
1673 }
1674 else {
ba210ebe 1675 {
53305cf1 1676 STRLEN len = 2;
a4c04bdc 1677 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1678 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1679 s += len;
1680 }
012bcf8d
GS
1681 }
1682
1683 NUM_ESCAPE_INSERT:
1684 /* Insert oct or hex escaped character.
301d3d20 1685 * There will always enough room in sv since such
db42d148 1686 * escapes will be longer than any UTF-8 sequence
301d3d20 1687 * they can end up as. */
ba7cea30 1688
c7f1f016
NIS
1689 /* We need to map to chars to ASCII before doing the tests
1690 to cover EBCDIC
1691 */
c4d5f83a 1692 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1693 if (!has_utf8 && uv > 255) {
301d3d20
JH
1694 /* Might need to recode whatever we have
1695 * accumulated so far if it contains any
1696 * hibit chars.
1697 *
1698 * (Can't we keep track of that and avoid
1699 * this rescan? --jhi)
012bcf8d 1700 */
c7f1f016 1701 int hicount = 0;
63cd0674
NIS
1702 U8 *c;
1703 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1704 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1705 hicount++;
db42d148 1706 }
012bcf8d 1707 }
63cd0674 1708 if (hicount) {
9d4ba2ae 1709 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1710 U8 *src, *dst;
1711 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1712 src = (U8 *)d - 1;
1713 dst = src+hicount;
1714 d += hicount;
cfd0369c 1715 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1716 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1717 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1718 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1719 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1720 }
1721 else {
63cd0674 1722 *dst-- = *src;
012bcf8d 1723 }
c7f1f016 1724 src--;
012bcf8d
GS
1725 }
1726 }
1727 }
1728
9aa983d2 1729 if (has_utf8 || uv > 255) {
9041c2e3 1730 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1731 has_utf8 = TRUE;
f9a63242
JH
1732 if (PL_lex_inwhat == OP_TRANS &&
1733 PL_sublex_info.sub_op) {
1734 PL_sublex_info.sub_op->op_private |=
1735 (PL_lex_repl ? OPpTRANS_FROM_UTF
1736 : OPpTRANS_TO_UTF);
f9a63242 1737 }
012bcf8d 1738 }
a0ed51b3 1739 else {
012bcf8d 1740 *d++ = (char)uv;
a0ed51b3 1741 }
012bcf8d
GS
1742 }
1743 else {
c4d5f83a 1744 *d++ = (char) uv;
a0ed51b3 1745 }
79072805 1746 continue;
02aa26ce 1747
b239daa5 1748 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1749 case 'N':
55eda711 1750 ++s;
423cee85
JH
1751 if (*s == '{') {
1752 char* e = strchr(s, '}');
155aba94 1753 SV *res;
423cee85 1754 STRLEN len;
cfd0369c 1755 const char *str;
4e553d73 1756
423cee85 1757 if (!e) {
5777a3f7 1758 yyerror("Missing right brace on \\N{}");
423cee85
JH
1759 e = s - 1;
1760 goto cont_scan;
1761 }
dbc0d4f2
JH
1762 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1763 /* \N{U+...} */
1764 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1765 PERL_SCAN_DISALLOW_PREFIX;
1766 s += 3;
1767 len = e - s;
1768 uv = grok_hex(s, &len, &flags, NULL);
1769 s = e + 1;
1770 goto NUM_ESCAPE_INSERT;
1771 }
55eda711
JH
1772 res = newSVpvn(s + 1, e - s - 1);
1773 res = new_constant( Nullch, 0, "charnames",
1774 res, Nullsv, "\\N{...}" );
f9a63242
JH
1775 if (has_utf8)
1776 sv_utf8_upgrade(res);
cfd0369c 1777 str = SvPV_const(res,len);
1c47067b
JH
1778#ifdef EBCDIC_NEVER_MIND
1779 /* charnames uses pack U and that has been
1780 * recently changed to do the below uni->native
1781 * mapping, so this would be redundant (and wrong,
1782 * the code point would be doubly converted).
1783 * But leave this in just in case the pack U change
1784 * gets revoked, but the semantics is still
1785 * desireable for charnames. --jhi */
cddc7ef4 1786 {
cfd0369c 1787 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1788
1789 if (uv < 0x100) {
89ebb4a3 1790 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1791
1792 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1793 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1794 str = SvPV_const(res, len);
cddc7ef4
JH
1795 }
1796 }
1797#endif
89491803 1798 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1799 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1800 SvCUR_set(sv, d - ostart);
1801 SvPOK_on(sv);
e4f3eed8 1802 *d = '\0';
f08d6ad9 1803 sv_utf8_upgrade(sv);
d2f449dd 1804 /* this just broke our allocation above... */
eb160463 1805 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1806 d = SvPVX(sv) + SvCUR(sv);
89491803 1807 has_utf8 = TRUE;
f08d6ad9 1808 }
eb160463 1809 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1810 const char * const odest = SvPVX_const(sv);
423cee85 1811
8973db79 1812 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1813 d = SvPVX(sv) + (d - odest);
1814 }
1815 Copy(str, d, len, char);
1816 d += len;
1817 SvREFCNT_dec(res);
1818 cont_scan:
1819 s = e + 1;
1820 }
1821 else
5777a3f7 1822 yyerror("Missing braces on \\N{}");
423cee85
JH
1823 continue;
1824
02aa26ce 1825 /* \c is a control character */
79072805
LW
1826 case 'c':
1827 s++;
961ce445 1828 if (s < send) {
ba210ebe 1829 U8 c = *s++;
c7f1f016
NIS
1830#ifdef EBCDIC
1831 if (isLOWER(c))
1832 c = toUPPER(c);
1833#endif
db42d148 1834 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1835 }
961ce445
RGS
1836 else {
1837 yyerror("Missing control char name in \\c");
1838 }
79072805 1839 continue;
02aa26ce
NT
1840
1841 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1842 case 'b':
db42d148 1843 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1844 break;
1845 case 'n':
db42d148 1846 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1847 break;
1848 case 'r':
db42d148 1849 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1850 break;
1851 case 'f':
db42d148 1852 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1853 break;
1854 case 't':
db42d148 1855 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1856 break;
34a3fe2a 1857 case 'e':
db42d148 1858 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1859 break;
1860 case 'a':
db42d148 1861 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1862 break;
02aa26ce
NT
1863 } /* end switch */
1864
79072805
LW
1865 s++;
1866 continue;
02aa26ce 1867 } /* end if (backslash) */
4c3a8340
ST
1868#ifdef EBCDIC
1869 else
1870 literal_endpoint++;
1871#endif
02aa26ce 1872
f9a63242 1873 default_action:
2b9d42f0
NIS
1874 /* If we started with encoded form, or already know we want it
1875 and then encode the next character */
1876 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1877 STRLEN len = 1;
9d4ba2ae
AL
1878 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1879 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1880 s += len;
1881 if (need > len) {
1882 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1883 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1884 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1885 }
1886 d = (char*)uvchr_to_utf8((U8*)d, uv);
1887 has_utf8 = TRUE;
1888 }
1889 else {
1890 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1891 }
02aa26ce
NT
1892 } /* while loop to process each character */
1893
1894 /* terminate the string and set up the sv */
79072805 1895 *d = '\0';
95a20fc0 1896 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1897 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1898 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1899
79072805 1900 SvPOK_on(sv);
9f4817db 1901 if (PL_encoding && !has_utf8) {
d0063567
DK
1902 sv_recode_to_utf8(sv, PL_encoding);
1903 if (SvUTF8(sv))
1904 has_utf8 = TRUE;
9f4817db 1905 }
2b9d42f0 1906 if (has_utf8) {
7e2040f0 1907 SvUTF8_on(sv);
2b9d42f0 1908 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1909 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1910 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1911 }
1912 }
79072805 1913
02aa26ce 1914 /* shrink the sv if we allocated more than we used */
79072805 1915 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1916 SvPV_shrink_to_cur(sv);
79072805 1917 }
02aa26ce 1918
9b599b2a 1919 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1920 if (s > PL_bufptr) {
1921 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1922 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1923 sv, Nullsv,
4e553d73 1924 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1925 ? "tr"
3280af22 1926 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1927 ? "s"
1928 : "qq")));
79072805 1929 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1930 } else
8990e307 1931 SvREFCNT_dec(sv);
79072805
LW
1932 return s;
1933}
1934
ffb4593c
NT
1935/* S_intuit_more
1936 * Returns TRUE if there's more to the expression (e.g., a subscript),
1937 * FALSE otherwise.
ffb4593c
NT
1938 *
1939 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1940 *
1941 * ->[ and ->{ return TRUE
1942 * { and [ outside a pattern are always subscripts, so return TRUE
1943 * if we're outside a pattern and it's not { or [, then return FALSE
1944 * if we're in a pattern and the first char is a {
1945 * {4,5} (any digits around the comma) returns FALSE
1946 * if we're in a pattern and the first char is a [
1947 * [] returns FALSE
1948 * [SOMETHING] has a funky algorithm to decide whether it's a
1949 * character class or not. It has to deal with things like
1950 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1951 * anything else returns TRUE
1952 */
1953
9cbb5ea2
GS
1954/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1955
76e3520e 1956STATIC int
cea2e8a9 1957S_intuit_more(pTHX_ register char *s)
79072805 1958{
3280af22 1959 if (PL_lex_brackets)
79072805
LW
1960 return TRUE;
1961 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1962 return TRUE;
1963 if (*s != '{' && *s != '[')
1964 return FALSE;
3280af22 1965 if (!PL_lex_inpat)
79072805
LW
1966 return TRUE;
1967
1968 /* In a pattern, so maybe we have {n,m}. */
1969 if (*s == '{') {
1970 s++;
1971 if (!isDIGIT(*s))
1972 return TRUE;
1973 while (isDIGIT(*s))
1974 s++;
1975 if (*s == ',')
1976 s++;
1977 while (isDIGIT(*s))
1978 s++;
1979 if (*s == '}')
1980 return FALSE;
1981 return TRUE;
1982
1983 }
1984
1985 /* On the other hand, maybe we have a character class */
1986
1987 s++;
1988 if (*s == ']' || *s == '^')
1989 return FALSE;
1990 else {
ffb4593c 1991 /* this is terrifying, and it works */
79072805
LW
1992 int weight = 2; /* let's weigh the evidence */
1993 char seen[256];
f27ffc4a 1994 unsigned char un_char = 255, last_un_char;
9d4ba2ae 1995 const char * const send = strchr(s,']');
3280af22 1996 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1997
1998 if (!send) /* has to be an expression */
1999 return TRUE;
2000
2001 Zero(seen,256,char);
2002 if (*s == '$')
2003 weight -= 3;
2004 else if (isDIGIT(*s)) {
2005 if (s[1] != ']') {
2006 if (isDIGIT(s[1]) && s[2] == ']')
2007 weight -= 10;
2008 }
2009 else
2010 weight -= 100;
2011 }
2012 for (; s < send; s++) {
2013 last_un_char = un_char;
2014 un_char = (unsigned char)*s;
2015 switch (*s) {
2016 case '@':
2017 case '&':
2018 case '$':
2019 weight -= seen[un_char] * 10;
7e2040f0 2020 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 2021 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 2022 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
2023 weight -= 100;
2024 else
2025 weight -= 10;
2026 }
2027 else if (*s == '$' && s[1] &&
93a17b20
LW
2028 strchr("[#!%*<>()-=",s[1])) {
2029 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2030 weight -= 10;
2031 else
2032 weight -= 1;
2033 }
2034 break;
2035 case '\\':
2036 un_char = 254;
2037 if (s[1]) {
93a17b20 2038 if (strchr("wds]",s[1]))
79072805
LW
2039 weight += 100;
2040 else if (seen['\''] || seen['"'])
2041 weight += 1;
93a17b20 2042 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2043 weight += 40;
2044 else if (isDIGIT(s[1])) {
2045 weight += 40;
2046 while (s[1] && isDIGIT(s[1]))
2047 s++;
2048 }
2049 }
2050 else
2051 weight += 100;
2052 break;
2053 case '-':
2054 if (s[1] == '\\')
2055 weight += 50;
93a17b20 2056 if (strchr("aA01! ",last_un_char))
79072805 2057 weight += 30;
93a17b20 2058 if (strchr("zZ79~",s[1]))
79072805 2059 weight += 30;
f27ffc4a
GS
2060 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2061 weight -= 5; /* cope with negative subscript */
79072805
LW
2062 break;
2063 default:
3792a11b
NC
2064 if (!isALNUM(last_un_char)
2065 && !(last_un_char == '$' || last_un_char == '@'
2066 || last_un_char == '&')
2067 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2068 char *d = tmpbuf;
2069 while (isALPHA(*s))
2070 *d++ = *s++;
2071 *d = '\0';
2072 if (keyword(tmpbuf, d - tmpbuf))
2073 weight -= 150;
2074 }
2075 if (un_char == last_un_char + 1)
2076 weight += 5;
2077 weight -= seen[un_char];
2078 break;
2079 }
2080 seen[un_char]++;
2081 }
2082 if (weight >= 0) /* probably a character class */
2083 return FALSE;
2084 }
2085
2086 return TRUE;
2087}
ffed7fef 2088
ffb4593c
NT
2089/*
2090 * S_intuit_method
2091 *
2092 * Does all the checking to disambiguate
2093 * foo bar
2094 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2095 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2096 *
2097 * First argument is the stuff after the first token, e.g. "bar".
2098 *
2099 * Not a method if bar is a filehandle.
2100 * Not a method if foo is a subroutine prototyped to take a filehandle.
2101 * Not a method if it's really "Foo $bar"
2102 * Method if it's "foo $bar"
2103 * Not a method if it's really "print foo $bar"
2104 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2105 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2106 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2107 * =>
2108 */
2109
76e3520e 2110STATIC int
cea2e8a9 2111S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2112{
2113 char *s = start + (*start == '$');
3280af22 2114 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2115 STRLEN len;
2116 GV* indirgv;
2117
2118 if (gv) {
b6c543e3 2119 CV *cv;
a0d0e21e
LW
2120 if (GvIO(gv))
2121 return 0;
b6c543e3 2122 if ((cv = GvCVu(gv))) {
95a20fc0 2123 const char *proto = SvPVX_const(cv);
b6c543e3
IZ
2124 if (proto) {
2125 if (*proto == ';')
2126 proto++;
2127 if (*proto == '*')
2128 return 0;
2129 }
2130 } else
a0d0e21e
LW
2131 gv = 0;
2132 }
8903cb82 2133 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2134 /* start is the beginning of the possible filehandle/object,
2135 * and s is the end of it
2136 * tmpbuf is a copy of it
2137 */
2138
a0d0e21e 2139 if (*start == '$') {
3280af22 2140 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2141 return 0;
2142 s = skipspace(s);
3280af22
NIS
2143 PL_bufptr = start;
2144 PL_expect = XREF;
a0d0e21e
LW
2145 return *s == '(' ? FUNCMETH : METHOD;
2146 }
2147 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2148 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2149 len -= 2;
2150 tmpbuf[len] = '\0';
2151 goto bare_package;
2152 }
2153 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2154 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2155 return 0;
2156 /* filehandle or package name makes it a method */
89bfa8cd 2157 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2158 s = skipspace(s);
3280af22 2159 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2160 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2161 bare_package:
3280af22 2162 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2163 newSVpvn(tmpbuf,len));
3280af22
NIS
2164 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2165 PL_expect = XTERM;
a0d0e21e 2166 force_next(WORD);
3280af22 2167 PL_bufptr = s;
a0d0e21e
LW
2168 return *s == '(' ? FUNCMETH : METHOD;
2169 }
2170 }
2171 return 0;
2172}
2173
ffb4593c
NT
2174/*
2175 * S_incl_perldb
2176 * Return a string of Perl code to load the debugger. If PERL5DB
2177 * is set, it will return the contents of that, otherwise a
2178 * compile-time require of perl5db.pl.
2179 */
2180
bfed75c6 2181STATIC const char*
cea2e8a9 2182S_incl_perldb(pTHX)
a0d0e21e 2183{
3280af22 2184 if (PL_perldb) {
9d4ba2ae 2185 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2186
2187 if (pdb)
2188 return pdb;
93189314 2189 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2190 return "BEGIN { require 'perl5db.pl' }";
2191 }
2192 return "";
2193}
2194
2195
16d20bd9 2196/* Encoded script support. filter_add() effectively inserts a
4e553d73 2197 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2198 * Note that the filter function only applies to the current source file
2199 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2200 *
2201 * The datasv parameter (which may be NULL) can be used to pass
2202 * private data to this instance of the filter. The filter function
2203 * can recover the SV using the FILTER_DATA macro and use it to
2204 * store private buffers and state information.
2205 *
2206 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2207 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2208 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2209 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2210 * private use must be set using malloc'd pointers.
2211 */
16d20bd9
AD
2212
2213SV *
864dbfa3 2214Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2215{
f4c556ac
GS
2216 if (!funcp)
2217 return Nullsv;
2218
3280af22
NIS
2219 if (!PL_rsfp_filters)
2220 PL_rsfp_filters = newAV();
16d20bd9 2221 if (!datasv)
8c52afec 2222 datasv = NEWSV(255,0);
862a34c6 2223 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2224 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2225 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2226 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2227 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2228 av_unshift(PL_rsfp_filters, 1);
2229 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2230 return(datasv);
2231}
4e553d73 2232
16d20bd9
AD
2233
2234/* Delete most recently added instance of this filter function. */
a0d0e21e 2235void
864dbfa3 2236Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2237{
e0c19803 2238 SV *datasv;
24801a4b 2239
33073adb 2240#ifdef DEBUGGING
8141890a 2241 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2242#endif
3280af22 2243 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2244 return;
2245 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2246 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2247 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2248 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2249 IoANY(datasv) = (void *)NULL;
3280af22 2250 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2251
16d20bd9
AD
2252 return;
2253 }
2254 /* we need to search for the correct entry and clear it */
cea2e8a9 2255 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2256}
2257
2258
1de9afcd
RGS
2259/* Invoke the idxth filter function for the current rsfp. */
2260/* maxlen 0 = read one text line */
16d20bd9 2261I32
864dbfa3 2262Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2263{
16d20bd9
AD
2264 filter_t funcp;
2265 SV *datasv = NULL;
e50aee73 2266
3280af22 2267 if (!PL_rsfp_filters)
16d20bd9 2268 return -1;
1de9afcd 2269 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2270 /* Provide a default input filter to make life easy. */
2271 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2272 DEBUG_P(PerlIO_printf(Perl_debug_log,
2273 "filter_read %d: from rsfp\n", idx));
4e553d73 2274 if (maxlen) {
16d20bd9
AD
2275 /* Want a block */
2276 int len ;
f54cb97a 2277 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2278
2279 /* ensure buf_sv is large enough */
eb160463 2280 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2281 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2282 if (PerlIO_error(PL_rsfp))
37120919
AD
2283 return -1; /* error */
2284 else
2285 return 0 ; /* end of file */
2286 }
16d20bd9
AD
2287 SvCUR_set(buf_sv, old_len + len) ;
2288 } else {
2289 /* Want a line */
3280af22
NIS
2290 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2291 if (PerlIO_error(PL_rsfp))
37120919
AD
2292 return -1; /* error */
2293 else
2294 return 0 ; /* end of file */
2295 }
16d20bd9
AD
2296 }
2297 return SvCUR(buf_sv);
2298 }
2299 /* Skip this filter slot if filter has been deleted */
1de9afcd 2300 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2301 DEBUG_P(PerlIO_printf(Perl_debug_log,
2302 "filter_read %d: skipped (filter deleted)\n",
2303 idx));
16d20bd9
AD
2304 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2305 }
2306 /* Get function pointer hidden within datasv */
8141890a 2307 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2308 DEBUG_P(PerlIO_printf(Perl_debug_log,
2309 "filter_read %d: via function %p (%s)\n",
cfd0369c 2310 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2311 /* Call function. The function is expected to */
2312 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2313 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2314 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2315}
2316
76e3520e 2317STATIC char *
cea2e8a9 2318S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2319{
c39cd008 2320#ifdef PERL_CR_FILTER
3280af22 2321 if (!PL_rsfp_filters) {
c39cd008 2322 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2323 }
2324#endif
3280af22 2325 if (PL_rsfp_filters) {
55497cff
PP
2326 if (!append)
2327 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2328 if (FILTER_READ(0, sv, 0) > 0)
2329 return ( SvPVX(sv) ) ;
2330 else
2331 return Nullch ;
2332 }
9d116dd7 2333 else
fd049845 2334 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2335}
2336
01ec43d0 2337STATIC HV *
7fc63493 2338S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2339{
2340 GV *gv;
2341
01ec43d0 2342 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2343 return PL_curstash;
2344
2345 if (len > 2 &&
2346 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2347 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2348 {
2349 return GvHV(gv); /* Foo:: */
def3634b
GS
2350 }
2351
2352 /* use constant CLASS => 'MyClass' */
2353 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2354 SV *sv;
2355 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2356 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2357 }
2358 }
2359
2360 return gv_stashpv(pkgname, FALSE);
2361}
a0d0e21e 2362
468aa647 2363STATIC char *
cc6ed77d 2364S_tokenize_use(pTHX_ int is_use, char *s) {
468aa647
RGS
2365 if (PL_expect != XSTATE)
2366 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2367 is_use ? "use" : "no"));
2368 s = skipspace(s);
2369 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2370 s = force_version(s, TRUE);
2371 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2372 PL_nextval[PL_nexttoke].opval = Nullop;
2373 force_next(WORD);
2374 }
2375 else if (*s == 'v') {
2376 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2377 s = force_version(s, FALSE);
2378 }
2379 }
2380 else {
2381 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2382 s = force_version(s, FALSE);
2383 }
2384 yylval.ival = is_use;
2385 return s;
2386}
748a9306 2387#ifdef DEBUGGING
27da23d5 2388 static const char* const exp_name[] =
09bef843 2389 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2390 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2391 };
748a9306 2392#endif
463ee0b2 2393
02aa26ce
NT
2394/*
2395 yylex
2396
2397 Works out what to call the token just pulled out of the input
2398 stream. The yacc parser takes care of taking the ops we return and
2399 stitching them into a tree.
2400
2401 Returns:
2402 PRIVATEREF
2403
2404 Structure:
2405 if read an identifier
2406 if we're in a my declaration
2407 croak if they tried to say my($foo::bar)
2408 build the ops for a my() declaration
2409 if it's an access to a my() variable
2410 are we in a sort block?
2411 croak if my($a); $a <=> $b
2412 build ops for access to a my() variable
2413 if in a dq string, and they've said @foo and we can't find @foo
2414 croak
2415 build ops for a bareword
2416 if we already built the token before, use it.
2417*/
2418
20141f0e 2419
dba4d153
JH
2420#ifdef __SC__
2421#pragma segment Perl_yylex
2422#endif
dba4d153 2423int
dba4d153 2424Perl_yylex(pTHX)
20141f0e 2425{
3afc138a 2426 register char *s = PL_bufptr;
378cc40b 2427 register char *d;
79072805 2428 register I32 tmp;
463ee0b2 2429 STRLEN len;
161b471a
NIS
2430 GV *gv = Nullgv;
2431 GV **gvp = 0;
aa7440fb 2432 bool bof = FALSE;
1d239bbb 2433 I32 orig_keyword = 0;
a687059c 2434
bbf60fe6 2435 DEBUG_T( {
b6007c36
DM
2436 SV* tmp = newSVpvn("", 0);
2437 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2438 (IV)CopLINE(PL_curcop),
2439 lex_state_names[PL_lex_state],
2440 exp_name[PL_expect],
2441 pv_display(tmp, s, strlen(s), 0, 60));
2442 SvREFCNT_dec(tmp);
bbf60fe6 2443 } );
02aa26ce 2444 /* check if there's an identifier for us to look at */
ba979b31 2445 if (PL_pending_ident)
bbf60fe6 2446 return REPORT(S_pending_ident(aTHX));
bbce6d69 2447
02aa26ce
NT
2448 /* no identifier pending identification */
2449
3280af22 2450 switch (PL_lex_state) {
79072805
LW
2451#ifdef COMMENTARY
2452 case LEX_NORMAL: /* Some compilers will produce faster */
2453 case LEX_INTERPNORMAL: /* code if we comment these out. */
2454 break;
2455#endif
2456
09bef843 2457 /* when we've already built the next token, just pull it out of the queue */
79072805 2458 case LEX_KNOWNEXT:
3280af22
NIS
2459 PL_nexttoke--;
2460 yylval = PL_nextval[PL_nexttoke];
2461 if (!PL_nexttoke) {
2462 PL_lex_state = PL_lex_defer;
2463 PL_expect = PL_lex_expect;
2464 PL_lex_defer = LEX_NORMAL;
463ee0b2 2465 }
bbf60fe6 2466 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2467
02aa26ce 2468 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2469 when we get here, PL_bufptr is at the \
02aa26ce 2470 */
79072805
LW
2471 case LEX_INTERPCASEMOD:
2472#ifdef DEBUGGING
3280af22 2473 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2474 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2475#endif
02aa26ce 2476 /* handle \E or end of string */
3280af22 2477 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2478 /* if at a \E */
3280af22 2479 if (PL_lex_casemods) {
f54cb97a 2480 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2481 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2482
3792a11b
NC
2483 if (PL_bufptr != PL_bufend
2484 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2485 PL_bufptr += 2;
2486 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2487 }
bbf60fe6 2488 return REPORT(')');
79072805 2489 }
3280af22
NIS
2490 if (PL_bufptr != PL_bufend)
2491 PL_bufptr += 2;
2492 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2493 return yylex();
79072805
LW
2494 }
2495 else {
607df283 2496 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2497 "### Saw case modifier\n"); });
3280af22 2498 s = PL_bufptr + 1;
6e909404
JH
2499 if (s[1] == '\\' && s[2] == 'E') {
2500 PL_bufptr = s + 3;
2501 PL_lex_state = LEX_INTERPCONCAT;
2502 return yylex();
a0d0e21e 2503 }
6e909404
JH
2504 else {
2505 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2506 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2507 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2508 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2509 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2510 return REPORT(')');
6e909404
JH
2511 }
2512 if (PL_lex_casemods > 10)
2513 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2514 PL_lex_casestack[PL_lex_casemods++] = *s;
2515 PL_lex_casestack[PL_lex_casemods] = '\0';
2516 PL_lex_state = LEX_INTERPCONCAT;
2517 PL_nextval[PL_nexttoke].ival = 0;
2518 force_next('(');
2519 if (*s == 'l')
2520 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2521 else if (*s == 'u')
2522 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2523 else if (*s == 'L')
2524 PL_nextval[PL_nexttoke].ival = OP_LC;
2525 else if (*s == 'U')
2526 PL_nextval[PL_nexttoke].ival = OP_UC;
2527 else if (*s == 'Q')
2528 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2529 else
2530 Perl_croak(aTHX_ "panic: yylex");
2531 PL_bufptr = s + 1;
a0d0e21e 2532 }
79072805 2533 force_next(FUNC);
3280af22
NIS
2534 if (PL_lex_starts) {
2535 s = PL_bufptr;
2536 PL_lex_starts = 0;
131b3ad0
DM
2537 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2538 if (PL_lex_casemods == 1 && PL_lex_inpat)
2539 OPERATOR(',');
2540 else
2541 Aop(OP_CONCAT);
79072805
LW
2542 }
2543 else
cea2e8a9 2544 return yylex();
79072805
LW
2545 }
2546
55497cff 2547 case LEX_INTERPPUSH:
bbf60fe6 2548 return REPORT(sublex_push());
55497cff 2549
79072805 2550 case LEX_INTERPSTART:
3280af22 2551 if (PL_bufptr == PL_bufend)
bbf60fe6 2552 return REPORT(sublex_done());
607df283 2553 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2554 "### Interpolated variable\n"); });
3280af22
NIS
2555 PL_expect = XTERM;
2556 PL_lex_dojoin = (*PL_bufptr == '@');
2557 PL_lex_state = LEX_INTERPNORMAL;
2558 if (PL_lex_dojoin) {
2559 PL_nextval[PL_nexttoke].ival = 0;
79072805 2560 force_next(',');
a0d0e21e 2561 force_ident("\"", '$');
3280af22 2562 PL_nextval[PL_nexttoke].ival = 0;
79072805 2563 force_next('$');
3280af22 2564 PL_nextval[PL_nexttoke].ival = 0;
79072805 2565 force_next('(');
3280af22 2566 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2567 force_next(FUNC);
2568 }
3280af22
NIS
2569 if (PL_lex_starts++) {
2570 s = PL_bufptr;
131b3ad0
DM
2571 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2572 if (!PL_lex_casemods && PL_lex_inpat)
2573 OPERATOR(',');
2574 else
2575 Aop(OP_CONCAT);
79072805 2576 }
cea2e8a9 2577 return yylex();
79072805
LW
2578
2579 case LEX_INTERPENDMAYBE:
3280af22
NIS
2580 if (intuit_more(PL_bufptr)) {
2581 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2582 break;
2583 }
2584 /* FALL THROUGH */
2585
2586 case LEX_INTERPEND:
3280af22
NIS
2587 if (PL_lex_dojoin) {
2588 PL_lex_dojoin = FALSE;
2589 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2590 return REPORT(')');
79072805 2591 }
43a16006 2592 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2593 && SvEVALED(PL_lex_repl))
43a16006 2594 {
e9fa98b2 2595 if (PL_bufptr != PL_bufend)
cea2e8a9 2596 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2597 PL_lex_repl = Nullsv;
2598 }
79072805
LW
2599 /* FALLTHROUGH */
2600 case LEX_INTERPCONCAT:
2601#ifdef DEBUGGING
3280af22 2602 if (PL_lex_brackets)
cea2e8a9 2603 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2604#endif
3280af22 2605 if (PL_bufptr == PL_bufend)
bbf60fe6 2606 return REPORT(sublex_done());
79072805 2607
3280af22
NIS
2608 if (SvIVX(PL_linestr) == '\'') {
2609 SV *sv = newSVsv(PL_linestr);
2610 if (!PL_lex_inpat)
76e3520e 2611 sv = tokeq(sv);
3280af22 2612 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2613 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2614 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2615 s = PL_bufend;
79072805
LW
2616 }
2617 else {
3280af22 2618 s = scan_const(PL_bufptr);
79072805 2619 if (*s == '\\')
3280af22 2620 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2621 else
3280af22 2622 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2623 }
2624
3280af22
NIS
2625 if (s != PL_bufptr) {
2626 PL_nextval[PL_nexttoke] = yylval;
2627 PL_expect = XTERM;
79072805 2628 force_next(THING);
131b3ad0
DM
2629 if (PL_lex_starts++) {
2630 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2631 if (!PL_lex_casemods && PL_lex_inpat)
2632 OPERATOR(',');
2633 else
2634 Aop(OP_CONCAT);
2635 }
79072805 2636 else {
3280af22 2637 PL_bufptr = s;
cea2e8a9 2638 return yylex();
79072805
LW
2639 }
2640 }
2641
cea2e8a9 2642 return yylex();
a0d0e21e 2643 case LEX_FORMLINE:
3280af22
NIS
2644 PL_lex_state = LEX_NORMAL;
2645 s = scan_formline(PL_bufptr);
2646 if (!PL_lex_formbrack)
a0d0e21e
LW
2647 goto rightbracket;
2648 OPERATOR(';');
79072805
LW
2649 }
2650
3280af22
NIS
2651 s = PL_bufptr;
2652 PL_oldoldbufptr = PL_oldbufptr;
2653 PL_oldbufptr = s;
463ee0b2
LW
2654
2655 retry:
378cc40b
LW
2656 switch (*s) {
2657 default:
7e2040f0 2658 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2659 goto keylookup;
cea2e8a9 2660 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2661 case 4:
2662 case 26:
2663 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2664 case 0:
3280af22
NIS
2665 if (!PL_rsfp) {
2666 PL_last_uni = 0;
2667 PL_last_lop = 0;
c5ee2135 2668 if (PL_lex_brackets) {
0bd48802
AL
2669 yyerror(PL_lex_formbrack
2670 ? "Format not terminated"
2671 : "Missing right curly or square bracket");
c5ee2135 2672 }
4e553d73 2673 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2674 "### Tokener got EOF\n");
5f80b19c 2675 } );
79072805 2676 TOKEN(0);
463ee0b2 2677 }
3280af22 2678 if (s++ < PL_bufend)
a687059c 2679 goto retry; /* ignore stray nulls */
3280af22
NIS
2680 PL_last_uni = 0;
2681 PL_last_lop = 0;
2682 if (!PL_in_eval && !PL_preambled) {
2683 PL_preambled = TRUE;
2684 sv_setpv(PL_linestr,incl_perldb());
2685 if (SvCUR(PL_linestr))
4147a61b 2686 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2687 if (PL_preambleav){
2688 while(AvFILLp(PL_preambleav) >= 0) {
2689 SV *tmpsv = av_shift(PL_preambleav);
2690 sv_catsv(PL_linestr, tmpsv);
4147a61b 2691 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2692 sv_free(tmpsv);
2693 }
3280af22
NIS
2694 sv_free((SV*)PL_preambleav);
2695 PL_preambleav = NULL;
91b7def8 2696 }
3280af22
NIS
2697 if (PL_minus_n || PL_minus_p) {
2698 sv_catpv(PL_linestr, "LINE: while (<>) {");
2699 if (PL_minus_l)
2700 sv_catpv(PL_linestr,"chomp;");
2701 if (PL_minus_a) {
3280af22 2702 if (PL_minus_F) {
3792a11b
NC
2703 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2704 || *PL_splitstr == '"')
3280af22 2705 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2706 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2707 else {
c8ef6a4b
NC
2708 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2709 bytes can be used as quoting characters. :-) */
48c4c863
NC
2710 /* The count here deliberately includes the NUL
2711 that terminates the C string constant. This
2712 embeds the opening NUL into the string. */
dd374669 2713 const char *splits = PL_splitstr;
9a3fb652 2714 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2715 do {
2716 /* Need to \ \s */
dd374669
AL
2717 if (*splits == '\\')
2718 sv_catpvn(PL_linestr, splits, 1);
2719 sv_catpvn(PL_linestr, splits, 1);
2720 } while (*splits++);
48c4c863
NC
2721 /* This loop will embed the trailing NUL of
2722 PL_linestr as the last thing it does before
2723 terminating. */
4fccd7c6 2724 sv_catpvn(PL_linestr, ");", 2);
54310121 2725 }
2304df62
AD
2726 }
2727 else
75c72d73 2728 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2729 }
79072805 2730 }
4147a61b 2731 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2732 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2733 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2734 PL_last_lop = PL_last_uni = Nullch;
3280af22 2735 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2736 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2737
2738 sv_upgrade(sv, SVt_PVMG);
3280af22 2739 sv_setsv(sv,PL_linestr);
0ac0412a 2740 (void)SvIOK_on(sv);
45977657 2741 SvIV_set(sv, 0);
57843af0 2742 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2743 }
79072805 2744 goto retry;
a687059c 2745 }
e929a76b 2746 do {
aa7440fb 2747 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2748 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2749 fake_eof:
2750 if (PL_rsfp) {
2751 if (PL_preprocess && !PL_in_eval)
2752 (void)PerlProc_pclose(PL_rsfp);
2753 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2754 PerlIO_clearerr(PL_rsfp);
2755 else
2756 (void)PerlIO_close(PL_rsfp);
2757 PL_rsfp = Nullfp;
2758 PL_doextract = FALSE;
2759 }
2760 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2761 sv_setpv(PL_linestr,PL_minus_p
2762 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2763 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2764 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2765 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2766 PL_minus_n = PL_minus_p = 0;
2767 goto retry;
2768 }
2769 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2770 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2771 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2772 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2773 }
7aa207d6
JH
2774 /* If it looks like the start of a BOM or raw UTF-16,
2775 * check if it in fact is. */
2776 else if (bof &&
2777 (*s == 0 ||
2778 *(U8*)s == 0xEF ||
2779 *(U8*)s >= 0xFE ||
2780 s[1] == 0)) {
226017aa 2781#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2782# ifdef __GNU_LIBRARY__
2783# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2784# define FTELL_FOR_PIPE_IS_BROKEN
2785# endif
e3f494f1
JH
2786# else
2787# ifdef __GLIBC__
2788# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2789# define FTELL_FOR_PIPE_IS_BROKEN
2790# endif
2791# endif
226017aa
DD
2792# endif
2793#endif
2794#ifdef FTELL_FOR_PIPE_IS_BROKEN
2795 /* This loses the possibility to detect the bof
2796 * situation on perl -P when the libc5 is being used.
2797 * Workaround? Maybe attach some extra state to PL_rsfp?
2798 */
2799 if (!PL_preprocess)
7e28d3af 2800 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2801#else
eb160463 2802 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2803#endif
7e28d3af 2804 if (bof) {
3280af22 2805 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2806 s = swallow_bom((U8*)s);
e929a76b 2807 }
378cc40b 2808 }
3280af22 2809 if (PL_doextract) {
a0d0e21e
LW
2810 /* Incest with pod. */
2811 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2812 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2813 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2814 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2815 PL_last_lop = PL_last_uni = Nullch;
3280af22 2816 PL_doextract = FALSE;
a0d0e21e 2817 }
4e553d73 2818 }
463ee0b2 2819 incline(s);
3280af22
NIS
2820 } while (PL_doextract);
2821 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2822 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2823 SV * const sv = NEWSV(85,0);
a687059c 2824
93a17b20 2825 sv_upgrade(sv, SVt_PVMG);
3280af22 2826 sv_setsv(sv,PL_linestr);
0ac0412a 2827 (void)SvIOK_on(sv);
45977657 2828 SvIV_set(sv, 0);
57843af0 2829 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2830 }
3280af22 2831 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2832 PL_last_lop = PL_last_uni = Nullch;
57843af0 2833 if (CopLINE(PL_curcop) == 1) {
3280af22 2834 while (s < PL_bufend && isSPACE(*s))
79072805 2835 s++;
a0d0e21e 2836 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2837 s++;
44a8e56a 2838 d = Nullch;
3280af22 2839 if (!PL_in_eval) {
44a8e56a
PP
2840 if (*s == '#' && *(s+1) == '!')
2841 d = s + 2;
2842#ifdef ALTERNATE_SHEBANG
2843 else {
bfed75c6 2844 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2845 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2846 d = s + (sizeof(as) - 1);
2847 }
2848#endif /* ALTERNATE_SHEBANG */
2849 }
2850 if (d) {
b8378b72 2851 char *ipath;
774d564b 2852 char *ipathend;
b8378b72 2853
774d564b 2854 while (isSPACE(*d))
b8378b72
CS
2855 d++;
2856 ipath = d;
774d564b
PP
2857 while (*d && !isSPACE(*d))
2858 d++;
2859 ipathend = d;
2860
2861#ifdef ARG_ZERO_IS_SCRIPT
2862 if (ipathend > ipath) {
2863 /*
2864 * HP-UX (at least) sets argv[0] to the script name,
2865 * which makes $^X incorrect. And Digital UNIX and Linux,
2866 * at least, set argv[0] to the basename of the Perl
2867 * interpreter. So, having found "#!", we'll set it right.
2868 */
551405c4 2869 SV * const x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2870 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2871 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2872 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2873 SvSETMAGIC(x);
2874 }
556c1dec
JH
2875 else {
2876 STRLEN blen;
2877 STRLEN llen;
cfd0369c 2878 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2879 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2880 if (llen < blen) {
2881 bstart += blen - llen;
2882 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2883 sv_setpvn(x, ipath, ipathend - ipath);
2884 SvSETMAGIC(x);
2885 }
2886 }
2887 }
774d564b 2888 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2889 }
774d564b 2890#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2891
2892 /*
2893 * Look for options.
2894 */
748a9306 2895 d = instr(s,"perl -");
84e30d1a 2896 if (!d) {
748a9306 2897 d = instr(s,"perl");
84e30d1a
GS
2898#if defined(DOSISH)
2899 /* avoid getting into infinite loops when shebang
2900 * line contains "Perl" rather than "perl" */
2901 if (!d) {
2902 for (d = ipathend-4; d >= ipath; --d) {
2903 if ((*d == 'p' || *d == 'P')
2904 && !ibcmp(d, "perl", 4))
2905 {
2906 break;
2907 }
2908 }
2909 if (d < ipath)
2910 d = Nullch;
2911 }
2912#endif
2913 }
44a8e56a
PP
2914#ifdef ALTERNATE_SHEBANG
2915 /*
2916 * If the ALTERNATE_SHEBANG on this system starts with a
2917 * character that can be part of a Perl expression, then if
2918 * we see it but not "perl", we're probably looking at the
2919 * start of Perl code, not a request to hand off to some
2920 * other interpreter. Similarly, if "perl" is there, but
2921 * not in the first 'word' of the line, we assume the line
2922 * contains the start of the Perl program.
44a8e56a
PP
2923 */
2924 if (d && *s != '#') {
f54cb97a 2925 const char *c = ipath;
44a8e56a
PP
2926 while (*c && !strchr("; \t\r\n\f\v#", *c))
2927 c++;
2928 if (c < d)
2929 d = Nullch; /* "perl" not in first word; ignore */
2930 else
2931 *s = '#'; /* Don't try to parse shebang line */
2932 }
774d564b 2933#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2934#ifndef MACOS_TRADITIONAL
748a9306 2935 if (!d &&
44a8e56a 2936 *s == '#' &&
774d564b 2937 ipathend > ipath &&
3280af22 2938 !PL_minus_c &&
748a9306 2939 !instr(s,"indir") &&
3280af22 2940 instr(PL_origargv[0],"perl"))
748a9306 2941 {
27da23d5 2942 dVAR;
9f68db38 2943 char **newargv;
9f68db38 2944
774d564b
PP
2945 *ipathend = '\0';
2946 s = ipathend + 1;
3280af22 2947 while (s < PL_bufend && isSPACE(*s))
9f68db38 2948 s++;
3280af22 2949 if (s < PL_bufend) {
a02a5408 2950 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2951 newargv[1] = s;
3280af22 2952 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2953 s++;
2954 *s = '\0';
3280af22 2955 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2956 }
2957 else
3280af22 2958 newargv = PL_origargv;
774d564b 2959 newargv[0] = ipath;
b35112e7 2960 PERL_FPU_PRE_EXEC
b4748376 2961 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2962 PERL_FPU_POST_EXEC
cea2e8a9 2963 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2964 }
bf4acbe4 2965#endif
748a9306 2966 if (d) {
f54cb97a
AL
2967 const U32 oldpdb = PL_perldb;
2968 const bool oldn = PL_minus_n;
2969 const bool oldp = PL_minus_p;
748a9306
LW
2970
2971 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2972 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2973
2974 if (*d++ == '-') {
f54cb97a 2975 const bool switches_done = PL_doswitches;
8cc95fdb 2976 do {
3ffe3ee4 2977 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2978 const char * const m = d;
8cc95fdb 2979 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2980 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2981 (int)(d - m), m);
2982 }
2983 d = moreswitches(d);
2984 } while (d);
f0b2cf55
YST
2985 if (PL_doswitches && !switches_done) {
2986 int argc = PL_origargc;
2987 char **argv = PL_origargv;
2988 do {
2989 argc--,argv++;
2990 } while (argc && argv[0][0] == '-' && argv[0][1]);
2991 init_argv_symbols(argc,argv);
2992 }
155aba94
GS
2993 if ((PERLDB_LINE && !oldpdb) ||
2994 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2995 /* if we have already added "LINE: while (<>) {",
2996 we must not do it again */
748a9306 2997 {
c69006e4 2998 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2999 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3000 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 3001 PL_last_lop = PL_last_uni = Nullch;
3280af22 3002 PL_preambled = FALSE;
84902520 3003 if (PERLDB_LINE)
3280af22 3004 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3005 goto retry;
3006 }
a11ec5a9
RGS
3007 if (PL_doswitches && !switches_done) {
3008 int argc = PL_origargc;
3009 char **argv = PL_origargv;
3010 do {
3011 argc--,argv++;
3012 } while (argc && argv[0][0] == '-' && argv[0][1]);
3013 init_argv_symbols(argc,argv);
3014 }
a0d0e21e 3015 }
79072805 3016 }
9f68db38 3017 }
79072805 3018 }
3280af22
NIS
3019 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3020 PL_bufptr = s;
3021 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3022 return yylex();
ae986130 3023 }
378cc40b 3024 goto retry;
4fdae800 3025 case '\r':
6a27c188 3026#ifdef PERL_STRICT_CR
cea2e8a9 3027 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3028 Perl_croak(aTHX_
cc507455 3029 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3030#endif
4fdae800 3031 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3032#ifdef MACOS_TRADITIONAL
3033 case '\312':
3034#endif
378cc40b
LW
3035 s++;
3036 goto retry;
378cc40b 3037 case '#':
e929a76b 3038 case '\n':
3280af22 3039 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3040 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3041 /* handle eval qq[#line 1 "foo"\n ...] */
3042 CopLINE_dec(PL_curcop);
3043 incline(s);
3044 }
3280af22 3045 d = PL_bufend;
a687059c 3046 while (s < d && *s != '\n')
378cc40b 3047 s++;
0f85fab0 3048 if (s < d)
378cc40b 3049 s++;
78c267c1 3050 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3051 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3052 incline(s);
3280af22
NIS
3053 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3054 PL_bufptr = s;
3055 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3056 return yylex();
a687059c 3057 }
378cc40b 3058 }
a687059c 3059 else {
378cc40b 3060 *s = '\0';
3280af22 3061 PL_bufend = s;
a687059c 3062 }
378cc40b
LW
3063 goto retry;
3064 case '-':
79072805 3065 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
3066 I32 ftst = 0;
3067
378cc40b 3068 s++;
3280af22 3069 PL_bufptr = s;
748a9306
LW
3070 tmp = *s++;
3071
bf4acbe4 3072 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3073 s++;
3074
3075 if (strnEQ(s,"=>",2)) {
3280af22 3076 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3077 DEBUG_T( { S_printbuf(aTHX_
3078 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3079 } );
748a9306
LW
3080 OPERATOR('-'); /* unary minus */
3081 }
3280af22 3082 PL_last_uni = PL_oldbufptr;
748a9306 3083 switch (tmp) {
e5edeb50
JH
3084 case 'r': ftst = OP_FTEREAD; break;
3085 case 'w': ftst = OP_FTEWRITE; break;
3086 case 'x': ftst = OP_FTEEXEC; break;
3087 case 'o': ftst = OP_FTEOWNED; break;
3088 case 'R': ftst = OP_FTRREAD; break;
3089 case 'W': ftst = OP_FTRWRITE; break;
3090 case 'X': ftst = OP_FTREXEC; break;
3091 case 'O': ftst = OP_FTROWNED; break;
3092 case 'e': ftst = OP_FTIS; break;
3093 case 'z': ftst = OP_FTZERO; break;
3094 case 's': ftst = OP_FTSIZE; break;
3095 case 'f': ftst = OP_FTFILE; break;
3096 case 'd': ftst = OP_FTDIR; break;
3097 case 'l': ftst = OP_FTLINK; break;
3098 case 'p': ftst = OP_FTPIPE; break;
3099 case 'S': ftst = OP_FTSOCK; break;
3100 case 'u': ftst = OP_FTSUID; break;
3101 case 'g': ftst = OP_FTSGID; break;
3102 case 'k': ftst = OP_FTSVTX; break;
3103 case 'b': ftst = OP_FTBLK; break;
3104 case 'c': ftst = OP_FTCHR; break;
3105 case 't': ftst = OP_FTTTY; break;
3106 case 'T': ftst = OP_FTTEXT; break;
3107 case 'B': ftst = OP_FTBINARY; break;
3108 case 'M': case 'A': case 'C':
3109 gv_fetchpv("\024",TRUE, SVt_PV);
3110 switch (tmp) {
3111 case 'M': ftst = OP_FTMTIME; break;
3112 case 'A': ftst = OP_FTATIME; break;
3113 case 'C': ftst = OP_FTCTIME; break;
3114 default: break;
3115 }
3116 break;
378cc40b 3117 default:
378cc40b
LW
3118 break;
3119 }
e5edeb50 3120 if (ftst) {
eb160463 3121 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3122 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3123 "### Saw file test %c\n", (int)tmp);
5f80b19c 3124 } );
e5edeb50
JH
3125 FTST(ftst);
3126 }
3127 else {
3128 /* Assume it was a minus followed by a one-letter named
3129 * subroutine call (or a -bareword), then. */
95c31fe3 3130 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3131 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3132 (int) tmp);
5f80b19c 3133 } );
3cf7b4c4 3134 s = --PL_bufptr;
e5edeb50 3135 }
378cc40b 3136 }
a687059c
LW
3137 tmp = *s++;
3138 if (*s == tmp) {
3139 s++;
3280af22 3140 if (PL_expect == XOPERATOR)
79072805
LW
3141 TERM(POSTDEC);
3142 else
3143 OPERATOR(PREDEC);
3144 }
3145 else if (*s == '>') {
3146 s++;
3147 s = skipspace(s);
7e2040f0 3148 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3149 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3150 TOKEN(ARROW);
79072805 3151 }
748a9306
LW
3152 else if (*s == '$')
3153 OPERATOR(ARROW);
463ee0b2 3154 else
748a9306 3155 TERM(ARROW);
a687059c 3156 }
3280af22 3157 if (PL_expect == XOPERATOR)
79072805
LW
3158 Aop(OP_SUBTRACT);
3159 else {
3280af22 3160 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3161 check_uni();
79072805 3162 OPERATOR('-'); /* unary minus */
2f3197b3 3163 }
79072805 3164
378cc40b 3165 case '+':
a687059c
LW
3166 tmp = *s++;
3167 if (*s == tmp) {
378cc40b 3168 s++;
3280af22 3169 if (PL_expect == XOPERATOR)
79072805
LW
3170 TERM(POSTINC);
3171 else
3172 OPERATOR(PREINC);
378cc40b 3173 }
3280af22 3174 if (PL_expect == XOPERATOR)
79072805
LW
3175 Aop(OP_ADD);
3176 else {
3280af22 3177 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3178 check_uni();
a687059c 3179 OPERATOR('+');
2f3197b3 3180 }
a687059c 3181
378cc40b 3182 case '*':
3280af22
NIS
3183 if (PL_expect != XOPERATOR) {
3184 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3185 PL_expect = XOPERATOR;
3186 force_ident(PL_tokenbuf, '*');
3187 if (!*PL_tokenbuf)
a0d0e21e 3188 PREREF('*');
79072805 3189 TERM('*');
a687059c 3190 }
79072805
LW
3191 s++;
3192 if (*s == '*') {
a687059c 3193 s++;
79072805 3194 PWop(OP_POW);
a687059c 3195 }
79072805
LW
3196 Mop(OP_MULTIPLY);
3197
378cc40b 3198 case '%':
3280af22 3199 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3200 ++s;
3201 Mop(OP_MODULO);
a687059c 3202 }
3280af22
NIS
3203 PL_tokenbuf[0] = '%';
3204 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3205 if (!PL_tokenbuf[1]) {
bbce6d69 3206 PREREF('%');
a687059c 3207 }
3280af22 3208 PL_pending_ident = '%';
bbce6d69 3209 TERM('%');
a687059c 3210
378cc40b 3211 case '^':
79072805 3212 s++;
a0d0e21e 3213 BOop(OP_BIT_XOR);
79072805 3214 case '[':
3280af22 3215 PL_lex_brackets++;
79072805 3216 /* FALL THROUGH */
378cc40b 3217 case '~':
0d863452
RH
3218 if (s[1] == '~'
3219 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3220 && FEATURE_IS_ENABLED("~~", 2))
3221 {
3222 s += 2;
3223 Eop(OP_SMARTMATCH);
3224 }
378cc40b 3225 case ',':
378cc40b
LW
3226 tmp = *s++;
3227 OPERATOR(tmp);
a0d0e21e
LW
3228 case ':':
3229 if (s[1] == ':') {
3230 len = 0;
3231 goto just_a_word;
3232 }
3233 s++;
09bef843
SB
3234 switch (PL_expect) {
3235 OP *attrs;
3236 case XOPERATOR:
3237 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3238 break;
3239 PL_bufptr = s; /* update in case we back off */
3240 goto grabattrs;
3241 case XATTRBLOCK:
3242 PL_expect = XBLOCK;
3243 goto grabattrs;
3244 case XATTRTERM:
3245 PL_expect = XTERMBLOCK;
3246 grabattrs:
3247 s = skipspace(s);
3248 attrs = Nullop;
7e2040f0 3249 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3250 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3251 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3252 if (tmp < 0) tmp = -tmp;
3253 switch (tmp) {
3254 case KEY_or:
3255 case KEY_and:
c963b151 3256 case KEY_err:
f9829d6b
GS
3257 case KEY_for:
3258 case KEY_unless:
3259 case KEY_if:
3260 case KEY_while:
3261 case KEY_until:
3262 goto got_attrs;
3263 default:
3264 break;
3265 }
3266 }
09bef843
SB
3267 if (*d == '(') {
3268 d = scan_str(d,TRUE,TRUE);
3269 if (!d) {
09bef843
SB
3270 /* MUST advance bufptr here to avoid bogus
3271 "at end of line" context messages from yyerror().
3272 */
3273 PL_bufptr = s + len;
3274 yyerror("Unterminated attribute parameter in attribute list");
3275 if (attrs)
3276 op_free(attrs);
bbf60fe6 3277 return REPORT(0); /* EOF indicator */
09bef843
SB
3278 }
3279 }
3280 if (PL_lex_stuff) {
3281 SV *sv = newSVpvn(s, len);
3282 sv_catsv(sv, PL_lex_stuff);
3283 attrs = append_elem(OP_LIST, attrs,
3284 newSVOP(OP_CONST, 0, sv));
3285 SvREFCNT_dec(PL_lex_stuff);
3286 PL_lex_stuff = Nullsv;
3287 }
3288 else {
371fce9b
DM
3289 if (len == 6 && strnEQ(s, "unique", len)) {
3290 if (PL_in_my == KEY_our)
3291#ifdef USE_ITHREADS
3292 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3293#else
3294 ; /* skip to avoid loading attributes.pm */
3295#endif
bfed75c6 3296 else
371fce9b
DM
3297 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3298 }
3299
d3cea301
SB
3300 /* NOTE: any CV attrs applied here need to be part of
3301 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3302 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3303 CvLVALUE_on(PL_compcv);
3304 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3305 CvLOCKED_on(PL_compcv);
3306 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3307 CvMETHOD_on(PL_compcv);
06492da6
SF
3308 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3309 CvASSERTION_on(PL_compcv);
78f9721b
SM
3310 /* After we've set the flags, it could be argued that
3311 we don't need to do the attributes.pm-based setting
3312 process, and shouldn't bother appending recognized
d3cea301
SB
3313 flags. To experiment with that, uncomment the
3314 following "else". (Note that's already been
3315 uncommented. That keeps the above-applied built-in
3316 attributes from being intercepted (and possibly
3317 rejected) by a package's attribute routines, but is
3318 justified by the performance win for the common case
3319 of applying only built-in attributes.) */
0256094b 3320 else
78f9721b
SM
3321 attrs = append_elem(OP_LIST, attrs,
3322 newSVOP(OP_CONST, 0,
3323 newSVpvn(s, len)));
09bef843
SB
3324 }
3325 s = skipspace(d);
0120eecf 3326 if (*s == ':' && s[1] != ':')
09bef843 3327 s = skipspace(s+1);
0120eecf
GS
3328 else if (s == d)
3329 break; /* require real whitespace or :'s */
09bef843 3330 }
f9829d6b 3331 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3332 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
f54cb97a 3333 const char q = ((*s == '\'') ? '"' : '\'');
09bef843
SB
3334 /* If here for an expression, and parsed no attrs, back off. */
3335 if (tmp == '=' && !attrs) {
3336 s = PL_bufptr;
3337 break;
3338 }
3339 /* MUST advance bufptr here to avoid bogus "at end of line"
3340 context messages from yyerror().
3341 */
3342 PL_bufptr = s;
0bd48802
AL
3343 yyerror( *s
3344 ? Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list", q, *s, q)
3345 : "Unterminated attribute list" );
09bef843
SB
3346 if (attrs)
3347 op_free(attrs);
3348 OPERATOR(':');
3349 }
f9829d6b 3350 got_attrs:
09bef843
SB
3351 if (attrs) {
3352 PL_nextval[PL_nexttoke].opval = attrs;
3353 force_next(THING);
3354 }
3355 TOKEN(COLONATTR);
3356 }
a0d0e21e 3357 OPERATOR(':');
8990e307
LW
3358 case '(':
3359 s++;
3280af22
NIS
3360 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3361 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3362 else
3280af22 3363 PL_expect = XTERM;
4a202259 3364 s = skipspace(s);
a0d0e21e 3365 TOKEN('(');
378cc40b 3366 case ';':
f4dd75d9 3367 CLINE;
378cc40b
LW
3368 tmp = *s++;
3369 OPERATOR(tmp);
3370 case ')':
378cc40b 3371 tmp = *s++;
16d20bd9
AD
3372 s = skipspace(s);
3373 if (*s == '{')
3374 PREBLOCK(tmp);
378cc40b 3375 TERM(tmp);
79072805
LW
3376 case ']':
3377 s++;
3280af22 3378 if (PL_lex_brackets <= 0)
d98d5fff 3379 yyerror("Unmatched right square bracket");
463ee0b2 3380 else
3280af22
NIS
3381 --PL_lex_brackets;
3382 if (PL_lex_state == LEX_INTERPNORMAL) {
3383 if (PL_lex_brackets == 0) {
a0d0e21e 3384 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3385 PL_lex_state = LEX_INTERPEND;
79072805
LW
3386 }
3387 }
4633a7c4 3388 TERM(']');
79072805
LW
3389 case '{':
3390 leftbracket:
79072805 3391 s++;
3280af22 3392 if (PL_lex_brackets > 100) {
8edd5f42 3393 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3394 }
3280af22 3395 switch (PL_expect) {
a0d0e21e 3396 case XTERM:
3280af22 3397 if (PL_lex_formbrack) {
a0d0e21e
LW
3398 s--;
3399 PRETERMBLOCK(DO);
3400 }
3280af22
NIS
3401 if (PL_oldoldbufptr == PL_last_lop)
3402 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3403 else
3280af22 3404 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3405 OPERATOR(HASHBRACK);
a0d0e21e 3406 case XOPERATOR:
bf4acbe4 3407 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3408 s++;
44a8e56a 3409 d = s;
3280af22
NIS
3410 PL_tokenbuf[0] = '\0';
3411 if (d < PL_bufend && *d == '-') {
3412 PL_tokenbuf[0] = '-';
44a8e56a 3413 d++;
bf4acbe4 3414 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3415 d++;
3416 }
7e2040f0 3417 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3418 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3419 FALSE, &len);
bf4acbe4 3420 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3421 d++;
3422 if (*d == '}') {
f54cb97a 3423 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3424 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3425 if (minus)
3426 force_next('-');
748a9306
LW
3427 }
3428 }
3429 /* FALL THROUGH */
09bef843 3430 case XATTRBLOCK:
748a9306 3431 case XBLOCK:
3280af22
NIS
3432 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3433 PL_expect = XSTATE;
a0d0e21e 3434 break;
09bef843 3435 case XATTRTERM:
a0d0e21e 3436 case XTERMBLOCK:
3280af22
NIS
3437 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3438 PL_expect = XSTATE;
a0d0e21e
LW
3439 break;
3440 default: {
f54cb97a 3441 const char *t;
3280af22
NIS
3442 if (PL_oldoldbufptr == PL_last_lop)
3443 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3444 else
3280af22 3445 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3446 s = skipspace(s);
8452ff4b
SB
3447 if (*s == '}') {
3448 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3449 PL_expect = XTERM;
3450 /* This hack is to get the ${} in the message. */
3451 PL_bufptr = s+1;
3452 yyerror("syntax error");
3453 break;
3454 }
a0d0e21e 3455 OPERATOR(HASHBRACK);
8452ff4b 3456 }
b8a4b1be
GS
3457 /* This hack serves to disambiguate a pair of curlies
3458 * as being a block or an anon hash. Normally, expectation
3459 * determines that, but in cases where we're not in a
3460 * position to expect anything in particular (like inside
3461 * eval"") we have to resolve the ambiguity. This code
3462 * covers the case where the first term in the curlies is a
3463 * quoted string. Most other cases need to be explicitly
a0288114 3464 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3465 * curly in order to force resolution as an anon hash.
3466 *
3467 * XXX should probably propagate the outer expectation
3468 * into eval"" to rely less on this hack, but that could
3469 * potentially break current behavior of eval"".
3470 * GSAR 97-07-21
3471 */
3472 t = s;
3473 if (*s == '\'' || *s == '"' || *s == '`') {
3474 /* common case: get past first string, handling escapes */
3280af22 3475 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3476 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3477 t++;
3478 t++;
a0d0e21e 3479 }
b8a4b1be 3480 else if (*s == 'q') {
3280af22 3481 if (++t < PL_bufend
b8a4b1be 3482 && (!isALNUM(*t)
3280af22 3483 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3484 && !isALNUM(*t))))
3485 {
abc667d1 3486 /* skip q//-like construct */
f54cb97a 3487 const char *tmps;
b8a4b1be
GS
3488 char open, close, term;
3489 I32 brackets = 1;
3490
3280af22 3491 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3492 t++;
abc667d1
DM
3493 /* check for q => */
3494 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3495 OPERATOR(HASHBRACK);
3496 }
b8a4b1be
GS
3497 term = *t;
3498 open = term;
3499 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3500 term = tmps[5];
3501 close = term;
3502 if (open == close)
3280af22
NIS
3503 for (t++; t < PL_bufend; t++) {
3504 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3505 t++;
6d07e5e9 3506 else if (*t == open)
b8a4b1be
GS
3507 break;
3508 }
abc667d1 3509 else {
3280af22
NIS
3510 for (t++; t < PL_bufend; t++) {
3511 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3512 t++;
6d07e5e9 3513 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3514 break;
3515 else if (*t == open)
3516 brackets++;
3517 }
abc667d1
DM
3518 }
3519 t++;
b8a4b1be 3520 }
abc667d1
DM
3521 else
3522 /* skip plain q word */
3523 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3524 t += UTF8SKIP(t);
a0d0e21e 3525 }
7e2040f0 3526 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3527 t += UTF8SKIP(t);
7e2040f0 3528 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3529 t += UTF8SKIP(t);