This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Eliminate the use of PL_curforce outside of toke.c
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
52
53#ifdef PERL_MAD
54# define PL_endwhite (PL_parser->endwhite)
55# define PL_faketokens (PL_parser->faketokens)
56# define PL_lasttoke (PL_parser->lasttoke)
57# define PL_nextwhite (PL_parser->nextwhite)
58# define PL_realtokenstart (PL_parser->realtokenstart)
59# define PL_skipwhite (PL_parser->skipwhite)
60# define PL_thisclose (PL_parser->thisclose)
61# define PL_thismad (PL_parser->thismad)
62# define PL_thisopen (PL_parser->thisopen)
63# define PL_thisstuff (PL_parser->thisstuff)
64# define PL_thistoken (PL_parser->thistoken)
65# define PL_thiswhite (PL_parser->thiswhite)
66#endif
67
3cbf51f5
DM
68static int
69S_pending_ident(pTHX);
199e78b7 70
0bd48802 71static const char ident_too_long[] = "Identifier too long";
c445ea15 72static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 73
acfe0abc 74static void restore_rsfp(pTHX_ void *f);
6e3aabd6 75#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
76static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 78#endif
51371543 79
29595ff2 80#ifdef PERL_MAD
29595ff2 81# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 82# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 83#else
5db06880 84# define CURMAD(slot,sv)
9ded7720 85# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
86#endif
87
9059aa12
LW
88#define XFAKEBRACK 128
89#define XENUMMASK 127
90
39e02b42
JH
91#ifdef USE_UTF8_SCRIPTS
92# define UTF (!IN_BYTES)
2b9d42f0 93#else
746b446a 94# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 95#endif
a0ed51b3 96
61f0cdd9 97/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
98 * 1999-02-27 mjd-perl-patch@plover.com */
99#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
100
bf4acbe4
GS
101/* On MacOS, respect nonbreaking spaces */
102#ifdef MACOS_TRADITIONAL
103#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
104#else
105#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
106#endif
107
ffb4593c
NT
108/* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
79072805
LW
110 * can get by with a single comparison (if the compiler is smart enough).
111 */
112
fb73857a
PP
113/* #define LEX_NOTPARSING 11 is done in perl.h. */
114
b6007c36
DM
115#define LEX_NORMAL 10 /* normal code (ie not within "...") */
116#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119#define LEX_INTERPSTART 6 /* expecting the start of a $var */
120
121 /* at end of code, eg "$x" followed by: */
122#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
124
125#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127#define LEX_INTERPCONST 2 /* NOT USED */
128#define LEX_FORMLINE 1 /* expecting a format line */
129#define LEX_KNOWNEXT 0 /* next token known; just return it */
130
79072805 131
bbf60fe6 132#ifdef DEBUGGING
27da23d5 133static const char* const lex_state_names[] = {
bbf60fe6
DM
134 "KNOWNEXT",
135 "FORMLINE",
136 "INTERPCONST",
137 "INTERPCONCAT",
138 "INTERPENDMAYBE",
139 "INTERPEND",
140 "INTERPSTART",
141 "INTERPPUSH",
142 "INTERPCASEMOD",
143 "INTERPNORMAL",
144 "NORMAL"
145};
146#endif
147
79072805
LW
148#ifdef ff_next
149#undef ff_next
d48672a2
LW
150#endif
151
79072805 152#include "keywords.h"
fe14fcc3 153
ffb4593c
NT
154/* CLINE is a macro that ensures PL_copline has a sane value */
155
ae986130
LW
156#ifdef CLINE
157#undef CLINE
158#endif
57843af0 159#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 160
5db06880 161#ifdef PERL_MAD
29595ff2
NC
162# define SKIPSPACE0(s) skipspace0(s)
163# define SKIPSPACE1(s) skipspace1(s)
164# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165# define PEEKSPACE(s) skipspace2(s,0)
166#else
167# define SKIPSPACE0(s) skipspace(s)
168# define SKIPSPACE1(s) skipspace(s)
169# define SKIPSPACE2(s,tsv) skipspace(s)
170# define PEEKSPACE(s) skipspace(s)
171#endif
172
ffb4593c
NT
173/*
174 * Convenience functions to return different tokens and prime the
9cbb5ea2 175 * lexer for the next token. They all take an argument.
ffb4593c
NT
176 *
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
2d2e263d 187 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
188 * BOop : bitwise or or xor
189 * BAop : bitwise and
190 * SHop : shift operator
191 * PWop : power operator
9cbb5ea2 192 * PMop : pattern-matching operator
ffb4593c
NT
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
e5edeb50 196 * Rop : relational operator <= != gt
ffb4593c
NT
197 *
198 * Also see LOP and lop() below.
199 */
200
998054bd 201#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 202# define REPORT(retval) tokereport((I32)retval)
998054bd 203#else
bbf60fe6 204# define REPORT(retval) (retval)
998054bd
SC
205#endif
206
bbf60fe6
DM
207#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 227
a687059c
LW
228/* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
a687059c 232 */
376fcdbf
AL
233#define UNI2(f,x) { \
234 yylval.ival = f; \
235 PL_expect = x; \
236 PL_bufptr = s; \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
239 if (*s == '(') \
240 return REPORT( (int)FUNC1 ); \
29595ff2 241 s = PEEKSPACE(s); \
376fcdbf
AL
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
243 }
6f33ba73
RGS
244#define UNI(f) UNI2(f,XTERM)
245#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 246
376fcdbf
AL
247#define UNIBRACK(f) { \
248 yylval.ival = f; \
249 PL_bufptr = s; \
250 PL_last_uni = PL_oldbufptr; \
251 if (*s == '(') \
252 return REPORT( (int)FUNC1 ); \
29595ff2 253 s = PEEKSPACE(s); \
376fcdbf
AL
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
255 }
79072805 256
9f68db38 257/* grandfather return to old style */
3280af22 258#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 259
8fa7f367
JH
260#ifdef DEBUGGING
261
bbf60fe6
DM
262/* how to interpret the yylval associated with the token */
263enum token_type {
264 TOKENTYPE_NONE,
265 TOKENTYPE_IVAL,
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
267 TOKENTYPE_PVAL,
268 TOKENTYPE_OPVAL,
269 TOKENTYPE_GVVAL
270};
271
6d4a66ac
NC
272static struct debug_tokens {
273 const int token;
274 enum token_type type;
275 const char *name;
276} const debug_tokens[] =
9041c2e3 277{
bbf60fe6
DM
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 343 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
344};
345
346/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 347
bbf60fe6 348STATIC int
f5bd084c 349S_tokereport(pTHX_ I32 rv)
bbf60fe6 350{
97aff369 351 dVAR;
bbf60fe6 352 if (DEBUG_T_TEST) {
bd61b366 353 const char *name = NULL;
bbf60fe6 354 enum token_type type = TOKENTYPE_NONE;
f54cb97a 355 const struct debug_tokens *p;
396482e1 356 SV* const report = newSVpvs("<== ");
bbf60fe6 357
f54cb97a 358 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
359 if (p->token == (int)rv) {
360 name = p->name;
361 type = p->type;
362 break;
363 }
364 }
365 if (name)
54667de8 366 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
369 else if (!rv)
396482e1 370 sv_catpvs(report, "EOF");
bbf60fe6
DM
371 else
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
373 switch (type) {
374 case TOKENTYPE_NONE:
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
376 break;
377 case TOKENTYPE_IVAL:
e4584336 378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
379 break;
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
383 break;
384 case TOKENTYPE_PVAL:
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
386 break;
387 case TOKENTYPE_OPVAL:
b6007c36 388 if (yylval.opval) {
401441c0 389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 390 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
394 }
395
396 }
401441c0 397 else
396482e1 398 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
399 break;
400 }
b6007c36 401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
402 };
403 return (int)rv;
998054bd
SC
404}
405
b6007c36
DM
406
407/* print the buffer with suitable escapes */
408
409STATIC void
410S_printbuf(pTHX_ const char* fmt, const char* s)
411{
396482e1 412 SV* const tmp = newSVpvs("");
b6007c36
DM
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
414 SvREFCNT_dec(tmp);
415}
416
8fa7f367
JH
417#endif
418
ffb4593c
NT
419/*
420 * S_ao
421 *
c963b151
BD
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
424 */
425
76e3520e 426STATIC int
cea2e8a9 427S_ao(pTHX_ int toketype)
a0d0e21e 428{
97aff369 429 dVAR;
3280af22
NIS
430 if (*PL_bufptr == '=') {
431 PL_bufptr++;
a0d0e21e
LW
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
c963b151
BD
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
438 toketype = ASSIGNOP;
439 }
440 return toketype;
441}
442
ffb4593c
NT
443/*
444 * S_no_op
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
454 */
455
76e3520e 456STATIC void
bfed75c6 457S_no_op(pTHX_ const char *what, char *s)
463ee0b2 458{
97aff369 459 dVAR;
9d4ba2ae
AL
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 462
1189a94a
GS
463 if (!s)
464 s = oldbp;
07c798fb 465 else
1189a94a 466 PL_bufptr = s;
cea2e8a9 467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
468 if (ckWARN_d(WARN_SYNTAX)) {
469 if (is_first)
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 473 const char *t;
c35e046a
AL
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
475 NOOP;
56da5a46
RGS
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
551405c4 479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
480 }
481 else {
482 assert(s >= oldbp);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 485 }
07c798fb 486 }
3280af22 487 PL_bufptr = oldbp;
8990e307
LW
488}
489
ffb4593c
NT
490/*
491 * S_missingterm
492 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 493 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
496 * This is fatal.
497 */
498
76e3520e 499STATIC void
cea2e8a9 500S_missingterm(pTHX_ char *s)
8990e307 501{
97aff369 502 dVAR;
8990e307
LW
503 char tmpbuf[3];
504 char q;
505 if (s) {
9d4ba2ae 506 char * const nl = strrchr(s,'\n');
d2719217 507 if (nl)
8990e307
LW
508 *nl = '\0';
509 }
9d116dd7
JH
510 else if (
511#ifdef EBCDIC
512 iscntrl(PL_multi_close)
513#else
514 PL_multi_close < 32 || PL_multi_close == 127
515#endif
516 ) {
8990e307 517 *tmpbuf = '^';
585ec06d 518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
519 tmpbuf[2] = '\0';
520 s = tmpbuf;
521 }
522 else {
eb160463 523 *tmpbuf = (char)PL_multi_close;
8990e307
LW
524 tmpbuf[1] = '\0';
525 s = tmpbuf;
526 }
527 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 529}
79072805 530
ef89dcc3 531#define FEATURE_IS_ENABLED(name) \
0d863452 532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
534/*
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
537 */
538STATIC bool
d4c19fe8 539S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 540{
97aff369 541 dVAR;
0d863452 542 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 543 char he_name[32] = "feature_";
6fca0082 544 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 545
7b9ef140 546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
547}
548
ffb4593c
NT
549/*
550 * Perl_deprecate
ffb4593c
NT
551 */
552
79072805 553void
bfed75c6 554Perl_deprecate(pTHX_ const char *s)
a0d0e21e 555{
599cee73 556 if (ckWARN(WARN_DEPRECATED))
9014280d 557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
558}
559
12bcd1a6 560void
bfed75c6 561Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
562{
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
565 /* */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
570
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
573 "Use of %s is deprecated", s);
574}
575
ffb4593c 576/*
9cbb5ea2
GS
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
ffb4593c
NT
579 */
580
c39cd008
GS
581#ifdef PERL_CR_FILTER
582static void
583strip_return(SV *sv)
584{
95a20fc0 585 register const char *s = SvPVX_const(sv);
9d4ba2ae 586 register const char * const e = s + SvCUR(sv);
c39cd008
GS
587 /* outer loop optimized to do nothing if there are no CR-LFs */
588 while (s < e) {
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
592 *d++ = *s++;
593 while (s < e) {
594 if (*s == '\r' && s[1] == '\n')
595 s++;
596 *d++ = *s++;
597 }
598 SvCUR(sv) -= s - d;
599 return;
600 }
601 }
602}
a868473f 603
76e3520e 604STATIC I32
c39cd008 605S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 606{
f54cb97a 607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
608 if (count > 0 && !maxlen)
609 strip_return(sv);
610 return count;
a868473f
NIS
611}
612#endif
613
199e78b7
DM
614
615
ffb4593c
NT
616/*
617 * Perl_lex_start
9cbb5ea2
GS
618 * Initialize variables. Uses the Perl save_stack to save its state (for
619 * recursive calls to the parser).
ffb4593c
NT
620 */
621
a0d0e21e 622void
864dbfa3 623Perl_lex_start(pTHX_ SV *line)
79072805 624{
97aff369 625 dVAR;
6ef55633 626 const char *s = NULL;
8990e307 627 STRLEN len;
acdf0a21
DM
628 yy_parser *parser;
629
630 /* create and initialise a parser */
631
199e78b7 632 Newxz(parser, 1, yy_parser);
acdf0a21
DM
633 parser->old_parser = PL_parser;
634 PL_parser = parser;
635
636 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
637 parser->ps = parser->stack;
638 parser->stack_size = YYINITDEPTH;
639
640 parser->stack->state = 0;
641 parser->yyerrstatus = 0;
642 parser->yychar = YYEMPTY; /* Cause a token to be read. */
643
644 /* initialise lexer state */
8990e307 645
3280af22 646 SAVEI32(PL_lex_state);
5db06880
NC
647#ifdef PERL_MAD
648 if (PL_lex_state == LEX_KNOWNEXT) {
199e78b7 649 I32 toke = parser->old_parser->lasttoke;
5db06880
NC
650 while (--toke >= 0) {
651 SAVEI32(PL_nexttoke[toke].next_type);
652 SAVEVPTR(PL_nexttoke[toke].next_val);
653 if (PL_madskills)
654 SAVEVPTR(PL_nexttoke[toke].next_mad);
655 }
5db06880 656 }
cd81e915 657 SAVEI32(PL_curforce);
90e3715f 658 PL_curforce = -1;
5db06880 659#else
18b09519
GS
660 if (PL_lex_state == LEX_KNOWNEXT) {
661 I32 toke = PL_nexttoke;
662 while (--toke >= 0) {
663 SAVEI32(PL_nexttype[toke]);
664 SAVEVPTR(PL_nextval[toke]);
665 }
666 SAVEI32(PL_nexttoke);
18b09519 667 }
5db06880 668#endif
57843af0 669 SAVECOPLINE(PL_curcop);
3280af22
NIS
670 SAVEPPTR(PL_bufptr);
671 SAVEPPTR(PL_bufend);
672 SAVEPPTR(PL_oldbufptr);
673 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
674 SAVEPPTR(PL_last_lop);
675 SAVEPPTR(PL_last_uni);
3280af22
NIS
676 SAVEPPTR(PL_linestart);
677 SAVESPTR(PL_linestr);
c76ac1ee 678 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
bebdddfc 679 SAVEINT(PL_expect);
3280af22 680
ebca63ee 681 PL_copline = NOLINE;
3280af22 682 PL_lex_state = LEX_NORMAL;
3280af22 683 PL_expect = XSTATE;
199e78b7
DM
684 Newx(parser->lex_brackstack, 120, char);
685 Newx(parser->lex_casestack, 12, char);
686 *parser->lex_casestack = '\0';
687#ifndef PERL_MAD
76be56bc 688 PL_nexttoke = 0;
5db06880 689#endif
02b34bbe 690
10efb74f
NC
691 if (line) {
692 s = SvPV_const(line, len);
693 } else {
694 len = 0;
695 }
696 if (!len) {
697 PL_linestr = newSVpvs("\n;");
698 } else if (SvREADONLY(line) || s[len-1] != ';') {
699 PL_linestr = newSVsv(line);
700 if (s[len-1] != ';')
0eb20fa2 701 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
702 } else {
703 SvTEMP_off(line);
704 SvREFCNT_inc_simple_void_NN(line);
705 PL_linestr = line;
8990e307 706 }
db4997f0
NC
707 /* PL_linestr needs to survive until end of scope, not just the next
708 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 709 SAVEFREESV(PL_linestr);
3280af22
NIS
710 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
711 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 712 PL_last_lop = PL_last_uni = NULL;
3280af22 713 PL_rsfp = 0;
79072805 714}
a687059c 715
ffb4593c
NT
716/*
717 * Perl_lex_end
9cbb5ea2
GS
718 * Finalizer for lexing operations. Must be called when the parser is
719 * done with the lexer.
ffb4593c
NT
720 */
721
463ee0b2 722void
864dbfa3 723Perl_lex_end(pTHX)
463ee0b2 724{
97aff369 725 dVAR;
3280af22 726 PL_doextract = FALSE;
463ee0b2
LW
727}
728
ffb4593c
NT
729/*
730 * S_incline
731 * This subroutine has nothing to do with tilting, whether at windmills
732 * or pinball tables. Its name is short for "increment line". It
57843af0 733 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 734 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
735 * # line 500 "foo.pm"
736 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
737 */
738
76e3520e 739STATIC void
d9095cec 740S_incline(pTHX_ const char *s)
463ee0b2 741{
97aff369 742 dVAR;
d9095cec
NC
743 const char *t;
744 const char *n;
745 const char *e;
463ee0b2 746
57843af0 747 CopLINE_inc(PL_curcop);
463ee0b2
LW
748 if (*s++ != '#')
749 return;
d4c19fe8
AL
750 while (SPACE_OR_TAB(*s))
751 s++;
73659bf1
GS
752 if (strnEQ(s, "line", 4))
753 s += 4;
754 else
755 return;
084592ab 756 if (SPACE_OR_TAB(*s))
73659bf1 757 s++;
4e553d73 758 else
73659bf1 759 return;
d4c19fe8
AL
760 while (SPACE_OR_TAB(*s))
761 s++;
463ee0b2
LW
762 if (!isDIGIT(*s))
763 return;
d4c19fe8 764
463ee0b2
LW
765 n = s;
766 while (isDIGIT(*s))
767 s++;
bf4acbe4 768 while (SPACE_OR_TAB(*s))
463ee0b2 769 s++;
73659bf1 770 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 771 s++;
73659bf1
GS
772 e = t + 1;
773 }
463ee0b2 774 else {
c35e046a
AL
775 t = s;
776 while (!isSPACE(*t))
777 t++;
73659bf1 778 e = t;
463ee0b2 779 }
bf4acbe4 780 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
781 e++;
782 if (*e != '\n' && *e != '\0')
783 return; /* false alarm */
784
f4dd75d9 785 if (t - s > 0) {
d9095cec 786 const STRLEN len = t - s;
8a5ee598 787#ifndef USE_ITHREADS
c4420975 788 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
789 STRLEN tmplen = cf ? strlen(cf) : 0;
790 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
791 /* must copy *{"::_<(eval N)[oldfilename:L]"}
792 * to *{"::_<newfilename"} */
44867030
NC
793 /* However, the long form of evals is only turned on by the
794 debugger - usually they're "(eval %lu)" */
795 char smallbuf[128];
796 char *tmpbuf;
797 GV **gvp;
d9095cec 798 STRLEN tmplen2 = len;
798b63bc 799 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
800 tmpbuf = smallbuf;
801 else
2ae0db35 802 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
803 tmpbuf[0] = '_';
804 tmpbuf[1] = '<';
2ae0db35 805 memcpy(tmpbuf + 2, cf, tmplen);
44867030 806 tmplen += 2;
8a5ee598
RGS
807 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
808 if (gvp) {
44867030
NC
809 char *tmpbuf2;
810 GV *gv2;
811
812 if (tmplen2 + 2 <= sizeof smallbuf)
813 tmpbuf2 = smallbuf;
814 else
815 Newx(tmpbuf2, tmplen2 + 2, char);
816
817 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
818 /* Either they malloc'd it, or we malloc'd it,
819 so no prefix is present in ours. */
820 tmpbuf2[0] = '_';
821 tmpbuf2[1] = '<';
822 }
823
824 memcpy(tmpbuf2 + 2, s, tmplen2);
825 tmplen2 += 2;
826
8a5ee598 827 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 828 if (!isGV(gv2)) {
8a5ee598 829 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
830 /* adjust ${"::_<newfilename"} to store the new file name */
831 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
832 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
833 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
834 }
44867030
NC
835
836 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 837 }
e66cf94c 838 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 839 }
8a5ee598 840#endif
05ec9bb3 841 CopFILE_free(PL_curcop);
d9095cec 842 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 843 }
57843af0 844 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
845}
846
29595ff2 847#ifdef PERL_MAD
cd81e915 848/* skip space before PL_thistoken */
29595ff2
NC
849
850STATIC char *
851S_skipspace0(pTHX_ register char *s)
852{
853 s = skipspace(s);
854 if (!PL_madskills)
855 return s;
cd81e915
NC
856 if (PL_skipwhite) {
857 if (!PL_thiswhite)
6b29d1f5 858 PL_thiswhite = newSVpvs("");
cd81e915
NC
859 sv_catsv(PL_thiswhite, PL_skipwhite);
860 sv_free(PL_skipwhite);
861 PL_skipwhite = 0;
862 }
863 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
864 return s;
865}
866
cd81e915 867/* skip space after PL_thistoken */
29595ff2
NC
868
869STATIC char *
870S_skipspace1(pTHX_ register char *s)
871{
d4c19fe8 872 const char *start = s;
29595ff2
NC
873 I32 startoff = start - SvPVX(PL_linestr);
874
875 s = skipspace(s);
876 if (!PL_madskills)
877 return s;
878 start = SvPVX(PL_linestr) + startoff;
cd81e915 879 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 880 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
881 PL_thistoken = newSVpvn(tstart, start - tstart);
882 }
883 PL_realtokenstart = -1;
884 if (PL_skipwhite) {
885 if (!PL_nextwhite)
6b29d1f5 886 PL_nextwhite = newSVpvs("");
cd81e915
NC
887 sv_catsv(PL_nextwhite, PL_skipwhite);
888 sv_free(PL_skipwhite);
889 PL_skipwhite = 0;
29595ff2
NC
890 }
891 return s;
892}
893
894STATIC char *
895S_skipspace2(pTHX_ register char *s, SV **svp)
896{
c35e046a
AL
897 char *start;
898 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
899 const I32 startoff = s - SvPVX(PL_linestr);
900
29595ff2
NC
901 s = skipspace(s);
902 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
903 if (!PL_madskills || !svp)
904 return s;
905 start = SvPVX(PL_linestr) + startoff;
cd81e915 906 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 907 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
908 PL_thistoken = newSVpvn(tstart, start - tstart);
909 PL_realtokenstart = -1;
29595ff2 910 }
cd81e915 911 if (PL_skipwhite) {
29595ff2 912 if (!*svp)
6b29d1f5 913 *svp = newSVpvs("");
cd81e915
NC
914 sv_setsv(*svp, PL_skipwhite);
915 sv_free(PL_skipwhite);
916 PL_skipwhite = 0;
29595ff2
NC
917 }
918
919 return s;
920}
921#endif
922
80a702cd 923STATIC void
5fa550fb 924S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
925{
926 AV *av = CopFILEAVx(PL_curcop);
927 if (av) {
b9f83d2f 928 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
929 if (orig_sv)
930 sv_setsv(sv, orig_sv);
931 else
932 sv_setpvn(sv, buf, len);
80a702cd
RGS
933 (void)SvIOK_on(sv);
934 SvIV_set(sv, 0);
935 av_store(av, (I32)CopLINE(PL_curcop), sv);
936 }
937}
938
ffb4593c
NT
939/*
940 * S_skipspace
941 * Called to gobble the appropriate amount and type of whitespace.
942 * Skips comments as well.
943 */
944
76e3520e 945STATIC char *
cea2e8a9 946S_skipspace(pTHX_ register char *s)
a687059c 947{
97aff369 948 dVAR;
5db06880
NC
949#ifdef PERL_MAD
950 int curoff;
951 int startoff = s - SvPVX(PL_linestr);
952
cd81e915
NC
953 if (PL_skipwhite) {
954 sv_free(PL_skipwhite);
955 PL_skipwhite = 0;
5db06880
NC
956 }
957#endif
958
3280af22 959 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 960 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 961 s++;
5db06880
NC
962#ifdef PERL_MAD
963 goto done;
964#else
463ee0b2 965 return s;
5db06880 966#endif
463ee0b2
LW
967 }
968 for (;;) {
fd049845 969 STRLEN prevlen;
09bef843 970 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 971 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
972 while (s < PL_bufend && isSPACE(*s)) {
973 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
974 incline(s);
975 }
ffb4593c
NT
976
977 /* comment */
3280af22
NIS
978 if (s < PL_bufend && *s == '#') {
979 while (s < PL_bufend && *s != '\n')
463ee0b2 980 s++;
60e6418e 981 if (s < PL_bufend) {
463ee0b2 982 s++;
60e6418e
GS
983 if (PL_in_eval && !PL_rsfp) {
984 incline(s);
985 continue;
986 }
987 }
463ee0b2 988 }
ffb4593c
NT
989
990 /* only continue to recharge the buffer if we're at the end
991 * of the buffer, we're not reading from a source filter, and
992 * we're in normal lexing mode
993 */
09bef843
SB
994 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
995 PL_lex_state == LEX_FORMLINE)
5db06880
NC
996#ifdef PERL_MAD
997 goto done;
998#else
463ee0b2 999 return s;
5db06880 1000#endif
ffb4593c
NT
1001
1002 /* try to recharge the buffer */
5db06880
NC
1003#ifdef PERL_MAD
1004 curoff = s - SvPVX(PL_linestr);
1005#endif
1006
9cbb5ea2 1007 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1008 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1009 {
5db06880
NC
1010#ifdef PERL_MAD
1011 if (PL_madskills && curoff != startoff) {
cd81e915 1012 if (!PL_skipwhite)
6b29d1f5 1013 PL_skipwhite = newSVpvs("");
cd81e915 1014 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1015 curoff - startoff);
1016 }
1017
1018 /* mustn't throw out old stuff yet if madpropping */
1019 SvCUR(PL_linestr) = curoff;
1020 s = SvPVX(PL_linestr) + curoff;
1021 *s = 0;
1022 if (curoff && s[-1] == '\n')
1023 s[-1] = ' ';
1024#endif
1025
9cbb5ea2 1026 /* end of file. Add on the -p or -n magic */
cd81e915 1027 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1028 if (PL_minus_p) {
5db06880 1029#ifdef PERL_MAD
6502358f 1030 sv_catpvs(PL_linestr,
5db06880
NC
1031 ";}continue{print or die qq(-p destination: $!\\n);}");
1032#else
6502358f 1033 sv_setpvs(PL_linestr,
01a19ab0 1034 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1035#endif
3280af22 1036 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1037 }
01a19ab0 1038 else if (PL_minus_n) {
5db06880
NC
1039#ifdef PERL_MAD
1040 sv_catpvn(PL_linestr, ";}", 2);
1041#else
01a19ab0 1042 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1043#endif
01a19ab0
NC
1044 PL_minus_n = 0;
1045 }
a0d0e21e 1046 else
5db06880
NC
1047#ifdef PERL_MAD
1048 sv_catpvn(PL_linestr,";", 1);
1049#else
4147a61b 1050 sv_setpvn(PL_linestr,";", 1);
5db06880 1051#endif
ffb4593c
NT
1052
1053 /* reset variables for next time we lex */
9cbb5ea2 1054 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1055 = SvPVX(PL_linestr)
1056#ifdef PERL_MAD
1057 + curoff
1058#endif
1059 ;
3280af22 1060 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1061 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1062
1063 /* Close the filehandle. Could be from -P preprocessor,
1064 * STDIN, or a regular file. If we were reading code from
1065 * STDIN (because the commandline held no -e or filename)
1066 * then we don't close it, we reset it so the code can
1067 * read from STDIN too.
1068 */
1069
3280af22
NIS
1070 if (PL_preprocess && !PL_in_eval)
1071 (void)PerlProc_pclose(PL_rsfp);
1072 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1073 PerlIO_clearerr(PL_rsfp);
8990e307 1074 else
3280af22 1075 (void)PerlIO_close(PL_rsfp);
4608196e 1076 PL_rsfp = NULL;
463ee0b2
LW
1077 return s;
1078 }
ffb4593c
NT
1079
1080 /* not at end of file, so we only read another line */
09bef843
SB
1081 /* make corresponding updates to old pointers, for yyerror() */
1082 oldprevlen = PL_oldbufptr - PL_bufend;
1083 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1084 if (PL_last_uni)
1085 oldunilen = PL_last_uni - PL_bufend;
1086 if (PL_last_lop)
1087 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1088 PL_linestart = PL_bufptr = s + prevlen;
1089 PL_bufend = s + SvCUR(PL_linestr);
1090 s = PL_bufptr;
09bef843
SB
1091 PL_oldbufptr = s + oldprevlen;
1092 PL_oldoldbufptr = s + oldoldprevlen;
1093 if (PL_last_uni)
1094 PL_last_uni = s + oldunilen;
1095 if (PL_last_lop)
1096 PL_last_lop = s + oldloplen;
a0d0e21e 1097 incline(s);
ffb4593c
NT
1098
1099 /* debugger active and we're not compiling the debugger code,
1100 * so store the line into the debugger's array of lines
1101 */
80a702cd 1102 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1103 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1104 }
5db06880
NC
1105
1106#ifdef PERL_MAD
1107 done:
1108 if (PL_madskills) {
cd81e915 1109 if (!PL_skipwhite)
6b29d1f5 1110 PL_skipwhite = newSVpvs("");
5db06880
NC
1111 curoff = s - SvPVX(PL_linestr);
1112 if (curoff - startoff)
cd81e915 1113 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1114 curoff - startoff);
1115 }
1116 return s;
1117#endif
a687059c 1118}
378cc40b 1119
ffb4593c
NT
1120/*
1121 * S_check_uni
1122 * Check the unary operators to ensure there's no ambiguity in how they're
1123 * used. An ambiguous piece of code would be:
1124 * rand + 5
1125 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1126 * the +5 is its argument.
1127 */
1128
76e3520e 1129STATIC void
cea2e8a9 1130S_check_uni(pTHX)
ba106d47 1131{
97aff369 1132 dVAR;
d4c19fe8
AL
1133 const char *s;
1134 const char *t;
2f3197b3 1135
3280af22 1136 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1137 return;
3280af22
NIS
1138 while (isSPACE(*PL_last_uni))
1139 PL_last_uni++;
c35e046a
AL
1140 s = PL_last_uni;
1141 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1142 s++;
3280af22 1143 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1144 return;
6136c704 1145
0453d815 1146 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1147 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1148 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1149 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1150 }
2f3197b3
LW
1151}
1152
ffb4593c
NT
1153/*
1154 * LOP : macro to build a list operator. Its behaviour has been replaced
1155 * with a subroutine, S_lop() for which LOP is just another name.
1156 */
1157
a0d0e21e
LW
1158#define LOP(f,x) return lop(f,x,s)
1159
ffb4593c
NT
1160/*
1161 * S_lop
1162 * Build a list operator (or something that might be one). The rules:
1163 * - if we have a next token, then it's a list operator [why?]
1164 * - if the next thing is an opening paren, then it's a function
1165 * - else it's a list operator
1166 */
1167
76e3520e 1168STATIC I32
a0be28da 1169S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1170{
97aff369 1171 dVAR;
79072805 1172 yylval.ival = f;
35c8bce7 1173 CLINE;
3280af22
NIS
1174 PL_expect = x;
1175 PL_bufptr = s;
1176 PL_last_lop = PL_oldbufptr;
eb160463 1177 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1178#ifdef PERL_MAD
1179 if (PL_lasttoke)
1180 return REPORT(LSTOP);
1181#else
3280af22 1182 if (PL_nexttoke)
bbf60fe6 1183 return REPORT(LSTOP);
5db06880 1184#endif
79072805 1185 if (*s == '(')
bbf60fe6 1186 return REPORT(FUNC);
29595ff2 1187 s = PEEKSPACE(s);
79072805 1188 if (*s == '(')
bbf60fe6 1189 return REPORT(FUNC);
79072805 1190 else
bbf60fe6 1191 return REPORT(LSTOP);
79072805
LW
1192}
1193
5db06880
NC
1194#ifdef PERL_MAD
1195 /*
1196 * S_start_force
1197 * Sets up for an eventual force_next(). start_force(0) basically does
1198 * an unshift, while start_force(-1) does a push. yylex removes items
1199 * on the "pop" end.
1200 */
1201
1202STATIC void
1203S_start_force(pTHX_ int where)
1204{
1205 int i;
1206
cd81e915 1207 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1208 where = PL_lasttoke;
cd81e915
NC
1209 assert(PL_curforce < 0 || PL_curforce == where);
1210 if (PL_curforce != where) {
5db06880
NC
1211 for (i = PL_lasttoke; i > where; --i) {
1212 PL_nexttoke[i] = PL_nexttoke[i-1];
1213 }
1214 PL_lasttoke++;
1215 }
cd81e915 1216 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1217 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1218 PL_curforce = where;
1219 if (PL_nextwhite) {
5db06880 1220 if (PL_madskills)
6b29d1f5 1221 curmad('^', newSVpvs(""));
cd81e915 1222 CURMAD('_', PL_nextwhite);
5db06880
NC
1223 }
1224}
1225
1226STATIC void
1227S_curmad(pTHX_ char slot, SV *sv)
1228{
1229 MADPROP **where;
1230
1231 if (!sv)
1232 return;
cd81e915
NC
1233 if (PL_curforce < 0)
1234 where = &PL_thismad;
5db06880 1235 else
cd81e915 1236 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1237
cd81e915 1238 if (PL_faketokens)
5db06880
NC
1239 sv_setpvn(sv, "", 0);
1240 else {
1241 if (!IN_BYTES) {
1242 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1243 SvUTF8_on(sv);
1244 else if (PL_encoding) {
1245 sv_recode_to_utf8(sv, PL_encoding);
1246 }
1247 }
1248 }
1249
1250 /* keep a slot open for the head of the list? */
1251 if (slot != '_' && *where && (*where)->mad_key == '^') {
1252 (*where)->mad_key = slot;
1253 sv_free((*where)->mad_val);
1254 (*where)->mad_val = (void*)sv;
1255 }
1256 else
1257 addmad(newMADsv(slot, sv), where, 0);
1258}
1259#else
b3f24c00
MHM
1260# define start_force(where) NOOP
1261# define curmad(slot, sv) NOOP
5db06880
NC
1262#endif
1263
ffb4593c
NT
1264/*
1265 * S_force_next
9cbb5ea2 1266 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1267 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1268 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1269 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1270 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1271 */
1272
4e553d73 1273STATIC void
cea2e8a9 1274S_force_next(pTHX_ I32 type)
79072805 1275{
97aff369 1276 dVAR;
5db06880 1277#ifdef PERL_MAD
cd81e915 1278 if (PL_curforce < 0)
5db06880 1279 start_force(PL_lasttoke);
cd81e915 1280 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1281 if (PL_lex_state != LEX_KNOWNEXT)
1282 PL_lex_defer = PL_lex_state;
1283 PL_lex_state = LEX_KNOWNEXT;
1284 PL_lex_expect = PL_expect;
cd81e915 1285 PL_curforce = -1;
5db06880 1286#else
3280af22
NIS
1287 PL_nexttype[PL_nexttoke] = type;
1288 PL_nexttoke++;
1289 if (PL_lex_state != LEX_KNOWNEXT) {
1290 PL_lex_defer = PL_lex_state;
1291 PL_lex_expect = PL_expect;
1292 PL_lex_state = LEX_KNOWNEXT;
79072805 1293 }
5db06880 1294#endif
79072805
LW
1295}
1296
d0a148a6
NC
1297STATIC SV *
1298S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1299{
97aff369 1300 dVAR;
9d4ba2ae 1301 SV * const sv = newSVpvn(start,len);
bfed75c6 1302 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1303 SvUTF8_on(sv);
1304 return sv;
1305}
1306
ffb4593c
NT
1307/*
1308 * S_force_word
1309 * When the lexer knows the next thing is a word (for instance, it has
1310 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1311 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1312 * lookahead.
ffb4593c
NT
1313 *
1314 * Arguments:
b1b65b59 1315 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1316 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1317 * int check_keyword : if true, Perl checks to make sure the word isn't
1318 * a keyword (do this if the word is a label, e.g. goto FOO)
1319 * int allow_pack : if true, : characters will also be allowed (require,
1320 * use, etc. do this)
9cbb5ea2 1321 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1322 */
1323
76e3520e 1324STATIC char *
cea2e8a9 1325S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1326{
97aff369 1327 dVAR;
463ee0b2
LW
1328 register char *s;
1329 STRLEN len;
4e553d73 1330
29595ff2 1331 start = SKIPSPACE1(start);
463ee0b2 1332 s = start;
7e2040f0 1333 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1334 (allow_pack && *s == ':') ||
15f0808c 1335 (allow_initial_tick && *s == '\'') )
a0d0e21e 1336 {
3280af22 1337 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1338 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1339 return start;
cd81e915 1340 start_force(PL_curforce);
5db06880
NC
1341 if (PL_madskills)
1342 curmad('X', newSVpvn(start,s-start));
463ee0b2 1343 if (token == METHOD) {
29595ff2 1344 s = SKIPSPACE1(s);
463ee0b2 1345 if (*s == '(')
3280af22 1346 PL_expect = XTERM;
463ee0b2 1347 else {
3280af22 1348 PL_expect = XOPERATOR;
463ee0b2 1349 }
79072805 1350 }
9ded7720 1351 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1352 = (OP*)newSVOP(OP_CONST,0,
1353 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1354 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1355 force_next(token);
1356 }
1357 return s;
1358}
1359
ffb4593c
NT
1360/*
1361 * S_force_ident
9cbb5ea2 1362 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1363 * text only contains the "foo" portion. The first argument is a pointer
1364 * to the "foo", and the second argument is the type symbol to prefix.
1365 * Forces the next token to be a "WORD".
9cbb5ea2 1366 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1367 */
1368
76e3520e 1369STATIC void
bfed75c6 1370S_force_ident(pTHX_ register const char *s, int kind)
79072805 1371{
97aff369 1372 dVAR;
c35e046a 1373 if (*s) {
90e5519e
NC
1374 const STRLEN len = strlen(s);
1375 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1376 start_force(PL_curforce);
9ded7720 1377 NEXTVAL_NEXTTOKE.opval = o;
79072805 1378 force_next(WORD);
748a9306 1379 if (kind) {
11343788 1380 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1381 /* XXX see note in pp_entereval() for why we forgo typo
1382 warnings if the symbol must be introduced in an eval.
1383 GSAR 96-10-12 */
90e5519e
NC
1384 gv_fetchpvn_flags(s, len,
1385 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1386 : GV_ADD,
1387 kind == '$' ? SVt_PV :
1388 kind == '@' ? SVt_PVAV :
1389 kind == '%' ? SVt_PVHV :
a0d0e21e 1390 SVt_PVGV
90e5519e 1391 );
748a9306 1392 }
79072805
LW
1393 }
1394}
1395
1571675a
GS
1396NV
1397Perl_str_to_version(pTHX_ SV *sv)
1398{
1399 NV retval = 0.0;
1400 NV nshift = 1.0;
1401 STRLEN len;
cfd0369c 1402 const char *start = SvPV_const(sv,len);
9d4ba2ae 1403 const char * const end = start + len;
504618e9 1404 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1405 while (start < end) {
ba210ebe 1406 STRLEN skip;
1571675a
GS
1407 UV n;
1408 if (utf)
9041c2e3 1409 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1410 else {
1411 n = *(U8*)start;
1412 skip = 1;
1413 }
1414 retval += ((NV)n)/nshift;
1415 start += skip;
1416 nshift *= 1000;
1417 }
1418 return retval;
1419}
1420
4e553d73 1421/*
ffb4593c
NT
1422 * S_force_version
1423 * Forces the next token to be a version number.
e759cc13
RGS
1424 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1425 * and if "guessing" is TRUE, then no new token is created (and the caller
1426 * must use an alternative parsing method).
ffb4593c
NT
1427 */
1428
76e3520e 1429STATIC char *
e759cc13 1430S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1431{
97aff369 1432 dVAR;
5f66b61c 1433 OP *version = NULL;
44dcb63b 1434 char *d;
5db06880
NC
1435#ifdef PERL_MAD
1436 I32 startoff = s - SvPVX(PL_linestr);
1437#endif
89bfa8cd 1438
29595ff2 1439 s = SKIPSPACE1(s);
89bfa8cd 1440
44dcb63b 1441 d = s;
dd629d5b 1442 if (*d == 'v')
44dcb63b 1443 d++;
44dcb63b 1444 if (isDIGIT(*d)) {
e759cc13
RGS
1445 while (isDIGIT(*d) || *d == '_' || *d == '.')
1446 d++;
5db06880
NC
1447#ifdef PERL_MAD
1448 if (PL_madskills) {
cd81e915 1449 start_force(PL_curforce);
5db06880
NC
1450 curmad('X', newSVpvn(s,d-s));
1451 }
1452#endif
9f3d182e 1453 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1454 SV *ver;
b73d6f50 1455 s = scan_num(s, &yylval);
89bfa8cd 1456 version = yylval.opval;
dd629d5b
GS
1457 ver = cSVOPx(version)->op_sv;
1458 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1459 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1460 SvNV_set(ver, str_to_version(ver));
1571675a 1461 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1462 }
89bfa8cd 1463 }
5db06880
NC
1464 else if (guessing) {
1465#ifdef PERL_MAD
1466 if (PL_madskills) {
cd81e915
NC
1467 sv_free(PL_nextwhite); /* let next token collect whitespace */
1468 PL_nextwhite = 0;
5db06880
NC
1469 s = SvPVX(PL_linestr) + startoff;
1470 }
1471#endif
e759cc13 1472 return s;
5db06880 1473 }
89bfa8cd
PP
1474 }
1475
5db06880
NC
1476#ifdef PERL_MAD
1477 if (PL_madskills && !version) {
cd81e915
NC
1478 sv_free(PL_nextwhite); /* let next token collect whitespace */
1479 PL_nextwhite = 0;
5db06880
NC
1480 s = SvPVX(PL_linestr) + startoff;
1481 }
1482#endif
89bfa8cd 1483 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1484 start_force(PL_curforce);
9ded7720 1485 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1486 force_next(WORD);
89bfa8cd 1487
e759cc13 1488 return s;
89bfa8cd
PP
1489}
1490
ffb4593c
NT
1491/*
1492 * S_tokeq
1493 * Tokenize a quoted string passed in as an SV. It finds the next
1494 * chunk, up to end of string or a backslash. It may make a new
1495 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1496 * turns \\ into \.
1497 */
1498
76e3520e 1499STATIC SV *
cea2e8a9 1500S_tokeq(pTHX_ SV *sv)
79072805 1501{
97aff369 1502 dVAR;
79072805
LW
1503 register char *s;
1504 register char *send;
1505 register char *d;
b3ac6de7
IZ
1506 STRLEN len = 0;
1507 SV *pv = sv;
79072805
LW
1508
1509 if (!SvLEN(sv))
b3ac6de7 1510 goto finish;
79072805 1511
a0d0e21e 1512 s = SvPV_force(sv, len);
21a311ee 1513 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1514 goto finish;
463ee0b2 1515 send = s + len;
79072805
LW
1516 while (s < send && *s != '\\')
1517 s++;
1518 if (s == send)
b3ac6de7 1519 goto finish;
79072805 1520 d = s;
be4731d2 1521 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1522 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1523 if (SvUTF8(sv))
1524 SvUTF8_on(pv);
1525 }
79072805
LW
1526 while (s < send) {
1527 if (*s == '\\') {
a0d0e21e 1528 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1529 s++; /* all that, just for this */
1530 }
1531 *d++ = *s++;
1532 }
1533 *d = '\0';
95a20fc0 1534 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1535 finish:
3280af22 1536 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1537 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1538 return sv;
1539}
1540
ffb4593c
NT
1541/*
1542 * Now come three functions related to double-quote context,
1543 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1544 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1545 * interact with PL_lex_state, and create fake ( ... ) argument lists
1546 * to handle functions and concatenation.
1547 * They assume that whoever calls them will be setting up a fake
1548 * join call, because each subthing puts a ',' after it. This lets
1549 * "lower \luPpEr"
1550 * become
1551 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1552 *
1553 * (I'm not sure whether the spurious commas at the end of lcfirst's
1554 * arguments and join's arguments are created or not).
1555 */
1556
1557/*
1558 * S_sublex_start
1559 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1560 *
1561 * Pattern matching will set PL_lex_op to the pattern-matching op to
1562 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1563 *
1564 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1565 *
1566 * Everything else becomes a FUNC.
1567 *
1568 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1569 * had an OP_CONST or OP_READLINE). This just sets us up for a
1570 * call to S_sublex_push().
1571 */
1572
76e3520e 1573STATIC I32
cea2e8a9 1574S_sublex_start(pTHX)
79072805 1575{
97aff369 1576 dVAR;
0d46e09a 1577 register const I32 op_type = yylval.ival;
79072805
LW
1578
1579 if (op_type == OP_NULL) {
3280af22 1580 yylval.opval = PL_lex_op;
5f66b61c 1581 PL_lex_op = NULL;
79072805
LW
1582 return THING;
1583 }
1584 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1585 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1586
1587 if (SvTYPE(sv) == SVt_PVIV) {
1588 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1589 STRLEN len;
96a5add6 1590 const char * const p = SvPV_const(sv, len);
f54cb97a 1591 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1592 if (SvUTF8(sv))
1593 SvUTF8_on(nsv);
b3ac6de7
IZ
1594 SvREFCNT_dec(sv);
1595 sv = nsv;
4e553d73 1596 }
b3ac6de7 1597 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1598 PL_lex_stuff = NULL;
6f33ba73
RGS
1599 /* Allow <FH> // "foo" */
1600 if (op_type == OP_READLINE)
1601 PL_expect = XTERMORDORDOR;
79072805
LW
1602 return THING;
1603 }
e3f73d4e
RGS
1604 else if (op_type == OP_BACKTICK && PL_lex_op) {
1605 /* readpipe() vas overriden */
1606 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1607 yylval.opval = PL_lex_op;
9b201d7d 1608 PL_lex_op = NULL;
e3f73d4e
RGS
1609 PL_lex_stuff = NULL;
1610 return THING;
1611 }
79072805 1612
3280af22
NIS
1613 PL_sublex_info.super_state = PL_lex_state;
1614 PL_sublex_info.sub_inwhat = op_type;
1615 PL_sublex_info.sub_op = PL_lex_op;
1616 PL_lex_state = LEX_INTERPPUSH;
55497cff 1617
3280af22
NIS
1618 PL_expect = XTERM;
1619 if (PL_lex_op) {
1620 yylval.opval = PL_lex_op;
5f66b61c 1621 PL_lex_op = NULL;
55497cff
PP
1622 return PMFUNC;
1623 }
1624 else
1625 return FUNC;
1626}
1627
ffb4593c
NT
1628/*
1629 * S_sublex_push
1630 * Create a new scope to save the lexing state. The scope will be
1631 * ended in S_sublex_done. Returns a '(', starting the function arguments
1632 * to the uc, lc, etc. found before.
1633 * Sets PL_lex_state to LEX_INTERPCONCAT.
1634 */
1635
76e3520e 1636STATIC I32
cea2e8a9 1637S_sublex_push(pTHX)
55497cff 1638{
27da23d5 1639 dVAR;
f46d017c 1640 ENTER;
55497cff 1641
3280af22
NIS
1642 PL_lex_state = PL_sublex_info.super_state;
1643 SAVEI32(PL_lex_dojoin);
1644 SAVEI32(PL_lex_brackets);
3280af22
NIS
1645 SAVEI32(PL_lex_casemods);
1646 SAVEI32(PL_lex_starts);
1647 SAVEI32(PL_lex_state);
7766f137 1648 SAVEVPTR(PL_lex_inpat);
3280af22 1649 SAVEI32(PL_lex_inwhat);
57843af0 1650 SAVECOPLINE(PL_curcop);
3280af22 1651 SAVEPPTR(PL_bufptr);
8452ff4b 1652 SAVEPPTR(PL_bufend);
3280af22
NIS
1653 SAVEPPTR(PL_oldbufptr);
1654 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1655 SAVEPPTR(PL_last_lop);
1656 SAVEPPTR(PL_last_uni);
3280af22
NIS
1657 SAVEPPTR(PL_linestart);
1658 SAVESPTR(PL_linestr);
8edd5f42
RGS
1659 SAVEGENERICPV(PL_lex_brackstack);
1660 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1661
1662 PL_linestr = PL_lex_stuff;
a0714e2c 1663 PL_lex_stuff = NULL;
3280af22 1664
9cbb5ea2
GS
1665 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1666 = SvPVX(PL_linestr);
3280af22 1667 PL_bufend += SvCUR(PL_linestr);
bd61b366 1668 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1669 SAVEFREESV(PL_linestr);
1670
1671 PL_lex_dojoin = FALSE;
1672 PL_lex_brackets = 0;
a02a5408
JC
1673 Newx(PL_lex_brackstack, 120, char);
1674 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1675 PL_lex_casemods = 0;
1676 *PL_lex_casestack = '\0';
1677 PL_lex_starts = 0;
1678 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1679 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1680
1681 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1682 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1683 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1684 else
5f66b61c 1685 PL_lex_inpat = NULL;
79072805 1686
55497cff 1687 return '(';
79072805
LW
1688}
1689
ffb4593c
NT
1690/*
1691 * S_sublex_done
1692 * Restores lexer state after a S_sublex_push.
1693 */
1694
76e3520e 1695STATIC I32
cea2e8a9 1696S_sublex_done(pTHX)
79072805 1697{
27da23d5 1698 dVAR;
3280af22 1699 if (!PL_lex_starts++) {
396482e1 1700 SV * const sv = newSVpvs("");
9aa983d2
JH
1701 if (SvUTF8(PL_linestr))
1702 SvUTF8_on(sv);
3280af22 1703 PL_expect = XOPERATOR;
9aa983d2 1704 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1705 return THING;
1706 }
1707
3280af22
NIS
1708 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1709 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1710 return yylex();
79072805
LW
1711 }
1712
ffb4593c 1713 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1714 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1715 PL_linestr = PL_lex_repl;
1716 PL_lex_inpat = 0;
1717 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1718 PL_bufend += SvCUR(PL_linestr);
bd61b366 1719 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1720 SAVEFREESV(PL_linestr);
1721 PL_lex_dojoin = FALSE;
1722 PL_lex_brackets = 0;
3280af22
NIS
1723 PL_lex_casemods = 0;
1724 *PL_lex_casestack = '\0';
1725 PL_lex_starts = 0;
25da4f38 1726 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1727 PL_lex_state = LEX_INTERPNORMAL;
1728 PL_lex_starts++;
e9fa98b2
HS
1729 /* we don't clear PL_lex_repl here, so that we can check later
1730 whether this is an evalled subst; that means we rely on the
1731 logic to ensure sublex_done() is called again only via the
1732 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1733 }
e9fa98b2 1734 else {
3280af22 1735 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1736 PL_lex_repl = NULL;
e9fa98b2 1737 }
79072805 1738 return ',';
ffed7fef
LW
1739 }
1740 else {
5db06880
NC
1741#ifdef PERL_MAD
1742 if (PL_madskills) {
cd81e915
NC
1743 if (PL_thiswhite) {
1744 if (!PL_endwhite)
6b29d1f5 1745 PL_endwhite = newSVpvs("");
cd81e915
NC
1746 sv_catsv(PL_endwhite, PL_thiswhite);
1747 PL_thiswhite = 0;
1748 }
1749 if (PL_thistoken)
1750 sv_setpvn(PL_thistoken,"",0);
5db06880 1751 else
cd81e915 1752 PL_realtokenstart = -1;
5db06880
NC
1753 }
1754#endif
f46d017c 1755 LEAVE;
3280af22
NIS
1756 PL_bufend = SvPVX(PL_linestr);
1757 PL_bufend += SvCUR(PL_linestr);
1758 PL_expect = XOPERATOR;
09bef843 1759 PL_sublex_info.sub_inwhat = 0;
79072805 1760 return ')';
ffed7fef
LW
1761 }
1762}
1763
02aa26ce
NT
1764/*
1765 scan_const
1766
1767 Extracts a pattern, double-quoted string, or transliteration. This
1768 is terrifying code.
1769
94def140 1770 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1771 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1772 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1773
94def140
ST
1774 Returns a pointer to the character scanned up to. If this is
1775 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1776 successfully parsed), will leave an OP for the substring scanned
1777 in yylval. Caller must intuit reason for not parsing further
1778 by looking at the next characters herself.
1779
02aa26ce
NT
1780 In patterns:
1781 backslashes:
1782 double-quoted style: \r and \n
1783 regexp special ones: \D \s
94def140
ST
1784 constants: \x31
1785 backrefs: \1
02aa26ce
NT
1786 case and quoting: \U \Q \E
1787 stops on @ and $, but not for $ as tail anchor
1788
1789 In transliterations:
1790 characters are VERY literal, except for - not at the start or end
94def140
ST
1791 of the string, which indicates a range. If the range is in bytes,
1792 scan_const expands the range to the full set of intermediate
1793 characters. If the range is in utf8, the hyphen is replaced with
1794 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1795
1796 In double-quoted strings:
1797 backslashes:
1798 double-quoted style: \r and \n
94def140
ST
1799 constants: \x31
1800 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1801 case and quoting: \U \Q \E
1802 stops on @ and $
1803
1804 scan_const does *not* construct ops to handle interpolated strings.
1805 It stops processing as soon as it finds an embedded $ or @ variable
1806 and leaves it to the caller to work out what's going on.
1807
94def140
ST
1808 embedded arrays (whether in pattern or not) could be:
1809 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1810
1811 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1812
1813 $ in pattern could be $foo or could be tail anchor. Assumption:
1814 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1815 followed by one of "()| \r\n\t"
02aa26ce
NT
1816
1817 \1 (backreferences) are turned into $1
1818
1819 The structure of the code is
1820 while (there's a character to process) {
94def140
ST
1821 handle transliteration ranges
1822 skip regexp comments /(?#comment)/ and codes /(?{code})/
1823 skip #-initiated comments in //x patterns
1824 check for embedded arrays
02aa26ce
NT
1825 check for embedded scalars
1826 if (backslash) {
94def140
ST
1827 leave intact backslashes from leaveit (below)
1828 deprecate \1 in substitution replacements
02aa26ce
NT
1829 handle string-changing backslashes \l \U \Q \E, etc.
1830 switch (what was escaped) {
94def140
ST
1831 handle \- in a transliteration (becomes a literal -)
1832 handle \132 (octal characters)
1833 handle \x15 and \x{1234} (hex characters)
1834 handle \N{name} (named characters)
1835 handle \cV (control characters)
1836 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1837 } (end switch)
1838 } (end if backslash)
1839 } (end while character to read)
4e553d73 1840
02aa26ce
NT
1841*/
1842
76e3520e 1843STATIC char *
cea2e8a9 1844S_scan_const(pTHX_ char *start)
79072805 1845{
97aff369 1846 dVAR;
3280af22 1847 register char *send = PL_bufend; /* end of the constant */
561b68a9 1848 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1849 register char *s = start; /* start of the constant */
1850 register char *d = SvPVX(sv); /* destination for copies */
1851 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1852 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1853 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1854 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1855 UV uv;
4c3a8340
ST
1856#ifdef EBCDIC
1857 UV literal_endpoint = 0;
e294cc5d 1858 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1859#endif
012bcf8d 1860
2b9d42f0
NIS
1861 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1862 /* If we are doing a trans and we know we want UTF8 set expectation */
1863 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1864 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1865 }
1866
1867
79072805 1868 while (s < send || dorange) {
02aa26ce 1869 /* get transliterations out of the way (they're most literal) */
3280af22 1870 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1871 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1872 if (dorange) {
1ba5c669
JH
1873 I32 i; /* current expanded character */
1874 I32 min; /* first character in range */
1875 I32 max; /* last character in range */
02aa26ce 1876
e294cc5d
JH
1877#ifdef EBCDIC
1878 UV uvmax = 0;
1879#endif
1880
1881 if (has_utf8
1882#ifdef EBCDIC
1883 && !native_range
1884#endif
1885 ) {
9d4ba2ae 1886 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1887 char *e = d++;
1888 while (e-- > c)
1889 *(e + 1) = *e;
25716404 1890 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1891 /* mark the range as done, and continue */
1892 dorange = FALSE;
1893 didrange = TRUE;
1894 continue;
1895 }
2b9d42f0 1896
95a20fc0 1897 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1898#ifdef EBCDIC
1899 SvGROW(sv,
1900 SvLEN(sv) + (has_utf8 ?
1901 (512 - UTF_CONTINUATION_MARK +
1902 UNISKIP(0x100))
1903 : 256));
1904 /* How many two-byte within 0..255: 128 in UTF-8,
1905 * 96 in UTF-8-mod. */
1906#else
9cbb5ea2 1907 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1908#endif
9cbb5ea2 1909 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1910#ifdef EBCDIC
1911 if (has_utf8) {
1912 int j;
1913 for (j = 0; j <= 1; j++) {
1914 char * const c = (char*)utf8_hop((U8*)d, -1);
1915 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1916 if (j)
1917 min = (U8)uv;
1918 else if (uv < 256)
1919 max = (U8)uv;
1920 else {
1921 max = (U8)0xff; /* only to \xff */
1922 uvmax = uv; /* \x{100} to uvmax */
1923 }
1924 d = c; /* eat endpoint chars */
1925 }
1926 }
1927 else {
1928#endif
1929 d -= 2; /* eat the first char and the - */
1930 min = (U8)*d; /* first char in range */
1931 max = (U8)d[1]; /* last char in range */
1932#ifdef EBCDIC
1933 }
1934#endif
8ada0baa 1935
c2e66d9e 1936 if (min > max) {
01ec43d0 1937 Perl_croak(aTHX_
d1573ac7 1938 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1939 (char)min, (char)max);
c2e66d9e
GS
1940 }
1941
c7f1f016 1942#ifdef EBCDIC
4c3a8340
ST
1943 if (literal_endpoint == 2 &&
1944 ((isLOWER(min) && isLOWER(max)) ||
1945 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1946 if (isLOWER(min)) {
1947 for (i = min; i <= max; i++)
1948 if (isLOWER(i))
db42d148 1949 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1950 } else {
1951 for (i = min; i <= max; i++)
1952 if (isUPPER(i))
db42d148 1953 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1954 }
1955 }
1956 else
1957#endif
1958 for (i = min; i <= max; i++)
e294cc5d
JH
1959#ifdef EBCDIC
1960 if (has_utf8) {
1961 const U8 ch = (U8)NATIVE_TO_UTF(i);
1962 if (UNI_IS_INVARIANT(ch))
1963 *d++ = (U8)i;
1964 else {
1965 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1966 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1967 }
1968 }
1969 else
1970#endif
1971 *d++ = (char)i;
1972
1973#ifdef EBCDIC
1974 if (uvmax) {
1975 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1976 if (uvmax > 0x101)
1977 *d++ = (char)UTF_TO_NATIVE(0xff);
1978 if (uvmax > 0x100)
1979 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1980 }
1981#endif
02aa26ce
NT
1982
1983 /* mark the range as done, and continue */
79072805 1984 dorange = FALSE;
01ec43d0 1985 didrange = TRUE;
4c3a8340
ST
1986#ifdef EBCDIC
1987 literal_endpoint = 0;
1988#endif
79072805 1989 continue;
4e553d73 1990 }
02aa26ce
NT
1991
1992 /* range begins (ignore - as first or last char) */
79072805 1993 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1994 if (didrange) {
1fafa243 1995 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1996 }
e294cc5d
JH
1997 if (has_utf8
1998#ifdef EBCDIC
1999 && !native_range
2000#endif
2001 ) {
25716404 2002 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2003 s++;
2004 continue;
2005 }
79072805
LW
2006 dorange = TRUE;
2007 s++;
01ec43d0
GS
2008 }
2009 else {
2010 didrange = FALSE;
4c3a8340
ST
2011#ifdef EBCDIC
2012 literal_endpoint = 0;
e294cc5d 2013 native_range = TRUE;
4c3a8340 2014#endif
01ec43d0 2015 }
79072805 2016 }
02aa26ce
NT
2017
2018 /* if we get here, we're not doing a transliteration */
2019
0f5d15d6
IZ
2020 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2021 except for the last char, which will be done separately. */
3280af22 2022 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2023 if (s[2] == '#') {
e994fd66 2024 while (s+1 < send && *s != ')')
db42d148 2025 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2026 }
2027 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2028 || (s[2] == '?' && s[3] == '{'))
155aba94 2029 {
cc6b7395 2030 I32 count = 1;
0f5d15d6 2031 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2032 char c;
2033
d9f97599
GS
2034 while (count && (c = *regparse)) {
2035 if (c == '\\' && regparse[1])
2036 regparse++;
4e553d73 2037 else if (c == '{')
cc6b7395 2038 count++;
4e553d73 2039 else if (c == '}')
cc6b7395 2040 count--;
d9f97599 2041 regparse++;
cc6b7395 2042 }
e994fd66 2043 if (*regparse != ')')
5bdf89e7 2044 regparse--; /* Leave one char for continuation. */
0f5d15d6 2045 while (s < regparse)
db42d148 2046 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2047 }
748a9306 2048 }
02aa26ce
NT
2049
2050 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2051 else if (*s == '#' && PL_lex_inpat &&
2052 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2053 while (s+1 < send && *s != '\n')
db42d148 2054 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2055 }
02aa26ce 2056
5d1d4326 2057 /* check for embedded arrays
da6eedaa 2058 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2059 */
1749ea0d
ST
2060 else if (*s == '@' && s[1]) {
2061 if (isALNUM_lazy_if(s+1,UTF))
2062 break;
2063 if (strchr(":'{$", s[1]))
2064 break;
2065 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2066 break; /* in regexp, neither @+ nor @- are interpolated */
2067 }
02aa26ce
NT
2068
2069 /* check for embedded scalars. only stop if we're sure it's a
2070 variable.
2071 */
79072805 2072 else if (*s == '$') {
3280af22 2073 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2074 break;
6002328a 2075 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2076 break; /* in regexp, $ might be tail anchor */
2077 }
02aa26ce 2078
2b9d42f0
NIS
2079 /* End of else if chain - OP_TRANS rejoin rest */
2080
02aa26ce 2081 /* backslashes */
79072805
LW
2082 if (*s == '\\' && s+1 < send) {
2083 s++;
02aa26ce 2084
02aa26ce 2085 /* deprecate \1 in strings and substitution replacements */
3280af22 2086 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2087 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2088 {
599cee73 2089 if (ckWARN(WARN_SYNTAX))
9014280d 2090 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2091 *--s = '$';
2092 break;
2093 }
02aa26ce
NT
2094
2095 /* string-change backslash escapes */
3280af22 2096 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2097 --s;
2098 break;
2099 }
cc74c5bd
ST
2100 /* skip any other backslash escapes in a pattern */
2101 else if (PL_lex_inpat) {
2102 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2103 goto default_action;
2104 }
02aa26ce
NT
2105
2106 /* if we get here, it's either a quoted -, or a digit */
79072805 2107 switch (*s) {
02aa26ce
NT
2108
2109 /* quoted - in transliterations */
79072805 2110 case '-':
3280af22 2111 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2112 *d++ = *s++;
2113 continue;
2114 }
2115 /* FALL THROUGH */
2116 default:
11b8faa4 2117 {
86f97054 2118 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2119 ckWARN(WARN_MISC))
9014280d 2120 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2121 "Unrecognized escape \\%c passed through",
2122 *s);
11b8faa4 2123 /* default action is to copy the quoted character */
f9a63242 2124 goto default_action;
11b8faa4 2125 }
02aa26ce
NT
2126
2127 /* \132 indicates an octal constant */
79072805
LW
2128 case '0': case '1': case '2': case '3':
2129 case '4': case '5': case '6': case '7':
ba210ebe 2130 {
53305cf1
NC
2131 I32 flags = 0;
2132 STRLEN len = 3;
2133 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2134 s += len;
2135 }
012bcf8d 2136 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2137
2138 /* \x24 indicates a hex constant */
79072805 2139 case 'x':
a0ed51b3
LW
2140 ++s;
2141 if (*s == '{') {
9d4ba2ae 2142 char* const e = strchr(s, '}');
a4c04bdc
NC
2143 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2144 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2145 STRLEN len;
355860ce 2146
53305cf1 2147 ++s;
adaeee49 2148 if (!e) {
a0ed51b3 2149 yyerror("Missing right brace on \\x{}");
355860ce 2150 continue;
ba210ebe 2151 }
53305cf1
NC
2152 len = e - s;
2153 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2154 s = e + 1;
a0ed51b3
LW
2155 }
2156 else {
ba210ebe 2157 {
53305cf1 2158 STRLEN len = 2;
a4c04bdc 2159 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2160 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2161 s += len;
2162 }
012bcf8d
GS
2163 }
2164
2165 NUM_ESCAPE_INSERT:
2166 /* Insert oct or hex escaped character.
301d3d20 2167 * There will always enough room in sv since such
db42d148 2168 * escapes will be longer than any UTF-8 sequence
301d3d20 2169 * they can end up as. */
ba7cea30 2170
c7f1f016
NIS
2171 /* We need to map to chars to ASCII before doing the tests
2172 to cover EBCDIC
2173 */
c4d5f83a 2174 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2175 if (!has_utf8 && uv > 255) {
301d3d20
JH
2176 /* Might need to recode whatever we have
2177 * accumulated so far if it contains any
2178 * hibit chars.
2179 *
2180 * (Can't we keep track of that and avoid
2181 * this rescan? --jhi)
012bcf8d 2182 */
c7f1f016 2183 int hicount = 0;
63cd0674
NIS
2184 U8 *c;
2185 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2186 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2187 hicount++;
db42d148 2188 }
012bcf8d 2189 }
63cd0674 2190 if (hicount) {
9d4ba2ae 2191 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2192 U8 *src, *dst;
2193 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2194 src = (U8 *)d - 1;
2195 dst = src+hicount;
2196 d += hicount;
cfd0369c 2197 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2198 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2199 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2200 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2201 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2202 }
2203 else {
63cd0674 2204 *dst-- = *src;
012bcf8d 2205 }
c7f1f016 2206 src--;
012bcf8d
GS
2207 }
2208 }
2209 }
2210
9aa983d2 2211 if (has_utf8 || uv > 255) {
9041c2e3 2212 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2213 has_utf8 = TRUE;
f9a63242
JH
2214 if (PL_lex_inwhat == OP_TRANS &&
2215 PL_sublex_info.sub_op) {
2216 PL_sublex_info.sub_op->op_private |=
2217 (PL_lex_repl ? OPpTRANS_FROM_UTF
2218 : OPpTRANS_TO_UTF);
f9a63242 2219 }
e294cc5d
JH
2220#ifdef EBCDIC
2221 if (uv > 255 && !dorange)
2222 native_range = FALSE;
2223#endif
012bcf8d 2224 }
a0ed51b3 2225 else {
012bcf8d 2226 *d++ = (char)uv;
a0ed51b3 2227 }
012bcf8d
GS
2228 }
2229 else {
c4d5f83a 2230 *d++ = (char) uv;
a0ed51b3 2231 }
79072805 2232 continue;
02aa26ce 2233
b239daa5 2234 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2235 case 'N':
55eda711 2236 ++s;
423cee85
JH
2237 if (*s == '{') {
2238 char* e = strchr(s, '}');
155aba94 2239 SV *res;
423cee85 2240 STRLEN len;
cfd0369c 2241 const char *str;
fc8cd66c 2242 SV *type;
4e553d73 2243
423cee85 2244 if (!e) {
5777a3f7 2245 yyerror("Missing right brace on \\N{}");
423cee85
JH
2246 e = s - 1;
2247 goto cont_scan;
2248 }
dbc0d4f2
JH
2249 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2250 /* \N{U+...} */
2251 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2252 PERL_SCAN_DISALLOW_PREFIX;
2253 s += 3;
2254 len = e - s;
2255 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2256 if ( e > s && len != (STRLEN)(e - s) ) {
2257 uv = 0xFFFD;
fc8cd66c 2258 }
dbc0d4f2
JH
2259 s = e + 1;
2260 goto NUM_ESCAPE_INSERT;
2261 }
55eda711 2262 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2263 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2264 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2265 res, NULL, SvPVX(type) );
2266 SvREFCNT_dec(type);
f9a63242
JH
2267 if (has_utf8)
2268 sv_utf8_upgrade(res);
cfd0369c 2269 str = SvPV_const(res,len);
1c47067b
JH
2270#ifdef EBCDIC_NEVER_MIND
2271 /* charnames uses pack U and that has been
2272 * recently changed to do the below uni->native
2273 * mapping, so this would be redundant (and wrong,
2274 * the code point would be doubly converted).
2275 * But leave this in just in case the pack U change
2276 * gets revoked, but the semantics is still
2277 * desireable for charnames. --jhi */
cddc7ef4 2278 {
cfd0369c 2279 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2280
2281 if (uv < 0x100) {
89ebb4a3 2282 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2283
2284 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2285 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2286 str = SvPV_const(res, len);
cddc7ef4
JH
2287 }
2288 }
2289#endif
89491803 2290 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2291 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2292 SvCUR_set(sv, d - ostart);
2293 SvPOK_on(sv);
e4f3eed8 2294 *d = '\0';
f08d6ad9 2295 sv_utf8_upgrade(sv);
d2f449dd 2296 /* this just broke our allocation above... */
eb160463 2297 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2298 d = SvPVX(sv) + SvCUR(sv);
89491803 2299 has_utf8 = TRUE;
f08d6ad9 2300 }
eb160463 2301 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2302 const char * const odest = SvPVX_const(sv);
423cee85 2303
8973db79 2304 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2305 d = SvPVX(sv) + (d - odest);
2306 }
e294cc5d
JH
2307#ifdef EBCDIC
2308 if (!dorange)
2309 native_range = FALSE; /* \N{} is guessed to be Unicode */
2310#endif
423cee85
JH
2311 Copy(str, d, len, char);
2312 d += len;
2313 SvREFCNT_dec(res);
2314 cont_scan:
2315 s = e + 1;
2316 }
2317 else
5777a3f7 2318 yyerror("Missing braces on \\N{}");
423cee85
JH
2319 continue;
2320
02aa26ce 2321 /* \c is a control character */
79072805
LW
2322 case 'c':
2323 s++;
961ce445 2324 if (s < send) {
ba210ebe 2325 U8 c = *s++;
c7f1f016
NIS
2326#ifdef EBCDIC
2327 if (isLOWER(c))
2328 c = toUPPER(c);
2329#endif
db42d148 2330 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2331 }
961ce445
RGS
2332 else {
2333 yyerror("Missing control char name in \\c");
2334 }
79072805 2335 continue;
02aa26ce
NT
2336
2337 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2338 case 'b':
db42d148 2339 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2340 break;
2341 case 'n':
db42d148 2342 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2343 break;
2344 case 'r':
db42d148 2345 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2346 break;
2347 case 'f':
db42d148 2348 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2349 break;
2350 case 't':
db42d148 2351 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2352 break;
34a3fe2a 2353 case 'e':
db42d148 2354 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2355 break;
2356 case 'a':
db42d148 2357 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2358 break;
02aa26ce
NT
2359 } /* end switch */
2360
79072805
LW
2361 s++;
2362 continue;
02aa26ce 2363 } /* end if (backslash) */
4c3a8340
ST
2364#ifdef EBCDIC
2365 else
2366 literal_endpoint++;
2367#endif
02aa26ce 2368
f9a63242 2369 default_action:
2b9d42f0
NIS
2370 /* If we started with encoded form, or already know we want it
2371 and then encode the next character */
2372 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2373 STRLEN len = 1;
5f66b61c
AL
2374 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2375 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2376 s += len;
2377 if (need > len) {
2378 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2379 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2380 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2381 }
5f66b61c 2382 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2383 has_utf8 = TRUE;
e294cc5d
JH
2384#ifdef EBCDIC
2385 if (uv > 255 && !dorange)
2386 native_range = FALSE;
2387#endif
2b9d42f0
NIS
2388 }
2389 else {
2390 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2391 }
02aa26ce
NT
2392 } /* while loop to process each character */
2393
2394 /* terminate the string and set up the sv */
79072805 2395 *d = '\0';
95a20fc0 2396 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2397 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2398 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2399
79072805 2400 SvPOK_on(sv);
9f4817db 2401 if (PL_encoding && !has_utf8) {
d0063567
DK
2402 sv_recode_to_utf8(sv, PL_encoding);
2403 if (SvUTF8(sv))
2404 has_utf8 = TRUE;
9f4817db 2405 }
2b9d42f0 2406 if (has_utf8) {
7e2040f0 2407 SvUTF8_on(sv);
2b9d42f0 2408 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2409 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2410 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2411 }
2412 }
79072805 2413
02aa26ce 2414 /* shrink the sv if we allocated more than we used */
79072805 2415 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2416 SvPV_shrink_to_cur(sv);
79072805 2417 }
02aa26ce 2418
9b599b2a 2419 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2420 if (s > PL_bufptr) {
2421 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2422 sv = new_constant(start, s - start,
2423 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2424 sv, NULL,
10edeb5d
JH
2425 (const char *)
2426 (( PL_lex_inwhat == OP_TRANS
2427 ? "tr"
2428 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2429 ? "s"
2430 : "qq"))));
79072805 2431 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2432 } else
8990e307 2433 SvREFCNT_dec(sv);
79072805
LW
2434 return s;
2435}
2436
ffb4593c
NT
2437/* S_intuit_more
2438 * Returns TRUE if there's more to the expression (e.g., a subscript),
2439 * FALSE otherwise.
ffb4593c
NT
2440 *
2441 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2442 *
2443 * ->[ and ->{ return TRUE
2444 * { and [ outside a pattern are always subscripts, so return TRUE
2445 * if we're outside a pattern and it's not { or [, then return FALSE
2446 * if we're in a pattern and the first char is a {
2447 * {4,5} (any digits around the comma) returns FALSE
2448 * if we're in a pattern and the first char is a [
2449 * [] returns FALSE
2450 * [SOMETHING] has a funky algorithm to decide whether it's a
2451 * character class or not. It has to deal with things like
2452 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2453 * anything else returns TRUE
2454 */
2455
9cbb5ea2
GS
2456/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2457
76e3520e 2458STATIC int
cea2e8a9 2459S_intuit_more(pTHX_ register char *s)
79072805 2460{
97aff369 2461 dVAR;
3280af22 2462 if (PL_lex_brackets)
79072805
LW
2463 return TRUE;
2464 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2465 return TRUE;
2466 if (*s != '{' && *s != '[')
2467 return FALSE;
3280af22 2468 if (!PL_lex_inpat)
79072805
LW
2469 return TRUE;
2470
2471 /* In a pattern, so maybe we have {n,m}. */
2472 if (*s == '{') {
2473 s++;
2474 if (!isDIGIT(*s))
2475 return TRUE;
2476 while (isDIGIT(*s))
2477 s++;
2478 if (*s == ',')
2479 s++;
2480 while (isDIGIT(*s))
2481 s++;
2482 if (*s == '}')
2483 return FALSE;
2484 return TRUE;
2485
2486 }
2487
2488 /* On the other hand, maybe we have a character class */
2489
2490 s++;
2491 if (*s == ']' || *s == '^')
2492 return FALSE;
2493 else {
ffb4593c 2494 /* this is terrifying, and it works */
79072805
LW
2495 int weight = 2; /* let's weigh the evidence */
2496 char seen[256];
f27ffc4a 2497 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2498 const char * const send = strchr(s,']');
3280af22 2499 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2500
2501 if (!send) /* has to be an expression */
2502 return TRUE;
2503
2504 Zero(seen,256,char);
2505 if (*s == '$')
2506 weight -= 3;
2507 else if (isDIGIT(*s)) {
2508 if (s[1] != ']') {
2509 if (isDIGIT(s[1]) && s[2] == ']')
2510 weight -= 10;
2511 }
2512 else
2513 weight -= 100;
2514 }
2515 for (; s < send; s++) {
2516 last_un_char = un_char;
2517 un_char = (unsigned char)*s;
2518 switch (*s) {
2519 case '@':
2520 case '&':
2521 case '$':
2522 weight -= seen[un_char] * 10;
7e2040f0 2523 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2524 int len;
8903cb82 2525 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2526 len = (int)strlen(tmpbuf);
2527 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2528 weight -= 100;
2529 else
2530 weight -= 10;
2531 }
2532 else if (*s == '$' && s[1] &&
93a17b20
LW
2533 strchr("[#!%*<>()-=",s[1])) {
2534 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2535 weight -= 10;
2536 else
2537 weight -= 1;
2538 }
2539 break;
2540 case '\\':
2541 un_char = 254;
2542 if (s[1]) {
93a17b20 2543 if (strchr("wds]",s[1]))
79072805 2544 weight += 100;
10edeb5d 2545 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2546 weight += 1;
93a17b20 2547 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2548 weight += 40;
2549 else if (isDIGIT(s[1])) {
2550 weight += 40;
2551 while (s[1] && isDIGIT(s[1]))
2552 s++;
2553 }
2554 }
2555 else
2556 weight += 100;
2557 break;
2558 case '-':
2559 if (s[1] == '\\')
2560 weight += 50;
93a17b20 2561 if (strchr("aA01! ",last_un_char))
79072805 2562 weight += 30;
93a17b20 2563 if (strchr("zZ79~",s[1]))
79072805 2564 weight += 30;
f27ffc4a
GS
2565 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2566 weight -= 5; /* cope with negative subscript */
79072805
LW
2567 break;
2568 default:
3792a11b
NC
2569 if (!isALNUM(last_un_char)
2570 && !(last_un_char == '$' || last_un_char == '@'
2571 || last_un_char == '&')
2572 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2573 char *d = tmpbuf;
2574 while (isALPHA(*s))
2575 *d++ = *s++;
2576 *d = '\0';
5458a98a 2577 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2578 weight -= 150;
2579 }
2580 if (un_char == last_un_char + 1)
2581 weight += 5;
2582 weight -= seen[un_char];
2583 break;
2584 }
2585 seen[un_char]++;
2586 }
2587 if (weight >= 0) /* probably a character class */
2588 return FALSE;
2589 }
2590
2591 return TRUE;
2592}
ffed7fef 2593
ffb4593c
NT
2594/*
2595 * S_intuit_method
2596 *
2597 * Does all the checking to disambiguate
2598 * foo bar
2599 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2600 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2601 *
2602 * First argument is the stuff after the first token, e.g. "bar".
2603 *
2604 * Not a method if bar is a filehandle.
2605 * Not a method if foo is a subroutine prototyped to take a filehandle.
2606 * Not a method if it's really "Foo $bar"
2607 * Method if it's "foo $bar"
2608 * Not a method if it's really "print foo $bar"
2609 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2610 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2611 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2612 * =>
2613 */
2614
76e3520e 2615STATIC int
62d55b22 2616S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2617{
97aff369 2618 dVAR;
a0d0e21e 2619 char *s = start + (*start == '$');
3280af22 2620 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2621 STRLEN len;
2622 GV* indirgv;
5db06880
NC
2623#ifdef PERL_MAD
2624 int soff;
2625#endif
a0d0e21e
LW
2626
2627 if (gv) {
62d55b22 2628 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2629 return 0;
62d55b22
NC
2630 if (cv) {
2631 if (SvPOK(cv)) {
2632 const char *proto = SvPVX_const(cv);
2633 if (proto) {
2634 if (*proto == ';')
2635 proto++;
2636 if (*proto == '*')
2637 return 0;
2638 }
b6c543e3
IZ
2639 }
2640 } else
c35e046a 2641 gv = NULL;
a0d0e21e 2642 }
8903cb82 2643 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2644 /* start is the beginning of the possible filehandle/object,
2645 * and s is the end of it
2646 * tmpbuf is a copy of it
2647 */
2648
a0d0e21e 2649 if (*start == '$') {
3ef1310e
RGS
2650 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2651 isUPPER(*PL_tokenbuf))
a0d0e21e 2652 return 0;
5db06880
NC
2653#ifdef PERL_MAD
2654 len = start - SvPVX(PL_linestr);
2655#endif
29595ff2 2656 s = PEEKSPACE(s);
f0092767 2657#ifdef PERL_MAD
5db06880
NC
2658 start = SvPVX(PL_linestr) + len;
2659#endif
3280af22
NIS
2660 PL_bufptr = start;
2661 PL_expect = XREF;
a0d0e21e
LW
2662 return *s == '(' ? FUNCMETH : METHOD;
2663 }
5458a98a 2664 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2665 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2666 len -= 2;
2667 tmpbuf[len] = '\0';
5db06880
NC
2668#ifdef PERL_MAD
2669 soff = s - SvPVX(PL_linestr);
2670#endif
c3e0f903
GS
2671 goto bare_package;
2672 }
90e5519e 2673 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2674 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2675 return 0;
2676 /* filehandle or package name makes it a method */
da51bb9b 2677 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2678#ifdef PERL_MAD
2679 soff = s - SvPVX(PL_linestr);
2680#endif
29595ff2 2681 s = PEEKSPACE(s);
3280af22 2682 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2683 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2684 bare_package:
cd81e915 2685 start_force(PL_curforce);
9ded7720 2686 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2687 newSVpvn(tmpbuf,len));
9ded7720 2688 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2689 if (PL_madskills)
2690 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2691 PL_expect = XTERM;
a0d0e21e 2692 force_next(WORD);
3280af22 2693 PL_bufptr = s;
5db06880
NC
2694#ifdef PERL_MAD
2695 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2696#endif
a0d0e21e
LW
2697 return *s == '(' ? FUNCMETH : METHOD;
2698 }
2699 }
2700 return 0;
2701}
2702
ffb4593c
NT
2703/*
2704 * S_incl_perldb
2705 * Return a string of Perl code to load the debugger. If PERL5DB
2706 * is set, it will return the contents of that, otherwise a
2707 * compile-time require of perl5db.pl.
2708 */
2709
bfed75c6 2710STATIC const char*
cea2e8a9 2711S_incl_perldb(pTHX)
a0d0e21e 2712{
97aff369 2713 dVAR;
3280af22 2714 if (PL_perldb) {
9d4ba2ae 2715 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2716
2717 if (pdb)
2718 return pdb;
93189314 2719 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2720 return "BEGIN { require 'perl5db.pl' }";
2721 }
2722 return "";
2723}
2724
2725
16d20bd9 2726/* Encoded script support. filter_add() effectively inserts a
4e553d73 2727 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2728 * Note that the filter function only applies to the current source file
2729 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2730 *
2731 * The datasv parameter (which may be NULL) can be used to pass
2732 * private data to this instance of the filter. The filter function
2733 * can recover the SV using the FILTER_DATA macro and use it to
2734 * store private buffers and state information.
2735 *
2736 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2737 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2738 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2739 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2740 * private use must be set using malloc'd pointers.
2741 */
16d20bd9
AD
2742
2743SV *
864dbfa3 2744Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2745{
97aff369 2746 dVAR;
f4c556ac 2747 if (!funcp)
a0714e2c 2748 return NULL;
f4c556ac 2749
3280af22
NIS
2750 if (!PL_rsfp_filters)
2751 PL_rsfp_filters = newAV();
16d20bd9 2752 if (!datasv)
561b68a9 2753 datasv = newSV(0);
862a34c6 2754 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2755 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2756 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2757 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2758 FPTR2DPTR(void *, IoANY(datasv)),
2759 SvPV_nolen(datasv)));
3280af22
NIS
2760 av_unshift(PL_rsfp_filters, 1);
2761 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2762 return(datasv);
2763}
4e553d73 2764
16d20bd9
AD
2765
2766/* Delete most recently added instance of this filter function. */
a0d0e21e 2767void
864dbfa3 2768Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2769{
97aff369 2770 dVAR;
e0c19803 2771 SV *datasv;
24801a4b 2772
33073adb 2773#ifdef DEBUGGING
55662e27
JH
2774 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2775 FPTR2DPTR(void*, funcp)));
33073adb 2776#endif
3280af22 2777 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2778 return;
2779 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2780 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2781 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2782 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2783 IoANY(datasv) = (void *)NULL;
3280af22 2784 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2785
16d20bd9
AD
2786 return;
2787 }
2788 /* we need to search for the correct entry and clear it */
cea2e8a9 2789 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2790}
2791
2792
1de9afcd
RGS
2793/* Invoke the idxth filter function for the current rsfp. */
2794/* maxlen 0 = read one text line */
16d20bd9 2795I32
864dbfa3 2796Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2797{
97aff369 2798 dVAR;
16d20bd9
AD
2799 filter_t funcp;
2800 SV *datasv = NULL;
f482118e
NC
2801 /* This API is bad. It should have been using unsigned int for maxlen.
2802 Not sure if we want to change the API, but if not we should sanity
2803 check the value here. */
39cd7a59
NC
2804 const unsigned int correct_length
2805 = maxlen < 0 ?
2806#ifdef PERL_MICRO
2807 0x7FFFFFFF
2808#else
2809 INT_MAX
2810#endif
2811 : maxlen;
e50aee73 2812
3280af22 2813 if (!PL_rsfp_filters)
16d20bd9 2814 return -1;
1de9afcd 2815 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2816 /* Provide a default input filter to make life easy. */
2817 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2818 DEBUG_P(PerlIO_printf(Perl_debug_log,
2819 "filter_read %d: from rsfp\n", idx));
f482118e 2820 if (correct_length) {
16d20bd9
AD
2821 /* Want a block */
2822 int len ;
f54cb97a 2823 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2824
2825 /* ensure buf_sv is large enough */
f482118e
NC
2826 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2827 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2828 correct_length)) <= 0) {
3280af22 2829 if (PerlIO_error(PL_rsfp))
37120919
AD
2830 return -1; /* error */
2831 else
2832 return 0 ; /* end of file */
2833 }
16d20bd9
AD
2834 SvCUR_set(buf_sv, old_len + len) ;
2835 } else {
2836 /* Want a line */
3280af22
NIS
2837 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2838 if (PerlIO_error(PL_rsfp))
37120919
AD
2839 return -1; /* error */
2840 else
2841 return 0 ; /* end of file */
2842 }
16d20bd9
AD
2843 }
2844 return SvCUR(buf_sv);
2845 }
2846 /* Skip this filter slot if filter has been deleted */
1de9afcd 2847 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2848 DEBUG_P(PerlIO_printf(Perl_debug_log,
2849 "filter_read %d: skipped (filter deleted)\n",
2850 idx));
f482118e 2851 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2852 }
2853 /* Get function pointer hidden within datasv */
8141890a 2854 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2855 DEBUG_P(PerlIO_printf(Perl_debug_log,
2856 "filter_read %d: via function %p (%s)\n",
ca0270c4 2857 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2858 /* Call function. The function is expected to */
2859 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2860 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2861 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2862}
2863
76e3520e 2864STATIC char *
cea2e8a9 2865S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2866{
97aff369 2867 dVAR;
c39cd008 2868#ifdef PERL_CR_FILTER
3280af22 2869 if (!PL_rsfp_filters) {
c39cd008 2870 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2871 }
2872#endif
3280af22 2873 if (PL_rsfp_filters) {
55497cff
PP
2874 if (!append)
2875 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2876 if (FILTER_READ(0, sv, 0) > 0)
2877 return ( SvPVX(sv) ) ;
2878 else
bd61b366 2879 return NULL ;
16d20bd9 2880 }
9d116dd7 2881 else
fd049845 2882 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2883}
2884
01ec43d0 2885STATIC HV *
7fc63493 2886S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2887{
97aff369 2888 dVAR;
def3634b
GS
2889 GV *gv;
2890
01ec43d0 2891 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2892 return PL_curstash;
2893
2894 if (len > 2 &&
2895 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2896 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2897 {
2898 return GvHV(gv); /* Foo:: */
def3634b
GS
2899 }
2900
2901 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2902 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2903 if (gv && GvCV(gv)) {
2904 SV * const sv = cv_const_sv(GvCV(gv));
2905 if (sv)
83003860 2906 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2907 }
2908
da51bb9b 2909 return gv_stashpv(pkgname, 0);
def3634b 2910}
a0d0e21e 2911
e3f73d4e
RGS
2912/*
2913 * S_readpipe_override
2914 * Check whether readpipe() is overriden, and generates the appropriate
2915 * optree, provided sublex_start() is called afterwards.
2916 */
2917STATIC void
1d51329b 2918S_readpipe_override(pTHX)
e3f73d4e
RGS
2919{
2920 GV **gvp;
2921 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2922 yylval.ival = OP_BACKTICK;
2923 if ((gv_readpipe
2924 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2925 ||
2926 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2927 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2928 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2929 {
2930 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2931 append_elem(OP_LIST,
2932 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2933 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2934 }
2935 else {
2936 set_csh();
2937 }
2938}
2939
5db06880
NC
2940#ifdef PERL_MAD
2941 /*
2942 * Perl_madlex
2943 * The intent of this yylex wrapper is to minimize the changes to the
2944 * tokener when we aren't interested in collecting madprops. It remains
2945 * to be seen how successful this strategy will be...
2946 */
2947
2948int
2949Perl_madlex(pTHX)
2950{
2951 int optype;
2952 char *s = PL_bufptr;
2953
cd81e915
NC
2954 /* make sure PL_thiswhite is initialized */
2955 PL_thiswhite = 0;
2956 PL_thismad = 0;
5db06880 2957
cd81e915 2958 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2959 if (PL_pending_ident)
2960 return S_pending_ident(aTHX);
2961
2962 /* previous token ate up our whitespace? */
cd81e915
NC
2963 if (!PL_lasttoke && PL_nextwhite) {
2964 PL_thiswhite = PL_nextwhite;
2965 PL_nextwhite = 0;
5db06880
NC
2966 }
2967
2968 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2969 PL_realtokenstart = -1;
2970 PL_thistoken = 0;
5db06880
NC
2971 optype = yylex();
2972 s = PL_bufptr;
cd81e915 2973 assert(PL_curforce < 0);
5db06880 2974
cd81e915
NC
2975 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2976 if (!PL_thistoken) {
2977 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2978 PL_thistoken = newSVpvs("");
5db06880 2979 else {
c35e046a 2980 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2981 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2982 }
2983 }
cd81e915
NC
2984 if (PL_thismad) /* install head */
2985 CURMAD('X', PL_thistoken);
5db06880
NC
2986 }
2987
2988 /* last whitespace of a sublex? */
cd81e915
NC
2989 if (optype == ')' && PL_endwhite) {
2990 CURMAD('X', PL_endwhite);
5db06880
NC
2991 }
2992
cd81e915 2993 if (!PL_thismad) {
5db06880
NC
2994
2995 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2996 if (!PL_thiswhite && !PL_endwhite && !optype) {
2997 sv_free(PL_thistoken);
2998 PL_thistoken = 0;
5db06880
NC
2999 return 0;
3000 }
3001
3002 /* put off final whitespace till peg */
3003 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3004 PL_nextwhite = PL_thiswhite;
3005 PL_thiswhite = 0;
5db06880 3006 }
cd81e915
NC
3007 else if (PL_thisopen) {
3008 CURMAD('q', PL_thisopen);
3009 if (PL_thistoken)
3010 sv_free(PL_thistoken);
3011 PL_thistoken = 0;
5db06880
NC
3012 }
3013 else {
3014 /* Store actual token text as madprop X */
cd81e915 3015 CURMAD('X', PL_thistoken);
5db06880
NC
3016 }
3017
cd81e915 3018 if (PL_thiswhite) {
5db06880 3019 /* add preceding whitespace as madprop _ */
cd81e915 3020 CURMAD('_', PL_thiswhite);
5db06880
NC
3021 }
3022
cd81e915 3023 if (PL_thisstuff) {
5db06880 3024 /* add quoted material as madprop = */
cd81e915 3025 CURMAD('=', PL_thisstuff);
5db06880
NC
3026 }
3027
cd81e915 3028 if (PL_thisclose) {
5db06880 3029 /* add terminating quote as madprop Q */
cd81e915 3030 CURMAD('Q', PL_thisclose);
5db06880
NC
3031 }
3032 }
3033
3034 /* special processing based on optype */
3035
3036 switch (optype) {
3037
3038 /* opval doesn't need a TOKEN since it can already store mp */
3039 case WORD:
3040 case METHOD:
3041 case FUNCMETH:
3042 case THING:
3043 case PMFUNC:
3044 case PRIVATEREF:
3045 case FUNC0SUB:
3046 case UNIOPSUB:
3047 case LSTOPSUB:
3048 if (yylval.opval)
cd81e915
NC
3049 append_madprops(PL_thismad, yylval.opval, 0);
3050 PL_thismad = 0;
5db06880
NC
3051 return optype;
3052
3053 /* fake EOF */
3054 case 0:
3055 optype = PEG;
cd81e915
NC
3056 if (PL_endwhite) {
3057 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3058 PL_endwhite = 0;
5db06880
NC
3059 }
3060 break;
3061
3062 case ']':
3063 case '}':
cd81e915 3064 if (PL_faketokens)
5db06880
NC
3065 break;
3066 /* remember any fake bracket that lexer is about to discard */
3067 if (PL_lex_brackets == 1 &&
3068 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3069 {
3070 s = PL_bufptr;
3071 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3072 s++;
3073 if (*s == '}') {
cd81e915
NC
3074 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3075 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3076 PL_thiswhite = 0;
5db06880
NC
3077 PL_bufptr = s - 1;
3078 break; /* don't bother looking for trailing comment */
3079 }
3080 else
3081 s = PL_bufptr;
3082 }
3083 if (optype == ']')
3084 break;
3085 /* FALLTHROUGH */
3086
3087 /* attach a trailing comment to its statement instead of next token */
3088 case ';':
cd81e915 3089 if (PL_faketokens)
5db06880
NC
3090 break;
3091 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3092 s = PL_bufptr;
3093 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3094 s++;
3095 if (*s == '\n' || *s == '#') {
3096 while (s < PL_bufend && *s != '\n')
3097 s++;
3098 if (s < PL_bufend)
3099 s++;
cd81e915
NC
3100 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3101 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3102 PL_thiswhite = 0;
5db06880
NC
3103 PL_bufptr = s;
3104 }
3105 }
3106 break;
3107
3108 /* pval */
3109 case LABEL:
3110 break;
3111
3112 /* ival */
3113 default:
3114 break;
3115
3116 }
3117
3118 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3119 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3120 PL_thismad = 0;
5db06880
NC
3121 return optype;
3122}
3123#endif
3124
468aa647 3125STATIC char *
cc6ed77d 3126S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3127 dVAR;
468aa647
RGS
3128 if (PL_expect != XSTATE)
3129 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3130 is_use ? "use" : "no"));
29595ff2 3131 s = SKIPSPACE1(s);
468aa647
RGS
3132 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3133 s = force_version(s, TRUE);
29595ff2 3134 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3135 start_force(PL_curforce);
9ded7720 3136 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3137 force_next(WORD);
3138 }
3139 else if (*s == 'v') {
3140 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3141 s = force_version(s, FALSE);
3142 }
3143 }
3144 else {
3145 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3146 s = force_version(s, FALSE);
3147 }
3148 yylval.ival = is_use;
3149 return s;
3150}
748a9306 3151#ifdef DEBUGGING
27da23d5 3152 static const char* const exp_name[] =
09bef843 3153 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3154 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3155 };
748a9306 3156#endif
463ee0b2 3157
02aa26ce
NT
3158/*
3159 yylex
3160
3161 Works out what to call the token just pulled out of the input
3162 stream. The yacc parser takes care of taking the ops we return and
3163 stitching them into a tree.
3164
3165 Returns:
3166 PRIVATEREF
3167
3168 Structure:
3169 if read an identifier
3170 if we're in a my declaration
3171 croak if they tried to say my($foo::bar)
3172 build the ops for a my() declaration
3173 if it's an access to a my() variable
3174 are we in a sort block?
3175 croak if my($a); $a <=> $b
3176 build ops for access to a my() variable
3177 if in a dq string, and they've said @foo and we can't find @foo
3178 croak
3179 build ops for a bareword
3180 if we already built the token before, use it.
3181*/
3182
20141f0e 3183
dba4d153
JH
3184#ifdef __SC__
3185#pragma segment Perl_yylex
3186#endif
dba4d153 3187int
dba4d153 3188Perl_yylex(pTHX)
20141f0e 3189{
97aff369 3190 dVAR;
3afc138a 3191 register char *s = PL_bufptr;
378cc40b 3192 register char *d;
463ee0b2 3193 STRLEN len;
aa7440fb 3194 bool bof = FALSE;
a687059c 3195
10edeb5d
JH
3196 /* orig_keyword, gvp, and gv are initialized here because
3197 * jump to the label just_a_word_zero can bypass their
3198 * initialization later. */
3199 I32 orig_keyword = 0;
3200 GV *gv = NULL;
3201 GV **gvp = NULL;
3202
bbf60fe6 3203 DEBUG_T( {
396482e1 3204 SV* tmp = newSVpvs("");
b6007c36
DM
3205 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3206 (IV)CopLINE(PL_curcop),
3207 lex_state_names[PL_lex_state],
3208 exp_name[PL_expect],
3209 pv_display(tmp, s, strlen(s), 0, 60));
3210 SvREFCNT_dec(tmp);
bbf60fe6 3211 } );
02aa26ce 3212 /* check if there's an identifier for us to look at */
ba979b31 3213 if (PL_pending_ident)
bbf60fe6 3214 return REPORT(S_pending_ident(aTHX));
bbce6d69 3215
02aa26ce
NT
3216 /* no identifier pending identification */
3217
3280af22 3218 switch (PL_lex_state) {
79072805
LW
3219#ifdef COMMENTARY
3220 case LEX_NORMAL: /* Some compilers will produce faster */
3221 case LEX_INTERPNORMAL: /* code if we comment these out. */
3222 break;
3223#endif
3224
09bef843 3225 /* when we've already built the next token, just pull it out of the queue */
79072805 3226 case LEX_KNOWNEXT:
5db06880
NC
3227#ifdef PERL_MAD
3228 PL_lasttoke--;
3229 yylval = PL_nexttoke[PL_lasttoke].next_val;
3230 if (PL_madskills) {
cd81e915 3231 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3232 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3233 if (PL_thismad && PL_thismad->mad_key == '_') {
3234 PL_thiswhite = (SV*)PL_thismad->mad_val;
3235 PL_thismad->mad_val = 0;
3236 mad_free(PL_thismad);
3237 PL_thismad = 0;
5db06880
NC
3238 }
3239 }
3240 if (!PL_lasttoke) {
3241 PL_lex_state = PL_lex_defer;
3242 PL_expect = PL_lex_expect;
3243 PL_lex_defer = LEX_NORMAL;
3244 if (!PL_nexttoke[PL_lasttoke].next_type)
3245 return yylex();
3246 }
3247#else
3280af22 3248 PL_nexttoke--;
5db06880 3249 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3250 if (!PL_nexttoke) {
3251 PL_lex_state = PL_lex_defer;
3252 PL_expect = PL_lex_expect;
3253 PL_lex_defer = LEX_NORMAL;
463ee0b2 3254 }
5db06880
NC
3255#endif
3256#ifdef PERL_MAD
3257 /* FIXME - can these be merged? */
3258 return(PL_nexttoke[PL_lasttoke].next_type);
3259#else
bbf60fe6 3260 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3261#endif
79072805 3262
02aa26ce 3263 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3264 when we get here, PL_bufptr is at the \
02aa26ce 3265 */
79072805
LW
3266 case LEX_INTERPCASEMOD:
3267#ifdef DEBUGGING
3280af22 3268 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3269 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3270#endif
02aa26ce 3271 /* handle \E or end of string */
3280af22 3272 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3273 /* if at a \E */
3280af22 3274 if (PL_lex_casemods) {
f54cb97a 3275 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3276 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3277
3792a11b
NC
3278 if (PL_bufptr != PL_bufend
3279 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3280 PL_bufptr += 2;
3281 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3282#ifdef PERL_MAD
3283 if (PL_madskills)
6b29d1f5 3284 PL_thistoken = newSVpvs("\\E");
5db06880 3285#endif
a0d0e21e 3286 }
bbf60fe6 3287 return REPORT(')');
79072805 3288 }
5db06880
NC
3289#ifdef PERL_MAD
3290 while (PL_bufptr != PL_bufend &&
3291 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3292 if (!PL_thiswhite)
6b29d1f5 3293 PL_thiswhite = newSVpvs("");
cd81e915 3294 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3295 PL_bufptr += 2;
3296 }
3297#else
3280af22
NIS
3298 if (PL_bufptr != PL_bufend)
3299 PL_bufptr += 2;
5db06880 3300#endif
3280af22 3301 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3302 return yylex();
79072805
LW
3303 }
3304 else {
607df283 3305 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3306 "### Saw case modifier\n"); });
3280af22 3307 s = PL_bufptr + 1;
6e909404 3308 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3309#ifdef PERL_MAD
cd81e915 3310 if (!PL_thiswhite)
6b29d1f5 3311 PL_thiswhite = newSVpvs("");
cd81e915 3312 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3313#endif
89122651 3314 PL_bufptr = s + 3;
6e909404
JH
3315 PL_lex_state = LEX_INTERPCONCAT;
3316 return yylex();
a0d0e21e 3317 }
6e909404 3318 else {
90771dc0 3319 I32 tmp;
5db06880
NC
3320 if (!PL_madskills) /* when just compiling don't need correct */
3321 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3322 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3323 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3324 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3325 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3326 return REPORT(')');
6e909404
JH
3327 }
3328 if (PL_lex_casemods > 10)
3329 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3330 PL_lex_casestack[PL_lex_casemods++] = *s;
3331 PL_lex_casestack[PL_lex_casemods] = '\0';
3332 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3333 start_force(PL_curforce);
9ded7720 3334 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3335 force_next('(');
cd81e915 3336 start_force(PL_curforce);
6e909404 3337 if (*s == 'l')
9ded7720 3338 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3339 else if (*s == 'u')
9ded7720 3340 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3341 else if (*s == 'L')
9ded7720 3342 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3343 else if (*s == 'U')
9ded7720 3344 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3345 else if (*s == 'Q')
9ded7720 3346 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3347 else
3348 Perl_croak(aTHX_ "panic: yylex");
5db06880 3349 if (PL_madskills) {
6b29d1f5 3350 SV* const tmpsv = newSVpvs("");
5db06880
NC
3351 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3352 curmad('_', tmpsv);
3353 }
6e909404 3354 PL_bufptr = s + 1;
a0d0e21e 3355 }
79072805 3356 force_next(FUNC);
3280af22
NIS
3357 if (PL_lex_starts) {
3358 s = PL_bufptr;
3359 PL_lex_starts = 0;
5db06880
NC
3360#ifdef PERL_MAD
3361 if (PL_madskills) {
cd81e915
NC
3362 if (PL_thistoken)
3363 sv_free(PL_thistoken);
6b29d1f5 3364 PL_thistoken = newSVpvs("");
5db06880
NC
3365 }
3366#endif
131b3ad0
DM
3367 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3368 if (PL_lex_casemods == 1 && PL_lex_inpat)
3369 OPERATOR(',');
3370 else
3371 Aop(OP_CONCAT);
79072805
LW
3372 }
3373 else
cea2e8a9 3374 return yylex();
79072805
LW
3375 }
3376
55497cff 3377 case LEX_INTERPPUSH:
bbf60fe6 3378 return REPORT(sublex_push());
55497cff 3379
79072805 3380 case LEX_INTERPSTART:
3280af22 3381 if (PL_bufptr == PL_bufend)
bbf60fe6 3382 return REPORT(sublex_done());
607df283 3383 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3384 "### Interpolated variable\n"); });
3280af22
NIS
3385 PL_expect = XTERM;
3386 PL_lex_dojoin = (*PL_bufptr == '@');
3387 PL_lex_state = LEX_INTERPNORMAL;
3388 if (PL_lex_dojoin) {
cd81e915 3389 start_force(PL_curforce);
9ded7720 3390 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3391 force_next(',');
cd81e915 3392 start_force(PL_curforce);
a0d0e21e 3393 force_ident("\"", '$');
cd81e915 3394 start_force(PL_curforce);
9ded7720 3395 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3396 force_next('$');
cd81e915 3397 start_force(PL_curforce);
9ded7720 3398 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3399 force_next('(');
cd81e915 3400 start_force(PL_curforce);
9ded7720 3401 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3402 force_next(FUNC);
3403 }
3280af22
NIS
3404 if (PL_lex_starts++) {
3405 s = PL_bufptr;
5db06880
NC
3406#ifdef PERL_MAD
3407 if (PL_madskills) {
cd81e915
NC
3408 if (PL_thistoken)
3409 sv_free(PL_thistoken);
6b29d1f5 3410 PL_thistoken = newSVpvs("");
5db06880
NC
3411 }
3412#endif
131b3ad0
DM
3413 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3414 if (!PL_lex_casemods && PL_lex_inpat)
3415 OPERATOR(',');
3416 else
3417 Aop(OP_CONCAT);
79072805 3418 }
cea2e8a9 3419 return yylex();
79072805
LW
3420
3421 case LEX_INTERPENDMAYBE:
3280af22
NIS
3422 if (intuit_more(PL_bufptr)) {
3423 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3424 break;
3425 }
3426 /* FALL THROUGH */
3427
3428 case LEX_INTERPEND:
3280af22
NIS
3429 if (PL_lex_dojoin) {
3430 PL_lex_dojoin = FALSE;
3431 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3432#ifdef PERL_MAD
3433 if (PL_madskills) {
cd81e915
NC
3434 if (PL_thistoken)
3435 sv_free(PL_thistoken);
6b29d1f5 3436 PL_thistoken = newSVpvs("");
5db06880
NC
3437 }
3438#endif
bbf60fe6 3439 return REPORT(')');
79072805 3440 }
43a16006 3441 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3442 && SvEVALED(PL_lex_repl))
43a16006 3443 {
e9fa98b2 3444 if (PL_bufptr != PL_bufend)
cea2e8a9 3445 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3446 PL_lex_repl = NULL;
e9fa98b2 3447 }
79072805
LW
3448 /* FALLTHROUGH */
3449 case LEX_INTERPCONCAT:
3450#ifdef DEBUGGING
3280af22 3451 if (PL_lex_brackets)
cea2e8a9 3452 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3453#endif
3280af22 3454 if (PL_bufptr == PL_bufend)
bbf60fe6 3455 return REPORT(sublex_done());
79072805 3456
3280af22
NIS
3457 if (SvIVX(PL_linestr) == '\'') {
3458 SV *sv = newSVsv(PL_linestr);
3459 if (!PL_lex_inpat)
76e3520e 3460 sv = tokeq(sv);
3280af22 3461 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3462 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3463 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3464 s = PL_bufend;
79072805
LW
3465 }
3466 else {
3280af22 3467 s = scan_const(PL_bufptr);
79072805 3468 if (*s == '\\')
3280af22 3469 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3470 else
3280af22 3471 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3472 }
3473
3280af22 3474 if (s != PL_bufptr) {
cd81e915 3475 start_force(PL_curforce);
5db06880
NC
3476 if (PL_madskills) {
3477 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3478 }
9ded7720 3479 NEXTVAL_NEXTTOKE = yylval;
3280af22 3480 PL_expect = XTERM;
79072805 3481 force_next(THING);
131b3ad0 3482 if (PL_lex_starts++) {
5db06880
NC
3483#ifdef PERL_MAD
3484 if (PL_madskills) {
cd81e915
NC
3485 if (PL_thistoken)
3486 sv_free(PL_thistoken);
6b29d1f5 3487 PL_thistoken = newSVpvs("");
5db06880
NC
3488 }
3489#endif
131b3ad0
DM
3490 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3491 if (!PL_lex_casemods && PL_lex_inpat)
3492 OPERATOR(',');
3493 else
3494 Aop(OP_CONCAT);
3495 }
79072805 3496 else {
3280af22 3497 PL_bufptr = s;
cea2e8a9 3498 return yylex();
79072805
LW
3499 }
3500 }
3501
cea2e8a9 3502 return yylex();
a0d0e21e 3503 case LEX_FORMLINE:
3280af22
NIS
3504 PL_lex_state = LEX_NORMAL;
3505 s = scan_formline(PL_bufptr);
3506 if (!PL_lex_formbrack)
a0d0e21e
LW
3507 goto rightbracket;
3508 OPERATOR(';');
79072805
LW
3509 }
3510
3280af22
NIS
3511 s = PL_bufptr;
3512 PL_oldoldbufptr = PL_oldbufptr;
3513 PL_oldbufptr = s;
463ee0b2
LW
3514
3515 retry:
5db06880 3516#ifdef PERL_MAD
cd81e915
NC
3517 if (PL_thistoken) {
3518 sv_free(PL_thistoken);
3519 PL_thistoken = 0;
5db06880 3520 }
cd81e915 3521 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3522#endif
378cc40b
LW
3523 switch (*s) {
3524 default:
7e2040f0 3525 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3526 goto keylookup;
cea2e8a9 3527 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3528 case 4:
3529 case 26:
3530 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3531 case 0:
5db06880
NC
3532#ifdef PERL_MAD
3533 if (PL_madskills)
cd81e915 3534 PL_faketokens = 0;
5db06880 3535#endif
3280af22
NIS
3536 if (!PL_rsfp) {
3537 PL_last_uni = 0;
3538 PL_last_lop = 0;
c5ee2135 3539 if (PL_lex_brackets) {
10edeb5d
JH
3540 yyerror((const char *)
3541 (PL_lex_formbrack
3542 ? "Format not terminated"
3543 : "Missing right curly or square bracket"));
c5ee2135 3544 }
4e553d73 3545 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3546 "### Tokener got EOF\n");
5f80b19c 3547 } );
79072805 3548 TOKEN(0);
463ee0b2 3549 }