This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Avoid an assertion failure when overloading readpipe.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
52
53#ifdef PERL_MAD
54# define PL_endwhite (PL_parser->endwhite)
55# define PL_faketokens (PL_parser->faketokens)
56# define PL_lasttoke (PL_parser->lasttoke)
57# define PL_nextwhite (PL_parser->nextwhite)
58# define PL_realtokenstart (PL_parser->realtokenstart)
59# define PL_skipwhite (PL_parser->skipwhite)
60# define PL_thisclose (PL_parser->thisclose)
61# define PL_thismad (PL_parser->thismad)
62# define PL_thisopen (PL_parser->thisopen)
63# define PL_thisstuff (PL_parser->thisstuff)
64# define PL_thistoken (PL_parser->thistoken)
65# define PL_thiswhite (PL_parser->thiswhite)
66#endif
67
3cbf51f5
DM
68static int
69S_pending_ident(pTHX);
199e78b7 70
0bd48802 71static const char ident_too_long[] = "Identifier too long";
c445ea15 72static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 73
acfe0abc 74static void restore_rsfp(pTHX_ void *f);
6e3aabd6 75#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
76static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 78#endif
51371543 79
29595ff2 80#ifdef PERL_MAD
29595ff2 81# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 82# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 83#else
5db06880 84# define CURMAD(slot,sv)
9ded7720 85# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
86#endif
87
9059aa12
LW
88#define XFAKEBRACK 128
89#define XENUMMASK 127
90
39e02b42
JH
91#ifdef USE_UTF8_SCRIPTS
92# define UTF (!IN_BYTES)
2b9d42f0 93#else
746b446a 94# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 95#endif
a0ed51b3 96
61f0cdd9 97/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
98 * 1999-02-27 mjd-perl-patch@plover.com */
99#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
100
bf4acbe4
GS
101/* On MacOS, respect nonbreaking spaces */
102#ifdef MACOS_TRADITIONAL
103#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
104#else
105#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
106#endif
107
ffb4593c
NT
108/* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
79072805
LW
110 * can get by with a single comparison (if the compiler is smart enough).
111 */
112
fb73857a
PP
113/* #define LEX_NOTPARSING 11 is done in perl.h. */
114
b6007c36
DM
115#define LEX_NORMAL 10 /* normal code (ie not within "...") */
116#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119#define LEX_INTERPSTART 6 /* expecting the start of a $var */
120
121 /* at end of code, eg "$x" followed by: */
122#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
124
125#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127#define LEX_INTERPCONST 2 /* NOT USED */
128#define LEX_FORMLINE 1 /* expecting a format line */
129#define LEX_KNOWNEXT 0 /* next token known; just return it */
130
79072805 131
bbf60fe6 132#ifdef DEBUGGING
27da23d5 133static const char* const lex_state_names[] = {
bbf60fe6
DM
134 "KNOWNEXT",
135 "FORMLINE",
136 "INTERPCONST",
137 "INTERPCONCAT",
138 "INTERPENDMAYBE",
139 "INTERPEND",
140 "INTERPSTART",
141 "INTERPPUSH",
142 "INTERPCASEMOD",
143 "INTERPNORMAL",
144 "NORMAL"
145};
146#endif
147
79072805
LW
148#ifdef ff_next
149#undef ff_next
d48672a2
LW
150#endif
151
79072805 152#include "keywords.h"
fe14fcc3 153
ffb4593c
NT
154/* CLINE is a macro that ensures PL_copline has a sane value */
155
ae986130
LW
156#ifdef CLINE
157#undef CLINE
158#endif
57843af0 159#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 160
5db06880 161#ifdef PERL_MAD
29595ff2
NC
162# define SKIPSPACE0(s) skipspace0(s)
163# define SKIPSPACE1(s) skipspace1(s)
164# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165# define PEEKSPACE(s) skipspace2(s,0)
166#else
167# define SKIPSPACE0(s) skipspace(s)
168# define SKIPSPACE1(s) skipspace(s)
169# define SKIPSPACE2(s,tsv) skipspace(s)
170# define PEEKSPACE(s) skipspace(s)
171#endif
172
ffb4593c
NT
173/*
174 * Convenience functions to return different tokens and prime the
9cbb5ea2 175 * lexer for the next token. They all take an argument.
ffb4593c
NT
176 *
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
2d2e263d 187 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
188 * BOop : bitwise or or xor
189 * BAop : bitwise and
190 * SHop : shift operator
191 * PWop : power operator
9cbb5ea2 192 * PMop : pattern-matching operator
ffb4593c
NT
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
e5edeb50 196 * Rop : relational operator <= != gt
ffb4593c
NT
197 *
198 * Also see LOP and lop() below.
199 */
200
998054bd 201#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 202# define REPORT(retval) tokereport((I32)retval)
998054bd 203#else
bbf60fe6 204# define REPORT(retval) (retval)
998054bd
SC
205#endif
206
bbf60fe6
DM
207#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 227
a687059c
LW
228/* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
a687059c 232 */
376fcdbf
AL
233#define UNI2(f,x) { \
234 yylval.ival = f; \
235 PL_expect = x; \
236 PL_bufptr = s; \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
239 if (*s == '(') \
240 return REPORT( (int)FUNC1 ); \
29595ff2 241 s = PEEKSPACE(s); \
376fcdbf
AL
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
243 }
6f33ba73
RGS
244#define UNI(f) UNI2(f,XTERM)
245#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 246
376fcdbf
AL
247#define UNIBRACK(f) { \
248 yylval.ival = f; \
249 PL_bufptr = s; \
250 PL_last_uni = PL_oldbufptr; \
251 if (*s == '(') \
252 return REPORT( (int)FUNC1 ); \
29595ff2 253 s = PEEKSPACE(s); \
376fcdbf
AL
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
255 }
79072805 256
9f68db38 257/* grandfather return to old style */
3280af22 258#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 259
8fa7f367
JH
260#ifdef DEBUGGING
261
bbf60fe6
DM
262/* how to interpret the yylval associated with the token */
263enum token_type {
264 TOKENTYPE_NONE,
265 TOKENTYPE_IVAL,
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
267 TOKENTYPE_PVAL,
268 TOKENTYPE_OPVAL,
269 TOKENTYPE_GVVAL
270};
271
6d4a66ac
NC
272static struct debug_tokens {
273 const int token;
274 enum token_type type;
275 const char *name;
276} const debug_tokens[] =
9041c2e3 277{
bbf60fe6
DM
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 343 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
344};
345
346/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 347
bbf60fe6 348STATIC int
f5bd084c 349S_tokereport(pTHX_ I32 rv)
bbf60fe6 350{
97aff369 351 dVAR;
bbf60fe6 352 if (DEBUG_T_TEST) {
bd61b366 353 const char *name = NULL;
bbf60fe6 354 enum token_type type = TOKENTYPE_NONE;
f54cb97a 355 const struct debug_tokens *p;
396482e1 356 SV* const report = newSVpvs("<== ");
bbf60fe6 357
f54cb97a 358 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
359 if (p->token == (int)rv) {
360 name = p->name;
361 type = p->type;
362 break;
363 }
364 }
365 if (name)
54667de8 366 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
369 else if (!rv)
396482e1 370 sv_catpvs(report, "EOF");
bbf60fe6
DM
371 else
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
373 switch (type) {
374 case TOKENTYPE_NONE:
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
376 break;
377 case TOKENTYPE_IVAL:
e4584336 378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
379 break;
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
383 break;
384 case TOKENTYPE_PVAL:
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
386 break;
387 case TOKENTYPE_OPVAL:
b6007c36 388 if (yylval.opval) {
401441c0 389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 390 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
394 }
395
396 }
401441c0 397 else
396482e1 398 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
399 break;
400 }
b6007c36 401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
402 };
403 return (int)rv;
998054bd
SC
404}
405
b6007c36
DM
406
407/* print the buffer with suitable escapes */
408
409STATIC void
410S_printbuf(pTHX_ const char* fmt, const char* s)
411{
396482e1 412 SV* const tmp = newSVpvs("");
b6007c36
DM
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
414 SvREFCNT_dec(tmp);
415}
416
8fa7f367
JH
417#endif
418
ffb4593c
NT
419/*
420 * S_ao
421 *
c963b151
BD
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
424 */
425
76e3520e 426STATIC int
cea2e8a9 427S_ao(pTHX_ int toketype)
a0d0e21e 428{
97aff369 429 dVAR;
3280af22
NIS
430 if (*PL_bufptr == '=') {
431 PL_bufptr++;
a0d0e21e
LW
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
c963b151
BD
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
438 toketype = ASSIGNOP;
439 }
440 return toketype;
441}
442
ffb4593c
NT
443/*
444 * S_no_op
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
454 */
455
76e3520e 456STATIC void
bfed75c6 457S_no_op(pTHX_ const char *what, char *s)
463ee0b2 458{
97aff369 459 dVAR;
9d4ba2ae
AL
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 462
1189a94a
GS
463 if (!s)
464 s = oldbp;
07c798fb 465 else
1189a94a 466 PL_bufptr = s;
cea2e8a9 467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
468 if (ckWARN_d(WARN_SYNTAX)) {
469 if (is_first)
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 473 const char *t;
c35e046a
AL
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
475 NOOP;
56da5a46
RGS
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
551405c4 479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
480 }
481 else {
482 assert(s >= oldbp);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 485 }
07c798fb 486 }
3280af22 487 PL_bufptr = oldbp;
8990e307
LW
488}
489
ffb4593c
NT
490/*
491 * S_missingterm
492 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 493 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
496 * This is fatal.
497 */
498
76e3520e 499STATIC void
cea2e8a9 500S_missingterm(pTHX_ char *s)
8990e307 501{
97aff369 502 dVAR;
8990e307
LW
503 char tmpbuf[3];
504 char q;
505 if (s) {
9d4ba2ae 506 char * const nl = strrchr(s,'\n');
d2719217 507 if (nl)
8990e307
LW
508 *nl = '\0';
509 }
9d116dd7
JH
510 else if (
511#ifdef EBCDIC
512 iscntrl(PL_multi_close)
513#else
514 PL_multi_close < 32 || PL_multi_close == 127
515#endif
516 ) {
8990e307 517 *tmpbuf = '^';
585ec06d 518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
519 tmpbuf[2] = '\0';
520 s = tmpbuf;
521 }
522 else {
eb160463 523 *tmpbuf = (char)PL_multi_close;
8990e307
LW
524 tmpbuf[1] = '\0';
525 s = tmpbuf;
526 }
527 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 529}
79072805 530
ef89dcc3 531#define FEATURE_IS_ENABLED(name) \
0d863452 532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
534/*
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
537 */
538STATIC bool
d4c19fe8 539S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 540{
97aff369 541 dVAR;
0d863452 542 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 543 char he_name[32] = "feature_";
6fca0082 544 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 545
7b9ef140 546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
547}
548
ffb4593c
NT
549/*
550 * Perl_deprecate
ffb4593c
NT
551 */
552
79072805 553void
bfed75c6 554Perl_deprecate(pTHX_ const char *s)
a0d0e21e 555{
599cee73 556 if (ckWARN(WARN_DEPRECATED))
9014280d 557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
558}
559
12bcd1a6 560void
bfed75c6 561Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
562{
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
565 /* */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
570
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
573 "Use of %s is deprecated", s);
574}
575
ffb4593c 576/*
9cbb5ea2
GS
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
ffb4593c
NT
579 */
580
c39cd008
GS
581#ifdef PERL_CR_FILTER
582static void
583strip_return(SV *sv)
584{
95a20fc0 585 register const char *s = SvPVX_const(sv);
9d4ba2ae 586 register const char * const e = s + SvCUR(sv);
c39cd008
GS
587 /* outer loop optimized to do nothing if there are no CR-LFs */
588 while (s < e) {
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
592 *d++ = *s++;
593 while (s < e) {
594 if (*s == '\r' && s[1] == '\n')
595 s++;
596 *d++ = *s++;
597 }
598 SvCUR(sv) -= s - d;
599 return;
600 }
601 }
602}
a868473f 603
76e3520e 604STATIC I32
c39cd008 605S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 606{
f54cb97a 607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
608 if (count > 0 && !maxlen)
609 strip_return(sv);
610 return count;
a868473f
NIS
611}
612#endif
613
199e78b7
DM
614
615
ffb4593c
NT
616/*
617 * Perl_lex_start
9cbb5ea2
GS
618 * Initialize variables. Uses the Perl save_stack to save its state (for
619 * recursive calls to the parser).
ffb4593c
NT
620 */
621
a0d0e21e 622void
864dbfa3 623Perl_lex_start(pTHX_ SV *line)
79072805 624{
97aff369 625 dVAR;
6ef55633 626 const char *s = NULL;
8990e307 627 STRLEN len;
acdf0a21
DM
628 yy_parser *parser;
629
630 /* create and initialise a parser */
631
199e78b7 632 Newxz(parser, 1, yy_parser);
acdf0a21
DM
633 parser->old_parser = PL_parser;
634 PL_parser = parser;
635
636 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
637 parser->ps = parser->stack;
638 parser->stack_size = YYINITDEPTH;
639
640 parser->stack->state = 0;
641 parser->yyerrstatus = 0;
642 parser->yychar = YYEMPTY; /* Cause a token to be read. */
643
644 /* initialise lexer state */
8990e307 645
3280af22 646 SAVEI32(PL_lex_state);
5db06880
NC
647#ifdef PERL_MAD
648 if (PL_lex_state == LEX_KNOWNEXT) {
199e78b7 649 I32 toke = parser->old_parser->lasttoke;
5db06880
NC
650 while (--toke >= 0) {
651 SAVEI32(PL_nexttoke[toke].next_type);
652 SAVEVPTR(PL_nexttoke[toke].next_val);
653 if (PL_madskills)
654 SAVEVPTR(PL_nexttoke[toke].next_mad);
655 }
5db06880 656 }
cd81e915 657 SAVEI32(PL_curforce);
5db06880 658#else
18b09519
GS
659 if (PL_lex_state == LEX_KNOWNEXT) {
660 I32 toke = PL_nexttoke;
661 while (--toke >= 0) {
662 SAVEI32(PL_nexttype[toke]);
663 SAVEVPTR(PL_nextval[toke]);
664 }
665 SAVEI32(PL_nexttoke);
18b09519 666 }
5db06880 667#endif
57843af0 668 SAVECOPLINE(PL_curcop);
3280af22
NIS
669 SAVEPPTR(PL_bufptr);
670 SAVEPPTR(PL_bufend);
671 SAVEPPTR(PL_oldbufptr);
672 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
673 SAVEPPTR(PL_last_lop);
674 SAVEPPTR(PL_last_uni);
3280af22
NIS
675 SAVEPPTR(PL_linestart);
676 SAVESPTR(PL_linestr);
c76ac1ee 677 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
bebdddfc 678 SAVEINT(PL_expect);
3280af22 679
ebca63ee 680 PL_copline = NOLINE;
3280af22 681 PL_lex_state = LEX_NORMAL;
3280af22 682 PL_expect = XSTATE;
199e78b7
DM
683 Newx(parser->lex_brackstack, 120, char);
684 Newx(parser->lex_casestack, 12, char);
685 *parser->lex_casestack = '\0';
686#ifndef PERL_MAD
76be56bc 687 PL_nexttoke = 0;
5db06880 688#endif
02b34bbe 689
10efb74f
NC
690 if (line) {
691 s = SvPV_const(line, len);
692 } else {
693 len = 0;
694 }
695 if (!len) {
696 PL_linestr = newSVpvs("\n;");
697 } else if (SvREADONLY(line) || s[len-1] != ';') {
698 PL_linestr = newSVsv(line);
699 if (s[len-1] != ';')
0eb20fa2 700 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
701 } else {
702 SvTEMP_off(line);
703 SvREFCNT_inc_simple_void_NN(line);
704 PL_linestr = line;
8990e307 705 }
db4997f0
NC
706 /* PL_linestr needs to survive until end of scope, not just the next
707 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 708 SAVEFREESV(PL_linestr);
3280af22
NIS
709 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
710 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 711 PL_last_lop = PL_last_uni = NULL;
3280af22 712 PL_rsfp = 0;
79072805 713}
a687059c 714
ffb4593c
NT
715/*
716 * Perl_lex_end
9cbb5ea2
GS
717 * Finalizer for lexing operations. Must be called when the parser is
718 * done with the lexer.
ffb4593c
NT
719 */
720
463ee0b2 721void
864dbfa3 722Perl_lex_end(pTHX)
463ee0b2 723{
97aff369 724 dVAR;
3280af22 725 PL_doextract = FALSE;
463ee0b2
LW
726}
727
ffb4593c
NT
728/*
729 * S_incline
730 * This subroutine has nothing to do with tilting, whether at windmills
731 * or pinball tables. Its name is short for "increment line". It
57843af0 732 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 733 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
734 * # line 500 "foo.pm"
735 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
736 */
737
76e3520e 738STATIC void
d9095cec 739S_incline(pTHX_ const char *s)
463ee0b2 740{
97aff369 741 dVAR;
d9095cec
NC
742 const char *t;
743 const char *n;
744 const char *e;
463ee0b2 745
57843af0 746 CopLINE_inc(PL_curcop);
463ee0b2
LW
747 if (*s++ != '#')
748 return;
d4c19fe8
AL
749 while (SPACE_OR_TAB(*s))
750 s++;
73659bf1
GS
751 if (strnEQ(s, "line", 4))
752 s += 4;
753 else
754 return;
084592ab 755 if (SPACE_OR_TAB(*s))
73659bf1 756 s++;
4e553d73 757 else
73659bf1 758 return;
d4c19fe8
AL
759 while (SPACE_OR_TAB(*s))
760 s++;
463ee0b2
LW
761 if (!isDIGIT(*s))
762 return;
d4c19fe8 763
463ee0b2
LW
764 n = s;
765 while (isDIGIT(*s))
766 s++;
bf4acbe4 767 while (SPACE_OR_TAB(*s))
463ee0b2 768 s++;
73659bf1 769 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 770 s++;
73659bf1
GS
771 e = t + 1;
772 }
463ee0b2 773 else {
c35e046a
AL
774 t = s;
775 while (!isSPACE(*t))
776 t++;
73659bf1 777 e = t;
463ee0b2 778 }
bf4acbe4 779 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
780 e++;
781 if (*e != '\n' && *e != '\0')
782 return; /* false alarm */
783
f4dd75d9 784 if (t - s > 0) {
d9095cec 785 const STRLEN len = t - s;
8a5ee598 786#ifndef USE_ITHREADS
c4420975 787 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
788 STRLEN tmplen = cf ? strlen(cf) : 0;
789 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
790 /* must copy *{"::_<(eval N)[oldfilename:L]"}
791 * to *{"::_<newfilename"} */
44867030
NC
792 /* However, the long form of evals is only turned on by the
793 debugger - usually they're "(eval %lu)" */
794 char smallbuf[128];
795 char *tmpbuf;
796 GV **gvp;
d9095cec 797 STRLEN tmplen2 = len;
798b63bc 798 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
799 tmpbuf = smallbuf;
800 else
2ae0db35 801 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
802 tmpbuf[0] = '_';
803 tmpbuf[1] = '<';
2ae0db35 804 memcpy(tmpbuf + 2, cf, tmplen);
44867030 805 tmplen += 2;
8a5ee598
RGS
806 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
807 if (gvp) {
44867030
NC
808 char *tmpbuf2;
809 GV *gv2;
810
811 if (tmplen2 + 2 <= sizeof smallbuf)
812 tmpbuf2 = smallbuf;
813 else
814 Newx(tmpbuf2, tmplen2 + 2, char);
815
816 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
817 /* Either they malloc'd it, or we malloc'd it,
818 so no prefix is present in ours. */
819 tmpbuf2[0] = '_';
820 tmpbuf2[1] = '<';
821 }
822
823 memcpy(tmpbuf2 + 2, s, tmplen2);
824 tmplen2 += 2;
825
8a5ee598 826 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 827 if (!isGV(gv2)) {
8a5ee598 828 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
829 /* adjust ${"::_<newfilename"} to store the new file name */
830 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
831 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
832 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
833 }
44867030
NC
834
835 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 836 }
e66cf94c 837 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 838 }
8a5ee598 839#endif
05ec9bb3 840 CopFILE_free(PL_curcop);
d9095cec 841 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 842 }
57843af0 843 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
844}
845
29595ff2 846#ifdef PERL_MAD
cd81e915 847/* skip space before PL_thistoken */
29595ff2
NC
848
849STATIC char *
850S_skipspace0(pTHX_ register char *s)
851{
852 s = skipspace(s);
853 if (!PL_madskills)
854 return s;
cd81e915
NC
855 if (PL_skipwhite) {
856 if (!PL_thiswhite)
6b29d1f5 857 PL_thiswhite = newSVpvs("");
cd81e915
NC
858 sv_catsv(PL_thiswhite, PL_skipwhite);
859 sv_free(PL_skipwhite);
860 PL_skipwhite = 0;
861 }
862 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
863 return s;
864}
865
cd81e915 866/* skip space after PL_thistoken */
29595ff2
NC
867
868STATIC char *
869S_skipspace1(pTHX_ register char *s)
870{
d4c19fe8 871 const char *start = s;
29595ff2
NC
872 I32 startoff = start - SvPVX(PL_linestr);
873
874 s = skipspace(s);
875 if (!PL_madskills)
876 return s;
877 start = SvPVX(PL_linestr) + startoff;
cd81e915 878 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 879 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
880 PL_thistoken = newSVpvn(tstart, start - tstart);
881 }
882 PL_realtokenstart = -1;
883 if (PL_skipwhite) {
884 if (!PL_nextwhite)
6b29d1f5 885 PL_nextwhite = newSVpvs("");
cd81e915
NC
886 sv_catsv(PL_nextwhite, PL_skipwhite);
887 sv_free(PL_skipwhite);
888 PL_skipwhite = 0;
29595ff2
NC
889 }
890 return s;
891}
892
893STATIC char *
894S_skipspace2(pTHX_ register char *s, SV **svp)
895{
c35e046a
AL
896 char *start;
897 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
898 const I32 startoff = s - SvPVX(PL_linestr);
899
29595ff2
NC
900 s = skipspace(s);
901 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
902 if (!PL_madskills || !svp)
903 return s;
904 start = SvPVX(PL_linestr) + startoff;
cd81e915 905 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 906 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
907 PL_thistoken = newSVpvn(tstart, start - tstart);
908 PL_realtokenstart = -1;
29595ff2 909 }
cd81e915 910 if (PL_skipwhite) {
29595ff2 911 if (!*svp)
6b29d1f5 912 *svp = newSVpvs("");
cd81e915
NC
913 sv_setsv(*svp, PL_skipwhite);
914 sv_free(PL_skipwhite);
915 PL_skipwhite = 0;
29595ff2
NC
916 }
917
918 return s;
919}
920#endif
921
80a702cd 922STATIC void
5fa550fb 923S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
924{
925 AV *av = CopFILEAVx(PL_curcop);
926 if (av) {
b9f83d2f 927 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
928 if (orig_sv)
929 sv_setsv(sv, orig_sv);
930 else
931 sv_setpvn(sv, buf, len);
80a702cd
RGS
932 (void)SvIOK_on(sv);
933 SvIV_set(sv, 0);
934 av_store(av, (I32)CopLINE(PL_curcop), sv);
935 }
936}
937
ffb4593c
NT
938/*
939 * S_skipspace
940 * Called to gobble the appropriate amount and type of whitespace.
941 * Skips comments as well.
942 */
943
76e3520e 944STATIC char *
cea2e8a9 945S_skipspace(pTHX_ register char *s)
a687059c 946{
97aff369 947 dVAR;
5db06880
NC
948#ifdef PERL_MAD
949 int curoff;
950 int startoff = s - SvPVX(PL_linestr);
951
cd81e915
NC
952 if (PL_skipwhite) {
953 sv_free(PL_skipwhite);
954 PL_skipwhite = 0;
5db06880
NC
955 }
956#endif
957
3280af22 958 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 959 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 960 s++;
5db06880
NC
961#ifdef PERL_MAD
962 goto done;
963#else
463ee0b2 964 return s;
5db06880 965#endif
463ee0b2
LW
966 }
967 for (;;) {
fd049845 968 STRLEN prevlen;
09bef843 969 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 970 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
971 while (s < PL_bufend && isSPACE(*s)) {
972 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
973 incline(s);
974 }
ffb4593c
NT
975
976 /* comment */
3280af22
NIS
977 if (s < PL_bufend && *s == '#') {
978 while (s < PL_bufend && *s != '\n')
463ee0b2 979 s++;
60e6418e 980 if (s < PL_bufend) {
463ee0b2 981 s++;
60e6418e
GS
982 if (PL_in_eval && !PL_rsfp) {
983 incline(s);
984 continue;
985 }
986 }
463ee0b2 987 }
ffb4593c
NT
988
989 /* only continue to recharge the buffer if we're at the end
990 * of the buffer, we're not reading from a source filter, and
991 * we're in normal lexing mode
992 */
09bef843
SB
993 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
994 PL_lex_state == LEX_FORMLINE)
5db06880
NC
995#ifdef PERL_MAD
996 goto done;
997#else
463ee0b2 998 return s;
5db06880 999#endif
ffb4593c
NT
1000
1001 /* try to recharge the buffer */
5db06880
NC
1002#ifdef PERL_MAD
1003 curoff = s - SvPVX(PL_linestr);
1004#endif
1005
9cbb5ea2 1006 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1007 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1008 {
5db06880
NC
1009#ifdef PERL_MAD
1010 if (PL_madskills && curoff != startoff) {
cd81e915 1011 if (!PL_skipwhite)
6b29d1f5 1012 PL_skipwhite = newSVpvs("");
cd81e915 1013 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1014 curoff - startoff);
1015 }
1016
1017 /* mustn't throw out old stuff yet if madpropping */
1018 SvCUR(PL_linestr) = curoff;
1019 s = SvPVX(PL_linestr) + curoff;
1020 *s = 0;
1021 if (curoff && s[-1] == '\n')
1022 s[-1] = ' ';
1023#endif
1024
9cbb5ea2 1025 /* end of file. Add on the -p or -n magic */
cd81e915 1026 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1027 if (PL_minus_p) {
5db06880 1028#ifdef PERL_MAD
6502358f 1029 sv_catpvs(PL_linestr,
5db06880
NC
1030 ";}continue{print or die qq(-p destination: $!\\n);}");
1031#else
6502358f 1032 sv_setpvs(PL_linestr,
01a19ab0 1033 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1034#endif
3280af22 1035 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1036 }
01a19ab0 1037 else if (PL_minus_n) {
5db06880
NC
1038#ifdef PERL_MAD
1039 sv_catpvn(PL_linestr, ";}", 2);
1040#else
01a19ab0 1041 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1042#endif
01a19ab0
NC
1043 PL_minus_n = 0;
1044 }
a0d0e21e 1045 else
5db06880
NC
1046#ifdef PERL_MAD
1047 sv_catpvn(PL_linestr,";", 1);
1048#else
4147a61b 1049 sv_setpvn(PL_linestr,";", 1);
5db06880 1050#endif
ffb4593c
NT
1051
1052 /* reset variables for next time we lex */
9cbb5ea2 1053 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1054 = SvPVX(PL_linestr)
1055#ifdef PERL_MAD
1056 + curoff
1057#endif
1058 ;
3280af22 1059 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1060 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1061
1062 /* Close the filehandle. Could be from -P preprocessor,
1063 * STDIN, or a regular file. If we were reading code from
1064 * STDIN (because the commandline held no -e or filename)
1065 * then we don't close it, we reset it so the code can
1066 * read from STDIN too.
1067 */
1068
3280af22
NIS
1069 if (PL_preprocess && !PL_in_eval)
1070 (void)PerlProc_pclose(PL_rsfp);
1071 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1072 PerlIO_clearerr(PL_rsfp);
8990e307 1073 else
3280af22 1074 (void)PerlIO_close(PL_rsfp);
4608196e 1075 PL_rsfp = NULL;
463ee0b2
LW
1076 return s;
1077 }
ffb4593c
NT
1078
1079 /* not at end of file, so we only read another line */
09bef843
SB
1080 /* make corresponding updates to old pointers, for yyerror() */
1081 oldprevlen = PL_oldbufptr - PL_bufend;
1082 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1083 if (PL_last_uni)
1084 oldunilen = PL_last_uni - PL_bufend;
1085 if (PL_last_lop)
1086 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1087 PL_linestart = PL_bufptr = s + prevlen;
1088 PL_bufend = s + SvCUR(PL_linestr);
1089 s = PL_bufptr;
09bef843
SB
1090 PL_oldbufptr = s + oldprevlen;
1091 PL_oldoldbufptr = s + oldoldprevlen;
1092 if (PL_last_uni)
1093 PL_last_uni = s + oldunilen;
1094 if (PL_last_lop)
1095 PL_last_lop = s + oldloplen;
a0d0e21e 1096 incline(s);
ffb4593c
NT
1097
1098 /* debugger active and we're not compiling the debugger code,
1099 * so store the line into the debugger's array of lines
1100 */
80a702cd 1101 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1102 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1103 }
5db06880
NC
1104
1105#ifdef PERL_MAD
1106 done:
1107 if (PL_madskills) {
cd81e915 1108 if (!PL_skipwhite)
6b29d1f5 1109 PL_skipwhite = newSVpvs("");
5db06880
NC
1110 curoff = s - SvPVX(PL_linestr);
1111 if (curoff - startoff)
cd81e915 1112 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1113 curoff - startoff);
1114 }
1115 return s;
1116#endif
a687059c 1117}
378cc40b 1118
ffb4593c
NT
1119/*
1120 * S_check_uni
1121 * Check the unary operators to ensure there's no ambiguity in how they're
1122 * used. An ambiguous piece of code would be:
1123 * rand + 5
1124 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1125 * the +5 is its argument.
1126 */
1127
76e3520e 1128STATIC void
cea2e8a9 1129S_check_uni(pTHX)
ba106d47 1130{
97aff369 1131 dVAR;
d4c19fe8
AL
1132 const char *s;
1133 const char *t;
2f3197b3 1134
3280af22 1135 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1136 return;
3280af22
NIS
1137 while (isSPACE(*PL_last_uni))
1138 PL_last_uni++;
c35e046a
AL
1139 s = PL_last_uni;
1140 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1141 s++;
3280af22 1142 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1143 return;
6136c704 1144
0453d815 1145 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1146 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1147 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1148 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1149 }
2f3197b3
LW
1150}
1151
ffb4593c
NT
1152/*
1153 * LOP : macro to build a list operator. Its behaviour has been replaced
1154 * with a subroutine, S_lop() for which LOP is just another name.
1155 */
1156
a0d0e21e
LW
1157#define LOP(f,x) return lop(f,x,s)
1158
ffb4593c
NT
1159/*
1160 * S_lop
1161 * Build a list operator (or something that might be one). The rules:
1162 * - if we have a next token, then it's a list operator [why?]
1163 * - if the next thing is an opening paren, then it's a function
1164 * - else it's a list operator
1165 */
1166
76e3520e 1167STATIC I32
a0be28da 1168S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1169{
97aff369 1170 dVAR;
79072805 1171 yylval.ival = f;
35c8bce7 1172 CLINE;
3280af22
NIS
1173 PL_expect = x;
1174 PL_bufptr = s;
1175 PL_last_lop = PL_oldbufptr;
eb160463 1176 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1177#ifdef PERL_MAD
1178 if (PL_lasttoke)
1179 return REPORT(LSTOP);
1180#else
3280af22 1181 if (PL_nexttoke)
bbf60fe6 1182 return REPORT(LSTOP);
5db06880 1183#endif
79072805 1184 if (*s == '(')
bbf60fe6 1185 return REPORT(FUNC);
29595ff2 1186 s = PEEKSPACE(s);
79072805 1187 if (*s == '(')
bbf60fe6 1188 return REPORT(FUNC);
79072805 1189 else
bbf60fe6 1190 return REPORT(LSTOP);
79072805
LW
1191}
1192
5db06880
NC
1193#ifdef PERL_MAD
1194 /*
1195 * S_start_force
1196 * Sets up for an eventual force_next(). start_force(0) basically does
1197 * an unshift, while start_force(-1) does a push. yylex removes items
1198 * on the "pop" end.
1199 */
1200
1201STATIC void
1202S_start_force(pTHX_ int where)
1203{
1204 int i;
1205
cd81e915 1206 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1207 where = PL_lasttoke;
cd81e915
NC
1208 assert(PL_curforce < 0 || PL_curforce == where);
1209 if (PL_curforce != where) {
5db06880
NC
1210 for (i = PL_lasttoke; i > where; --i) {
1211 PL_nexttoke[i] = PL_nexttoke[i-1];
1212 }
1213 PL_lasttoke++;
1214 }
cd81e915 1215 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1216 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1217 PL_curforce = where;
1218 if (PL_nextwhite) {
5db06880 1219 if (PL_madskills)
6b29d1f5 1220 curmad('^', newSVpvs(""));
cd81e915 1221 CURMAD('_', PL_nextwhite);
5db06880
NC
1222 }
1223}
1224
1225STATIC void
1226S_curmad(pTHX_ char slot, SV *sv)
1227{
1228 MADPROP **where;
1229
1230 if (!sv)
1231 return;
cd81e915
NC
1232 if (PL_curforce < 0)
1233 where = &PL_thismad;
5db06880 1234 else
cd81e915 1235 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1236
cd81e915 1237 if (PL_faketokens)
5db06880
NC
1238 sv_setpvn(sv, "", 0);
1239 else {
1240 if (!IN_BYTES) {
1241 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1242 SvUTF8_on(sv);
1243 else if (PL_encoding) {
1244 sv_recode_to_utf8(sv, PL_encoding);
1245 }
1246 }
1247 }
1248
1249 /* keep a slot open for the head of the list? */
1250 if (slot != '_' && *where && (*where)->mad_key == '^') {
1251 (*where)->mad_key = slot;
1252 sv_free((*where)->mad_val);
1253 (*where)->mad_val = (void*)sv;
1254 }
1255 else
1256 addmad(newMADsv(slot, sv), where, 0);
1257}
1258#else
b3f24c00
MHM
1259# define start_force(where) NOOP
1260# define curmad(slot, sv) NOOP
5db06880
NC
1261#endif
1262
ffb4593c
NT
1263/*
1264 * S_force_next
9cbb5ea2 1265 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1266 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1267 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1268 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1269 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1270 */
1271
4e553d73 1272STATIC void
cea2e8a9 1273S_force_next(pTHX_ I32 type)
79072805 1274{
97aff369 1275 dVAR;
5db06880 1276#ifdef PERL_MAD
cd81e915 1277 if (PL_curforce < 0)
5db06880 1278 start_force(PL_lasttoke);
cd81e915 1279 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1280 if (PL_lex_state != LEX_KNOWNEXT)
1281 PL_lex_defer = PL_lex_state;
1282 PL_lex_state = LEX_KNOWNEXT;
1283 PL_lex_expect = PL_expect;
cd81e915 1284 PL_curforce = -1;
5db06880 1285#else
3280af22
NIS
1286 PL_nexttype[PL_nexttoke] = type;
1287 PL_nexttoke++;
1288 if (PL_lex_state != LEX_KNOWNEXT) {
1289 PL_lex_defer = PL_lex_state;
1290 PL_lex_expect = PL_expect;
1291 PL_lex_state = LEX_KNOWNEXT;
79072805 1292 }
5db06880 1293#endif
79072805
LW
1294}
1295
d0a148a6
NC
1296STATIC SV *
1297S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1298{
97aff369 1299 dVAR;
9d4ba2ae 1300 SV * const sv = newSVpvn(start,len);
bfed75c6 1301 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1302 SvUTF8_on(sv);
1303 return sv;
1304}
1305
ffb4593c
NT
1306/*
1307 * S_force_word
1308 * When the lexer knows the next thing is a word (for instance, it has
1309 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1310 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1311 * lookahead.
ffb4593c
NT
1312 *
1313 * Arguments:
b1b65b59 1314 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1315 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1316 * int check_keyword : if true, Perl checks to make sure the word isn't
1317 * a keyword (do this if the word is a label, e.g. goto FOO)
1318 * int allow_pack : if true, : characters will also be allowed (require,
1319 * use, etc. do this)
9cbb5ea2 1320 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1321 */
1322
76e3520e 1323STATIC char *
cea2e8a9 1324S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1325{
97aff369 1326 dVAR;
463ee0b2
LW
1327 register char *s;
1328 STRLEN len;
4e553d73 1329
29595ff2 1330 start = SKIPSPACE1(start);
463ee0b2 1331 s = start;
7e2040f0 1332 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1333 (allow_pack && *s == ':') ||
15f0808c 1334 (allow_initial_tick && *s == '\'') )
a0d0e21e 1335 {
3280af22 1336 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1337 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1338 return start;
cd81e915 1339 start_force(PL_curforce);
5db06880
NC
1340 if (PL_madskills)
1341 curmad('X', newSVpvn(start,s-start));
463ee0b2 1342 if (token == METHOD) {
29595ff2 1343 s = SKIPSPACE1(s);
463ee0b2 1344 if (*s == '(')
3280af22 1345 PL_expect = XTERM;
463ee0b2 1346 else {
3280af22 1347 PL_expect = XOPERATOR;
463ee0b2 1348 }
79072805 1349 }
9ded7720 1350 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1351 = (OP*)newSVOP(OP_CONST,0,
1352 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1353 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1354 force_next(token);
1355 }
1356 return s;
1357}
1358
ffb4593c
NT
1359/*
1360 * S_force_ident
9cbb5ea2 1361 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1362 * text only contains the "foo" portion. The first argument is a pointer
1363 * to the "foo", and the second argument is the type symbol to prefix.
1364 * Forces the next token to be a "WORD".
9cbb5ea2 1365 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1366 */
1367
76e3520e 1368STATIC void
bfed75c6 1369S_force_ident(pTHX_ register const char *s, int kind)
79072805 1370{
97aff369 1371 dVAR;
c35e046a 1372 if (*s) {
90e5519e
NC
1373 const STRLEN len = strlen(s);
1374 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1375 start_force(PL_curforce);
9ded7720 1376 NEXTVAL_NEXTTOKE.opval = o;
79072805 1377 force_next(WORD);
748a9306 1378 if (kind) {
11343788 1379 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1380 /* XXX see note in pp_entereval() for why we forgo typo
1381 warnings if the symbol must be introduced in an eval.
1382 GSAR 96-10-12 */
90e5519e
NC
1383 gv_fetchpvn_flags(s, len,
1384 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1385 : GV_ADD,
1386 kind == '$' ? SVt_PV :
1387 kind == '@' ? SVt_PVAV :
1388 kind == '%' ? SVt_PVHV :
a0d0e21e 1389 SVt_PVGV
90e5519e 1390 );
748a9306 1391 }
79072805
LW
1392 }
1393}
1394
1571675a
GS
1395NV
1396Perl_str_to_version(pTHX_ SV *sv)
1397{
1398 NV retval = 0.0;
1399 NV nshift = 1.0;
1400 STRLEN len;
cfd0369c 1401 const char *start = SvPV_const(sv,len);
9d4ba2ae 1402 const char * const end = start + len;
504618e9 1403 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1404 while (start < end) {
ba210ebe 1405 STRLEN skip;
1571675a
GS
1406 UV n;
1407 if (utf)
9041c2e3 1408 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1409 else {
1410 n = *(U8*)start;
1411 skip = 1;
1412 }
1413 retval += ((NV)n)/nshift;
1414 start += skip;
1415 nshift *= 1000;
1416 }
1417 return retval;
1418}
1419
4e553d73 1420/*
ffb4593c
NT
1421 * S_force_version
1422 * Forces the next token to be a version number.
e759cc13
RGS
1423 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1424 * and if "guessing" is TRUE, then no new token is created (and the caller
1425 * must use an alternative parsing method).
ffb4593c
NT
1426 */
1427
76e3520e 1428STATIC char *
e759cc13 1429S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1430{
97aff369 1431 dVAR;
5f66b61c 1432 OP *version = NULL;
44dcb63b 1433 char *d;
5db06880
NC
1434#ifdef PERL_MAD
1435 I32 startoff = s - SvPVX(PL_linestr);
1436#endif
89bfa8cd 1437
29595ff2 1438 s = SKIPSPACE1(s);
89bfa8cd 1439
44dcb63b 1440 d = s;
dd629d5b 1441 if (*d == 'v')
44dcb63b 1442 d++;
44dcb63b 1443 if (isDIGIT(*d)) {
e759cc13
RGS
1444 while (isDIGIT(*d) || *d == '_' || *d == '.')
1445 d++;
5db06880
NC
1446#ifdef PERL_MAD
1447 if (PL_madskills) {
cd81e915 1448 start_force(PL_curforce);
5db06880
NC
1449 curmad('X', newSVpvn(s,d-s));
1450 }
1451#endif
9f3d182e 1452 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1453 SV *ver;
b73d6f50 1454 s = scan_num(s, &yylval);
89bfa8cd 1455 version = yylval.opval;
dd629d5b
GS
1456 ver = cSVOPx(version)->op_sv;
1457 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1458 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1459 SvNV_set(ver, str_to_version(ver));
1571675a 1460 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1461 }
89bfa8cd 1462 }
5db06880
NC
1463 else if (guessing) {
1464#ifdef PERL_MAD
1465 if (PL_madskills) {
cd81e915
NC
1466 sv_free(PL_nextwhite); /* let next token collect whitespace */
1467 PL_nextwhite = 0;
5db06880
NC
1468 s = SvPVX(PL_linestr) + startoff;
1469 }
1470#endif
e759cc13 1471 return s;
5db06880 1472 }
89bfa8cd
PP
1473 }
1474
5db06880
NC
1475#ifdef PERL_MAD
1476 if (PL_madskills && !version) {
cd81e915
NC
1477 sv_free(PL_nextwhite); /* let next token collect whitespace */
1478 PL_nextwhite = 0;
5db06880
NC
1479 s = SvPVX(PL_linestr) + startoff;
1480 }
1481#endif
89bfa8cd 1482 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1483 start_force(PL_curforce);
9ded7720 1484 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1485 force_next(WORD);
89bfa8cd 1486
e759cc13 1487 return s;
89bfa8cd
PP
1488}
1489
ffb4593c
NT
1490/*
1491 * S_tokeq
1492 * Tokenize a quoted string passed in as an SV. It finds the next
1493 * chunk, up to end of string or a backslash. It may make a new
1494 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1495 * turns \\ into \.
1496 */
1497
76e3520e 1498STATIC SV *
cea2e8a9 1499S_tokeq(pTHX_ SV *sv)
79072805 1500{
97aff369 1501 dVAR;
79072805
LW
1502 register char *s;
1503 register char *send;
1504 register char *d;
b3ac6de7
IZ
1505 STRLEN len = 0;
1506 SV *pv = sv;
79072805
LW
1507
1508 if (!SvLEN(sv))
b3ac6de7 1509 goto finish;
79072805 1510
a0d0e21e 1511 s = SvPV_force(sv, len);
21a311ee 1512 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1513 goto finish;
463ee0b2 1514 send = s + len;
79072805
LW
1515 while (s < send && *s != '\\')
1516 s++;
1517 if (s == send)
b3ac6de7 1518 goto finish;
79072805 1519 d = s;
be4731d2 1520 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1521 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1522 if (SvUTF8(sv))
1523 SvUTF8_on(pv);
1524 }
79072805
LW
1525 while (s < send) {
1526 if (*s == '\\') {
a0d0e21e 1527 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1528 s++; /* all that, just for this */
1529 }
1530 *d++ = *s++;
1531 }
1532 *d = '\0';
95a20fc0 1533 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1534 finish:
3280af22 1535 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1536 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1537 return sv;
1538}
1539
ffb4593c
NT
1540/*
1541 * Now come three functions related to double-quote context,
1542 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1543 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1544 * interact with PL_lex_state, and create fake ( ... ) argument lists
1545 * to handle functions and concatenation.
1546 * They assume that whoever calls them will be setting up a fake
1547 * join call, because each subthing puts a ',' after it. This lets
1548 * "lower \luPpEr"
1549 * become
1550 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1551 *
1552 * (I'm not sure whether the spurious commas at the end of lcfirst's
1553 * arguments and join's arguments are created or not).
1554 */
1555
1556/*
1557 * S_sublex_start
1558 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1559 *
1560 * Pattern matching will set PL_lex_op to the pattern-matching op to
1561 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1562 *
1563 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1564 *
1565 * Everything else becomes a FUNC.
1566 *
1567 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1568 * had an OP_CONST or OP_READLINE). This just sets us up for a
1569 * call to S_sublex_push().
1570 */
1571
76e3520e 1572STATIC I32
cea2e8a9 1573S_sublex_start(pTHX)
79072805 1574{
97aff369 1575 dVAR;
0d46e09a 1576 register const I32 op_type = yylval.ival;
79072805
LW
1577
1578 if (op_type == OP_NULL) {
3280af22 1579 yylval.opval = PL_lex_op;
5f66b61c 1580 PL_lex_op = NULL;
79072805
LW
1581 return THING;
1582 }
1583 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1584 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1585
1586 if (SvTYPE(sv) == SVt_PVIV) {
1587 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1588 STRLEN len;
96a5add6 1589 const char * const p = SvPV_const(sv, len);
f54cb97a 1590 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1591 if (SvUTF8(sv))
1592 SvUTF8_on(nsv);
b3ac6de7
IZ
1593 SvREFCNT_dec(sv);
1594 sv = nsv;
4e553d73 1595 }
b3ac6de7 1596 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1597 PL_lex_stuff = NULL;
6f33ba73
RGS
1598 /* Allow <FH> // "foo" */
1599 if (op_type == OP_READLINE)
1600 PL_expect = XTERMORDORDOR;
79072805
LW
1601 return THING;
1602 }
e3f73d4e
RGS
1603 else if (op_type == OP_BACKTICK && PL_lex_op) {
1604 /* readpipe() vas overriden */
1605 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1606 yylval.opval = PL_lex_op;
9b201d7d 1607 PL_lex_op = NULL;
e3f73d4e
RGS
1608 PL_lex_stuff = NULL;
1609 return THING;
1610 }
79072805 1611
3280af22
NIS
1612 PL_sublex_info.super_state = PL_lex_state;
1613 PL_sublex_info.sub_inwhat = op_type;
1614 PL_sublex_info.sub_op = PL_lex_op;
1615 PL_lex_state = LEX_INTERPPUSH;
55497cff 1616
3280af22
NIS
1617 PL_expect = XTERM;
1618 if (PL_lex_op) {
1619 yylval.opval = PL_lex_op;
5f66b61c 1620 PL_lex_op = NULL;
55497cff
PP
1621 return PMFUNC;
1622 }
1623 else
1624 return FUNC;
1625}
1626
ffb4593c
NT
1627/*
1628 * S_sublex_push
1629 * Create a new scope to save the lexing state. The scope will be
1630 * ended in S_sublex_done. Returns a '(', starting the function arguments
1631 * to the uc, lc, etc. found before.
1632 * Sets PL_lex_state to LEX_INTERPCONCAT.
1633 */
1634
76e3520e 1635STATIC I32
cea2e8a9 1636S_sublex_push(pTHX)
55497cff 1637{
27da23d5 1638 dVAR;
f46d017c 1639 ENTER;
55497cff 1640
3280af22
NIS
1641 PL_lex_state = PL_sublex_info.super_state;
1642 SAVEI32(PL_lex_dojoin);
1643 SAVEI32(PL_lex_brackets);
3280af22
NIS
1644 SAVEI32(PL_lex_casemods);
1645 SAVEI32(PL_lex_starts);
1646 SAVEI32(PL_lex_state);
7766f137 1647 SAVEVPTR(PL_lex_inpat);
3280af22 1648 SAVEI32(PL_lex_inwhat);
57843af0 1649 SAVECOPLINE(PL_curcop);
3280af22 1650 SAVEPPTR(PL_bufptr);
8452ff4b 1651 SAVEPPTR(PL_bufend);
3280af22
NIS
1652 SAVEPPTR(PL_oldbufptr);
1653 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1654 SAVEPPTR(PL_last_lop);
1655 SAVEPPTR(PL_last_uni);
3280af22
NIS
1656 SAVEPPTR(PL_linestart);
1657 SAVESPTR(PL_linestr);
8edd5f42
RGS
1658 SAVEGENERICPV(PL_lex_brackstack);
1659 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1660
1661 PL_linestr = PL_lex_stuff;
a0714e2c 1662 PL_lex_stuff = NULL;
3280af22 1663
9cbb5ea2
GS
1664 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1665 = SvPVX(PL_linestr);
3280af22 1666 PL_bufend += SvCUR(PL_linestr);
bd61b366 1667 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1668 SAVEFREESV(PL_linestr);
1669
1670 PL_lex_dojoin = FALSE;
1671 PL_lex_brackets = 0;
a02a5408
JC
1672 Newx(PL_lex_brackstack, 120, char);
1673 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1674 PL_lex_casemods = 0;
1675 *PL_lex_casestack = '\0';
1676 PL_lex_starts = 0;
1677 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1678 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1679
1680 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1681 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1682 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1683 else
5f66b61c 1684 PL_lex_inpat = NULL;
79072805 1685
55497cff 1686 return '(';
79072805
LW
1687}
1688
ffb4593c
NT
1689/*
1690 * S_sublex_done
1691 * Restores lexer state after a S_sublex_push.
1692 */
1693
76e3520e 1694STATIC I32
cea2e8a9 1695S_sublex_done(pTHX)
79072805 1696{
27da23d5 1697 dVAR;
3280af22 1698 if (!PL_lex_starts++) {
396482e1 1699 SV * const sv = newSVpvs("");
9aa983d2
JH
1700 if (SvUTF8(PL_linestr))
1701 SvUTF8_on(sv);
3280af22 1702 PL_expect = XOPERATOR;
9aa983d2 1703 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1704 return THING;
1705 }
1706
3280af22
NIS
1707 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1708 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1709 return yylex();
79072805
LW
1710 }
1711
ffb4593c 1712 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1713 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1714 PL_linestr = PL_lex_repl;
1715 PL_lex_inpat = 0;
1716 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1717 PL_bufend += SvCUR(PL_linestr);
bd61b366 1718 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1719 SAVEFREESV(PL_linestr);
1720 PL_lex_dojoin = FALSE;
1721 PL_lex_brackets = 0;
3280af22
NIS
1722 PL_lex_casemods = 0;
1723 *PL_lex_casestack = '\0';
1724 PL_lex_starts = 0;
25da4f38 1725 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1726 PL_lex_state = LEX_INTERPNORMAL;
1727 PL_lex_starts++;
e9fa98b2
HS
1728 /* we don't clear PL_lex_repl here, so that we can check later
1729 whether this is an evalled subst; that means we rely on the
1730 logic to ensure sublex_done() is called again only via the
1731 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1732 }
e9fa98b2 1733 else {
3280af22 1734 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1735 PL_lex_repl = NULL;
e9fa98b2 1736 }
79072805 1737 return ',';
ffed7fef
LW
1738 }
1739 else {
5db06880
NC
1740#ifdef PERL_MAD
1741 if (PL_madskills) {
cd81e915
NC
1742 if (PL_thiswhite) {
1743 if (!PL_endwhite)
6b29d1f5 1744 PL_endwhite = newSVpvs("");
cd81e915
NC
1745 sv_catsv(PL_endwhite, PL_thiswhite);
1746 PL_thiswhite = 0;
1747 }
1748 if (PL_thistoken)
1749 sv_setpvn(PL_thistoken,"",0);
5db06880 1750 else
cd81e915 1751 PL_realtokenstart = -1;
5db06880
NC
1752 }
1753#endif
f46d017c 1754 LEAVE;
3280af22
NIS
1755 PL_bufend = SvPVX(PL_linestr);
1756 PL_bufend += SvCUR(PL_linestr);
1757 PL_expect = XOPERATOR;
09bef843 1758 PL_sublex_info.sub_inwhat = 0;
79072805 1759 return ')';
ffed7fef
LW
1760 }
1761}
1762
02aa26ce
NT
1763/*
1764 scan_const
1765
1766 Extracts a pattern, double-quoted string, or transliteration. This
1767 is terrifying code.
1768
94def140 1769 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1770 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1771 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1772
94def140
ST
1773 Returns a pointer to the character scanned up to. If this is
1774 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1775 successfully parsed), will leave an OP for the substring scanned
1776 in yylval. Caller must intuit reason for not parsing further
1777 by looking at the next characters herself.
1778
02aa26ce
NT
1779 In patterns:
1780 backslashes:
1781 double-quoted style: \r and \n
1782 regexp special ones: \D \s
94def140
ST
1783 constants: \x31
1784 backrefs: \1
02aa26ce
NT
1785 case and quoting: \U \Q \E
1786 stops on @ and $, but not for $ as tail anchor
1787
1788 In transliterations:
1789 characters are VERY literal, except for - not at the start or end
94def140
ST
1790 of the string, which indicates a range. If the range is in bytes,
1791 scan_const expands the range to the full set of intermediate
1792 characters. If the range is in utf8, the hyphen is replaced with
1793 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1794
1795 In double-quoted strings:
1796 backslashes:
1797 double-quoted style: \r and \n
94def140
ST
1798 constants: \x31
1799 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1800 case and quoting: \U \Q \E
1801 stops on @ and $
1802
1803 scan_const does *not* construct ops to handle interpolated strings.
1804 It stops processing as soon as it finds an embedded $ or @ variable
1805 and leaves it to the caller to work out what's going on.
1806
94def140
ST
1807 embedded arrays (whether in pattern or not) could be:
1808 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1809
1810 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1811
1812 $ in pattern could be $foo or could be tail anchor. Assumption:
1813 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1814 followed by one of "()| \r\n\t"
02aa26ce
NT
1815
1816 \1 (backreferences) are turned into $1
1817
1818 The structure of the code is
1819 while (there's a character to process) {
94def140
ST
1820 handle transliteration ranges
1821 skip regexp comments /(?#comment)/ and codes /(?{code})/
1822 skip #-initiated comments in //x patterns
1823 check for embedded arrays
02aa26ce
NT
1824 check for embedded scalars
1825 if (backslash) {
94def140
ST
1826 leave intact backslashes from leaveit (below)
1827 deprecate \1 in substitution replacements
02aa26ce
NT
1828 handle string-changing backslashes \l \U \Q \E, etc.
1829 switch (what was escaped) {
94def140
ST
1830 handle \- in a transliteration (becomes a literal -)
1831 handle \132 (octal characters)
1832 handle \x15 and \x{1234} (hex characters)
1833 handle \N{name} (named characters)
1834 handle \cV (control characters)
1835 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1836 } (end switch)
1837 } (end if backslash)
1838 } (end while character to read)
4e553d73 1839
02aa26ce
NT
1840*/
1841
76e3520e 1842STATIC char *
cea2e8a9 1843S_scan_const(pTHX_ char *start)
79072805 1844{
97aff369 1845 dVAR;
3280af22 1846 register char *send = PL_bufend; /* end of the constant */
561b68a9 1847 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1848 register char *s = start; /* start of the constant */
1849 register char *d = SvPVX(sv); /* destination for copies */
1850 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1851 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1852 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1853 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1854 UV uv;
4c3a8340
ST
1855#ifdef EBCDIC
1856 UV literal_endpoint = 0;
e294cc5d 1857 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1858#endif
012bcf8d 1859
2b9d42f0
NIS
1860 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1861 /* If we are doing a trans and we know we want UTF8 set expectation */
1862 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1863 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1864 }
1865
1866
79072805 1867 while (s < send || dorange) {
02aa26ce 1868 /* get transliterations out of the way (they're most literal) */
3280af22 1869 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1870 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1871 if (dorange) {
1ba5c669
JH
1872 I32 i; /* current expanded character */
1873 I32 min; /* first character in range */
1874 I32 max; /* last character in range */
02aa26ce 1875
e294cc5d
JH
1876#ifdef EBCDIC
1877 UV uvmax = 0;
1878#endif
1879
1880 if (has_utf8
1881#ifdef EBCDIC
1882 && !native_range
1883#endif
1884 ) {
9d4ba2ae 1885 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1886 char *e = d++;
1887 while (e-- > c)
1888 *(e + 1) = *e;
25716404 1889 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1890 /* mark the range as done, and continue */
1891 dorange = FALSE;
1892 didrange = TRUE;
1893 continue;
1894 }
2b9d42f0 1895
95a20fc0 1896 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1897#ifdef EBCDIC
1898 SvGROW(sv,
1899 SvLEN(sv) + (has_utf8 ?
1900 (512 - UTF_CONTINUATION_MARK +
1901 UNISKIP(0x100))
1902 : 256));
1903 /* How many two-byte within 0..255: 128 in UTF-8,
1904 * 96 in UTF-8-mod. */
1905#else
9cbb5ea2 1906 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1907#endif
9cbb5ea2 1908 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1909#ifdef EBCDIC
1910 if (has_utf8) {
1911 int j;
1912 for (j = 0; j <= 1; j++) {
1913 char * const c = (char*)utf8_hop((U8*)d, -1);
1914 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1915 if (j)
1916 min = (U8)uv;
1917 else if (uv < 256)
1918 max = (U8)uv;
1919 else {
1920 max = (U8)0xff; /* only to \xff */
1921 uvmax = uv; /* \x{100} to uvmax */
1922 }
1923 d = c; /* eat endpoint chars */
1924 }
1925 }
1926 else {
1927#endif
1928 d -= 2; /* eat the first char and the - */
1929 min = (U8)*d; /* first char in range */
1930 max = (U8)d[1]; /* last char in range */
1931#ifdef EBCDIC
1932 }
1933#endif
8ada0baa 1934
c2e66d9e 1935 if (min > max) {
01ec43d0 1936 Perl_croak(aTHX_
d1573ac7 1937 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1938 (char)min, (char)max);
c2e66d9e
GS
1939 }
1940
c7f1f016 1941#ifdef EBCDIC
4c3a8340
ST
1942 if (literal_endpoint == 2 &&
1943 ((isLOWER(min) && isLOWER(max)) ||
1944 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1945 if (isLOWER(min)) {
1946 for (i = min; i <= max; i++)
1947 if (isLOWER(i))
db42d148 1948 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1949 } else {
1950 for (i = min; i <= max; i++)
1951 if (isUPPER(i))
db42d148 1952 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1953 }
1954 }
1955 else
1956#endif
1957 for (i = min; i <= max; i++)
e294cc5d
JH
1958#ifdef EBCDIC
1959 if (has_utf8) {
1960 const U8 ch = (U8)NATIVE_TO_UTF(i);
1961 if (UNI_IS_INVARIANT(ch))
1962 *d++ = (U8)i;
1963 else {
1964 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1965 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1966 }
1967 }
1968 else
1969#endif
1970 *d++ = (char)i;
1971
1972#ifdef EBCDIC
1973 if (uvmax) {
1974 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1975 if (uvmax > 0x101)
1976 *d++ = (char)UTF_TO_NATIVE(0xff);
1977 if (uvmax > 0x100)
1978 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1979 }
1980#endif
02aa26ce
NT
1981
1982 /* mark the range as done, and continue */
79072805 1983 dorange = FALSE;
01ec43d0 1984 didrange = TRUE;
4c3a8340
ST
1985#ifdef EBCDIC
1986 literal_endpoint = 0;
1987#endif
79072805 1988 continue;
4e553d73 1989 }
02aa26ce
NT
1990
1991 /* range begins (ignore - as first or last char) */
79072805 1992 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1993 if (didrange) {
1fafa243 1994 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1995 }
e294cc5d
JH
1996 if (has_utf8
1997#ifdef EBCDIC
1998 && !native_range
1999#endif
2000 ) {
25716404 2001 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2002 s++;
2003 continue;
2004 }
79072805
LW
2005 dorange = TRUE;
2006 s++;
01ec43d0
GS
2007 }
2008 else {
2009 didrange = FALSE;
4c3a8340
ST
2010#ifdef EBCDIC
2011 literal_endpoint = 0;
e294cc5d 2012 native_range = TRUE;
4c3a8340 2013#endif
01ec43d0 2014 }
79072805 2015 }
02aa26ce
NT
2016
2017 /* if we get here, we're not doing a transliteration */
2018
0f5d15d6
IZ
2019 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2020 except for the last char, which will be done separately. */
3280af22 2021 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2022 if (s[2] == '#') {
e994fd66 2023 while (s+1 < send && *s != ')')
db42d148 2024 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2025 }
2026 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2027 || (s[2] == '?' && s[3] == '{'))
155aba94 2028 {
cc6b7395 2029 I32 count = 1;
0f5d15d6 2030 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2031 char c;
2032
d9f97599
GS
2033 while (count && (c = *regparse)) {
2034 if (c == '\\' && regparse[1])
2035 regparse++;
4e553d73 2036 else if (c == '{')
cc6b7395 2037 count++;
4e553d73 2038 else if (c == '}')
cc6b7395 2039 count--;
d9f97599 2040 regparse++;
cc6b7395 2041 }
e994fd66 2042 if (*regparse != ')')
5bdf89e7 2043 regparse--; /* Leave one char for continuation. */
0f5d15d6 2044 while (s < regparse)
db42d148 2045 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2046 }
748a9306 2047 }
02aa26ce
NT
2048
2049 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2050 else if (*s == '#' && PL_lex_inpat &&
2051 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2052 while (s+1 < send && *s != '\n')
db42d148 2053 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2054 }
02aa26ce 2055
5d1d4326 2056 /* check for embedded arrays
da6eedaa 2057 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2058 */
1749ea0d
ST
2059 else if (*s == '@' && s[1]) {
2060 if (isALNUM_lazy_if(s+1,UTF))
2061 break;
2062 if (strchr(":'{$", s[1]))
2063 break;
2064 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2065 break; /* in regexp, neither @+ nor @- are interpolated */
2066 }
02aa26ce
NT
2067
2068 /* check for embedded scalars. only stop if we're sure it's a
2069 variable.
2070 */
79072805 2071 else if (*s == '$') {
3280af22 2072 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2073 break;
6002328a 2074 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2075 break; /* in regexp, $ might be tail anchor */
2076 }
02aa26ce 2077
2b9d42f0
NIS
2078 /* End of else if chain - OP_TRANS rejoin rest */
2079
02aa26ce 2080 /* backslashes */
79072805
LW
2081 if (*s == '\\' && s+1 < send) {
2082 s++;
02aa26ce 2083
02aa26ce 2084 /* deprecate \1 in strings and substitution replacements */
3280af22 2085 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2086 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2087 {
599cee73 2088 if (ckWARN(WARN_SYNTAX))
9014280d 2089 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2090 *--s = '$';
2091 break;
2092 }
02aa26ce
NT
2093
2094 /* string-change backslash escapes */
3280af22 2095 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2096 --s;
2097 break;
2098 }
cc74c5bd
ST
2099 /* skip any other backslash escapes in a pattern */
2100 else if (PL_lex_inpat) {
2101 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2102 goto default_action;
2103 }
02aa26ce
NT
2104
2105 /* if we get here, it's either a quoted -, or a digit */
79072805 2106 switch (*s) {
02aa26ce
NT
2107
2108 /* quoted - in transliterations */
79072805 2109 case '-':
3280af22 2110 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2111 *d++ = *s++;
2112 continue;
2113 }
2114 /* FALL THROUGH */
2115 default:
11b8faa4 2116 {
86f97054 2117 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2118 ckWARN(WARN_MISC))
9014280d 2119 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2120 "Unrecognized escape \\%c passed through",
2121 *s);
11b8faa4 2122 /* default action is to copy the quoted character */
f9a63242 2123 goto default_action;
11b8faa4 2124 }
02aa26ce
NT
2125
2126 /* \132 indicates an octal constant */
79072805
LW
2127 case '0': case '1': case '2': case '3':
2128 case '4': case '5': case '6': case '7':
ba210ebe 2129 {
53305cf1
NC
2130 I32 flags = 0;
2131 STRLEN len = 3;
2132 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2133 s += len;
2134 }
012bcf8d 2135 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2136
2137 /* \x24 indicates a hex constant */
79072805 2138 case 'x':
a0ed51b3
LW
2139 ++s;
2140 if (*s == '{') {
9d4ba2ae 2141 char* const e = strchr(s, '}');
a4c04bdc
NC
2142 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2143 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2144 STRLEN len;
355860ce 2145
53305cf1 2146 ++s;
adaeee49 2147 if (!e) {
a0ed51b3 2148 yyerror("Missing right brace on \\x{}");
355860ce 2149 continue;
ba210ebe 2150 }
53305cf1
NC
2151 len = e - s;
2152 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2153 s = e + 1;
a0ed51b3
LW
2154 }
2155 else {
ba210ebe 2156 {
53305cf1 2157 STRLEN len = 2;
a4c04bdc 2158 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2159 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2160 s += len;
2161 }
012bcf8d
GS
2162 }
2163
2164 NUM_ESCAPE_INSERT:
2165 /* Insert oct or hex escaped character.
301d3d20 2166 * There will always enough room in sv since such
db42d148 2167 * escapes will be longer than any UTF-8 sequence
301d3d20 2168 * they can end up as. */
ba7cea30 2169
c7f1f016
NIS
2170 /* We need to map to chars to ASCII before doing the tests
2171 to cover EBCDIC
2172 */
c4d5f83a 2173 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2174 if (!has_utf8 && uv > 255) {
301d3d20
JH
2175 /* Might need to recode whatever we have
2176 * accumulated so far if it contains any
2177 * hibit chars.
2178 *
2179 * (Can't we keep track of that and avoid
2180 * this rescan? --jhi)
012bcf8d 2181 */
c7f1f016 2182 int hicount = 0;
63cd0674
NIS
2183 U8 *c;
2184 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2185 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2186 hicount++;
db42d148 2187 }
012bcf8d 2188 }
63cd0674 2189 if (hicount) {
9d4ba2ae 2190 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2191 U8 *src, *dst;
2192 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2193 src = (U8 *)d - 1;
2194 dst = src+hicount;
2195 d += hicount;
cfd0369c 2196 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2197 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2198 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2199 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2200 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2201 }
2202 else {
63cd0674 2203 *dst-- = *src;
012bcf8d 2204 }
c7f1f016 2205 src--;
012bcf8d
GS
2206 }
2207 }
2208 }
2209
9aa983d2 2210 if (has_utf8 || uv > 255) {
9041c2e3 2211 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2212 has_utf8 = TRUE;
f9a63242
JH
2213 if (PL_lex_inwhat == OP_TRANS &&
2214 PL_sublex_info.sub_op) {
2215 PL_sublex_info.sub_op->op_private |=
2216 (PL_lex_repl ? OPpTRANS_FROM_UTF
2217 : OPpTRANS_TO_UTF);
f9a63242 2218 }
e294cc5d
JH
2219#ifdef EBCDIC
2220 if (uv > 255 && !dorange)
2221 native_range = FALSE;
2222#endif
012bcf8d 2223 }
a0ed51b3 2224 else {
012bcf8d 2225 *d++ = (char)uv;
a0ed51b3 2226 }
012bcf8d
GS
2227 }
2228 else {
c4d5f83a 2229 *d++ = (char) uv;
a0ed51b3 2230 }
79072805 2231 continue;
02aa26ce 2232
b239daa5 2233 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2234 case 'N':
55eda711 2235 ++s;
423cee85
JH
2236 if (*s == '{') {
2237 char* e = strchr(s, '}');
155aba94 2238 SV *res;
423cee85 2239 STRLEN len;
cfd0369c 2240 const char *str;
fc8cd66c 2241 SV *type;
4e553d73 2242
423cee85 2243 if (!e) {
5777a3f7 2244 yyerror("Missing right brace on \\N{}");
423cee85
JH
2245 e = s - 1;
2246 goto cont_scan;
2247 }
dbc0d4f2
JH
2248 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2249 /* \N{U+...} */
2250 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2251 PERL_SCAN_DISALLOW_PREFIX;
2252 s += 3;
2253 len = e - s;
2254 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2255 if ( e > s && len != (STRLEN)(e - s) ) {
2256 uv = 0xFFFD;
fc8cd66c 2257 }
dbc0d4f2
JH
2258 s = e + 1;
2259 goto NUM_ESCAPE_INSERT;
2260 }
55eda711 2261 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2262 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2263 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2264 res, NULL, SvPVX(type) );
2265 SvREFCNT_dec(type);
f9a63242
JH
2266 if (has_utf8)
2267 sv_utf8_upgrade(res);
cfd0369c 2268 str = SvPV_const(res,len);
1c47067b
JH
2269#ifdef EBCDIC_NEVER_MIND
2270 /* charnames uses pack U and that has been
2271 * recently changed to do the below uni->native
2272 * mapping, so this would be redundant (and wrong,
2273 * the code point would be doubly converted).
2274 * But leave this in just in case the pack U change
2275 * gets revoked, but the semantics is still
2276 * desireable for charnames. --jhi */
cddc7ef4 2277 {
cfd0369c 2278 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2279
2280 if (uv < 0x100) {
89ebb4a3 2281 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2282
2283 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2284 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2285 str = SvPV_const(res, len);
cddc7ef4
JH
2286 }
2287 }
2288#endif
89491803 2289 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2290 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2291 SvCUR_set(sv, d - ostart);
2292 SvPOK_on(sv);
e4f3eed8 2293 *d = '\0';
f08d6ad9 2294 sv_utf8_upgrade(sv);
d2f449dd 2295 /* this just broke our allocation above... */
eb160463 2296 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2297 d = SvPVX(sv) + SvCUR(sv);
89491803 2298 has_utf8 = TRUE;
f08d6ad9 2299 }
eb160463 2300 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2301 const char * const odest = SvPVX_const(sv);
423cee85 2302
8973db79 2303 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2304 d = SvPVX(sv) + (d - odest);
2305 }
e294cc5d
JH
2306#ifdef EBCDIC
2307 if (!dorange)
2308 native_range = FALSE; /* \N{} is guessed to be Unicode */
2309#endif
423cee85
JH
2310 Copy(str, d, len, char);
2311 d += len;
2312 SvREFCNT_dec(res);
2313 cont_scan:
2314 s = e + 1;
2315 }
2316 else
5777a3f7 2317 yyerror("Missing braces on \\N{}");
423cee85
JH
2318 continue;
2319
02aa26ce 2320 /* \c is a control character */
79072805
LW
2321 case 'c':
2322 s++;
961ce445 2323 if (s < send) {
ba210ebe 2324 U8 c = *s++;
c7f1f016
NIS
2325#ifdef EBCDIC
2326 if (isLOWER(c))
2327 c = toUPPER(c);
2328#endif
db42d148 2329 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2330 }
961ce445
RGS
2331 else {
2332 yyerror("Missing control char name in \\c");
2333 }
79072805 2334 continue;
02aa26ce
NT
2335
2336 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2337 case 'b':
db42d148 2338 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2339 break;
2340 case 'n':
db42d148 2341 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2342 break;
2343 case 'r':
db42d148 2344 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2345 break;
2346 case 'f':
db42d148 2347 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2348 break;
2349 case 't':
db42d148 2350 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2351 break;
34a3fe2a 2352 case 'e':
db42d148 2353 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2354 break;
2355 case 'a':
db42d148 2356 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2357 break;
02aa26ce
NT
2358 } /* end switch */
2359
79072805
LW
2360 s++;
2361 continue;
02aa26ce 2362 } /* end if (backslash) */
4c3a8340
ST
2363#ifdef EBCDIC
2364 else
2365 literal_endpoint++;
2366#endif
02aa26ce 2367
f9a63242 2368 default_action:
2b9d42f0
NIS
2369 /* If we started with encoded form, or already know we want it
2370 and then encode the next character */
2371 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2372 STRLEN len = 1;
5f66b61c
AL
2373 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2374 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2375 s += len;
2376 if (need > len) {
2377 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2378 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2379 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2380 }
5f66b61c 2381 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2382 has_utf8 = TRUE;
e294cc5d
JH
2383#ifdef EBCDIC
2384 if (uv > 255 && !dorange)
2385 native_range = FALSE;
2386#endif
2b9d42f0
NIS
2387 }
2388 else {
2389 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2390 }
02aa26ce
NT
2391 } /* while loop to process each character */
2392
2393 /* terminate the string and set up the sv */
79072805 2394 *d = '\0';
95a20fc0 2395 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2396 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2397 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2398
79072805 2399 SvPOK_on(sv);
9f4817db 2400 if (PL_encoding && !has_utf8) {
d0063567
DK
2401 sv_recode_to_utf8(sv, PL_encoding);
2402 if (SvUTF8(sv))
2403 has_utf8 = TRUE;
9f4817db 2404 }
2b9d42f0 2405 if (has_utf8) {
7e2040f0 2406 SvUTF8_on(sv);
2b9d42f0 2407 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2408 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2409 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2410 }
2411 }
79072805 2412
02aa26ce 2413 /* shrink the sv if we allocated more than we used */
79072805 2414 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2415 SvPV_shrink_to_cur(sv);
79072805 2416 }
02aa26ce 2417
9b599b2a 2418 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2419 if (s > PL_bufptr) {
2420 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2421 sv = new_constant(start, s - start,
2422 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2423 sv, NULL,
10edeb5d
JH
2424 (const char *)
2425 (( PL_lex_inwhat == OP_TRANS
2426 ? "tr"
2427 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2428 ? "s"
2429 : "qq"))));
79072805 2430 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2431 } else
8990e307 2432 SvREFCNT_dec(sv);
79072805
LW
2433 return s;
2434}
2435
ffb4593c
NT
2436/* S_intuit_more
2437 * Returns TRUE if there's more to the expression (e.g., a subscript),
2438 * FALSE otherwise.
ffb4593c
NT
2439 *
2440 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2441 *
2442 * ->[ and ->{ return TRUE
2443 * { and [ outside a pattern are always subscripts, so return TRUE
2444 * if we're outside a pattern and it's not { or [, then return FALSE
2445 * if we're in a pattern and the first char is a {
2446 * {4,5} (any digits around the comma) returns FALSE
2447 * if we're in a pattern and the first char is a [
2448 * [] returns FALSE
2449 * [SOMETHING] has a funky algorithm to decide whether it's a
2450 * character class or not. It has to deal with things like
2451 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2452 * anything else returns TRUE
2453 */
2454
9cbb5ea2
GS
2455/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2456
76e3520e 2457STATIC int
cea2e8a9 2458S_intuit_more(pTHX_ register char *s)
79072805 2459{
97aff369 2460 dVAR;
3280af22 2461 if (PL_lex_brackets)
79072805
LW
2462 return TRUE;
2463 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2464 return TRUE;
2465 if (*s != '{' && *s != '[')
2466 return FALSE;
3280af22 2467 if (!PL_lex_inpat)
79072805
LW
2468 return TRUE;
2469
2470 /* In a pattern, so maybe we have {n,m}. */
2471 if (*s == '{') {
2472 s++;
2473 if (!isDIGIT(*s))
2474 return TRUE;
2475 while (isDIGIT(*s))
2476 s++;
2477 if (*s == ',')
2478 s++;
2479 while (isDIGIT(*s))
2480 s++;
2481 if (*s == '}')
2482 return FALSE;
2483 return TRUE;
2484
2485 }
2486
2487 /* On the other hand, maybe we have a character class */
2488
2489 s++;
2490 if (*s == ']' || *s == '^')
2491 return FALSE;
2492 else {
ffb4593c 2493 /* this is terrifying, and it works */
79072805
LW
2494 int weight = 2; /* let's weigh the evidence */
2495 char seen[256];
f27ffc4a 2496 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2497 const char * const send = strchr(s,']');
3280af22 2498 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2499
2500 if (!send) /* has to be an expression */
2501 return TRUE;
2502
2503 Zero(seen,256,char);
2504 if (*s == '$')
2505 weight -= 3;
2506 else if (isDIGIT(*s)) {
2507 if (s[1] != ']') {
2508 if (isDIGIT(s[1]) && s[2] == ']')
2509 weight -= 10;
2510 }
2511 else
2512 weight -= 100;
2513 }
2514 for (; s < send; s++) {
2515 last_un_char = un_char;
2516 un_char = (unsigned char)*s;
2517 switch (*s) {
2518 case '@':
2519 case '&':
2520 case '$':
2521 weight -= seen[un_char] * 10;
7e2040f0 2522 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2523 int len;
8903cb82 2524 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2525 len = (int)strlen(tmpbuf);
2526 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2527 weight -= 100;
2528 else
2529 weight -= 10;
2530 }
2531 else if (*s == '$' && s[1] &&
93a17b20
LW
2532 strchr("[#!%*<>()-=",s[1])) {
2533 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2534 weight -= 10;
2535 else
2536 weight -= 1;
2537 }
2538 break;
2539 case '\\':
2540 un_char = 254;
2541 if (s[1]) {
93a17b20 2542 if (strchr("wds]",s[1]))
79072805 2543 weight += 100;
10edeb5d 2544 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2545 weight += 1;
93a17b20 2546 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2547 weight += 40;
2548 else if (isDIGIT(s[1])) {
2549 weight += 40;
2550 while (s[1] && isDIGIT(s[1]))
2551 s++;
2552 }
2553 }
2554 else
2555 weight += 100;
2556 break;
2557 case '-':
2558 if (s[1] == '\\')
2559 weight += 50;
93a17b20 2560 if (strchr("aA01! ",last_un_char))
79072805 2561 weight += 30;
93a17b20 2562 if (strchr("zZ79~",s[1]))
79072805 2563 weight += 30;
f27ffc4a
GS
2564 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2565 weight -= 5; /* cope with negative subscript */
79072805
LW
2566 break;
2567 default:
3792a11b
NC
2568 if (!isALNUM(last_un_char)
2569 && !(last_un_char == '$' || last_un_char == '@'
2570 || last_un_char == '&')
2571 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2572 char *d = tmpbuf;
2573 while (isALPHA(*s))
2574 *d++ = *s++;
2575 *d = '\0';
5458a98a 2576 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2577 weight -= 150;
2578 }
2579 if (un_char == last_un_char + 1)
2580 weight += 5;
2581 weight -= seen[un_char];
2582 break;
2583 }
2584 seen[un_char]++;
2585 }
2586 if (weight >= 0) /* probably a character class */
2587 return FALSE;
2588 }
2589
2590 return TRUE;
2591}
ffed7fef 2592
ffb4593c
NT
2593/*
2594 * S_intuit_method
2595 *
2596 * Does all the checking to disambiguate
2597 * foo bar
2598 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2599 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2600 *
2601 * First argument is the stuff after the first token, e.g. "bar".
2602 *
2603 * Not a method if bar is a filehandle.
2604 * Not a method if foo is a subroutine prototyped to take a filehandle.
2605 * Not a method if it's really "Foo $bar"
2606 * Method if it's "foo $bar"
2607 * Not a method if it's really "print foo $bar"
2608 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2609 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2610 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2611 * =>
2612 */
2613
76e3520e 2614STATIC int
62d55b22 2615S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2616{
97aff369 2617 dVAR;
a0d0e21e 2618 char *s = start + (*start == '$');
3280af22 2619 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2620 STRLEN len;
2621 GV* indirgv;
5db06880
NC
2622#ifdef PERL_MAD
2623 int soff;
2624#endif
a0d0e21e
LW
2625
2626 if (gv) {
62d55b22 2627 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2628 return 0;
62d55b22
NC
2629 if (cv) {
2630 if (SvPOK(cv)) {
2631 const char *proto = SvPVX_const(cv);
2632 if (proto) {
2633 if (*proto == ';')
2634 proto++;
2635 if (*proto == '*')
2636 return 0;
2637 }
b6c543e3
IZ
2638 }
2639 } else
c35e046a 2640 gv = NULL;
a0d0e21e 2641 }
8903cb82 2642 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2643 /* start is the beginning of the possible filehandle/object,
2644 * and s is the end of it
2645 * tmpbuf is a copy of it
2646 */
2647
a0d0e21e 2648 if (*start == '$') {
3280af22 2649 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2650 return 0;
5db06880
NC
2651#ifdef PERL_MAD
2652 len = start - SvPVX(PL_linestr);
2653#endif
29595ff2 2654 s = PEEKSPACE(s);
f0092767 2655#ifdef PERL_MAD
5db06880
NC
2656 start = SvPVX(PL_linestr) + len;
2657#endif
3280af22
NIS
2658 PL_bufptr = start;
2659 PL_expect = XREF;
a0d0e21e
LW
2660 return *s == '(' ? FUNCMETH : METHOD;
2661 }
5458a98a 2662 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2663 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2664 len -= 2;
2665 tmpbuf[len] = '\0';
5db06880
NC
2666#ifdef PERL_MAD
2667 soff = s - SvPVX(PL_linestr);
2668#endif
c3e0f903
GS
2669 goto bare_package;
2670 }
90e5519e 2671 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2672 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2673 return 0;
2674 /* filehandle or package name makes it a method */
da51bb9b 2675 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2676#ifdef PERL_MAD
2677 soff = s - SvPVX(PL_linestr);
2678#endif
29595ff2 2679 s = PEEKSPACE(s);
3280af22 2680 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2681 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2682 bare_package:
cd81e915 2683 start_force(PL_curforce);
9ded7720 2684 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2685 newSVpvn(tmpbuf,len));
9ded7720 2686 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2687 if (PL_madskills)
2688 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2689 PL_expect = XTERM;
a0d0e21e 2690 force_next(WORD);
3280af22 2691 PL_bufptr = s;
5db06880
NC
2692#ifdef PERL_MAD
2693 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2694#endif
a0d0e21e
LW
2695 return *s == '(' ? FUNCMETH : METHOD;
2696 }
2697 }
2698 return 0;
2699}
2700
ffb4593c
NT
2701/*
2702 * S_incl_perldb
2703 * Return a string of Perl code to load the debugger. If PERL5DB
2704 * is set, it will return the contents of that, otherwise a
2705 * compile-time require of perl5db.pl.
2706 */
2707
bfed75c6 2708STATIC const char*
cea2e8a9 2709S_incl_perldb(pTHX)
a0d0e21e 2710{
97aff369 2711 dVAR;
3280af22 2712 if (PL_perldb) {
9d4ba2ae 2713 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2714
2715 if (pdb)
2716 return pdb;
93189314 2717 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2718 return "BEGIN { require 'perl5db.pl' }";
2719 }
2720 return "";
2721}
2722
2723
16d20bd9 2724/* Encoded script support. filter_add() effectively inserts a
4e553d73 2725 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2726 * Note that the filter function only applies to the current source file
2727 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2728 *
2729 * The datasv parameter (which may be NULL) can be used to pass
2730 * private data to this instance of the filter. The filter function
2731 * can recover the SV using the FILTER_DATA macro and use it to
2732 * store private buffers and state information.
2733 *
2734 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2735 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2736 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2737 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2738 * private use must be set using malloc'd pointers.
2739 */
16d20bd9
AD
2740
2741SV *
864dbfa3 2742Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2743{
97aff369 2744 dVAR;
f4c556ac 2745 if (!funcp)
a0714e2c 2746 return NULL;
f4c556ac 2747
3280af22
NIS
2748 if (!PL_rsfp_filters)
2749 PL_rsfp_filters = newAV();
16d20bd9 2750 if (!datasv)
561b68a9 2751 datasv = newSV(0);
862a34c6 2752 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2753 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2754 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2755 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2756 FPTR2DPTR(void *, IoANY(datasv)),
2757 SvPV_nolen(datasv)));
3280af22
NIS
2758 av_unshift(PL_rsfp_filters, 1);
2759 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2760 return(datasv);
2761}
4e553d73 2762
16d20bd9
AD
2763
2764/* Delete most recently added instance of this filter function. */
a0d0e21e 2765void
864dbfa3 2766Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2767{
97aff369 2768 dVAR;
e0c19803 2769 SV *datasv;
24801a4b 2770
33073adb 2771#ifdef DEBUGGING
55662e27
JH
2772 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2773 FPTR2DPTR(void*, funcp)));
33073adb 2774#endif
3280af22 2775 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2776 return;
2777 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2778 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2779 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2780 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2781 IoANY(datasv) = (void *)NULL;
3280af22 2782 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2783
16d20bd9
AD
2784 return;
2785 }
2786 /* we need to search for the correct entry and clear it */
cea2e8a9 2787 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2788}
2789
2790
1de9afcd
RGS
2791/* Invoke the idxth filter function for the current rsfp. */
2792/* maxlen 0 = read one text line */
16d20bd9 2793I32
864dbfa3 2794Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2795{
97aff369 2796 dVAR;
16d20bd9
AD
2797 filter_t funcp;
2798 SV *datasv = NULL;
f482118e
NC
2799 /* This API is bad. It should have been using unsigned int for maxlen.
2800 Not sure if we want to change the API, but if not we should sanity
2801 check the value here. */
39cd7a59
NC
2802 const unsigned int correct_length
2803 = maxlen < 0 ?
2804#ifdef PERL_MICRO
2805 0x7FFFFFFF
2806#else
2807 INT_MAX
2808#endif
2809 : maxlen;
e50aee73 2810
3280af22 2811 if (!PL_rsfp_filters)
16d20bd9 2812 return -1;
1de9afcd 2813 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2814 /* Provide a default input filter to make life easy. */
2815 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2816 DEBUG_P(PerlIO_printf(Perl_debug_log,
2817 "filter_read %d: from rsfp\n", idx));
f482118e 2818 if (correct_length) {
16d20bd9
AD
2819 /* Want a block */
2820 int len ;
f54cb97a 2821 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2822
2823 /* ensure buf_sv is large enough */
f482118e
NC
2824 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2825 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2826 correct_length)) <= 0) {
3280af22 2827 if (PerlIO_error(PL_rsfp))
37120919
AD
2828 return -1; /* error */
2829 else
2830 return 0 ; /* end of file */
2831 }
16d20bd9
AD
2832 SvCUR_set(buf_sv, old_len + len) ;
2833 } else {
2834 /* Want a line */
3280af22
NIS
2835 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2836 if (PerlIO_error(PL_rsfp))
37120919
AD
2837 return -1; /* error */
2838 else
2839 return 0 ; /* end of file */
2840 }
16d20bd9
AD
2841 }
2842 return SvCUR(buf_sv);
2843 }
2844 /* Skip this filter slot if filter has been deleted */
1de9afcd 2845 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2846 DEBUG_P(PerlIO_printf(Perl_debug_log,
2847 "filter_read %d: skipped (filter deleted)\n",
2848 idx));
f482118e 2849 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2850 }
2851 /* Get function pointer hidden within datasv */
8141890a 2852 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2853 DEBUG_P(PerlIO_printf(Perl_debug_log,
2854 "filter_read %d: via function %p (%s)\n",
ca0270c4 2855 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2856 /* Call function. The function is expected to */
2857 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2858 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2859 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2860}
2861
76e3520e 2862STATIC char *
cea2e8a9 2863S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2864{
97aff369 2865 dVAR;
c39cd008 2866#ifdef PERL_CR_FILTER
3280af22 2867 if (!PL_rsfp_filters) {
c39cd008 2868 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2869 }
2870#endif
3280af22 2871 if (PL_rsfp_filters) {
55497cff
PP
2872 if (!append)
2873 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2874 if (FILTER_READ(0, sv, 0) > 0)
2875 return ( SvPVX(sv) ) ;
2876 else
bd61b366 2877 return NULL ;
16d20bd9 2878 }
9d116dd7 2879 else
fd049845 2880 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2881}
2882
01ec43d0 2883STATIC HV *
7fc63493 2884S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2885{
97aff369 2886 dVAR;
def3634b
GS
2887 GV *gv;
2888
01ec43d0 2889 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2890 return PL_curstash;
2891
2892 if (len > 2 &&
2893 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2894 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2895 {
2896 return GvHV(gv); /* Foo:: */
def3634b
GS
2897 }
2898
2899 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2900 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2901 if (gv && GvCV(gv)) {
2902 SV * const sv = cv_const_sv(GvCV(gv));
2903 if (sv)
83003860 2904 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2905 }
2906
da51bb9b 2907 return gv_stashpv(pkgname, 0);
def3634b 2908}
a0d0e21e 2909
e3f73d4e
RGS
2910/*
2911 * S_readpipe_override
2912 * Check whether readpipe() is overriden, and generates the appropriate
2913 * optree, provided sublex_start() is called afterwards.
2914 */
2915STATIC void
1d51329b 2916S_readpipe_override(pTHX)
e3f73d4e
RGS
2917{
2918 GV **gvp;
2919 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2920 yylval.ival = OP_BACKTICK;
2921 if ((gv_readpipe
2922 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2923 ||
2924 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2925 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2926 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2927 {
2928 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2929 append_elem(OP_LIST,
2930 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2931 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2932 }
2933 else {
2934 set_csh();
2935 }
2936}
2937
5db06880
NC
2938#ifdef PERL_MAD
2939 /*
2940 * Perl_madlex
2941 * The intent of this yylex wrapper is to minimize the changes to the
2942 * tokener when we aren't interested in collecting madprops. It remains
2943 * to be seen how successful this strategy will be...
2944 */
2945
2946int
2947Perl_madlex(pTHX)
2948{
2949 int optype;
2950 char *s = PL_bufptr;
2951
cd81e915
NC
2952 /* make sure PL_thiswhite is initialized */
2953 PL_thiswhite = 0;
2954 PL_thismad = 0;
5db06880 2955
cd81e915 2956 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2957 if (PL_pending_ident)
2958 return S_pending_ident(aTHX);
2959
2960 /* previous token ate up our whitespace? */
cd81e915
NC
2961 if (!PL_lasttoke && PL_nextwhite) {
2962 PL_thiswhite = PL_nextwhite;
2963 PL_nextwhite = 0;
5db06880
NC
2964 }
2965
2966 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2967 PL_realtokenstart = -1;
2968 PL_thistoken = 0;
5db06880
NC
2969 optype = yylex();
2970 s = PL_bufptr;
cd81e915 2971 assert(PL_curforce < 0);
5db06880 2972
cd81e915
NC
2973 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2974 if (!PL_thistoken) {
2975 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2976 PL_thistoken = newSVpvs("");
5db06880 2977 else {
c35e046a 2978 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2979 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2980 }
2981 }
cd81e915
NC
2982 if (PL_thismad) /* install head */
2983 CURMAD('X', PL_thistoken);
5db06880
NC
2984 }
2985
2986 /* last whitespace of a sublex? */
cd81e915
NC
2987 if (optype == ')' && PL_endwhite) {
2988 CURMAD('X', PL_endwhite);
5db06880
NC
2989 }
2990
cd81e915 2991 if (!PL_thismad) {
5db06880
NC
2992
2993 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2994 if (!PL_thiswhite && !PL_endwhite && !optype) {
2995 sv_free(PL_thistoken);
2996 PL_thistoken = 0;
5db06880
NC
2997 return 0;
2998 }
2999
3000 /* put off final whitespace till peg */
3001 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3002 PL_nextwhite = PL_thiswhite;
3003 PL_thiswhite = 0;
5db06880 3004 }
cd81e915
NC
3005 else if (PL_thisopen) {
3006 CURMAD('q', PL_thisopen);
3007 if (PL_thistoken)
3008 sv_free(PL_thistoken);
3009 PL_thistoken = 0;
5db06880
NC
3010 }
3011 else {
3012 /* Store actual token text as madprop X */
cd81e915 3013 CURMAD('X', PL_thistoken);
5db06880
NC
3014 }
3015
cd81e915 3016 if (PL_thiswhite) {
5db06880 3017 /* add preceding whitespace as madprop _ */
cd81e915 3018 CURMAD('_', PL_thiswhite);
5db06880
NC
3019 }
3020
cd81e915 3021 if (PL_thisstuff) {
5db06880 3022 /* add quoted material as madprop = */
cd81e915 3023 CURMAD('=', PL_thisstuff);
5db06880
NC
3024 }
3025
cd81e915 3026 if (PL_thisclose) {
5db06880 3027 /* add terminating quote as madprop Q */
cd81e915 3028 CURMAD('Q', PL_thisclose);
5db06880
NC
3029 }
3030 }
3031
3032 /* special processing based on optype */
3033
3034 switch (optype) {
3035
3036 /* opval doesn't need a TOKEN since it can already store mp */
3037 case WORD:
3038 case METHOD:
3039 case FUNCMETH:
3040 case THING:
3041 case PMFUNC:
3042 case PRIVATEREF:
3043 case FUNC0SUB:
3044 case UNIOPSUB:
3045 case LSTOPSUB:
3046 if (yylval.opval)
cd81e915
NC
3047 append_madprops(PL_thismad, yylval.opval, 0);
3048 PL_thismad = 0;
5db06880
NC
3049 return optype;
3050
3051 /* fake EOF */
3052 case 0:
3053 optype = PEG;
cd81e915
NC
3054 if (PL_endwhite) {
3055 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3056 PL_endwhite = 0;
5db06880
NC
3057 }
3058 break;
3059
3060 case ']':
3061 case '}':
cd81e915 3062 if (PL_faketokens)
5db06880
NC
3063 break;
3064 /* remember any fake bracket that lexer is about to discard */
3065 if (PL_lex_brackets == 1 &&
3066 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3067 {
3068 s = PL_bufptr;
3069 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3070 s++;
3071 if (*s == '}') {
cd81e915
NC
3072 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3073 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3074 PL_thiswhite = 0;
5db06880
NC
3075 PL_bufptr = s - 1;
3076 break; /* don't bother looking for trailing comment */
3077 }
3078 else
3079 s = PL_bufptr;
3080 }
3081 if (optype == ']')
3082 break;
3083 /* FALLTHROUGH */
3084
3085 /* attach a trailing comment to its statement instead of next token */
3086 case ';':
cd81e915 3087 if (PL_faketokens)
5db06880
NC
3088 break;
3089 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3090 s = PL_bufptr;
3091 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3092 s++;
3093 if (*s == '\n' || *s == '#') {
3094 while (s < PL_bufend && *s != '\n')
3095 s++;
3096 if (s < PL_bufend)
3097 s++;
cd81e915
NC
3098 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3099 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3100 PL_thiswhite = 0;
5db06880
NC
3101 PL_bufptr = s;
3102 }
3103 }
3104 break;
3105
3106 /* pval */
3107 case LABEL:
3108 break;
3109
3110 /* ival */
3111 default:
3112 break;
3113
3114 }
3115
3116 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3117 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3118 PL_thismad = 0;
5db06880
NC
3119 return optype;
3120}
3121#endif
3122
468aa647 3123STATIC char *
cc6ed77d 3124S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3125 dVAR;
468aa647
RGS
3126 if (PL_expect != XSTATE)
3127 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3128 is_use ? "use" : "no"));
29595ff2 3129 s = SKIPSPACE1(s);
468aa647
RGS
3130 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3131 s = force_version(s, TRUE);
29595ff2 3132 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3133 start_force(PL_curforce);
9ded7720 3134 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3135 force_next(WORD);
3136 }
3137 else if (*s == 'v') {
3138 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3139 s = force_version(s, FALSE);
3140 }
3141 }
3142 else {
3143 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3144 s = force_version(s, FALSE);
3145 }
3146 yylval.ival = is_use;
3147 return s;
3148}
748a9306 3149#ifdef DEBUGGING
27da23d5 3150 static const char* const exp_name[] =
09bef843 3151 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3152 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3153 };
748a9306 3154#endif
463ee0b2 3155
02aa26ce
NT
3156/*
3157 yylex
3158
3159 Works out what to call the token just pulled out of the input
3160 stream. The yacc parser takes care of taking the ops we return and
3161 stitching them into a tree.
3162
3163 Returns:
3164 PRIVATEREF
3165
3166 Structure:
3167 if read an identifier
3168 if we're in a my declaration
3169 croak if they tried to say my($foo::bar)
3170 build the ops for a my() declaration
3171 if it's an access to a my() variable
3172 are we in a sort block?
3173 croak if my($a); $a <=> $b
3174 build ops for access to a my() variable
3175 if in a dq string, and they've said @foo and we can't find @foo
3176 croak
3177 build ops for a bareword
3178 if we already built the token before, use it.
3179*/
3180
20141f0e 3181
dba4d153
JH
3182#ifdef __SC__
3183#pragma segment Perl_yylex
3184#endif
dba4d153 3185int
dba4d153 3186Perl_yylex(pTHX)
20141f0e 3187{
97aff369 3188 dVAR;
3afc138a 3189 register char *s = PL_bufptr;
378cc40b 3190 register char *d;
463ee0b2 3191 STRLEN len;
aa7440fb 3192 bool bof = FALSE;
a687059c 3193
10edeb5d
JH
3194 /* orig_keyword, gvp, and gv are initialized here because
3195 * jump to the label just_a_word_zero can bypass their
3196 * initialization later. */
3197 I32 orig_keyword = 0;
3198 GV *gv = NULL;
3199 GV **gvp = NULL;
3200
bbf60fe6 3201 DEBUG_T( {
396482e1 3202 SV* tmp = newSVpvs("");
b6007c36
DM
3203 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3204 (IV)CopLINE(PL_curcop),
3205 lex_state_names[PL_lex_state],
3206 exp_name[PL_expect],
3207 pv_display(tmp, s, strlen(s), 0, 60));
3208 SvREFCNT_dec(tmp);
bbf60fe6 3209 } );
02aa26ce 3210 /* check if there's an identifier for us to look at */
ba979b31 3211 if (PL_pending_ident)
bbf60fe6 3212 return REPORT(S_pending_ident(aTHX));
bbce6d69 3213
02aa26ce
NT
3214 /* no identifier pending identification */
3215
3280af22 3216 switch (PL_lex_state) {
79072805
LW
3217#ifdef COMMENTARY
3218 case LEX_NORMAL: /* Some compilers will produce faster */
3219 case LEX_INTERPNORMAL: /* code if we comment these out. */
3220 break;
3221#endif
3222
09bef843 3223 /* when we've already built the next token, just pull it out of the queue */
79072805 3224 case LEX_KNOWNEXT:
5db06880
NC
3225#ifdef PERL_MAD
3226 PL_lasttoke--;
3227 yylval = PL_nexttoke[PL_lasttoke].next_val;
3228 if (PL_madskills) {
cd81e915 3229 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3230 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3231 if (PL_thismad && PL_thismad->mad_key == '_') {
3232 PL_thiswhite = (SV*)PL_thismad->mad_val;
3233 PL_thismad->mad_val = 0;
3234 mad_free(PL_thismad);
3235 PL_thismad = 0;
5db06880
NC
3236 }
3237 }
3238 if (!PL_lasttoke) {
3239 PL_lex_state = PL_lex_defer;
3240 PL_expect = PL_lex_expect;
3241 PL_lex_defer = LEX_NORMAL;
3242 if (!PL_nexttoke[PL_lasttoke].next_type)
3243 return yylex();
3244 }
3245#else
3280af22 3246 PL_nexttoke--;
5db06880 3247 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3248 if (!PL_nexttoke) {
3249 PL_lex_state = PL_lex_defer;
3250 PL_expect = PL_lex_expect;
3251 PL_lex_defer = LEX_NORMAL;
463ee0b2 3252 }
5db06880
NC
3253#endif
3254#ifdef PERL_MAD
3255 /* FIXME - can these be merged? */
3256 return(PL_nexttoke[PL_lasttoke].next_type);
3257#else
bbf60fe6 3258 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3259#endif
79072805 3260
02aa26ce 3261 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3262 when we get here, PL_bufptr is at the \
02aa26ce 3263 */
79072805
LW
3264 case LEX_INTERPCASEMOD:
3265#ifdef DEBUGGING
3280af22 3266 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3267 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3268#endif
02aa26ce 3269 /* handle \E or end of string */
3280af22 3270 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3271 /* if at a \E */
3280af22 3272 if (PL_lex_casemods) {
f54cb97a 3273 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3274 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3275
3792a11b
NC
3276 if (PL_bufptr != PL_bufend
3277 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3278 PL_bufptr += 2;
3279 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3280#ifdef PERL_MAD
3281 if (PL_madskills)
6b29d1f5 3282 PL_thistoken = newSVpvs("\\E");
5db06880 3283#endif
a0d0e21e 3284 }
bbf60fe6 3285 return REPORT(')');
79072805 3286 }
5db06880
NC
3287#ifdef PERL_MAD
3288 while (PL_bufptr != PL_bufend &&
3289 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3290 if (!PL_thiswhite)
6b29d1f5 3291 PL_thiswhite = newSVpvs("");
cd81e915 3292 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3293 PL_bufptr += 2;
3294 }
3295#else
3280af22
NIS
3296 if (PL_bufptr != PL_bufend)
3297 PL_bufptr += 2;
5db06880 3298#endif
3280af22 3299 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3300 return yylex();
79072805
LW
3301 }
3302 else {
607df283 3303 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3304 "### Saw case modifier\n"); });
3280af22 3305 s = PL_bufptr + 1;
6e909404 3306 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3307#ifdef PERL_MAD
cd81e915 3308 if (!PL_thiswhite)
6b29d1f5 3309 PL_thiswhite = newSVpvs("");
cd81e915 3310 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3311#endif
89122651 3312 PL_bufptr = s + 3;
6e909404
JH
3313 PL_lex_state = LEX_INTERPCONCAT;
3314 return yylex();
a0d0e21e 3315 }
6e909404 3316 else {
90771dc0 3317 I32 tmp;
5db06880
NC
3318 if (!PL_madskills) /* when just compiling don't need correct */
3319 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3320 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3321 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3322 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3323 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3324 return REPORT(')');
6e909404
JH
3325 }
3326 if (PL_lex_casemods > 10)
3327 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3328 PL_lex_casestack[PL_lex_casemods++] = *s;
3329 PL_lex_casestack[PL_lex_casemods] = '\0';
3330 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3331 start_force(PL_curforce);
9ded7720 3332 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3333 force_next('(');
cd81e915 3334 start_force(PL_curforce);
6e909404 3335 if (*s == 'l')
9ded7720 3336 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3337 else if (*s == 'u')
9ded7720 3338 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3339 else if (*s == 'L')
9ded7720 3340 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3341 else if (*s == 'U')
9ded7720 3342 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3343 else if (*s == 'Q')
9ded7720 3344 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3345 else
3346 Perl_croak(aTHX_ "panic: yylex");
5db06880 3347 if (PL_madskills) {
6b29d1f5 3348 SV* const tmpsv = newSVpvs("");
5db06880
NC
3349 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3350 curmad('_', tmpsv);
3351 }
6e909404 3352 PL_bufptr = s + 1;
a0d0e21e 3353 }
79072805 3354 force_next(FUNC);
3280af22
NIS
3355 if (PL_lex_starts) {
3356 s = PL_bufptr;
3357 PL_lex_starts = 0;
5db06880
NC
3358#ifdef PERL_MAD
3359 if (PL_madskills) {
cd81e915
NC
3360 if (PL_thistoken)
3361 sv_free(PL_thistoken);
6b29d1f5 3362 PL_thistoken = newSVpvs("");
5db06880
NC
3363 }
3364#endif
131b3ad0
DM
3365 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3366 if (PL_lex_casemods == 1 && PL_lex_inpat)
3367 OPERATOR(',');
3368 else
3369 Aop(OP_CONCAT);
79072805
LW
3370 }
3371 else
cea2e8a9 3372 return yylex();
79072805
LW
3373 }
3374
55497cff 3375 case LEX_INTERPPUSH:
bbf60fe6 3376 return REPORT(sublex_push());
55497cff 3377
79072805 3378 case LEX_INTERPSTART:
3280af22 3379 if (PL_bufptr == PL_bufend)
bbf60fe6 3380 return REPORT(sublex_done());
607df283 3381 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3382 "### Interpolated variable\n"); });
3280af22
NIS
3383 PL_expect = XTERM;
3384 PL_lex_dojoin = (*PL_bufptr == '@');
3385 PL_lex_state = LEX_INTERPNORMAL;
3386 if (PL_lex_dojoin) {
cd81e915 3387 start_force(PL_curforce);
9ded7720 3388 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3389 force_next(',');
cd81e915 3390 start_force(PL_curforce);
a0d0e21e 3391 force_ident("\"", '$');
cd81e915 3392 start_force(PL_curforce);
9ded7720 3393 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3394 force_next('$');
cd81e915 3395 start_force(PL_curforce);
9ded7720 3396 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3397 force_next('(');
cd81e915 3398 start_force(PL_curforce);
9ded7720 3399 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3400 force_next(FUNC);
3401 }
3280af22
NIS
3402 if (PL_lex_starts++) {
3403 s = PL_bufptr;
5db06880
NC
3404#ifdef PERL_MAD
3405 if (PL_madskills) {
cd81e915
NC
3406 if (PL_thistoken)
3407 sv_free(PL_thistoken);
6b29d1f5 3408 PL_thistoken = newSVpvs("");
5db06880
NC
3409 }
3410#endif
131b3ad0
DM
3411 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3412 if (!PL_lex_casemods && PL_lex_inpat)
3413 OPERATOR(',');
3414 else
3415 Aop(OP_CONCAT);
79072805 3416 }
cea2e8a9 3417 return yylex();
79072805
LW
3418
3419 case LEX_INTERPENDMAYBE:
3280af22
NIS
3420 if (intuit_more(PL_bufptr)) {
3421 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3422 break;
3423 }
3424 /* FALL THROUGH */
3425
3426 case LEX_INTERPEND:
3280af22
NIS
3427 if (PL_lex_dojoin) {
3428 PL_lex_dojoin = FALSE;
3429 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3430#ifdef PERL_MAD
3431 if (PL_madskills) {
cd81e915
NC
3432 if (PL_thistoken)
3433 sv_free(PL_thistoken);
6b29d1f5 3434 PL_thistoken = newSVpvs("");
5db06880
NC
3435 }
3436#endif
bbf60fe6 3437 return REPORT(')');
79072805 3438 }
43a16006 3439 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3440 && SvEVALED(PL_lex_repl))
43a16006 3441 {
e9fa98b2 3442 if (PL_bufptr != PL_bufend)
cea2e8a9 3443 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3444 PL_lex_repl = NULL;
e9fa98b2 3445 }
79072805
LW
3446 /* FALLTHROUGH */
3447 case LEX_INTERPCONCAT:
3448#ifdef DEBUGGING
3280af22 3449 if (PL_lex_brackets)
cea2e8a9 3450 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3451#endif
3280af22 3452 if (PL_bufptr == PL_bufend)
bbf60fe6 3453 return REPORT(sublex_done());
79072805 3454
3280af22
NIS
3455 if (SvIVX(PL_linestr) == '\'') {
3456 SV *sv = newSVsv(PL_linestr);
3457 if (!PL_lex_inpat)
76e3520e 3458 sv = tokeq(sv);
3280af22 3459 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3460 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3461 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3462 s = PL_bufend;
79072805
LW
3463 }
3464 else {
3280af22 3465 s = scan_const(PL_bufptr);
79072805 3466 if (*s == '\\')
3280af22 3467 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3468 else
3280af22 3469 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3470 }
3471
3280af22 3472 if (s != PL_bufptr) {
cd81e915 3473 start_force(PL_curforce);
5db06880
NC
3474 if (PL_madskills) {
3475 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3476 }
9ded7720 3477 NEXTVAL_NEXTTOKE = yylval;
3280af22 3478 PL_expect = XTERM;
79072805 3479 force_next(THING);
131b3ad0 3480 if (PL_lex_starts++) {
5db06880
NC
3481#ifdef PERL_MAD
3482 if (PL_madskills) {
cd81e915
NC
3483 if (PL_thistoken)
3484 sv_free(PL_thistoken);
6b29d1f5 3485 PL_thistoken = newSVpvs("");
5db06880
NC
3486 }
3487#endif
131b3ad0
DM
3488 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3489 if (!PL_lex_casemods && PL_lex_inpat)
3490 OPERATOR(',');
3491 else
3492 Aop(OP_CONCAT);
3493 }
79072805 3494 else {
3280af22 3495 PL_bufptr = s;
cea2e8a9 3496 return yylex();
79072805
LW
3497 }
3498 }
3499
cea2e8a9 3500 return yylex();
a0d0e21e 3501 case LEX_FORMLINE:
3280af22
NIS
3502 PL_lex_state = LEX_NORMAL;
3503 s = scan_formline(PL_bufptr);
3504 if (!PL_lex_formbrack)
a0d0e21e
LW
3505 goto rightbracket;
3506 OPERATOR(';');
79072805
LW
3507 }
3508
3280af22
NIS
3509 s = PL_bufptr;
3510 PL_oldoldbufptr = PL_oldbufptr;
3511 PL_oldbufptr = s;
463ee0b2
LW
3512
3513 retry:
5db06880 3514#ifdef PERL_MAD
cd81e915
NC
3515 if (PL_thistoken) {
3516 sv_free(PL_thistoken);
3517 PL_thistoken = 0;
5db06880 3518 }
cd81e915 3519 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3520#endif
378cc40b
LW
3521 switch (*s) {
3522 default:
7e2040f0 3523 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3524 goto keylookup;
cea2e8a9 3525 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3526 case 4:
3527 case 26:
3528 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3529 case 0:
5db06880
NC
3530#ifdef PERL_MAD
3531 if (PL_madskills)
cd81e915 3532 PL_faketokens = 0;
5db06880 3533#endif
3280af22
NIS
3534 if (!PL_rsfp) {
3535 PL_last_uni = 0;
3536 PL_last_lop = 0;
c5ee2135 3537 if (PL_lex_brackets) {
10edeb5d
JH
3538 yyerror((const char *)
3539 (PL_lex_formbrack
3540 ? "Format not terminated"
3541 : "Missing right curly or square bracket"));
c5ee2135 3542 }
4e553d73 3543 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3544 "### Tokener got EOF\n");
5f80b19c 3545 } );
79072805 3546 TOKEN(0);
463ee0b2 3547 }
3280af22 3548 if (s++ < PL_bufend)