This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add a few manpages to Maintainers.pl
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 52#define PL_linestr (PL_parser->linestr)
c2598295
DM
53#define PL_expect (PL_parser->expect)
54#define PL_copline (PL_parser->copline)
bdc0bf6f 55
199e78b7
DM
56
57#ifdef PERL_MAD
58# define PL_endwhite (PL_parser->endwhite)
59# define PL_faketokens (PL_parser->faketokens)
60# define PL_lasttoke (PL_parser->lasttoke)
61# define PL_nextwhite (PL_parser->nextwhite)
62# define PL_realtokenstart (PL_parser->realtokenstart)
63# define PL_skipwhite (PL_parser->skipwhite)
64# define PL_thisclose (PL_parser->thisclose)
65# define PL_thismad (PL_parser->thismad)
66# define PL_thisopen (PL_parser->thisopen)
67# define PL_thisstuff (PL_parser->thisstuff)
68# define PL_thistoken (PL_parser->thistoken)
69# define PL_thiswhite (PL_parser->thiswhite)
70#endif
71
3cbf51f5
DM
72static int
73S_pending_ident(pTHX);
199e78b7 74
0bd48802 75static const char ident_too_long[] = "Identifier too long";
c445ea15 76static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 77
acfe0abc 78static void restore_rsfp(pTHX_ void *f);
6e3aabd6 79#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
80static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
81static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 82#endif
51371543 83
29595ff2 84#ifdef PERL_MAD
29595ff2 85# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 86# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 87#else
5db06880 88# define CURMAD(slot,sv)
9ded7720 89# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
90#endif
91
9059aa12
LW
92#define XFAKEBRACK 128
93#define XENUMMASK 127
94
39e02b42
JH
95#ifdef USE_UTF8_SCRIPTS
96# define UTF (!IN_BYTES)
2b9d42f0 97#else
746b446a 98# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 99#endif
a0ed51b3 100
61f0cdd9 101/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
102 * 1999-02-27 mjd-perl-patch@plover.com */
103#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
104
bf4acbe4
GS
105/* On MacOS, respect nonbreaking spaces */
106#ifdef MACOS_TRADITIONAL
107#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
108#else
109#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
110#endif
111
ffb4593c
NT
112/* LEX_* are values for PL_lex_state, the state of the lexer.
113 * They are arranged oddly so that the guard on the switch statement
79072805
LW
114 * can get by with a single comparison (if the compiler is smart enough).
115 */
116
fb73857a 117/* #define LEX_NOTPARSING 11 is done in perl.h. */
118
b6007c36
DM
119#define LEX_NORMAL 10 /* normal code (ie not within "...") */
120#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
121#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
122#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
123#define LEX_INTERPSTART 6 /* expecting the start of a $var */
124
125 /* at end of code, eg "$x" followed by: */
126#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
127#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
128
129#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
130 string or after \E, $foo, etc */
131#define LEX_INTERPCONST 2 /* NOT USED */
132#define LEX_FORMLINE 1 /* expecting a format line */
133#define LEX_KNOWNEXT 0 /* next token known; just return it */
134
79072805 135
bbf60fe6 136#ifdef DEBUGGING
27da23d5 137static const char* const lex_state_names[] = {
bbf60fe6
DM
138 "KNOWNEXT",
139 "FORMLINE",
140 "INTERPCONST",
141 "INTERPCONCAT",
142 "INTERPENDMAYBE",
143 "INTERPEND",
144 "INTERPSTART",
145 "INTERPPUSH",
146 "INTERPCASEMOD",
147 "INTERPNORMAL",
148 "NORMAL"
149};
150#endif
151
79072805
LW
152#ifdef ff_next
153#undef ff_next
d48672a2
LW
154#endif
155
79072805 156#include "keywords.h"
fe14fcc3 157
ffb4593c
NT
158/* CLINE is a macro that ensures PL_copline has a sane value */
159
ae986130
LW
160#ifdef CLINE
161#undef CLINE
162#endif
57843af0 163#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 164
5db06880 165#ifdef PERL_MAD
29595ff2
NC
166# define SKIPSPACE0(s) skipspace0(s)
167# define SKIPSPACE1(s) skipspace1(s)
168# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
169# define PEEKSPACE(s) skipspace2(s,0)
170#else
171# define SKIPSPACE0(s) skipspace(s)
172# define SKIPSPACE1(s) skipspace(s)
173# define SKIPSPACE2(s,tsv) skipspace(s)
174# define PEEKSPACE(s) skipspace(s)
175#endif
176
ffb4593c
NT
177/*
178 * Convenience functions to return different tokens and prime the
9cbb5ea2 179 * lexer for the next token. They all take an argument.
ffb4593c
NT
180 *
181 * TOKEN : generic token (used for '(', DOLSHARP, etc)
182 * OPERATOR : generic operator
183 * AOPERATOR : assignment operator
184 * PREBLOCK : beginning the block after an if, while, foreach, ...
185 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
186 * PREREF : *EXPR where EXPR is not a simple identifier
187 * TERM : expression term
188 * LOOPX : loop exiting command (goto, last, dump, etc)
189 * FTST : file test operator
190 * FUN0 : zero-argument function
2d2e263d 191 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
192 * BOop : bitwise or or xor
193 * BAop : bitwise and
194 * SHop : shift operator
195 * PWop : power operator
9cbb5ea2 196 * PMop : pattern-matching operator
ffb4593c
NT
197 * Aop : addition-level operator
198 * Mop : multiplication-level operator
199 * Eop : equality-testing operator
e5edeb50 200 * Rop : relational operator <= != gt
ffb4593c
NT
201 *
202 * Also see LOP and lop() below.
203 */
204
998054bd 205#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 206# define REPORT(retval) tokereport((I32)retval)
998054bd 207#else
bbf60fe6 208# define REPORT(retval) (retval)
998054bd
SC
209#endif
210
bbf60fe6
DM
211#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
212#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
213#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
214#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
215#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
216#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
217#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
218#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
219#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
220#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
221#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
222#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
223#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
224#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
225#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
226#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
227#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
228#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
229#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
230#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 231
a687059c
LW
232/* This bit of chicanery makes a unary function followed by
233 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
234 * The UNIDOR macro is for unary functions that can be followed by the //
235 * operator (such as C<shift // 0>).
a687059c 236 */
376fcdbf
AL
237#define UNI2(f,x) { \
238 yylval.ival = f; \
239 PL_expect = x; \
240 PL_bufptr = s; \
241 PL_last_uni = PL_oldbufptr; \
242 PL_last_lop_op = f; \
243 if (*s == '(') \
244 return REPORT( (int)FUNC1 ); \
29595ff2 245 s = PEEKSPACE(s); \
376fcdbf
AL
246 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
247 }
6f33ba73
RGS
248#define UNI(f) UNI2(f,XTERM)
249#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 250
376fcdbf
AL
251#define UNIBRACK(f) { \
252 yylval.ival = f; \
253 PL_bufptr = s; \
254 PL_last_uni = PL_oldbufptr; \
255 if (*s == '(') \
256 return REPORT( (int)FUNC1 ); \
29595ff2 257 s = PEEKSPACE(s); \
376fcdbf
AL
258 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
259 }
79072805 260
9f68db38 261/* grandfather return to old style */
3280af22 262#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 263
8fa7f367
JH
264#ifdef DEBUGGING
265
bbf60fe6
DM
266/* how to interpret the yylval associated with the token */
267enum token_type {
268 TOKENTYPE_NONE,
269 TOKENTYPE_IVAL,
270 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
271 TOKENTYPE_PVAL,
272 TOKENTYPE_OPVAL,
273 TOKENTYPE_GVVAL
274};
275
6d4a66ac
NC
276static struct debug_tokens {
277 const int token;
278 enum token_type type;
279 const char *name;
280} const debug_tokens[] =
9041c2e3 281{
bbf60fe6
DM
282 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
283 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
284 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
285 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
286 { ARROW, TOKENTYPE_NONE, "ARROW" },
287 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
288 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
289 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
290 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
291 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 292 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
293 { DO, TOKENTYPE_NONE, "DO" },
294 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
295 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
296 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
297 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
298 { ELSE, TOKENTYPE_NONE, "ELSE" },
299 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
300 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
301 { FOR, TOKENTYPE_IVAL, "FOR" },
302 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
303 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
304 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
305 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
306 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
307 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 308 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
309 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
310 { IF, TOKENTYPE_IVAL, "IF" },
311 { LABEL, TOKENTYPE_PVAL, "LABEL" },
312 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
313 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
314 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
315 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
316 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
317 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
318 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
319 { MY, TOKENTYPE_IVAL, "MY" },
320 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
321 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
322 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
323 { OROP, TOKENTYPE_IVAL, "OROP" },
324 { OROR, TOKENTYPE_NONE, "OROR" },
325 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
326 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
327 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
328 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
329 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
330 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
331 { PREINC, TOKENTYPE_NONE, "PREINC" },
332 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
333 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
334 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
335 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
336 { SUB, TOKENTYPE_NONE, "SUB" },
337 { THING, TOKENTYPE_OPVAL, "THING" },
338 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
339 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
340 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
341 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
342 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
343 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 344 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
345 { WHILE, TOKENTYPE_IVAL, "WHILE" },
346 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 347 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
348};
349
350/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 351
bbf60fe6 352STATIC int
f5bd084c 353S_tokereport(pTHX_ I32 rv)
bbf60fe6 354{
97aff369 355 dVAR;
bbf60fe6 356 if (DEBUG_T_TEST) {
bd61b366 357 const char *name = NULL;
bbf60fe6 358 enum token_type type = TOKENTYPE_NONE;
f54cb97a 359 const struct debug_tokens *p;
396482e1 360 SV* const report = newSVpvs("<== ");
bbf60fe6 361
f54cb97a 362 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
363 if (p->token == (int)rv) {
364 name = p->name;
365 type = p->type;
366 break;
367 }
368 }
369 if (name)
54667de8 370 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
371 else if ((char)rv > ' ' && (char)rv < '~')
372 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
373 else if (!rv)
396482e1 374 sv_catpvs(report, "EOF");
bbf60fe6
DM
375 else
376 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
377 switch (type) {
378 case TOKENTYPE_NONE:
379 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
380 break;
381 case TOKENTYPE_IVAL:
e4584336 382 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
383 break;
384 case TOKENTYPE_OPNUM:
385 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
386 PL_op_name[yylval.ival]);
387 break;
388 case TOKENTYPE_PVAL:
389 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
390 break;
391 case TOKENTYPE_OPVAL:
b6007c36 392 if (yylval.opval) {
401441c0 393 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 394 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
395 if (yylval.opval->op_type == OP_CONST) {
396 Perl_sv_catpvf(aTHX_ report, " %s",
397 SvPEEK(cSVOPx_sv(yylval.opval)));
398 }
399
400 }
401441c0 401 else
396482e1 402 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
403 break;
404 }
b6007c36 405 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
406 };
407 return (int)rv;
998054bd
SC
408}
409
b6007c36
DM
410
411/* print the buffer with suitable escapes */
412
413STATIC void
414S_printbuf(pTHX_ const char* fmt, const char* s)
415{
396482e1 416 SV* const tmp = newSVpvs("");
b6007c36
DM
417 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
418 SvREFCNT_dec(tmp);
419}
420
8fa7f367
JH
421#endif
422
ffb4593c
NT
423/*
424 * S_ao
425 *
c963b151
BD
426 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
427 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
428 */
429
76e3520e 430STATIC int
cea2e8a9 431S_ao(pTHX_ int toketype)
a0d0e21e 432{
97aff369 433 dVAR;
3280af22
NIS
434 if (*PL_bufptr == '=') {
435 PL_bufptr++;
a0d0e21e
LW
436 if (toketype == ANDAND)
437 yylval.ival = OP_ANDASSIGN;
438 else if (toketype == OROR)
439 yylval.ival = OP_ORASSIGN;
c963b151
BD
440 else if (toketype == DORDOR)
441 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
442 toketype = ASSIGNOP;
443 }
444 return toketype;
445}
446
ffb4593c
NT
447/*
448 * S_no_op
449 * When Perl expects an operator and finds something else, no_op
450 * prints the warning. It always prints "<something> found where
451 * operator expected. It prints "Missing semicolon on previous line?"
452 * if the surprise occurs at the start of the line. "do you need to
453 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
454 * where the compiler doesn't know if foo is a method call or a function.
455 * It prints "Missing operator before end of line" if there's nothing
456 * after the missing operator, or "... before <...>" if there is something
457 * after the missing operator.
458 */
459
76e3520e 460STATIC void
bfed75c6 461S_no_op(pTHX_ const char *what, char *s)
463ee0b2 462{
97aff369 463 dVAR;
9d4ba2ae
AL
464 char * const oldbp = PL_bufptr;
465 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 466
1189a94a
GS
467 if (!s)
468 s = oldbp;
07c798fb 469 else
1189a94a 470 PL_bufptr = s;
cea2e8a9 471 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
472 if (ckWARN_d(WARN_SYNTAX)) {
473 if (is_first)
474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
475 "\t(Missing semicolon on previous line?)\n");
476 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 477 const char *t;
c35e046a
AL
478 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
479 NOOP;
56da5a46
RGS
480 if (t < PL_bufptr && isSPACE(*t))
481 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
482 "\t(Do you need to predeclare %.*s?)\n",
551405c4 483 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
484 }
485 else {
486 assert(s >= oldbp);
487 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 488 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 489 }
07c798fb 490 }
3280af22 491 PL_bufptr = oldbp;
8990e307
LW
492}
493
ffb4593c
NT
494/*
495 * S_missingterm
496 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 497 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
498 * If we're in a delimited string and the delimiter is a control
499 * character, it's reformatted into a two-char sequence like ^C.
500 * This is fatal.
501 */
502
76e3520e 503STATIC void
cea2e8a9 504S_missingterm(pTHX_ char *s)
8990e307 505{
97aff369 506 dVAR;
8990e307
LW
507 char tmpbuf[3];
508 char q;
509 if (s) {
9d4ba2ae 510 char * const nl = strrchr(s,'\n');
d2719217 511 if (nl)
8990e307
LW
512 *nl = '\0';
513 }
9d116dd7
JH
514 else if (
515#ifdef EBCDIC
516 iscntrl(PL_multi_close)
517#else
518 PL_multi_close < 32 || PL_multi_close == 127
519#endif
520 ) {
8990e307 521 *tmpbuf = '^';
585ec06d 522 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
523 tmpbuf[2] = '\0';
524 s = tmpbuf;
525 }
526 else {
eb160463 527 *tmpbuf = (char)PL_multi_close;
8990e307
LW
528 tmpbuf[1] = '\0';
529 s = tmpbuf;
530 }
531 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 532 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 533}
79072805 534
ef89dcc3 535#define FEATURE_IS_ENABLED(name) \
0d863452 536 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 537 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
538/*
539 * S_feature_is_enabled
540 * Check whether the named feature is enabled.
541 */
542STATIC bool
d4c19fe8 543S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 544{
97aff369 545 dVAR;
0d863452 546 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 547 char he_name[32] = "feature_";
6fca0082 548 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 549
7b9ef140 550 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
551}
552
ffb4593c
NT
553/*
554 * Perl_deprecate
ffb4593c
NT
555 */
556
79072805 557void
bfed75c6 558Perl_deprecate(pTHX_ const char *s)
a0d0e21e 559{
599cee73 560 if (ckWARN(WARN_DEPRECATED))
9014280d 561 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
562}
563
12bcd1a6 564void
bfed75c6 565Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
566{
567 /* This function should NOT be called for any new deprecated warnings */
568 /* Use Perl_deprecate instead */
569 /* */
570 /* It is here to maintain backward compatibility with the pre-5.8 */
571 /* warnings category hierarchy. The "deprecated" category used to */
572 /* live under the "syntax" category. It is now a top-level category */
573 /* in its own right. */
574
575 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 576 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
577 "Use of %s is deprecated", s);
578}
579
ffb4593c 580/*
9cbb5ea2
GS
581 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
582 * utf16-to-utf8-reversed.
ffb4593c
NT
583 */
584
c39cd008
GS
585#ifdef PERL_CR_FILTER
586static void
587strip_return(SV *sv)
588{
95a20fc0 589 register const char *s = SvPVX_const(sv);
9d4ba2ae 590 register const char * const e = s + SvCUR(sv);
c39cd008
GS
591 /* outer loop optimized to do nothing if there are no CR-LFs */
592 while (s < e) {
593 if (*s++ == '\r' && *s == '\n') {
594 /* hit a CR-LF, need to copy the rest */
595 register char *d = s - 1;
596 *d++ = *s++;
597 while (s < e) {
598 if (*s == '\r' && s[1] == '\n')
599 s++;
600 *d++ = *s++;
601 }
602 SvCUR(sv) -= s - d;
603 return;
604 }
605 }
606}
a868473f 607
76e3520e 608STATIC I32
c39cd008 609S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 610{
f54cb97a 611 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
612 if (count > 0 && !maxlen)
613 strip_return(sv);
614 return count;
a868473f
NIS
615}
616#endif
617
199e78b7
DM
618
619
ffb4593c
NT
620/*
621 * Perl_lex_start
e3abe207 622 * Create a parser object and initialise its parser and lexer fields
ffb4593c
NT
623 */
624
a0d0e21e 625void
864dbfa3 626Perl_lex_start(pTHX_ SV *line)
79072805 627{
97aff369 628 dVAR;
6ef55633 629 const char *s = NULL;
8990e307 630 STRLEN len;
acdf0a21
DM
631 yy_parser *parser;
632
633 /* create and initialise a parser */
634
199e78b7 635 Newxz(parser, 1, yy_parser);
acdf0a21
DM
636 parser->old_parser = PL_parser;
637 PL_parser = parser;
638
639 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
640 parser->ps = parser->stack;
641 parser->stack_size = YYINITDEPTH;
642
643 parser->stack->state = 0;
644 parser->yyerrstatus = 0;
645 parser->yychar = YYEMPTY; /* Cause a token to be read. */
646
e3abe207
DM
647 /* on scope exit, free this parser and restore any outer one */
648 SAVEPARSER(parser);
649
acdf0a21 650 /* initialise lexer state */
8990e307 651
651b5b28 652 SAVEI8(PL_lex_state);
5db06880
NC
653#ifdef PERL_MAD
654 if (PL_lex_state == LEX_KNOWNEXT) {
199e78b7 655 I32 toke = parser->old_parser->lasttoke;
5db06880
NC
656 while (--toke >= 0) {
657 SAVEI32(PL_nexttoke[toke].next_type);
658 SAVEVPTR(PL_nexttoke[toke].next_val);
659 if (PL_madskills)
660 SAVEVPTR(PL_nexttoke[toke].next_mad);
661 }
5db06880 662 }
cd81e915 663 SAVEI32(PL_curforce);
90e3715f 664 PL_curforce = -1;
5db06880 665#else
18b09519
GS
666 if (PL_lex_state == LEX_KNOWNEXT) {
667 I32 toke = PL_nexttoke;
668 while (--toke >= 0) {
669 SAVEI32(PL_nexttype[toke]);
670 SAVEVPTR(PL_nextval[toke]);
671 }
672 SAVEI32(PL_nexttoke);
18b09519 673 }
5db06880 674#endif
57843af0 675 SAVECOPLINE(PL_curcop);
3280af22
NIS
676 SAVEPPTR(PL_bufptr);
677 SAVEPPTR(PL_bufend);
678 SAVEPPTR(PL_oldbufptr);
679 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
680 SAVEPPTR(PL_last_lop);
681 SAVEPPTR(PL_last_uni);
3280af22 682 SAVEPPTR(PL_linestart);
c76ac1ee 683 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22 684
c2598295 685 parser->copline = NOLINE;
3280af22 686 PL_lex_state = LEX_NORMAL;
c2598295 687 parser->expect = XSTATE;
199e78b7
DM
688 Newx(parser->lex_brackstack, 120, char);
689 Newx(parser->lex_casestack, 12, char);
690 *parser->lex_casestack = '\0';
691#ifndef PERL_MAD
76be56bc 692 PL_nexttoke = 0;
5db06880 693#endif
02b34bbe 694
10efb74f
NC
695 if (line) {
696 s = SvPV_const(line, len);
697 } else {
698 len = 0;
699 }
bdc0bf6f 700
10efb74f 701 if (!len) {
bdc0bf6f 702 parser->linestr = newSVpvs("\n;");
10efb74f 703 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 704 parser->linestr = newSVsv(line);
10efb74f 705 if (s[len-1] != ';')
bdc0bf6f 706 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
707 } else {
708 SvTEMP_off(line);
709 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 710 parser->linestr = line;
8990e307 711 }
bdc0bf6f
DM
712 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(parser->linestr);
713 PL_bufend = PL_bufptr + SvCUR(parser->linestr);
bd61b366 714 PL_last_lop = PL_last_uni = NULL;
3280af22 715 PL_rsfp = 0;
79072805 716}
a687059c 717
e3abe207
DM
718
719/* delete a parser object */
720
721void
722Perl_parser_free(pTHX_ const yy_parser *parser)
723{
bdc0bf6f
DM
724 SvREFCNT_dec(parser->linestr);
725
e3abe207
DM
726 Safefree(parser->stack);
727 Safefree(parser->lex_brackstack);
728 Safefree(parser->lex_casestack);
729 PL_parser = parser->old_parser;
730 Safefree(parser);
731}
732
733
ffb4593c
NT
734/*
735 * Perl_lex_end
9cbb5ea2
GS
736 * Finalizer for lexing operations. Must be called when the parser is
737 * done with the lexer.
ffb4593c
NT
738 */
739
463ee0b2 740void
864dbfa3 741Perl_lex_end(pTHX)
463ee0b2 742{
97aff369 743 dVAR;
3280af22 744 PL_doextract = FALSE;
463ee0b2
LW
745}
746
ffb4593c
NT
747/*
748 * S_incline
749 * This subroutine has nothing to do with tilting, whether at windmills
750 * or pinball tables. Its name is short for "increment line". It
57843af0 751 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 752 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
753 * # line 500 "foo.pm"
754 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
755 */
756
76e3520e 757STATIC void
d9095cec 758S_incline(pTHX_ const char *s)
463ee0b2 759{
97aff369 760 dVAR;
d9095cec
NC
761 const char *t;
762 const char *n;
763 const char *e;
463ee0b2 764
57843af0 765 CopLINE_inc(PL_curcop);
463ee0b2
LW
766 if (*s++ != '#')
767 return;
d4c19fe8
AL
768 while (SPACE_OR_TAB(*s))
769 s++;
73659bf1
GS
770 if (strnEQ(s, "line", 4))
771 s += 4;
772 else
773 return;
084592ab 774 if (SPACE_OR_TAB(*s))
73659bf1 775 s++;
4e553d73 776 else
73659bf1 777 return;
d4c19fe8
AL
778 while (SPACE_OR_TAB(*s))
779 s++;
463ee0b2
LW
780 if (!isDIGIT(*s))
781 return;
d4c19fe8 782
463ee0b2
LW
783 n = s;
784 while (isDIGIT(*s))
785 s++;
bf4acbe4 786 while (SPACE_OR_TAB(*s))
463ee0b2 787 s++;
73659bf1 788 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 789 s++;
73659bf1
GS
790 e = t + 1;
791 }
463ee0b2 792 else {
c35e046a
AL
793 t = s;
794 while (!isSPACE(*t))
795 t++;
73659bf1 796 e = t;
463ee0b2 797 }
bf4acbe4 798 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
799 e++;
800 if (*e != '\n' && *e != '\0')
801 return; /* false alarm */
802
f4dd75d9 803 if (t - s > 0) {
d9095cec 804 const STRLEN len = t - s;
8a5ee598 805#ifndef USE_ITHREADS
c4420975 806 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
807 STRLEN tmplen = cf ? strlen(cf) : 0;
808 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
809 /* must copy *{"::_<(eval N)[oldfilename:L]"}
810 * to *{"::_<newfilename"} */
44867030
NC
811 /* However, the long form of evals is only turned on by the
812 debugger - usually they're "(eval %lu)" */
813 char smallbuf[128];
814 char *tmpbuf;
815 GV **gvp;
d9095cec 816 STRLEN tmplen2 = len;
798b63bc 817 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
818 tmpbuf = smallbuf;
819 else
2ae0db35 820 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
821 tmpbuf[0] = '_';
822 tmpbuf[1] = '<';
2ae0db35 823 memcpy(tmpbuf + 2, cf, tmplen);
44867030 824 tmplen += 2;
8a5ee598
RGS
825 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
826 if (gvp) {
44867030
NC
827 char *tmpbuf2;
828 GV *gv2;
829
830 if (tmplen2 + 2 <= sizeof smallbuf)
831 tmpbuf2 = smallbuf;
832 else
833 Newx(tmpbuf2, tmplen2 + 2, char);
834
835 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
836 /* Either they malloc'd it, or we malloc'd it,
837 so no prefix is present in ours. */
838 tmpbuf2[0] = '_';
839 tmpbuf2[1] = '<';
840 }
841
842 memcpy(tmpbuf2 + 2, s, tmplen2);
843 tmplen2 += 2;
844
8a5ee598 845 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 846 if (!isGV(gv2)) {
8a5ee598 847 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
848 /* adjust ${"::_<newfilename"} to store the new file name */
849 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
850 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
851 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
852 }
44867030
NC
853
854 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 855 }
e66cf94c 856 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 857 }
8a5ee598 858#endif
05ec9bb3 859 CopFILE_free(PL_curcop);
d9095cec 860 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 861 }
57843af0 862 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
863}
864
29595ff2 865#ifdef PERL_MAD
cd81e915 866/* skip space before PL_thistoken */
29595ff2
NC
867
868STATIC char *
869S_skipspace0(pTHX_ register char *s)
870{
871 s = skipspace(s);
872 if (!PL_madskills)
873 return s;
cd81e915
NC
874 if (PL_skipwhite) {
875 if (!PL_thiswhite)
6b29d1f5 876 PL_thiswhite = newSVpvs("");
cd81e915
NC
877 sv_catsv(PL_thiswhite, PL_skipwhite);
878 sv_free(PL_skipwhite);
879 PL_skipwhite = 0;
880 }
881 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
882 return s;
883}
884
cd81e915 885/* skip space after PL_thistoken */
29595ff2
NC
886
887STATIC char *
888S_skipspace1(pTHX_ register char *s)
889{
d4c19fe8 890 const char *start = s;
29595ff2
NC
891 I32 startoff = start - SvPVX(PL_linestr);
892
893 s = skipspace(s);
894 if (!PL_madskills)
895 return s;
896 start = SvPVX(PL_linestr) + startoff;
cd81e915 897 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 898 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
899 PL_thistoken = newSVpvn(tstart, start - tstart);
900 }
901 PL_realtokenstart = -1;
902 if (PL_skipwhite) {
903 if (!PL_nextwhite)
6b29d1f5 904 PL_nextwhite = newSVpvs("");
cd81e915
NC
905 sv_catsv(PL_nextwhite, PL_skipwhite);
906 sv_free(PL_skipwhite);
907 PL_skipwhite = 0;
29595ff2
NC
908 }
909 return s;
910}
911
912STATIC char *
913S_skipspace2(pTHX_ register char *s, SV **svp)
914{
c35e046a
AL
915 char *start;
916 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
917 const I32 startoff = s - SvPVX(PL_linestr);
918
29595ff2
NC
919 s = skipspace(s);
920 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
921 if (!PL_madskills || !svp)
922 return s;
923 start = SvPVX(PL_linestr) + startoff;
cd81e915 924 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 925 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
926 PL_thistoken = newSVpvn(tstart, start - tstart);
927 PL_realtokenstart = -1;
29595ff2 928 }
cd81e915 929 if (PL_skipwhite) {
29595ff2 930 if (!*svp)
6b29d1f5 931 *svp = newSVpvs("");
cd81e915
NC
932 sv_setsv(*svp, PL_skipwhite);
933 sv_free(PL_skipwhite);
934 PL_skipwhite = 0;
29595ff2
NC
935 }
936
937 return s;
938}
939#endif
940
80a702cd 941STATIC void
5fa550fb 942S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
943{
944 AV *av = CopFILEAVx(PL_curcop);
945 if (av) {
b9f83d2f 946 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
947 if (orig_sv)
948 sv_setsv(sv, orig_sv);
949 else
950 sv_setpvn(sv, buf, len);
80a702cd
RGS
951 (void)SvIOK_on(sv);
952 SvIV_set(sv, 0);
953 av_store(av, (I32)CopLINE(PL_curcop), sv);
954 }
955}
956
ffb4593c
NT
957/*
958 * S_skipspace
959 * Called to gobble the appropriate amount and type of whitespace.
960 * Skips comments as well.
961 */
962
76e3520e 963STATIC char *
cea2e8a9 964S_skipspace(pTHX_ register char *s)
a687059c 965{
97aff369 966 dVAR;
5db06880
NC
967#ifdef PERL_MAD
968 int curoff;
969 int startoff = s - SvPVX(PL_linestr);
970
cd81e915
NC
971 if (PL_skipwhite) {
972 sv_free(PL_skipwhite);
973 PL_skipwhite = 0;
5db06880
NC
974 }
975#endif
976
3280af22 977 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 978 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 979 s++;
5db06880
NC
980#ifdef PERL_MAD
981 goto done;
982#else
463ee0b2 983 return s;
5db06880 984#endif
463ee0b2
LW
985 }
986 for (;;) {
fd049845 987 STRLEN prevlen;
09bef843 988 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 989 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
990 while (s < PL_bufend && isSPACE(*s)) {
991 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
992 incline(s);
993 }
ffb4593c
NT
994
995 /* comment */
3280af22
NIS
996 if (s < PL_bufend && *s == '#') {
997 while (s < PL_bufend && *s != '\n')
463ee0b2 998 s++;
60e6418e 999 if (s < PL_bufend) {
463ee0b2 1000 s++;
60e6418e
GS
1001 if (PL_in_eval && !PL_rsfp) {
1002 incline(s);
1003 continue;
1004 }
1005 }
463ee0b2 1006 }
ffb4593c
NT
1007
1008 /* only continue to recharge the buffer if we're at the end
1009 * of the buffer, we're not reading from a source filter, and
1010 * we're in normal lexing mode
1011 */
09bef843
SB
1012 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1013 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1014#ifdef PERL_MAD
1015 goto done;
1016#else
463ee0b2 1017 return s;
5db06880 1018#endif
ffb4593c
NT
1019
1020 /* try to recharge the buffer */
5db06880
NC
1021#ifdef PERL_MAD
1022 curoff = s - SvPVX(PL_linestr);
1023#endif
1024
9cbb5ea2 1025 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1026 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1027 {
5db06880
NC
1028#ifdef PERL_MAD
1029 if (PL_madskills && curoff != startoff) {
cd81e915 1030 if (!PL_skipwhite)
6b29d1f5 1031 PL_skipwhite = newSVpvs("");
cd81e915 1032 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1033 curoff - startoff);
1034 }
1035
1036 /* mustn't throw out old stuff yet if madpropping */
1037 SvCUR(PL_linestr) = curoff;
1038 s = SvPVX(PL_linestr) + curoff;
1039 *s = 0;
1040 if (curoff && s[-1] == '\n')
1041 s[-1] = ' ';
1042#endif
1043
9cbb5ea2 1044 /* end of file. Add on the -p or -n magic */
cd81e915 1045 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1046 if (PL_minus_p) {
5db06880 1047#ifdef PERL_MAD
6502358f 1048 sv_catpvs(PL_linestr,
5db06880
NC
1049 ";}continue{print or die qq(-p destination: $!\\n);}");
1050#else
6502358f 1051 sv_setpvs(PL_linestr,
01a19ab0 1052 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1053#endif
3280af22 1054 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1055 }
01a19ab0 1056 else if (PL_minus_n) {
5db06880
NC
1057#ifdef PERL_MAD
1058 sv_catpvn(PL_linestr, ";}", 2);
1059#else
01a19ab0 1060 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1061#endif
01a19ab0
NC
1062 PL_minus_n = 0;
1063 }
a0d0e21e 1064 else
5db06880
NC
1065#ifdef PERL_MAD
1066 sv_catpvn(PL_linestr,";", 1);
1067#else
4147a61b 1068 sv_setpvn(PL_linestr,";", 1);
5db06880 1069#endif
ffb4593c
NT
1070
1071 /* reset variables for next time we lex */
9cbb5ea2 1072 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1073 = SvPVX(PL_linestr)
1074#ifdef PERL_MAD
1075 + curoff
1076#endif
1077 ;
3280af22 1078 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1079 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1080
1081 /* Close the filehandle. Could be from -P preprocessor,
1082 * STDIN, or a regular file. If we were reading code from
1083 * STDIN (because the commandline held no -e or filename)
1084 * then we don't close it, we reset it so the code can
1085 * read from STDIN too.
1086 */
1087
3280af22
NIS
1088 if (PL_preprocess && !PL_in_eval)
1089 (void)PerlProc_pclose(PL_rsfp);
1090 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1091 PerlIO_clearerr(PL_rsfp);
8990e307 1092 else
3280af22 1093 (void)PerlIO_close(PL_rsfp);
4608196e 1094 PL_rsfp = NULL;
463ee0b2
LW
1095 return s;
1096 }
ffb4593c
NT
1097
1098 /* not at end of file, so we only read another line */
09bef843
SB
1099 /* make corresponding updates to old pointers, for yyerror() */
1100 oldprevlen = PL_oldbufptr - PL_bufend;
1101 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1102 if (PL_last_uni)
1103 oldunilen = PL_last_uni - PL_bufend;
1104 if (PL_last_lop)
1105 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1106 PL_linestart = PL_bufptr = s + prevlen;
1107 PL_bufend = s + SvCUR(PL_linestr);
1108 s = PL_bufptr;
09bef843
SB
1109 PL_oldbufptr = s + oldprevlen;
1110 PL_oldoldbufptr = s + oldoldprevlen;
1111 if (PL_last_uni)
1112 PL_last_uni = s + oldunilen;
1113 if (PL_last_lop)
1114 PL_last_lop = s + oldloplen;
a0d0e21e 1115 incline(s);
ffb4593c
NT
1116
1117 /* debugger active and we're not compiling the debugger code,
1118 * so store the line into the debugger's array of lines
1119 */
80a702cd 1120 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1121 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1122 }
5db06880
NC
1123
1124#ifdef PERL_MAD
1125 done:
1126 if (PL_madskills) {
cd81e915 1127 if (!PL_skipwhite)
6b29d1f5 1128 PL_skipwhite = newSVpvs("");
5db06880
NC
1129 curoff = s - SvPVX(PL_linestr);
1130 if (curoff - startoff)
cd81e915 1131 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1132 curoff - startoff);
1133 }
1134 return s;
1135#endif
a687059c 1136}
378cc40b 1137
ffb4593c
NT
1138/*
1139 * S_check_uni
1140 * Check the unary operators to ensure there's no ambiguity in how they're
1141 * used. An ambiguous piece of code would be:
1142 * rand + 5
1143 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1144 * the +5 is its argument.
1145 */
1146
76e3520e 1147STATIC void
cea2e8a9 1148S_check_uni(pTHX)
ba106d47 1149{
97aff369 1150 dVAR;
d4c19fe8
AL
1151 const char *s;
1152 const char *t;
2f3197b3 1153
3280af22 1154 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1155 return;
3280af22
NIS
1156 while (isSPACE(*PL_last_uni))
1157 PL_last_uni++;
c35e046a
AL
1158 s = PL_last_uni;
1159 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1160 s++;
3280af22 1161 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1162 return;
6136c704 1163
0453d815 1164 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1165 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1166 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1167 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1168 }
2f3197b3
LW
1169}
1170
ffb4593c
NT
1171/*
1172 * LOP : macro to build a list operator. Its behaviour has been replaced
1173 * with a subroutine, S_lop() for which LOP is just another name.
1174 */
1175
a0d0e21e
LW
1176#define LOP(f,x) return lop(f,x,s)
1177
ffb4593c
NT
1178/*
1179 * S_lop
1180 * Build a list operator (or something that might be one). The rules:
1181 * - if we have a next token, then it's a list operator [why?]
1182 * - if the next thing is an opening paren, then it's a function
1183 * - else it's a list operator
1184 */
1185
76e3520e 1186STATIC I32
a0be28da 1187S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1188{
97aff369 1189 dVAR;
79072805 1190 yylval.ival = f;
35c8bce7 1191 CLINE;
3280af22
NIS
1192 PL_expect = x;
1193 PL_bufptr = s;
1194 PL_last_lop = PL_oldbufptr;
eb160463 1195 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1196#ifdef PERL_MAD
1197 if (PL_lasttoke)
1198 return REPORT(LSTOP);
1199#else
3280af22 1200 if (PL_nexttoke)
bbf60fe6 1201 return REPORT(LSTOP);
5db06880 1202#endif
79072805 1203 if (*s == '(')
bbf60fe6 1204 return REPORT(FUNC);
29595ff2 1205 s = PEEKSPACE(s);
79072805 1206 if (*s == '(')
bbf60fe6 1207 return REPORT(FUNC);
79072805 1208 else
bbf60fe6 1209 return REPORT(LSTOP);
79072805
LW
1210}
1211
5db06880
NC
1212#ifdef PERL_MAD
1213 /*
1214 * S_start_force
1215 * Sets up for an eventual force_next(). start_force(0) basically does
1216 * an unshift, while start_force(-1) does a push. yylex removes items
1217 * on the "pop" end.
1218 */
1219
1220STATIC void
1221S_start_force(pTHX_ int where)
1222{
1223 int i;
1224
cd81e915 1225 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1226 where = PL_lasttoke;
cd81e915
NC
1227 assert(PL_curforce < 0 || PL_curforce == where);
1228 if (PL_curforce != where) {
5db06880
NC
1229 for (i = PL_lasttoke; i > where; --i) {
1230 PL_nexttoke[i] = PL_nexttoke[i-1];
1231 }
1232 PL_lasttoke++;
1233 }
cd81e915 1234 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1235 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1236 PL_curforce = where;
1237 if (PL_nextwhite) {
5db06880 1238 if (PL_madskills)
6b29d1f5 1239 curmad('^', newSVpvs(""));
cd81e915 1240 CURMAD('_', PL_nextwhite);
5db06880
NC
1241 }
1242}
1243
1244STATIC void
1245S_curmad(pTHX_ char slot, SV *sv)
1246{
1247 MADPROP **where;
1248
1249 if (!sv)
1250 return;
cd81e915
NC
1251 if (PL_curforce < 0)
1252 where = &PL_thismad;
5db06880 1253 else
cd81e915 1254 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1255
cd81e915 1256 if (PL_faketokens)
5db06880
NC
1257 sv_setpvn(sv, "", 0);
1258 else {
1259 if (!IN_BYTES) {
1260 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1261 SvUTF8_on(sv);
1262 else if (PL_encoding) {
1263 sv_recode_to_utf8(sv, PL_encoding);
1264 }
1265 }
1266 }
1267
1268 /* keep a slot open for the head of the list? */
1269 if (slot != '_' && *where && (*where)->mad_key == '^') {
1270 (*where)->mad_key = slot;
1271 sv_free((*where)->mad_val);
1272 (*where)->mad_val = (void*)sv;
1273 }
1274 else
1275 addmad(newMADsv(slot, sv), where, 0);
1276}
1277#else
b3f24c00
MHM
1278# define start_force(where) NOOP
1279# define curmad(slot, sv) NOOP
5db06880
NC
1280#endif
1281
ffb4593c
NT
1282/*
1283 * S_force_next
9cbb5ea2 1284 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1285 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1286 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1287 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1288 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1289 */
1290
4e553d73 1291STATIC void
cea2e8a9 1292S_force_next(pTHX_ I32 type)
79072805 1293{
97aff369 1294 dVAR;
5db06880 1295#ifdef PERL_MAD
cd81e915 1296 if (PL_curforce < 0)
5db06880 1297 start_force(PL_lasttoke);
cd81e915 1298 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1299 if (PL_lex_state != LEX_KNOWNEXT)
1300 PL_lex_defer = PL_lex_state;
1301 PL_lex_state = LEX_KNOWNEXT;
1302 PL_lex_expect = PL_expect;
cd81e915 1303 PL_curforce = -1;
5db06880 1304#else
3280af22
NIS
1305 PL_nexttype[PL_nexttoke] = type;
1306 PL_nexttoke++;
1307 if (PL_lex_state != LEX_KNOWNEXT) {
1308 PL_lex_defer = PL_lex_state;
1309 PL_lex_expect = PL_expect;
1310 PL_lex_state = LEX_KNOWNEXT;
79072805 1311 }
5db06880 1312#endif
79072805
LW
1313}
1314
d0a148a6
NC
1315STATIC SV *
1316S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1317{
97aff369 1318 dVAR;
9d4ba2ae 1319 SV * const sv = newSVpvn(start,len);
bfed75c6 1320 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1321 SvUTF8_on(sv);
1322 return sv;
1323}
1324
ffb4593c
NT
1325/*
1326 * S_force_word
1327 * When the lexer knows the next thing is a word (for instance, it has
1328 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1329 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1330 * lookahead.
ffb4593c
NT
1331 *
1332 * Arguments:
b1b65b59 1333 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1334 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1335 * int check_keyword : if true, Perl checks to make sure the word isn't
1336 * a keyword (do this if the word is a label, e.g. goto FOO)
1337 * int allow_pack : if true, : characters will also be allowed (require,
1338 * use, etc. do this)
9cbb5ea2 1339 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1340 */
1341
76e3520e 1342STATIC char *
cea2e8a9 1343S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1344{
97aff369 1345 dVAR;
463ee0b2
LW
1346 register char *s;
1347 STRLEN len;
4e553d73 1348
29595ff2 1349 start = SKIPSPACE1(start);
463ee0b2 1350 s = start;
7e2040f0 1351 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1352 (allow_pack && *s == ':') ||
15f0808c 1353 (allow_initial_tick && *s == '\'') )
a0d0e21e 1354 {
3280af22 1355 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1356 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1357 return start;
cd81e915 1358 start_force(PL_curforce);
5db06880
NC
1359 if (PL_madskills)
1360 curmad('X', newSVpvn(start,s-start));
463ee0b2 1361 if (token == METHOD) {
29595ff2 1362 s = SKIPSPACE1(s);
463ee0b2 1363 if (*s == '(')
3280af22 1364 PL_expect = XTERM;
463ee0b2 1365 else {
3280af22 1366 PL_expect = XOPERATOR;
463ee0b2 1367 }
79072805 1368 }
e74e6b3d 1369 if (PL_madskills)
63575281 1370 curmad('g', newSVpvs( "forced" ));
9ded7720 1371 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1372 = (OP*)newSVOP(OP_CONST,0,
1373 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1374 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1375 force_next(token);
1376 }
1377 return s;
1378}
1379
ffb4593c
NT
1380/*
1381 * S_force_ident
9cbb5ea2 1382 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1383 * text only contains the "foo" portion. The first argument is a pointer
1384 * to the "foo", and the second argument is the type symbol to prefix.
1385 * Forces the next token to be a "WORD".
9cbb5ea2 1386 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1387 */
1388
76e3520e 1389STATIC void
bfed75c6 1390S_force_ident(pTHX_ register const char *s, int kind)
79072805 1391{
97aff369 1392 dVAR;
c35e046a 1393 if (*s) {
90e5519e
NC
1394 const STRLEN len = strlen(s);
1395 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1396 start_force(PL_curforce);
9ded7720 1397 NEXTVAL_NEXTTOKE.opval = o;
79072805 1398 force_next(WORD);
748a9306 1399 if (kind) {
11343788 1400 o->op_private = OPpCONST_ENTERED;
55497cff 1401 /* XXX see note in pp_entereval() for why we forgo typo
1402 warnings if the symbol must be introduced in an eval.
1403 GSAR 96-10-12 */
90e5519e
NC
1404 gv_fetchpvn_flags(s, len,
1405 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1406 : GV_ADD,
1407 kind == '$' ? SVt_PV :
1408 kind == '@' ? SVt_PVAV :
1409 kind == '%' ? SVt_PVHV :
a0d0e21e 1410 SVt_PVGV
90e5519e 1411 );
748a9306 1412 }
79072805
LW
1413 }
1414}
1415
1571675a
GS
1416NV
1417Perl_str_to_version(pTHX_ SV *sv)
1418{
1419 NV retval = 0.0;
1420 NV nshift = 1.0;
1421 STRLEN len;
cfd0369c 1422 const char *start = SvPV_const(sv,len);
9d4ba2ae 1423 const char * const end = start + len;
504618e9 1424 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1425 while (start < end) {
ba210ebe 1426 STRLEN skip;
1571675a
GS
1427 UV n;
1428 if (utf)
9041c2e3 1429 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1430 else {
1431 n = *(U8*)start;
1432 skip = 1;
1433 }
1434 retval += ((NV)n)/nshift;
1435 start += skip;
1436 nshift *= 1000;
1437 }
1438 return retval;
1439}
1440
4e553d73 1441/*
ffb4593c
NT
1442 * S_force_version
1443 * Forces the next token to be a version number.
e759cc13
RGS
1444 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1445 * and if "guessing" is TRUE, then no new token is created (and the caller
1446 * must use an alternative parsing method).
ffb4593c
NT
1447 */
1448
76e3520e 1449STATIC char *
e759cc13 1450S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1451{
97aff369 1452 dVAR;
5f66b61c 1453 OP *version = NULL;
44dcb63b 1454 char *d;
5db06880
NC
1455#ifdef PERL_MAD
1456 I32 startoff = s - SvPVX(PL_linestr);
1457#endif
89bfa8cd 1458
29595ff2 1459 s = SKIPSPACE1(s);
89bfa8cd 1460
44dcb63b 1461 d = s;
dd629d5b 1462 if (*d == 'v')
44dcb63b 1463 d++;
44dcb63b 1464 if (isDIGIT(*d)) {
e759cc13
RGS
1465 while (isDIGIT(*d) || *d == '_' || *d == '.')
1466 d++;
5db06880
NC
1467#ifdef PERL_MAD
1468 if (PL_madskills) {
cd81e915 1469 start_force(PL_curforce);
5db06880
NC
1470 curmad('X', newSVpvn(s,d-s));
1471 }
1472#endif
9f3d182e 1473 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1474 SV *ver;
b73d6f50 1475 s = scan_num(s, &yylval);
89bfa8cd 1476 version = yylval.opval;
dd629d5b
GS
1477 ver = cSVOPx(version)->op_sv;
1478 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1479 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1480 SvNV_set(ver, str_to_version(ver));
1571675a 1481 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1482 }
89bfa8cd 1483 }
5db06880
NC
1484 else if (guessing) {
1485#ifdef PERL_MAD
1486 if (PL_madskills) {
cd81e915
NC
1487 sv_free(PL_nextwhite); /* let next token collect whitespace */
1488 PL_nextwhite = 0;
5db06880
NC
1489 s = SvPVX(PL_linestr) + startoff;
1490 }
1491#endif
e759cc13 1492 return s;
5db06880 1493 }
89bfa8cd 1494 }
1495
5db06880
NC
1496#ifdef PERL_MAD
1497 if (PL_madskills && !version) {
cd81e915
NC
1498 sv_free(PL_nextwhite); /* let next token collect whitespace */
1499 PL_nextwhite = 0;
5db06880
NC
1500 s = SvPVX(PL_linestr) + startoff;
1501 }
1502#endif
89bfa8cd 1503 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1504 start_force(PL_curforce);
9ded7720 1505 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1506 force_next(WORD);
89bfa8cd 1507
e759cc13 1508 return s;
89bfa8cd 1509}
1510
ffb4593c
NT
1511/*
1512 * S_tokeq
1513 * Tokenize a quoted string passed in as an SV. It finds the next
1514 * chunk, up to end of string or a backslash. It may make a new
1515 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1516 * turns \\ into \.
1517 */
1518
76e3520e 1519STATIC SV *
cea2e8a9 1520S_tokeq(pTHX_ SV *sv)
79072805 1521{
97aff369 1522 dVAR;
79072805
LW
1523 register char *s;
1524 register char *send;
1525 register char *d;
b3ac6de7
IZ
1526 STRLEN len = 0;
1527 SV *pv = sv;
79072805
LW
1528
1529 if (!SvLEN(sv))
b3ac6de7 1530 goto finish;
79072805 1531
a0d0e21e 1532 s = SvPV_force(sv, len);
21a311ee 1533 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1534 goto finish;
463ee0b2 1535 send = s + len;
79072805
LW
1536 while (s < send && *s != '\\')
1537 s++;
1538 if (s == send)
b3ac6de7 1539 goto finish;
79072805 1540 d = s;
be4731d2 1541 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1542 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1543 if (SvUTF8(sv))
1544 SvUTF8_on(pv);
1545 }
79072805
LW
1546 while (s < send) {
1547 if (*s == '\\') {
a0d0e21e 1548 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1549 s++; /* all that, just for this */
1550 }
1551 *d++ = *s++;
1552 }
1553 *d = '\0';
95a20fc0 1554 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1555 finish:
3280af22 1556 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1557 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1558 return sv;
1559}
1560
ffb4593c
NT
1561/*
1562 * Now come three functions related to double-quote context,
1563 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1564 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1565 * interact with PL_lex_state, and create fake ( ... ) argument lists
1566 * to handle functions and concatenation.
1567 * They assume that whoever calls them will be setting up a fake
1568 * join call, because each subthing puts a ',' after it. This lets
1569 * "lower \luPpEr"
1570 * become
1571 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1572 *
1573 * (I'm not sure whether the spurious commas at the end of lcfirst's
1574 * arguments and join's arguments are created or not).
1575 */
1576
1577/*
1578 * S_sublex_start
1579 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1580 *
1581 * Pattern matching will set PL_lex_op to the pattern-matching op to
1582 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1583 *
1584 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1585 *
1586 * Everything else becomes a FUNC.
1587 *
1588 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1589 * had an OP_CONST or OP_READLINE). This just sets us up for a
1590 * call to S_sublex_push().
1591 */
1592
76e3520e 1593STATIC I32
cea2e8a9 1594S_sublex_start(pTHX)
79072805 1595{
97aff369 1596 dVAR;
0d46e09a 1597 register const I32 op_type = yylval.ival;
79072805
LW
1598
1599 if (op_type == OP_NULL) {
3280af22 1600 yylval.opval = PL_lex_op;
5f66b61c 1601 PL_lex_op = NULL;
79072805
LW
1602 return THING;
1603 }
1604 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1605 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1606
1607 if (SvTYPE(sv) == SVt_PVIV) {
1608 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1609 STRLEN len;
96a5add6 1610 const char * const p = SvPV_const(sv, len);
f54cb97a 1611 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1612 if (SvUTF8(sv))
1613 SvUTF8_on(nsv);
b3ac6de7
IZ
1614 SvREFCNT_dec(sv);
1615 sv = nsv;
4e553d73 1616 }
b3ac6de7 1617 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1618 PL_lex_stuff = NULL;
6f33ba73
RGS
1619 /* Allow <FH> // "foo" */
1620 if (op_type == OP_READLINE)
1621 PL_expect = XTERMORDORDOR;
79072805
LW
1622 return THING;
1623 }
e3f73d4e
RGS
1624 else if (op_type == OP_BACKTICK && PL_lex_op) {
1625 /* readpipe() vas overriden */
1626 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1627 yylval.opval = PL_lex_op;
9b201d7d 1628 PL_lex_op = NULL;
e3f73d4e
RGS
1629 PL_lex_stuff = NULL;
1630 return THING;
1631 }
79072805 1632
3280af22 1633 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1634 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1635 PL_sublex_info.sub_op = PL_lex_op;
1636 PL_lex_state = LEX_INTERPPUSH;
55497cff 1637
3280af22
NIS
1638 PL_expect = XTERM;
1639 if (PL_lex_op) {
1640 yylval.opval = PL_lex_op;
5f66b61c 1641 PL_lex_op = NULL;
55497cff 1642 return PMFUNC;
1643 }
1644 else
1645 return FUNC;
1646}
1647
ffb4593c
NT
1648/*
1649 * S_sublex_push
1650 * Create a new scope to save the lexing state. The scope will be
1651 * ended in S_sublex_done. Returns a '(', starting the function arguments
1652 * to the uc, lc, etc. found before.
1653 * Sets PL_lex_state to LEX_INTERPCONCAT.
1654 */
1655
76e3520e 1656STATIC I32
cea2e8a9 1657S_sublex_push(pTHX)
55497cff 1658{
27da23d5 1659 dVAR;
f46d017c 1660 ENTER;
55497cff 1661
3280af22 1662 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1663 SAVEBOOL(PL_lex_dojoin);
3280af22 1664 SAVEI32(PL_lex_brackets);
3280af22
NIS
1665 SAVEI32(PL_lex_casemods);
1666 SAVEI32(PL_lex_starts);
651b5b28 1667 SAVEI8(PL_lex_state);
7766f137 1668 SAVEVPTR(PL_lex_inpat);
98246f1e 1669 SAVEI16(PL_lex_inwhat);
57843af0 1670 SAVECOPLINE(PL_curcop);
3280af22 1671 SAVEPPTR(PL_bufptr);
8452ff4b 1672 SAVEPPTR(PL_bufend);
3280af22
NIS
1673 SAVEPPTR(PL_oldbufptr);
1674 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1675 SAVEPPTR(PL_last_lop);
1676 SAVEPPTR(PL_last_uni);
3280af22
NIS
1677 SAVEPPTR(PL_linestart);
1678 SAVESPTR(PL_linestr);
8edd5f42
RGS
1679 SAVEGENERICPV(PL_lex_brackstack);
1680 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1681
1682 PL_linestr = PL_lex_stuff;
a0714e2c 1683 PL_lex_stuff = NULL;
3280af22 1684
9cbb5ea2
GS
1685 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1686 = SvPVX(PL_linestr);
3280af22 1687 PL_bufend += SvCUR(PL_linestr);
bd61b366 1688 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1689 SAVEFREESV(PL_linestr);
1690
1691 PL_lex_dojoin = FALSE;
1692 PL_lex_brackets = 0;
a02a5408
JC
1693 Newx(PL_lex_brackstack, 120, char);
1694 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1695 PL_lex_casemods = 0;
1696 *PL_lex_casestack = '\0';
1697 PL_lex_starts = 0;
1698 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1699 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1700
1701 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1702 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1703 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1704 else
5f66b61c 1705 PL_lex_inpat = NULL;
79072805 1706
55497cff 1707 return '(';
79072805
LW
1708}
1709
ffb4593c
NT
1710/*
1711 * S_sublex_done
1712 * Restores lexer state after a S_sublex_push.
1713 */
1714
76e3520e 1715STATIC I32
cea2e8a9 1716S_sublex_done(pTHX)
79072805 1717{
27da23d5 1718 dVAR;
3280af22 1719 if (!PL_lex_starts++) {
396482e1 1720 SV * const sv = newSVpvs("");
9aa983d2
JH
1721 if (SvUTF8(PL_linestr))
1722 SvUTF8_on(sv);
3280af22 1723 PL_expect = XOPERATOR;
9aa983d2 1724 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1725 return THING;
1726 }
1727
3280af22
NIS
1728 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1729 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1730 return yylex();
79072805
LW
1731 }
1732
ffb4593c 1733 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1734 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1735 PL_linestr = PL_lex_repl;
1736 PL_lex_inpat = 0;
1737 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1738 PL_bufend += SvCUR(PL_linestr);
bd61b366 1739 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1740 SAVEFREESV(PL_linestr);
1741 PL_lex_dojoin = FALSE;
1742 PL_lex_brackets = 0;
3280af22
NIS
1743 PL_lex_casemods = 0;
1744 *PL_lex_casestack = '\0';
1745 PL_lex_starts = 0;
25da4f38 1746 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1747 PL_lex_state = LEX_INTERPNORMAL;
1748 PL_lex_starts++;
e9fa98b2
HS
1749 /* we don't clear PL_lex_repl here, so that we can check later
1750 whether this is an evalled subst; that means we rely on the
1751 logic to ensure sublex_done() is called again only via the
1752 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1753 }
e9fa98b2 1754 else {
3280af22 1755 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1756 PL_lex_repl = NULL;
e9fa98b2 1757 }
79072805 1758 return ',';
ffed7fef
LW
1759 }
1760 else {
5db06880
NC
1761#ifdef PERL_MAD
1762 if (PL_madskills) {
cd81e915
NC
1763 if (PL_thiswhite) {
1764 if (!PL_endwhite)
6b29d1f5 1765 PL_endwhite = newSVpvs("");
cd81e915
NC
1766 sv_catsv(PL_endwhite, PL_thiswhite);
1767 PL_thiswhite = 0;
1768 }
1769 if (PL_thistoken)
1770 sv_setpvn(PL_thistoken,"",0);
5db06880 1771 else
cd81e915 1772 PL_realtokenstart = -1;
5db06880
NC
1773 }
1774#endif
f46d017c 1775 LEAVE;
3280af22
NIS
1776 PL_bufend = SvPVX(PL_linestr);
1777 PL_bufend += SvCUR(PL_linestr);
1778 PL_expect = XOPERATOR;
09bef843 1779 PL_sublex_info.sub_inwhat = 0;
79072805 1780 return ')';
ffed7fef
LW
1781 }
1782}
1783
02aa26ce
NT
1784/*
1785 scan_const
1786
1787 Extracts a pattern, double-quoted string, or transliteration. This
1788 is terrifying code.
1789
94def140 1790 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1791 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1792 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1793
94def140
TS
1794 Returns a pointer to the character scanned up to. If this is
1795 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1796 successfully parsed), will leave an OP for the substring scanned
1797 in yylval. Caller must intuit reason for not parsing further
1798 by looking at the next characters herself.
1799
02aa26ce
NT
1800 In patterns:
1801 backslashes:
1802 double-quoted style: \r and \n
1803 regexp special ones: \D \s
94def140
TS
1804 constants: \x31
1805 backrefs: \1
02aa26ce
NT
1806 case and quoting: \U \Q \E
1807 stops on @ and $, but not for $ as tail anchor
1808
1809 In transliterations:
1810 characters are VERY literal, except for - not at the start or end
94def140
TS
1811 of the string, which indicates a range. If the range is in bytes,
1812 scan_const expands the range to the full set of intermediate
1813 characters. If the range is in utf8, the hyphen is replaced with
1814 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1815
1816 In double-quoted strings:
1817 backslashes:
1818 double-quoted style: \r and \n
94def140
TS
1819 constants: \x31
1820 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1821 case and quoting: \U \Q \E
1822 stops on @ and $
1823
1824 scan_const does *not* construct ops to handle interpolated strings.
1825 It stops processing as soon as it finds an embedded $ or @ variable
1826 and leaves it to the caller to work out what's going on.
1827
94def140
TS
1828 embedded arrays (whether in pattern or not) could be:
1829 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1830
1831 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1832
1833 $ in pattern could be $foo or could be tail anchor. Assumption:
1834 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1835 followed by one of "()| \r\n\t"
02aa26ce
NT
1836
1837 \1 (backreferences) are turned into $1
1838
1839 The structure of the code is
1840 while (there's a character to process) {
94def140
TS
1841 handle transliteration ranges
1842 skip regexp comments /(?#comment)/ and codes /(?{code})/
1843 skip #-initiated comments in //x patterns
1844 check for embedded arrays
02aa26ce
NT
1845 check for embedded scalars
1846 if (backslash) {
94def140
TS
1847 leave intact backslashes from leaveit (below)
1848 deprecate \1 in substitution replacements
02aa26ce
NT
1849 handle string-changing backslashes \l \U \Q \E, etc.
1850 switch (what was escaped) {
94def140
TS
1851 handle \- in a transliteration (becomes a literal -)
1852 handle \132 (octal characters)
1853 handle \x15 and \x{1234} (hex characters)
1854 handle \N{name} (named characters)
1855 handle \cV (control characters)
1856 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1857 } (end switch)
1858 } (end if backslash)
1859 } (end while character to read)
4e553d73 1860
02aa26ce
NT
1861*/
1862
76e3520e 1863STATIC char *
cea2e8a9 1864S_scan_const(pTHX_ char *start)
79072805 1865{
97aff369 1866 dVAR;
3280af22 1867 register char *send = PL_bufend; /* end of the constant */
561b68a9 1868 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1869 register char *s = start; /* start of the constant */
1870 register char *d = SvPVX(sv); /* destination for copies */
1871 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1872 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1873 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1874 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1875 UV uv;
4c3a8340
TS
1876#ifdef EBCDIC
1877 UV literal_endpoint = 0;
e294cc5d 1878 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1879#endif
012bcf8d 1880
2b9d42f0
NIS
1881 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1882 /* If we are doing a trans and we know we want UTF8 set expectation */
1883 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1884 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1885 }
1886
1887
79072805 1888 while (s < send || dorange) {
02aa26ce 1889 /* get transliterations out of the way (they're most literal) */
3280af22 1890 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1891 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1892 if (dorange) {
1ba5c669
JH
1893 I32 i; /* current expanded character */
1894 I32 min; /* first character in range */
1895 I32 max; /* last character in range */
02aa26ce 1896
e294cc5d
JH
1897#ifdef EBCDIC
1898 UV uvmax = 0;
1899#endif
1900
1901 if (has_utf8
1902#ifdef EBCDIC
1903 && !native_range
1904#endif
1905 ) {
9d4ba2ae 1906 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1907 char *e = d++;
1908 while (e-- > c)
1909 *(e + 1) = *e;
25716404 1910 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1911 /* mark the range as done, and continue */
1912 dorange = FALSE;
1913 didrange = TRUE;
1914 continue;
1915 }
2b9d42f0 1916
95a20fc0 1917 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1918#ifdef EBCDIC
1919 SvGROW(sv,
1920 SvLEN(sv) + (has_utf8 ?
1921 (512 - UTF_CONTINUATION_MARK +
1922 UNISKIP(0x100))
1923 : 256));
1924 /* How many two-byte within 0..255: 128 in UTF-8,
1925 * 96 in UTF-8-mod. */
1926#else
9cbb5ea2 1927 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1928#endif
9cbb5ea2 1929 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1930#ifdef EBCDIC
1931 if (has_utf8) {
1932 int j;
1933 for (j = 0; j <= 1; j++) {
1934 char * const c = (char*)utf8_hop((U8*)d, -1);
1935 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1936 if (j)
1937 min = (U8)uv;
1938 else if (uv < 256)
1939 max = (U8)uv;
1940 else {
1941 max = (U8)0xff; /* only to \xff */
1942 uvmax = uv; /* \x{100} to uvmax */
1943 }
1944 d = c; /* eat endpoint chars */
1945 }
1946 }
1947 else {
1948#endif
1949 d -= 2; /* eat the first char and the - */
1950 min = (U8)*d; /* first char in range */
1951 max = (U8)d[1]; /* last char in range */
1952#ifdef EBCDIC
1953 }
1954#endif
8ada0baa 1955
c2e66d9e 1956 if (min > max) {
01ec43d0 1957 Perl_croak(aTHX_
d1573ac7 1958 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1959 (char)min, (char)max);
c2e66d9e
GS
1960 }
1961
c7f1f016 1962#ifdef EBCDIC
4c3a8340
TS
1963 if (literal_endpoint == 2 &&
1964 ((isLOWER(min) && isLOWER(max)) ||
1965 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1966 if (isLOWER(min)) {
1967 for (i = min; i <= max; i++)
1968 if (isLOWER(i))
db42d148 1969 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1970 } else {
1971 for (i = min; i <= max; i++)
1972 if (isUPPER(i))
db42d148 1973 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1974 }
1975 }
1976 else
1977#endif
1978 for (i = min; i <= max; i++)
e294cc5d
JH
1979#ifdef EBCDIC
1980 if (has_utf8) {
1981 const U8 ch = (U8)NATIVE_TO_UTF(i);
1982 if (UNI_IS_INVARIANT(ch))
1983 *d++ = (U8)i;
1984 else {
1985 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1986 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1987 }
1988 }
1989 else
1990#endif
1991 *d++ = (char)i;
1992
1993#ifdef EBCDIC
1994 if (uvmax) {
1995 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1996 if (uvmax > 0x101)
1997 *d++ = (char)UTF_TO_NATIVE(0xff);
1998 if (uvmax > 0x100)
1999 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2000 }
2001#endif
02aa26ce
NT
2002
2003 /* mark the range as done, and continue */
79072805 2004 dorange = FALSE;
01ec43d0 2005 didrange = TRUE;
4c3a8340
TS
2006#ifdef EBCDIC
2007 literal_endpoint = 0;
2008#endif
79072805 2009 continue;
4e553d73 2010 }
02aa26ce
NT
2011
2012 /* range begins (ignore - as first or last char) */
79072805 2013 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2014 if (didrange) {
1fafa243 2015 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2016 }
e294cc5d
JH
2017 if (has_utf8
2018#ifdef EBCDIC
2019 && !native_range
2020#endif
2021 ) {
25716404 2022 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2023 s++;
2024 continue;
2025 }
79072805
LW
2026 dorange = TRUE;
2027 s++;
01ec43d0
GS
2028 }
2029 else {
2030 didrange = FALSE;
4c3a8340
TS
2031#ifdef EBCDIC
2032 literal_endpoint = 0;
e294cc5d 2033 native_range = TRUE;
4c3a8340 2034#endif
01ec43d0 2035 }
79072805 2036 }
02aa26ce
NT
2037
2038 /* if we get here, we're not doing a transliteration */
2039
0f5d15d6
IZ
2040 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2041 except for the last char, which will be done separately. */
3280af22 2042 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2043 if (s[2] == '#') {
e994fd66 2044 while (s+1 < send && *s != ')')
db42d148 2045 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2046 }
2047 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2048 || (s[2] == '?' && s[3] == '{'))
155aba94 2049 {
cc6b7395 2050 I32 count = 1;
0f5d15d6 2051 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2052 char c;
2053
d9f97599
GS
2054 while (count && (c = *regparse)) {
2055 if (c == '\\' && regparse[1])
2056 regparse++;
4e553d73 2057 else if (c == '{')
cc6b7395 2058 count++;
4e553d73 2059 else if (c == '}')
cc6b7395 2060 count--;
d9f97599 2061 regparse++;
cc6b7395 2062 }
e994fd66 2063 if (*regparse != ')')
5bdf89e7 2064 regparse--; /* Leave one char for continuation. */
0f5d15d6 2065 while (s < regparse)
db42d148 2066 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2067 }
748a9306 2068 }
02aa26ce
NT
2069
2070 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2071 else if (*s == '#' && PL_lex_inpat &&
2072 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2073 while (s+1 < send && *s != '\n')
db42d148 2074 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2075 }
02aa26ce 2076
5d1d4326 2077 /* check for embedded arrays
da6eedaa 2078 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2079 */
1749ea0d
TS
2080 else if (*s == '@' && s[1]) {
2081 if (isALNUM_lazy_if(s+1,UTF))
2082 break;
2083 if (strchr(":'{$", s[1]))
2084 break;
2085 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2086 break; /* in regexp, neither @+ nor @- are interpolated */
2087 }
02aa26ce
NT
2088
2089 /* check for embedded scalars. only stop if we're sure it's a
2090 variable.
2091 */
79072805 2092 else if (*s == '$') {
3280af22 2093 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2094 break;
6002328a 2095 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2096 break; /* in regexp, $ might be tail anchor */
2097 }
02aa26ce 2098
2b9d42f0
NIS
2099 /* End of else if chain - OP_TRANS rejoin rest */
2100
02aa26ce 2101 /* backslashes */
79072805
LW
2102 if (*s == '\\' && s+1 < send) {
2103 s++;
02aa26ce 2104
02aa26ce 2105 /* deprecate \1 in strings and substitution replacements */
3280af22 2106 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2107 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2108 {
599cee73 2109 if (ckWARN(WARN_SYNTAX))
9014280d 2110 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2111 *--s = '$';
2112 break;
2113 }
02aa26ce
NT
2114
2115 /* string-change backslash escapes */
3280af22 2116 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2117 --s;
2118 break;
2119 }
cc74c5bd
TS
2120 /* skip any other backslash escapes in a pattern */
2121 else if (PL_lex_inpat) {
2122 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2123 goto default_action;
2124 }
02aa26ce
NT
2125
2126 /* if we get here, it's either a quoted -, or a digit */
79072805 2127 switch (*s) {
02aa26ce
NT
2128
2129 /* quoted - in transliterations */
79072805 2130 case '-':
3280af22 2131 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2132 *d++ = *s++;
2133 continue;
2134 }
2135 /* FALL THROUGH */
2136 default:
11b8faa4 2137 {
86f97054 2138 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2139 ckWARN(WARN_MISC))
9014280d 2140 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2141 "Unrecognized escape \\%c passed through",
2142 *s);
11b8faa4 2143 /* default action is to copy the quoted character */
f9a63242 2144 goto default_action;
11b8faa4 2145 }
02aa26ce
NT
2146
2147 /* \132 indicates an octal constant */
79072805
LW
2148 case '0': case '1': case '2': case '3':
2149 case '4': case '5': case '6': case '7':
ba210ebe 2150 {
53305cf1
NC
2151 I32 flags = 0;
2152 STRLEN len = 3;
2153 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2154 s += len;
2155 }
012bcf8d 2156 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2157
2158 /* \x24 indicates a hex constant */
79072805 2159 case 'x':
a0ed51b3
LW
2160 ++s;
2161 if (*s == '{') {
9d4ba2ae 2162 char* const e = strchr(s, '}');
a4c04bdc
NC
2163 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2164 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2165 STRLEN len;
355860ce 2166
53305cf1 2167 ++s;
adaeee49 2168 if (!e) {
a0ed51b3 2169 yyerror("Missing right brace on \\x{}");
355860ce 2170 continue;
ba210ebe 2171 }
53305cf1
NC
2172 len = e - s;
2173 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2174 s = e + 1;
a0ed51b3
LW
2175 }
2176 else {
ba210ebe 2177 {
53305cf1 2178 STRLEN len = 2;
a4c04bdc 2179 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2180 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2181 s += len;
2182 }
012bcf8d
GS
2183 }
2184
2185 NUM_ESCAPE_INSERT:
2186 /* Insert oct or hex escaped character.
301d3d20 2187 * There will always enough room in sv since such
db42d148 2188 * escapes will be longer than any UTF-8 sequence
301d3d20 2189 * they can end up as. */
ba7cea30 2190
c7f1f016
NIS
2191 /* We need to map to chars to ASCII before doing the tests
2192 to cover EBCDIC
2193 */
c4d5f83a 2194 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2195 if (!has_utf8 && uv > 255) {
301d3d20
JH
2196 /* Might need to recode whatever we have
2197 * accumulated so far if it contains any
2198 * hibit chars.
2199 *
2200 * (Can't we keep track of that and avoid
2201 * this rescan? --jhi)
012bcf8d 2202 */
c7f1f016 2203 int hicount = 0;
63cd0674
NIS
2204 U8 *c;
2205 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2206 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2207 hicount++;
db42d148 2208 }
012bcf8d 2209 }
63cd0674 2210 if (hicount) {
9d4ba2ae 2211 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2212 U8 *src, *dst;
2213 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2214 src = (U8 *)d - 1;
2215 dst = src+hicount;
2216 d += hicount;
cfd0369c 2217 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2218 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2219 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2220 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2221 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2222 }
2223 else {
63cd0674 2224 *dst-- = *src;
012bcf8d 2225 }
c7f1f016 2226 src--;
012bcf8d
GS
2227 }
2228 }
2229 }
2230
9aa983d2 2231 if (has_utf8 || uv > 255) {
9041c2e3 2232 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2233 has_utf8 = TRUE;
f9a63242
JH
2234 if (PL_lex_inwhat == OP_TRANS &&
2235 PL_sublex_info.sub_op) {
2236 PL_sublex_info.sub_op->op_private |=
2237 (PL_lex_repl ? OPpTRANS_FROM_UTF
2238 : OPpTRANS_TO_UTF);
f9a63242 2239 }
e294cc5d
JH
2240#ifdef EBCDIC
2241 if (uv > 255 && !dorange)
2242 native_range = FALSE;
2243#endif
012bcf8d 2244 }
a0ed51b3 2245 else {
012bcf8d 2246 *d++ = (char)uv;
a0ed51b3 2247 }
012bcf8d
GS
2248 }
2249 else {
c4d5f83a 2250 *d++ = (char) uv;
a0ed51b3 2251 }
79072805 2252 continue;
02aa26ce 2253
b239daa5 2254 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2255 case 'N':
55eda711 2256 ++s;
423cee85
JH
2257 if (*s == '{') {
2258 char* e = strchr(s, '}');
155aba94 2259 SV *res;
423cee85 2260 STRLEN len;
cfd0369c 2261 const char *str;
fc8cd66c 2262 SV *type;
4e553d73 2263
423cee85 2264 if (!e) {
5777a3f7 2265 yyerror("Missing right brace on \\N{}");
423cee85
JH
2266 e = s - 1;
2267 goto cont_scan;
2268 }
dbc0d4f2
JH
2269 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2270 /* \N{U+...} */
2271 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2272 PERL_SCAN_DISALLOW_PREFIX;
2273 s += 3;
2274 len = e - s;
2275 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2276 if ( e > s && len != (STRLEN)(e - s) ) {
2277 uv = 0xFFFD;
fc8cd66c 2278 }
dbc0d4f2
JH
2279 s = e + 1;
2280 goto NUM_ESCAPE_INSERT;
2281 }
55eda711 2282 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2283 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2284 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2285 res, NULL, SvPVX(type) );
2286 SvREFCNT_dec(type);
f9a63242
JH
2287 if (has_utf8)
2288 sv_utf8_upgrade(res);
cfd0369c 2289 str = SvPV_const(res,len);
1c47067b
JH
2290#ifdef EBCDIC_NEVER_MIND
2291 /* charnames uses pack U and that has been
2292 * recently changed to do the below uni->native
2293 * mapping, so this would be redundant (and wrong,
2294 * the code point would be doubly converted).
2295 * But leave this in just in case the pack U change
2296 * gets revoked, but the semantics is still
2297 * desireable for charnames. --jhi */
cddc7ef4 2298 {
cfd0369c 2299 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2300
2301 if (uv < 0x100) {
89ebb4a3 2302 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2303
2304 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2305 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2306 str = SvPV_const(res, len);
cddc7ef4
JH
2307 }
2308 }
2309#endif
89491803 2310 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2311 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2312 SvCUR_set(sv, d - ostart);
2313 SvPOK_on(sv);
e4f3eed8 2314 *d = '\0';
f08d6ad9 2315 sv_utf8_upgrade(sv);
d2f449dd 2316 /* this just broke our allocation above... */
eb160463 2317 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2318 d = SvPVX(sv) + SvCUR(sv);
89491803 2319 has_utf8 = TRUE;
f08d6ad9 2320 }
eb160463 2321 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2322 const char * const odest = SvPVX_const(sv);
423cee85 2323
8973db79 2324 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2325 d = SvPVX(sv) + (d - odest);
2326 }
e294cc5d
JH
2327#ifdef EBCDIC
2328 if (!dorange)
2329 native_range = FALSE; /* \N{} is guessed to be Unicode */
2330#endif
423cee85
JH
2331 Copy(str, d, len, char);
2332 d += len;
2333 SvREFCNT_dec(res);
2334 cont_scan:
2335 s = e + 1;
2336 }
2337 else
5777a3f7 2338 yyerror("Missing braces on \\N{}");
423cee85
JH
2339 continue;
2340
02aa26ce 2341 /* \c is a control character */
79072805
LW
2342 case 'c':
2343 s++;
961ce445 2344 if (s < send) {
ba210ebe 2345 U8 c = *s++;
c7f1f016
NIS
2346#ifdef EBCDIC
2347 if (isLOWER(c))
2348 c = toUPPER(c);
2349#endif
db42d148 2350 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2351 }
961ce445
RGS
2352 else {
2353 yyerror("Missing control char name in \\c");
2354 }
79072805 2355 continue;
02aa26ce
NT
2356
2357 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2358 case 'b':
db42d148 2359 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2360 break;
2361 case 'n':
db42d148 2362 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2363 break;
2364 case 'r':
db42d148 2365 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2366 break;
2367 case 'f':
db42d148 2368 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2369 break;
2370 case 't':
db42d148 2371 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2372 break;
34a3fe2a 2373 case 'e':
db42d148 2374 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2375 break;
2376 case 'a':
db42d148 2377 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2378 break;
02aa26ce
NT
2379 } /* end switch */
2380
79072805
LW
2381 s++;
2382 continue;
02aa26ce 2383 } /* end if (backslash) */
4c3a8340
TS
2384#ifdef EBCDIC
2385 else
2386 literal_endpoint++;
2387#endif
02aa26ce 2388
f9a63242 2389 default_action:
2b9d42f0
NIS
2390 /* If we started with encoded form, or already know we want it
2391 and then encode the next character */
2392 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2393 STRLEN len = 1;
5f66b61c
AL
2394 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2395 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2396 s += len;
2397 if (need > len) {
2398 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2399 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2400 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2401 }
5f66b61c 2402 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2403 has_utf8 = TRUE;
e294cc5d
JH
2404#ifdef EBCDIC
2405 if (uv > 255 && !dorange)
2406 native_range = FALSE;
2407#endif
2b9d42f0
NIS
2408 }
2409 else {
2410 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2411 }
02aa26ce
NT
2412 } /* while loop to process each character */
2413
2414 /* terminate the string and set up the sv */
79072805 2415 *d = '\0';
95a20fc0 2416 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2417 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2418 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2419
79072805 2420 SvPOK_on(sv);
9f4817db 2421 if (PL_encoding && !has_utf8) {
d0063567
DK
2422 sv_recode_to_utf8(sv, PL_encoding);
2423 if (SvUTF8(sv))
2424 has_utf8 = TRUE;
9f4817db 2425 }
2b9d42f0 2426 if (has_utf8) {
7e2040f0 2427 SvUTF8_on(sv);
2b9d42f0 2428 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2429 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2430 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2431 }
2432 }
79072805 2433
02aa26ce 2434 /* shrink the sv if we allocated more than we used */
79072805 2435 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2436 SvPV_shrink_to_cur(sv);
79072805 2437 }
02aa26ce 2438
9b599b2a 2439 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2440 if (s > PL_bufptr) {
2441 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2442 sv = new_constant(start, s - start,
2443 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2444 sv, NULL,
10edeb5d
JH
2445 (const char *)
2446 (( PL_lex_inwhat == OP_TRANS
2447 ? "tr"
2448 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2449 ? "s"
2450 : "qq"))));
79072805 2451 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2452 } else
8990e307 2453 SvREFCNT_dec(sv);
79072805
LW
2454 return s;
2455}
2456
ffb4593c
NT
2457/* S_intuit_more
2458 * Returns TRUE if there's more to the expression (e.g., a subscript),
2459 * FALSE otherwise.
ffb4593c
NT
2460 *
2461 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2462 *
2463 * ->[ and ->{ return TRUE
2464 * { and [ outside a pattern are always subscripts, so return TRUE
2465 * if we're outside a pattern and it's not { or [, then return FALSE
2466 * if we're in a pattern and the first char is a {
2467 * {4,5} (any digits around the comma) returns FALSE
2468 * if we're in a pattern and the first char is a [
2469 * [] returns FALSE
2470 * [SOMETHING] has a funky algorithm to decide whether it's a
2471 * character class or not. It has to deal with things like
2472 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2473 * anything else returns TRUE
2474 */
2475
9cbb5ea2
GS
2476/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2477
76e3520e 2478STATIC int
cea2e8a9 2479S_intuit_more(pTHX_ register char *s)
79072805 2480{
97aff369 2481 dVAR;
3280af22 2482 if (PL_lex_brackets)
79072805
LW
2483 return TRUE;
2484 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2485 return TRUE;
2486 if (*s != '{' && *s != '[')
2487 return FALSE;
3280af22 2488 if (!PL_lex_inpat)
79072805
LW
2489 return TRUE;
2490
2491 /* In a pattern, so maybe we have {n,m}. */
2492 if (*s == '{') {
2493 s++;
2494 if (!isDIGIT(*s))
2495 return TRUE;
2496 while (isDIGIT(*s))
2497 s++;
2498 if (*s == ',')
2499 s++;
2500 while (isDIGIT(*s))
2501 s++;
2502 if (*s == '}')
2503 return FALSE;
2504 return TRUE;
2505
2506 }
2507
2508 /* On the other hand, maybe we have a character class */
2509
2510 s++;
2511 if (*s == ']' || *s == '^')
2512 return FALSE;
2513 else {
ffb4593c 2514 /* this is terrifying, and it works */
79072805
LW
2515 int weight = 2; /* let's weigh the evidence */
2516 char seen[256];
f27ffc4a 2517 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2518 const char * const send = strchr(s,']');
3280af22 2519 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2520
2521 if (!send) /* has to be an expression */
2522 return TRUE;
2523
2524 Zero(seen,256,char);
2525 if (*s == '$')
2526 weight -= 3;
2527 else if (isDIGIT(*s)) {
2528 if (s[1] != ']') {
2529 if (isDIGIT(s[1]) && s[2] == ']')
2530 weight -= 10;
2531 }
2532 else
2533 weight -= 100;
2534 }
2535 for (; s < send; s++) {
2536 last_un_char = un_char;
2537 un_char = (unsigned char)*s;
2538 switch (*s) {
2539 case '@':
2540 case '&':
2541 case '$':
2542 weight -= seen[un_char] * 10;
7e2040f0 2543 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2544 int len;
8903cb82 2545 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2546 len = (int)strlen(tmpbuf);
2547 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2548 weight -= 100;
2549 else
2550 weight -= 10;
2551 }
2552 else if (*s == '$' && s[1] &&
93a17b20
LW
2553 strchr("[#!%*<>()-=",s[1])) {
2554 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2555 weight -= 10;
2556 else
2557 weight -= 1;
2558 }
2559 break;
2560 case '\\':
2561 un_char = 254;
2562 if (s[1]) {
93a17b20 2563 if (strchr("wds]",s[1]))
79072805 2564 weight += 100;
10edeb5d 2565 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2566 weight += 1;
93a17b20 2567 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2568 weight += 40;
2569 else if (isDIGIT(s[1])) {
2570 weight += 40;
2571 while (s[1] && isDIGIT(s[1]))
2572 s++;
2573 }
2574 }
2575 else
2576 weight += 100;
2577 break;
2578 case '-':
2579 if (s[1] == '\\')
2580 weight += 50;
93a17b20 2581 if (strchr("aA01! ",last_un_char))
79072805 2582 weight += 30;
93a17b20 2583 if (strchr("zZ79~",s[1]))
79072805 2584 weight += 30;
f27ffc4a
GS
2585 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2586 weight -= 5; /* cope with negative subscript */
79072805
LW
2587 break;
2588 default:
3792a11b
NC
2589 if (!isALNUM(last_un_char)
2590 && !(last_un_char == '$' || last_un_char == '@'
2591 || last_un_char == '&')
2592 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2593 char *d = tmpbuf;
2594 while (isALPHA(*s))
2595 *d++ = *s++;
2596 *d = '\0';
5458a98a 2597 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2598 weight -= 150;
2599 }
2600 if (un_char == last_un_char + 1)
2601 weight += 5;
2602 weight -= seen[un_char];
2603 break;
2604 }
2605 seen[un_char]++;
2606 }
2607 if (weight >= 0) /* probably a character class */
2608 return FALSE;
2609 }
2610
2611 return TRUE;
2612}
ffed7fef 2613
ffb4593c
NT
2614/*
2615 * S_intuit_method
2616 *
2617 * Does all the checking to disambiguate
2618 * foo bar
2619 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2620 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2621 *
2622 * First argument is the stuff after the first token, e.g. "bar".
2623 *
2624 * Not a method if bar is a filehandle.
2625 * Not a method if foo is a subroutine prototyped to take a filehandle.
2626 * Not a method if it's really "Foo $bar"
2627 * Method if it's "foo $bar"
2628 * Not a method if it's really "print foo $bar"
2629 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2630 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2631 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2632 * =>
2633 */
2634
76e3520e 2635STATIC int
62d55b22 2636S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2637{
97aff369 2638 dVAR;
a0d0e21e 2639 char *s = start + (*start == '$');
3280af22 2640 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2641 STRLEN len;
2642 GV* indirgv;
5db06880
NC
2643#ifdef PERL_MAD
2644 int soff;
2645#endif
a0d0e21e
LW
2646
2647 if (gv) {
62d55b22 2648 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2649 return 0;
62d55b22
NC
2650 if (cv) {
2651 if (SvPOK(cv)) {
2652 const char *proto = SvPVX_const(cv);
2653 if (proto) {
2654 if (*proto == ';')
2655 proto++;
2656 if (*proto == '*')
2657 return 0;
2658 }
b6c543e3
IZ
2659 }
2660 } else
c35e046a 2661 gv = NULL;
a0d0e21e 2662 }
8903cb82 2663 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2664 /* start is the beginning of the possible filehandle/object,
2665 * and s is the end of it
2666 * tmpbuf is a copy of it
2667 */
2668
a0d0e21e 2669 if (*start == '$') {
3ef1310e
RGS
2670 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2671 isUPPER(*PL_tokenbuf))
a0d0e21e 2672 return 0;
5db06880
NC
2673#ifdef PERL_MAD
2674 len = start - SvPVX(PL_linestr);
2675#endif
29595ff2 2676 s = PEEKSPACE(s);
f0092767 2677#ifdef PERL_MAD
5db06880
NC
2678 start = SvPVX(PL_linestr) + len;
2679#endif
3280af22
NIS
2680 PL_bufptr = start;
2681 PL_expect = XREF;
a0d0e21e
LW
2682 return *s == '(' ? FUNCMETH : METHOD;
2683 }
5458a98a 2684 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2685 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2686 len -= 2;
2687 tmpbuf[len] = '\0';
5db06880
NC
2688#ifdef PERL_MAD
2689 soff = s - SvPVX(PL_linestr);
2690#endif
c3e0f903
GS
2691 goto bare_package;
2692 }
90e5519e 2693 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2694 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2695 return 0;
2696 /* filehandle or package name makes it a method */
da51bb9b 2697 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2698#ifdef PERL_MAD
2699 soff = s - SvPVX(PL_linestr);
2700#endif
29595ff2 2701 s = PEEKSPACE(s);
3280af22 2702 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2703 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2704 bare_package:
cd81e915 2705 start_force(PL_curforce);
9ded7720 2706 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2707 newSVpvn(tmpbuf,len));
9ded7720 2708 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2709 if (PL_madskills)
2710 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2711 PL_expect = XTERM;
a0d0e21e 2712 force_next(WORD);
3280af22 2713 PL_bufptr = s;
5db06880
NC
2714#ifdef PERL_MAD
2715 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2716#endif
a0d0e21e
LW
2717 return *s == '(' ? FUNCMETH : METHOD;
2718 }
2719 }
2720 return 0;
2721}
2722
ffb4593c
NT
2723/*
2724 * S_incl_perldb
2725 * Return a string of Perl code to load the debugger. If PERL5DB
2726 * is set, it will return the contents of that, otherwise a
2727 * compile-time require of perl5db.pl.
2728 */
2729
bfed75c6 2730STATIC const char*
cea2e8a9 2731S_incl_perldb(pTHX)
a0d0e21e 2732{
97aff369 2733 dVAR;
3280af22 2734 if (PL_perldb) {
9d4ba2ae 2735 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2736
2737 if (pdb)
2738 return pdb;
93189314 2739 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2740 return "BEGIN { require 'perl5db.pl' }";
2741 }
2742 return "";
2743}
2744
2745
16d20bd9 2746/* Encoded script support. filter_add() effectively inserts a
4e553d73 2747 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2748 * Note that the filter function only applies to the current source file
2749 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2750 *
2751 * The datasv parameter (which may be NULL) can be used to pass
2752 * private data to this instance of the filter. The filter function
2753 * can recover the SV using the FILTER_DATA macro and use it to
2754 * store private buffers and state information.
2755 *
2756 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2757 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2758 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2759 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2760 * private use must be set using malloc'd pointers.
2761 */
16d20bd9
AD
2762
2763SV *
864dbfa3 2764Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2765{
97aff369 2766 dVAR;
f4c556ac 2767 if (!funcp)
a0714e2c 2768 return NULL;
f4c556ac 2769
3280af22
NIS
2770 if (!PL_rsfp_filters)
2771 PL_rsfp_filters = newAV();
16d20bd9 2772 if (!datasv)
561b68a9 2773 datasv = newSV(0);
862a34c6 2774 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2775 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2776 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2777 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2778 FPTR2DPTR(void *, IoANY(datasv)),
2779 SvPV_nolen(datasv)));
3280af22
NIS
2780 av_unshift(PL_rsfp_filters, 1);
2781 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2782 return(datasv);
2783}
4e553d73 2784
16d20bd9
AD
2785
2786/* Delete most recently added instance of this filter function. */
a0d0e21e 2787void
864dbfa3 2788Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2789{
97aff369 2790 dVAR;
e0c19803 2791 SV *datasv;
24801a4b 2792
33073adb 2793#ifdef DEBUGGING
55662e27
JH
2794 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2795 FPTR2DPTR(void*, funcp)));
33073adb 2796#endif
3280af22 2797 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2798 return;
2799 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2800 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2801 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2802 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2803 IoANY(datasv) = (void *)NULL;
3280af22 2804 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2805
16d20bd9
AD
2806 return;
2807 }
2808 /* we need to search for the correct entry and clear it */
cea2e8a9 2809 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2810}
2811
2812
1de9afcd
RGS
2813/* Invoke the idxth filter function for the current rsfp. */
2814/* maxlen 0 = read one text line */
16d20bd9 2815I32
864dbfa3 2816Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2817{
97aff369 2818 dVAR;
16d20bd9
AD
2819 filter_t funcp;
2820 SV *datasv = NULL;
f482118e
NC
2821 /* This API is bad. It should have been using unsigned int for maxlen.
2822 Not sure if we want to change the API, but if not we should sanity
2823 check the value here. */
39cd7a59
NC
2824 const unsigned int correct_length
2825 = maxlen < 0 ?
2826#ifdef PERL_MICRO
2827 0x7FFFFFFF
2828#else
2829 INT_MAX
2830#endif
2831 : maxlen;
e50aee73 2832
3280af22 2833 if (!PL_rsfp_filters)
16d20bd9 2834 return -1;
1de9afcd 2835 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2836 /* Provide a default input filter to make life easy. */
2837 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2838 DEBUG_P(PerlIO_printf(Perl_debug_log,
2839 "filter_read %d: from rsfp\n", idx));
f482118e 2840 if (correct_length) {
16d20bd9
AD
2841 /* Want a block */
2842 int len ;
f54cb97a 2843 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2844
2845 /* ensure buf_sv is large enough */
f482118e
NC
2846 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2847 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2848 correct_length)) <= 0) {
3280af22 2849 if (PerlIO_error(PL_rsfp))
37120919
AD
2850 return -1; /* error */
2851 else
2852 return 0 ; /* end of file */
2853 }
16d20bd9
AD
2854 SvCUR_set(buf_sv, old_len + len) ;
2855 } else {
2856 /* Want a line */
3280af22
NIS
2857 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2858 if (PerlIO_error(PL_rsfp))
37120919
AD
2859 return -1; /* error */
2860 else
2861 return 0 ; /* end of file */
2862 }
16d20bd9
AD
2863 }
2864 return SvCUR(buf_sv);
2865 }
2866 /* Skip this filter slot if filter has been deleted */
1de9afcd 2867 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2868 DEBUG_P(PerlIO_printf(Perl_debug_log,
2869 "filter_read %d: skipped (filter deleted)\n",
2870 idx));
f482118e 2871 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2872 }
2873 /* Get function pointer hidden within datasv */
8141890a 2874 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2875 DEBUG_P(PerlIO_printf(Perl_debug_log,
2876 "filter_read %d: via function %p (%s)\n",
ca0270c4 2877 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2878 /* Call function. The function is expected to */
2879 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2880 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2881 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2882}
2883
76e3520e 2884STATIC char *
cea2e8a9 2885S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2886{
97aff369 2887 dVAR;
c39cd008 2888#ifdef PERL_CR_FILTER
3280af22 2889 if (!PL_rsfp_filters) {
c39cd008 2890 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2891 }
2892#endif
3280af22 2893 if (PL_rsfp_filters) {
55497cff 2894 if (!append)
2895 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2896 if (FILTER_READ(0, sv, 0) > 0)
2897 return ( SvPVX(sv) ) ;
2898 else
bd61b366 2899 return NULL ;
16d20bd9 2900 }
9d116dd7 2901 else
fd049845 2902 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2903}
2904
01ec43d0 2905STATIC HV *
7fc63493 2906S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2907{
97aff369 2908 dVAR;
def3634b
GS
2909 GV *gv;
2910
01ec43d0 2911 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2912 return PL_curstash;
2913
2914 if (len > 2 &&
2915 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2916 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2917 {
2918 return GvHV(gv); /* Foo:: */
def3634b
GS
2919 }
2920
2921 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2922 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2923 if (gv && GvCV(gv)) {
2924 SV * const sv = cv_const_sv(GvCV(gv));
2925 if (sv)
83003860 2926 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2927 }
2928
da51bb9b 2929 return gv_stashpv(pkgname, 0);
def3634b 2930}
a0d0e21e 2931
e3f73d4e
RGS
2932/*
2933 * S_readpipe_override
2934 * Check whether readpipe() is overriden, and generates the appropriate
2935 * optree, provided sublex_start() is called afterwards.
2936 */
2937STATIC void
1d51329b 2938S_readpipe_override(pTHX)
e3f73d4e
RGS
2939{
2940 GV **gvp;
2941 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2942 yylval.ival = OP_BACKTICK;
2943 if ((gv_readpipe
2944 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2945 ||
2946 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2947 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2948 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2949 {
2950 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2951 append_elem(OP_LIST,
2952 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2953 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2954 }
2955 else {
2956 set_csh();
2957 }
2958}
2959
5db06880
NC
2960#ifdef PERL_MAD
2961 /*
2962 * Perl_madlex
2963 * The intent of this yylex wrapper is to minimize the changes to the
2964 * tokener when we aren't interested in collecting madprops. It remains
2965 * to be seen how successful this strategy will be...
2966 */
2967
2968int
2969Perl_madlex(pTHX)
2970{
2971 int optype;
2972 char *s = PL_bufptr;
2973
cd81e915
NC
2974 /* make sure PL_thiswhite is initialized */
2975 PL_thiswhite = 0;
2976 PL_thismad = 0;
5db06880 2977
cd81e915 2978 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2979 if (PL_pending_ident)
2980 return S_pending_ident(aTHX);
2981
2982 /* previous token ate up our whitespace? */
cd81e915
NC
2983 if (!PL_lasttoke && PL_nextwhite) {
2984 PL_thiswhite = PL_nextwhite;
2985 PL_nextwhite = 0;
5db06880
NC
2986 }
2987
2988 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2989 PL_realtokenstart = -1;
2990 PL_thistoken = 0;
5db06880
NC
2991 optype = yylex();
2992 s = PL_bufptr;
cd81e915 2993 assert(PL_curforce < 0);
5db06880 2994
cd81e915
NC
2995 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2996 if (!PL_thistoken) {
2997 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2998 PL_thistoken = newSVpvs("");
5db06880 2999 else {
c35e046a 3000 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3001 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3002 }
3003 }
cd81e915
NC
3004 if (PL_thismad) /* install head */
3005 CURMAD('X', PL_thistoken);
5db06880
NC
3006 }
3007
3008 /* last whitespace of a sublex? */
cd81e915
NC
3009 if (optype == ')' && PL_endwhite) {
3010 CURMAD('X', PL_endwhite);
5db06880
NC
3011 }
3012
cd81e915 3013 if (!PL_thismad) {
5db06880
NC
3014
3015 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3016 if (!PL_thiswhite && !PL_endwhite && !optype) {
3017 sv_free(PL_thistoken);
3018 PL_thistoken = 0;
5db06880
NC
3019 return 0;
3020 }
3021
3022 /* put off final whitespace till peg */
3023 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3024 PL_nextwhite = PL_thiswhite;
3025 PL_thiswhite = 0;
5db06880 3026 }
cd81e915
NC
3027 else if (PL_thisopen) {
3028 CURMAD('q', PL_thisopen);
3029 if (PL_thistoken)
3030 sv_free(PL_thistoken);
3031 PL_thistoken = 0;
5db06880
NC
3032 }
3033 else {
3034 /* Store actual token text as madprop X */
cd81e915 3035 CURMAD('X', PL_thistoken);
5db06880
NC
3036 }
3037
cd81e915 3038 if (PL_thiswhite) {
5db06880 3039 /* add preceding whitespace as madprop _ */
cd81e915 3040 CURMAD('_', PL_thiswhite);
5db06880
NC
3041 }
3042
cd81e915 3043 if (PL_thisstuff) {
5db06880 3044 /* add quoted material as madprop = */
cd81e915 3045 CURMAD('=', PL_thisstuff);
5db06880
NC
3046 }
3047
cd81e915 3048 if (PL_thisclose) {
5db06880 3049 /* add terminating quote as madprop Q */
cd81e915 3050 CURMAD('Q', PL_thisclose);
5db06880
NC
3051 }
3052 }
3053
3054 /* special processing based on optype */
3055
3056 switch (optype) {
3057
3058 /* opval doesn't need a TOKEN since it can already store mp */
3059 case WORD:
3060 case METHOD:
3061 case FUNCMETH:
3062 case THING:
3063 case PMFUNC:
3064 case PRIVATEREF:
3065 case FUNC0SUB:
3066 case UNIOPSUB:
3067 case LSTOPSUB:
3068 if (yylval.opval)
cd81e915
NC
3069 append_madprops(PL_thismad, yylval.opval, 0);
3070 PL_thismad = 0;
5db06880
NC
3071 return optype;
3072
3073 /* fake EOF */
3074 case 0:
3075 optype = PEG;
cd81e915
NC
3076 if (PL_endwhite) {
3077 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3078 PL_endwhite = 0;
5db06880
NC
3079 }
3080 break;
3081
3082 case ']':
3083 case '}':
cd81e915 3084 if (PL_faketokens)
5db06880
NC
3085 break;
3086 /* remember any fake bracket that lexer is about to discard */
3087 if (PL_lex_brackets == 1 &&
3088 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3089 {
3090 s = PL_bufptr;
3091 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3092 s++;
3093 if (*s == '}') {
cd81e915
NC
3094 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3095 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3096 PL_thiswhite = 0;
5db06880
NC
3097 PL_bufptr = s - 1;
3098 break; /* don't bother looking for trailing comment */
3099 }
3100 else
3101 s = PL_bufptr;
3102 }
3103 if (optype == ']')
3104 break;
3105 /* FALLTHROUGH */
3106
3107 /* attach a trailing comment to its statement instead of next token */
3108 case ';':
cd81e915 3109 if (PL_faketokens)
5db06880
NC
3110 break;
3111 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3112 s = PL_bufptr;
3113 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3114 s++;
3115 if (*s == '\n' || *s == '#') {
3116 while (s < PL_bufend && *s != '\n')
3117 s++;
3118 if (s < PL_bufend)
3119 s++;
cd81e915
NC
3120 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3121 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3122 PL_thiswhite = 0;
5db06880
NC
3123 PL_bufptr = s;
3124 }
3125 }
3126 break;
3127
3128 /* pval */
3129 case LABEL:
3130 break;
3131
3132 /* ival */
3133 default:
3134 break;
3135
3136 }
3137
3138 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3139 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3140 PL_thismad = 0;
5db06880
NC
3141 return optype;
3142}
3143#endif
3144
468aa647 3145STATIC char *
cc6ed77d 3146S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3147 dVAR;
468aa647
RGS
3148 if (PL_expect != XSTATE)
3149 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3150 is_use ? "use" : "no"));
29595ff2 3151 s = SKIPSPACE1(s);
468aa647
RGS
3152 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3153 s = force_version(s, TRUE);
29595ff2 3154 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3155 start_force(PL_curforce);
9ded7720 3156 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3157 force_next(WORD);
3158 }
3159 else if (*s == 'v') {
3160 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3161 s = force_version(s, FALSE);
3162 }
3163 }
3164 else {
3165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3166 s = force_version(s, FALSE);
3167 }
3168 yylval.ival = is_use;
3169 return s;
3170}
748a9306 3171#ifdef DEBUGGING
27da23d5 3172 static const char* const exp_name[] =
09bef843 3173 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3174 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3175 };
748a9306 3176#endif
463ee0b2 3177
02aa26ce
NT
3178/*
3179 yylex
3180
3181 Works out what to call the token just pulled out of the input
3182 stream. The yacc parser takes care of taking the ops we return and
3183 stitching them into a tree.
3184
3185 Returns:
3186 PRIVATEREF
3187
3188 Structure:
3189 if read an identifier
3190 if we're in a my declaration
3191 croak if they tried to say my($foo::bar)
3192 build the ops for a my() declaration
3193 if it's an access to a my() variable
3194 are we in a sort block?
3195 croak if my($a); $a <=> $b
3196 build ops for access to a my() variable
3197 if in a dq string, and they've said @foo and we can't find @foo
3198 croak
3199 build ops for a bareword
3200 if we already built the token before, use it.
3201*/
3202
20141f0e 3203
dba4d153
JH
3204#ifdef __SC__
3205#pragma segment Perl_yylex
3206#endif
dba4d153 3207int
dba4d153 3208Perl_yylex(pTHX)
20141f0e 3209{
97aff369 3210 dVAR;
3afc138a 3211 register char *s = PL_bufptr;
378cc40b 3212 register char *d;
463ee0b2 3213 STRLEN len;
aa7440fb 3214 bool bof = FALSE;
a687059c 3215
10edeb5d
JH
3216 /* orig_keyword, gvp, and gv are initialized here because
3217 * jump to the label just_a_word_zero can bypass their
3218 * initialization later. */
3219 I32 orig_keyword = 0;
3220 GV *gv = NULL;
3221 GV **gvp = NULL;
3222
bbf60fe6 3223 DEBUG_T( {
396482e1 3224 SV* tmp = newSVpvs("");
b6007c36
DM
3225 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3226 (IV)CopLINE(PL_curcop),
3227 lex_state_names[PL_lex_state],
3228 exp_name[PL_expect],
3229 pv_display(tmp, s, strlen(s), 0, 60));
3230 SvREFCNT_dec(tmp);
bbf60fe6 3231 } );
02aa26ce 3232 /* check if there's an identifier for us to look at */
ba979b31 3233 if (PL_pending_ident)
bbf60fe6 3234 return REPORT(S_pending_ident(aTHX));
bbce6d69 3235
02aa26ce
NT
3236 /* no identifier pending identification */
3237
3280af22 3238 switch (PL_lex_state) {
79072805
LW
3239#ifdef COMMENTARY
3240 case LEX_NORMAL: /* Some compilers will produce faster */
3241 case LEX_INTERPNORMAL: /* code if we comment these out. */
3242 break;
3243#endif
3244
09bef843 3245 /* when we've already built the next token, just pull it out of the queue */
79072805 3246 case LEX_KNOWNEXT:
5db06880
NC
3247#ifdef PERL_MAD
3248 PL_lasttoke--;
3249 yylval = PL_nexttoke[PL_lasttoke].next_val;
3250 if (PL_madskills) {
cd81e915 3251 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3252 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3253 if (PL_thismad && PL_thismad->mad_key == '_') {
3254 PL_thiswhite = (SV*)PL_thismad->mad_val;
3255 PL_thismad->mad_val = 0;
3256 mad_free(PL_thismad);
3257 PL_thismad = 0;
5db06880
NC
3258 }
3259 }
3260 if (!PL_lasttoke) {
3261 PL_lex_state = PL_lex_defer;
3262 PL_expect = PL_lex_expect;
3263 PL_lex_defer = LEX_NORMAL;
3264 if (!PL_nexttoke[PL_lasttoke].next_type)
3265 return yylex();
3266 }
3267#else
3280af22 3268 PL_nexttoke--;
5db06880 3269 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3270 if (!PL_nexttoke) {
3271 PL_lex_state = PL_lex_defer;
3272 PL_expect = PL_lex_expect;
3273 PL_lex_defer = LEX_NORMAL;
463ee0b2 3274 }
5db06880
NC
3275#endif
3276#ifdef PERL_MAD
3277 /* FIXME - can these be merged? */
3278 return(PL_nexttoke[PL_lasttoke].next_type);
3279#else
bbf60fe6 3280 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3281#endif
79072805 3282
02aa26ce 3283 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3284 when we get here, PL_bufptr is at the \
02aa26ce 3285 */
79072805
LW
3286 case LEX_INTERPCASEMOD:
3287#ifdef DEBUGGING
3280af22 3288 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3289 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3290#endif
02aa26ce 3291 /* handle \E or end of string */
3280af22 3292 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3293 /* if at a \E */
3280af22 3294 if (PL_lex_casemods) {
f54cb97a 3295 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3296 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3297
3792a11b
NC
3298 if (PL_bufptr != PL_bufend
3299 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3300 PL_bufptr += 2;
3301 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3302#ifdef PERL_MAD
3303 if (PL_madskills)
6b29d1f5 3304 PL_thistoken = newSVpvs("\\E");
5db06880 3305#endif
a0d0e21e 3306 }
bbf60fe6 3307 return REPORT(')');
79072805 3308 }
5db06880
NC
3309#ifdef PERL_MAD
3310 while (PL_bufptr != PL_bufend &&
3311 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3312 if (!PL_thiswhite)
6b29d1f5 3313 PL_thiswhite = newSVpvs("");
cd81e915 3314 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3315 PL_bufptr += 2;
3316 }
3317#else
3280af22
NIS
3318 if (PL_bufptr != PL_bufend)
3319 PL_bufptr += 2;
5db06880 3320#endif
3280af22 3321 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3322 return yylex();
79072805
LW
3323 }
3324 else {
607df283 3325 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3326 "### Saw case modifier\n"); });
3280af22 3327 s = PL_bufptr + 1;
6e909404 3328 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3329#ifdef PERL_MAD
cd81e915 3330 if (!PL_thiswhite)
6b29d1f5 3331 PL_thiswhite = newSVpvs("");
cd81e915 3332 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3333#endif
89122651 3334 PL_bufptr = s + 3;
6e909404
JH
3335 PL_lex_state = LEX_INTERPCONCAT;
3336 return yylex();
a0d0e21e 3337 }
6e909404 3338 else {
90771dc0 3339 I32 tmp;
5db06880
NC
3340 if (!PL_madskills) /* when just compiling don't need correct */
3341 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3342 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3343 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3344 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3345 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3346 return REPORT(')');
6e909404
JH
3347 }
3348 if (PL_lex_casemods > 10)
3349 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3350 PL_lex_casestack[PL_lex_casemods++] = *s;
3351 PL_lex_casestack[PL_lex_casemods] = '\0';
3352 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3353 start_force(PL_curforce);
9ded7720 3354 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3355 force_next('(');
cd81e915 3356 start_force(PL_curforce);
6e909404 3357 if (*s == 'l')
9ded7720 3358 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3359 else if (*s == 'u')
9ded7720 3360 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3361 else if (*s == 'L')
9ded7720 3362 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3363 else if (*s == 'U')
9ded7720 3364 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3365 else if (*s == 'Q')
9ded7720 3366 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3367 else
3368 Perl_croak(aTHX_ "panic: yylex");
5db06880 3369 if (PL_madskills) {
6b29d1f5 3370 SV* const tmpsv = newSVpvs("");
5db06880
NC
3371 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3372 curmad('_', tmpsv);
3373 }
6e909404 3374 PL_bufptr = s + 1;
a0d0e21e 3375 }
79072805 3376 force_next(FUNC);
3280af22
NIS
3377 if (PL_lex_starts) {
3378 s = PL_bufptr;
3379 PL_lex_starts = 0;
5db06880
NC
3380#ifdef PERL_MAD
3381 if (PL_madskills) {
cd81e915
NC
3382 if (PL_thistoken)
3383 sv_free(PL_thistoken);
6b29d1f5 3384 PL_thistoken = newSVpvs("");
5db06880
NC
3385 }
3386#endif
131b3ad0
DM
3387 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3388 if (PL_lex_casemods == 1 && PL_lex_inpat)
3389 OPERATOR(',');
3390 else
3391 Aop(OP_CONCAT);
79072805
LW
3392 }
3393 else
cea2e8a9 3394 return yylex();
79072805
LW
3395 }
3396
55497cff 3397 case LEX_INTERPPUSH:
bbf60fe6 3398 return REPORT(sublex_push());
55497cff 3399
79072805 3400 case LEX_INTERPSTART:
3280af22 3401 if (PL_bufptr == PL_bufend)
bbf60fe6 3402 return REPORT(sublex_done());
607df283 3403 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3404 "### Interpolated variable\n"); });
3280af22
NIS
3405 PL_expect = XTERM;
3406 PL_lex_dojoin = (*PL_bufptr == '@');
3407 PL_lex_state = LEX_INTERPNORMAL;
3408 if (PL_lex_dojoin) {
cd81e915 3409 start_force(PL_curforce);
9ded7720 3410 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3411 force_next(',');
cd81e915 3412 start_force(PL_curforce);
a0d0e21e 3413 force_ident("\"", '$');
cd81e915 3414 start_force(PL_curforce);
9ded7720 3415 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3416 force_next('$');
cd81e915 3417 start_force(PL_curforce);
9ded7720 3418 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3419 force_next('(');
cd81e915 3420 start_force(PL_curforce);
9ded7720 3421 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3422 force_next(FUNC);
3423 }
3280af22
NIS
3424 if (PL_lex_starts++) {
3425 s = PL_bufptr;
5db06880
NC
3426#ifdef PERL_MAD
3427 if (PL_madskills) {
cd81e915
NC
3428 if (PL_thistoken)
3429 sv_free(PL_thistoken);
6b29d1f5 3430 PL_thistoken = newSVpvs("");
5db06880
NC
3431 }
3432#endif
131b3ad0
DM
3433 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3434 if (!PL_lex_casemods && PL_lex_inpat)
3435 OPERATOR(',');
3436 else
3437 Aop(OP_CONCAT);
79072805 3438 }
cea2e8a9 3439 return yylex();
79072805
LW
3440
3441 case LEX_INTERPENDMAYBE:
3280af22
NIS
3442 if (intuit_more(PL_bufptr)) {
3443 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3444 break;
3445 }
3446 /* FALL THROUGH */
3447
3448 case LEX_INTERPEND:
3280af22
NIS
3449 if (PL_lex_dojoin) {
3450 PL_lex_dojoin = FALSE;
3451 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3452#ifdef PERL_MAD
3453 if (PL_madskills) {
cd81e915
NC
3454 if (PL_thistoken)
3455 sv_free(PL_thistoken);
6b29d1f5 3456 PL_thistoken = newSVpvs("");
5db06880
NC
3457 }
3458#endif
bbf60fe6 3459 return REPORT(')');
79072805 3460 }
43a16006 3461 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3462 && SvEVALED(PL_lex_repl))
43a16006 3463 {
e9fa98b2 3464 if (PL_bufptr != PL_bufend)
cea2e8a9 3465 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3466 PL_lex_repl = NULL;
e9fa98b2 3467 }
79072805
LW
3468 /* FALLTHROUGH */
3469 case LEX_INTERPCONCAT:
3470#ifdef DEBUGGING
3280af22 3471 if (PL_lex_brackets)
cea2e8a9 3472 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3473#endif
3280af22 3474 if (PL_bufptr == PL_bufend)
bbf60fe6 3475 return REPORT(sublex_done());
79072805 3476
3280af22
NIS
3477 if (SvIVX(PL_linestr) == '\'') {
3478 SV *sv = newSVsv(PL_linestr);
3479 if (!PL_lex_inpat)
76e3520e 3480 sv = tokeq(sv);
3280af22 3481 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3482 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3483 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3484 s = PL_bufend;
79072805
LW
3485 }
3486 else {
3280af22 3487 s = scan_const(PL_bufptr);
79072805 3488 if (*s == '\\')
3280af22 3489 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3490 else
3280af22 3491 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3492 }
3493
3280af22 3494 if (s != PL_bufptr) {
cd81e915 3495 start_force(PL_curforce);
5db06880
NC
3496 if (PL_madskills) {
3497 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3498 }
9ded7720 3499 NEXTVAL_NEXTTOKE = yylval;
3280af22 3500 PL_expect = XTERM;
79072805 3501 force_next(THING);
131b3ad0 3502 if (PL_lex_starts++) {
5db06880
NC
3503#ifdef PERL_MAD
3504 if (PL_madskills) {
cd81e915
NC
3505 if (PL_thistoken)
3506 sv_free(PL_thistoken);
6b29d1f5 3507 PL_thistoken = newSVpvs("");
5db06880
NC
3508 }
3509#endif
131b3ad0
DM
3510 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3511 if (!PL_lex_casemods && PL_lex_inpat)
3512 OPERATOR(',');
3513 else
3514 Aop(OP_CONCAT);
3515 }
79072805 3516 else {
3280af22 3517 PL_bufptr = s;
cea2e8a9 3518 return yylex();
79072805
LW
3519 }
3520 }
3521
cea2e8a9 3522 return yylex();
a0d0e21e 3523 case LEX_FORMLINE:
3280af22
NIS
3524 PL_lex_state = LEX_NORMAL;
3525 s = scan_formline(PL_bufptr);
3526 if (!PL_lex_formbrack)
a0d0e21e
LW
3527 goto rightbracket;
3528 OPERATOR(';');
79072805
LW
3529 }
3530
3280af22
NIS
3531 s = PL_bufptr;
3532 PL_oldoldbufptr = PL_oldbufptr;
3533 PL_oldbufptr = s;
463ee0b2
LW
3534
3535 retry:
5db06880 3536#ifdef PERL_MAD
cd81e915
NC
3537 if (PL_thistoken) {
3538 sv_free(PL_thistoken);
3539 PL_thistoken = 0;
5db06880 3540 }
cd81e915 3541 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3542#endif
378cc40b
LW
3543 switch (*s) {
3544 default:
7e2040f0 3545 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3546 goto keylookup;
cea2e8a9 3547