This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Neither gv_fetchpvn_flags() nor hv_fetch() need a NUL terminated
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
52
53#ifdef PERL_MAD
54# define PL_endwhite (PL_parser->endwhite)
55# define PL_faketokens (PL_parser->faketokens)
56# define PL_lasttoke (PL_parser->lasttoke)
57# define PL_nextwhite (PL_parser->nextwhite)
58# define PL_realtokenstart (PL_parser->realtokenstart)
59# define PL_skipwhite (PL_parser->skipwhite)
60# define PL_thisclose (PL_parser->thisclose)
61# define PL_thismad (PL_parser->thismad)
62# define PL_thisopen (PL_parser->thisopen)
63# define PL_thisstuff (PL_parser->thisstuff)
64# define PL_thistoken (PL_parser->thistoken)
65# define PL_thiswhite (PL_parser->thiswhite)
66#endif
67
3cbf51f5
DM
68static int
69S_pending_ident(pTHX);
199e78b7 70
0bd48802 71static const char ident_too_long[] = "Identifier too long";
c445ea15 72static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 73
acfe0abc 74static void restore_rsfp(pTHX_ void *f);
6e3aabd6 75#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
76static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 78#endif
51371543 79
29595ff2 80#ifdef PERL_MAD
29595ff2 81# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 82# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 83#else
5db06880 84# define CURMAD(slot,sv)
9ded7720 85# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
86#endif
87
9059aa12
LW
88#define XFAKEBRACK 128
89#define XENUMMASK 127
90
39e02b42
JH
91#ifdef USE_UTF8_SCRIPTS
92# define UTF (!IN_BYTES)
2b9d42f0 93#else
746b446a 94# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 95#endif
a0ed51b3 96
61f0cdd9 97/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
98 * 1999-02-27 mjd-perl-patch@plover.com */
99#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
100
bf4acbe4
GS
101/* On MacOS, respect nonbreaking spaces */
102#ifdef MACOS_TRADITIONAL
103#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
104#else
105#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
106#endif
107
ffb4593c
NT
108/* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
79072805
LW
110 * can get by with a single comparison (if the compiler is smart enough).
111 */
112
fb73857a
PP
113/* #define LEX_NOTPARSING 11 is done in perl.h. */
114
b6007c36
DM
115#define LEX_NORMAL 10 /* normal code (ie not within "...") */
116#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119#define LEX_INTERPSTART 6 /* expecting the start of a $var */
120
121 /* at end of code, eg "$x" followed by: */
122#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
124
125#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127#define LEX_INTERPCONST 2 /* NOT USED */
128#define LEX_FORMLINE 1 /* expecting a format line */
129#define LEX_KNOWNEXT 0 /* next token known; just return it */
130
79072805 131
bbf60fe6 132#ifdef DEBUGGING
27da23d5 133static const char* const lex_state_names[] = {
bbf60fe6
DM
134 "KNOWNEXT",
135 "FORMLINE",
136 "INTERPCONST",
137 "INTERPCONCAT",
138 "INTERPENDMAYBE",
139 "INTERPEND",
140 "INTERPSTART",
141 "INTERPPUSH",
142 "INTERPCASEMOD",
143 "INTERPNORMAL",
144 "NORMAL"
145};
146#endif
147
79072805
LW
148#ifdef ff_next
149#undef ff_next
d48672a2
LW
150#endif
151
79072805 152#include "keywords.h"
fe14fcc3 153
ffb4593c
NT
154/* CLINE is a macro that ensures PL_copline has a sane value */
155
ae986130
LW
156#ifdef CLINE
157#undef CLINE
158#endif
57843af0 159#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 160
5db06880 161#ifdef PERL_MAD
29595ff2
NC
162# define SKIPSPACE0(s) skipspace0(s)
163# define SKIPSPACE1(s) skipspace1(s)
164# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165# define PEEKSPACE(s) skipspace2(s,0)
166#else
167# define SKIPSPACE0(s) skipspace(s)
168# define SKIPSPACE1(s) skipspace(s)
169# define SKIPSPACE2(s,tsv) skipspace(s)
170# define PEEKSPACE(s) skipspace(s)
171#endif
172
ffb4593c
NT
173/*
174 * Convenience functions to return different tokens and prime the
9cbb5ea2 175 * lexer for the next token. They all take an argument.
ffb4593c
NT
176 *
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
2d2e263d 187 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
188 * BOop : bitwise or or xor
189 * BAop : bitwise and
190 * SHop : shift operator
191 * PWop : power operator
9cbb5ea2 192 * PMop : pattern-matching operator
ffb4593c
NT
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
e5edeb50 196 * Rop : relational operator <= != gt
ffb4593c
NT
197 *
198 * Also see LOP and lop() below.
199 */
200
998054bd 201#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 202# define REPORT(retval) tokereport((I32)retval)
998054bd 203#else
bbf60fe6 204# define REPORT(retval) (retval)
998054bd
SC
205#endif
206
bbf60fe6
DM
207#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 227
a687059c
LW
228/* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
a687059c 232 */
376fcdbf
AL
233#define UNI2(f,x) { \
234 yylval.ival = f; \
235 PL_expect = x; \
236 PL_bufptr = s; \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
239 if (*s == '(') \
240 return REPORT( (int)FUNC1 ); \
29595ff2 241 s = PEEKSPACE(s); \
376fcdbf
AL
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
243 }
6f33ba73
RGS
244#define UNI(f) UNI2(f,XTERM)
245#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 246
376fcdbf
AL
247#define UNIBRACK(f) { \
248 yylval.ival = f; \
249 PL_bufptr = s; \
250 PL_last_uni = PL_oldbufptr; \
251 if (*s == '(') \
252 return REPORT( (int)FUNC1 ); \
29595ff2 253 s = PEEKSPACE(s); \
376fcdbf
AL
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
255 }
79072805 256
9f68db38 257/* grandfather return to old style */
3280af22 258#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 259
8fa7f367
JH
260#ifdef DEBUGGING
261
bbf60fe6
DM
262/* how to interpret the yylval associated with the token */
263enum token_type {
264 TOKENTYPE_NONE,
265 TOKENTYPE_IVAL,
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
267 TOKENTYPE_PVAL,
268 TOKENTYPE_OPVAL,
269 TOKENTYPE_GVVAL
270};
271
6d4a66ac
NC
272static struct debug_tokens {
273 const int token;
274 enum token_type type;
275 const char *name;
276} const debug_tokens[] =
9041c2e3 277{
bbf60fe6
DM
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 343 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
344};
345
346/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 347
bbf60fe6 348STATIC int
f5bd084c 349S_tokereport(pTHX_ I32 rv)
bbf60fe6 350{
97aff369 351 dVAR;
bbf60fe6 352 if (DEBUG_T_TEST) {
bd61b366 353 const char *name = NULL;
bbf60fe6 354 enum token_type type = TOKENTYPE_NONE;
f54cb97a 355 const struct debug_tokens *p;
396482e1 356 SV* const report = newSVpvs("<== ");
bbf60fe6 357
f54cb97a 358 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
359 if (p->token == (int)rv) {
360 name = p->name;
361 type = p->type;
362 break;
363 }
364 }
365 if (name)
54667de8 366 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
369 else if (!rv)
396482e1 370 sv_catpvs(report, "EOF");
bbf60fe6
DM
371 else
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
373 switch (type) {
374 case TOKENTYPE_NONE:
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
376 break;
377 case TOKENTYPE_IVAL:
e4584336 378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
379 break;
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
383 break;
384 case TOKENTYPE_PVAL:
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
386 break;
387 case TOKENTYPE_OPVAL:
b6007c36 388 if (yylval.opval) {
401441c0 389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 390 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
394 }
395
396 }
401441c0 397 else
396482e1 398 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
399 break;
400 }
b6007c36 401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
402 };
403 return (int)rv;
998054bd
SC
404}
405
b6007c36
DM
406
407/* print the buffer with suitable escapes */
408
409STATIC void
410S_printbuf(pTHX_ const char* fmt, const char* s)
411{
396482e1 412 SV* const tmp = newSVpvs("");
b6007c36
DM
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
414 SvREFCNT_dec(tmp);
415}
416
8fa7f367
JH
417#endif
418
ffb4593c
NT
419/*
420 * S_ao
421 *
c963b151
BD
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
424 */
425
76e3520e 426STATIC int
cea2e8a9 427S_ao(pTHX_ int toketype)
a0d0e21e 428{
97aff369 429 dVAR;
3280af22
NIS
430 if (*PL_bufptr == '=') {
431 PL_bufptr++;
a0d0e21e
LW
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
c963b151
BD
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
438 toketype = ASSIGNOP;
439 }
440 return toketype;
441}
442
ffb4593c
NT
443/*
444 * S_no_op
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
454 */
455
76e3520e 456STATIC void
bfed75c6 457S_no_op(pTHX_ const char *what, char *s)
463ee0b2 458{
97aff369 459 dVAR;
9d4ba2ae
AL
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 462
1189a94a
GS
463 if (!s)
464 s = oldbp;
07c798fb 465 else
1189a94a 466 PL_bufptr = s;
cea2e8a9 467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
468 if (ckWARN_d(WARN_SYNTAX)) {
469 if (is_first)
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 473 const char *t;
c35e046a
AL
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
475 NOOP;
56da5a46
RGS
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
551405c4 479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
480 }
481 else {
482 assert(s >= oldbp);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 485 }
07c798fb 486 }
3280af22 487 PL_bufptr = oldbp;
8990e307
LW
488}
489
ffb4593c
NT
490/*
491 * S_missingterm
492 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 493 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
496 * This is fatal.
497 */
498
76e3520e 499STATIC void
cea2e8a9 500S_missingterm(pTHX_ char *s)
8990e307 501{
97aff369 502 dVAR;
8990e307
LW
503 char tmpbuf[3];
504 char q;
505 if (s) {
9d4ba2ae 506 char * const nl = strrchr(s,'\n');
d2719217 507 if (nl)
8990e307
LW
508 *nl = '\0';
509 }
9d116dd7
JH
510 else if (
511#ifdef EBCDIC
512 iscntrl(PL_multi_close)
513#else
514 PL_multi_close < 32 || PL_multi_close == 127
515#endif
516 ) {
8990e307 517 *tmpbuf = '^';
585ec06d 518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
519 tmpbuf[2] = '\0';
520 s = tmpbuf;
521 }
522 else {
eb160463 523 *tmpbuf = (char)PL_multi_close;
8990e307
LW
524 tmpbuf[1] = '\0';
525 s = tmpbuf;
526 }
527 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 529}
79072805 530
ef89dcc3 531#define FEATURE_IS_ENABLED(name) \
0d863452 532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
534/*
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
537 */
538STATIC bool
d4c19fe8 539S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 540{
97aff369 541 dVAR;
0d863452 542 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 543 char he_name[32] = "feature_";
6fca0082 544 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 545
7b9ef140 546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
547}
548
ffb4593c
NT
549/*
550 * Perl_deprecate
ffb4593c
NT
551 */
552
79072805 553void
bfed75c6 554Perl_deprecate(pTHX_ const char *s)
a0d0e21e 555{
599cee73 556 if (ckWARN(WARN_DEPRECATED))
9014280d 557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
558}
559
12bcd1a6 560void
bfed75c6 561Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
562{
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
565 /* */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
570
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
573 "Use of %s is deprecated", s);
574}
575
ffb4593c 576/*
9cbb5ea2
GS
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
ffb4593c
NT
579 */
580
c39cd008
GS
581#ifdef PERL_CR_FILTER
582static void
583strip_return(SV *sv)
584{
95a20fc0 585 register const char *s = SvPVX_const(sv);
9d4ba2ae 586 register const char * const e = s + SvCUR(sv);
c39cd008
GS
587 /* outer loop optimized to do nothing if there are no CR-LFs */
588 while (s < e) {
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
592 *d++ = *s++;
593 while (s < e) {
594 if (*s == '\r' && s[1] == '\n')
595 s++;
596 *d++ = *s++;
597 }
598 SvCUR(sv) -= s - d;
599 return;
600 }
601 }
602}
a868473f 603
76e3520e 604STATIC I32
c39cd008 605S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 606{
f54cb97a 607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
608 if (count > 0 && !maxlen)
609 strip_return(sv);
610 return count;
a868473f
NIS
611}
612#endif
613
199e78b7
DM
614
615
ffb4593c
NT
616/*
617 * Perl_lex_start
9cbb5ea2
GS
618 * Initialize variables. Uses the Perl save_stack to save its state (for
619 * recursive calls to the parser).
ffb4593c
NT
620 */
621
a0d0e21e 622void
864dbfa3 623Perl_lex_start(pTHX_ SV *line)
79072805 624{
97aff369 625 dVAR;
6ef55633 626 const char *s = NULL;
8990e307 627 STRLEN len;
acdf0a21
DM
628 yy_parser *parser;
629
630 /* create and initialise a parser */
631
199e78b7 632 Newxz(parser, 1, yy_parser);
acdf0a21
DM
633 parser->old_parser = PL_parser;
634 PL_parser = parser;
635
636 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
637 parser->ps = parser->stack;
638 parser->stack_size = YYINITDEPTH;
639
640 parser->stack->state = 0;
641 parser->yyerrstatus = 0;
642 parser->yychar = YYEMPTY; /* Cause a token to be read. */
643
644 /* initialise lexer state */
8990e307 645
3280af22 646 SAVEI32(PL_lex_state);
5db06880
NC
647#ifdef PERL_MAD
648 if (PL_lex_state == LEX_KNOWNEXT) {
199e78b7 649 I32 toke = parser->old_parser->lasttoke;
5db06880
NC
650 while (--toke >= 0) {
651 SAVEI32(PL_nexttoke[toke].next_type);
652 SAVEVPTR(PL_nexttoke[toke].next_val);
653 if (PL_madskills)
654 SAVEVPTR(PL_nexttoke[toke].next_mad);
655 }
5db06880 656 }
cd81e915 657 SAVEI32(PL_curforce);
5db06880 658#else
18b09519
GS
659 if (PL_lex_state == LEX_KNOWNEXT) {
660 I32 toke = PL_nexttoke;
661 while (--toke >= 0) {
662 SAVEI32(PL_nexttype[toke]);
663 SAVEVPTR(PL_nextval[toke]);
664 }
665 SAVEI32(PL_nexttoke);
18b09519 666 }
5db06880 667#endif
57843af0 668 SAVECOPLINE(PL_curcop);
3280af22
NIS
669 SAVEPPTR(PL_bufptr);
670 SAVEPPTR(PL_bufend);
671 SAVEPPTR(PL_oldbufptr);
672 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
673 SAVEPPTR(PL_last_lop);
674 SAVEPPTR(PL_last_uni);
3280af22
NIS
675 SAVEPPTR(PL_linestart);
676 SAVESPTR(PL_linestr);
c76ac1ee 677 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
bebdddfc 678 SAVEINT(PL_expect);
3280af22
NIS
679
680 PL_lex_state = LEX_NORMAL;
3280af22 681 PL_expect = XSTATE;
199e78b7
DM
682 Newx(parser->lex_brackstack, 120, char);
683 Newx(parser->lex_casestack, 12, char);
684 *parser->lex_casestack = '\0';
685#ifndef PERL_MAD
76be56bc 686 PL_nexttoke = 0;
5db06880 687#endif
02b34bbe 688
10efb74f
NC
689 if (line) {
690 s = SvPV_const(line, len);
691 } else {
692 len = 0;
693 }
694 if (!len) {
695 PL_linestr = newSVpvs("\n;");
696 } else if (SvREADONLY(line) || s[len-1] != ';') {
697 PL_linestr = newSVsv(line);
698 if (s[len-1] != ';')
0eb20fa2 699 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
700 } else {
701 SvTEMP_off(line);
702 SvREFCNT_inc_simple_void_NN(line);
703 PL_linestr = line;
8990e307 704 }
db4997f0
NC
705 /* PL_linestr needs to survive until end of scope, not just the next
706 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 707 SAVEFREESV(PL_linestr);
3280af22
NIS
708 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
709 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 710 PL_last_lop = PL_last_uni = NULL;
3280af22 711 PL_rsfp = 0;
79072805 712}
a687059c 713
ffb4593c
NT
714/*
715 * Perl_lex_end
9cbb5ea2
GS
716 * Finalizer for lexing operations. Must be called when the parser is
717 * done with the lexer.
ffb4593c
NT
718 */
719
463ee0b2 720void
864dbfa3 721Perl_lex_end(pTHX)
463ee0b2 722{
97aff369 723 dVAR;
3280af22 724 PL_doextract = FALSE;
463ee0b2
LW
725}
726
ffb4593c
NT
727/*
728 * S_incline
729 * This subroutine has nothing to do with tilting, whether at windmills
730 * or pinball tables. Its name is short for "increment line". It
57843af0 731 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 732 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
733 * # line 500 "foo.pm"
734 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
735 */
736
76e3520e 737STATIC void
cea2e8a9 738S_incline(pTHX_ char *s)
463ee0b2 739{
97aff369 740 dVAR;
463ee0b2
LW
741 char *t;
742 char *n;
73659bf1 743 char *e;
463ee0b2 744 char ch;
463ee0b2 745
57843af0 746 CopLINE_inc(PL_curcop);
463ee0b2
LW
747 if (*s++ != '#')
748 return;
d4c19fe8
AL
749 while (SPACE_OR_TAB(*s))
750 s++;
73659bf1
GS
751 if (strnEQ(s, "line", 4))
752 s += 4;
753 else
754 return;
084592ab 755 if (SPACE_OR_TAB(*s))
73659bf1 756 s++;
4e553d73 757 else
73659bf1 758 return;
d4c19fe8
AL
759 while (SPACE_OR_TAB(*s))
760 s++;
463ee0b2
LW
761 if (!isDIGIT(*s))
762 return;
d4c19fe8 763
463ee0b2
LW
764 n = s;
765 while (isDIGIT(*s))
766 s++;
bf4acbe4 767 while (SPACE_OR_TAB(*s))
463ee0b2 768 s++;
73659bf1 769 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 770 s++;
73659bf1
GS
771 e = t + 1;
772 }
463ee0b2 773 else {
c35e046a
AL
774 t = s;
775 while (!isSPACE(*t))
776 t++;
73659bf1 777 e = t;
463ee0b2 778 }
bf4acbe4 779 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
780 e++;
781 if (*e != '\n' && *e != '\0')
782 return; /* false alarm */
783
463ee0b2
LW
784 ch = *t;
785 *t = '\0';
f4dd75d9 786 if (t - s > 0) {
8a5ee598 787#ifndef USE_ITHREADS
c4420975 788 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
789 STRLEN tmplen = cf ? strlen(cf) : 0;
790 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
791 /* must copy *{"::_<(eval N)[oldfilename:L]"}
792 * to *{"::_<newfilename"} */
793 char smallbuf[256], smallbuf2[256];
794 char *tmpbuf, *tmpbuf2;
8a5ee598 795 GV **gvp, *gv2;
e66cf94c 796 STRLEN tmplen2 = strlen(s);
2ae0db35 797 if (tmplen + 2 < sizeof smallbuf)
e66cf94c
RGS
798 tmpbuf = smallbuf;
799 else
2ae0db35
NC
800 Newx(tmpbuf, tmplen + 2, char);
801 if (tmplen2 + 2 < sizeof smallbuf2)
e66cf94c
RGS
802 tmpbuf2 = smallbuf2;
803 else
2ae0db35 804 Newx(tmpbuf2, tmplen2 + 2, char);
e66cf94c
RGS
805 tmpbuf[0] = tmpbuf2[0] = '_';
806 tmpbuf[1] = tmpbuf2[1] = '<';
2ae0db35
NC
807 memcpy(tmpbuf + 2, cf, tmplen);
808 memcpy(tmpbuf2 + 2, s, tmplen2);
809 tmplen += 2; tmplen2 += 2;
8a5ee598
RGS
810 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
811 if (gvp) {
812 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 813 if (!isGV(gv2)) {
8a5ee598 814 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
815 /* adjust ${"::_<newfilename"} to store the new file name */
816 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
817 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
818 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
819 }
8a5ee598 820 }
e66cf94c
RGS
821 if (tmpbuf != smallbuf) Safefree(tmpbuf);
822 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
823 }
8a5ee598 824#endif
05ec9bb3 825 CopFILE_free(PL_curcop);
57843af0 826 CopFILE_set(PL_curcop, s);
f4dd75d9 827 }
463ee0b2 828 *t = ch;
57843af0 829 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
830}
831
29595ff2 832#ifdef PERL_MAD
cd81e915 833/* skip space before PL_thistoken */
29595ff2
NC
834
835STATIC char *
836S_skipspace0(pTHX_ register char *s)
837{
838 s = skipspace(s);
839 if (!PL_madskills)
840 return s;
cd81e915
NC
841 if (PL_skipwhite) {
842 if (!PL_thiswhite)
6b29d1f5 843 PL_thiswhite = newSVpvs("");
cd81e915
NC
844 sv_catsv(PL_thiswhite, PL_skipwhite);
845 sv_free(PL_skipwhite);
846 PL_skipwhite = 0;
847 }
848 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
849 return s;
850}
851
cd81e915 852/* skip space after PL_thistoken */
29595ff2
NC
853
854STATIC char *
855S_skipspace1(pTHX_ register char *s)
856{
d4c19fe8 857 const char *start = s;
29595ff2
NC
858 I32 startoff = start - SvPVX(PL_linestr);
859
860 s = skipspace(s);
861 if (!PL_madskills)
862 return s;
863 start = SvPVX(PL_linestr) + startoff;
cd81e915 864 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 865 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
866 PL_thistoken = newSVpvn(tstart, start - tstart);
867 }
868 PL_realtokenstart = -1;
869 if (PL_skipwhite) {
870 if (!PL_nextwhite)
6b29d1f5 871 PL_nextwhite = newSVpvs("");
cd81e915
NC
872 sv_catsv(PL_nextwhite, PL_skipwhite);
873 sv_free(PL_skipwhite);
874 PL_skipwhite = 0;
29595ff2
NC
875 }
876 return s;
877}
878
879STATIC char *
880S_skipspace2(pTHX_ register char *s, SV **svp)
881{
c35e046a
AL
882 char *start;
883 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
884 const I32 startoff = s - SvPVX(PL_linestr);
885
29595ff2
NC
886 s = skipspace(s);
887 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
888 if (!PL_madskills || !svp)
889 return s;
890 start = SvPVX(PL_linestr) + startoff;
cd81e915 891 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 892 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
893 PL_thistoken = newSVpvn(tstart, start - tstart);
894 PL_realtokenstart = -1;
29595ff2 895 }
cd81e915 896 if (PL_skipwhite) {
29595ff2 897 if (!*svp)
6b29d1f5 898 *svp = newSVpvs("");
cd81e915
NC
899 sv_setsv(*svp, PL_skipwhite);
900 sv_free(PL_skipwhite);
901 PL_skipwhite = 0;
29595ff2
NC
902 }
903
904 return s;
905}
906#endif
907
80a702cd 908STATIC void
5fa550fb 909S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
910{
911 AV *av = CopFILEAVx(PL_curcop);
912 if (av) {
913 SV * const sv = newSV(0);
914 sv_upgrade(sv, SVt_PVMG);
5fa550fb
NC
915 if (orig_sv)
916 sv_setsv(sv, orig_sv);
917 else
918 sv_setpvn(sv, buf, len);
80a702cd
RGS
919 (void)SvIOK_on(sv);
920 SvIV_set(sv, 0);
921 av_store(av, (I32)CopLINE(PL_curcop), sv);
922 }
923}
924
ffb4593c
NT
925/*
926 * S_skipspace
927 * Called to gobble the appropriate amount and type of whitespace.
928 * Skips comments as well.
929 */
930
76e3520e 931STATIC char *
cea2e8a9 932S_skipspace(pTHX_ register char *s)
a687059c 933{
97aff369 934 dVAR;
5db06880
NC
935#ifdef PERL_MAD
936 int curoff;
937 int startoff = s - SvPVX(PL_linestr);
938
cd81e915
NC
939 if (PL_skipwhite) {
940 sv_free(PL_skipwhite);
941 PL_skipwhite = 0;
5db06880
NC
942 }
943#endif
944
3280af22 945 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 946 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 947 s++;
5db06880
NC
948#ifdef PERL_MAD
949 goto done;
950#else
463ee0b2 951 return s;
5db06880 952#endif
463ee0b2
LW
953 }
954 for (;;) {
fd049845 955 STRLEN prevlen;
09bef843 956 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 957 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
958 while (s < PL_bufend && isSPACE(*s)) {
959 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
960 incline(s);
961 }
ffb4593c
NT
962
963 /* comment */
3280af22
NIS
964 if (s < PL_bufend && *s == '#') {
965 while (s < PL_bufend && *s != '\n')
463ee0b2 966 s++;
60e6418e 967 if (s < PL_bufend) {
463ee0b2 968 s++;
60e6418e
GS
969 if (PL_in_eval && !PL_rsfp) {
970 incline(s);
971 continue;
972 }
973 }
463ee0b2 974 }
ffb4593c
NT
975
976 /* only continue to recharge the buffer if we're at the end
977 * of the buffer, we're not reading from a source filter, and
978 * we're in normal lexing mode
979 */
09bef843
SB
980 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
981 PL_lex_state == LEX_FORMLINE)
5db06880
NC
982#ifdef PERL_MAD
983 goto done;
984#else
463ee0b2 985 return s;
5db06880 986#endif
ffb4593c
NT
987
988 /* try to recharge the buffer */
5db06880
NC
989#ifdef PERL_MAD
990 curoff = s - SvPVX(PL_linestr);
991#endif
992
9cbb5ea2 993 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 994 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 995 {
5db06880
NC
996#ifdef PERL_MAD
997 if (PL_madskills && curoff != startoff) {
cd81e915 998 if (!PL_skipwhite)
6b29d1f5 999 PL_skipwhite = newSVpvs("");
cd81e915 1000 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1001 curoff - startoff);
1002 }
1003
1004 /* mustn't throw out old stuff yet if madpropping */
1005 SvCUR(PL_linestr) = curoff;
1006 s = SvPVX(PL_linestr) + curoff;
1007 *s = 0;
1008 if (curoff && s[-1] == '\n')
1009 s[-1] = ' ';
1010#endif
1011
9cbb5ea2 1012 /* end of file. Add on the -p or -n magic */
cd81e915 1013 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1014 if (PL_minus_p) {
5db06880
NC
1015#ifdef PERL_MAD
1016 sv_catpv(PL_linestr,
1017 ";}continue{print or die qq(-p destination: $!\\n);}");
1018#else
01a19ab0
NC
1019 sv_setpv(PL_linestr,
1020 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1021#endif
3280af22 1022 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1023 }
01a19ab0 1024 else if (PL_minus_n) {
5db06880
NC
1025#ifdef PERL_MAD
1026 sv_catpvn(PL_linestr, ";}", 2);
1027#else
01a19ab0 1028 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1029#endif
01a19ab0
NC
1030 PL_minus_n = 0;
1031 }
a0d0e21e 1032 else
5db06880
NC
1033#ifdef PERL_MAD
1034 sv_catpvn(PL_linestr,";", 1);
1035#else
4147a61b 1036 sv_setpvn(PL_linestr,";", 1);
5db06880 1037#endif
ffb4593c
NT
1038
1039 /* reset variables for next time we lex */
9cbb5ea2 1040 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1041 = SvPVX(PL_linestr)
1042#ifdef PERL_MAD
1043 + curoff
1044#endif
1045 ;
3280af22 1046 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1047 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1048
1049 /* Close the filehandle. Could be from -P preprocessor,
1050 * STDIN, or a regular file. If we were reading code from
1051 * STDIN (because the commandline held no -e or filename)
1052 * then we don't close it, we reset it so the code can
1053 * read from STDIN too.
1054 */
1055
3280af22
NIS
1056 if (PL_preprocess && !PL_in_eval)
1057 (void)PerlProc_pclose(PL_rsfp);
1058 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1059 PerlIO_clearerr(PL_rsfp);
8990e307 1060 else
3280af22 1061 (void)PerlIO_close(PL_rsfp);
4608196e 1062 PL_rsfp = NULL;
463ee0b2
LW
1063 return s;
1064 }
ffb4593c
NT
1065
1066 /* not at end of file, so we only read another line */
09bef843
SB
1067 /* make corresponding updates to old pointers, for yyerror() */
1068 oldprevlen = PL_oldbufptr - PL_bufend;
1069 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1070 if (PL_last_uni)
1071 oldunilen = PL_last_uni - PL_bufend;
1072 if (PL_last_lop)
1073 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1074 PL_linestart = PL_bufptr = s + prevlen;
1075 PL_bufend = s + SvCUR(PL_linestr);
1076 s = PL_bufptr;
09bef843
SB
1077 PL_oldbufptr = s + oldprevlen;
1078 PL_oldoldbufptr = s + oldoldprevlen;
1079 if (PL_last_uni)
1080 PL_last_uni = s + oldunilen;
1081 if (PL_last_lop)
1082 PL_last_lop = s + oldloplen;
a0d0e21e 1083 incline(s);
ffb4593c
NT
1084
1085 /* debugger active and we're not compiling the debugger code,
1086 * so store the line into the debugger's array of lines
1087 */
80a702cd 1088 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1089 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1090 }
5db06880
NC
1091
1092#ifdef PERL_MAD
1093 done:
1094 if (PL_madskills) {
cd81e915 1095 if (!PL_skipwhite)
6b29d1f5 1096 PL_skipwhite = newSVpvs("");
5db06880
NC
1097 curoff = s - SvPVX(PL_linestr);
1098 if (curoff - startoff)
cd81e915 1099 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1100 curoff - startoff);
1101 }
1102 return s;
1103#endif
a687059c 1104}
378cc40b 1105
ffb4593c
NT
1106/*
1107 * S_check_uni
1108 * Check the unary operators to ensure there's no ambiguity in how they're
1109 * used. An ambiguous piece of code would be:
1110 * rand + 5
1111 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1112 * the +5 is its argument.
1113 */
1114
76e3520e 1115STATIC void
cea2e8a9 1116S_check_uni(pTHX)
ba106d47 1117{
97aff369 1118 dVAR;
d4c19fe8
AL
1119 const char *s;
1120 const char *t;
2f3197b3 1121
3280af22 1122 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1123 return;
3280af22
NIS
1124 while (isSPACE(*PL_last_uni))
1125 PL_last_uni++;
c35e046a
AL
1126 s = PL_last_uni;
1127 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1128 s++;
3280af22 1129 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1130 return;
6136c704 1131
0453d815 1132 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1133 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1134 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1135 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1136 }
2f3197b3
LW
1137}
1138
ffb4593c
NT
1139/*
1140 * LOP : macro to build a list operator. Its behaviour has been replaced
1141 * with a subroutine, S_lop() for which LOP is just another name.
1142 */
1143
a0d0e21e
LW
1144#define LOP(f,x) return lop(f,x,s)
1145
ffb4593c
NT
1146/*
1147 * S_lop
1148 * Build a list operator (or something that might be one). The rules:
1149 * - if we have a next token, then it's a list operator [why?]
1150 * - if the next thing is an opening paren, then it's a function
1151 * - else it's a list operator
1152 */
1153
76e3520e 1154STATIC I32
a0be28da 1155S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1156{
97aff369 1157 dVAR;
79072805 1158 yylval.ival = f;
35c8bce7 1159 CLINE;
3280af22
NIS
1160 PL_expect = x;
1161 PL_bufptr = s;
1162 PL_last_lop = PL_oldbufptr;
eb160463 1163 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1164#ifdef PERL_MAD
1165 if (PL_lasttoke)
1166 return REPORT(LSTOP);
1167#else
3280af22 1168 if (PL_nexttoke)
bbf60fe6 1169 return REPORT(LSTOP);
5db06880 1170#endif
79072805 1171 if (*s == '(')
bbf60fe6 1172 return REPORT(FUNC);
29595ff2 1173 s = PEEKSPACE(s);
79072805 1174 if (*s == '(')
bbf60fe6 1175 return REPORT(FUNC);
79072805 1176 else
bbf60fe6 1177 return REPORT(LSTOP);
79072805
LW
1178}
1179
5db06880
NC
1180#ifdef PERL_MAD
1181 /*
1182 * S_start_force
1183 * Sets up for an eventual force_next(). start_force(0) basically does
1184 * an unshift, while start_force(-1) does a push. yylex removes items
1185 * on the "pop" end.
1186 */
1187
1188STATIC void
1189S_start_force(pTHX_ int where)
1190{
1191 int i;
1192
cd81e915 1193 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1194 where = PL_lasttoke;
cd81e915
NC
1195 assert(PL_curforce < 0 || PL_curforce == where);
1196 if (PL_curforce != where) {
5db06880
NC
1197 for (i = PL_lasttoke; i > where; --i) {
1198 PL_nexttoke[i] = PL_nexttoke[i-1];
1199 }
1200 PL_lasttoke++;
1201 }
cd81e915 1202 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1203 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1204 PL_curforce = where;
1205 if (PL_nextwhite) {
5db06880 1206 if (PL_madskills)
6b29d1f5 1207 curmad('^', newSVpvs(""));
cd81e915 1208 CURMAD('_', PL_nextwhite);
5db06880
NC
1209 }
1210}
1211
1212STATIC void
1213S_curmad(pTHX_ char slot, SV *sv)
1214{
1215 MADPROP **where;
1216
1217 if (!sv)
1218 return;
cd81e915
NC
1219 if (PL_curforce < 0)
1220 where = &PL_thismad;
5db06880 1221 else
cd81e915 1222 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1223
cd81e915 1224 if (PL_faketokens)
5db06880
NC
1225 sv_setpvn(sv, "", 0);
1226 else {
1227 if (!IN_BYTES) {
1228 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1229 SvUTF8_on(sv);
1230 else if (PL_encoding) {
1231 sv_recode_to_utf8(sv, PL_encoding);
1232 }
1233 }
1234 }
1235
1236 /* keep a slot open for the head of the list? */
1237 if (slot != '_' && *where && (*where)->mad_key == '^') {
1238 (*where)->mad_key = slot;
1239 sv_free((*where)->mad_val);
1240 (*where)->mad_val = (void*)sv;
1241 }
1242 else
1243 addmad(newMADsv(slot, sv), where, 0);
1244}
1245#else
b3f24c00
MHM
1246# define start_force(where) NOOP
1247# define curmad(slot, sv) NOOP
5db06880
NC
1248#endif
1249
ffb4593c
NT
1250/*
1251 * S_force_next
9cbb5ea2 1252 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1253 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1254 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1255 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1256 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1257 */
1258
4e553d73 1259STATIC void
cea2e8a9 1260S_force_next(pTHX_ I32 type)
79072805 1261{
97aff369 1262 dVAR;
5db06880 1263#ifdef PERL_MAD
cd81e915 1264 if (PL_curforce < 0)
5db06880 1265 start_force(PL_lasttoke);
cd81e915 1266 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1267 if (PL_lex_state != LEX_KNOWNEXT)
1268 PL_lex_defer = PL_lex_state;
1269 PL_lex_state = LEX_KNOWNEXT;
1270 PL_lex_expect = PL_expect;
cd81e915 1271 PL_curforce = -1;
5db06880 1272#else
3280af22
NIS
1273 PL_nexttype[PL_nexttoke] = type;
1274 PL_nexttoke++;
1275 if (PL_lex_state != LEX_KNOWNEXT) {
1276 PL_lex_defer = PL_lex_state;
1277 PL_lex_expect = PL_expect;
1278 PL_lex_state = LEX_KNOWNEXT;
79072805 1279 }
5db06880 1280#endif
79072805
LW
1281}
1282
d0a148a6
NC
1283STATIC SV *
1284S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1285{
97aff369 1286 dVAR;
9d4ba2ae 1287 SV * const sv = newSVpvn(start,len);
bfed75c6 1288 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1289 SvUTF8_on(sv);
1290 return sv;
1291}
1292
ffb4593c
NT
1293/*
1294 * S_force_word
1295 * When the lexer knows the next thing is a word (for instance, it has
1296 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1297 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1298 * lookahead.
ffb4593c
NT
1299 *
1300 * Arguments:
b1b65b59 1301 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1302 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1303 * int check_keyword : if true, Perl checks to make sure the word isn't
1304 * a keyword (do this if the word is a label, e.g. goto FOO)
1305 * int allow_pack : if true, : characters will also be allowed (require,
1306 * use, etc. do this)
9cbb5ea2 1307 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1308 */
1309
76e3520e 1310STATIC char *
cea2e8a9 1311S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1312{
97aff369 1313 dVAR;
463ee0b2
LW
1314 register char *s;
1315 STRLEN len;
4e553d73 1316
29595ff2 1317 start = SKIPSPACE1(start);
463ee0b2 1318 s = start;
7e2040f0 1319 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1320 (allow_pack && *s == ':') ||
15f0808c 1321 (allow_initial_tick && *s == '\'') )
a0d0e21e 1322 {
3280af22 1323 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1324 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1325 return start;
cd81e915 1326 start_force(PL_curforce);
5db06880
NC
1327 if (PL_madskills)
1328 curmad('X', newSVpvn(start,s-start));
463ee0b2 1329 if (token == METHOD) {
29595ff2 1330 s = SKIPSPACE1(s);
463ee0b2 1331 if (*s == '(')
3280af22 1332 PL_expect = XTERM;
463ee0b2 1333 else {
3280af22 1334 PL_expect = XOPERATOR;
463ee0b2 1335 }
79072805 1336 }
9ded7720 1337 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1338 = (OP*)newSVOP(OP_CONST,0,
1339 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1340 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1341 force_next(token);
1342 }
1343 return s;
1344}
1345
ffb4593c
NT
1346/*
1347 * S_force_ident
9cbb5ea2 1348 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1349 * text only contains the "foo" portion. The first argument is a pointer
1350 * to the "foo", and the second argument is the type symbol to prefix.
1351 * Forces the next token to be a "WORD".
9cbb5ea2 1352 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1353 */
1354
76e3520e 1355STATIC void
bfed75c6 1356S_force_ident(pTHX_ register const char *s, int kind)
79072805 1357{
97aff369 1358 dVAR;
c35e046a 1359 if (*s) {
90e5519e
NC
1360 const STRLEN len = strlen(s);
1361 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1362 start_force(PL_curforce);
9ded7720 1363 NEXTVAL_NEXTTOKE.opval = o;
79072805 1364 force_next(WORD);
748a9306 1365 if (kind) {
11343788 1366 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1367 /* XXX see note in pp_entereval() for why we forgo typo
1368 warnings if the symbol must be introduced in an eval.
1369 GSAR 96-10-12 */
90e5519e
NC
1370 gv_fetchpvn_flags(s, len,
1371 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1372 : GV_ADD,
1373 kind == '$' ? SVt_PV :
1374 kind == '@' ? SVt_PVAV :
1375 kind == '%' ? SVt_PVHV :
a0d0e21e 1376 SVt_PVGV
90e5519e 1377 );
748a9306 1378 }
79072805
LW
1379 }
1380}
1381
1571675a
GS
1382NV
1383Perl_str_to_version(pTHX_ SV *sv)
1384{
1385 NV retval = 0.0;
1386 NV nshift = 1.0;
1387 STRLEN len;
cfd0369c 1388 const char *start = SvPV_const(sv,len);
9d4ba2ae 1389 const char * const end = start + len;
504618e9 1390 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1391 while (start < end) {
ba210ebe 1392 STRLEN skip;
1571675a
GS
1393 UV n;
1394 if (utf)
9041c2e3 1395 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1396 else {
1397 n = *(U8*)start;
1398 skip = 1;
1399 }
1400 retval += ((NV)n)/nshift;
1401 start += skip;
1402 nshift *= 1000;
1403 }
1404 return retval;
1405}
1406
4e553d73 1407/*
ffb4593c
NT
1408 * S_force_version
1409 * Forces the next token to be a version number.
e759cc13
RGS
1410 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1411 * and if "guessing" is TRUE, then no new token is created (and the caller
1412 * must use an alternative parsing method).
ffb4593c
NT
1413 */
1414
76e3520e 1415STATIC char *
e759cc13 1416S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1417{
97aff369 1418 dVAR;
5f66b61c 1419 OP *version = NULL;
44dcb63b 1420 char *d;
5db06880
NC
1421#ifdef PERL_MAD
1422 I32 startoff = s - SvPVX(PL_linestr);
1423#endif
89bfa8cd 1424
29595ff2 1425 s = SKIPSPACE1(s);
89bfa8cd 1426
44dcb63b 1427 d = s;
dd629d5b 1428 if (*d == 'v')
44dcb63b 1429 d++;
44dcb63b 1430 if (isDIGIT(*d)) {
e759cc13
RGS
1431 while (isDIGIT(*d) || *d == '_' || *d == '.')
1432 d++;
5db06880
NC
1433#ifdef PERL_MAD
1434 if (PL_madskills) {
cd81e915 1435 start_force(PL_curforce);
5db06880
NC
1436 curmad('X', newSVpvn(s,d-s));
1437 }
1438#endif
9f3d182e 1439 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1440 SV *ver;
b73d6f50 1441 s = scan_num(s, &yylval);
89bfa8cd 1442 version = yylval.opval;
dd629d5b
GS
1443 ver = cSVOPx(version)->op_sv;
1444 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1445 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1446 SvNV_set(ver, str_to_version(ver));
1571675a 1447 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1448 }
89bfa8cd 1449 }
5db06880
NC
1450 else if (guessing) {
1451#ifdef PERL_MAD
1452 if (PL_madskills) {
cd81e915
NC
1453 sv_free(PL_nextwhite); /* let next token collect whitespace */
1454 PL_nextwhite = 0;
5db06880
NC
1455 s = SvPVX(PL_linestr) + startoff;
1456 }
1457#endif
e759cc13 1458 return s;
5db06880 1459 }
89bfa8cd
PP
1460 }
1461
5db06880
NC
1462#ifdef PERL_MAD
1463 if (PL_madskills && !version) {
cd81e915
NC
1464 sv_free(PL_nextwhite); /* let next token collect whitespace */
1465 PL_nextwhite = 0;
5db06880
NC
1466 s = SvPVX(PL_linestr) + startoff;
1467 }
1468#endif
89bfa8cd 1469 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1470 start_force(PL_curforce);
9ded7720 1471 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1472 force_next(WORD);
89bfa8cd 1473
e759cc13 1474 return s;
89bfa8cd
PP
1475}
1476
ffb4593c
NT
1477/*
1478 * S_tokeq
1479 * Tokenize a quoted string passed in as an SV. It finds the next
1480 * chunk, up to end of string or a backslash. It may make a new
1481 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1482 * turns \\ into \.
1483 */
1484
76e3520e 1485STATIC SV *
cea2e8a9 1486S_tokeq(pTHX_ SV *sv)
79072805 1487{
97aff369 1488 dVAR;
79072805
LW
1489 register char *s;
1490 register char *send;
1491 register char *d;
b3ac6de7
IZ
1492 STRLEN len = 0;
1493 SV *pv = sv;
79072805
LW
1494
1495 if (!SvLEN(sv))
b3ac6de7 1496 goto finish;
79072805 1497
a0d0e21e 1498 s = SvPV_force(sv, len);
21a311ee 1499 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1500 goto finish;
463ee0b2 1501 send = s + len;
79072805
LW
1502 while (s < send && *s != '\\')
1503 s++;
1504 if (s == send)
b3ac6de7 1505 goto finish;
79072805 1506 d = s;
be4731d2 1507 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1508 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1509 if (SvUTF8(sv))
1510 SvUTF8_on(pv);
1511 }
79072805
LW
1512 while (s < send) {
1513 if (*s == '\\') {
a0d0e21e 1514 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1515 s++; /* all that, just for this */
1516 }
1517 *d++ = *s++;
1518 }
1519 *d = '\0';
95a20fc0 1520 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1521 finish:
3280af22 1522 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1523 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1524 return sv;
1525}
1526
ffb4593c
NT
1527/*
1528 * Now come three functions related to double-quote context,
1529 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1530 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1531 * interact with PL_lex_state, and create fake ( ... ) argument lists
1532 * to handle functions and concatenation.
1533 * They assume that whoever calls them will be setting up a fake
1534 * join call, because each subthing puts a ',' after it. This lets
1535 * "lower \luPpEr"
1536 * become
1537 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1538 *
1539 * (I'm not sure whether the spurious commas at the end of lcfirst's
1540 * arguments and join's arguments are created or not).
1541 */
1542
1543/*
1544 * S_sublex_start
1545 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1546 *
1547 * Pattern matching will set PL_lex_op to the pattern-matching op to
1548 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1549 *
1550 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1551 *
1552 * Everything else becomes a FUNC.
1553 *
1554 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1555 * had an OP_CONST or OP_READLINE). This just sets us up for a
1556 * call to S_sublex_push().
1557 */
1558
76e3520e 1559STATIC I32
cea2e8a9 1560S_sublex_start(pTHX)
79072805 1561{
97aff369 1562 dVAR;
0d46e09a 1563 register const I32 op_type = yylval.ival;
79072805
LW
1564
1565 if (op_type == OP_NULL) {
3280af22 1566 yylval.opval = PL_lex_op;
5f66b61c 1567 PL_lex_op = NULL;
79072805
LW
1568 return THING;
1569 }
1570 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1571 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1572
1573 if (SvTYPE(sv) == SVt_PVIV) {
1574 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1575 STRLEN len;
96a5add6 1576 const char * const p = SvPV_const(sv, len);
f54cb97a 1577 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1578 if (SvUTF8(sv))
1579 SvUTF8_on(nsv);
b3ac6de7
IZ
1580 SvREFCNT_dec(sv);
1581 sv = nsv;
4e553d73 1582 }
b3ac6de7 1583 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1584 PL_lex_stuff = NULL;
6f33ba73
RGS
1585 /* Allow <FH> // "foo" */
1586 if (op_type == OP_READLINE)
1587 PL_expect = XTERMORDORDOR;
79072805
LW
1588 return THING;
1589 }
e3f73d4e
RGS
1590 else if (op_type == OP_BACKTICK && PL_lex_op) {
1591 /* readpipe() vas overriden */
1592 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1593 yylval.opval = PL_lex_op;
9b201d7d 1594 PL_lex_op = NULL;
e3f73d4e
RGS
1595 PL_lex_stuff = NULL;
1596 return THING;
1597 }
79072805 1598
3280af22
NIS
1599 PL_sublex_info.super_state = PL_lex_state;
1600 PL_sublex_info.sub_inwhat = op_type;
1601 PL_sublex_info.sub_op = PL_lex_op;
1602 PL_lex_state = LEX_INTERPPUSH;
55497cff 1603
3280af22
NIS
1604 PL_expect = XTERM;
1605 if (PL_lex_op) {
1606 yylval.opval = PL_lex_op;
5f66b61c 1607 PL_lex_op = NULL;
55497cff
PP
1608 return PMFUNC;
1609 }
1610 else
1611 return FUNC;
1612}
1613
ffb4593c
NT
1614/*
1615 * S_sublex_push
1616 * Create a new scope to save the lexing state. The scope will be
1617 * ended in S_sublex_done. Returns a '(', starting the function arguments
1618 * to the uc, lc, etc. found before.
1619 * Sets PL_lex_state to LEX_INTERPCONCAT.
1620 */
1621
76e3520e 1622STATIC I32
cea2e8a9 1623S_sublex_push(pTHX)
55497cff 1624{
27da23d5 1625 dVAR;
f46d017c 1626 ENTER;
55497cff 1627
3280af22
NIS
1628 PL_lex_state = PL_sublex_info.super_state;
1629 SAVEI32(PL_lex_dojoin);
1630 SAVEI32(PL_lex_brackets);
3280af22
NIS
1631 SAVEI32(PL_lex_casemods);
1632 SAVEI32(PL_lex_starts);
1633 SAVEI32(PL_lex_state);
7766f137 1634 SAVEVPTR(PL_lex_inpat);
3280af22 1635 SAVEI32(PL_lex_inwhat);
57843af0 1636 SAVECOPLINE(PL_curcop);
3280af22 1637 SAVEPPTR(PL_bufptr);
8452ff4b 1638 SAVEPPTR(PL_bufend);
3280af22
NIS
1639 SAVEPPTR(PL_oldbufptr);
1640 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1641 SAVEPPTR(PL_last_lop);
1642 SAVEPPTR(PL_last_uni);
3280af22
NIS
1643 SAVEPPTR(PL_linestart);
1644 SAVESPTR(PL_linestr);
8edd5f42
RGS
1645 SAVEGENERICPV(PL_lex_brackstack);
1646 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1647
1648 PL_linestr = PL_lex_stuff;
a0714e2c 1649 PL_lex_stuff = NULL;
3280af22 1650
9cbb5ea2
GS
1651 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1652 = SvPVX(PL_linestr);
3280af22 1653 PL_bufend += SvCUR(PL_linestr);
bd61b366 1654 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1655 SAVEFREESV(PL_linestr);
1656
1657 PL_lex_dojoin = FALSE;
1658 PL_lex_brackets = 0;
a02a5408
JC
1659 Newx(PL_lex_brackstack, 120, char);
1660 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1661 PL_lex_casemods = 0;
1662 *PL_lex_casestack = '\0';
1663 PL_lex_starts = 0;
1664 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1665 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1666
1667 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1668 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1669 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1670 else
5f66b61c 1671 PL_lex_inpat = NULL;
79072805 1672
55497cff 1673 return '(';
79072805
LW
1674}
1675
ffb4593c
NT
1676/*
1677 * S_sublex_done
1678 * Restores lexer state after a S_sublex_push.
1679 */
1680
76e3520e 1681STATIC I32
cea2e8a9 1682S_sublex_done(pTHX)
79072805 1683{
27da23d5 1684 dVAR;
3280af22 1685 if (!PL_lex_starts++) {
396482e1 1686 SV * const sv = newSVpvs("");
9aa983d2
JH
1687 if (SvUTF8(PL_linestr))
1688 SvUTF8_on(sv);
3280af22 1689 PL_expect = XOPERATOR;
9aa983d2 1690 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1691 return THING;
1692 }
1693
3280af22
NIS
1694 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1695 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1696 return yylex();
79072805
LW
1697 }
1698
ffb4593c 1699 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1700 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1701 PL_linestr = PL_lex_repl;
1702 PL_lex_inpat = 0;
1703 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1704 PL_bufend += SvCUR(PL_linestr);
bd61b366 1705 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1706 SAVEFREESV(PL_linestr);
1707 PL_lex_dojoin = FALSE;
1708 PL_lex_brackets = 0;
3280af22
NIS
1709 PL_lex_casemods = 0;
1710 *PL_lex_casestack = '\0';
1711 PL_lex_starts = 0;
25da4f38 1712 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1713 PL_lex_state = LEX_INTERPNORMAL;
1714 PL_lex_starts++;
e9fa98b2
HS
1715 /* we don't clear PL_lex_repl here, so that we can check later
1716 whether this is an evalled subst; that means we rely on the
1717 logic to ensure sublex_done() is called again only via the
1718 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1719 }
e9fa98b2 1720 else {
3280af22 1721 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1722 PL_lex_repl = NULL;
e9fa98b2 1723 }
79072805 1724 return ',';
ffed7fef
LW
1725 }
1726 else {
5db06880
NC
1727#ifdef PERL_MAD
1728 if (PL_madskills) {
cd81e915
NC
1729 if (PL_thiswhite) {
1730 if (!PL_endwhite)
6b29d1f5 1731 PL_endwhite = newSVpvs("");
cd81e915
NC
1732 sv_catsv(PL_endwhite, PL_thiswhite);
1733 PL_thiswhite = 0;
1734 }
1735 if (PL_thistoken)
1736 sv_setpvn(PL_thistoken,"",0);
5db06880 1737 else
cd81e915 1738 PL_realtokenstart = -1;
5db06880
NC
1739 }
1740#endif
f46d017c 1741 LEAVE;
3280af22
NIS
1742 PL_bufend = SvPVX(PL_linestr);
1743 PL_bufend += SvCUR(PL_linestr);
1744 PL_expect = XOPERATOR;
09bef843 1745 PL_sublex_info.sub_inwhat = 0;
79072805 1746 return ')';
ffed7fef
LW
1747 }
1748}
1749
02aa26ce
NT
1750/*
1751 scan_const
1752
1753 Extracts a pattern, double-quoted string, or transliteration. This
1754 is terrifying code.
1755
94def140 1756 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1757 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1758 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1759
94def140
ST
1760 Returns a pointer to the character scanned up to. If this is
1761 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1762 successfully parsed), will leave an OP for the substring scanned
1763 in yylval. Caller must intuit reason for not parsing further
1764 by looking at the next characters herself.
1765
02aa26ce
NT
1766 In patterns:
1767 backslashes:
1768 double-quoted style: \r and \n
1769 regexp special ones: \D \s
94def140
ST
1770 constants: \x31
1771 backrefs: \1
02aa26ce
NT
1772 case and quoting: \U \Q \E
1773 stops on @ and $, but not for $ as tail anchor
1774
1775 In transliterations:
1776 characters are VERY literal, except for - not at the start or end
94def140
ST
1777 of the string, which indicates a range. If the range is in bytes,
1778 scan_const expands the range to the full set of intermediate
1779 characters. If the range is in utf8, the hyphen is replaced with
1780 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1781
1782 In double-quoted strings:
1783 backslashes:
1784 double-quoted style: \r and \n
94def140
ST
1785 constants: \x31
1786 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1787 case and quoting: \U \Q \E
1788 stops on @ and $
1789
1790 scan_const does *not* construct ops to handle interpolated strings.
1791 It stops processing as soon as it finds an embedded $ or @ variable
1792 and leaves it to the caller to work out what's going on.
1793
94def140
ST
1794 embedded arrays (whether in pattern or not) could be:
1795 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1796
1797 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1798
1799 $ in pattern could be $foo or could be tail anchor. Assumption:
1800 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1801 followed by one of "()| \r\n\t"
02aa26ce
NT
1802
1803 \1 (backreferences) are turned into $1
1804
1805 The structure of the code is
1806 while (there's a character to process) {
94def140
ST
1807 handle transliteration ranges
1808 skip regexp comments /(?#comment)/ and codes /(?{code})/
1809 skip #-initiated comments in //x patterns
1810 check for embedded arrays
02aa26ce
NT
1811 check for embedded scalars
1812 if (backslash) {
94def140
ST
1813 leave intact backslashes from leaveit (below)
1814 deprecate \1 in substitution replacements
02aa26ce
NT
1815 handle string-changing backslashes \l \U \Q \E, etc.
1816 switch (what was escaped) {
94def140
ST
1817 handle \- in a transliteration (becomes a literal -)
1818 handle \132 (octal characters)
1819 handle \x15 and \x{1234} (hex characters)
1820 handle \N{name} (named characters)
1821 handle \cV (control characters)
1822 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1823 } (end switch)
1824 } (end if backslash)
1825 } (end while character to read)
4e553d73 1826
02aa26ce
NT
1827*/
1828
76e3520e 1829STATIC char *
cea2e8a9 1830S_scan_const(pTHX_ char *start)
79072805 1831{
97aff369 1832 dVAR;
3280af22 1833 register char *send = PL_bufend; /* end of the constant */
561b68a9 1834 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1835 register char *s = start; /* start of the constant */
1836 register char *d = SvPVX(sv); /* destination for copies */
1837 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1838 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1839 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1840 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1841 UV uv;
4c3a8340
ST
1842#ifdef EBCDIC
1843 UV literal_endpoint = 0;
e294cc5d 1844 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1845#endif
012bcf8d 1846
2b9d42f0
NIS
1847 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1848 /* If we are doing a trans and we know we want UTF8 set expectation */
1849 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1850 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1851 }
1852
1853
79072805 1854 while (s < send || dorange) {
02aa26ce 1855 /* get transliterations out of the way (they're most literal) */
3280af22 1856 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1857 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1858 if (dorange) {
1ba5c669
JH
1859 I32 i; /* current expanded character */
1860 I32 min; /* first character in range */
1861 I32 max; /* last character in range */
02aa26ce 1862
e294cc5d
JH
1863#ifdef EBCDIC
1864 UV uvmax = 0;
1865#endif
1866
1867 if (has_utf8
1868#ifdef EBCDIC
1869 && !native_range
1870#endif
1871 ) {
9d4ba2ae 1872 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1873 char *e = d++;
1874 while (e-- > c)
1875 *(e + 1) = *e;
25716404 1876 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1877 /* mark the range as done, and continue */
1878 dorange = FALSE;
1879 didrange = TRUE;
1880 continue;
1881 }
2b9d42f0 1882
95a20fc0 1883 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1884#ifdef EBCDIC
1885 SvGROW(sv,
1886 SvLEN(sv) + (has_utf8 ?
1887 (512 - UTF_CONTINUATION_MARK +
1888 UNISKIP(0x100))
1889 : 256));
1890 /* How many two-byte within 0..255: 128 in UTF-8,
1891 * 96 in UTF-8-mod. */
1892#else
9cbb5ea2 1893 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1894#endif
9cbb5ea2 1895 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1896#ifdef EBCDIC
1897 if (has_utf8) {
1898 int j;
1899 for (j = 0; j <= 1; j++) {
1900 char * const c = (char*)utf8_hop((U8*)d, -1);
1901 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1902 if (j)
1903 min = (U8)uv;
1904 else if (uv < 256)
1905 max = (U8)uv;
1906 else {
1907 max = (U8)0xff; /* only to \xff */
1908 uvmax = uv; /* \x{100} to uvmax */
1909 }
1910 d = c; /* eat endpoint chars */
1911 }
1912 }
1913 else {
1914#endif
1915 d -= 2; /* eat the first char and the - */
1916 min = (U8)*d; /* first char in range */
1917 max = (U8)d[1]; /* last char in range */
1918#ifdef EBCDIC
1919 }
1920#endif
8ada0baa 1921
c2e66d9e 1922 if (min > max) {
01ec43d0 1923 Perl_croak(aTHX_
d1573ac7 1924 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1925 (char)min, (char)max);
c2e66d9e
GS
1926 }
1927
c7f1f016 1928#ifdef EBCDIC
4c3a8340
ST
1929 if (literal_endpoint == 2 &&
1930 ((isLOWER(min) && isLOWER(max)) ||
1931 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1932 if (isLOWER(min)) {
1933 for (i = min; i <= max; i++)
1934 if (isLOWER(i))
db42d148 1935 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1936 } else {
1937 for (i = min; i <= max; i++)
1938 if (isUPPER(i))
db42d148 1939 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1940 }
1941 }
1942 else
1943#endif
1944 for (i = min; i <= max; i++)
e294cc5d
JH
1945#ifdef EBCDIC
1946 if (has_utf8) {
1947 const U8 ch = (U8)NATIVE_TO_UTF(i);
1948 if (UNI_IS_INVARIANT(ch))
1949 *d++ = (U8)i;
1950 else {
1951 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1952 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1953 }
1954 }
1955 else
1956#endif
1957 *d++ = (char)i;
1958
1959#ifdef EBCDIC
1960 if (uvmax) {
1961 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1962 if (uvmax > 0x101)
1963 *d++ = (char)UTF_TO_NATIVE(0xff);
1964 if (uvmax > 0x100)
1965 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1966 }
1967#endif
02aa26ce
NT
1968
1969 /* mark the range as done, and continue */
79072805 1970 dorange = FALSE;
01ec43d0 1971 didrange = TRUE;
4c3a8340
ST
1972#ifdef EBCDIC
1973 literal_endpoint = 0;
1974#endif
79072805 1975 continue;
4e553d73 1976 }
02aa26ce
NT
1977
1978 /* range begins (ignore - as first or last char) */
79072805 1979 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1980 if (didrange) {
1fafa243 1981 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1982 }
e294cc5d
JH
1983 if (has_utf8
1984#ifdef EBCDIC
1985 && !native_range
1986#endif
1987 ) {
25716404 1988 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1989 s++;
1990 continue;
1991 }
79072805
LW
1992 dorange = TRUE;
1993 s++;
01ec43d0
GS
1994 }
1995 else {
1996 didrange = FALSE;
4c3a8340
ST
1997#ifdef EBCDIC
1998 literal_endpoint = 0;
e294cc5d 1999 native_range = TRUE;
4c3a8340 2000#endif
01ec43d0 2001 }
79072805 2002 }
02aa26ce
NT
2003
2004 /* if we get here, we're not doing a transliteration */
2005
0f5d15d6
IZ
2006 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2007 except for the last char, which will be done separately. */
3280af22 2008 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2009 if (s[2] == '#') {
e994fd66 2010 while (s+1 < send && *s != ')')
db42d148 2011 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2012 }
2013 else if (s[2] == '{' /* This should match regcomp.c */
2014 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2015 {
cc6b7395 2016 I32 count = 1;
0f5d15d6 2017 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2018 char c;
2019
d9f97599
GS
2020 while (count && (c = *regparse)) {
2021 if (c == '\\' && regparse[1])
2022 regparse++;
4e553d73 2023 else if (c == '{')
cc6b7395 2024 count++;
4e553d73 2025 else if (c == '}')
cc6b7395 2026 count--;
d9f97599 2027 regparse++;
cc6b7395 2028 }
e994fd66 2029 if (*regparse != ')')
5bdf89e7 2030 regparse--; /* Leave one char for continuation. */
0f5d15d6 2031 while (s < regparse)
db42d148 2032 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2033 }
748a9306 2034 }
02aa26ce
NT
2035
2036 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2037 else if (*s == '#' && PL_lex_inpat &&
2038 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2039 while (s+1 < send && *s != '\n')
db42d148 2040 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2041 }
02aa26ce 2042
5d1d4326 2043 /* check for embedded arrays
da6eedaa 2044 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2045 */
1749ea0d
ST
2046 else if (*s == '@' && s[1]) {
2047 if (isALNUM_lazy_if(s+1,UTF))
2048 break;
2049 if (strchr(":'{$", s[1]))
2050 break;
2051 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2052 break; /* in regexp, neither @+ nor @- are interpolated */
2053 }
02aa26ce
NT
2054
2055 /* check for embedded scalars. only stop if we're sure it's a
2056 variable.
2057 */
79072805 2058 else if (*s == '$') {
3280af22 2059 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2060 break;
6002328a 2061 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2062 break; /* in regexp, $ might be tail anchor */
2063 }
02aa26ce 2064
2b9d42f0
NIS
2065 /* End of else if chain - OP_TRANS rejoin rest */
2066
02aa26ce 2067 /* backslashes */
79072805
LW
2068 if (*s == '\\' && s+1 < send) {
2069 s++;
02aa26ce 2070
02aa26ce 2071 /* deprecate \1 in strings and substitution replacements */
3280af22 2072 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2073 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2074 {
599cee73 2075 if (ckWARN(WARN_SYNTAX))
9014280d 2076 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2077 *--s = '$';
2078 break;
2079 }
02aa26ce
NT
2080
2081 /* string-change backslash escapes */
3280af22 2082 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2083 --s;
2084 break;
2085 }
cc74c5bd
ST
2086 /* skip any other backslash escapes in a pattern */
2087 else if (PL_lex_inpat) {
2088 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2089 goto default_action;
2090 }
02aa26ce
NT
2091
2092 /* if we get here, it's either a quoted -, or a digit */
79072805 2093 switch (*s) {
02aa26ce
NT
2094
2095 /* quoted - in transliterations */
79072805 2096 case '-':
3280af22 2097 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2098 *d++ = *s++;
2099 continue;
2100 }
2101 /* FALL THROUGH */
2102 default:
11b8faa4 2103 {
86f97054 2104 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2105 ckWARN(WARN_MISC))
9014280d 2106 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2107 "Unrecognized escape \\%c passed through",
2108 *s);
11b8faa4 2109 /* default action is to copy the quoted character */
f9a63242 2110 goto default_action;
11b8faa4 2111 }
02aa26ce
NT
2112
2113 /* \132 indicates an octal constant */
79072805
LW
2114 case '0': case '1': case '2': case '3':
2115 case '4': case '5': case '6': case '7':
ba210ebe 2116 {
53305cf1
NC
2117 I32 flags = 0;
2118 STRLEN len = 3;
2119 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2120 s += len;
2121 }
012bcf8d 2122 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2123
2124 /* \x24 indicates a hex constant */
79072805 2125 case 'x':
a0ed51b3
LW
2126 ++s;
2127 if (*s == '{') {
9d4ba2ae 2128 char* const e = strchr(s, '}');
a4c04bdc
NC
2129 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2130 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2131 STRLEN len;
355860ce 2132
53305cf1 2133 ++s;
adaeee49 2134 if (!e) {
a0ed51b3 2135 yyerror("Missing right brace on \\x{}");
355860ce 2136 continue;
ba210ebe 2137 }
53305cf1
NC
2138 len = e - s;
2139 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2140 s = e + 1;
a0ed51b3
LW
2141 }
2142 else {
ba210ebe 2143 {
53305cf1 2144 STRLEN len = 2;
a4c04bdc 2145 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2146 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2147 s += len;
2148 }
012bcf8d
GS
2149 }
2150
2151 NUM_ESCAPE_INSERT:
2152 /* Insert oct or hex escaped character.
301d3d20 2153 * There will always enough room in sv since such
db42d148 2154 * escapes will be longer than any UTF-8 sequence
301d3d20 2155 * they can end up as. */
ba7cea30 2156
c7f1f016
NIS
2157 /* We need to map to chars to ASCII before doing the tests
2158 to cover EBCDIC
2159 */
c4d5f83a 2160 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2161 if (!has_utf8 && uv > 255) {
301d3d20
JH
2162 /* Might need to recode whatever we have
2163 * accumulated so far if it contains any
2164 * hibit chars.
2165 *
2166 * (Can't we keep track of that and avoid
2167 * this rescan? --jhi)
012bcf8d 2168 */
c7f1f016 2169 int hicount = 0;
63cd0674
NIS
2170 U8 *c;
2171 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2172 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2173 hicount++;
db42d148 2174 }
012bcf8d 2175 }
63cd0674 2176 if (hicount) {
9d4ba2ae 2177 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2178 U8 *src, *dst;
2179 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2180 src = (U8 *)d - 1;
2181 dst = src+hicount;
2182 d += hicount;
cfd0369c 2183 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2184 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2185 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2186 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2187 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2188 }
2189 else {
63cd0674 2190 *dst-- = *src;
012bcf8d 2191 }
c7f1f016 2192 src--;
012bcf8d
GS
2193 }
2194 }
2195 }
2196
9aa983d2 2197 if (has_utf8 || uv > 255) {
9041c2e3 2198 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2199 has_utf8 = TRUE;
f9a63242
JH
2200 if (PL_lex_inwhat == OP_TRANS &&
2201 PL_sublex_info.sub_op) {
2202 PL_sublex_info.sub_op->op_private |=
2203 (PL_lex_repl ? OPpTRANS_FROM_UTF
2204 : OPpTRANS_TO_UTF);
f9a63242 2205 }
e294cc5d
JH
2206#ifdef EBCDIC
2207 if (uv > 255 && !dorange)
2208 native_range = FALSE;
2209#endif
012bcf8d 2210 }
a0ed51b3 2211 else {
012bcf8d 2212 *d++ = (char)uv;
a0ed51b3 2213 }
012bcf8d
GS
2214 }
2215 else {
c4d5f83a 2216 *d++ = (char) uv;
a0ed51b3 2217 }
79072805 2218 continue;
02aa26ce 2219
b239daa5 2220 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2221 case 'N':
55eda711 2222 ++s;
423cee85
JH
2223 if (*s == '{') {
2224 char* e = strchr(s, '}');
155aba94 2225 SV *res;
423cee85 2226 STRLEN len;
cfd0369c 2227 const char *str;
fc8cd66c 2228 SV *type;
4e553d73 2229
423cee85 2230 if (!e) {
5777a3f7 2231 yyerror("Missing right brace on \\N{}");
423cee85
JH
2232 e = s - 1;
2233 goto cont_scan;
2234 }
dbc0d4f2
JH
2235 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2236 /* \N{U+...} */
2237 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2238 PERL_SCAN_DISALLOW_PREFIX;
2239 s += 3;
2240 len = e - s;
2241 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2242 if ( e > s && len != (STRLEN)(e - s) ) {
2243 uv = 0xFFFD;
fc8cd66c 2244 }
dbc0d4f2
JH
2245 s = e + 1;
2246 goto NUM_ESCAPE_INSERT;
2247 }
55eda711 2248 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2249 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2250 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2251 res, NULL, SvPVX(type) );
2252 SvREFCNT_dec(type);
f9a63242
JH
2253 if (has_utf8)
2254 sv_utf8_upgrade(res);
cfd0369c 2255 str = SvPV_const(res,len);
1c47067b
JH
2256#ifdef EBCDIC_NEVER_MIND
2257 /* charnames uses pack U and that has been
2258 * recently changed to do the below uni->native
2259 * mapping, so this would be redundant (and wrong,
2260 * the code point would be doubly converted).
2261 * But leave this in just in case the pack U change
2262 * gets revoked, but the semantics is still
2263 * desireable for charnames. --jhi */
cddc7ef4 2264 {
cfd0369c 2265 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2266
2267 if (uv < 0x100) {
89ebb4a3 2268 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2269
2270 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2271 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2272 str = SvPV_const(res, len);
cddc7ef4
JH
2273 }
2274 }
2275#endif
89491803 2276 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2277 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2278 SvCUR_set(sv, d - ostart);
2279 SvPOK_on(sv);
e4f3eed8 2280 *d = '\0';
f08d6ad9 2281 sv_utf8_upgrade(sv);
d2f449dd 2282 /* this just broke our allocation above... */
eb160463 2283 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2284 d = SvPVX(sv) + SvCUR(sv);
89491803 2285 has_utf8 = TRUE;
f08d6ad9 2286 }
eb160463 2287 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2288 const char * const odest = SvPVX_const(sv);
423cee85 2289
8973db79 2290 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2291 d = SvPVX(sv) + (d - odest);
2292 }
e294cc5d
JH
2293#ifdef EBCDIC
2294 if (!dorange)
2295 native_range = FALSE; /* \N{} is guessed to be Unicode */
2296#endif
423cee85
JH
2297 Copy(str, d, len, char);
2298 d += len;
2299 SvREFCNT_dec(res);
2300 cont_scan:
2301 s = e + 1;
2302 }
2303 else
5777a3f7 2304 yyerror("Missing braces on \\N{}");
423cee85
JH
2305 continue;
2306
02aa26ce 2307 /* \c is a control character */
79072805
LW
2308 case 'c':
2309 s++;
961ce445 2310 if (s < send) {
ba210ebe 2311 U8 c = *s++;
c7f1f016
NIS
2312#ifdef EBCDIC
2313 if (isLOWER(c))
2314 c = toUPPER(c);
2315#endif
db42d148 2316 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2317 }
961ce445
RGS
2318 else {
2319 yyerror("Missing control char name in \\c");
2320 }
79072805 2321 continue;
02aa26ce
NT
2322
2323 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2324 case 'b':
db42d148 2325 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2326 break;
2327 case 'n':
db42d148 2328 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2329 break;
2330 case 'r':
db42d148 2331 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2332 break;
2333 case 'f':
db42d148 2334 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2335 break;
2336 case 't':
db42d148 2337 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2338 break;
34a3fe2a 2339 case 'e':
db42d148 2340 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2341 break;
2342 case 'a':
db42d148 2343 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2344 break;
02aa26ce
NT
2345 } /* end switch */
2346
79072805
LW
2347 s++;
2348 continue;
02aa26ce 2349 } /* end if (backslash) */
4c3a8340
ST
2350#ifdef EBCDIC
2351 else
2352 literal_endpoint++;
2353#endif
02aa26ce 2354
f9a63242 2355 default_action:
2b9d42f0
NIS
2356 /* If we started with encoded form, or already know we want it
2357 and then encode the next character */
2358 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2359 STRLEN len = 1;
5f66b61c
AL
2360 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2361 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2362 s += len;
2363 if (need > len) {
2364 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2365 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2366 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2367 }
5f66b61c 2368 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2369 has_utf8 = TRUE;
e294cc5d
JH
2370#ifdef EBCDIC
2371 if (uv > 255 && !dorange)
2372 native_range = FALSE;
2373#endif
2b9d42f0
NIS
2374 }
2375 else {
2376 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2377 }
02aa26ce
NT
2378 } /* while loop to process each character */
2379
2380 /* terminate the string and set up the sv */
79072805 2381 *d = '\0';
95a20fc0 2382 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2383 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2384 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2385
79072805 2386 SvPOK_on(sv);
9f4817db 2387 if (PL_encoding && !has_utf8) {
d0063567
DK
2388 sv_recode_to_utf8(sv, PL_encoding);
2389 if (SvUTF8(sv))
2390 has_utf8 = TRUE;
9f4817db 2391 }
2b9d42f0 2392 if (has_utf8) {
7e2040f0 2393 SvUTF8_on(sv);
2b9d42f0 2394 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2395 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2396 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2397 }
2398 }
79072805 2399
02aa26ce 2400 /* shrink the sv if we allocated more than we used */
79072805 2401 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2402 SvPV_shrink_to_cur(sv);
79072805 2403 }
02aa26ce 2404
9b599b2a 2405 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2406 if (s > PL_bufptr) {
2407 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2408 sv = new_constant(start, s - start,
2409 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2410 sv, NULL,
10edeb5d
JH
2411 (const char *)
2412 (( PL_lex_inwhat == OP_TRANS
2413 ? "tr"
2414 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2415 ? "s"
2416 : "qq"))));
79072805 2417 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2418 } else
8990e307 2419 SvREFCNT_dec(sv);
79072805
LW
2420 return s;
2421}
2422
ffb4593c
NT
2423/* S_intuit_more
2424 * Returns TRUE if there's more to the expression (e.g., a subscript),
2425 * FALSE otherwise.
ffb4593c
NT
2426 *
2427 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2428 *
2429 * ->[ and ->{ return TRUE
2430 * { and [ outside a pattern are always subscripts, so return TRUE
2431 * if we're outside a pattern and it's not { or [, then return FALSE
2432 * if we're in a pattern and the first char is a {
2433 * {4,5} (any digits around the comma) returns FALSE
2434 * if we're in a pattern and the first char is a [
2435 * [] returns FALSE
2436 * [SOMETHING] has a funky algorithm to decide whether it's a
2437 * character class or not. It has to deal with things like
2438 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2439 * anything else returns TRUE
2440 */
2441
9cbb5ea2
GS
2442/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2443
76e3520e 2444STATIC int
cea2e8a9 2445S_intuit_more(pTHX_ register char *s)
79072805 2446{
97aff369 2447 dVAR;
3280af22 2448 if (PL_lex_brackets)
79072805
LW
2449 return TRUE;
2450 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2451 return TRUE;
2452 if (*s != '{' && *s != '[')
2453 return FALSE;
3280af22 2454 if (!PL_lex_inpat)
79072805
LW
2455 return TRUE;
2456
2457 /* In a pattern, so maybe we have {n,m}. */
2458 if (*s == '{') {
2459 s++;
2460 if (!isDIGIT(*s))
2461 return TRUE;
2462 while (isDIGIT(*s))
2463 s++;
2464 if (*s == ',')
2465 s++;
2466 while (isDIGIT(*s))
2467 s++;
2468 if (*s == '}')
2469 return FALSE;
2470 return TRUE;
2471
2472 }
2473
2474 /* On the other hand, maybe we have a character class */
2475
2476 s++;
2477 if (*s == ']' || *s == '^')
2478 return FALSE;
2479 else {
ffb4593c 2480 /* this is terrifying, and it works */
79072805
LW
2481 int weight = 2; /* let's weigh the evidence */
2482 char seen[256];
f27ffc4a 2483 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2484 const char * const send = strchr(s,']');
3280af22 2485 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2486
2487 if (!send) /* has to be an expression */
2488 return TRUE;
2489
2490 Zero(seen,256,char);
2491 if (*s == '$')
2492 weight -= 3;
2493 else if (isDIGIT(*s)) {
2494 if (s[1] != ']') {
2495 if (isDIGIT(s[1]) && s[2] == ']')
2496 weight -= 10;
2497 }
2498 else
2499 weight -= 100;
2500 }
2501 for (; s < send; s++) {
2502 last_un_char = un_char;
2503 un_char = (unsigned char)*s;
2504 switch (*s) {
2505 case '@':
2506 case '&':
2507 case '$':
2508 weight -= seen[un_char] * 10;
7e2040f0 2509 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2510 int len;
8903cb82 2511 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2512 len = (int)strlen(tmpbuf);
2513 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2514 weight -= 100;
2515 else
2516 weight -= 10;
2517 }
2518 else if (*s == '$' && s[1] &&
93a17b20
LW
2519 strchr("[#!%*<>()-=",s[1])) {
2520 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2521 weight -= 10;
2522 else
2523 weight -= 1;
2524 }
2525 break;
2526 case '\\':
2527 un_char = 254;
2528 if (s[1]) {
93a17b20 2529 if (strchr("wds]",s[1]))
79072805 2530 weight += 100;
10edeb5d 2531 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2532 weight += 1;
93a17b20 2533 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2534 weight += 40;
2535 else if (isDIGIT(s[1])) {
2536 weight += 40;
2537 while (s[1] && isDIGIT(s[1]))
2538 s++;
2539 }
2540 }
2541 else
2542 weight += 100;
2543 break;
2544 case '-':
2545 if (s[1] == '\\')
2546 weight += 50;
93a17b20 2547 if (strchr("aA01! ",last_un_char))
79072805 2548 weight += 30;
93a17b20 2549 if (strchr("zZ79~",s[1]))
79072805 2550 weight += 30;
f27ffc4a
GS
2551 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2552 weight -= 5; /* cope with negative subscript */
79072805
LW
2553 break;
2554 default:
3792a11b
NC
2555 if (!isALNUM(last_un_char)
2556 && !(last_un_char == '$' || last_un_char == '@'
2557 || last_un_char == '&')
2558 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2559 char *d = tmpbuf;
2560 while (isALPHA(*s))
2561 *d++ = *s++;
2562 *d = '\0';
5458a98a 2563 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2564 weight -= 150;
2565 }
2566 if (un_char == last_un_char + 1)
2567 weight += 5;
2568 weight -= seen[un_char];
2569 break;
2570 }
2571 seen[un_char]++;
2572 }
2573 if (weight >= 0) /* probably a character class */
2574 return FALSE;
2575 }
2576
2577 return TRUE;
2578}
ffed7fef 2579
ffb4593c
NT
2580/*
2581 * S_intuit_method
2582 *
2583 * Does all the checking to disambiguate
2584 * foo bar
2585 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2586 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2587 *
2588 * First argument is the stuff after the first token, e.g. "bar".
2589 *
2590 * Not a method if bar is a filehandle.
2591 * Not a method if foo is a subroutine prototyped to take a filehandle.
2592 * Not a method if it's really "Foo $bar"
2593 * Method if it's "foo $bar"
2594 * Not a method if it's really "print foo $bar"
2595 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2596 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2597 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2598 * =>
2599 */
2600
76e3520e 2601STATIC int
62d55b22 2602S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2603{
97aff369 2604 dVAR;
a0d0e21e 2605 char *s = start + (*start == '$');
3280af22 2606 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2607 STRLEN len;
2608 GV* indirgv;
5db06880
NC
2609#ifdef PERL_MAD
2610 int soff;
2611#endif
a0d0e21e
LW
2612
2613 if (gv) {
62d55b22 2614 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2615 return 0;
62d55b22
NC
2616 if (cv) {
2617 if (SvPOK(cv)) {
2618 const char *proto = SvPVX_const(cv);
2619 if (proto) {
2620 if (*proto == ';')
2621 proto++;
2622 if (*proto == '*')
2623 return 0;
2624 }
b6c543e3
IZ
2625 }
2626 } else
c35e046a 2627 gv = NULL;
a0d0e21e 2628 }
8903cb82 2629 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2630 /* start is the beginning of the possible filehandle/object,
2631 * and s is the end of it
2632 * tmpbuf is a copy of it
2633 */
2634
a0d0e21e 2635 if (*start == '$') {
3280af22 2636 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2637 return 0;
5db06880
NC
2638#ifdef PERL_MAD
2639 len = start - SvPVX(PL_linestr);
2640#endif
29595ff2 2641 s = PEEKSPACE(s);
f0092767 2642#ifdef PERL_MAD
5db06880
NC
2643 start = SvPVX(PL_linestr) + len;
2644#endif
3280af22
NIS
2645 PL_bufptr = start;
2646 PL_expect = XREF;
a0d0e21e
LW
2647 return *s == '(' ? FUNCMETH : METHOD;
2648 }
5458a98a 2649 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2650 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2651 len -= 2;
2652 tmpbuf[len] = '\0';
5db06880
NC
2653#ifdef PERL_MAD
2654 soff = s - SvPVX(PL_linestr);
2655#endif
c3e0f903
GS
2656 goto bare_package;
2657 }
90e5519e 2658 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2659 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2660 return 0;
2661 /* filehandle or package name makes it a method */
da51bb9b 2662 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2663#ifdef PERL_MAD
2664 soff = s - SvPVX(PL_linestr);
2665#endif
29595ff2 2666 s = PEEKSPACE(s);
3280af22 2667 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2668 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2669 bare_package:
cd81e915 2670 start_force(PL_curforce);
9ded7720 2671 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2672 newSVpvn(tmpbuf,len));
9ded7720 2673 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2674 if (PL_madskills)
2675 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2676 PL_expect = XTERM;
a0d0e21e 2677 force_next(WORD);
3280af22 2678 PL_bufptr = s;
5db06880
NC
2679#ifdef PERL_MAD
2680 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2681#endif
a0d0e21e
LW
2682 return *s == '(' ? FUNCMETH : METHOD;
2683 }
2684 }
2685 return 0;
2686}
2687
ffb4593c
NT
2688/*
2689 * S_incl_perldb
2690 * Return a string of Perl code to load the debugger. If PERL5DB
2691 * is set, it will return the contents of that, otherwise a
2692 * compile-time require of perl5db.pl.
2693 */
2694
bfed75c6 2695STATIC const char*
cea2e8a9 2696S_incl_perldb(pTHX)
a0d0e21e 2697{
97aff369 2698 dVAR;
3280af22 2699 if (PL_perldb) {
9d4ba2ae 2700 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2701
2702 if (pdb)
2703 return pdb;
93189314 2704 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2705 return "BEGIN { require 'perl5db.pl' }";
2706 }
2707 return "";
2708}
2709
2710
16d20bd9 2711/* Encoded script support. filter_add() effectively inserts a
4e553d73 2712 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2713 * Note that the filter function only applies to the current source file
2714 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2715 *
2716 * The datasv parameter (which may be NULL) can be used to pass
2717 * private data to this instance of the filter. The filter function
2718 * can recover the SV using the FILTER_DATA macro and use it to
2719 * store private buffers and state information.
2720 *
2721 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2722 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2723 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2724 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2725 * private use must be set using malloc'd pointers.
2726 */
16d20bd9
AD
2727
2728SV *
864dbfa3 2729Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2730{
97aff369 2731 dVAR;
f4c556ac 2732 if (!funcp)
a0714e2c 2733 return NULL;
f4c556ac 2734
3280af22
NIS
2735 if (!PL_rsfp_filters)
2736 PL_rsfp_filters = newAV();
16d20bd9 2737 if (!datasv)
561b68a9 2738 datasv = newSV(0);
862a34c6 2739 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2740 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2741 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2742 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2743 FPTR2DPTR(void *, IoANY(datasv)),
2744 SvPV_nolen(datasv)));
3280af22
NIS
2745 av_unshift(PL_rsfp_filters, 1);
2746 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2747 return(datasv);
2748}
4e553d73 2749
16d20bd9
AD
2750
2751/* Delete most recently added instance of this filter function. */
a0d0e21e 2752void
864dbfa3 2753Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2754{
97aff369 2755 dVAR;
e0c19803 2756 SV *datasv;
24801a4b 2757
33073adb 2758#ifdef DEBUGGING
55662e27
JH
2759 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2760 FPTR2DPTR(void*, funcp)));
33073adb 2761#endif
3280af22 2762 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2763 return;
2764 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2765 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2766 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2767 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2768 IoANY(datasv) = (void *)NULL;
3280af22 2769 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2770
16d20bd9
AD
2771 return;
2772 }
2773 /* we need to search for the correct entry and clear it */
cea2e8a9 2774 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2775}
2776
2777
1de9afcd
RGS
2778/* Invoke the idxth filter function for the current rsfp. */
2779/* maxlen 0 = read one text line */
16d20bd9 2780I32
864dbfa3 2781Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2782{
97aff369 2783 dVAR;
16d20bd9
AD
2784 filter_t funcp;
2785 SV *datasv = NULL;
f482118e
NC
2786 /* This API is bad. It should have been using unsigned int for maxlen.
2787 Not sure if we want to change the API, but if not we should sanity
2788 check the value here. */
39cd7a59
NC
2789 const unsigned int correct_length
2790 = maxlen < 0 ?
2791#ifdef PERL_MICRO
2792 0x7FFFFFFF
2793#else
2794 INT_MAX
2795#endif
2796 : maxlen;
e50aee73 2797
3280af22 2798 if (!PL_rsfp_filters)
16d20bd9 2799 return -1;
1de9afcd 2800 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2801 /* Provide a default input filter to make life easy. */
2802 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2803 DEBUG_P(PerlIO_printf(Perl_debug_log,
2804 "filter_read %d: from rsfp\n", idx));
f482118e 2805 if (correct_length) {
16d20bd9
AD
2806 /* Want a block */
2807 int len ;
f54cb97a 2808 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2809
2810 /* ensure buf_sv is large enough */
f482118e
NC
2811 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2812 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2813 correct_length)) <= 0) {
3280af22 2814 if (PerlIO_error(PL_rsfp))
37120919
AD
2815 return -1; /* error */
2816 else
2817 return 0 ; /* end of file */
2818 }
16d20bd9
AD
2819 SvCUR_set(buf_sv, old_len + len) ;
2820 } else {
2821 /* Want a line */
3280af22
NIS
2822 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2823 if (PerlIO_error(PL_rsfp))
37120919
AD
2824 return -1; /* error */
2825 else
2826 return 0 ; /* end of file */
2827 }
16d20bd9
AD
2828 }
2829 return SvCUR(buf_sv);
2830 }
2831 /* Skip this filter slot if filter has been deleted */
1de9afcd 2832 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2833 DEBUG_P(PerlIO_printf(Perl_debug_log,
2834 "filter_read %d: skipped (filter deleted)\n",
2835 idx));
f482118e 2836 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2837 }
2838 /* Get function pointer hidden within datasv */
8141890a 2839 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2840 DEBUG_P(PerlIO_printf(Perl_debug_log,
2841 "filter_read %d: via function %p (%s)\n",
ca0270c4 2842 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2843 /* Call function. The function is expected to */
2844 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2845 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2846 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2847}
2848
76e3520e 2849STATIC char *
cea2e8a9 2850S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2851{
97aff369 2852 dVAR;
c39cd008 2853#ifdef PERL_CR_FILTER
3280af22 2854 if (!PL_rsfp_filters) {
c39cd008 2855 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2856 }
2857#endif
3280af22 2858 if (PL_rsfp_filters) {
55497cff
PP
2859 if (!append)
2860 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2861 if (FILTER_READ(0, sv, 0) > 0)
2862 return ( SvPVX(sv) ) ;
2863 else
bd61b366 2864 return NULL ;
16d20bd9 2865 }
9d116dd7 2866 else
fd049845 2867 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2868}
2869
01ec43d0 2870STATIC HV *
7fc63493 2871S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2872{
97aff369 2873 dVAR;
def3634b
GS
2874 GV *gv;
2875
01ec43d0 2876 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2877 return PL_curstash;
2878
2879 if (len > 2 &&
2880 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2881 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2882 {
2883 return GvHV(gv); /* Foo:: */
def3634b
GS
2884 }
2885
2886 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2887 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2888 if (gv && GvCV(gv)) {
2889 SV * const sv = cv_const_sv(GvCV(gv));
2890 if (sv)
83003860 2891 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2892 }
2893
da51bb9b 2894 return gv_stashpv(pkgname, 0);
def3634b 2895}
a0d0e21e 2896
e3f73d4e
RGS
2897/*
2898 * S_readpipe_override
2899 * Check whether readpipe() is overriden, and generates the appropriate
2900 * optree, provided sublex_start() is called afterwards.
2901 */
2902STATIC void
1d51329b 2903S_readpipe_override(pTHX)
e3f73d4e
RGS
2904{
2905 GV **gvp;
2906 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2907 yylval.ival = OP_BACKTICK;
2908 if ((gv_readpipe
2909 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2910 ||
2911 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2912 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2913 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2914 {
2915 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2916 append_elem(OP_LIST,
2917 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2918 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2919 }
2920 else {
2921 set_csh();
2922 }
2923}
2924
5db06880
NC
2925#ifdef PERL_MAD
2926 /*
2927 * Perl_madlex
2928 * The intent of this yylex wrapper is to minimize the changes to the
2929 * tokener when we aren't interested in collecting madprops. It remains
2930 * to be seen how successful this strategy will be...
2931 */
2932
2933int
2934Perl_madlex(pTHX)
2935{
2936 int optype;
2937 char *s = PL_bufptr;
2938
cd81e915
NC
2939 /* make sure PL_thiswhite is initialized */
2940 PL_thiswhite = 0;
2941 PL_thismad = 0;
5db06880 2942
cd81e915 2943 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2944 if (PL_pending_ident)
2945 return S_pending_ident(aTHX);
2946
2947 /* previous token ate up our whitespace? */
cd81e915
NC
2948 if (!PL_lasttoke && PL_nextwhite) {
2949 PL_thiswhite = PL_nextwhite;
2950 PL_nextwhite = 0;
5db06880
NC
2951 }
2952
2953 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2954 PL_realtokenstart = -1;
2955 PL_thistoken = 0;
5db06880
NC
2956 optype = yylex();
2957 s = PL_bufptr;
cd81e915 2958 assert(PL_curforce < 0);
5db06880 2959
cd81e915
NC
2960 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2961 if (!PL_thistoken) {
2962 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2963 PL_thistoken = newSVpvs("");
5db06880 2964 else {
c35e046a 2965 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2966 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2967 }
2968 }
cd81e915
NC
2969 if (PL_thismad) /* install head */
2970 CURMAD('X', PL_thistoken);
5db06880
NC
2971 }
2972
2973 /* last whitespace of a sublex? */
cd81e915
NC
2974 if (optype == ')' && PL_endwhite) {
2975 CURMAD('X', PL_endwhite);
5db06880
NC
2976 }
2977
cd81e915 2978 if (!PL_thismad) {
5db06880
NC
2979
2980 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2981 if (!PL_thiswhite && !PL_endwhite && !optype) {
2982 sv_free(PL_thistoken);
2983 PL_thistoken = 0;
5db06880
NC
2984 return 0;
2985 }
2986
2987 /* put off final whitespace till peg */
2988 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2989 PL_nextwhite = PL_thiswhite;
2990 PL_thiswhite = 0;
5db06880 2991 }
cd81e915
NC
2992 else if (PL_thisopen) {
2993 CURMAD('q', PL_thisopen);
2994 if (PL_thistoken)
2995 sv_free(PL_thistoken);
2996 PL_thistoken = 0;
5db06880
NC
2997 }
2998 else {
2999 /* Store actual token text as madprop X */
cd81e915 3000 CURMAD('X', PL_thistoken);
5db06880
NC
3001 }
3002
cd81e915 3003 if (PL_thiswhite) {
5db06880 3004 /* add preceding whitespace as madprop _ */
cd81e915 3005 CURMAD('_', PL_thiswhite);
5db06880
NC
3006 }
3007
cd81e915 3008 if (PL_thisstuff) {
5db06880 3009 /* add quoted material as madprop = */
cd81e915 3010 CURMAD('=', PL_thisstuff);
5db06880
NC
3011 }
3012
cd81e915 3013 if (PL_thisclose) {
5db06880 3014 /* add terminating quote as madprop Q */
cd81e915 3015 CURMAD('Q', PL_thisclose);
5db06880
NC
3016 }
3017 }
3018
3019 /* special processing based on optype */
3020
3021 switch (optype) {
3022
3023 /* opval doesn't need a TOKEN since it can already store mp */
3024 case WORD:
3025 case METHOD:
3026 case FUNCMETH:
3027 case THING:
3028 case PMFUNC:
3029 case PRIVATEREF:
3030 case FUNC0SUB:
3031 case UNIOPSUB:
3032 case LSTOPSUB:
3033 if (yylval.opval)
cd81e915
NC
3034 append_madprops(PL_thismad, yylval.opval, 0);
3035 PL_thismad = 0;
5db06880
NC
3036 return optype;
3037
3038 /* fake EOF */
3039 case 0:
3040 optype = PEG;
cd81e915
NC
3041 if (PL_endwhite) {
3042 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3043 PL_endwhite = 0;
5db06880
NC
3044 }
3045 break;
3046
3047 case ']':
3048 case '}':
cd81e915 3049 if (PL_faketokens)
5db06880
NC
3050 break;
3051 /* remember any fake bracket that lexer is about to discard */
3052 if (PL_lex_brackets == 1 &&
3053 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3054 {
3055 s = PL_bufptr;
3056 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3057 s++;
3058 if (*s == '}') {
cd81e915
NC
3059 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3060 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3061 PL_thiswhite = 0;
5db06880
NC
3062 PL_bufptr = s - 1;
3063 break; /* don't bother looking for trailing comment */
3064 }
3065 else
3066 s = PL_bufptr;
3067 }
3068 if (optype == ']')
3069 break;
3070 /* FALLTHROUGH */
3071
3072 /* attach a trailing comment to its statement instead of next token */
3073 case ';':
cd81e915 3074 if (PL_faketokens)
5db06880
NC
3075 break;
3076 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3077 s = PL_bufptr;
3078 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3079 s++;
3080 if (*s == '\n' || *s == '#') {
3081 while (s < PL_bufend && *s != '\n')
3082 s++;
3083 if (s < PL_bufend)
3084 s++;
cd81e915
NC
3085 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3086 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3087 PL_thiswhite = 0;
5db06880
NC
3088 PL_bufptr = s;
3089 }
3090 }
3091 break;
3092
3093 /* pval */
3094 case LABEL:
3095 break;
3096
3097 /* ival */
3098 default:
3099 break;
3100
3101 }
3102
3103 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3104 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3105 PL_thismad = 0;
5db06880
NC
3106 return optype;
3107}
3108#endif
3109
468aa647 3110STATIC char *
cc6ed77d 3111S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3112 dVAR;
468aa647
RGS
3113 if (PL_expect != XSTATE)
3114 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3115 is_use ? "use" : "no"));
29595ff2 3116 s = SKIPSPACE1(s);
468aa647
RGS
3117 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3118 s = force_version(s, TRUE);
29595ff2 3119 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3120 start_force(PL_curforce);
9ded7720 3121 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3122 force_next(WORD);
3123 }
3124 else if (*s == 'v') {
3125 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3126 s = force_version(s, FALSE);
3127 }
3128 }
3129 else {
3130 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3131 s = force_version(s, FALSE);
3132 }
3133 yylval.ival = is_use;
3134 return s;
3135}
748a9306 3136#ifdef DEBUGGING
27da23d5 3137 static const char* const exp_name[] =
09bef843 3138 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3139 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3140 };
748a9306 3141#endif
463ee0b2 3142
02aa26ce
NT
3143/*
3144 yylex
3145
3146 Works out what to call the token just pulled out of the input
3147 stream. The yacc parser takes care of taking the ops we return and
3148 stitching them into a tree.
3149
3150 Returns:
3151 PRIVATEREF
3152
3153 Structure:
3154 if read an identifier
3155 if we're in a my declaration
3156 croak if they tried to say my($foo::bar)
3157 build the ops for a my() declaration
3158 if it's an access to a my() variable
3159 are we in a sort block?
3160 croak if my($a); $a <=> $b
3161 build ops for access to a my() variable
3162 if in a dq string, and they've said @foo and we can't find @foo
3163 croak
3164 build ops for a bareword
3165 if we already built the token before, use it.
3166*/
3167
20141f0e 3168
dba4d153
JH
3169#ifdef __SC__
3170#pragma segment Perl_yylex
3171#endif
dba4d153 3172int
dba4d153 3173Perl_yylex(pTHX)
20141f0e 3174{
97aff369 3175 dVAR;
3afc138a 3176 register char *s = PL_bufptr;
378cc40b 3177 register char *d;
463ee0b2 3178 STRLEN len;
aa7440fb 3179 bool bof = FALSE;
a687059c 3180
10edeb5d
JH
3181 /* orig_keyword, gvp, and gv are initialized here because
3182 * jump to the label just_a_word_zero can bypass their
3183 * initialization later. */
3184 I32 orig_keyword = 0;
3185 GV *gv = NULL;
3186 GV **gvp = NULL;
3187
bbf60fe6 3188 DEBUG_T( {
396482e1 3189 SV* tmp = newSVpvs("");
b6007c36
DM
3190 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3191 (IV)CopLINE(PL_curcop),
3192 lex_state_names[PL_lex_state],
3193 exp_name[PL_expect],
3194 pv_display(tmp, s, strlen(s), 0, 60));
3195 SvREFCNT_dec(tmp);
bbf60fe6 3196 } );
02aa26ce 3197 /* check if there's an identifier for us to look at */
ba979b31 3198 if (PL_pending_ident)
bbf60fe6 3199 return REPORT(S_pending_ident(aTHX));
bbce6d69 3200
02aa26ce
NT
3201 /* no identifier pending identification */
3202
3280af22 3203 switch (PL_lex_state) {
79072805
LW
3204#ifdef COMMENTARY
3205 case LEX_NORMAL: /* Some compilers will produce faster */
3206 case LEX_INTERPNORMAL: /* code if we comment these out. */
3207 break;
3208#endif
3209
09bef843 3210 /* when we've already built the next token, just pull it out of the queue */
79072805 3211 case LEX_KNOWNEXT:
5db06880
NC
3212#ifdef PERL_MAD
3213 PL_lasttoke--;
3214 yylval = PL_nexttoke[PL_lasttoke].next_val;
3215 if (PL_madskills) {
cd81e915 3216 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3217 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3218 if (PL_thismad && PL_thismad->mad_key == '_') {
3219 PL_thiswhite = (SV*)PL_thismad->mad_val;
3220 PL_thismad->mad_val = 0;
3221 mad_free(PL_thismad);
3222 PL_thismad = 0;
5db06880
NC
3223 }
3224 }
3225 if (!PL_lasttoke) {
3226 PL_lex_state = PL_lex_defer;
3227 PL_expect = PL_lex_expect;
3228 PL_lex_defer = LEX_NORMAL;
3229 if (!PL_nexttoke[PL_lasttoke].next_type)
3230 return yylex();
3231 }
3232#else
3280af22 3233 PL_nexttoke--;
5db06880 3234 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3235 if (!PL_nexttoke) {
3236 PL_lex_state = PL_lex_defer;
3237 PL_expect = PL_lex_expect;
3238 PL_lex_defer = LEX_NORMAL;
463ee0b2 3239 }
5db06880
NC
3240#endif
3241#ifdef PERL_MAD
3242 /* FIXME - can these be merged? */
3243 return(PL_nexttoke[PL_lasttoke].next_type);
3244#else
bbf60fe6 3245 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3246#endif
79072805 3247
02aa26ce 3248 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3249 when we get here, PL_bufptr is at the \
02aa26ce 3250 */
79072805
LW
3251 case LEX_INTERPCASEMOD:
3252#ifdef DEBUGGING
3280af22 3253 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3254 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3255#endif
02aa26ce 3256 /* handle \E or end of string */
3280af22 3257 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3258 /* if at a \E */
3280af22 3259 if (PL_lex_casemods) {
f54cb97a 3260 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3261 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3262
3792a11b
NC
3263 if (PL_bufptr != PL_bufend
3264 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3265 PL_bufptr += 2;
3266 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3267#ifdef PERL_MAD
3268 if (PL_madskills)
6b29d1f5 3269 PL_thistoken = newSVpvs("\\E");
5db06880 3270#endif
a0d0e21e 3271 }
bbf60fe6 3272 return REPORT(')');
79072805 3273 }
5db06880
NC
3274#ifdef PERL_MAD
3275 while (PL_bufptr != PL_bufend &&
3276 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3277 if (!PL_thiswhite)
6b29d1f5 3278 PL_thiswhite = newSVpvs("");
cd81e915 3279 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3280 PL_bufptr += 2;
3281 }
3282#else
3280af22
NIS
3283 if (PL_bufptr != PL_bufend)
3284 PL_bufptr += 2;
5db06880 3285#endif
3280af22 3286 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3287 return yylex();
79072805
LW
3288 }
3289 else {
607df283 3290 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3291 "### Saw case modifier\n"); });
3280af22 3292 s = PL_bufptr + 1;
6e909404 3293 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3294#ifdef PERL_MAD
cd81e915 3295 if (!PL_thiswhite)
6b29d1f5 3296 PL_thiswhite = newSVpvs("");
cd81e915 3297 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3298#endif
89122651 3299 PL_bufptr = s + 3;
6e909404
JH
3300 PL_lex_state = LEX_INTERPCONCAT;
3301 return yylex();
a0d0e21e 3302 }
6e909404 3303 else {
90771dc0 3304 I32 tmp;
5db06880
NC
3305 if (!PL_madskills) /* when just compiling don't need correct */
3306 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3307 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3308 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3309 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3310 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3311 return REPORT(')');
6e909404
JH
3312 }
3313 if (PL_lex_casemods > 10)
3314 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3315 PL_lex_casestack[PL_lex_casemods++] = *s;
3316 PL_lex_casestack[PL_lex_casemods] = '\0';
3317 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3318 start_force(PL_curforce);
9ded7720 3319 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3320 force_next('(');
cd81e915 3321 start_force(PL_curforce);
6e909404 3322 if (*s == 'l')
9ded7720 3323 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3324 else if (*s == 'u')
9ded7720 3325 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3326 else if (*s == 'L')
9ded7720 3327 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3328 else if (*s == 'U')
9ded7720 3329 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3330 else if (*s == 'Q')
9ded7720 3331 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3332 else
3333 Perl_croak(aTHX_ "panic: yylex");
5db06880 3334 if (PL_madskills) {
6b29d1f5 3335 SV* const tmpsv = newSVpvs("");
5db06880
NC
3336 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3337 curmad('_', tmpsv);
3338 }
6e909404 3339 PL_bufptr = s + 1;
a0d0e21e 3340 }
79072805 3341 force_next(FUNC);
3280af22
NIS
3342 if (PL_lex_starts) {
3343 s = PL_bufptr;
3344 PL_lex_starts = 0;
5db06880
NC
3345#ifdef PERL_MAD
3346 if (PL_madskills) {
cd81e915
NC
3347 if (PL_thistoken)
3348 sv_free(PL_thistoken);
6b29d1f5 3349 PL_thistoken = newSVpvs("");
5db06880
NC
3350 }
3351#endif
131b3ad0
DM
3352 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3353 if (PL_lex_casemods == 1 && PL_lex_inpat)
3354 OPERATOR(',');
3355 else
3356 Aop(OP_CONCAT);
79072805
LW
3357 }
3358 else
cea2e8a9 3359 return yylex();
79072805
LW
3360 }
3361
55497cff 3362 case LEX_INTERPPUSH:
bbf60fe6 3363 return REPORT(sublex_push());
55497cff 3364
79072805 3365 case LEX_INTERPSTART:
3280af22 3366 if (PL_bufptr == PL_bufend)
bbf60fe6 3367 return REPORT(sublex_done());
607df283 3368 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3369 "### Interpolated variable\n"); });
3280af22
NIS
3370 PL_expect = XTERM;
3371 PL_lex_dojoin = (*PL_bufptr == '@');
3372 PL_lex_state = LEX_INTERPNORMAL;
3373 if (PL_lex_dojoin) {
cd81e915 3374 start_force(PL_curforce);
9ded7720 3375 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3376 force_next(',');
cd81e915 3377 start_force(PL_curforce);
a0d0e21e 3378 force_ident("\"", '$');
cd81e915 3379 start_force(PL_curforce);
9ded7720 3380 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3381 force_next('$');
cd81e915 3382 start_force(PL_curforce);
9ded7720 3383 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3384 force_next('(');
cd81e915 3385 start_force(PL_curforce);
9ded7720 3386 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3387 force_next(FUNC);
3388 }
3280af22
NIS
3389 if (PL_lex_starts++) {
3390 s = PL_bufptr;
5db06880
NC
3391#ifdef PERL_MAD
3392 if (PL_madskills) {
cd81e915
NC
3393 if (PL_thistoken)
3394 sv_free(PL_thistoken);
6b29d1f5 3395 PL_thistoken = newSVpvs("");
5db06880
NC
3396 }
3397#endif
131b3ad0
DM
3398 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3399 if (!PL_lex_casemods && PL_lex_inpat)
3400 OPERATOR(',');
3401 else
3402 Aop(OP_CONCAT);
79072805 3403 }
cea2e8a9 3404 return yylex();
79072805
LW
3405
3406 case LEX_INTERPENDMAYBE:
3280af22
NIS
3407 if (intuit_more(PL_bufptr)) {
3408 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3409 break;
3410 }
3411 /* FALL THROUGH */
3412
3413 case LEX_INTERPEND:
3280af22
NIS
3414 if (PL_lex_dojoin) {
3415 PL_lex_dojoin = FALSE;
3416 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3417#ifdef PERL_MAD
3418 if (PL_madskills) {
cd81e915
NC
3419 if (PL_thistoken)
3420 sv_free(PL_thistoken);
6b29d1f5 3421 PL_thistoken = newSVpvs("");
5db06880
NC
3422 }
3423#endif
bbf60fe6 3424 return REPORT(')');
79072805 3425 }
43a16006 3426 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3427 && SvEVALED(PL_lex_repl))
43a16006 3428 {
e9fa98b2 3429 if (PL_bufptr != PL_bufend)
cea2e8a9 3430 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3431 PL_lex_repl = NULL;
e9fa98b2 3432 }
79072805
LW
3433 /* FALLTHROUGH */
3434 case LEX_INTERPCONCAT:
3435#ifdef DEBUGGING
3280af22 3436 if (PL_lex_brackets)
cea2e8a9 3437 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3438#endif
3280af22 3439 if (PL_bufptr == PL_bufend)
bbf60fe6 3440 return REPORT(sublex_done());
79072805 3441
3280af22
NIS
3442 if (SvIVX(PL_linestr) == '\'') {
3443 SV *sv = newSVsv(PL_linestr);
3444 if (!PL_lex_inpat)
76e3520e 3445 sv = tokeq(sv);
3280af22 3446 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3447 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3448 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3449 s = PL_bufend;
79072805
LW
3450 }
3451 else {
3280af22 3452 s = scan_const(PL_bufptr);
79072805 3453 if (*s == '\\')
3280af22 3454 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3455 else
3280af22 3456 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3457 }
3458
3280af22 3459 if (s != PL_bufptr) {
cd81e915 3460 start_force(PL_curforce);
5db06880
NC
3461 if (PL_madskills) {
3462 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3463 }
9ded7720 3464 NEXTVAL_NEXTTOKE = yylval;
3280af22 3465 PL_expect = XTERM;
79072805 3466 force_next(THING);
131b3ad0 3467 if (PL_lex_starts++) {
5db06880
NC
3468#ifdef PERL_MAD
3469 if (PL_madskills) {
cd81e915
NC
3470 if (PL_thistoken)
3471 sv_free(PL_thistoken);
6b29d1f5 3472 PL_thistoken = newSVpvs("");
5db06880
NC
3473 }
3474#endif
131b3ad0
DM
3475 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3476 if (!PL_lex_casemods && PL_lex_inpat)
3477 OPERATOR(',');
3478 else
3479 Aop(OP_CONCAT);
3480 }
79072805 3481 else {
3280af22 3482 PL_bufptr = s;
cea2e8a9 3483 return yylex();
79072805
LW
3484 }
3485 }
3486
cea2e8a9 3487 return yylex();
a0d0e21e 3488 case LEX_FORMLINE:
3280af22
NIS
3489 PL_lex_state = LEX_NORMAL;
3490 s = scan_formline(PL_bufptr);
3491 if (!PL_lex_formbrack)
a0d0e21e
LW
3492 goto rightbracket;
3493 OPERATOR(';');
79072805
LW
3494 }
3495
3280af22
NIS
3496 s = PL_bufptr;
3497 PL_oldoldbufptr = PL_oldbufptr;
3498 PL_oldbufptr = s;
463ee0b2
LW
3499
3500 retry:
5db06880 3501#ifdef PERL_MAD
cd81e915
NC
3502 if (PL_thistoken) {
3503 sv_free(PL_thistoken);
3504 PL_thistoken = 0;
5db06880 3505 }
cd81e915 3506 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3507#endif
378cc40b
LW
3508 switch (*s) {
3509 default:
7e2040f0 3510 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3511 goto keylookup;
cea2e8a9 3512 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3513 case 4:
3514 case 26:
3515 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3516 case 0:
5db06880
NC
3517#ifdef PERL_MAD
3518 if (PL_madskills)
cd81e915 3519 PL_faketokens = 0;
5db06880 3520#endif
3280af22
NIS
3521 if (!PL_rsfp) {
3522 PL_last_uni = 0;
3523 PL_last_lop = 0;
c5ee2135 3524 if (PL_lex_brackets) {
10edeb5d
JH
3525 yyerror((const char *)
3526 (PL_lex_formbrack
3527 ? "Format not terminated"
3528 : "Missing right curly or square bracket"));
c5ee2135 3529 }
4e553d73 3530 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3531 "### Tokener got EOF\n");
5f80b19c 3532 } );
79072805 3533 TOKEN(0);
463ee0b2 3534 }
3280af22 3535 if (s++ < PL_bufend)
a687059c 3536 goto retry; /* ignore stray nulls */
3280af22
NIS
3537 PL_last_uni = 0;
3538 PL_last_lop = 0;
3539 if (!PL_in_eval && !PL_preambled) {
3540 PL_preambled = TRUE;
5db06880
NC
3541#ifdef PERL_MAD
3542 if (PL_madskills)
cd81e915 3543 PL_faketokens = 1;
5db06880 3544#endif
3280af22
NIS
3545 sv_setpv(PL_linestr,incl_perldb());
3546 if (SvCUR(PL_linestr))
396482e1 3547 sv_catpvs(PL_linestr,";");
3280af22
NIS
3548 if (PL_preambleav){
3549 while(AvFILLp(PL_preambleav) >= 0) {