This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Perl_ck_smartmatch needs a dVAR too.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 52#define PL_linestr (PL_parser->linestr)
c2598295
DM
53#define PL_expect (PL_parser->expect)
54#define PL_copline (PL_parser->copline)
f06b5848
DM
55#define PL_bufptr (PL_parser->bufptr)
56#define PL_oldbufptr (PL_parser->oldbufptr)
57#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
58#define PL_linestart (PL_parser->linestart)
59#define PL_bufend (PL_parser->bufend)
60#define PL_last_uni (PL_parser->last_uni)
61#define PL_last_lop (PL_parser->last_lop)
62#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 63#define PL_lex_state (PL_parser->lex_state)
2f9285f8 64#define PL_rsfp (PL_parser->rsfp)
5486870f 65#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
66#define PL_in_my (PL_parser->in_my)
67#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 68#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 69#define PL_multi_end (PL_parser->multi_end)
13765c85 70#define PL_error_count (PL_parser->error_count)
199e78b7
DM
71
72#ifdef PERL_MAD
73# define PL_endwhite (PL_parser->endwhite)
74# define PL_faketokens (PL_parser->faketokens)
75# define PL_lasttoke (PL_parser->lasttoke)
76# define PL_nextwhite (PL_parser->nextwhite)
77# define PL_realtokenstart (PL_parser->realtokenstart)
78# define PL_skipwhite (PL_parser->skipwhite)
79# define PL_thisclose (PL_parser->thisclose)
80# define PL_thismad (PL_parser->thismad)
81# define PL_thisopen (PL_parser->thisopen)
82# define PL_thisstuff (PL_parser->thisstuff)
83# define PL_thistoken (PL_parser->thistoken)
84# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
85# define PL_thiswhite (PL_parser->thiswhite)
86# define PL_nexttoke (PL_parser->nexttoke)
87# define PL_curforce (PL_parser->curforce)
88#else
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_nexttype (PL_parser->nexttype)
91# define PL_nextval (PL_parser->nextval)
199e78b7
DM
92#endif
93
3cbf51f5
DM
94static int
95S_pending_ident(pTHX);
199e78b7 96
0bd48802 97static const char ident_too_long[] = "Identifier too long";
c445ea15 98static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 99
6e3aabd6 100#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
101static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
102static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 103#endif
51371543 104
29595ff2 105#ifdef PERL_MAD
29595ff2 106# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 107# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 108#else
5db06880 109# define CURMAD(slot,sv)
9ded7720 110# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
111#endif
112
9059aa12
LW
113#define XFAKEBRACK 128
114#define XENUMMASK 127
115
39e02b42
JH
116#ifdef USE_UTF8_SCRIPTS
117# define UTF (!IN_BYTES)
2b9d42f0 118#else
746b446a 119# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 120#endif
a0ed51b3 121
61f0cdd9 122/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
123 * 1999-02-27 mjd-perl-patch@plover.com */
124#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
125
bf4acbe4
GS
126/* On MacOS, respect nonbreaking spaces */
127#ifdef MACOS_TRADITIONAL
128#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
129#else
130#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
131#endif
132
ffb4593c
NT
133/* LEX_* are values for PL_lex_state, the state of the lexer.
134 * They are arranged oddly so that the guard on the switch statement
79072805
LW
135 * can get by with a single comparison (if the compiler is smart enough).
136 */
137
fb73857a 138/* #define LEX_NOTPARSING 11 is done in perl.h. */
139
b6007c36
DM
140#define LEX_NORMAL 10 /* normal code (ie not within "...") */
141#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
142#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
143#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
144#define LEX_INTERPSTART 6 /* expecting the start of a $var */
145
146 /* at end of code, eg "$x" followed by: */
147#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
148#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
149
150#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
151 string or after \E, $foo, etc */
152#define LEX_INTERPCONST 2 /* NOT USED */
153#define LEX_FORMLINE 1 /* expecting a format line */
154#define LEX_KNOWNEXT 0 /* next token known; just return it */
155
79072805 156
bbf60fe6 157#ifdef DEBUGGING
27da23d5 158static const char* const lex_state_names[] = {
bbf60fe6
DM
159 "KNOWNEXT",
160 "FORMLINE",
161 "INTERPCONST",
162 "INTERPCONCAT",
163 "INTERPENDMAYBE",
164 "INTERPEND",
165 "INTERPSTART",
166 "INTERPPUSH",
167 "INTERPCASEMOD",
168 "INTERPNORMAL",
169 "NORMAL"
170};
171#endif
172
79072805
LW
173#ifdef ff_next
174#undef ff_next
d48672a2
LW
175#endif
176
79072805 177#include "keywords.h"
fe14fcc3 178
ffb4593c
NT
179/* CLINE is a macro that ensures PL_copline has a sane value */
180
ae986130
LW
181#ifdef CLINE
182#undef CLINE
183#endif
57843af0 184#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 185
5db06880 186#ifdef PERL_MAD
29595ff2
NC
187# define SKIPSPACE0(s) skipspace0(s)
188# define SKIPSPACE1(s) skipspace1(s)
189# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
190# define PEEKSPACE(s) skipspace2(s,0)
191#else
192# define SKIPSPACE0(s) skipspace(s)
193# define SKIPSPACE1(s) skipspace(s)
194# define SKIPSPACE2(s,tsv) skipspace(s)
195# define PEEKSPACE(s) skipspace(s)
196#endif
197
ffb4593c
NT
198/*
199 * Convenience functions to return different tokens and prime the
9cbb5ea2 200 * lexer for the next token. They all take an argument.
ffb4593c
NT
201 *
202 * TOKEN : generic token (used for '(', DOLSHARP, etc)
203 * OPERATOR : generic operator
204 * AOPERATOR : assignment operator
205 * PREBLOCK : beginning the block after an if, while, foreach, ...
206 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
207 * PREREF : *EXPR where EXPR is not a simple identifier
208 * TERM : expression term
209 * LOOPX : loop exiting command (goto, last, dump, etc)
210 * FTST : file test operator
211 * FUN0 : zero-argument function
2d2e263d 212 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
213 * BOop : bitwise or or xor
214 * BAop : bitwise and
215 * SHop : shift operator
216 * PWop : power operator
9cbb5ea2 217 * PMop : pattern-matching operator
ffb4593c
NT
218 * Aop : addition-level operator
219 * Mop : multiplication-level operator
220 * Eop : equality-testing operator
e5edeb50 221 * Rop : relational operator <= != gt
ffb4593c
NT
222 *
223 * Also see LOP and lop() below.
224 */
225
998054bd 226#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 227# define REPORT(retval) tokereport((I32)retval)
998054bd 228#else
bbf60fe6 229# define REPORT(retval) (retval)
998054bd
SC
230#endif
231
bbf60fe6
DM
232#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
233#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
234#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
235#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
236#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
237#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
238#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
239#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
240#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
241#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
242#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
243#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
244#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
245#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
246#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
247#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
248#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
249#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
250#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
251#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 252
a687059c
LW
253/* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
a687059c 257 */
376fcdbf
AL
258#define UNI2(f,x) { \
259 yylval.ival = f; \
260 PL_expect = x; \
261 PL_bufptr = s; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = f; \
264 if (*s == '(') \
265 return REPORT( (int)FUNC1 ); \
29595ff2 266 s = PEEKSPACE(s); \
376fcdbf
AL
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268 }
6f33ba73
RGS
269#define UNI(f) UNI2(f,XTERM)
270#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 271
376fcdbf
AL
272#define UNIBRACK(f) { \
273 yylval.ival = f; \
274 PL_bufptr = s; \
275 PL_last_uni = PL_oldbufptr; \
276 if (*s == '(') \
277 return REPORT( (int)FUNC1 ); \
29595ff2 278 s = PEEKSPACE(s); \
376fcdbf
AL
279 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
280 }
79072805 281
9f68db38 282/* grandfather return to old style */
3280af22 283#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 284
8fa7f367
JH
285#ifdef DEBUGGING
286
bbf60fe6
DM
287/* how to interpret the yylval associated with the token */
288enum token_type {
289 TOKENTYPE_NONE,
290 TOKENTYPE_IVAL,
291 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
292 TOKENTYPE_PVAL,
293 TOKENTYPE_OPVAL,
294 TOKENTYPE_GVVAL
295};
296
6d4a66ac
NC
297static struct debug_tokens {
298 const int token;
299 enum token_type type;
300 const char *name;
301} const debug_tokens[] =
9041c2e3 302{
bbf60fe6
DM
303 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
304 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
305 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
306 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
307 { ARROW, TOKENTYPE_NONE, "ARROW" },
308 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
309 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
310 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
311 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
312 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 313 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
314 { DO, TOKENTYPE_NONE, "DO" },
315 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
316 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
317 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
318 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
319 { ELSE, TOKENTYPE_NONE, "ELSE" },
320 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
321 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
322 { FOR, TOKENTYPE_IVAL, "FOR" },
323 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
324 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
325 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
326 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
327 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
328 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 329 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
330 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
331 { IF, TOKENTYPE_IVAL, "IF" },
332 { LABEL, TOKENTYPE_PVAL, "LABEL" },
333 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
334 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
335 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
336 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
337 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
338 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
339 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
340 { MY, TOKENTYPE_IVAL, "MY" },
341 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
342 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
343 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
344 { OROP, TOKENTYPE_IVAL, "OROP" },
345 { OROR, TOKENTYPE_NONE, "OROR" },
346 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
347 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
348 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
349 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
350 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
351 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
352 { PREINC, TOKENTYPE_NONE, "PREINC" },
353 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
354 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
355 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
356 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
357 { SUB, TOKENTYPE_NONE, "SUB" },
358 { THING, TOKENTYPE_OPVAL, "THING" },
359 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
360 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
361 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
362 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
363 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
364 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 365 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
366 { WHILE, TOKENTYPE_IVAL, "WHILE" },
367 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 368 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
369};
370
371/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 372
bbf60fe6 373STATIC int
f5bd084c 374S_tokereport(pTHX_ I32 rv)
bbf60fe6 375{
97aff369 376 dVAR;
bbf60fe6 377 if (DEBUG_T_TEST) {
bd61b366 378 const char *name = NULL;
bbf60fe6 379 enum token_type type = TOKENTYPE_NONE;
f54cb97a 380 const struct debug_tokens *p;
396482e1 381 SV* const report = newSVpvs("<== ");
bbf60fe6 382
f54cb97a 383 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
384 if (p->token == (int)rv) {
385 name = p->name;
386 type = p->type;
387 break;
388 }
389 }
390 if (name)
54667de8 391 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
392 else if ((char)rv > ' ' && (char)rv < '~')
393 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
394 else if (!rv)
396482e1 395 sv_catpvs(report, "EOF");
bbf60fe6
DM
396 else
397 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
398 switch (type) {
399 case TOKENTYPE_NONE:
400 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
401 break;
402 case TOKENTYPE_IVAL:
e4584336 403 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
404 break;
405 case TOKENTYPE_OPNUM:
406 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
407 PL_op_name[yylval.ival]);
408 break;
409 case TOKENTYPE_PVAL:
410 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
411 break;
412 case TOKENTYPE_OPVAL:
b6007c36 413 if (yylval.opval) {
401441c0 414 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 415 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
416 if (yylval.opval->op_type == OP_CONST) {
417 Perl_sv_catpvf(aTHX_ report, " %s",
418 SvPEEK(cSVOPx_sv(yylval.opval)));
419 }
420
421 }
401441c0 422 else
396482e1 423 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
424 break;
425 }
b6007c36 426 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
427 };
428 return (int)rv;
998054bd
SC
429}
430
b6007c36
DM
431
432/* print the buffer with suitable escapes */
433
434STATIC void
435S_printbuf(pTHX_ const char* fmt, const char* s)
436{
396482e1 437 SV* const tmp = newSVpvs("");
b6007c36
DM
438 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
439 SvREFCNT_dec(tmp);
440}
441
8fa7f367
JH
442#endif
443
ffb4593c
NT
444/*
445 * S_ao
446 *
c963b151
BD
447 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
448 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
449 */
450
76e3520e 451STATIC int
cea2e8a9 452S_ao(pTHX_ int toketype)
a0d0e21e 453{
97aff369 454 dVAR;
3280af22
NIS
455 if (*PL_bufptr == '=') {
456 PL_bufptr++;
a0d0e21e
LW
457 if (toketype == ANDAND)
458 yylval.ival = OP_ANDASSIGN;
459 else if (toketype == OROR)
460 yylval.ival = OP_ORASSIGN;
c963b151
BD
461 else if (toketype == DORDOR)
462 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
463 toketype = ASSIGNOP;
464 }
465 return toketype;
466}
467
ffb4593c
NT
468/*
469 * S_no_op
470 * When Perl expects an operator and finds something else, no_op
471 * prints the warning. It always prints "<something> found where
472 * operator expected. It prints "Missing semicolon on previous line?"
473 * if the surprise occurs at the start of the line. "do you need to
474 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
475 * where the compiler doesn't know if foo is a method call or a function.
476 * It prints "Missing operator before end of line" if there's nothing
477 * after the missing operator, or "... before <...>" if there is something
478 * after the missing operator.
479 */
480
76e3520e 481STATIC void
bfed75c6 482S_no_op(pTHX_ const char *what, char *s)
463ee0b2 483{
97aff369 484 dVAR;
9d4ba2ae
AL
485 char * const oldbp = PL_bufptr;
486 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 487
1189a94a
GS
488 if (!s)
489 s = oldbp;
07c798fb 490 else
1189a94a 491 PL_bufptr = s;
cea2e8a9 492 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
493 if (ckWARN_d(WARN_SYNTAX)) {
494 if (is_first)
495 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
496 "\t(Missing semicolon on previous line?)\n");
497 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 498 const char *t;
c35e046a
AL
499 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
500 NOOP;
56da5a46
RGS
501 if (t < PL_bufptr && isSPACE(*t))
502 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
503 "\t(Do you need to predeclare %.*s?)\n",
551405c4 504 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
505 }
506 else {
507 assert(s >= oldbp);
508 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 509 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 510 }
07c798fb 511 }
3280af22 512 PL_bufptr = oldbp;
8990e307
LW
513}
514
ffb4593c
NT
515/*
516 * S_missingterm
517 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 518 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
519 * If we're in a delimited string and the delimiter is a control
520 * character, it's reformatted into a two-char sequence like ^C.
521 * This is fatal.
522 */
523
76e3520e 524STATIC void
cea2e8a9 525S_missingterm(pTHX_ char *s)
8990e307 526{
97aff369 527 dVAR;
8990e307
LW
528 char tmpbuf[3];
529 char q;
530 if (s) {
9d4ba2ae 531 char * const nl = strrchr(s,'\n');
d2719217 532 if (nl)
8990e307
LW
533 *nl = '\0';
534 }
9d116dd7
JH
535 else if (
536#ifdef EBCDIC
537 iscntrl(PL_multi_close)
538#else
539 PL_multi_close < 32 || PL_multi_close == 127
540#endif
541 ) {
8990e307 542 *tmpbuf = '^';
585ec06d 543 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
544 tmpbuf[2] = '\0';
545 s = tmpbuf;
546 }
547 else {
eb160463 548 *tmpbuf = (char)PL_multi_close;
8990e307
LW
549 tmpbuf[1] = '\0';
550 s = tmpbuf;
551 }
552 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 553 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 554}
79072805 555
ef89dcc3 556#define FEATURE_IS_ENABLED(name) \
0d863452 557 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 558 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
559/*
560 * S_feature_is_enabled
561 * Check whether the named feature is enabled.
562 */
563STATIC bool
d4c19fe8 564S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 565{
97aff369 566 dVAR;
0d863452 567 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 568 char he_name[32] = "feature_";
6fca0082 569 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 570
7b9ef140 571 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
572}
573
ffb4593c
NT
574/*
575 * Perl_deprecate
ffb4593c
NT
576 */
577
79072805 578void
bfed75c6 579Perl_deprecate(pTHX_ const char *s)
a0d0e21e 580{
599cee73 581 if (ckWARN(WARN_DEPRECATED))
9014280d 582 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
583}
584
12bcd1a6 585void
bfed75c6 586Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
587{
588 /* This function should NOT be called for any new deprecated warnings */
589 /* Use Perl_deprecate instead */
590 /* */
591 /* It is here to maintain backward compatibility with the pre-5.8 */
592 /* warnings category hierarchy. The "deprecated" category used to */
593 /* live under the "syntax" category. It is now a top-level category */
594 /* in its own right. */
595
596 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 597 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
598 "Use of %s is deprecated", s);
599}
600
ffb4593c 601/*
9cbb5ea2
GS
602 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
603 * utf16-to-utf8-reversed.
ffb4593c
NT
604 */
605
c39cd008
GS
606#ifdef PERL_CR_FILTER
607static void
608strip_return(SV *sv)
609{
95a20fc0 610 register const char *s = SvPVX_const(sv);
9d4ba2ae 611 register const char * const e = s + SvCUR(sv);
c39cd008
GS
612 /* outer loop optimized to do nothing if there are no CR-LFs */
613 while (s < e) {
614 if (*s++ == '\r' && *s == '\n') {
615 /* hit a CR-LF, need to copy the rest */
616 register char *d = s - 1;
617 *d++ = *s++;
618 while (s < e) {
619 if (*s == '\r' && s[1] == '\n')
620 s++;
621 *d++ = *s++;
622 }
623 SvCUR(sv) -= s - d;
624 return;
625 }
626 }
627}
a868473f 628
76e3520e 629STATIC I32
c39cd008 630S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 631{
f54cb97a 632 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
633 if (count > 0 && !maxlen)
634 strip_return(sv);
635 return count;
a868473f
NIS
636}
637#endif
638
199e78b7
DM
639
640
ffb4593c
NT
641/*
642 * Perl_lex_start
5486870f 643 *
e3abe207 644 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
645 *
646 * rsfp is the opened file handle to read from (if any),
647 *
648 * line holds any initial content already read from the file (or in
649 * the case of no file, such as an eval, the whole contents);
650 *
651 * new_filter indicates that this is a new file and it shouldn't inherit
652 * the filters from the current parser (ie require).
ffb4593c
NT
653 */
654
a0d0e21e 655void
5486870f 656Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 657{
97aff369 658 dVAR;
6ef55633 659 const char *s = NULL;
8990e307 660 STRLEN len;
5486870f 661 yy_parser *parser, *oparser;
acdf0a21
DM
662
663 /* create and initialise a parser */
664
199e78b7 665 Newxz(parser, 1, yy_parser);
5486870f 666 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
667 PL_parser = parser;
668
669 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
670 parser->ps = parser->stack;
671 parser->stack_size = YYINITDEPTH;
672
673 parser->stack->state = 0;
674 parser->yyerrstatus = 0;
675 parser->yychar = YYEMPTY; /* Cause a token to be read. */
676
e3abe207
DM
677 /* on scope exit, free this parser and restore any outer one */
678 SAVEPARSER(parser);
7c4baf47 679 parser->saved_curcop = PL_curcop;
e3abe207 680
acdf0a21 681 /* initialise lexer state */
8990e307 682
fb205e7a
DM
683#ifdef PERL_MAD
684 parser->curforce = -1;
685#else
686 parser->nexttoke = 0;
687#endif
c2598295 688 parser->copline = NOLINE;
5afb0a62 689 parser->lex_state = LEX_NORMAL;
c2598295 690 parser->expect = XSTATE;
2f9285f8 691 parser->rsfp = rsfp;
56b27c9a 692 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
5486870f 693 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
2f9285f8 694
199e78b7
DM
695 Newx(parser->lex_brackstack, 120, char);
696 Newx(parser->lex_casestack, 12, char);
697 *parser->lex_casestack = '\0';
02b34bbe 698
10efb74f
NC
699 if (line) {
700 s = SvPV_const(line, len);
701 } else {
702 len = 0;
703 }
bdc0bf6f 704
10efb74f 705 if (!len) {
bdc0bf6f 706 parser->linestr = newSVpvs("\n;");
10efb74f 707 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 708 parser->linestr = newSVsv(line);
10efb74f 709 if (s[len-1] != ';')
bdc0bf6f 710 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
711 } else {
712 SvTEMP_off(line);
713 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 714 parser->linestr = line;
8990e307 715 }
f06b5848
DM
716 parser->oldoldbufptr =
717 parser->oldbufptr =
718 parser->bufptr =
719 parser->linestart = SvPVX(parser->linestr);
720 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
721 parser->last_lop = parser->last_uni = NULL;
79072805 722}
a687059c 723
e3abe207
DM
724
725/* delete a parser object */
726
727void
728Perl_parser_free(pTHX_ const yy_parser *parser)
729{
7c4baf47 730 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
731 SvREFCNT_dec(parser->linestr);
732
2f9285f8
DM
733 if (parser->rsfp == PerlIO_stdin())
734 PerlIO_clearerr(parser->rsfp);
735 else if (parser->rsfp && parser->old_parser
736 && parser->rsfp != parser->old_parser->rsfp)
737 PerlIO_close(parser->rsfp);
5486870f 738 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 739
e3abe207
DM
740 Safefree(parser->stack);
741 Safefree(parser->lex_brackstack);
742 Safefree(parser->lex_casestack);
743 PL_parser = parser->old_parser;
744 Safefree(parser);
745}
746
747
ffb4593c
NT
748/*
749 * Perl_lex_end
9cbb5ea2
GS
750 * Finalizer for lexing operations. Must be called when the parser is
751 * done with the lexer.
ffb4593c
NT
752 */
753
463ee0b2 754void
864dbfa3 755Perl_lex_end(pTHX)
463ee0b2 756{
97aff369 757 dVAR;
3280af22 758 PL_doextract = FALSE;
463ee0b2
LW
759}
760
ffb4593c
NT
761/*
762 * S_incline
763 * This subroutine has nothing to do with tilting, whether at windmills
764 * or pinball tables. Its name is short for "increment line". It
57843af0 765 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 766 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
767 * # line 500 "foo.pm"
768 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
769 */
770
76e3520e 771STATIC void
d9095cec 772S_incline(pTHX_ const char *s)
463ee0b2 773{
97aff369 774 dVAR;
d9095cec
NC
775 const char *t;
776 const char *n;
777 const char *e;
463ee0b2 778
57843af0 779 CopLINE_inc(PL_curcop);
463ee0b2
LW
780 if (*s++ != '#')
781 return;
d4c19fe8
AL
782 while (SPACE_OR_TAB(*s))
783 s++;
73659bf1
GS
784 if (strnEQ(s, "line", 4))
785 s += 4;
786 else
787 return;
084592ab 788 if (SPACE_OR_TAB(*s))
73659bf1 789 s++;
4e553d73 790 else
73659bf1 791 return;
d4c19fe8
AL
792 while (SPACE_OR_TAB(*s))
793 s++;
463ee0b2
LW
794 if (!isDIGIT(*s))
795 return;
d4c19fe8 796
463ee0b2
LW
797 n = s;
798 while (isDIGIT(*s))
799 s++;
bf4acbe4 800 while (SPACE_OR_TAB(*s))
463ee0b2 801 s++;
73659bf1 802 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 803 s++;
73659bf1
GS
804 e = t + 1;
805 }
463ee0b2 806 else {
c35e046a
AL
807 t = s;
808 while (!isSPACE(*t))
809 t++;
73659bf1 810 e = t;
463ee0b2 811 }
bf4acbe4 812 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
813 e++;
814 if (*e != '\n' && *e != '\0')
815 return; /* false alarm */
816
f4dd75d9 817 if (t - s > 0) {
d9095cec 818 const STRLEN len = t - s;
8a5ee598 819#ifndef USE_ITHREADS
c4420975 820 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
821 STRLEN tmplen = cf ? strlen(cf) : 0;
822 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
823 /* must copy *{"::_<(eval N)[oldfilename:L]"}
824 * to *{"::_<newfilename"} */
44867030
NC
825 /* However, the long form of evals is only turned on by the
826 debugger - usually they're "(eval %lu)" */
827 char smallbuf[128];
828 char *tmpbuf;
829 GV **gvp;
d9095cec 830 STRLEN tmplen2 = len;
798b63bc 831 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
832 tmpbuf = smallbuf;
833 else
2ae0db35 834 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
835 tmpbuf[0] = '_';
836 tmpbuf[1] = '<';
2ae0db35 837 memcpy(tmpbuf + 2, cf, tmplen);
44867030 838 tmplen += 2;
8a5ee598
RGS
839 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
840 if (gvp) {
44867030
NC
841 char *tmpbuf2;
842 GV *gv2;
843
844 if (tmplen2 + 2 <= sizeof smallbuf)
845 tmpbuf2 = smallbuf;
846 else
847 Newx(tmpbuf2, tmplen2 + 2, char);
848
849 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
850 /* Either they malloc'd it, or we malloc'd it,
851 so no prefix is present in ours. */
852 tmpbuf2[0] = '_';
853 tmpbuf2[1] = '<';
854 }
855
856 memcpy(tmpbuf2 + 2, s, tmplen2);
857 tmplen2 += 2;
858
8a5ee598 859 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 860 if (!isGV(gv2)) {
8a5ee598 861 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
862 /* adjust ${"::_<newfilename"} to store the new file name */
863 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
864 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
865 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
866 }
44867030
NC
867
868 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 869 }
e66cf94c 870 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 871 }
8a5ee598 872#endif
05ec9bb3 873 CopFILE_free(PL_curcop);
d9095cec 874 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 875 }
57843af0 876 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
877}
878
29595ff2 879#ifdef PERL_MAD
cd81e915 880/* skip space before PL_thistoken */
29595ff2
NC
881
882STATIC char *
883S_skipspace0(pTHX_ register char *s)
884{
885 s = skipspace(s);
886 if (!PL_madskills)
887 return s;
cd81e915
NC
888 if (PL_skipwhite) {
889 if (!PL_thiswhite)
6b29d1f5 890 PL_thiswhite = newSVpvs("");
cd81e915
NC
891 sv_catsv(PL_thiswhite, PL_skipwhite);
892 sv_free(PL_skipwhite);
893 PL_skipwhite = 0;
894 }
895 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
896 return s;
897}
898
cd81e915 899/* skip space after PL_thistoken */
29595ff2
NC
900
901STATIC char *
902S_skipspace1(pTHX_ register char *s)
903{
d4c19fe8 904 const char *start = s;
29595ff2
NC
905 I32 startoff = start - SvPVX(PL_linestr);
906
907 s = skipspace(s);
908 if (!PL_madskills)
909 return s;
910 start = SvPVX(PL_linestr) + startoff;
cd81e915 911 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 912 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
913 PL_thistoken = newSVpvn(tstart, start - tstart);
914 }
915 PL_realtokenstart = -1;
916 if (PL_skipwhite) {
917 if (!PL_nextwhite)
6b29d1f5 918 PL_nextwhite = newSVpvs("");
cd81e915
NC
919 sv_catsv(PL_nextwhite, PL_skipwhite);
920 sv_free(PL_skipwhite);
921 PL_skipwhite = 0;
29595ff2
NC
922 }
923 return s;
924}
925
926STATIC char *
927S_skipspace2(pTHX_ register char *s, SV **svp)
928{
c35e046a
AL
929 char *start;
930 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
931 const I32 startoff = s - SvPVX(PL_linestr);
932
29595ff2
NC
933 s = skipspace(s);
934 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
935 if (!PL_madskills || !svp)
936 return s;
937 start = SvPVX(PL_linestr) + startoff;
cd81e915 938 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 939 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
940 PL_thistoken = newSVpvn(tstart, start - tstart);
941 PL_realtokenstart = -1;
29595ff2 942 }
cd81e915 943 if (PL_skipwhite) {
29595ff2 944 if (!*svp)
6b29d1f5 945 *svp = newSVpvs("");
cd81e915
NC
946 sv_setsv(*svp, PL_skipwhite);
947 sv_free(PL_skipwhite);
948 PL_skipwhite = 0;
29595ff2
NC
949 }
950
951 return s;
952}
953#endif
954
80a702cd 955STATIC void
5fa550fb 956S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
957{
958 AV *av = CopFILEAVx(PL_curcop);
959 if (av) {
b9f83d2f 960 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
961 if (orig_sv)
962 sv_setsv(sv, orig_sv);
963 else
964 sv_setpvn(sv, buf, len);
80a702cd
RGS
965 (void)SvIOK_on(sv);
966 SvIV_set(sv, 0);
967 av_store(av, (I32)CopLINE(PL_curcop), sv);
968 }
969}
970
ffb4593c
NT
971/*
972 * S_skipspace
973 * Called to gobble the appropriate amount and type of whitespace.
974 * Skips comments as well.
975 */
976
76e3520e 977STATIC char *
cea2e8a9 978S_skipspace(pTHX_ register char *s)
a687059c 979{
97aff369 980 dVAR;
5db06880
NC
981#ifdef PERL_MAD
982 int curoff;
983 int startoff = s - SvPVX(PL_linestr);
984
cd81e915
NC
985 if (PL_skipwhite) {
986 sv_free(PL_skipwhite);
987 PL_skipwhite = 0;
5db06880
NC
988 }
989#endif
990
3280af22 991 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 992 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 993 s++;
5db06880
NC
994#ifdef PERL_MAD
995 goto done;
996#else
463ee0b2 997 return s;
5db06880 998#endif
463ee0b2
LW
999 }
1000 for (;;) {
fd049845 1001 STRLEN prevlen;
09bef843 1002 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1003 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1004 while (s < PL_bufend && isSPACE(*s)) {
1005 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1006 incline(s);
1007 }
ffb4593c
NT
1008
1009 /* comment */
3280af22
NIS
1010 if (s < PL_bufend && *s == '#') {
1011 while (s < PL_bufend && *s != '\n')
463ee0b2 1012 s++;
60e6418e 1013 if (s < PL_bufend) {
463ee0b2 1014 s++;
60e6418e
GS
1015 if (PL_in_eval && !PL_rsfp) {
1016 incline(s);
1017 continue;
1018 }
1019 }
463ee0b2 1020 }
ffb4593c
NT
1021
1022 /* only continue to recharge the buffer if we're at the end
1023 * of the buffer, we're not reading from a source filter, and
1024 * we're in normal lexing mode
1025 */
09bef843
SB
1026 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1027 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1028#ifdef PERL_MAD
1029 goto done;
1030#else
463ee0b2 1031 return s;
5db06880 1032#endif
ffb4593c
NT
1033
1034 /* try to recharge the buffer */
5db06880
NC
1035#ifdef PERL_MAD
1036 curoff = s - SvPVX(PL_linestr);
1037#endif
1038
9cbb5ea2 1039 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1040 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1041 {
5db06880
NC
1042#ifdef PERL_MAD
1043 if (PL_madskills && curoff != startoff) {
cd81e915 1044 if (!PL_skipwhite)
6b29d1f5 1045 PL_skipwhite = newSVpvs("");
cd81e915 1046 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1047 curoff - startoff);
1048 }
1049
1050 /* mustn't throw out old stuff yet if madpropping */
1051 SvCUR(PL_linestr) = curoff;
1052 s = SvPVX(PL_linestr) + curoff;
1053 *s = 0;
1054 if (curoff && s[-1] == '\n')
1055 s[-1] = ' ';
1056#endif
1057
9cbb5ea2 1058 /* end of file. Add on the -p or -n magic */
cd81e915 1059 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1060 if (PL_minus_p) {
5db06880 1061#ifdef PERL_MAD
6502358f 1062 sv_catpvs(PL_linestr,
5db06880
NC
1063 ";}continue{print or die qq(-p destination: $!\\n);}");
1064#else
6502358f 1065 sv_setpvs(PL_linestr,
01a19ab0 1066 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1067#endif
3280af22 1068 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1069 }
01a19ab0 1070 else if (PL_minus_n) {
5db06880
NC
1071#ifdef PERL_MAD
1072 sv_catpvn(PL_linestr, ";}", 2);
1073#else
01a19ab0 1074 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1075#endif
01a19ab0
NC
1076 PL_minus_n = 0;
1077 }
a0d0e21e 1078 else
5db06880
NC
1079#ifdef PERL_MAD
1080 sv_catpvn(PL_linestr,";", 1);
1081#else
4147a61b 1082 sv_setpvn(PL_linestr,";", 1);
5db06880 1083#endif
ffb4593c
NT
1084
1085 /* reset variables for next time we lex */
9cbb5ea2 1086 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1087 = SvPVX(PL_linestr)
1088#ifdef PERL_MAD
1089 + curoff
1090#endif
1091 ;
3280af22 1092 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1093 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1094
1095 /* Close the filehandle. Could be from -P preprocessor,
1096 * STDIN, or a regular file. If we were reading code from
1097 * STDIN (because the commandline held no -e or filename)
1098 * then we don't close it, we reset it so the code can
1099 * read from STDIN too.
1100 */
1101
3280af22
NIS
1102 if (PL_preprocess && !PL_in_eval)
1103 (void)PerlProc_pclose(PL_rsfp);
1104 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1105 PerlIO_clearerr(PL_rsfp);
8990e307 1106 else
3280af22 1107 (void)PerlIO_close(PL_rsfp);
4608196e 1108 PL_rsfp = NULL;
463ee0b2
LW
1109 return s;
1110 }
ffb4593c
NT
1111
1112 /* not at end of file, so we only read another line */
09bef843
SB
1113 /* make corresponding updates to old pointers, for yyerror() */
1114 oldprevlen = PL_oldbufptr - PL_bufend;
1115 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1116 if (PL_last_uni)
1117 oldunilen = PL_last_uni - PL_bufend;
1118 if (PL_last_lop)
1119 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1120 PL_linestart = PL_bufptr = s + prevlen;
1121 PL_bufend = s + SvCUR(PL_linestr);
1122 s = PL_bufptr;
09bef843
SB
1123 PL_oldbufptr = s + oldprevlen;
1124 PL_oldoldbufptr = s + oldoldprevlen;
1125 if (PL_last_uni)
1126 PL_last_uni = s + oldunilen;
1127 if (PL_last_lop)
1128 PL_last_lop = s + oldloplen;
a0d0e21e 1129 incline(s);
ffb4593c
NT
1130
1131 /* debugger active and we're not compiling the debugger code,
1132 * so store the line into the debugger's array of lines
1133 */
80a702cd 1134 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1135 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1136 }
5db06880
NC
1137
1138#ifdef PERL_MAD
1139 done:
1140 if (PL_madskills) {
cd81e915 1141 if (!PL_skipwhite)
6b29d1f5 1142 PL_skipwhite = newSVpvs("");
5db06880
NC
1143 curoff = s - SvPVX(PL_linestr);
1144 if (curoff - startoff)
cd81e915 1145 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1146 curoff - startoff);
1147 }
1148 return s;
1149#endif
a687059c 1150}
378cc40b 1151
ffb4593c
NT
1152/*
1153 * S_check_uni
1154 * Check the unary operators to ensure there's no ambiguity in how they're
1155 * used. An ambiguous piece of code would be:
1156 * rand + 5
1157 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1158 * the +5 is its argument.
1159 */
1160
76e3520e 1161STATIC void
cea2e8a9 1162S_check_uni(pTHX)
ba106d47 1163{
97aff369 1164 dVAR;
d4c19fe8
AL
1165 const char *s;
1166 const char *t;
2f3197b3 1167
3280af22 1168 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1169 return;
3280af22
NIS
1170 while (isSPACE(*PL_last_uni))
1171 PL_last_uni++;
c35e046a
AL
1172 s = PL_last_uni;
1173 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1174 s++;
3280af22 1175 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1176 return;
6136c704 1177
0453d815 1178 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1179 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1180 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1181 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1182 }
2f3197b3
LW
1183}
1184
ffb4593c
NT
1185/*
1186 * LOP : macro to build a list operator. Its behaviour has been replaced
1187 * with a subroutine, S_lop() for which LOP is just another name.
1188 */
1189
a0d0e21e
LW
1190#define LOP(f,x) return lop(f,x,s)
1191
ffb4593c
NT
1192/*
1193 * S_lop
1194 * Build a list operator (or something that might be one). The rules:
1195 * - if we have a next token, then it's a list operator [why?]
1196 * - if the next thing is an opening paren, then it's a function
1197 * - else it's a list operator
1198 */
1199
76e3520e 1200STATIC I32
a0be28da 1201S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1202{
97aff369 1203 dVAR;
79072805 1204 yylval.ival = f;
35c8bce7 1205 CLINE;
3280af22
NIS
1206 PL_expect = x;
1207 PL_bufptr = s;
1208 PL_last_lop = PL_oldbufptr;
eb160463 1209 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1210#ifdef PERL_MAD
1211 if (PL_lasttoke)
1212 return REPORT(LSTOP);
1213#else
3280af22 1214 if (PL_nexttoke)
bbf60fe6 1215 return REPORT(LSTOP);
5db06880 1216#endif
79072805 1217 if (*s == '(')
bbf60fe6 1218 return REPORT(FUNC);
29595ff2 1219 s = PEEKSPACE(s);
79072805 1220 if (*s == '(')
bbf60fe6 1221 return REPORT(FUNC);
79072805 1222 else
bbf60fe6 1223 return REPORT(LSTOP);
79072805
LW
1224}
1225
5db06880
NC
1226#ifdef PERL_MAD
1227 /*
1228 * S_start_force
1229 * Sets up for an eventual force_next(). start_force(0) basically does
1230 * an unshift, while start_force(-1) does a push. yylex removes items
1231 * on the "pop" end.
1232 */
1233
1234STATIC void
1235S_start_force(pTHX_ int where)
1236{
1237 int i;
1238
cd81e915 1239 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1240 where = PL_lasttoke;
cd81e915
NC
1241 assert(PL_curforce < 0 || PL_curforce == where);
1242 if (PL_curforce != where) {
5db06880
NC
1243 for (i = PL_lasttoke; i > where; --i) {
1244 PL_nexttoke[i] = PL_nexttoke[i-1];
1245 }
1246 PL_lasttoke++;
1247 }
cd81e915 1248 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1249 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1250 PL_curforce = where;
1251 if (PL_nextwhite) {
5db06880 1252 if (PL_madskills)
6b29d1f5 1253 curmad('^', newSVpvs(""));
cd81e915 1254 CURMAD('_', PL_nextwhite);
5db06880
NC
1255 }
1256}
1257
1258STATIC void
1259S_curmad(pTHX_ char slot, SV *sv)
1260{
1261 MADPROP **where;
1262
1263 if (!sv)
1264 return;
cd81e915
NC
1265 if (PL_curforce < 0)
1266 where = &PL_thismad;
5db06880 1267 else
cd81e915 1268 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1269
cd81e915 1270 if (PL_faketokens)
5db06880
NC
1271 sv_setpvn(sv, "", 0);
1272 else {
1273 if (!IN_BYTES) {
1274 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1275 SvUTF8_on(sv);
1276 else if (PL_encoding) {
1277 sv_recode_to_utf8(sv, PL_encoding);
1278 }
1279 }
1280 }
1281
1282 /* keep a slot open for the head of the list? */
1283 if (slot != '_' && *where && (*where)->mad_key == '^') {
1284 (*where)->mad_key = slot;
035e2bcc 1285 sv_free((SV*)((*where)->mad_val));
5db06880
NC
1286 (*where)->mad_val = (void*)sv;
1287 }
1288 else
1289 addmad(newMADsv(slot, sv), where, 0);
1290}
1291#else
b3f24c00
MHM
1292# define start_force(where) NOOP
1293# define curmad(slot, sv) NOOP
5db06880
NC
1294#endif
1295
ffb4593c
NT
1296/*
1297 * S_force_next
9cbb5ea2 1298 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1299 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1300 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1301 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1302 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1303 */
1304
4e553d73 1305STATIC void
cea2e8a9 1306S_force_next(pTHX_ I32 type)
79072805 1307{
97aff369 1308 dVAR;
5db06880 1309#ifdef PERL_MAD
cd81e915 1310 if (PL_curforce < 0)
5db06880 1311 start_force(PL_lasttoke);
cd81e915 1312 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1313 if (PL_lex_state != LEX_KNOWNEXT)
1314 PL_lex_defer = PL_lex_state;
1315 PL_lex_state = LEX_KNOWNEXT;
1316 PL_lex_expect = PL_expect;
cd81e915 1317 PL_curforce = -1;
5db06880 1318#else
3280af22
NIS
1319 PL_nexttype[PL_nexttoke] = type;
1320 PL_nexttoke++;
1321 if (PL_lex_state != LEX_KNOWNEXT) {
1322 PL_lex_defer = PL_lex_state;
1323 PL_lex_expect = PL_expect;
1324 PL_lex_state = LEX_KNOWNEXT;
79072805 1325 }
5db06880 1326#endif
79072805
LW
1327}
1328
d0a148a6
NC
1329STATIC SV *
1330S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1331{
97aff369 1332 dVAR;
9d4ba2ae 1333 SV * const sv = newSVpvn(start,len);
bfed75c6 1334 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1335 SvUTF8_on(sv);
1336 return sv;
1337}
1338
ffb4593c
NT
1339/*
1340 * S_force_word
1341 * When the lexer knows the next thing is a word (for instance, it has
1342 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1343 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1344 * lookahead.
ffb4593c
NT
1345 *
1346 * Arguments:
b1b65b59 1347 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1348 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1349 * int check_keyword : if true, Perl checks to make sure the word isn't
1350 * a keyword (do this if the word is a label, e.g. goto FOO)
1351 * int allow_pack : if true, : characters will also be allowed (require,
1352 * use, etc. do this)
9cbb5ea2 1353 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1354 */
1355
76e3520e 1356STATIC char *
cea2e8a9 1357S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1358{
97aff369 1359 dVAR;
463ee0b2
LW
1360 register char *s;
1361 STRLEN len;
4e553d73 1362
29595ff2 1363 start = SKIPSPACE1(start);
463ee0b2 1364 s = start;
7e2040f0 1365 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1366 (allow_pack && *s == ':') ||
15f0808c 1367 (allow_initial_tick && *s == '\'') )
a0d0e21e 1368 {
3280af22 1369 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1370 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1371 return start;
cd81e915 1372 start_force(PL_curforce);
5db06880
NC
1373 if (PL_madskills)
1374 curmad('X', newSVpvn(start,s-start));
463ee0b2 1375 if (token == METHOD) {
29595ff2 1376 s = SKIPSPACE1(s);
463ee0b2 1377 if (*s == '(')
3280af22 1378 PL_expect = XTERM;
463ee0b2 1379 else {
3280af22 1380 PL_expect = XOPERATOR;
463ee0b2 1381 }
79072805 1382 }
e74e6b3d 1383 if (PL_madskills)
63575281 1384 curmad('g', newSVpvs( "forced" ));
9ded7720 1385 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1386 = (OP*)newSVOP(OP_CONST,0,
1387 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1388 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1389 force_next(token);
1390 }
1391 return s;
1392}
1393
ffb4593c
NT
1394/*
1395 * S_force_ident
9cbb5ea2 1396 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1397 * text only contains the "foo" portion. The first argument is a pointer
1398 * to the "foo", and the second argument is the type symbol to prefix.
1399 * Forces the next token to be a "WORD".
9cbb5ea2 1400 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1401 */
1402
76e3520e 1403STATIC void
bfed75c6 1404S_force_ident(pTHX_ register const char *s, int kind)
79072805 1405{
97aff369 1406 dVAR;
c35e046a 1407 if (*s) {
90e5519e
NC
1408 const STRLEN len = strlen(s);
1409 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1410 start_force(PL_curforce);
9ded7720 1411 NEXTVAL_NEXTTOKE.opval = o;
79072805 1412 force_next(WORD);
748a9306 1413 if (kind) {
11343788 1414 o->op_private = OPpCONST_ENTERED;
55497cff 1415 /* XXX see note in pp_entereval() for why we forgo typo
1416 warnings if the symbol must be introduced in an eval.
1417 GSAR 96-10-12 */
90e5519e
NC
1418 gv_fetchpvn_flags(s, len,
1419 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1420 : GV_ADD,
1421 kind == '$' ? SVt_PV :
1422 kind == '@' ? SVt_PVAV :
1423 kind == '%' ? SVt_PVHV :
a0d0e21e 1424 SVt_PVGV
90e5519e 1425 );
748a9306 1426 }
79072805
LW
1427 }
1428}
1429
1571675a
GS
1430NV
1431Perl_str_to_version(pTHX_ SV *sv)
1432{
1433 NV retval = 0.0;
1434 NV nshift = 1.0;
1435 STRLEN len;
cfd0369c 1436 const char *start = SvPV_const(sv,len);
9d4ba2ae 1437 const char * const end = start + len;
504618e9 1438 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1439 while (start < end) {
ba210ebe 1440 STRLEN skip;
1571675a
GS
1441 UV n;
1442 if (utf)
9041c2e3 1443 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1444 else {
1445 n = *(U8*)start;
1446 skip = 1;
1447 }
1448 retval += ((NV)n)/nshift;
1449 start += skip;
1450 nshift *= 1000;
1451 }
1452 return retval;
1453}
1454
4e553d73 1455/*
ffb4593c
NT
1456 * S_force_version
1457 * Forces the next token to be a version number.
e759cc13
RGS
1458 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1459 * and if "guessing" is TRUE, then no new token is created (and the caller
1460 * must use an alternative parsing method).
ffb4593c
NT
1461 */
1462
76e3520e 1463STATIC char *
e759cc13 1464S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1465{
97aff369 1466 dVAR;
5f66b61c 1467 OP *version = NULL;
44dcb63b 1468 char *d;
5db06880
NC
1469#ifdef PERL_MAD
1470 I32 startoff = s - SvPVX(PL_linestr);
1471#endif
89bfa8cd 1472
29595ff2 1473 s = SKIPSPACE1(s);
89bfa8cd 1474
44dcb63b 1475 d = s;
dd629d5b 1476 if (*d == 'v')
44dcb63b 1477 d++;
44dcb63b 1478 if (isDIGIT(*d)) {
e759cc13
RGS
1479 while (isDIGIT(*d) || *d == '_' || *d == '.')
1480 d++;
5db06880
NC
1481#ifdef PERL_MAD
1482 if (PL_madskills) {
cd81e915 1483 start_force(PL_curforce);
5db06880
NC
1484 curmad('X', newSVpvn(s,d-s));
1485 }
1486#endif
9f3d182e 1487 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1488 SV *ver;
b73d6f50 1489 s = scan_num(s, &yylval);
89bfa8cd 1490 version = yylval.opval;
dd629d5b
GS
1491 ver = cSVOPx(version)->op_sv;
1492 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1493 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1494 SvNV_set(ver, str_to_version(ver));
1571675a 1495 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1496 }
89bfa8cd 1497 }
5db06880
NC
1498 else if (guessing) {
1499#ifdef PERL_MAD
1500 if (PL_madskills) {
cd81e915
NC
1501 sv_free(PL_nextwhite); /* let next token collect whitespace */
1502 PL_nextwhite = 0;
5db06880
NC
1503 s = SvPVX(PL_linestr) + startoff;
1504 }
1505#endif
e759cc13 1506 return s;
5db06880 1507 }
89bfa8cd 1508 }
1509
5db06880
NC
1510#ifdef PERL_MAD
1511 if (PL_madskills && !version) {
cd81e915
NC
1512 sv_free(PL_nextwhite); /* let next token collect whitespace */
1513 PL_nextwhite = 0;
5db06880
NC
1514 s = SvPVX(PL_linestr) + startoff;
1515 }
1516#endif
89bfa8cd 1517 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1518 start_force(PL_curforce);
9ded7720 1519 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1520 force_next(WORD);
89bfa8cd 1521
e759cc13 1522 return s;
89bfa8cd 1523}
1524
ffb4593c
NT
1525/*
1526 * S_tokeq
1527 * Tokenize a quoted string passed in as an SV. It finds the next
1528 * chunk, up to end of string or a backslash. It may make a new
1529 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1530 * turns \\ into \.
1531 */
1532
76e3520e 1533STATIC SV *
cea2e8a9 1534S_tokeq(pTHX_ SV *sv)
79072805 1535{
97aff369 1536 dVAR;
79072805
LW
1537 register char *s;
1538 register char *send;
1539 register char *d;
b3ac6de7
IZ
1540 STRLEN len = 0;
1541 SV *pv = sv;
79072805
LW
1542
1543 if (!SvLEN(sv))
b3ac6de7 1544 goto finish;
79072805 1545
a0d0e21e 1546 s = SvPV_force(sv, len);
21a311ee 1547 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1548 goto finish;
463ee0b2 1549 send = s + len;
79072805
LW
1550 while (s < send && *s != '\\')
1551 s++;
1552 if (s == send)
b3ac6de7 1553 goto finish;
79072805 1554 d = s;
be4731d2 1555 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1556 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1557 if (SvUTF8(sv))
1558 SvUTF8_on(pv);
1559 }
79072805
LW
1560 while (s < send) {
1561 if (*s == '\\') {
a0d0e21e 1562 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1563 s++; /* all that, just for this */
1564 }
1565 *d++ = *s++;
1566 }
1567 *d = '\0';
95a20fc0 1568 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1569 finish:
3280af22 1570 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1571 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1572 return sv;
1573}
1574
ffb4593c
NT
1575/*
1576 * Now come three functions related to double-quote context,
1577 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1578 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1579 * interact with PL_lex_state, and create fake ( ... ) argument lists
1580 * to handle functions and concatenation.
1581 * They assume that whoever calls them will be setting up a fake
1582 * join call, because each subthing puts a ',' after it. This lets
1583 * "lower \luPpEr"
1584 * become
1585 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1586 *
1587 * (I'm not sure whether the spurious commas at the end of lcfirst's
1588 * arguments and join's arguments are created or not).
1589 */
1590
1591/*
1592 * S_sublex_start
1593 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1594 *
1595 * Pattern matching will set PL_lex_op to the pattern-matching op to
1596 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1597 *
1598 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1599 *
1600 * Everything else becomes a FUNC.
1601 *
1602 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1603 * had an OP_CONST or OP_READLINE). This just sets us up for a
1604 * call to S_sublex_push().
1605 */
1606
76e3520e 1607STATIC I32
cea2e8a9 1608S_sublex_start(pTHX)
79072805 1609{
97aff369 1610 dVAR;
0d46e09a 1611 register const I32 op_type = yylval.ival;
79072805
LW
1612
1613 if (op_type == OP_NULL) {
3280af22 1614 yylval.opval = PL_lex_op;
5f66b61c 1615 PL_lex_op = NULL;
79072805
LW
1616 return THING;
1617 }
1618 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1619 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1620
1621 if (SvTYPE(sv) == SVt_PVIV) {
1622 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1623 STRLEN len;
96a5add6 1624 const char * const p = SvPV_const(sv, len);
f54cb97a 1625 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1626 if (SvUTF8(sv))
1627 SvUTF8_on(nsv);
b3ac6de7
IZ
1628 SvREFCNT_dec(sv);
1629 sv = nsv;
4e553d73 1630 }
b3ac6de7 1631 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1632 PL_lex_stuff = NULL;
6f33ba73
RGS
1633 /* Allow <FH> // "foo" */
1634 if (op_type == OP_READLINE)
1635 PL_expect = XTERMORDORDOR;
79072805
LW
1636 return THING;
1637 }
e3f73d4e
RGS
1638 else if (op_type == OP_BACKTICK && PL_lex_op) {
1639 /* readpipe() vas overriden */
1640 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1641 yylval.opval = PL_lex_op;
9b201d7d 1642 PL_lex_op = NULL;
e3f73d4e
RGS
1643 PL_lex_stuff = NULL;
1644 return THING;
1645 }
79072805 1646
3280af22 1647 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1648 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1649 PL_sublex_info.sub_op = PL_lex_op;
1650 PL_lex_state = LEX_INTERPPUSH;
55497cff 1651
3280af22
NIS
1652 PL_expect = XTERM;
1653 if (PL_lex_op) {
1654 yylval.opval = PL_lex_op;
5f66b61c 1655 PL_lex_op = NULL;
55497cff 1656 return PMFUNC;
1657 }
1658 else
1659 return FUNC;
1660}
1661
ffb4593c
NT
1662/*
1663 * S_sublex_push
1664 * Create a new scope to save the lexing state. The scope will be
1665 * ended in S_sublex_done. Returns a '(', starting the function arguments
1666 * to the uc, lc, etc. found before.
1667 * Sets PL_lex_state to LEX_INTERPCONCAT.
1668 */
1669
76e3520e 1670STATIC I32
cea2e8a9 1671S_sublex_push(pTHX)
55497cff 1672{
27da23d5 1673 dVAR;
f46d017c 1674 ENTER;
55497cff 1675
3280af22 1676 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1677 SAVEBOOL(PL_lex_dojoin);
3280af22 1678 SAVEI32(PL_lex_brackets);
3280af22
NIS
1679 SAVEI32(PL_lex_casemods);
1680 SAVEI32(PL_lex_starts);
651b5b28 1681 SAVEI8(PL_lex_state);
7766f137 1682 SAVEVPTR(PL_lex_inpat);
98246f1e 1683 SAVEI16(PL_lex_inwhat);
57843af0 1684 SAVECOPLINE(PL_curcop);
3280af22 1685 SAVEPPTR(PL_bufptr);
8452ff4b 1686 SAVEPPTR(PL_bufend);
3280af22
NIS
1687 SAVEPPTR(PL_oldbufptr);
1688 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1689 SAVEPPTR(PL_last_lop);
1690 SAVEPPTR(PL_last_uni);
3280af22
NIS
1691 SAVEPPTR(PL_linestart);
1692 SAVESPTR(PL_linestr);
8edd5f42
RGS
1693 SAVEGENERICPV(PL_lex_brackstack);
1694 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1695
1696 PL_linestr = PL_lex_stuff;
a0714e2c 1697 PL_lex_stuff = NULL;
3280af22 1698
9cbb5ea2
GS
1699 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1700 = SvPVX(PL_linestr);
3280af22 1701 PL_bufend += SvCUR(PL_linestr);
bd61b366 1702 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1703 SAVEFREESV(PL_linestr);
1704
1705 PL_lex_dojoin = FALSE;
1706 PL_lex_brackets = 0;
a02a5408
JC
1707 Newx(PL_lex_brackstack, 120, char);
1708 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1709 PL_lex_casemods = 0;
1710 *PL_lex_casestack = '\0';
1711 PL_lex_starts = 0;
1712 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1713 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1714
1715 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1716 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1717 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1718 else
5f66b61c 1719 PL_lex_inpat = NULL;
79072805 1720
55497cff 1721 return '(';
79072805
LW
1722}
1723
ffb4593c
NT
1724/*
1725 * S_sublex_done
1726 * Restores lexer state after a S_sublex_push.
1727 */
1728
76e3520e 1729STATIC I32
cea2e8a9 1730S_sublex_done(pTHX)
79072805 1731{
27da23d5 1732 dVAR;
3280af22 1733 if (!PL_lex_starts++) {
396482e1 1734 SV * const sv = newSVpvs("");
9aa983d2
JH
1735 if (SvUTF8(PL_linestr))
1736 SvUTF8_on(sv);
3280af22 1737 PL_expect = XOPERATOR;
9aa983d2 1738 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1739 return THING;
1740 }
1741
3280af22
NIS
1742 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1743 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1744 return yylex();
79072805
LW
1745 }
1746
ffb4593c 1747 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1748 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1749 PL_linestr = PL_lex_repl;
1750 PL_lex_inpat = 0;
1751 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1752 PL_bufend += SvCUR(PL_linestr);
bd61b366 1753 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1754 SAVEFREESV(PL_linestr);
1755 PL_lex_dojoin = FALSE;
1756 PL_lex_brackets = 0;
3280af22
NIS
1757 PL_lex_casemods = 0;
1758 *PL_lex_casestack = '\0';
1759 PL_lex_starts = 0;
25da4f38 1760 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1761 PL_lex_state = LEX_INTERPNORMAL;
1762 PL_lex_starts++;
e9fa98b2
HS
1763 /* we don't clear PL_lex_repl here, so that we can check later
1764 whether this is an evalled subst; that means we rely on the
1765 logic to ensure sublex_done() is called again only via the
1766 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1767 }
e9fa98b2 1768 else {
3280af22 1769 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1770 PL_lex_repl = NULL;
e9fa98b2 1771 }
79072805 1772 return ',';
ffed7fef
LW
1773 }
1774 else {
5db06880
NC
1775#ifdef PERL_MAD
1776 if (PL_madskills) {
cd81e915
NC
1777 if (PL_thiswhite) {
1778 if (!PL_endwhite)
6b29d1f5 1779 PL_endwhite = newSVpvs("");
cd81e915
NC
1780 sv_catsv(PL_endwhite, PL_thiswhite);
1781 PL_thiswhite = 0;
1782 }
1783 if (PL_thistoken)
1784 sv_setpvn(PL_thistoken,"",0);
5db06880 1785 else
cd81e915 1786 PL_realtokenstart = -1;
5db06880
NC
1787 }
1788#endif
f46d017c 1789 LEAVE;
3280af22
NIS
1790 PL_bufend = SvPVX(PL_linestr);
1791 PL_bufend += SvCUR(PL_linestr);
1792 PL_expect = XOPERATOR;
09bef843 1793 PL_sublex_info.sub_inwhat = 0;
79072805 1794 return ')';
ffed7fef
LW
1795 }
1796}
1797
02aa26ce
NT
1798/*
1799 scan_const
1800
1801 Extracts a pattern, double-quoted string, or transliteration. This
1802 is terrifying code.
1803
94def140 1804 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1805 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1806 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1807
94def140
TS
1808 Returns a pointer to the character scanned up to. If this is
1809 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1810 successfully parsed), will leave an OP for the substring scanned
1811 in yylval. Caller must intuit reason for not parsing further
1812 by looking at the next characters herself.
1813
02aa26ce
NT
1814 In patterns:
1815 backslashes:
1816 double-quoted style: \r and \n
1817 regexp special ones: \D \s
94def140
TS
1818 constants: \x31
1819 backrefs: \1
02aa26ce
NT
1820 case and quoting: \U \Q \E
1821 stops on @ and $, but not for $ as tail anchor
1822
1823 In transliterations:
1824 characters are VERY literal, except for - not at the start or end
94def140
TS
1825 of the string, which indicates a range. If the range is in bytes,
1826 scan_const expands the range to the full set of intermediate
1827 characters. If the range is in utf8, the hyphen is replaced with
1828 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1829
1830 In double-quoted strings:
1831 backslashes:
1832 double-quoted style: \r and \n
94def140
TS
1833 constants: \x31
1834 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1835 case and quoting: \U \Q \E
1836 stops on @ and $
1837
1838 scan_const does *not* construct ops to handle interpolated strings.
1839 It stops processing as soon as it finds an embedded $ or @ variable
1840 and leaves it to the caller to work out what's going on.
1841
94def140
TS
1842 embedded arrays (whether in pattern or not) could be:
1843 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1844
1845 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1846
1847 $ in pattern could be $foo or could be tail anchor. Assumption:
1848 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1849 followed by one of "()| \r\n\t"
02aa26ce
NT
1850
1851 \1 (backreferences) are turned into $1
1852
1853 The structure of the code is
1854 while (there's a character to process) {
94def140
TS
1855 handle transliteration ranges
1856 skip regexp comments /(?#comment)/ and codes /(?{code})/
1857 skip #-initiated comments in //x patterns
1858 check for embedded arrays
02aa26ce
NT
1859 check for embedded scalars
1860 if (backslash) {
94def140
TS
1861 leave intact backslashes from leaveit (below)
1862 deprecate \1 in substitution replacements
02aa26ce
NT
1863 handle string-changing backslashes \l \U \Q \E, etc.
1864 switch (what was escaped) {
94def140
TS
1865 handle \- in a transliteration (becomes a literal -)
1866 handle \132 (octal characters)
1867 handle \x15 and \x{1234} (hex characters)
1868 handle \N{name} (named characters)
1869 handle \cV (control characters)
1870 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1871 } (end switch)
1872 } (end if backslash)
1873 } (end while character to read)
4e553d73 1874
02aa26ce
NT
1875*/
1876
76e3520e 1877STATIC char *
cea2e8a9 1878S_scan_const(pTHX_ char *start)
79072805 1879{
97aff369 1880 dVAR;
3280af22 1881 register char *send = PL_bufend; /* end of the constant */
561b68a9 1882 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1883 register char *s = start; /* start of the constant */
1884 register char *d = SvPVX(sv); /* destination for copies */
1885 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1886 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1887 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1888 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1889 UV uv;
4c3a8340
TS
1890#ifdef EBCDIC
1891 UV literal_endpoint = 0;
e294cc5d 1892 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1893#endif
012bcf8d 1894
2b9d42f0
NIS
1895 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1896 /* If we are doing a trans and we know we want UTF8 set expectation */
1897 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1898 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1899 }
1900
1901
79072805 1902 while (s < send || dorange) {
02aa26ce 1903 /* get transliterations out of the way (they're most literal) */
3280af22 1904 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1905 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1906 if (dorange) {
1ba5c669
JH
1907 I32 i; /* current expanded character */
1908 I32 min; /* first character in range */
1909 I32 max; /* last character in range */
02aa26ce 1910
e294cc5d
JH
1911#ifdef EBCDIC
1912 UV uvmax = 0;
1913#endif
1914
1915 if (has_utf8
1916#ifdef EBCDIC
1917 && !native_range
1918#endif
1919 ) {
9d4ba2ae 1920 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1921 char *e = d++;
1922 while (e-- > c)
1923 *(e + 1) = *e;
25716404 1924 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1925 /* mark the range as done, and continue */
1926 dorange = FALSE;
1927 didrange = TRUE;
1928 continue;
1929 }
2b9d42f0 1930
95a20fc0 1931 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1932#ifdef EBCDIC
1933 SvGROW(sv,
1934 SvLEN(sv) + (has_utf8 ?
1935 (512 - UTF_CONTINUATION_MARK +
1936 UNISKIP(0x100))
1937 : 256));
1938 /* How many two-byte within 0..255: 128 in UTF-8,
1939 * 96 in UTF-8-mod. */
1940#else
9cbb5ea2 1941 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1942#endif
9cbb5ea2 1943 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1944#ifdef EBCDIC
1945 if (has_utf8) {
1946 int j;
1947 for (j = 0; j <= 1; j++) {
1948 char * const c = (char*)utf8_hop((U8*)d, -1);
1949 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1950 if (j)
1951 min = (U8)uv;
1952 else if (uv < 256)
1953 max = (U8)uv;
1954 else {
1955 max = (U8)0xff; /* only to \xff */
1956 uvmax = uv; /* \x{100} to uvmax */
1957 }
1958 d = c; /* eat endpoint chars */
1959 }
1960 }
1961 else {
1962#endif
1963 d -= 2; /* eat the first char and the - */
1964 min = (U8)*d; /* first char in range */
1965 max = (U8)d[1]; /* last char in range */
1966#ifdef EBCDIC
1967 }
1968#endif
8ada0baa 1969
c2e66d9e 1970 if (min > max) {
01ec43d0 1971 Perl_croak(aTHX_
d1573ac7 1972 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1973 (char)min, (char)max);
c2e66d9e
GS
1974 }
1975
c7f1f016 1976#ifdef EBCDIC
4c3a8340
TS
1977 if (literal_endpoint == 2 &&
1978 ((isLOWER(min) && isLOWER(max)) ||
1979 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1980 if (isLOWER(min)) {
1981 for (i = min; i <= max; i++)
1982 if (isLOWER(i))
db42d148 1983 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1984 } else {
1985 for (i = min; i <= max; i++)
1986 if (isUPPER(i))
db42d148 1987 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1988 }
1989 }
1990 else
1991#endif
1992 for (i = min; i <= max; i++)
e294cc5d
JH
1993#ifdef EBCDIC
1994 if (has_utf8) {
1995 const U8 ch = (U8)NATIVE_TO_UTF(i);
1996 if (UNI_IS_INVARIANT(ch))
1997 *d++ = (U8)i;
1998 else {
1999 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2000 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2001 }
2002 }
2003 else
2004#endif
2005 *d++ = (char)i;
2006
2007#ifdef EBCDIC
2008 if (uvmax) {
2009 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2010 if (uvmax > 0x101)
2011 *d++ = (char)UTF_TO_NATIVE(0xff);
2012 if (uvmax > 0x100)
2013 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2014 }
2015#endif
02aa26ce
NT
2016
2017 /* mark the range as done, and continue */
79072805 2018 dorange = FALSE;
01ec43d0 2019 didrange = TRUE;
4c3a8340
TS
2020#ifdef EBCDIC
2021 literal_endpoint = 0;
2022#endif
79072805 2023 continue;
4e553d73 2024 }
02aa26ce
NT
2025
2026 /* range begins (ignore - as first or last char) */
79072805 2027 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2028 if (didrange) {
1fafa243 2029 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2030 }
e294cc5d
JH
2031 if (has_utf8
2032#ifdef EBCDIC
2033 && !native_range
2034#endif
2035 ) {
25716404 2036 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2037 s++;
2038 continue;
2039 }
79072805
LW
2040 dorange = TRUE;
2041 s++;
01ec43d0
GS
2042 }
2043 else {
2044 didrange = FALSE;
4c3a8340
TS
2045#ifdef EBCDIC
2046 literal_endpoint = 0;
e294cc5d 2047 native_range = TRUE;
4c3a8340 2048#endif
01ec43d0 2049 }
79072805 2050 }
02aa26ce
NT
2051
2052 /* if we get here, we're not doing a transliteration */
2053
0f5d15d6
IZ
2054 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2055 except for the last char, which will be done separately. */
3280af22 2056 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2057 if (s[2] == '#') {
e994fd66 2058 while (s+1 < send && *s != ')')
db42d148 2059 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2060 }
2061 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2062 || (s[2] == '?' && s[3] == '{'))
155aba94 2063 {
cc6b7395 2064 I32 count = 1;
0f5d15d6 2065 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2066 char c;
2067
d9f97599
GS
2068 while (count && (c = *regparse)) {
2069 if (c == '\\' && regparse[1])
2070 regparse++;
4e553d73 2071 else if (c == '{')
cc6b7395 2072 count++;
4e553d73 2073 else if (c == '}')
cc6b7395 2074 count--;
d9f97599 2075 regparse++;
cc6b7395 2076 }
e994fd66 2077 if (*regparse != ')')
5bdf89e7 2078 regparse--; /* Leave one char for continuation. */
0f5d15d6 2079 while (s < regparse)
db42d148 2080 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2081 }
748a9306 2082 }
02aa26ce
NT
2083
2084 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2085 else if (*s == '#' && PL_lex_inpat &&
2086 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2087 while (s+1 < send && *s != '\n')
db42d148 2088 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2089 }
02aa26ce 2090
5d1d4326 2091 /* check for embedded arrays
da6eedaa 2092 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2093 */
1749ea0d
TS
2094 else if (*s == '@' && s[1]) {
2095 if (isALNUM_lazy_if(s+1,UTF))
2096 break;
2097 if (strchr(":'{$", s[1]))
2098 break;
2099 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2100 break; /* in regexp, neither @+ nor @- are interpolated */
2101 }
02aa26ce
NT
2102
2103 /* check for embedded scalars. only stop if we're sure it's a
2104 variable.
2105 */
79072805 2106 else if (*s == '$') {
3280af22 2107 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2108 break;
6002328a 2109 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2110 break; /* in regexp, $ might be tail anchor */
2111 }
02aa26ce 2112
2b9d42f0
NIS
2113 /* End of else if chain - OP_TRANS rejoin rest */
2114
02aa26ce 2115 /* backslashes */
79072805
LW
2116 if (*s == '\\' && s+1 < send) {
2117 s++;
02aa26ce 2118
02aa26ce 2119 /* deprecate \1 in strings and substitution replacements */
3280af22 2120 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2121 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2122 {
599cee73 2123 if (ckWARN(WARN_SYNTAX))
9014280d 2124 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2125 *--s = '$';
2126 break;
2127 }
02aa26ce
NT
2128
2129 /* string-change backslash escapes */
3280af22 2130 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2131 --s;
2132 break;
2133 }
cc74c5bd
TS
2134 /* skip any other backslash escapes in a pattern */
2135 else if (PL_lex_inpat) {
2136 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2137 goto default_action;
2138 }
02aa26ce
NT
2139
2140 /* if we get here, it's either a quoted -, or a digit */
79072805 2141 switch (*s) {
02aa26ce
NT
2142
2143 /* quoted - in transliterations */
79072805 2144 case '-':
3280af22 2145 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2146 *d++ = *s++;
2147 continue;
2148 }
2149 /* FALL THROUGH */
2150 default:
11b8faa4 2151 {
86f97054 2152 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2153 ckWARN(WARN_MISC))
9014280d 2154 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2155 "Unrecognized escape \\%c passed through",
2156 *s);
11b8faa4 2157 /* default action is to copy the quoted character */
f9a63242 2158 goto default_action;
11b8faa4 2159 }
02aa26ce
NT
2160
2161 /* \132 indicates an octal constant */
79072805
LW
2162 case '0': case '1': case '2': case '3':
2163 case '4': case '5': case '6': case '7':
ba210ebe 2164 {
53305cf1
NC
2165 I32 flags = 0;
2166 STRLEN len = 3;
2167 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2168 s += len;
2169 }
012bcf8d 2170 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2171
2172 /* \x24 indicates a hex constant */
79072805 2173 case 'x':
a0ed51b3
LW
2174 ++s;
2175 if (*s == '{') {
9d4ba2ae 2176 char* const e = strchr(s, '}');
a4c04bdc
NC
2177 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2178 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2179 STRLEN len;
355860ce 2180
53305cf1 2181 ++s;
adaeee49 2182 if (!e) {
a0ed51b3 2183 yyerror("Missing right brace on \\x{}");
355860ce 2184 continue;
ba210ebe 2185 }
53305cf1
NC
2186 len = e - s;
2187 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2188 s = e + 1;
a0ed51b3
LW
2189 }
2190 else {
ba210ebe 2191 {
53305cf1 2192 STRLEN len = 2;
a4c04bdc 2193 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2194 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2195 s += len;
2196 }
012bcf8d
GS
2197 }
2198
2199 NUM_ESCAPE_INSERT:
2200 /* Insert oct or hex escaped character.
301d3d20 2201 * There will always enough room in sv since such
db42d148 2202 * escapes will be longer than any UTF-8 sequence
301d3d20 2203 * they can end up as. */
ba7cea30 2204
c7f1f016
NIS
2205 /* We need to map to chars to ASCII before doing the tests
2206 to cover EBCDIC
2207 */
c4d5f83a 2208 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2209 if (!has_utf8 && uv > 255) {
301d3d20
JH
2210 /* Might need to recode whatever we have
2211 * accumulated so far if it contains any
2212 * hibit chars.
2213 *
2214 * (Can't we keep track of that and avoid
2215 * this rescan? --jhi)
012bcf8d 2216 */
c7f1f016 2217 int hicount = 0;
63cd0674
NIS
2218 U8 *c;
2219 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2220 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2221 hicount++;
db42d148 2222 }
012bcf8d 2223 }
63cd0674 2224 if (hicount) {
9d4ba2ae 2225 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2226 U8 *src, *dst;
2227 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2228 src = (U8 *)d - 1;
2229 dst = src+hicount;
2230 d += hicount;
cfd0369c 2231 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2232 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2233 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2234 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2235 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2236 }
2237 else {
63cd0674 2238 *dst-- = *src;
012bcf8d 2239 }
c7f1f016 2240 src--;
012bcf8d
GS
2241 }
2242 }
2243 }
2244
9aa983d2 2245 if (has_utf8 || uv > 255) {
9041c2e3 2246 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2247 has_utf8 = TRUE;
f9a63242
JH
2248 if (PL_lex_inwhat == OP_TRANS &&
2249 PL_sublex_info.sub_op) {
2250 PL_sublex_info.sub_op->op_private |=
2251 (PL_lex_repl ? OPpTRANS_FROM_UTF
2252 : OPpTRANS_TO_UTF);
f9a63242 2253 }
e294cc5d
JH
2254#ifdef EBCDIC
2255 if (uv > 255 && !dorange)
2256 native_range = FALSE;
2257#endif
012bcf8d 2258 }
a0ed51b3 2259 else {
012bcf8d 2260 *d++ = (char)uv;
a0ed51b3 2261 }
012bcf8d
GS
2262 }
2263 else {
c4d5f83a 2264 *d++ = (char) uv;
a0ed51b3 2265 }
79072805 2266 continue;
02aa26ce 2267
b239daa5 2268 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2269 case 'N':
55eda711 2270 ++s;
423cee85
JH
2271 if (*s == '{') {
2272 char* e = strchr(s, '}');
155aba94 2273 SV *res;
423cee85 2274 STRLEN len;
cfd0369c 2275 const char *str;
fc8cd66c 2276 SV *type;
4e553d73 2277
423cee85 2278 if (!e) {
5777a3f7 2279 yyerror("Missing right brace on \\N{}");
423cee85
JH
2280 e = s - 1;
2281 goto cont_scan;
2282 }
dbc0d4f2
JH
2283 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2284 /* \N{U+...} */
2285 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2286 PERL_SCAN_DISALLOW_PREFIX;
2287 s += 3;
2288 len = e - s;
2289 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2290 if ( e > s && len != (STRLEN)(e - s) ) {
2291 uv = 0xFFFD;
fc8cd66c 2292 }
dbc0d4f2
JH
2293 s = e + 1;
2294 goto NUM_ESCAPE_INSERT;
2295 }
55eda711 2296 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2297 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2298 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2299 res, NULL, SvPVX(type) );
2300 SvREFCNT_dec(type);
f9a63242
JH
2301 if (has_utf8)
2302 sv_utf8_upgrade(res);
cfd0369c 2303 str = SvPV_const(res,len);
1c47067b
JH
2304#ifdef EBCDIC_NEVER_MIND
2305 /* charnames uses pack U and that has been
2306 * recently changed to do the below uni->native
2307 * mapping, so this would be redundant (and wrong,
2308 * the code point would be doubly converted).
2309 * But leave this in just in case the pack U change
2310 * gets revoked, but the semantics is still
2311 * desireable for charnames. --jhi */
cddc7ef4 2312 {
cfd0369c 2313 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2314
2315 if (uv < 0x100) {
89ebb4a3 2316 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2317
2318 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2319 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2320 str = SvPV_const(res, len);
cddc7ef4
JH
2321 }
2322 }
2323#endif
89491803 2324 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2325 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2326 SvCUR_set(sv, d - ostart);
2327 SvPOK_on(sv);
e4f3eed8 2328 *d = '\0';
f08d6ad9 2329 sv_utf8_upgrade(sv);
d2f449dd 2330 /* this just broke our allocation above... */
eb160463 2331 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2332 d = SvPVX(sv) + SvCUR(sv);
89491803 2333 has_utf8 = TRUE;
f08d6ad9 2334 }
eb160463 2335 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2336 const char * const odest = SvPVX_const(sv);
423cee85 2337
8973db79 2338 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2339 d = SvPVX(sv) + (d - odest);
2340 }
e294cc5d
JH
2341#ifdef EBCDIC
2342 if (!dorange)
2343 native_range = FALSE; /* \N{} is guessed to be Unicode */
2344#endif
423cee85
JH
2345 Copy(str, d, len, char);
2346 d += len;
2347 SvREFCNT_dec(res);
2348 cont_scan:
2349 s = e + 1;
2350 }
2351 else
5777a3f7 2352 yyerror("Missing braces on \\N{}");
423cee85
JH
2353 continue;
2354
02aa26ce 2355 /* \c is a control character */
79072805
LW
2356 case 'c':
2357 s++;
961ce445 2358 if (s < send) {
ba210ebe 2359 U8 c = *s++;
c7f1f016
NIS
2360#ifdef EBCDIC
2361 if (isLOWER(c))
2362 c = toUPPER(c);
2363#endif
db42d148 2364 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2365 }
961ce445
RGS
2366 else {
2367 yyerror("Missing control char name in \\c");
2368 }
79072805 2369 continue;
02aa26ce
NT
2370
2371 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2372 case 'b':
db42d148 2373 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2374 break;
2375 case 'n':
db42d148 2376 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2377 break;
2378 case 'r':
db42d148 2379 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2380 break;
2381 case 'f':
db42d148 2382 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2383 break;
2384 case 't':
db42d148 2385 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2386 break;
34a3fe2a 2387 case 'e':
db42d148 2388 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2389 break;
2390 case 'a':
db42d148 2391 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2392 break;
02aa26ce
NT
2393 } /* end switch */
2394
79072805
LW
2395 s++;
2396 continue;
02aa26ce 2397 } /* end if (backslash) */
4c3a8340
TS
2398#ifdef EBCDIC
2399 else
2400 literal_endpoint++;
2401#endif
02aa26ce 2402
f9a63242 2403 default_action:
2b9d42f0
NIS
2404 /* If we started with encoded form, or already know we want it
2405 and then encode the next character */
2406 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2407 STRLEN len = 1;
5f66b61c
AL
2408 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2409 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2410 s += len;
2411 if (need > len) {
2412 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2413 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2414 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2415 }
5f66b61c 2416 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2417 has_utf8 = TRUE;
e294cc5d
JH
2418#ifdef EBCDIC
2419 if (uv > 255 && !dorange)
2420 native_range = FALSE;
2421#endif
2b9d42f0
NIS
2422 }
2423 else {
2424 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2425 }
02aa26ce
NT
2426 } /* while loop to process each character */
2427
2428 /* terminate the string and set up the sv */
79072805 2429 *d = '\0';
95a20fc0 2430 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2431 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2432 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2433
79072805 2434 SvPOK_on(sv);
9f4817db 2435 if (PL_encoding && !has_utf8) {
d0063567
DK
2436 sv_recode_to_utf8(sv, PL_encoding);
2437 if (SvUTF8(sv))
2438 has_utf8 = TRUE;
9f4817db 2439 }
2b9d42f0 2440 if (has_utf8) {
7e2040f0 2441 SvUTF8_on(sv);
2b9d42f0 2442 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2443 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2444 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2445 }
2446 }
79072805 2447
02aa26ce 2448 /* shrink the sv if we allocated more than we used */
79072805 2449 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2450 SvPV_shrink_to_cur(sv);
79072805 2451 }
02aa26ce 2452
9b599b2a 2453 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2454 if (s > PL_bufptr) {
2455 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2456 sv = new_constant(start, s - start,
2457 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2458 sv, NULL,
10edeb5d
JH
2459 (const char *)
2460 (( PL_lex_inwhat == OP_TRANS
2461 ? "tr"
2462 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2463 ? "s"
2464 : "qq"))));
79072805 2465 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2466 } else
8990e307 2467 SvREFCNT_dec(sv);
79072805
LW
2468 return s;
2469}
2470
ffb4593c
NT
2471/* S_intuit_more
2472 * Returns TRUE if there's more to the expression (e.g., a subscript),
2473 * FALSE otherwise.
ffb4593c
NT
2474 *
2475 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2476 *
2477 * ->[ and ->{ return TRUE
2478 * { and [ outside a pattern are always subscripts, so return TRUE
2479 * if we're outside a pattern and it's not { or [, then return FALSE
2480 * if we're in a pattern and the first char is a {
2481 * {4,5} (any digits around the comma) returns FALSE
2482 * if we're in a pattern and the first char is a [
2483 * [] returns FALSE
2484 * [SOMETHING] has a funky algorithm to decide whether it's a
2485 * character class or not. It has to deal with things like
2486 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2487 * anything else returns TRUE
2488 */
2489
9cbb5ea2
GS
2490/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2491
76e3520e 2492STATIC int
cea2e8a9 2493S_intuit_more(pTHX_ register char *s)
79072805 2494{
97aff369 2495 dVAR;
3280af22 2496 if (PL_lex_brackets)
79072805
LW
2497 return TRUE;
2498 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2499 return TRUE;
2500 if (*s != '{' && *s != '[')
2501 return FALSE;
3280af22 2502 if (!PL_lex_inpat)
79072805
LW
2503 return TRUE;
2504
2505 /* In a pattern, so maybe we have {n,m}. */
2506 if (*s == '{') {
2507 s++;
2508 if (!isDIGIT(*s))
2509 return TRUE;
2510 while (isDIGIT(*s))
2511 s++;
2512 if (*s == ',')
2513 s++;
2514 while (isDIGIT(*s))
2515 s++;
2516 if (*s == '}')
2517 return FALSE;
2518 return TRUE;
2519
2520 }
2521
2522 /* On the other hand, maybe we have a character class */
2523
2524 s++;
2525 if (*s == ']' || *s == '^')
2526 return FALSE;
2527 else {
ffb4593c 2528 /* this is terrifying, and it works */
79072805
LW
2529 int weight = 2; /* let's weigh the evidence */
2530 char seen[256];
f27ffc4a 2531 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2532 const char * const send = strchr(s,']');
3280af22 2533 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2534
2535 if (!send) /* has to be an expression */
2536 return TRUE;
2537
2538 Zero(seen,256,char);
2539 if (*s == '$')
2540 weight -= 3;
2541 else if (isDIGIT(*s)) {
2542 if (s[1] != ']') {
2543 if (isDIGIT(s[1]) && s[2] == ']')
2544 weight -= 10;
2545 }
2546 else
2547 weight -= 100;
2548 }
2549 for (; s < send; s++) {
2550 last_un_char = un_char;
2551 un_char = (unsigned char)*s;
2552 switch (*s) {
2553 case '@':
2554 case '&':
2555 case '$':
2556 weight -= seen[un_char] * 10;
7e2040f0 2557 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2558 int len;
8903cb82 2559 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2560 len = (int)strlen(tmpbuf);
2561 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2562 weight -= 100;
2563 else
2564 weight -= 10;
2565 }
2566 else if (*s == '$' && s[1] &&
93a17b20
LW
2567 strchr("[#!%*<>()-=",s[1])) {
2568 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2569 weight -= 10;
2570 else
2571 weight -= 1;
2572 }
2573 break;
2574 case '\\':
2575 un_char = 254;
2576 if (s[1]) {
93a17b20 2577 if (strchr("wds]",s[1]))
79072805 2578 weight += 100;
10edeb5d 2579 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2580 weight += 1;
93a17b20 2581 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2582 weight += 40;
2583 else if (isDIGIT(s[1])) {
2584 weight += 40;
2585 while (s[1] && isDIGIT(s[1]))
2586 s++;
2587 }
2588 }
2589 else
2590 weight += 100;
2591 break;
2592 case '-':
2593 if (s[1] == '\\')
2594 weight += 50;
93a17b20 2595 if (strchr("aA01! ",last_un_char))
79072805 2596 weight += 30;
93a17b20 2597 if (strchr("zZ79~",s[1]))
79072805 2598 weight += 30;
f27ffc4a
GS
2599 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2600 weight -= 5; /* cope with negative subscript */
79072805
LW
2601 break;
2602 default:
3792a11b
NC
2603 if (!isALNUM(last_un_char)
2604 && !(last_un_char == '$' || last_un_char == '@'
2605 || last_un_char == '&')
2606 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2607 char *d = tmpbuf;
2608 while (isALPHA(*s))
2609 *d++ = *s++;
2610 *d = '\0';
5458a98a 2611 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2612 weight -= 150;
2613 }
2614 if (un_char == last_un_char + 1)
2615 weight += 5;
2616 weight -= seen[un_char];
2617 break;
2618 }
2619 seen[un_char]++;
2620 }
2621 if (weight >= 0) /* probably a character class */
2622 return FALSE;
2623 }
2624
2625 return TRUE;
2626}
ffed7fef 2627
ffb4593c
NT
2628/*
2629 * S_intuit_method
2630 *
2631 * Does all the checking to disambiguate
2632 * foo bar
2633 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2634 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2635 *
2636 * First argument is the stuff after the first token, e.g. "bar".
2637 *
2638 * Not a method if bar is a filehandle.
2639 * Not a method if foo is a subroutine prototyped to take a filehandle.
2640 * Not a method if it's really "Foo $bar"
2641 * Method if it's "foo $bar"
2642 * Not a method if it's really "print foo $bar"
2643 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2644 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2645 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2646 * =>
2647 */
2648
76e3520e 2649STATIC int
62d55b22 2650S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2651{
97aff369 2652 dVAR;
a0d0e21e 2653 char *s = start + (*start == '$');
3280af22 2654 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2655 STRLEN len;
2656 GV* indirgv;
5db06880
NC
2657#ifdef PERL_MAD
2658 int soff;
2659#endif
a0d0e21e
LW
2660
2661 if (gv) {
62d55b22 2662 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2663 return 0;
62d55b22
NC
2664 if (cv) {
2665 if (SvPOK(cv)) {
2666 const char *proto = SvPVX_const(cv);
2667 if (proto) {
2668 if (*proto == ';')
2669 proto++;
2670 if (*proto == '*')
2671 return 0;
2672 }
b6c543e3
IZ
2673 }
2674 } else
c35e046a 2675 gv = NULL;
a0d0e21e 2676 }
8903cb82 2677 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2678 /* start is the beginning of the possible filehandle/object,
2679 * and s is the end of it
2680 * tmpbuf is a copy of it
2681 */
2682
a0d0e21e 2683 if (*start == '$') {
3ef1310e
RGS
2684 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2685 isUPPER(*PL_tokenbuf))
a0d0e21e 2686 return 0;
5db06880
NC
2687#ifdef PERL_MAD
2688 len = start - SvPVX(PL_linestr);
2689#endif
29595ff2 2690 s = PEEKSPACE(s);
f0092767 2691#ifdef PERL_MAD
5db06880
NC
2692 start = SvPVX(PL_linestr) + len;
2693#endif
3280af22
NIS
2694 PL_bufptr = start;
2695 PL_expect = XREF;
a0d0e21e
LW
2696 return *s == '(' ? FUNCMETH : METHOD;
2697 }
5458a98a 2698 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2699 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2700 len -= 2;
2701 tmpbuf[len] = '\0';
5db06880
NC
2702#ifdef PERL_MAD
2703 soff = s - SvPVX(PL_linestr);
2704#endif
c3e0f903
GS
2705 goto bare_package;
2706 }
90e5519e 2707 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2708 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2709 return 0;
2710 /* filehandle or package name makes it a method */
da51bb9b 2711 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2712#ifdef PERL_MAD
2713 soff = s - SvPVX(PL_linestr);
2714#endif
29595ff2 2715 s = PEEKSPACE(s);
3280af22 2716 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2717 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2718 bare_package:
cd81e915 2719 start_force(PL_curforce);
9ded7720 2720 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2721 newSVpvn(tmpbuf,len));
9ded7720 2722 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2723 if (PL_madskills)
2724 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2725 PL_expect = XTERM;
a0d0e21e 2726 force_next(WORD);
3280af22 2727 PL_bufptr = s;
5db06880
NC
2728#ifdef PERL_MAD
2729 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2730#endif
a0d0e21e
LW
2731 return *s == '(' ? FUNCMETH : METHOD;
2732 }
2733 }
2734 return 0;
2735}
2736
ffb4593c
NT
2737/*
2738 * S_incl_perldb
2739 * Return a string of Perl code to load the debugger. If PERL5DB
2740 * is set, it will return the contents of that, otherwise a
2741 * compile-time require of perl5db.pl.
2742 */
2743
bfed75c6 2744STATIC const char*
cea2e8a9 2745S_incl_perldb(pTHX)
a0d0e21e 2746{
97aff369 2747 dVAR;
3280af22 2748 if (PL_perldb) {
9d4ba2ae 2749 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2750
2751 if (pdb)
2752 return pdb;
93189314 2753 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2754 return "BEGIN { require 'perl5db.pl' }";
2755 }
2756 return "";
2757}
2758
2759
16d20bd9 2760/* Encoded script support. filter_add() effectively inserts a
4e553d73 2761 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2762 * Note that the filter function only applies to the current source file
2763 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2764 *
2765 * The datasv parameter (which may be NULL) can be used to pass
2766 * private data to this instance of the filter. The filter function
2767 * can recover the SV using the FILTER_DATA macro and use it to
2768 * store private buffers and state information.
2769 *
2770 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2771 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2772 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2773 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2774 * private use must be set using malloc'd pointers.
2775 */
16d20bd9
AD
2776
2777SV *
864dbfa3 2778Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2779{
97aff369 2780 dVAR;
f4c556ac 2781 if (!funcp)
a0714e2c 2782 return NULL;
f4c556ac 2783
5486870f
DM
2784 if (!PL_parser)
2785 return NULL;
2786
3280af22
NIS
2787 if (!PL_rsfp_filters)
2788 PL_rsfp_filters = newAV();
16d20bd9 2789 if (!datasv)
561b68a9 2790 datasv = newSV(0);
862a34c6 2791 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2792 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2793 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2794 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2795 FPTR2DPTR(void *, IoANY(datasv)),
2796 SvPV_nolen(datasv)));
3280af22
NIS
2797 av_unshift(PL_rsfp_filters, 1);
2798 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2799 return(datasv);
2800}
4e553d73 2801
16d20bd9
AD
2802
2803/* Delete most recently added instance of this filter function. */
a0d0e21e 2804void
864dbfa3 2805Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2806{
97aff369 2807 dVAR;
e0c19803 2808 SV *datasv;
24801a4b 2809
33073adb 2810#ifdef DEBUGGING
55662e27
JH
2811 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2812 FPTR2DPTR(void*, funcp)));
33073adb 2813#endif
5486870f 2814 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2815 return;
2816 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2817 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2818 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2819 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2820 IoANY(datasv) = (void *)NULL;
3280af22 2821 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2822
16d20bd9
AD
2823 return;
2824 }
2825 /* we need to search for the correct entry and clear it */
cea2e8a9 2826 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2827}
2828
2829
1de9afcd
RGS
2830/* Invoke the idxth filter function for the current rsfp. */
2831/* maxlen 0 = read one text line */
16d20bd9 2832I32
864dbfa3 2833Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2834{
97aff369 2835 dVAR;
16d20bd9
AD
2836 filter_t funcp;
2837 SV *datasv = NULL;
f482118e
NC
2838 /* This API is bad. It should have been using unsigned int for maxlen.
2839 Not sure if we want to change the API, but if not we should sanity
2840 check the value here. */
39cd7a59
NC
2841 const unsigned int correct_length
2842 = maxlen < 0 ?
2843#ifdef PERL_MICRO
2844 0x7FFFFFFF
2845#else
2846 INT_MAX
2847#endif
2848 : maxlen;
e50aee73 2849
5486870f 2850 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2851 return -1;
1de9afcd 2852 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2853 /* Provide a default input filter to make life easy. */
2854 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2855 DEBUG_P(PerlIO_printf(Perl_debug_log,
2856 "filter_read %d: from rsfp\n", idx));
f482118e 2857 if (correct_length) {
16d20bd9
AD
2858 /* Want a block */
2859 int len ;
f54cb97a 2860 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2861
2862 /* ensure buf_sv is large enough */
f482118e
NC
2863 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2864 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2865 correct_length)) <= 0) {
3280af22 2866 if (PerlIO_error(PL_rsfp))
37120919
AD
2867 return -1; /* error */
2868 else
2869 return 0 ; /* end of file */
2870 }
16d20bd9
AD
2871 SvCUR_set(buf_sv, old_len + len) ;
2872 } else {
2873 /* Want a line */
3280af22
NIS
2874 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2875 if (PerlIO_error(PL_rsfp))
37120919
AD
2876 return -1; /* error */
2877 else
2878 return 0 ; /* end of file */
2879 }
16d20bd9
AD
2880 }
2881 return SvCUR(buf_sv);
2882 }
2883 /* Skip this filter slot if filter has been deleted */
1de9afcd 2884 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2885 DEBUG_P(PerlIO_printf(Perl_debug_log,
2886 "filter_read %d: skipped (filter deleted)\n",
2887 idx));
f482118e 2888 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2889 }
2890 /* Get function pointer hidden within datasv */
8141890a 2891 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2892 DEBUG_P(PerlIO_printf(Perl_debug_log,
2893 "filter_read %d: via function %p (%s)\n",
ca0270c4 2894 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2895 /* Call function. The function is expected to */
2896 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2897 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2898 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2899}
2900
76e3520e 2901STATIC char *
cea2e8a9 2902S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2903{
97aff369 2904 dVAR;
c39cd008 2905#ifdef PERL_CR_FILTER
3280af22 2906 if (!PL_rsfp_filters) {
c39cd008 2907 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2908 }
2909#endif
3280af22 2910 if (PL_rsfp_filters) {
55497cff 2911 if (!append)
2912 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2913 if (FILTER_READ(0, sv, 0) > 0)
2914 return ( SvPVX(sv) ) ;
2915 else
bd61b366 2916 return NULL ;
16d20bd9 2917 }
9d116dd7 2918 else
fd049845 2919 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2920}
2921
01ec43d0 2922STATIC HV *
7fc63493 2923S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2924{
97aff369 2925 dVAR;
def3634b
GS
2926 GV *gv;
2927
01ec43d0 2928 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2929 return PL_curstash;
2930
2931 if (len > 2 &&
2932 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2933 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2934 {
2935 return GvHV(gv); /* Foo:: */
def3634b
GS
2936 }
2937
2938 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2939 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2940 if (gv && GvCV(gv)) {
2941 SV * const sv = cv_const_sv(GvCV(gv));
2942 if (sv)
83003860 2943 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2944 }
2945
da51bb9b 2946 return gv_stashpv(pkgname, 0);
def3634b 2947}
a0d0e21e 2948
e3f73d4e
RGS
2949/*
2950 * S_readpipe_override
2951 * Check whether readpipe() is overriden, and generates the appropriate
2952 * optree, provided sublex_start() is called afterwards.
2953 */
2954STATIC void
1d51329b 2955S_readpipe_override(pTHX)
e3f73d4e
RGS
2956{
2957 GV **gvp;
2958 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2959 yylval.ival = OP_BACKTICK;
2960 if ((gv_readpipe
2961 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2962 ||
2963 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2964 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2965 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2966 {
2967 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2968 append_elem(OP_LIST,
2969 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2970 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2971 }
e3f73d4e
RGS
2972}
2973
5db06880
NC
2974#ifdef PERL_MAD
2975 /*
2976 * Perl_madlex
2977 * The intent of this yylex wrapper is to minimize the changes to the
2978 * tokener when we aren't interested in collecting madprops. It remains
2979 * to be seen how successful this strategy will be...
2980 */
2981
2982int
2983Perl_madlex(pTHX)
2984{
2985 int optype;
2986 char *s = PL_bufptr;
2987
cd81e915
NC
2988 /* make sure PL_thiswhite is initialized */
2989 PL_thiswhite = 0;
2990 PL_thismad = 0;
5db06880 2991
cd81e915 2992 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2993 if (PL_pending_ident)
2994 return S_pending_ident(aTHX);
2995
2996 /* previous token ate up our whitespace? */
cd81e915
NC
2997 if (!PL_lasttoke && PL_nextwhite) {
2998 PL_thiswhite = PL_nextwhite;
2999 PL_nextwhite = 0;
5db06880
NC
3000 }
3001
3002 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3003 PL_realtokenstart = -1;
3004 PL_thistoken = 0;
5db06880
NC
3005 optype = yylex();
3006 s = PL_bufptr;
cd81e915 3007 assert(PL_curforce < 0);
5db06880 3008
cd81e915
NC
3009 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3010 if (!PL_thistoken) {
3011 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3012 PL_thistoken = newSVpvs("");
5db06880 3013 else {
c35e046a 3014 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3015 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3016 }
3017 }
cd81e915
NC
3018 if (PL_thismad) /* install head */
3019 CURMAD('X', PL_thistoken);
5db06880
NC
3020 }
3021
3022 /* last whitespace of a sublex? */
cd81e915
NC
3023 if (optype == ')' && PL_endwhite) {
3024 CURMAD('X', PL_endwhite);
5db06880
NC
3025 }
3026
cd81e915 3027 if (!PL_thismad) {
5db06880
NC
3028
3029 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3030 if (!PL_thiswhite && !PL_endwhite && !optype) {
3031 sv_free(PL_thistoken);
3032 PL_thistoken = 0;
5db06880
NC
3033 return 0;
3034 }
3035
3036 /* put off final whitespace till peg */
3037 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3038 PL_nextwhite = PL_thiswhite;
3039 PL_thiswhite = 0;
5db06880 3040 }
cd81e915
NC
3041 else if (PL_thisopen) {
3042 CURMAD('q', PL_thisopen);
3043 if (PL_thistoken)
3044 sv_free(PL_thistoken);
3045 PL_thistoken = 0;
5db06880
NC
3046 }
3047 else {
3048 /* Store actual token text as madprop X */
cd81e915 3049 CURMAD('X', PL_thistoken);
5db06880
NC
3050 }
3051
cd81e915 3052 if (PL_thiswhite) {
5db06880 3053 /* add preceding whitespace as madprop _ */
cd81e915 3054 CURMAD('_', PL_thiswhite);
5db06880
NC
3055 }
3056
cd81e915 3057 if (PL_thisstuff) {
5db06880 3058 /* add quoted material as madprop = */
cd81e915 3059 CURMAD('=', PL_thisstuff);
5db06880
NC
3060 }
3061
cd81e915 3062 if (PL_thisclose) {
5db06880 3063 /* add terminating quote as madprop Q */
cd81e915 3064 CURMAD('Q', PL_thisclose);
5db06880
NC
3065 }
3066 }
3067
3068 /* special processing based on optype */
3069
3070 switch (optype) {
3071
3072 /* opval doesn't need a TOKEN since it can already store mp */
3073 case WORD:
3074 case METHOD:
3075 case FUNCMETH:
3076 case THING:
3077 case PMFUNC:
3078 case PRIVATEREF:
3079 case FUNC0SUB:
3080 case UNIOPSUB:
3081 case LSTOPSUB:
3082 if (yylval.opval)
cd81e915
NC
3083 append_madprops(PL_thismad, yylval.opval, 0);
3084 PL_thismad = 0;
5db06880
NC
3085 return optype;
3086
3087 /* fake EOF */
3088 case 0:
3089 optype = PEG;
cd81e915
NC
3090 if (PL_endwhite) {
3091 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3092 PL_endwhite = 0;
5db06880
NC
3093 }
3094 break;
3095
3096 case ']':
3097 case '}':
cd81e915 3098 if (PL_faketokens)
5db06880
NC
3099 break;
3100 /* remember any fake bracket that lexer is about to discard */
3101 if (PL_lex_brackets == 1 &&
3102 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3103 {
3104 s = PL_bufptr;
3105 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3106 s++;
3107 if (*s == '}') {
cd81e915
NC
3108 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3109 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3110 PL_thiswhite = 0;
5db06880
NC
3111 PL_bufptr = s - 1;
3112 break; /* don't bother looking for trailing comment */
3113 }
3114 else
3115 s = PL_bufptr;
3116 }
3117 if (optype == ']')
3118 break;
3119 /* FALLTHROUGH */
3120
3121 /* attach a trailing comment to its statement instead of next token */
3122 case ';':
cd81e915 3123 if (PL_faketokens)
5db06880
NC
3124 break;
3125 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3126 s = PL_bufptr;
3127 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3128 s++;
3129 if (*s == '\n' || *s == '#') {
3130 while (s < PL_bufend && *s != '\n')
3131 s++;
3132 if (s < PL_bufend)
3133 s++;
cd81e915
NC
3134 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3135 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3136 PL_thiswhite = 0;
5db06880
NC
3137 PL_bufptr = s;
3138 }
3139 }
3140 break;
3141
3142 /* pval */
3143 case LABEL:
3144 break;
3145
3146 /* ival */
3147 default:
3148 break;
3149
3150 }
3151
3152 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3153 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3154 PL_thismad = 0;
5db06880
NC
3155 return optype;
3156}
3157#endif
3158
468aa647 3159STATIC char *
cc6ed77d 3160S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3161 dVAR;
468aa647
RGS
3162 if (PL_expect != XSTATE)
3163 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3164 is_use ? "use" : "no"));
29595ff2 3165 s = SKIPSPACE1(s);
468aa647
RGS
3166 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3167 s = force_version(s, TRUE);
29595ff2 3168 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3169 start_force(PL_curforce);
9ded7720 3170 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3171 force_next(WORD);
3172 }
3173 else if (*s == 'v') {
3174 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3175 s = force_version(s, FALSE);
3176 }
3177 }
3178 else {
3179 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3180 s = force_version(s, FALSE);
3181 }
3182 yylval.ival = is_use;
3183 return s;
3184}
748a9306 3185#ifdef DEBUGGING
27da23d5 3186 static const char* const exp_name[] =
09bef843 3187 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3188 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3189 };
748a9306 3190#endif
463ee0b2 3191
02aa26ce
NT
3192/*
3193 yylex
3194
3195 Works out what to call the token just pulled out of the input
3196 stream. The yacc parser takes care of taking the ops we return and
3197 stitching them into a tree.
3198
3199 Returns:
3200 PRIVATEREF
3201
3202 Structure:
3203 if read an identifier
3204 if we're in a my declaration
3205 croak if they tried to say my($foo::bar)
3206 build the ops for a my() declaration
3207 if it's an access to a my() variable
3208 are we in a sort block?
3209 croak if my($a); $a <=> $b
3210 build ops for access to a my() variable
3211 if in a dq string, and they've said @foo and we can't find @foo
3212 croak
3213 build ops for a bareword
3214 if we already built the token before, use it.
3215*/
3216
20141f0e 3217
dba4d153
JH
3218#ifdef __SC__
3219#pragma segment Perl_yylex
3220#endif
dba4d153 3221int
dba4d153 3222Perl_yylex(pTHX)
20141f0e 3223{
97aff369 3224 dVAR;
3afc138a 3225 register char *s = PL_bufptr;
378cc40b 3226 register char *d;
463ee0b2 3227 STRLEN len;
aa7440fb 3228 bool bof = FALSE;
a687059c 3229
10edeb5d
JH
3230 /* orig_keyword, gvp, and gv are initialized here because
3231 * jump to the label just_a_word_zero can bypass their
3232 * initialization later. */
3233 I32 orig_keyword = 0;
3234 GV *gv = NULL;
3235 GV **gvp = NULL;
3236
bbf60fe6 3237 DEBUG_T( {
396482e1 3238 SV* tmp = newSVpvs("");
b6007c36
DM
3239 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3240 (IV)CopLINE(PL_curcop),
3241 lex_state_names[PL_lex_state],
3242 exp_name[PL_expect],
3243 pv_display(tmp, s, strlen(s), 0, 60));
3244 SvREFCNT_dec(tmp);
bbf60fe6 3245 } );
02aa26ce 3246 /* check if there's an identifier for us to look at */
ba979b31 3247 if (PL_pending_ident)
bbf60fe6 3248 return REPORT(S_pending_ident(aTHX));
bbce6d69 3249
02aa26ce
NT
3250 /* no identifier pending identification */
3251
3280af22 3252 switch (PL_lex_state) {
79072805
LW
3253#ifdef COMMENTARY
3254 case LEX_NORMAL: /* Some compilers will produce faster */
3255 case LEX_INTERPNORMAL: /* code if we comment these out. */
3256 break;
3257#endif
3258
09bef843 3259 /* when we've already built the next token, just pull it out of the queue */
79072805 3260 case LEX_KNOWNEXT:
5db06880
NC
3261#ifdef PERL_MAD
3262 PL_lasttoke--;
3263 yylval = PL_nexttoke[PL_lasttoke].next_val;
3264 if (PL_madskills) {
cd81e915 3265 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3266 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3267 if (PL_thismad && PL_thismad->mad_key == '_') {
3268 PL_thiswhite = (SV*)PL_thismad->mad_val;
3269 PL_thismad->mad_val = 0;
3270 mad_free(PL_thismad);
3271 PL_thismad = 0;
5db06880
NC
3272 }
3273 }
3274 if (!PL_lasttoke) {
3275 PL_lex_state = PL_lex_defer;
3276 PL_expect = PL_lex_expect;
3277 PL_lex_defer = LEX_NORMAL;
3278 if (!PL_nexttoke[PL_lasttoke].next_type)
3279 return yylex();
3280 }
3281#else
3280af22 3282 PL_nexttoke--;
5db06880 3283 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3284 if (!PL_nexttoke) {
3285 PL_lex_state = PL_lex_defer;
3286 PL_expect = PL_lex_expect;
3287 PL_lex_defer = LEX_NORMAL;
463ee0b2 3288 }
5db06880
NC
3289#endif
3290#ifdef PERL_MAD
3291 /* FIXME - can these be merged? */
3292 return(PL_nexttoke[PL_lasttoke].next_type);
3293#else
bbf60fe6 3294 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3295#endif
79072805 3296
02aa26ce 3297 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3298 when we get here, PL_bufptr is at the \
02aa26ce 3299 */
79072805
LW
3300 case LEX_INTERPCASEMOD:
3301#ifdef DEBUGGING
3280af22 3302 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3303 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3304#endif
02aa26ce 3305 /* handle \E or end of string */
3280af22 3306 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3307 /* if at a \E */
3280af22 3308 if (PL_lex_casemods) {
f54cb97a 3309 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3310 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3311
3792a11b
NC
3312 if (PL_bufptr != PL_bufend
3313 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3314 PL_bufptr += 2;
3315 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3316#ifdef PERL_MAD
3317 if (PL_madskills)
6b29d1f5 3318 PL_thistoken = newSVpvs("\\E");
5db06880 3319#endif
a0d0e21e 3320 }
bbf60fe6 3321 return REPORT(')');
79072805 3322 }
5db06880
NC
3323#ifdef PERL_MAD
3324 while (PL_bufptr != PL_bufend &&
3325 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3326 if (!PL_thiswhite)
6b29d1f5 3327 PL_thiswhite = newSVpvs("");
cd81e915 3328 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3329 PL_bufptr += 2;
3330 }
3331#else
3280af22
NIS
3332 if (PL_bufptr != PL_bufend)
3333 PL_bufptr += 2;
5db06880 3334#endif
3280af22 3335 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3336 return yylex();
79072805
LW
3337 }
3338 else {
607df283 3339 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3340 "### Saw case modifier\n"); });
3280af22 3341 s = PL_bufptr + 1;
6e909404 3342 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3343#ifdef PERL_MAD
cd81e915 3344 if (!PL_thiswhite)
6b29d1f5 3345 PL_thiswhite = newSVpvs("");
cd81e915 3346 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3347#endif
89122651 3348 PL_bufptr = s + 3;
6e909404
JH
3349 PL_lex_state = LEX_INTERPCONCAT;
3350 return yylex();
a0d0e21e 3351 }
6e909404 3352 else {
90771dc0 3353 I32 tmp;
5db06880
NC
3354 if (!PL_madskills) /* when just compiling don't need correct */
3355 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3356 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3357 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3358 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3359 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3360 return REPORT(')');
6e909404
JH
3361 }
3362 if (PL_lex_casemods > 10)
3363 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3364 PL_lex_casestack[PL_lex_casemods++] = *s;
3365 PL_lex_casestack[PL_lex_casemods] = '\0';
3366 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3367 start_force(PL_curforce);
9ded7720 3368 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3369 force_next('(');
cd81e915 3370 start_force(PL_curforce);
6e909404 3371 if (*s == 'l')
9ded7720 3372 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3373 else if (*s == 'u')
9ded7720 3374 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3375 else if (*s == 'L')
9ded7720 3376 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3377 else if (*s == 'U')
9ded7720 3378 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3379 else if (*s == 'Q')
9ded7720 3380 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3381 else
3382 Perl_croak(aTHX_ "panic: yylex");
5db06880 3383 if (PL_madskills) {
a5849ce5
NC
3384 SV* const tmpsv = newSVpvs("\\ ");
3385 /* replace the space with the character we want to escape
3386 */
3387 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3388 curmad('_', tmpsv);
3389 }
6e909404 3390 PL_bufptr = s + 1;
a0d0e21e 3391 }
79072805 3392 force_next(FUNC);
3280af22
NIS
3393 if (PL_lex_starts) {
3394 s = PL_bufptr;
3395 PL_lex_starts = 0;
5db06880
NC
3396#ifdef PERL_MAD
3397 if (PL_madskills) {
cd81e915
NC
3398 if (PL_thistoken)
3399 sv_free(PL_thistoken);
6b29d1f5 3400 PL_thistoken = newSVpvs("");
5db06880
NC
3401 }
3402#endif
131b3ad0
DM
3403 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3404 if (PL_lex_casemods == 1 && PL_lex_inpat)
3405 OPERATOR(',');
3406 else
3407 Aop(OP_CONCAT);
79072805
LW
3408 }
3409 else
cea2e8a9 3410 return yylex();
79072805
LW
3411 }
3412
55497cff 3413 case LEX_INTERPPUSH:
bbf60fe6 3414 return REPORT(sublex_push());
55497cff 3415
79072805 3416 case LEX_INTERPSTART:
3280af22 3417 if (PL_bufptr == PL_bufend)
bbf60fe6 3418 return REPORT(sublex_done());
607df283 3419 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3420 "### Interpolated variable\n"); });
3280af22
NIS
3421 PL_expect = XTERM;
3422 PL_lex_dojoin = (*PL_bufptr == '@');
3423 PL_lex_state = LEX_INTERPNORMAL;
3424 if (PL_lex_dojoin) {
cd81e915 3425 start_force(PL_curforce);
9ded7720 3426 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3427 force_next(',');
cd81e915 3428 start_force(PL_curforce);
a0d0e21e 3429 force_ident("\"", '$');
cd81e915 3430 start_force(PL_curforce);
9ded7720 3431 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3432 force_next('$');
cd81e915 3433 start_force(PL_curforce);
9ded7720 3434 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3435 force_next('(');
cd81e915 3436 start_force(PL_curforce);
9ded7720 3437 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3438 force_next(FUNC);
3439 }
3280af22
NIS
3440 if (PL_lex_starts++) {
3441 s = PL_bufptr;
5db06880
NC
3442#ifdef PERL_MAD
3443 if (PL_madskills) {
cd81e915
NC
3444 if (PL_thistoken)
3445 sv_free(PL_thistoken);
6b29d1f5 3446 PL_thistoken = newSVpvs("");
5db06880
NC
3447 }
3448#endif
131b3ad0
DM
3449 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3450 if (!PL_lex_casemods && PL_lex_inpat)
3451 OPERATOR(',');
3452 else
3453 Aop(OP_CONCAT);
79072805 3454 }
cea2e8a9 3455 return yylex();
79072805
LW
3456
3457 case LEX_INTERPENDMAYBE:
3280af22
NIS
3458 if (intuit_more(PL_bufptr)) {
3459 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3460 break;
3461 }
3462 /* FALL THROUGH */
3463
3464 case LEX_INTERPEND:
3280af22
NIS
3465 if (PL_lex_dojoin) {
3466 PL_lex_dojoin = FALSE;
3467 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3468#ifdef PERL_MAD
3469 if (PL_madskills) {
cd81e915
NC
3470 if (PL_thistoken)
3471 sv_free(PL_thistoken);
6b29d1f5 3472 PL_thistoken = newSVpvs("");
5db06880
NC
3473 }
3474#endif
bbf60fe6 3475 return REPORT(')');
79072805 3476 }
43a16006 3477 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3478 && SvEVALED(PL_lex_repl))
43a16006 3479 {
e9fa98b2 3480 if (PL_bufptr != PL_bufend)
cea2e8a9 3481 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3482 PL_lex_repl = NULL;
e9fa98b2 3483 }
79072805
LW
3484 /* FALLTHROUGH */
3485 case LEX_INTERPCONCAT:
3486#ifdef DEBUGGING
3280af22 3487 if (PL_lex_brackets)
cea2e8a9 3488 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3489#endif
3280af22 3490 if (PL_bufptr == PL_bufend)
bbf60fe6 3491 return REPORT(sublex_done());
79072805 3492
3280af22
NIS
3493 if (SvIVX(PL_linestr) == '\'') {
3494 SV *sv = newSVsv(PL_linestr);
3495 if (!PL_lex_inpat)
76e3520e 3496 sv = tokeq(sv);
3280af22 3497 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3498 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3499 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3500 s = PL_bufend;
79072805
LW
3501 }
3502 else {
3280af22 3503 s = scan_const(PL_bufptr);
79072805 3504 if (*s == '\\')
3280af22 3505 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3506 else
3280af22 3507 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3508 }
3509
3280af22 3510 if (s != PL_bufptr) {
cd81e915 3511 start_force(PL_curforce);
5db06880
NC
3512 if (PL_madskills) {
3513 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3514 }
9ded7720 3515 NEXTVAL_NEXTTOKE = yylval;
3280af22 3516 PL_expect = XTERM;
79072805 3517 force_next(THING);
131b3ad0 3518 if (PL_lex_starts++) {
5db06880
NC
3519#ifdef PERL_MAD
3520 if (PL_madskills) {
cd81e915
NC
3521 if (PL_thistoken)
3522 sv_free(PL_thistoken);
6b29d1f5 3523 PL_thistoken = newSVpvs("");
5db06880
NC
3524 }
3525#endif
131b3ad0
DM
3526 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3527 if (!PL_lex_casemods && PL_lex_inpat)
3528 OPERATOR(',');
3529 else
3530 Aop(OP_CONCAT);
3531 }
79072805 3532 else {
3280af22 3533 PL_bufptr = s;
cea2e8a9 3534 return yylex();
79072805
LW
3535 }
3536 }
3537
cea2e8a9 3538 return yylex();
a0d0e21e 3539 case LEX_FORMLINE:
3280af22
NIS
3540 PL_lex_state = LEX_NORMAL;
3541 s = scan_formline(PL_bufptr);
3542 if (!PL_lex_formbrack)
a0d0e21e
LW
3543 goto rightbracket;
3544 OPERATOR(';');
79072805
LW
3545 }
3546