This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Work around to get Unicode tests passing.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
199e78b7
DM
31/* XXX temporary backwards compatibility */
32#define PL_lex_brackets (PL_parser->lex_brackets)
33#define PL_lex_brackstack (PL_parser->lex_brackstack)
34#define PL_lex_casemods (PL_parser->lex_casemods)
35#define PL_lex_casestack (PL_parser->lex_casestack)
36#define PL_lex_defer (PL_parser->lex_defer)
37#define PL_lex_dojoin (PL_parser->lex_dojoin)
38#define PL_lex_expect (PL_parser->lex_expect)
39#define PL_lex_formbrack (PL_parser->lex_formbrack)
40#define PL_lex_inpat (PL_parser->lex_inpat)
41#define PL_lex_inwhat (PL_parser->lex_inwhat)
42#define PL_lex_op (PL_parser->lex_op)
43#define PL_lex_repl (PL_parser->lex_repl)
44#define PL_lex_starts (PL_parser->lex_starts)
45#define PL_lex_stuff (PL_parser->lex_stuff)
46#define PL_multi_start (PL_parser->multi_start)
47#define PL_multi_open (PL_parser->multi_open)
48#define PL_multi_close (PL_parser->multi_close)
49#define PL_pending_ident (PL_parser->pending_ident)
50#define PL_preambled (PL_parser->preambled)
51#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 52#define PL_linestr (PL_parser->linestr)
c2598295
DM
53#define PL_expect (PL_parser->expect)
54#define PL_copline (PL_parser->copline)
f06b5848
DM
55#define PL_bufptr (PL_parser->bufptr)
56#define PL_oldbufptr (PL_parser->oldbufptr)
57#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
58#define PL_linestart (PL_parser->linestart)
59#define PL_bufend (PL_parser->bufend)
60#define PL_last_uni (PL_parser->last_uni)
61#define PL_last_lop (PL_parser->last_lop)
62#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 63#define PL_lex_state (PL_parser->lex_state)
199e78b7
DM
64
65#ifdef PERL_MAD
66# define PL_endwhite (PL_parser->endwhite)
67# define PL_faketokens (PL_parser->faketokens)
68# define PL_lasttoke (PL_parser->lasttoke)
69# define PL_nextwhite (PL_parser->nextwhite)
70# define PL_realtokenstart (PL_parser->realtokenstart)
71# define PL_skipwhite (PL_parser->skipwhite)
72# define PL_thisclose (PL_parser->thisclose)
73# define PL_thismad (PL_parser->thismad)
74# define PL_thisopen (PL_parser->thisopen)
75# define PL_thisstuff (PL_parser->thisstuff)
76# define PL_thistoken (PL_parser->thistoken)
77# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
78# define PL_thiswhite (PL_parser->thiswhite)
79# define PL_nexttoke (PL_parser->nexttoke)
80# define PL_curforce (PL_parser->curforce)
81#else
82# define PL_nexttoke (PL_parser->nexttoke)
83# define PL_nexttype (PL_parser->nexttype)
84# define PL_nextval (PL_parser->nextval)
199e78b7
DM
85#endif
86
3cbf51f5
DM
87static int
88S_pending_ident(pTHX);
199e78b7 89
0bd48802 90static const char ident_too_long[] = "Identifier too long";
c445ea15 91static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 92
acfe0abc 93static void restore_rsfp(pTHX_ void *f);
6e3aabd6 94#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
95static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
96static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 97#endif
51371543 98
29595ff2 99#ifdef PERL_MAD
29595ff2 100# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 101# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 102#else
5db06880 103# define CURMAD(slot,sv)
9ded7720 104# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
105#endif
106
9059aa12
LW
107#define XFAKEBRACK 128
108#define XENUMMASK 127
109
39e02b42
JH
110#ifdef USE_UTF8_SCRIPTS
111# define UTF (!IN_BYTES)
2b9d42f0 112#else
746b446a 113# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 114#endif
a0ed51b3 115
61f0cdd9 116/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
117 * 1999-02-27 mjd-perl-patch@plover.com */
118#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
119
bf4acbe4
GS
120/* On MacOS, respect nonbreaking spaces */
121#ifdef MACOS_TRADITIONAL
122#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
123#else
124#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
125#endif
126
ffb4593c
NT
127/* LEX_* are values for PL_lex_state, the state of the lexer.
128 * They are arranged oddly so that the guard on the switch statement
79072805
LW
129 * can get by with a single comparison (if the compiler is smart enough).
130 */
131
fb73857a 132/* #define LEX_NOTPARSING 11 is done in perl.h. */
133
b6007c36
DM
134#define LEX_NORMAL 10 /* normal code (ie not within "...") */
135#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
136#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
137#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
138#define LEX_INTERPSTART 6 /* expecting the start of a $var */
139
140 /* at end of code, eg "$x" followed by: */
141#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
142#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
143
144#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
145 string or after \E, $foo, etc */
146#define LEX_INTERPCONST 2 /* NOT USED */
147#define LEX_FORMLINE 1 /* expecting a format line */
148#define LEX_KNOWNEXT 0 /* next token known; just return it */
149
79072805 150
bbf60fe6 151#ifdef DEBUGGING
27da23d5 152static const char* const lex_state_names[] = {
bbf60fe6
DM
153 "KNOWNEXT",
154 "FORMLINE",
155 "INTERPCONST",
156 "INTERPCONCAT",
157 "INTERPENDMAYBE",
158 "INTERPEND",
159 "INTERPSTART",
160 "INTERPPUSH",
161 "INTERPCASEMOD",
162 "INTERPNORMAL",
163 "NORMAL"
164};
165#endif
166
79072805
LW
167#ifdef ff_next
168#undef ff_next
d48672a2
LW
169#endif
170
79072805 171#include "keywords.h"
fe14fcc3 172
ffb4593c
NT
173/* CLINE is a macro that ensures PL_copline has a sane value */
174
ae986130
LW
175#ifdef CLINE
176#undef CLINE
177#endif
57843af0 178#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 179
5db06880 180#ifdef PERL_MAD
29595ff2
NC
181# define SKIPSPACE0(s) skipspace0(s)
182# define SKIPSPACE1(s) skipspace1(s)
183# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
184# define PEEKSPACE(s) skipspace2(s,0)
185#else
186# define SKIPSPACE0(s) skipspace(s)
187# define SKIPSPACE1(s) skipspace(s)
188# define SKIPSPACE2(s,tsv) skipspace(s)
189# define PEEKSPACE(s) skipspace(s)
190#endif
191
ffb4593c
NT
192/*
193 * Convenience functions to return different tokens and prime the
9cbb5ea2 194 * lexer for the next token. They all take an argument.
ffb4593c
NT
195 *
196 * TOKEN : generic token (used for '(', DOLSHARP, etc)
197 * OPERATOR : generic operator
198 * AOPERATOR : assignment operator
199 * PREBLOCK : beginning the block after an if, while, foreach, ...
200 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
201 * PREREF : *EXPR where EXPR is not a simple identifier
202 * TERM : expression term
203 * LOOPX : loop exiting command (goto, last, dump, etc)
204 * FTST : file test operator
205 * FUN0 : zero-argument function
2d2e263d 206 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
207 * BOop : bitwise or or xor
208 * BAop : bitwise and
209 * SHop : shift operator
210 * PWop : power operator
9cbb5ea2 211 * PMop : pattern-matching operator
ffb4593c
NT
212 * Aop : addition-level operator
213 * Mop : multiplication-level operator
214 * Eop : equality-testing operator
e5edeb50 215 * Rop : relational operator <= != gt
ffb4593c
NT
216 *
217 * Also see LOP and lop() below.
218 */
219
998054bd 220#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 221# define REPORT(retval) tokereport((I32)retval)
998054bd 222#else
bbf60fe6 223# define REPORT(retval) (retval)
998054bd
SC
224#endif
225
bbf60fe6
DM
226#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
227#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
228#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
229#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
230#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
231#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
232#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
233#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
234#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
237#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
238#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
239#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
240#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
241#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
242#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
243#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
244#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
245#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 246
a687059c
LW
247/* This bit of chicanery makes a unary function followed by
248 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
249 * The UNIDOR macro is for unary functions that can be followed by the //
250 * operator (such as C<shift // 0>).
a687059c 251 */
376fcdbf
AL
252#define UNI2(f,x) { \
253 yylval.ival = f; \
254 PL_expect = x; \
255 PL_bufptr = s; \
256 PL_last_uni = PL_oldbufptr; \
257 PL_last_lop_op = f; \
258 if (*s == '(') \
259 return REPORT( (int)FUNC1 ); \
29595ff2 260 s = PEEKSPACE(s); \
376fcdbf
AL
261 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
262 }
6f33ba73
RGS
263#define UNI(f) UNI2(f,XTERM)
264#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 265
376fcdbf
AL
266#define UNIBRACK(f) { \
267 yylval.ival = f; \
268 PL_bufptr = s; \
269 PL_last_uni = PL_oldbufptr; \
270 if (*s == '(') \
271 return REPORT( (int)FUNC1 ); \
29595ff2 272 s = PEEKSPACE(s); \
376fcdbf
AL
273 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
274 }
79072805 275
9f68db38 276/* grandfather return to old style */
3280af22 277#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 278
8fa7f367
JH
279#ifdef DEBUGGING
280
bbf60fe6
DM
281/* how to interpret the yylval associated with the token */
282enum token_type {
283 TOKENTYPE_NONE,
284 TOKENTYPE_IVAL,
285 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
286 TOKENTYPE_PVAL,
287 TOKENTYPE_OPVAL,
288 TOKENTYPE_GVVAL
289};
290
6d4a66ac
NC
291static struct debug_tokens {
292 const int token;
293 enum token_type type;
294 const char *name;
295} const debug_tokens[] =
9041c2e3 296{
bbf60fe6
DM
297 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
298 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
299 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
300 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
301 { ARROW, TOKENTYPE_NONE, "ARROW" },
302 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
303 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
304 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
305 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
306 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 307 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
308 { DO, TOKENTYPE_NONE, "DO" },
309 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
310 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
311 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
312 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
313 { ELSE, TOKENTYPE_NONE, "ELSE" },
314 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
315 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
316 { FOR, TOKENTYPE_IVAL, "FOR" },
317 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
318 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
319 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
320 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
321 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
322 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 323 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
324 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
325 { IF, TOKENTYPE_IVAL, "IF" },
326 { LABEL, TOKENTYPE_PVAL, "LABEL" },
327 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
328 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
329 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
330 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
331 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
332 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
333 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
334 { MY, TOKENTYPE_IVAL, "MY" },
335 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
336 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
337 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
338 { OROP, TOKENTYPE_IVAL, "OROP" },
339 { OROR, TOKENTYPE_NONE, "OROR" },
340 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
341 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
342 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
343 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
344 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
345 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
346 { PREINC, TOKENTYPE_NONE, "PREINC" },
347 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
348 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
349 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
350 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
351 { SUB, TOKENTYPE_NONE, "SUB" },
352 { THING, TOKENTYPE_OPVAL, "THING" },
353 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
354 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
355 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
356 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
357 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
358 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 359 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
360 { WHILE, TOKENTYPE_IVAL, "WHILE" },
361 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 362 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
363};
364
365/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 366
bbf60fe6 367STATIC int
f5bd084c 368S_tokereport(pTHX_ I32 rv)
bbf60fe6 369{
97aff369 370 dVAR;
bbf60fe6 371 if (DEBUG_T_TEST) {
bd61b366 372 const char *name = NULL;
bbf60fe6 373 enum token_type type = TOKENTYPE_NONE;
f54cb97a 374 const struct debug_tokens *p;
396482e1 375 SV* const report = newSVpvs("<== ");
bbf60fe6 376
f54cb97a 377 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
378 if (p->token == (int)rv) {
379 name = p->name;
380 type = p->type;
381 break;
382 }
383 }
384 if (name)
54667de8 385 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
386 else if ((char)rv > ' ' && (char)rv < '~')
387 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
388 else if (!rv)
396482e1 389 sv_catpvs(report, "EOF");
bbf60fe6
DM
390 else
391 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
392 switch (type) {
393 case TOKENTYPE_NONE:
394 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
395 break;
396 case TOKENTYPE_IVAL:
e4584336 397 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
398 break;
399 case TOKENTYPE_OPNUM:
400 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
401 PL_op_name[yylval.ival]);
402 break;
403 case TOKENTYPE_PVAL:
404 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
405 break;
406 case TOKENTYPE_OPVAL:
b6007c36 407 if (yylval.opval) {
401441c0 408 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 409 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
410 if (yylval.opval->op_type == OP_CONST) {
411 Perl_sv_catpvf(aTHX_ report, " %s",
412 SvPEEK(cSVOPx_sv(yylval.opval)));
413 }
414
415 }
401441c0 416 else
396482e1 417 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
418 break;
419 }
b6007c36 420 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
421 };
422 return (int)rv;
998054bd
SC
423}
424
b6007c36
DM
425
426/* print the buffer with suitable escapes */
427
428STATIC void
429S_printbuf(pTHX_ const char* fmt, const char* s)
430{
396482e1 431 SV* const tmp = newSVpvs("");
b6007c36
DM
432 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
433 SvREFCNT_dec(tmp);
434}
435
8fa7f367
JH
436#endif
437
ffb4593c
NT
438/*
439 * S_ao
440 *
c963b151
BD
441 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
442 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
443 */
444
76e3520e 445STATIC int
cea2e8a9 446S_ao(pTHX_ int toketype)
a0d0e21e 447{
97aff369 448 dVAR;
3280af22
NIS
449 if (*PL_bufptr == '=') {
450 PL_bufptr++;
a0d0e21e
LW
451 if (toketype == ANDAND)
452 yylval.ival = OP_ANDASSIGN;
453 else if (toketype == OROR)
454 yylval.ival = OP_ORASSIGN;
c963b151
BD
455 else if (toketype == DORDOR)
456 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
457 toketype = ASSIGNOP;
458 }
459 return toketype;
460}
461
ffb4593c
NT
462/*
463 * S_no_op
464 * When Perl expects an operator and finds something else, no_op
465 * prints the warning. It always prints "<something> found where
466 * operator expected. It prints "Missing semicolon on previous line?"
467 * if the surprise occurs at the start of the line. "do you need to
468 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
469 * where the compiler doesn't know if foo is a method call or a function.
470 * It prints "Missing operator before end of line" if there's nothing
471 * after the missing operator, or "... before <...>" if there is something
472 * after the missing operator.
473 */
474
76e3520e 475STATIC void
bfed75c6 476S_no_op(pTHX_ const char *what, char *s)
463ee0b2 477{
97aff369 478 dVAR;
9d4ba2ae
AL
479 char * const oldbp = PL_bufptr;
480 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 481
1189a94a
GS
482 if (!s)
483 s = oldbp;
07c798fb 484 else
1189a94a 485 PL_bufptr = s;
cea2e8a9 486 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
487 if (ckWARN_d(WARN_SYNTAX)) {
488 if (is_first)
489 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
490 "\t(Missing semicolon on previous line?)\n");
491 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 492 const char *t;
c35e046a
AL
493 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
494 NOOP;
56da5a46
RGS
495 if (t < PL_bufptr && isSPACE(*t))
496 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
497 "\t(Do you need to predeclare %.*s?)\n",
551405c4 498 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
499 }
500 else {
501 assert(s >= oldbp);
502 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 503 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 504 }
07c798fb 505 }
3280af22 506 PL_bufptr = oldbp;
8990e307
LW
507}
508
ffb4593c
NT
509/*
510 * S_missingterm
511 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 512 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
513 * If we're in a delimited string and the delimiter is a control
514 * character, it's reformatted into a two-char sequence like ^C.
515 * This is fatal.
516 */
517
76e3520e 518STATIC void
cea2e8a9 519S_missingterm(pTHX_ char *s)
8990e307 520{
97aff369 521 dVAR;
8990e307
LW
522 char tmpbuf[3];
523 char q;
524 if (s) {
9d4ba2ae 525 char * const nl = strrchr(s,'\n');
d2719217 526 if (nl)
8990e307
LW
527 *nl = '\0';
528 }
9d116dd7
JH
529 else if (
530#ifdef EBCDIC
531 iscntrl(PL_multi_close)
532#else
533 PL_multi_close < 32 || PL_multi_close == 127
534#endif
535 ) {
8990e307 536 *tmpbuf = '^';
585ec06d 537 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
538 tmpbuf[2] = '\0';
539 s = tmpbuf;
540 }
541 else {
eb160463 542 *tmpbuf = (char)PL_multi_close;
8990e307
LW
543 tmpbuf[1] = '\0';
544 s = tmpbuf;
545 }
546 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 547 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 548}
79072805 549
ef89dcc3 550#define FEATURE_IS_ENABLED(name) \
0d863452 551 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 552 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
553/*
554 * S_feature_is_enabled
555 * Check whether the named feature is enabled.
556 */
557STATIC bool
d4c19fe8 558S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 559{
97aff369 560 dVAR;
0d863452 561 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 562 char he_name[32] = "feature_";
6fca0082 563 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 564
7b9ef140 565 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
566}
567
ffb4593c
NT
568/*
569 * Perl_deprecate
ffb4593c
NT
570 */
571
79072805 572void
bfed75c6 573Perl_deprecate(pTHX_ const char *s)
a0d0e21e 574{
599cee73 575 if (ckWARN(WARN_DEPRECATED))
9014280d 576 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
577}
578
12bcd1a6 579void
bfed75c6 580Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
581{
582 /* This function should NOT be called for any new deprecated warnings */
583 /* Use Perl_deprecate instead */
584 /* */
585 /* It is here to maintain backward compatibility with the pre-5.8 */
586 /* warnings category hierarchy. The "deprecated" category used to */
587 /* live under the "syntax" category. It is now a top-level category */
588 /* in its own right. */
589
590 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 591 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
592 "Use of %s is deprecated", s);
593}
594
ffb4593c 595/*
9cbb5ea2
GS
596 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
597 * utf16-to-utf8-reversed.
ffb4593c
NT
598 */
599
c39cd008
GS
600#ifdef PERL_CR_FILTER
601static void
602strip_return(SV *sv)
603{
95a20fc0 604 register const char *s = SvPVX_const(sv);
9d4ba2ae 605 register const char * const e = s + SvCUR(sv);
c39cd008
GS
606 /* outer loop optimized to do nothing if there are no CR-LFs */
607 while (s < e) {
608 if (*s++ == '\r' && *s == '\n') {
609 /* hit a CR-LF, need to copy the rest */
610 register char *d = s - 1;
611 *d++ = *s++;
612 while (s < e) {
613 if (*s == '\r' && s[1] == '\n')
614 s++;
615 *d++ = *s++;
616 }
617 SvCUR(sv) -= s - d;
618 return;
619 }
620 }
621}
a868473f 622
76e3520e 623STATIC I32
c39cd008 624S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 625{
f54cb97a 626 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
627 if (count > 0 && !maxlen)
628 strip_return(sv);
629 return count;
a868473f
NIS
630}
631#endif
632
199e78b7
DM
633
634
ffb4593c
NT
635/*
636 * Perl_lex_start
e3abe207 637 * Create a parser object and initialise its parser and lexer fields
ffb4593c
NT
638 */
639
a0d0e21e 640void
864dbfa3 641Perl_lex_start(pTHX_ SV *line)
79072805 642{
97aff369 643 dVAR;
6ef55633 644 const char *s = NULL;
8990e307 645 STRLEN len;
acdf0a21
DM
646 yy_parser *parser;
647
648 /* create and initialise a parser */
649
199e78b7 650 Newxz(parser, 1, yy_parser);
acdf0a21
DM
651 parser->old_parser = PL_parser;
652 PL_parser = parser;
653
654 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
655 parser->ps = parser->stack;
656 parser->stack_size = YYINITDEPTH;
657
658 parser->stack->state = 0;
659 parser->yyerrstatus = 0;
660 parser->yychar = YYEMPTY; /* Cause a token to be read. */
661
e3abe207
DM
662 /* on scope exit, free this parser and restore any outer one */
663 SAVEPARSER(parser);
664
acdf0a21 665 /* initialise lexer state */
8990e307 666
57843af0 667 SAVECOPLINE(PL_curcop);
c76ac1ee 668 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22 669
fb205e7a
DM
670#ifdef PERL_MAD
671 parser->curforce = -1;
672#else
673 parser->nexttoke = 0;
674#endif
c2598295 675 parser->copline = NOLINE;
3280af22 676 PL_lex_state = LEX_NORMAL;
c2598295 677 parser->expect = XSTATE;
199e78b7
DM
678 Newx(parser->lex_brackstack, 120, char);
679 Newx(parser->lex_casestack, 12, char);
680 *parser->lex_casestack = '\0';
02b34bbe 681
10efb74f
NC
682 if (line) {
683 s = SvPV_const(line, len);
684 } else {
685 len = 0;
686 }
bdc0bf6f 687
10efb74f 688 if (!len) {
bdc0bf6f 689 parser->linestr = newSVpvs("\n;");
10efb74f 690 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 691 parser->linestr = newSVsv(line);
10efb74f 692 if (s[len-1] != ';')
bdc0bf6f 693 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
694 } else {
695 SvTEMP_off(line);
696 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 697 parser->linestr = line;
8990e307 698 }
f06b5848
DM
699 parser->oldoldbufptr =
700 parser->oldbufptr =
701 parser->bufptr =
702 parser->linestart = SvPVX(parser->linestr);
703 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
704 parser->last_lop = parser->last_uni = NULL;
3280af22 705 PL_rsfp = 0;
79072805 706}
a687059c 707
e3abe207
DM
708
709/* delete a parser object */
710
711void
712Perl_parser_free(pTHX_ const yy_parser *parser)
713{
bdc0bf6f
DM
714 SvREFCNT_dec(parser->linestr);
715
e3abe207
DM
716 Safefree(parser->stack);
717 Safefree(parser->lex_brackstack);
718 Safefree(parser->lex_casestack);
719 PL_parser = parser->old_parser;
720 Safefree(parser);
721}
722
723
ffb4593c
NT
724/*
725 * Perl_lex_end
9cbb5ea2
GS
726 * Finalizer for lexing operations. Must be called when the parser is
727 * done with the lexer.
ffb4593c
NT
728 */
729
463ee0b2 730void
864dbfa3 731Perl_lex_end(pTHX)
463ee0b2 732{
97aff369 733 dVAR;
3280af22 734 PL_doextract = FALSE;
463ee0b2
LW
735}
736
ffb4593c
NT
737/*
738 * S_incline
739 * This subroutine has nothing to do with tilting, whether at windmills
740 * or pinball tables. Its name is short for "increment line". It
57843af0 741 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 742 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
743 * # line 500 "foo.pm"
744 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
745 */
746
76e3520e 747STATIC void
d9095cec 748S_incline(pTHX_ const char *s)
463ee0b2 749{
97aff369 750 dVAR;
d9095cec
NC
751 const char *t;
752 const char *n;
753 const char *e;
463ee0b2 754
57843af0 755 CopLINE_inc(PL_curcop);
463ee0b2
LW
756 if (*s++ != '#')
757 return;
d4c19fe8
AL
758 while (SPACE_OR_TAB(*s))
759 s++;
73659bf1
GS
760 if (strnEQ(s, "line", 4))
761 s += 4;
762 else
763 return;
084592ab 764 if (SPACE_OR_TAB(*s))
73659bf1 765 s++;
4e553d73 766 else
73659bf1 767 return;
d4c19fe8
AL
768 while (SPACE_OR_TAB(*s))
769 s++;
463ee0b2
LW
770 if (!isDIGIT(*s))
771 return;
d4c19fe8 772
463ee0b2
LW
773 n = s;
774 while (isDIGIT(*s))
775 s++;
bf4acbe4 776 while (SPACE_OR_TAB(*s))
463ee0b2 777 s++;
73659bf1 778 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 779 s++;
73659bf1
GS
780 e = t + 1;
781 }
463ee0b2 782 else {
c35e046a
AL
783 t = s;
784 while (!isSPACE(*t))
785 t++;
73659bf1 786 e = t;
463ee0b2 787 }
bf4acbe4 788 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
789 e++;
790 if (*e != '\n' && *e != '\0')
791 return; /* false alarm */
792
f4dd75d9 793 if (t - s > 0) {
d9095cec 794 const STRLEN len = t - s;
8a5ee598 795#ifndef USE_ITHREADS
c4420975 796 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
797 STRLEN tmplen = cf ? strlen(cf) : 0;
798 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
799 /* must copy *{"::_<(eval N)[oldfilename:L]"}
800 * to *{"::_<newfilename"} */
44867030
NC
801 /* However, the long form of evals is only turned on by the
802 debugger - usually they're "(eval %lu)" */
803 char smallbuf[128];
804 char *tmpbuf;
805 GV **gvp;
d9095cec 806 STRLEN tmplen2 = len;
798b63bc 807 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
808 tmpbuf = smallbuf;
809 else
2ae0db35 810 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
811 tmpbuf[0] = '_';
812 tmpbuf[1] = '<';
2ae0db35 813 memcpy(tmpbuf + 2, cf, tmplen);
44867030 814 tmplen += 2;
8a5ee598
RGS
815 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
816 if (gvp) {
44867030
NC
817 char *tmpbuf2;
818 GV *gv2;
819
820 if (tmplen2 + 2 <= sizeof smallbuf)
821 tmpbuf2 = smallbuf;
822 else
823 Newx(tmpbuf2, tmplen2 + 2, char);
824
825 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
826 /* Either they malloc'd it, or we malloc'd it,
827 so no prefix is present in ours. */
828 tmpbuf2[0] = '_';
829 tmpbuf2[1] = '<';
830 }
831
832 memcpy(tmpbuf2 + 2, s, tmplen2);
833 tmplen2 += 2;
834
8a5ee598 835 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 836 if (!isGV(gv2)) {
8a5ee598 837 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
838 /* adjust ${"::_<newfilename"} to store the new file name */
839 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
840 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
841 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
842 }
44867030
NC
843
844 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 845 }
e66cf94c 846 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 847 }
8a5ee598 848#endif
05ec9bb3 849 CopFILE_free(PL_curcop);
d9095cec 850 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 851 }
57843af0 852 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
853}
854
29595ff2 855#ifdef PERL_MAD
cd81e915 856/* skip space before PL_thistoken */
29595ff2
NC
857
858STATIC char *
859S_skipspace0(pTHX_ register char *s)
860{
861 s = skipspace(s);
862 if (!PL_madskills)
863 return s;
cd81e915
NC
864 if (PL_skipwhite) {
865 if (!PL_thiswhite)
6b29d1f5 866 PL_thiswhite = newSVpvs("");
cd81e915
NC
867 sv_catsv(PL_thiswhite, PL_skipwhite);
868 sv_free(PL_skipwhite);
869 PL_skipwhite = 0;
870 }
871 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
872 return s;
873}
874
cd81e915 875/* skip space after PL_thistoken */
29595ff2
NC
876
877STATIC char *
878S_skipspace1(pTHX_ register char *s)
879{
d4c19fe8 880 const char *start = s;
29595ff2
NC
881 I32 startoff = start - SvPVX(PL_linestr);
882
883 s = skipspace(s);
884 if (!PL_madskills)
885 return s;
886 start = SvPVX(PL_linestr) + startoff;
cd81e915 887 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 888 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
889 PL_thistoken = newSVpvn(tstart, start - tstart);
890 }
891 PL_realtokenstart = -1;
892 if (PL_skipwhite) {
893 if (!PL_nextwhite)
6b29d1f5 894 PL_nextwhite = newSVpvs("");
cd81e915
NC
895 sv_catsv(PL_nextwhite, PL_skipwhite);
896 sv_free(PL_skipwhite);
897 PL_skipwhite = 0;
29595ff2
NC
898 }
899 return s;
900}
901
902STATIC char *
903S_skipspace2(pTHX_ register char *s, SV **svp)
904{
c35e046a
AL
905 char *start;
906 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
907 const I32 startoff = s - SvPVX(PL_linestr);
908
29595ff2
NC
909 s = skipspace(s);
910 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
911 if (!PL_madskills || !svp)
912 return s;
913 start = SvPVX(PL_linestr) + startoff;
cd81e915 914 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 915 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
916 PL_thistoken = newSVpvn(tstart, start - tstart);
917 PL_realtokenstart = -1;
29595ff2 918 }
cd81e915 919 if (PL_skipwhite) {
29595ff2 920 if (!*svp)
6b29d1f5 921 *svp = newSVpvs("");
cd81e915
NC
922 sv_setsv(*svp, PL_skipwhite);
923 sv_free(PL_skipwhite);
924 PL_skipwhite = 0;
29595ff2
NC
925 }
926
927 return s;
928}
929#endif
930
80a702cd 931STATIC void
5fa550fb 932S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
933{
934 AV *av = CopFILEAVx(PL_curcop);
935 if (av) {
b9f83d2f 936 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
937 if (orig_sv)
938 sv_setsv(sv, orig_sv);
939 else
940 sv_setpvn(sv, buf, len);
80a702cd
RGS
941 (void)SvIOK_on(sv);
942 SvIV_set(sv, 0);
943 av_store(av, (I32)CopLINE(PL_curcop), sv);
944 }
945}
946
ffb4593c
NT
947/*
948 * S_skipspace
949 * Called to gobble the appropriate amount and type of whitespace.
950 * Skips comments as well.
951 */
952
76e3520e 953STATIC char *
cea2e8a9 954S_skipspace(pTHX_ register char *s)
a687059c 955{
97aff369 956 dVAR;
5db06880
NC
957#ifdef PERL_MAD
958 int curoff;
959 int startoff = s - SvPVX(PL_linestr);
960
cd81e915
NC
961 if (PL_skipwhite) {
962 sv_free(PL_skipwhite);
963 PL_skipwhite = 0;
5db06880
NC
964 }
965#endif
966
3280af22 967 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 968 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 969 s++;
5db06880
NC
970#ifdef PERL_MAD
971 goto done;
972#else
463ee0b2 973 return s;
5db06880 974#endif
463ee0b2
LW
975 }
976 for (;;) {
fd049845 977 STRLEN prevlen;
09bef843 978 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 979 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
980 while (s < PL_bufend && isSPACE(*s)) {
981 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
982 incline(s);
983 }
ffb4593c
NT
984
985 /* comment */
3280af22
NIS
986 if (s < PL_bufend && *s == '#') {
987 while (s < PL_bufend && *s != '\n')
463ee0b2 988 s++;
60e6418e 989 if (s < PL_bufend) {
463ee0b2 990 s++;
60e6418e
GS
991 if (PL_in_eval && !PL_rsfp) {
992 incline(s);
993 continue;
994 }
995 }
463ee0b2 996 }
ffb4593c
NT
997
998 /* only continue to recharge the buffer if we're at the end
999 * of the buffer, we're not reading from a source filter, and
1000 * we're in normal lexing mode
1001 */
09bef843
SB
1002 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1003 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1004#ifdef PERL_MAD
1005 goto done;
1006#else
463ee0b2 1007 return s;
5db06880 1008#endif
ffb4593c
NT
1009
1010 /* try to recharge the buffer */
5db06880
NC
1011#ifdef PERL_MAD
1012 curoff = s - SvPVX(PL_linestr);
1013#endif
1014
9cbb5ea2 1015 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1016 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1017 {
5db06880
NC
1018#ifdef PERL_MAD
1019 if (PL_madskills && curoff != startoff) {
cd81e915 1020 if (!PL_skipwhite)
6b29d1f5 1021 PL_skipwhite = newSVpvs("");
cd81e915 1022 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1023 curoff - startoff);
1024 }
1025
1026 /* mustn't throw out old stuff yet if madpropping */
1027 SvCUR(PL_linestr) = curoff;
1028 s = SvPVX(PL_linestr) + curoff;
1029 *s = 0;
1030 if (curoff && s[-1] == '\n')
1031 s[-1] = ' ';
1032#endif
1033
9cbb5ea2 1034 /* end of file. Add on the -p or -n magic */
cd81e915 1035 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1036 if (PL_minus_p) {
5db06880 1037#ifdef PERL_MAD
6502358f 1038 sv_catpvs(PL_linestr,
5db06880
NC
1039 ";}continue{print or die qq(-p destination: $!\\n);}");
1040#else
6502358f 1041 sv_setpvs(PL_linestr,
01a19ab0 1042 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1043#endif
3280af22 1044 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1045 }
01a19ab0 1046 else if (PL_minus_n) {
5db06880
NC
1047#ifdef PERL_MAD
1048 sv_catpvn(PL_linestr, ";}", 2);
1049#else
01a19ab0 1050 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1051#endif
01a19ab0
NC
1052 PL_minus_n = 0;
1053 }
a0d0e21e 1054 else
5db06880
NC
1055#ifdef PERL_MAD
1056 sv_catpvn(PL_linestr,";", 1);
1057#else
4147a61b 1058 sv_setpvn(PL_linestr,";", 1);
5db06880 1059#endif
ffb4593c
NT
1060
1061 /* reset variables for next time we lex */
9cbb5ea2 1062 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1063 = SvPVX(PL_linestr)
1064#ifdef PERL_MAD
1065 + curoff
1066#endif
1067 ;
3280af22 1068 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1069 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1070
1071 /* Close the filehandle. Could be from -P preprocessor,
1072 * STDIN, or a regular file. If we were reading code from
1073 * STDIN (because the commandline held no -e or filename)
1074 * then we don't close it, we reset it so the code can
1075 * read from STDIN too.
1076 */
1077
3280af22
NIS
1078 if (PL_preprocess && !PL_in_eval)
1079 (void)PerlProc_pclose(PL_rsfp);
1080 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1081 PerlIO_clearerr(PL_rsfp);
8990e307 1082 else
3280af22 1083 (void)PerlIO_close(PL_rsfp);
4608196e 1084 PL_rsfp = NULL;
463ee0b2
LW
1085 return s;
1086 }
ffb4593c
NT
1087
1088 /* not at end of file, so we only read another line */
09bef843
SB
1089 /* make corresponding updates to old pointers, for yyerror() */
1090 oldprevlen = PL_oldbufptr - PL_bufend;
1091 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1092 if (PL_last_uni)
1093 oldunilen = PL_last_uni - PL_bufend;
1094 if (PL_last_lop)
1095 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1096 PL_linestart = PL_bufptr = s + prevlen;
1097 PL_bufend = s + SvCUR(PL_linestr);
1098 s = PL_bufptr;
09bef843
SB
1099 PL_oldbufptr = s + oldprevlen;
1100 PL_oldoldbufptr = s + oldoldprevlen;
1101 if (PL_last_uni)
1102 PL_last_uni = s + oldunilen;
1103 if (PL_last_lop)
1104 PL_last_lop = s + oldloplen;
a0d0e21e 1105 incline(s);
ffb4593c
NT
1106
1107 /* debugger active and we're not compiling the debugger code,
1108 * so store the line into the debugger's array of lines
1109 */
80a702cd 1110 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1111 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1112 }
5db06880
NC
1113
1114#ifdef PERL_MAD
1115 done:
1116 if (PL_madskills) {
cd81e915 1117 if (!PL_skipwhite)
6b29d1f5 1118 PL_skipwhite = newSVpvs("");
5db06880
NC
1119 curoff = s - SvPVX(PL_linestr);
1120 if (curoff - startoff)
cd81e915 1121 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1122 curoff - startoff);
1123 }
1124 return s;
1125#endif
a687059c 1126}
378cc40b 1127
ffb4593c
NT
1128/*
1129 * S_check_uni
1130 * Check the unary operators to ensure there's no ambiguity in how they're
1131 * used. An ambiguous piece of code would be:
1132 * rand + 5
1133 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1134 * the +5 is its argument.
1135 */
1136
76e3520e 1137STATIC void
cea2e8a9 1138S_check_uni(pTHX)
ba106d47 1139{
97aff369 1140 dVAR;
d4c19fe8
AL
1141 const char *s;
1142 const char *t;
2f3197b3 1143
3280af22 1144 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1145 return;
3280af22
NIS
1146 while (isSPACE(*PL_last_uni))
1147 PL_last_uni++;
c35e046a
AL
1148 s = PL_last_uni;
1149 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1150 s++;
3280af22 1151 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1152 return;
6136c704 1153
0453d815 1154 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1155 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1156 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1157 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1158 }
2f3197b3
LW
1159}
1160
ffb4593c
NT
1161/*
1162 * LOP : macro to build a list operator. Its behaviour has been replaced
1163 * with a subroutine, S_lop() for which LOP is just another name.
1164 */
1165
a0d0e21e
LW
1166#define LOP(f,x) return lop(f,x,s)
1167
ffb4593c
NT
1168/*
1169 * S_lop
1170 * Build a list operator (or something that might be one). The rules:
1171 * - if we have a next token, then it's a list operator [why?]
1172 * - if the next thing is an opening paren, then it's a function
1173 * - else it's a list operator
1174 */
1175
76e3520e 1176STATIC I32
a0be28da 1177S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1178{
97aff369 1179 dVAR;
79072805 1180 yylval.ival = f;
35c8bce7 1181 CLINE;
3280af22
NIS
1182 PL_expect = x;
1183 PL_bufptr = s;
1184 PL_last_lop = PL_oldbufptr;
eb160463 1185 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1186#ifdef PERL_MAD
1187 if (PL_lasttoke)
1188 return REPORT(LSTOP);
1189#else
3280af22 1190 if (PL_nexttoke)
bbf60fe6 1191 return REPORT(LSTOP);
5db06880 1192#endif
79072805 1193 if (*s == '(')
bbf60fe6 1194 return REPORT(FUNC);
29595ff2 1195 s = PEEKSPACE(s);
79072805 1196 if (*s == '(')
bbf60fe6 1197 return REPORT(FUNC);
79072805 1198 else
bbf60fe6 1199 return REPORT(LSTOP);
79072805
LW
1200}
1201
5db06880
NC
1202#ifdef PERL_MAD
1203 /*
1204 * S_start_force
1205 * Sets up for an eventual force_next(). start_force(0) basically does
1206 * an unshift, while start_force(-1) does a push. yylex removes items
1207 * on the "pop" end.
1208 */
1209
1210STATIC void
1211S_start_force(pTHX_ int where)
1212{
1213 int i;
1214
cd81e915 1215 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1216 where = PL_lasttoke;
cd81e915
NC
1217 assert(PL_curforce < 0 || PL_curforce == where);
1218 if (PL_curforce != where) {
5db06880
NC
1219 for (i = PL_lasttoke; i > where; --i) {
1220 PL_nexttoke[i] = PL_nexttoke[i-1];
1221 }
1222 PL_lasttoke++;
1223 }
cd81e915 1224 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1225 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1226 PL_curforce = where;
1227 if (PL_nextwhite) {
5db06880 1228 if (PL_madskills)
6b29d1f5 1229 curmad('^', newSVpvs(""));
cd81e915 1230 CURMAD('_', PL_nextwhite);
5db06880
NC
1231 }
1232}
1233
1234STATIC void
1235S_curmad(pTHX_ char slot, SV *sv)
1236{
1237 MADPROP **where;
1238
1239 if (!sv)
1240 return;
cd81e915
NC
1241 if (PL_curforce < 0)
1242 where = &PL_thismad;
5db06880 1243 else
cd81e915 1244 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1245
cd81e915 1246 if (PL_faketokens)
5db06880
NC
1247 sv_setpvn(sv, "", 0);
1248 else {
1249 if (!IN_BYTES) {
1250 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1251 SvUTF8_on(sv);
1252 else if (PL_encoding) {
1253 sv_recode_to_utf8(sv, PL_encoding);
1254 }
1255 }
1256 }
1257
1258 /* keep a slot open for the head of the list? */
1259 if (slot != '_' && *where && (*where)->mad_key == '^') {
1260 (*where)->mad_key = slot;
1261 sv_free((*where)->mad_val);
1262 (*where)->mad_val = (void*)sv;
1263 }
1264 else
1265 addmad(newMADsv(slot, sv), where, 0);
1266}
1267#else
b3f24c00
MHM
1268# define start_force(where) NOOP
1269# define curmad(slot, sv) NOOP
5db06880
NC
1270#endif
1271
ffb4593c
NT
1272/*
1273 * S_force_next
9cbb5ea2 1274 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1275 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1276 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1277 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1278 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1279 */
1280
4e553d73 1281STATIC void
cea2e8a9 1282S_force_next(pTHX_ I32 type)
79072805 1283{
97aff369 1284 dVAR;
5db06880 1285#ifdef PERL_MAD
cd81e915 1286 if (PL_curforce < 0)
5db06880 1287 start_force(PL_lasttoke);
cd81e915 1288 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1289 if (PL_lex_state != LEX_KNOWNEXT)
1290 PL_lex_defer = PL_lex_state;
1291 PL_lex_state = LEX_KNOWNEXT;
1292 PL_lex_expect = PL_expect;
cd81e915 1293 PL_curforce = -1;
5db06880 1294#else
3280af22
NIS
1295 PL_nexttype[PL_nexttoke] = type;
1296 PL_nexttoke++;
1297 if (PL_lex_state != LEX_KNOWNEXT) {
1298 PL_lex_defer = PL_lex_state;
1299 PL_lex_expect = PL_expect;
1300 PL_lex_state = LEX_KNOWNEXT;
79072805 1301 }
5db06880 1302#endif
79072805
LW
1303}
1304
d0a148a6
NC
1305STATIC SV *
1306S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1307{
97aff369 1308 dVAR;
9d4ba2ae 1309 SV * const sv = newSVpvn(start,len);
bfed75c6 1310 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1311 SvUTF8_on(sv);
1312 return sv;
1313}
1314
ffb4593c
NT
1315/*
1316 * S_force_word
1317 * When the lexer knows the next thing is a word (for instance, it has
1318 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1319 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1320 * lookahead.
ffb4593c
NT
1321 *
1322 * Arguments:
b1b65b59 1323 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1324 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1325 * int check_keyword : if true, Perl checks to make sure the word isn't
1326 * a keyword (do this if the word is a label, e.g. goto FOO)
1327 * int allow_pack : if true, : characters will also be allowed (require,
1328 * use, etc. do this)
9cbb5ea2 1329 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1330 */
1331
76e3520e 1332STATIC char *
cea2e8a9 1333S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1334{
97aff369 1335 dVAR;
463ee0b2
LW
1336 register char *s;
1337 STRLEN len;
4e553d73 1338
29595ff2 1339 start = SKIPSPACE1(start);
463ee0b2 1340 s = start;
7e2040f0 1341 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1342 (allow_pack && *s == ':') ||
15f0808c 1343 (allow_initial_tick && *s == '\'') )
a0d0e21e 1344 {
3280af22 1345 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1346 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1347 return start;
cd81e915 1348 start_force(PL_curforce);
5db06880
NC
1349 if (PL_madskills)
1350 curmad('X', newSVpvn(start,s-start));
463ee0b2 1351 if (token == METHOD) {
29595ff2 1352 s = SKIPSPACE1(s);
463ee0b2 1353 if (*s == '(')
3280af22 1354 PL_expect = XTERM;
463ee0b2 1355 else {
3280af22 1356 PL_expect = XOPERATOR;
463ee0b2 1357 }
79072805 1358 }
e74e6b3d 1359 if (PL_madskills)
63575281 1360 curmad('g', newSVpvs( "forced" ));
9ded7720 1361 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1362 = (OP*)newSVOP(OP_CONST,0,
1363 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1364 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1365 force_next(token);
1366 }
1367 return s;
1368}
1369
ffb4593c
NT
1370/*
1371 * S_force_ident
9cbb5ea2 1372 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1373 * text only contains the "foo" portion. The first argument is a pointer
1374 * to the "foo", and the second argument is the type symbol to prefix.
1375 * Forces the next token to be a "WORD".
9cbb5ea2 1376 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1377 */
1378
76e3520e 1379STATIC void
bfed75c6 1380S_force_ident(pTHX_ register const char *s, int kind)
79072805 1381{
97aff369 1382 dVAR;
c35e046a 1383 if (*s) {
90e5519e
NC
1384 const STRLEN len = strlen(s);
1385 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1386 start_force(PL_curforce);
9ded7720 1387 NEXTVAL_NEXTTOKE.opval = o;
79072805 1388 force_next(WORD);
748a9306 1389 if (kind) {
11343788 1390 o->op_private = OPpCONST_ENTERED;
55497cff 1391 /* XXX see note in pp_entereval() for why we forgo typo
1392 warnings if the symbol must be introduced in an eval.
1393 GSAR 96-10-12 */
90e5519e
NC
1394 gv_fetchpvn_flags(s, len,
1395 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1396 : GV_ADD,
1397 kind == '$' ? SVt_PV :
1398 kind == '@' ? SVt_PVAV :
1399 kind == '%' ? SVt_PVHV :
a0d0e21e 1400 SVt_PVGV
90e5519e 1401 );
748a9306 1402 }
79072805
LW
1403 }
1404}
1405
1571675a
GS
1406NV
1407Perl_str_to_version(pTHX_ SV *sv)
1408{
1409 NV retval = 0.0;
1410 NV nshift = 1.0;
1411 STRLEN len;
cfd0369c 1412 const char *start = SvPV_const(sv,len);
9d4ba2ae 1413 const char * const end = start + len;
504618e9 1414 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1415 while (start < end) {
ba210ebe 1416 STRLEN skip;
1571675a
GS
1417 UV n;
1418 if (utf)
9041c2e3 1419 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1420 else {
1421 n = *(U8*)start;
1422 skip = 1;
1423 }
1424 retval += ((NV)n)/nshift;
1425 start += skip;
1426 nshift *= 1000;
1427 }
1428 return retval;
1429}
1430
4e553d73 1431/*
ffb4593c
NT
1432 * S_force_version
1433 * Forces the next token to be a version number.
e759cc13
RGS
1434 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1435 * and if "guessing" is TRUE, then no new token is created (and the caller
1436 * must use an alternative parsing method).
ffb4593c
NT
1437 */
1438
76e3520e 1439STATIC char *
e759cc13 1440S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1441{
97aff369 1442 dVAR;
5f66b61c 1443 OP *version = NULL;
44dcb63b 1444 char *d;
5db06880
NC
1445#ifdef PERL_MAD
1446 I32 startoff = s - SvPVX(PL_linestr);
1447#endif
89bfa8cd 1448
29595ff2 1449 s = SKIPSPACE1(s);
89bfa8cd 1450
44dcb63b 1451 d = s;
dd629d5b 1452 if (*d == 'v')
44dcb63b 1453 d++;
44dcb63b 1454 if (isDIGIT(*d)) {
e759cc13
RGS
1455 while (isDIGIT(*d) || *d == '_' || *d == '.')
1456 d++;
5db06880
NC
1457#ifdef PERL_MAD
1458 if (PL_madskills) {
cd81e915 1459 start_force(PL_curforce);
5db06880
NC
1460 curmad('X', newSVpvn(s,d-s));
1461 }
1462#endif
9f3d182e 1463 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1464 SV *ver;
b73d6f50 1465 s = scan_num(s, &yylval);
89bfa8cd 1466 version = yylval.opval;
dd629d5b
GS
1467 ver = cSVOPx(version)->op_sv;
1468 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1469 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1470 SvNV_set(ver, str_to_version(ver));
1571675a 1471 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1472 }
89bfa8cd 1473 }
5db06880
NC
1474 else if (guessing) {
1475#ifdef PERL_MAD
1476 if (PL_madskills) {
cd81e915
NC
1477 sv_free(PL_nextwhite); /* let next token collect whitespace */
1478 PL_nextwhite = 0;
5db06880
NC
1479 s = SvPVX(PL_linestr) + startoff;
1480 }
1481#endif
e759cc13 1482 return s;
5db06880 1483 }
89bfa8cd 1484 }
1485
5db06880
NC
1486#ifdef PERL_MAD
1487 if (PL_madskills && !version) {
cd81e915
NC
1488 sv_free(PL_nextwhite); /* let next token collect whitespace */
1489 PL_nextwhite = 0;
5db06880
NC
1490 s = SvPVX(PL_linestr) + startoff;
1491 }
1492#endif
89bfa8cd 1493 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1494 start_force(PL_curforce);
9ded7720 1495 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1496 force_next(WORD);
89bfa8cd 1497
e759cc13 1498 return s;
89bfa8cd 1499}
1500
ffb4593c
NT
1501/*
1502 * S_tokeq
1503 * Tokenize a quoted string passed in as an SV. It finds the next
1504 * chunk, up to end of string or a backslash. It may make a new
1505 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1506 * turns \\ into \.
1507 */
1508
76e3520e 1509STATIC SV *
cea2e8a9 1510S_tokeq(pTHX_ SV *sv)
79072805 1511{
97aff369 1512 dVAR;
79072805
LW
1513 register char *s;
1514 register char *send;
1515 register char *d;
b3ac6de7
IZ
1516 STRLEN len = 0;
1517 SV *pv = sv;
79072805
LW
1518
1519 if (!SvLEN(sv))
b3ac6de7 1520 goto finish;
79072805 1521
a0d0e21e 1522 s = SvPV_force(sv, len);
21a311ee 1523 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1524 goto finish;
463ee0b2 1525 send = s + len;
79072805
LW
1526 while (s < send && *s != '\\')
1527 s++;
1528 if (s == send)
b3ac6de7 1529 goto finish;
79072805 1530 d = s;
be4731d2 1531 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1532 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1533 if (SvUTF8(sv))
1534 SvUTF8_on(pv);
1535 }
79072805
LW
1536 while (s < send) {
1537 if (*s == '\\') {
a0d0e21e 1538 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1539 s++; /* all that, just for this */
1540 }
1541 *d++ = *s++;
1542 }
1543 *d = '\0';
95a20fc0 1544 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1545 finish:
3280af22 1546 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1547 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1548 return sv;
1549}
1550
ffb4593c
NT
1551/*
1552 * Now come three functions related to double-quote context,
1553 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1554 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1555 * interact with PL_lex_state, and create fake ( ... ) argument lists
1556 * to handle functions and concatenation.
1557 * They assume that whoever calls them will be setting up a fake
1558 * join call, because each subthing puts a ',' after it. This lets
1559 * "lower \luPpEr"
1560 * become
1561 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1562 *
1563 * (I'm not sure whether the spurious commas at the end of lcfirst's
1564 * arguments and join's arguments are created or not).
1565 */
1566
1567/*
1568 * S_sublex_start
1569 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1570 *
1571 * Pattern matching will set PL_lex_op to the pattern-matching op to
1572 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1573 *
1574 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1575 *
1576 * Everything else becomes a FUNC.
1577 *
1578 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1579 * had an OP_CONST or OP_READLINE). This just sets us up for a
1580 * call to S_sublex_push().
1581 */
1582
76e3520e 1583STATIC I32
cea2e8a9 1584S_sublex_start(pTHX)
79072805 1585{
97aff369 1586 dVAR;
0d46e09a 1587 register const I32 op_type = yylval.ival;
79072805
LW
1588
1589 if (op_type == OP_NULL) {
3280af22 1590 yylval.opval = PL_lex_op;
5f66b61c 1591 PL_lex_op = NULL;
79072805
LW
1592 return THING;
1593 }
1594 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1595 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1596
1597 if (SvTYPE(sv) == SVt_PVIV) {
1598 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1599 STRLEN len;
96a5add6 1600 const char * const p = SvPV_const(sv, len);
f54cb97a 1601 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1602 if (SvUTF8(sv))
1603 SvUTF8_on(nsv);
b3ac6de7
IZ
1604 SvREFCNT_dec(sv);
1605 sv = nsv;
4e553d73 1606 }
b3ac6de7 1607 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1608 PL_lex_stuff = NULL;
6f33ba73
RGS
1609 /* Allow <FH> // "foo" */
1610 if (op_type == OP_READLINE)
1611 PL_expect = XTERMORDORDOR;
79072805
LW
1612 return THING;
1613 }
e3f73d4e
RGS
1614 else if (op_type == OP_BACKTICK && PL_lex_op) {
1615 /* readpipe() vas overriden */
1616 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1617 yylval.opval = PL_lex_op;
9b201d7d 1618 PL_lex_op = NULL;
e3f73d4e
RGS
1619 PL_lex_stuff = NULL;
1620 return THING;
1621 }
79072805 1622
3280af22 1623 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1624 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1625 PL_sublex_info.sub_op = PL_lex_op;
1626 PL_lex_state = LEX_INTERPPUSH;
55497cff 1627
3280af22
NIS
1628 PL_expect = XTERM;
1629 if (PL_lex_op) {
1630 yylval.opval = PL_lex_op;
5f66b61c 1631 PL_lex_op = NULL;
55497cff 1632 return PMFUNC;
1633 }
1634 else
1635 return FUNC;
1636}
1637
ffb4593c
NT
1638/*
1639 * S_sublex_push
1640 * Create a new scope to save the lexing state. The scope will be
1641 * ended in S_sublex_done. Returns a '(', starting the function arguments
1642 * to the uc, lc, etc. found before.
1643 * Sets PL_lex_state to LEX_INTERPCONCAT.
1644 */
1645
76e3520e 1646STATIC I32
cea2e8a9 1647S_sublex_push(pTHX)
55497cff 1648{
27da23d5 1649 dVAR;
f46d017c 1650 ENTER;
55497cff 1651
3280af22 1652 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1653 SAVEBOOL(PL_lex_dojoin);
3280af22 1654 SAVEI32(PL_lex_brackets);
3280af22
NIS
1655 SAVEI32(PL_lex_casemods);
1656 SAVEI32(PL_lex_starts);
651b5b28 1657 SAVEI8(PL_lex_state);
7766f137 1658 SAVEVPTR(PL_lex_inpat);
98246f1e 1659 SAVEI16(PL_lex_inwhat);
57843af0 1660 SAVECOPLINE(PL_curcop);
3280af22 1661 SAVEPPTR(PL_bufptr);
8452ff4b 1662 SAVEPPTR(PL_bufend);
3280af22
NIS
1663 SAVEPPTR(PL_oldbufptr);
1664 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1665 SAVEPPTR(PL_last_lop);
1666 SAVEPPTR(PL_last_uni);
3280af22
NIS
1667 SAVEPPTR(PL_linestart);
1668 SAVESPTR(PL_linestr);
8edd5f42
RGS
1669 SAVEGENERICPV(PL_lex_brackstack);
1670 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1671
1672 PL_linestr = PL_lex_stuff;
a0714e2c 1673 PL_lex_stuff = NULL;
3280af22 1674
9cbb5ea2
GS
1675 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1676 = SvPVX(PL_linestr);
3280af22 1677 PL_bufend += SvCUR(PL_linestr);
bd61b366 1678 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1679 SAVEFREESV(PL_linestr);
1680
1681 PL_lex_dojoin = FALSE;
1682 PL_lex_brackets = 0;
a02a5408
JC
1683 Newx(PL_lex_brackstack, 120, char);
1684 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1685 PL_lex_casemods = 0;
1686 *PL_lex_casestack = '\0';
1687 PL_lex_starts = 0;
1688 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1689 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1690
1691 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1692 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1693 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1694 else
5f66b61c 1695 PL_lex_inpat = NULL;
79072805 1696
55497cff 1697 return '(';
79072805
LW
1698}
1699
ffb4593c
NT
1700/*
1701 * S_sublex_done
1702 * Restores lexer state after a S_sublex_push.
1703 */
1704
76e3520e 1705STATIC I32
cea2e8a9 1706S_sublex_done(pTHX)
79072805 1707{
27da23d5 1708 dVAR;
3280af22 1709 if (!PL_lex_starts++) {
396482e1 1710 SV * const sv = newSVpvs("");
9aa983d2
JH
1711 if (SvUTF8(PL_linestr))
1712 SvUTF8_on(sv);
3280af22 1713 PL_expect = XOPERATOR;
9aa983d2 1714 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1715 return THING;
1716 }
1717
3280af22
NIS
1718 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1719 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1720 return yylex();
79072805
LW
1721 }
1722
ffb4593c 1723 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1724 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1725 PL_linestr = PL_lex_repl;
1726 PL_lex_inpat = 0;
1727 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1728 PL_bufend += SvCUR(PL_linestr);
bd61b366 1729 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1730 SAVEFREESV(PL_linestr);
1731 PL_lex_dojoin = FALSE;
1732 PL_lex_brackets = 0;
3280af22
NIS
1733 PL_lex_casemods = 0;
1734 *PL_lex_casestack = '\0';
1735 PL_lex_starts = 0;
25da4f38 1736 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1737 PL_lex_state = LEX_INTERPNORMAL;
1738 PL_lex_starts++;
e9fa98b2
HS
1739 /* we don't clear PL_lex_repl here, so that we can check later
1740 whether this is an evalled subst; that means we rely on the
1741 logic to ensure sublex_done() is called again only via the
1742 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1743 }
e9fa98b2 1744 else {
3280af22 1745 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1746 PL_lex_repl = NULL;
e9fa98b2 1747 }
79072805 1748 return ',';
ffed7fef
LW
1749 }
1750 else {
5db06880
NC
1751#ifdef PERL_MAD
1752 if (PL_madskills) {
cd81e915
NC
1753 if (PL_thiswhite) {
1754 if (!PL_endwhite)
6b29d1f5 1755 PL_endwhite = newSVpvs("");
cd81e915
NC
1756 sv_catsv(PL_endwhite, PL_thiswhite);
1757 PL_thiswhite = 0;
1758 }
1759 if (PL_thistoken)
1760 sv_setpvn(PL_thistoken,"",0);
5db06880 1761 else
cd81e915 1762 PL_realtokenstart = -1;
5db06880
NC
1763 }
1764#endif
f46d017c 1765 LEAVE;
3280af22
NIS
1766 PL_bufend = SvPVX(PL_linestr);
1767 PL_bufend += SvCUR(PL_linestr);
1768 PL_expect = XOPERATOR;
09bef843 1769 PL_sublex_info.sub_inwhat = 0;
79072805 1770 return ')';
ffed7fef
LW
1771 }
1772}
1773
02aa26ce
NT
1774/*
1775 scan_const
1776
1777 Extracts a pattern, double-quoted string, or transliteration. This
1778 is terrifying code.
1779
94def140 1780 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1781 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1782 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1783
94def140
TS
1784 Returns a pointer to the character scanned up to. If this is
1785 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1786 successfully parsed), will leave an OP for the substring scanned
1787 in yylval. Caller must intuit reason for not parsing further
1788 by looking at the next characters herself.
1789
02aa26ce
NT
1790 In patterns:
1791 backslashes:
1792 double-quoted style: \r and \n
1793 regexp special ones: \D \s
94def140
TS
1794 constants: \x31
1795 backrefs: \1
02aa26ce
NT
1796 case and quoting: \U \Q \E
1797 stops on @ and $, but not for $ as tail anchor
1798
1799 In transliterations:
1800 characters are VERY literal, except for - not at the start or end
94def140
TS
1801 of the string, which indicates a range. If the range is in bytes,
1802 scan_const expands the range to the full set of intermediate
1803 characters. If the range is in utf8, the hyphen is replaced with
1804 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1805
1806 In double-quoted strings:
1807 backslashes:
1808 double-quoted style: \r and \n
94def140
TS
1809 constants: \x31
1810 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1811 case and quoting: \U \Q \E
1812 stops on @ and $
1813
1814 scan_const does *not* construct ops to handle interpolated strings.
1815 It stops processing as soon as it finds an embedded $ or @ variable
1816 and leaves it to the caller to work out what's going on.
1817
94def140
TS
1818 embedded arrays (whether in pattern or not) could be:
1819 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1820
1821 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1822
1823 $ in pattern could be $foo or could be tail anchor. Assumption:
1824 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1825 followed by one of "()| \r\n\t"
02aa26ce
NT
1826
1827 \1 (backreferences) are turned into $1
1828
1829 The structure of the code is
1830 while (there's a character to process) {
94def140
TS
1831 handle transliteration ranges
1832 skip regexp comments /(?#comment)/ and codes /(?{code})/
1833 skip #-initiated comments in //x patterns
1834 check for embedded arrays
02aa26ce
NT
1835 check for embedded scalars
1836 if (backslash) {
94def140
TS
1837 leave intact backslashes from leaveit (below)
1838 deprecate \1 in substitution replacements
02aa26ce
NT
1839 handle string-changing backslashes \l \U \Q \E, etc.
1840 switch (what was escaped) {
94def140
TS
1841 handle \- in a transliteration (becomes a literal -)
1842 handle \132 (octal characters)
1843 handle \x15 and \x{1234} (hex characters)
1844 handle \N{name} (named characters)
1845 handle \cV (control characters)
1846 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1847 } (end switch)
1848 } (end if backslash)
1849 } (end while character to read)
4e553d73 1850
02aa26ce
NT
1851*/
1852
76e3520e 1853STATIC char *
cea2e8a9 1854S_scan_const(pTHX_ char *start)
79072805 1855{
97aff369 1856 dVAR;
3280af22 1857 register char *send = PL_bufend; /* end of the constant */
561b68a9 1858 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1859 register char *s = start; /* start of the constant */
1860 register char *d = SvPVX(sv); /* destination for copies */
1861 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1862 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1863 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1864 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1865 UV uv;
4c3a8340
TS
1866#ifdef EBCDIC
1867 UV literal_endpoint = 0;
e294cc5d 1868 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1869#endif
012bcf8d 1870
2b9d42f0
NIS
1871 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1872 /* If we are doing a trans and we know we want UTF8 set expectation */
1873 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1874 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1875 }
1876
1877
79072805 1878 while (s < send || dorange) {
02aa26ce 1879 /* get transliterations out of the way (they're most literal) */
3280af22 1880 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1881 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1882 if (dorange) {
1ba5c669
JH
1883 I32 i; /* current expanded character */
1884 I32 min; /* first character in range */
1885 I32 max; /* last character in range */
02aa26ce 1886
e294cc5d
JH
1887#ifdef EBCDIC
1888 UV uvmax = 0;
1889#endif
1890
1891 if (has_utf8
1892#ifdef EBCDIC
1893 && !native_range
1894#endif
1895 ) {
9d4ba2ae 1896 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1897 char *e = d++;
1898 while (e-- > c)
1899 *(e + 1) = *e;
25716404 1900 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1901 /* mark the range as done, and continue */
1902 dorange = FALSE;
1903 didrange = TRUE;
1904 continue;
1905 }
2b9d42f0 1906
95a20fc0 1907 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1908#ifdef EBCDIC
1909 SvGROW(sv,
1910 SvLEN(sv) + (has_utf8 ?
1911 (512 - UTF_CONTINUATION_MARK +
1912 UNISKIP(0x100))
1913 : 256));
1914 /* How many two-byte within 0..255: 128 in UTF-8,
1915 * 96 in UTF-8-mod. */
1916#else
9cbb5ea2 1917 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1918#endif
9cbb5ea2 1919 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1920#ifdef EBCDIC
1921 if (has_utf8) {
1922 int j;
1923 for (j = 0; j <= 1; j++) {
1924 char * const c = (char*)utf8_hop((U8*)d, -1);
1925 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1926 if (j)
1927 min = (U8)uv;
1928 else if (uv < 256)
1929 max = (U8)uv;
1930 else {
1931 max = (U8)0xff; /* only to \xff */
1932 uvmax = uv; /* \x{100} to uvmax */
1933 }
1934 d = c; /* eat endpoint chars */
1935 }
1936 }
1937 else {
1938#endif
1939 d -= 2; /* eat the first char and the - */
1940 min = (U8)*d; /* first char in range */
1941 max = (U8)d[1]; /* last char in range */
1942#ifdef EBCDIC
1943 }
1944#endif
8ada0baa 1945
c2e66d9e 1946 if (min > max) {
01ec43d0 1947 Perl_croak(aTHX_
d1573ac7 1948 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1949 (char)min, (char)max);
c2e66d9e
GS
1950 }
1951
c7f1f016 1952#ifdef EBCDIC
4c3a8340
TS
1953 if (literal_endpoint == 2 &&
1954 ((isLOWER(min) && isLOWER(max)) ||
1955 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1956 if (isLOWER(min)) {
1957 for (i = min; i <= max; i++)
1958 if (isLOWER(i))
db42d148 1959 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1960 } else {
1961 for (i = min; i <= max; i++)
1962 if (isUPPER(i))
db42d148 1963 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1964 }
1965 }
1966 else
1967#endif
1968 for (i = min; i <= max; i++)
e294cc5d
JH
1969#ifdef EBCDIC
1970 if (has_utf8) {
1971 const U8 ch = (U8)NATIVE_TO_UTF(i);
1972 if (UNI_IS_INVARIANT(ch))
1973 *d++ = (U8)i;
1974 else {
1975 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1976 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1977 }
1978 }
1979 else
1980#endif
1981 *d++ = (char)i;
1982
1983#ifdef EBCDIC
1984 if (uvmax) {
1985 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1986 if (uvmax > 0x101)
1987 *d++ = (char)UTF_TO_NATIVE(0xff);
1988 if (uvmax > 0x100)
1989 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1990 }
1991#endif
02aa26ce
NT
1992
1993 /* mark the range as done, and continue */
79072805 1994 dorange = FALSE;
01ec43d0 1995 didrange = TRUE;
4c3a8340
TS
1996#ifdef EBCDIC
1997 literal_endpoint = 0;
1998#endif
79072805 1999 continue;
4e553d73 2000 }
02aa26ce
NT
2001
2002 /* range begins (ignore - as first or last char) */
79072805 2003 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2004 if (didrange) {
1fafa243 2005 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2006 }
e294cc5d
JH
2007 if (has_utf8
2008#ifdef EBCDIC
2009 && !native_range
2010#endif
2011 ) {
25716404 2012 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2013 s++;
2014 continue;
2015 }
79072805
LW
2016 dorange = TRUE;
2017 s++;
01ec43d0
GS
2018 }
2019 else {
2020 didrange = FALSE;
4c3a8340
TS
2021#ifdef EBCDIC
2022 literal_endpoint = 0;
e294cc5d 2023 native_range = TRUE;
4c3a8340 2024#endif
01ec43d0 2025 }
79072805 2026 }
02aa26ce
NT
2027
2028 /* if we get here, we're not doing a transliteration */
2029
0f5d15d6
IZ
2030 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2031 except for the last char, which will be done separately. */
3280af22 2032 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2033 if (s[2] == '#') {
e994fd66 2034 while (s+1 < send && *s != ')')
db42d148 2035 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2036 }
2037 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2038 || (s[2] == '?' && s[3] == '{'))
155aba94 2039 {
cc6b7395 2040 I32 count = 1;
0f5d15d6 2041 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2042 char c;
2043
d9f97599
GS
2044 while (count && (c = *regparse)) {
2045 if (c == '\\' && regparse[1])
2046 regparse++;
4e553d73 2047 else if (c == '{')
cc6b7395 2048 count++;
4e553d73 2049 else if (c == '}')
cc6b7395 2050 count--;
d9f97599 2051 regparse++;
cc6b7395 2052 }
e994fd66 2053 if (*regparse != ')')
5bdf89e7 2054 regparse--; /* Leave one char for continuation. */
0f5d15d6 2055 while (s < regparse)
db42d148 2056 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2057 }
748a9306 2058 }
02aa26ce
NT
2059
2060 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2061 else if (*s == '#' && PL_lex_inpat &&
2062 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2063 while (s+1 < send && *s != '\n')
db42d148 2064 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2065 }
02aa26ce 2066
5d1d4326 2067 /* check for embedded arrays
da6eedaa 2068 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2069 */
1749ea0d
TS
2070 else if (*s == '@' && s[1]) {
2071 if (isALNUM_lazy_if(s+1,UTF))
2072 break;
2073 if (strchr(":'{$", s[1]))
2074 break;
2075 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2076 break; /* in regexp, neither @+ nor @- are interpolated */
2077 }
02aa26ce
NT
2078
2079 /* check for embedded scalars. only stop if we're sure it's a
2080 variable.
2081 */
79072805 2082 else if (*s == '$') {
3280af22 2083 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2084 break;
6002328a 2085 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2086 break; /* in regexp, $ might be tail anchor */
2087 }
02aa26ce 2088
2b9d42f0
NIS
2089 /* End of else if chain - OP_TRANS rejoin rest */
2090
02aa26ce 2091 /* backslashes */
79072805
LW
2092 if (*s == '\\' && s+1 < send) {
2093 s++;
02aa26ce 2094
02aa26ce 2095 /* deprecate \1 in strings and substitution replacements */
3280af22 2096 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2097 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2098 {
599cee73 2099 if (ckWARN(WARN_SYNTAX))
9014280d 2100 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2101 *--s = '$';
2102 break;
2103 }
02aa26ce
NT
2104
2105 /* string-change backslash escapes */
3280af22 2106 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2107 --s;
2108 break;
2109 }
cc74c5bd
TS
2110 /* skip any other backslash escapes in a pattern */
2111 else if (PL_lex_inpat) {
2112 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2113 goto default_action;
2114 }
02aa26ce
NT
2115
2116 /* if we get here, it's either a quoted -, or a digit */
79072805 2117 switch (*s) {
02aa26ce
NT
2118
2119 /* quoted - in transliterations */
79072805 2120 case '-':
3280af22 2121 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2122 *d++ = *s++;
2123 continue;
2124 }
2125 /* FALL THROUGH */
2126 default:
11b8faa4 2127 {
86f97054 2128 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2129 ckWARN(WARN_MISC))
9014280d 2130 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2131 "Unrecognized escape \\%c passed through",
2132 *s);
11b8faa4 2133 /* default action is to copy the quoted character */
f9a63242 2134 goto default_action;
11b8faa4 2135 }
02aa26ce
NT
2136
2137 /* \132 indicates an octal constant */
79072805
LW
2138 case '0': case '1': case '2': case '3':
2139 case '4': case '5': case '6': case '7':
ba210ebe 2140 {
53305cf1
NC
2141 I32 flags = 0;
2142 STRLEN len = 3;
2143 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2144 s += len;
2145 }
012bcf8d 2146 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2147
2148 /* \x24 indicates a hex constant */
79072805 2149 case 'x':
a0ed51b3
LW
2150 ++s;
2151 if (*s == '{') {
9d4ba2ae 2152 char* const e = strchr(s, '}');
a4c04bdc
NC
2153 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2154 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2155 STRLEN len;
355860ce 2156
53305cf1 2157 ++s;
adaeee49 2158 if (!e) {
a0ed51b3 2159 yyerror("Missing right brace on \\x{}");
355860ce 2160 continue;
ba210ebe 2161 }
53305cf1
NC
2162 len = e - s;
2163 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2164 s = e + 1;
a0ed51b3
LW
2165 }
2166 else {
ba210ebe 2167 {
53305cf1 2168 STRLEN len = 2;
a4c04bdc 2169 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2170 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2171 s += len;
2172 }
012bcf8d
GS
2173 }
2174
2175 NUM_ESCAPE_INSERT:
2176 /* Insert oct or hex escaped character.
301d3d20 2177 * There will always enough room in sv since such
db42d148 2178 * escapes will be longer than any UTF-8 sequence
301d3d20 2179 * they can end up as. */
ba7cea30 2180
c7f1f016
NIS
2181 /* We need to map to chars to ASCII before doing the tests
2182 to cover EBCDIC
2183 */
c4d5f83a 2184 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2185 if (!has_utf8 && uv > 255) {
301d3d20
JH
2186 /* Might need to recode whatever we have
2187 * accumulated so far if it contains any
2188 * hibit chars.
2189 *
2190 * (Can't we keep track of that and avoid
2191 * this rescan? --jhi)
012bcf8d 2192 */
c7f1f016 2193 int hicount = 0;
63cd0674
NIS
2194 U8 *c;
2195 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2196 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2197 hicount++;
db42d148 2198 }
012bcf8d 2199 }
63cd0674 2200 if (hicount) {
9d4ba2ae 2201 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2202 U8 *src, *dst;
2203 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2204 src = (U8 *)d - 1;
2205 dst = src+hicount;
2206 d += hicount;
cfd0369c 2207 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2208 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2209 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2210 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2211 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2212 }
2213 else {
63cd0674 2214 *dst-- = *src;
012bcf8d 2215 }
c7f1f016 2216 src--;
012bcf8d
GS
2217 }
2218 }
2219 }
2220
9aa983d2 2221 if (has_utf8 || uv > 255) {
9041c2e3 2222 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2223 has_utf8 = TRUE;
f9a63242
JH
2224 if (PL_lex_inwhat == OP_TRANS &&
2225 PL_sublex_info.sub_op) {
2226 PL_sublex_info.sub_op->op_private |=
2227 (PL_lex_repl ? OPpTRANS_FROM_UTF
2228 : OPpTRANS_TO_UTF);
f9a63242 2229 }
e294cc5d
JH
2230#ifdef EBCDIC
2231 if (uv > 255 && !dorange)
2232 native_range = FALSE;
2233#endif
012bcf8d 2234 }
a0ed51b3 2235 else {
012bcf8d 2236 *d++ = (char)uv;
a0ed51b3 2237 }
012bcf8d
GS
2238 }
2239 else {
c4d5f83a 2240 *d++ = (char) uv;
a0ed51b3 2241 }
79072805 2242 continue;
02aa26ce 2243
b239daa5 2244 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2245 case 'N':
55eda711 2246 ++s;
423cee85
JH
2247 if (*s == '{') {
2248 char* e = strchr(s, '}');
155aba94 2249 SV *res;
423cee85 2250 STRLEN len;
cfd0369c 2251 const char *str;
fc8cd66c 2252 SV *type;
4e553d73 2253
423cee85 2254 if (!e) {
5777a3f7 2255 yyerror("Missing right brace on \\N{}");
423cee85
JH
2256 e = s - 1;
2257 goto cont_scan;
2258 }
dbc0d4f2
JH
2259 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2260 /* \N{U+...} */
2261 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2262 PERL_SCAN_DISALLOW_PREFIX;
2263 s += 3;
2264 len = e - s;
2265 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2266 if ( e > s && len != (STRLEN)(e - s) ) {
2267 uv = 0xFFFD;
fc8cd66c 2268 }
dbc0d4f2
JH
2269 s = e + 1;
2270 goto NUM_ESCAPE_INSERT;
2271 }
55eda711 2272 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2273 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2274 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2275 res, NULL, SvPVX(type) );
2276 SvREFCNT_dec(type);
f9a63242
JH
2277 if (has_utf8)
2278 sv_utf8_upgrade(res);
cfd0369c 2279 str = SvPV_const(res,len);
1c47067b
JH
2280#ifdef EBCDIC_NEVER_MIND
2281 /* charnames uses pack U and that has been
2282 * recently changed to do the below uni->native
2283 * mapping, so this would be redundant (and wrong,
2284 * the code point would be doubly converted).
2285 * But leave this in just in case the pack U change
2286 * gets revoked, but the semantics is still
2287 * desireable for charnames. --jhi */
cddc7ef4 2288 {
cfd0369c 2289 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2290
2291 if (uv < 0x100) {
89ebb4a3 2292 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2293
2294 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2295 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2296 str = SvPV_const(res, len);
cddc7ef4
JH
2297 }
2298 }
2299#endif
89491803 2300 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2301 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2302 SvCUR_set(sv, d - ostart);
2303 SvPOK_on(sv);
e4f3eed8 2304 *d = '\0';
f08d6ad9 2305 sv_utf8_upgrade(sv);
d2f449dd 2306 /* this just broke our allocation above... */
eb160463 2307 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2308 d = SvPVX(sv) + SvCUR(sv);
89491803 2309 has_utf8 = TRUE;
f08d6ad9 2310 }
eb160463 2311 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2312 const char * const odest = SvPVX_const(sv);
423cee85 2313
8973db79 2314 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2315 d = SvPVX(sv) + (d - odest);
2316 }
e294cc5d
JH
2317#ifdef EBCDIC
2318 if (!dorange)
2319 native_range = FALSE; /* \N{} is guessed to be Unicode */
2320#endif
423cee85
JH
2321 Copy(str, d, len, char);
2322 d += len;
2323 SvREFCNT_dec(res);
2324 cont_scan:
2325 s = e + 1;
2326 }
2327 else
5777a3f7 2328 yyerror("Missing braces on \\N{}");
423cee85
JH
2329 continue;
2330
02aa26ce 2331 /* \c is a control character */
79072805
LW
2332 case 'c':
2333 s++;
961ce445 2334 if (s < send) {
ba210ebe 2335 U8 c = *s++;
c7f1f016
NIS
2336#ifdef EBCDIC
2337 if (isLOWER(c))
2338 c = toUPPER(c);
2339#endif
db42d148 2340 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2341 }
961ce445
RGS
2342 else {
2343 yyerror("Missing control char name in \\c");
2344 }
79072805 2345 continue;
02aa26ce
NT
2346
2347 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2348 case 'b':
db42d148 2349 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2350 break;
2351 case 'n':
db42d148 2352 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2353 break;
2354 case 'r':
db42d148 2355 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2356 break;
2357 case 'f':
db42d148 2358 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2359 break;
2360 case 't':
db42d148 2361 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2362 break;
34a3fe2a 2363 case 'e':
db42d148 2364 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2365 break;
2366 case 'a':
db42d148 2367 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2368 break;
02aa26ce
NT
2369 } /* end switch */
2370
79072805
LW
2371 s++;
2372 continue;
02aa26ce 2373 } /* end if (backslash) */
4c3a8340
TS
2374#ifdef EBCDIC
2375 else
2376 literal_endpoint++;
2377#endif
02aa26ce 2378
f9a63242 2379 default_action:
2b9d42f0
NIS
2380 /* If we started with encoded form, or already know we want it
2381 and then encode the next character */
2382 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2383 STRLEN len = 1;
5f66b61c
AL
2384 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2385 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2386 s += len;
2387 if (need > len) {
2388 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2389 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2390 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2391 }
5f66b61c 2392 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2393 has_utf8 = TRUE;
e294cc5d
JH
2394#ifdef EBCDIC
2395 if (uv > 255 && !dorange)
2396 native_range = FALSE;
2397#endif
2b9d42f0
NIS
2398 }
2399 else {
2400 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2401 }
02aa26ce
NT
2402 } /* while loop to process each character */
2403
2404 /* terminate the string and set up the sv */
79072805 2405 *d = '\0';
95a20fc0 2406 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2407 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2408 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2409
79072805 2410 SvPOK_on(sv);
9f4817db 2411 if (PL_encoding && !has_utf8) {
d0063567
DK
2412 sv_recode_to_utf8(sv, PL_encoding);
2413 if (SvUTF8(sv))
2414 has_utf8 = TRUE;
9f4817db 2415 }
2b9d42f0 2416 if (has_utf8) {
7e2040f0 2417 SvUTF8_on(sv);
2b9d42f0 2418 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2419 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2420 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2421 }
2422 }
79072805 2423
02aa26ce 2424 /* shrink the sv if we allocated more than we used */
79072805 2425 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2426 SvPV_shrink_to_cur(sv);
79072805 2427 }
02aa26ce 2428
9b599b2a 2429 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2430 if (s > PL_bufptr) {
2431 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2432 sv = new_constant(start, s - start,
2433 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2434 sv, NULL,
10edeb5d
JH
2435 (const char *)
2436 (( PL_lex_inwhat == OP_TRANS
2437 ? "tr"
2438 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2439 ? "s"
2440 : "qq"))));
79072805 2441 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2442 } else
8990e307 2443 SvREFCNT_dec(sv);
79072805
LW
2444 return s;
2445}
2446
ffb4593c
NT
2447/* S_intuit_more
2448 * Returns TRUE if there's more to the expression (e.g., a subscript),
2449 * FALSE otherwise.
ffb4593c
NT
2450 *
2451 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2452 *
2453 * ->[ and ->{ return TRUE
2454 * { and [ outside a pattern are always subscripts, so return TRUE
2455 * if we're outside a pattern and it's not { or [, then return FALSE
2456 * if we're in a pattern and the first char is a {
2457 * {4,5} (any digits around the comma) returns FALSE
2458 * if we're in a pattern and the first char is a [
2459 * [] returns FALSE
2460 * [SOMETHING] has a funky algorithm to decide whether it's a
2461 * character class or not. It has to deal with things like
2462 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2463 * anything else returns TRUE
2464 */
2465
9cbb5ea2
GS
2466/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2467
76e3520e 2468STATIC int
cea2e8a9 2469S_intuit_more(pTHX_ register char *s)
79072805 2470{
97aff369 2471 dVAR;
3280af22 2472 if (PL_lex_brackets)
79072805
LW
2473 return TRUE;
2474 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2475 return TRUE;
2476 if (*s != '{' && *s != '[')
2477 return FALSE;
3280af22 2478 if (!PL_lex_inpat)
79072805
LW
2479 return TRUE;
2480
2481 /* In a pattern, so maybe we have {n,m}. */
2482 if (*s == '{') {
2483 s++;
2484 if (!isDIGIT(*s))
2485 return TRUE;
2486 while (isDIGIT(*s))
2487 s++;
2488 if (*s == ',')
2489 s++;
2490 while (isDIGIT(*s))
2491 s++;
2492 if (*s == '}')
2493 return FALSE;
2494 return TRUE;
2495
2496 }
2497
2498 /* On the other hand, maybe we have a character class */
2499
2500 s++;
2501 if (*s == ']' || *s == '^')
2502 return FALSE;
2503 else {
ffb4593c 2504 /* this is terrifying, and it works */
79072805
LW
2505 int weight = 2; /* let's weigh the evidence */
2506 char seen[256];
f27ffc4a 2507 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2508 const char * const send = strchr(s,']');
3280af22 2509 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2510
2511 if (!send) /* has to be an expression */
2512 return TRUE;
2513
2514 Zero(seen,256,char);
2515 if (*s == '$')
2516 weight -= 3;
2517 else if (isDIGIT(*s)) {
2518 if (s[1] != ']') {
2519 if (isDIGIT(s[1]) && s[2] == ']')
2520 weight -= 10;
2521 }
2522 else
2523 weight -= 100;
2524 }
2525 for (; s < send; s++) {
2526 last_un_char = un_char;
2527 un_char = (unsigned char)*s;
2528 switch (*s) {
2529 case '@':
2530 case '&':
2531 case '$':
2532 weight -= seen[un_char] * 10;
7e2040f0 2533 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2534 int len;
8903cb82 2535 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2536 len = (int)strlen(tmpbuf);
2537 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2538 weight -= 100;
2539 else
2540 weight -= 10;
2541 }
2542 else if (*s == '$' && s[1] &&
93a17b20
LW
2543 strchr("[#!%*<>()-=",s[1])) {
2544 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2545 weight -= 10;
2546 else
2547 weight -= 1;
2548 }
2549 break;
2550 case '\\':
2551 un_char = 254;
2552 if (s[1]) {
93a17b20 2553 if (strchr("wds]",s[1]))
79072805 2554 weight += 100;
10edeb5d 2555 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2556 weight += 1;
93a17b20 2557 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2558 weight += 40;
2559 else if (isDIGIT(s[1])) {
2560 weight += 40;
2561 while (s[1] && isDIGIT(s[1]))
2562 s++;
2563 }
2564 }
2565 else
2566 weight += 100;
2567 break;
2568 case '-':
2569 if (s[1] == '\\')
2570 weight += 50;
93a17b20 2571 if (strchr("aA01! ",last_un_char))
79072805 2572 weight += 30;
93a17b20 2573 if (strchr("zZ79~",s[1]))
79072805 2574 weight += 30;
f27ffc4a
GS
2575 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2576 weight -= 5; /* cope with negative subscript */
79072805
LW
2577 break;
2578 default:
3792a11b
NC
2579 if (!isALNUM(last_un_char)
2580 && !(last_un_char == '$' || last_un_char == '@'
2581 || last_un_char == '&')
2582 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2583 char *d = tmpbuf;
2584 while (isALPHA(*s))
2585 *d++ = *s++;
2586 *d = '\0';
5458a98a 2587 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2588 weight -= 150;
2589 }
2590 if (un_char == last_un_char + 1)
2591 weight += 5;
2592 weight -= seen[un_char];
2593 break;
2594 }
2595 seen[un_char]++;
2596 }
2597 if (weight >= 0) /* probably a character class */
2598 return FALSE;
2599 }
2600
2601 return TRUE;
2602}
ffed7fef 2603
ffb4593c
NT
2604/*
2605 * S_intuit_method
2606 *
2607 * Does all the checking to disambiguate
2608 * foo bar
2609 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2610 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2611 *
2612 * First argument is the stuff after the first token, e.g. "bar".
2613 *
2614 * Not a method if bar is a filehandle.
2615 * Not a method if foo is a subroutine prototyped to take a filehandle.
2616 * Not a method if it's really "Foo $bar"
2617 * Method if it's "foo $bar"
2618 * Not a method if it's really "print foo $bar"
2619 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2620 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2621 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2622 * =>
2623 */
2624
76e3520e 2625STATIC int
62d55b22 2626S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2627{
97aff369 2628 dVAR;
a0d0e21e 2629 char *s = start + (*start == '$');
3280af22 2630 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2631 STRLEN len;
2632 GV* indirgv;
5db06880
NC
2633#ifdef PERL_MAD
2634 int soff;
2635#endif
a0d0e21e
LW
2636
2637 if (gv) {
62d55b22 2638 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2639 return 0;
62d55b22
NC
2640 if (cv) {
2641 if (SvPOK(cv)) {
2642 const char *proto = SvPVX_const(cv);
2643 if (proto) {
2644 if (*proto == ';')
2645 proto++;
2646 if (*proto == '*')
2647 return 0;
2648 }
b6c543e3
IZ
2649 }
2650 } else
c35e046a 2651 gv = NULL;
a0d0e21e 2652 }
8903cb82 2653 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2654 /* start is the beginning of the possible filehandle/object,
2655 * and s is the end of it
2656 * tmpbuf is a copy of it
2657 */
2658
a0d0e21e 2659 if (*start == '$') {
3ef1310e
RGS
2660 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2661 isUPPER(*PL_tokenbuf))
a0d0e21e 2662 return 0;
5db06880
NC
2663#ifdef PERL_MAD
2664 len = start - SvPVX(PL_linestr);
2665#endif
29595ff2 2666 s = PEEKSPACE(s);
f0092767 2667#ifdef PERL_MAD
5db06880
NC
2668 start = SvPVX(PL_linestr) + len;
2669#endif
3280af22
NIS
2670 PL_bufptr = start;
2671 PL_expect = XREF;
a0d0e21e
LW
2672 return *s == '(' ? FUNCMETH : METHOD;
2673 }
5458a98a 2674 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2675 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2676 len -= 2;
2677 tmpbuf[len] = '\0';
5db06880
NC
2678#ifdef PERL_MAD
2679 soff = s - SvPVX(PL_linestr);
2680#endif
c3e0f903
GS
2681 goto bare_package;
2682 }
90e5519e 2683 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2684 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2685 return 0;
2686 /* filehandle or package name makes it a method */
da51bb9b 2687 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2688#ifdef PERL_MAD
2689 soff = s - SvPVX(PL_linestr);
2690#endif
29595ff2 2691 s = PEEKSPACE(s);
3280af22 2692 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2693 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2694 bare_package:
cd81e915 2695 start_force(PL_curforce);
9ded7720 2696 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2697 newSVpvn(tmpbuf,len));
9ded7720 2698 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2699 if (PL_madskills)
2700 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2701 PL_expect = XTERM;
a0d0e21e 2702 force_next(WORD);
3280af22 2703 PL_bufptr = s;
5db06880
NC
2704#ifdef PERL_MAD
2705 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2706#endif
a0d0e21e
LW
2707 return *s == '(' ? FUNCMETH : METHOD;
2708 }
2709 }
2710 return 0;
2711}
2712
ffb4593c
NT
2713/*
2714 * S_incl_perldb
2715 * Return a string of Perl code to load the debugger. If PERL5DB
2716 * is set, it will return the contents of that, otherwise a
2717 * compile-time require of perl5db.pl.
2718 */
2719
bfed75c6 2720STATIC const char*
cea2e8a9 2721S_incl_perldb(pTHX)
a0d0e21e 2722{
97aff369 2723 dVAR;
3280af22 2724 if (PL_perldb) {
9d4ba2ae 2725 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2726
2727 if (pdb)
2728 return pdb;
93189314 2729 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2730 return "BEGIN { require 'perl5db.pl' }";
2731 }
2732 return "";
2733}
2734
2735
16d20bd9 2736/* Encoded script support. filter_add() effectively inserts a
4e553d73 2737 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2738 * Note that the filter function only applies to the current source file
2739 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2740 *
2741 * The datasv parameter (which may be NULL) can be used to pass
2742 * private data to this instance of the filter. The filter function
2743 * can recover the SV using the FILTER_DATA macro and use it to
2744 * store private buffers and state information.
2745 *
2746 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2747 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2748 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2749 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2750 * private use must be set using malloc'd pointers.
2751 */
16d20bd9
AD
2752
2753SV *
864dbfa3 2754Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2755{
97aff369 2756 dVAR;
f4c556ac 2757 if (!funcp)
a0714e2c 2758 return NULL;
f4c556ac 2759
3280af22
NIS
2760 if (!PL_rsfp_filters)
2761 PL_rsfp_filters = newAV();
16d20bd9 2762 if (!datasv)
561b68a9 2763 datasv = newSV(0);
862a34c6 2764 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2765 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2766 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2767 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2768 FPTR2DPTR(void *, IoANY(datasv)),
2769 SvPV_nolen(datasv)));
3280af22
NIS
2770 av_unshift(PL_rsfp_filters, 1);
2771 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2772 return(datasv);
2773}
4e553d73 2774
16d20bd9
AD
2775
2776/* Delete most recently added instance of this filter function. */
a0d0e21e 2777void
864dbfa3 2778Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2779{
97aff369 2780 dVAR;
e0c19803 2781 SV *datasv;
24801a4b 2782
33073adb 2783#ifdef DEBUGGING
55662e27
JH
2784 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2785 FPTR2DPTR(void*, funcp)));
33073adb 2786#endif
3280af22 2787 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2788 return;
2789 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2790 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2791 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2792 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2793 IoANY(datasv) = (void *)NULL;
3280af22 2794 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2795
16d20bd9
AD
2796 return;
2797 }
2798 /* we need to search for the correct entry and clear it */
cea2e8a9 2799 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2800}
2801
2802
1de9afcd
RGS
2803/* Invoke the idxth filter function for the current rsfp. */
2804/* maxlen 0 = read one text line */
16d20bd9 2805I32
864dbfa3 2806Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2807{
97aff369 2808 dVAR;
16d20bd9
AD
2809 filter_t funcp;
2810 SV *datasv = NULL;
f482118e
NC
2811 /* This API is bad. It should have been using unsigned int for maxlen.
2812 Not sure if we want to change the API, but if not we should sanity
2813 check the value here. */
39cd7a59
NC
2814 const unsigned int correct_length
2815 = maxlen < 0 ?
2816#ifdef PERL_MICRO
2817 0x7FFFFFFF
2818#else
2819 INT_MAX
2820#endif
2821 : maxlen;
e50aee73 2822
3280af22 2823 if (!PL_rsfp_filters)
16d20bd9 2824 return -1;
1de9afcd 2825 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2826 /* Provide a default input filter to make life easy. */
2827 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2828 DEBUG_P(PerlIO_printf(Perl_debug_log,
2829 "filter_read %d: from rsfp\n", idx));
f482118e 2830 if (correct_length) {
16d20bd9
AD
2831 /* Want a block */
2832 int len ;
f54cb97a 2833 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2834
2835 /* ensure buf_sv is large enough */
f482118e
NC
2836 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2837 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2838 correct_length)) <= 0) {
3280af22 2839 if (PerlIO_error(PL_rsfp))
37120919
AD
2840 return -1; /* error */
2841 else
2842 return 0 ; /* end of file */
2843 }
16d20bd9
AD
2844 SvCUR_set(buf_sv, old_len + len) ;
2845 } else {
2846 /* Want a line */
3280af22
NIS
2847 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2848 if (PerlIO_error(PL_rsfp))
37120919
AD
2849 return -1; /* error */
2850 else
2851 return 0 ; /* end of file */
2852 }
16d20bd9
AD
2853 }
2854 return SvCUR(buf_sv);
2855 }
2856 /* Skip this filter slot if filter has been deleted */
1de9afcd 2857 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2858 DEBUG_P(PerlIO_printf(Perl_debug_log,
2859 "filter_read %d: skipped (filter deleted)\n",
2860 idx));
f482118e 2861 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2862 }
2863 /* Get function pointer hidden within datasv */
8141890a 2864 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2865 DEBUG_P(PerlIO_printf(Perl_debug_log,
2866 "filter_read %d: via function %p (%s)\n",
ca0270c4 2867 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2868 /* Call function. The function is expected to */
2869 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2870 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2871 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2872}
2873
76e3520e 2874STATIC char *
cea2e8a9 2875S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2876{
97aff369 2877 dVAR;
c39cd008 2878#ifdef PERL_CR_FILTER
3280af22 2879 if (!PL_rsfp_filters) {
c39cd008 2880 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2881 }
2882#endif
3280af22 2883 if (PL_rsfp_filters) {
55497cff 2884 if (!append)
2885 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2886 if (FILTER_READ(0, sv, 0) > 0)
2887 return ( SvPVX(sv) ) ;
2888 else
bd61b366 2889 return NULL ;
16d20bd9 2890 }
9d116dd7 2891 else
fd049845 2892 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2893}
2894
01ec43d0 2895STATIC HV *
7fc63493 2896S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2897{
97aff369 2898 dVAR;
def3634b
GS
2899 GV *gv;
2900
01ec43d0 2901 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2902 return PL_curstash;
2903
2904 if (len > 2 &&
2905 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2906 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2907 {
2908 return GvHV(gv); /* Foo:: */
def3634b
GS
2909 }
2910
2911 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2912 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2913 if (gv && GvCV(gv)) {
2914 SV * const sv = cv_const_sv(GvCV(gv));
2915 if (sv)
83003860 2916 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2917 }
2918
da51bb9b 2919 return gv_stashpv(pkgname, 0);
def3634b 2920}
a0d0e21e 2921
e3f73d4e
RGS
2922/*
2923 * S_readpipe_override
2924 * Check whether readpipe() is overriden, and generates the appropriate
2925 * optree, provided sublex_start() is called afterwards.
2926 */
2927STATIC void
1d51329b 2928S_readpipe_override(pTHX)
e3f73d4e
RGS
2929{
2930 GV **gvp;
2931 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2932 yylval.ival = OP_BACKTICK;
2933 if ((gv_readpipe
2934 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2935 ||
2936 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2937 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2938 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2939 {
2940 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2941 append_elem(OP_LIST,
2942 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2943 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2944 }
2945 else {
2946 set_csh();
2947 }
2948}
2949
5db06880
NC
2950#ifdef PERL_MAD
2951 /*
2952 * Perl_madlex
2953 * The intent of this yylex wrapper is to minimize the changes to the
2954 * tokener when we aren't interested in collecting madprops. It remains
2955 * to be seen how successful this strategy will be...
2956 */
2957
2958int
2959Perl_madlex(pTHX)
2960{
2961 int optype;
2962 char *s = PL_bufptr;
2963
cd81e915
NC
2964 /* make sure PL_thiswhite is initialized */
2965 PL_thiswhite = 0;
2966 PL_thismad = 0;
5db06880 2967
cd81e915 2968 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2969 if (PL_pending_ident)
2970 return S_pending_ident(aTHX);
2971
2972 /* previous token ate up our whitespace? */
cd81e915
NC
2973 if (!PL_lasttoke && PL_nextwhite) {
2974 PL_thiswhite = PL_nextwhite;
2975 PL_nextwhite = 0;
5db06880
NC
2976 }
2977
2978 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2979 PL_realtokenstart = -1;
2980 PL_thistoken = 0;
5db06880
NC
2981 optype = yylex();
2982 s = PL_bufptr;
cd81e915 2983 assert(PL_curforce < 0);
5db06880 2984
cd81e915
NC
2985 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2986 if (!PL_thistoken) {
2987 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2988 PL_thistoken = newSVpvs("");
5db06880 2989 else {
c35e046a 2990 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2991 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2992 }
2993 }
cd81e915
NC
2994 if (PL_thismad) /* install head */
2995 CURMAD('X', PL_thistoken);
5db06880
NC
2996 }
2997
2998 /* last whitespace of a sublex? */
cd81e915
NC
2999 if (optype == ')' && PL_endwhite) {
3000 CURMAD('X', PL_endwhite);
5db06880
NC
3001 }
3002
cd81e915 3003 if (!PL_thismad) {
5db06880
NC
3004
3005 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3006 if (!PL_thiswhite && !PL_endwhite && !optype) {
3007 sv_free(PL_thistoken);
3008 PL_thistoken = 0;
5db06880
NC
3009 return 0;
3010 }
3011
3012 /* put off final whitespace till peg */
3013 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3014 PL_nextwhite = PL_thiswhite;
3015 PL_thiswhite = 0;
5db06880 3016 }
cd81e915
NC
3017 else if (PL_thisopen) {
3018 CURMAD('q', PL_thisopen);
3019 if (PL_thistoken)
3020 sv_free(PL_thistoken);
3021 PL_thistoken = 0;
5db06880
NC
3022 }
3023 else {
3024 /* Store actual token text as madprop X */
cd81e915 3025 CURMAD('X', PL_thistoken);
5db06880
NC
3026 }
3027
cd81e915 3028 if (PL_thiswhite) {
5db06880 3029 /* add preceding whitespace as madprop _ */
cd81e915 3030 CURMAD('_', PL_thiswhite);
5db06880
NC
3031 }
3032
cd81e915 3033 if (PL_thisstuff) {
5db06880 3034 /* add quoted material as madprop = */
cd81e915 3035 CURMAD('=', PL_thisstuff);
5db06880
NC
3036 }
3037
cd81e915 3038 if (PL_thisclose) {
5db06880 3039 /* add terminating quote as madprop Q */
cd81e915 3040 CURMAD('Q', PL_thisclose);
5db06880
NC
3041 }
3042 }
3043
3044 /* special processing based on optype */
3045
3046 switch (optype) {
3047
3048 /* opval doesn't need a TOKEN since it can already store mp */
3049 case WORD:
3050 case METHOD:
3051 case FUNCMETH:
3052 case THING:
3053 case PMFUNC:
3054 case PRIVATEREF:
3055 case FUNC0SUB:
3056 case UNIOPSUB:
3057 case LSTOPSUB:
3058 if (yylval.opval)
cd81e915
NC
3059 append_madprops(PL_thismad, yylval.opval, 0);
3060 PL_thismad = 0;
5db06880
NC
3061 return optype;
3062
3063 /* fake EOF */
3064 case 0:
3065 optype = PEG;
cd81e915
NC
3066 if (PL_endwhite) {
3067 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3068 PL_endwhite = 0;
5db06880
NC
3069 }
3070 break;
3071
3072 case ']':
3073 case '}':
cd81e915 3074 if (PL_faketokens)
5db06880
NC
3075 break;
3076 /* remember any fake bracket that lexer is about to discard */
3077 if (PL_lex_brackets == 1 &&
3078 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3079 {
3080 s = PL_bufptr;
3081 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3082 s++;
3083 if (*s == '}') {
cd81e915
NC
3084 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3085 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3086 PL_thiswhite = 0;
5db06880
NC
3087 PL_bufptr = s - 1;
3088 break; /* don't bother looking for trailing comment */
3089 }
3090 else
3091 s = PL_bufptr;
3092 }
3093 if (optype == ']')
3094 break;
3095 /* FALLTHROUGH */
3096
3097 /* attach a trailing comment to its statement instead of next token */
3098 case ';':
cd81e915 3099 if (PL_faketokens)
5db06880
NC
3100 break;
3101 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3102 s = PL_bufptr;
3103 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3104 s++;
3105 if (*s == '\n' || *s == '#') {
3106 while (s < PL_bufend && *s != '\n')
3107 s++;
3108 if (s < PL_bufend)
3109 s++;
cd81e915
NC
3110 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3111 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3112 PL_thiswhite = 0;
5db06880
NC
3113 PL_bufptr = s;
3114 }
3115 }
3116 break;
3117
3118 /* pval */
3119 case LABEL:
3120 break;
3121
3122 /* ival */
3123 default:
3124 break;
3125
3126 }
3127
3128 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3129 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3130 PL_thismad = 0;
5db06880
NC
3131 return optype;
3132}
3133#endif
3134
468aa647 3135STATIC char *
cc6ed77d 3136S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3137 dVAR;
468aa647
RGS
3138 if (PL_expect != XSTATE)
3139 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3140 is_use ? "use" : "no"));
29595ff2 3141 s = SKIPSPACE1(s);
468aa647
RGS
3142 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3143 s = force_version(s, TRUE);
29595ff2 3144 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3145 start_force(PL_curforce);
9ded7720 3146 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3147 force_next(WORD);
3148 }
3149 else if (*s == 'v') {
3150 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3151 s = force_version(s, FALSE);
3152 }
3153 }
3154 else {
3155 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3156 s = force_version(s, FALSE);
3157 }
3158 yylval.ival = is_use;
3159 return s;
3160}
748a9306 3161#ifdef DEBUGGING
27da23d5 3162 static const char* const exp_name[] =
09bef843 3163 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3164 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3165 };
748a9306 3166#endif
463ee0b2 3167
02aa26ce
NT
3168/*
3169 yylex
3170
3171 Works out what to call the token just pulled out of the input
3172 stream. The yacc parser takes care of taking the ops we return and
3173 stitching them into a tree.
3174
3175 Returns:
3176 PRIVATEREF
3177
3178 Structure:
3179 if read an identifier
3180 if we're in a my declaration
3181 croak if they tried to say my($foo::bar)
3182 build the ops for a my() declaration
3183 if it's an access to a my() variable
3184 are we in a sort block?
3185 croak if my($a); $a <=> $b
3186 build ops for access to a my() variable
3187 if in a dq string, and they've said @foo and we can't find @foo
3188 croak
3189 build ops for a bareword
3190 if we already built the token before, use it.
3191*/
3192
20141f0e 3193
dba4d153
JH
3194#ifdef __SC__
3195#pragma segment Perl_yylex
3196#endif
dba4d153 3197int
dba4d153 3198Perl_yylex(pTHX)
20141f0e 3199{
97aff369 3200 dVAR;
3afc138a 3201 register char *s = PL_bufptr;
378cc40b 3202 register char *d;
463ee0b2 3203 STRLEN len;
aa7440fb 3204 bool bof = FALSE;
a687059c 3205
10edeb5d
JH
3206 /* orig_keyword, gvp, and gv are initialized here because
3207 * jump to the label just_a_word_zero can bypass their
3208 * initialization later. */
3209 I32 orig_keyword = 0;
3210 GV *gv = NULL;
3211 GV **gvp = NULL;
3212
bbf60fe6 3213 DEBUG_T( {
396482e1 3214 SV* tmp = newSVpvs("");
b6007c36
DM
3215 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3216 (IV)CopLINE(PL_curcop),
3217 lex_state_names[PL_lex_state],
3218 exp_name[PL_expect],
3219 pv_display(tmp, s, strlen(s), 0, 60));
3220 SvREFCNT_dec(tmp);
bbf60fe6 3221 } );
02aa26ce 3222 /* check if there's an identifier for us to look at */
ba979b31 3223 if (PL_pending_ident)
bbf60fe6 3224 return REPORT(S_pending_ident(aTHX));
bbce6d69 3225
02aa26ce
NT
3226 /* no identifier pending identification */
3227
3280af22 3228 switch (PL_lex_state) {
79072805
LW
3229#ifdef COMMENTARY
3230 case LEX_NORMAL: /* Some compilers will produce faster */
3231 case LEX_INTERPNORMAL: /* code if we comment these out. */
3232 break;
3233#endif
3234
09bef843 3235 /* when we've already built the next token, just pull it out of the queue */
79072805 3236 case LEX_KNOWNEXT:
5db06880
NC
3237#ifdef PERL_MAD
3238 PL_lasttoke--;
3239 yylval = PL_nexttoke[PL_lasttoke].next_val;
3240 if (PL_madskills) {
cd81e915 3241 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3242 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3243 if (PL_thismad && PL_thismad->mad_key == '_') {
3244 PL_thiswhite = (SV*)PL_thismad->mad_val;
3245 PL_thismad->mad_val = 0;
3246 mad_free(PL_thismad);
3247 PL_thismad = 0;
5db06880
NC
3248 }
3249 }
3250 if (!PL_lasttoke) {
3251 PL_lex_state = PL_lex_defer;
3252 PL_expect = PL_lex_expect;
3253 PL_lex_defer = LEX_NORMAL;
3254 if (!PL_nexttoke[PL_lasttoke].next_type)
3255 return yylex();
3256 }
3257#else
3280af22 3258 PL_nexttoke--;
5db06880 3259 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3260 if (!PL_nexttoke) {
3261 PL_lex_state = PL_lex_defer;
3262 PL_expect = PL_lex_expect;
3263 PL_lex_defer = LEX_NORMAL;
463ee0b2 3264 }
5db06880
NC
3265#endif
3266#ifdef PERL_MAD
3267 /* FIXME - can these be merged? */
3268 return(PL_nexttoke[PL_lasttoke].next_type);
3269#else
bbf60fe6 3270 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3271#endif
79072805 3272
02aa26ce 3273 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3274 when we get here, PL_bufptr is at the \
02aa26ce 3275 */
79072805
LW
3276 case LEX_INTERPCASEMOD:
3277#ifdef DEBUGGING
3280af22 3278 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3279 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3280#endif
02aa26ce 3281 /* handle \E or end of string */
3280af22 3282 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3283 /* if at a \E */
3280af22 3284 if (PL_lex_casemods) {
f54cb97a 3285 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3286 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3287
3792a11b
NC
3288 if (PL_bufptr != PL_bufend
3289 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3290 PL_bufptr += 2;
3291 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3292#ifdef PERL_MAD
3293 if (PL_madskills)
6b29d1f5 3294 PL_thistoken = newSVpvs("\\E");
5db06880 3295#endif
a0d0e21e 3296 }
bbf60fe6 3297 return REPORT(')');
79072805 3298 }
5db06880
NC
3299#ifdef PERL_MAD
3300 while (PL_bufptr != PL_bufend &&
3301 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3302 if (!PL_thiswhite)
6b29d1f5 3303 PL_thiswhite = newSVpvs("");
cd81e915 3304 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3305 PL_bufptr += 2;
3306 }
3307#else
3280af22
NIS
3308 if (PL_bufptr != PL_bufend)
3309 PL_bufptr += 2;
5db06880 3310#endif
3280af22 3311 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3312 return yylex();
79072805
LW
3313 }
3314 else {
607df283 3315 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3316 "### Saw case modifier\n"); });
3280af22 3317 s = PL_bufptr + 1;
6e909404 3318 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3319#ifdef PERL_MAD
cd81e915 3320 if (!PL_thiswhite)
6b29d1f5 3321 PL_thiswhite = newSVpvs("");
cd81e915 3322 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3323#endif
89122651 3324 PL_bufptr = s + 3;
6e909404
JH
3325 PL_lex_state = LEX_INTERPCONCAT;
3326 return yylex();
a0d0e21e 3327 }
6e909404 3328 else {
90771dc0 3329 I32 tmp;
5db06880
NC
3330 if (!PL_madskills) /* when just compiling don't need correct */
3331 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3332 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3333 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3334 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3335 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3336 return REPORT(')');
6e909404
JH
3337 }
3338 if (PL_lex_casemods > 10)
3339 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3340 PL_lex_casestack[PL_lex_casemods++] = *s;
3341 PL_lex_casestack[PL_lex_casemods] = '\0';
3342 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3343 start_force(PL_curforce);
9ded7720 3344 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3345 force_next('(');
cd81e915 3346 start_force(PL_curforce);
6e909404 3347 if (*s == 'l')
9ded7720 3348 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3349 else if (*s == 'u')
9ded7720 3350 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3351 else if (*s == 'L')
9ded7720 3352 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3353 else if (*s == 'U')
9ded7720 3354 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3355 else if (*s == 'Q')
9ded7720 3356 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3357 else
3358 Perl_croak(aTHX_ "panic: yylex");
5db06880 3359 if (PL_madskills) {
6b29d1f5 3360 SV* const tmpsv = newSVpvs("");
5db06880
NC
3361 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3362 curmad('_', tmpsv);
3363 }
6e909404 3364 PL_bufptr = s + 1;
a0d0e21e 3365 }
79072805 3366 force_next(FUNC);
3280af22
NIS
3367 if (PL_lex_starts) {
3368 s = PL_bufptr;
3369 PL_lex_starts = 0;
5db06880
NC
3370#ifdef PERL_MAD
3371 if (PL_madskills) {
cd81e915
NC
3372 if (PL_thistoken)
3373 sv_free(PL_thistoken);
6b29d1f5 3374 PL_thistoken = newSVpvs("");
5db06880
NC
3375 }
3376#endif
131b3ad0
DM
3377 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3378 if (PL_lex_casemods == 1 && PL_lex_inpat)
3379 OPERATOR(',');
3380 else
3381 Aop(OP_CONCAT);
79072805
LW
3382 }
3383 else
cea2e8a9 3384 return yylex();
79072805
LW
3385 }
3386
55497cff 3387 case LEX_INTERPPUSH:
bbf60fe6 3388 return REPORT(sublex_push());
55497cff 3389
79072805 3390 case LEX_INTERPSTART:
3280af22 3391 if (PL_bufptr == PL_bufend)
bbf60fe6 3392 return REPORT(sublex_done());
607df283 3393 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3394 "### Interpolated variable\n"); });
3280af22
NIS
3395 PL_expect = XTERM;
3396 PL_lex_dojoin = (*PL_bufptr == '@');
3397 PL_lex_state = LEX_INTERPNORMAL;
3398 if (PL_lex_dojoin) {
cd81e915 3399 start_force(PL_curforce);
9ded7720 3400 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3401 force_next(',');
cd81e915 3402 start_force(PL_curforce);
a0d0e21e 3403 force_ident("\"", '$');
cd81e915 3404 start_force(PL_curforce);
9ded7720 3405 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3406 force_next('$');
cd81e915 3407 start_force(PL_curforce);
9ded7720 3408 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3409 force_next('(');
cd81e915 3410 start_force(PL_curforce);
9ded7720 3411 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3412 force_next(FUNC);
3413 }
3280af22
NIS
3414 if (PL_lex_starts++) {
3415 s = PL_bufptr;
5db06880
NC
3416#ifdef PERL_MAD
3417 if (PL_madskills) {
cd81e915
NC
3418 if (PL_thistoken)
3419 sv_free(PL_thistoken);
6b29d1f5 3420 PL_thistoken = newSVpvs("");
5db06880
NC
3421 }
3422#endif
131b3ad0
DM
3423 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3424 if (!PL_lex_casemods && PL_lex_inpat)
3425 OPERATOR(',');
3426 else
3427 Aop(OP_CONCAT);
79072805 3428 }
cea2e8a9 3429 return yylex();
79072805
LW
3430
3431 case LEX_INTERPENDMAYBE:
3280af22
NIS
3432 if (intuit_more(PL_bufptr)) {
3433 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3434 break;
3435 }
3436 /* FALL THROUGH */
3437
3438 case LEX_INTERPEND:
3280af22
NIS
3439 if (PL_lex_dojoin) {
3440 PL_lex_dojoin = FALSE;
3441 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3442#ifdef PERL_MAD
3443 if (PL_madskills) {
cd81e915
NC
3444 if (PL_thistoken)
3445 sv_free(PL_thistoken);
6b29d1f5 3446 PL_thistoken = newSVpvs("");
5db06880
NC
3447 }
3448#endif
bbf60fe6 3449 return REPORT(')');
79072805 3450 }
43a16006 3451 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3452 && SvEVALED(PL_lex_repl))
43a16006 3453 {
e9fa98b2 3454 if (PL_bufptr != PL_bufend)
cea2e8a9 3455 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3456 PL_lex_repl = NULL;
e9fa98b2 3457 }
79072805
LW
3458 /* FALLTHROUGH */
3459 case LEX_INTERPCONCAT:
3460#ifdef DEBUGGING
3280af22 3461 if (PL_lex_brackets)
cea2e8a9 3462 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3463#endif
3280af22 3464 if (PL_bufptr == PL_bufend)
bbf60fe6 3465 return REPORT(sublex_done());
79072805 3466
3280af22
NIS
3467 if (SvIVX(PL_linestr) == '\'') {
3468 SV *sv = newSVsv(PL_linestr);
3469 if (!PL_lex_inpat)
76e3520e 3470 sv = tokeq(sv);
3280af22 3471 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3472 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3473 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3474 s = PL_bufend;
79072805
LW
3475 }
3476 else {
3280af22 3477 s = scan_const(PL_bufptr);
79072805 3478 if (*s == '\\')
3280af22 3479 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3480 else
3280af22 3481 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3482 }
3483
3280af22 3484 if (s != PL_bufptr) {
cd81e915 3485 start_force(PL_curforce);
5db06880
NC
3486 if (PL_madskills) {
3487 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3488 }
9ded7720 3489 NEXTVAL_NEXTTOKE = yylval;
3280af22 3490 PL_expect = XTERM;
79072805 3491 force_next(THING);
131b3ad0 3492 if (PL_lex_starts++) {
5db06880
NC
3493#ifdef PERL_MAD
3494 if (PL_madskills) {
cd81e915
NC
3495 if (PL_thistoken)
3496 sv_free(PL_thistoken);
6b29d1f5 3497 PL_thistoken = newSVpvs("");
5db06880
NC
3498 }
3499#endif
131b3ad0
DM
3500 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3501 if (!PL_lex_casemods && PL_lex_inpat)
3502 OPERATOR(',');
3503 else
3504 Aop(OP_CONCAT);
3505 }
79072805 3506 else {
3280af22 3507 PL_bufptr = s;
cea2e8a9 3508 return yylex();
79072805
LW
3509 }
3510 }
3511
cea2e8a9 3512 return yylex();
a0d0e21e 3513 case LEX_FORMLINE:
3280af22
NIS
3514 PL_lex_state = LEX_NORMAL;
3515 s = scan_formline(PL_bufptr);
3516 if (!PL_lex_formbrack)
a0d0e21e
LW
3517 goto rightbracket;
3518 OPERATOR(';');
79072805
LW
3519 }
3520
3280af22
NIS
3521 s = PL_bufptr;
3522 PL_oldoldbufptr = PL_oldbufptr;
3523 PL_oldbufptr = s;
463ee0b2
LW
3524
3525 retry:
5db06880 3526#ifdef PERL_MAD
cd81e915
NC
3527 if (PL_thistoken) {
3528 sv_free(PL_thistoken);
3529 PL_thistoken = 0;
5db06880 3530 }
cd81e915 3531 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3532#endif
378cc40b
LW
3533 switch (*s) {
3534 default:
7e2040f0 3535 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3536 goto keylookup;
cea2e8a9 3537 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3538 case 4:
3539 case 26:
3540 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3541 case 0:
5db06880
NC
3542#ifdef PERL_MAD
3543 if (PL_madskills)
cd81e915 3544 PL_faketokens = 0;
5db06880 3545#endif
3280af22
NIS
3546 if (!PL_rsfp) {
3547 PL_last_uni = 0;
3548 PL_last_lop = 0;
c5ee2135 3549 if (PL_lex_brackets) {