This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Quiet a smoke warning in toke.c and bump the copyright year.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
5912531f 26#define yylval (PL_parser->yylval)
d3b6f988 27
acdf0a21
DM
28/* YYINITDEPTH -- initial size of the parser's stacks. */
29#define YYINITDEPTH 200
30
0bd48802 31static const char ident_too_long[] = "Identifier too long";
c445ea15 32static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 33
acfe0abc 34static void restore_rsfp(pTHX_ void *f);
6e3aabd6 35#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
36static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
37static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 38#endif
51371543 39
29595ff2 40#ifdef PERL_MAD
29595ff2 41# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 42# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 43#else
5db06880 44# define CURMAD(slot,sv)
9ded7720 45# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
46#endif
47
9059aa12
LW
48#define XFAKEBRACK 128
49#define XENUMMASK 127
50
39e02b42
JH
51#ifdef USE_UTF8_SCRIPTS
52# define UTF (!IN_BYTES)
2b9d42f0 53#else
746b446a 54# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 55#endif
a0ed51b3 56
61f0cdd9 57/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
58 * 1999-02-27 mjd-perl-patch@plover.com */
59#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
60
bf4acbe4
GS
61/* On MacOS, respect nonbreaking spaces */
62#ifdef MACOS_TRADITIONAL
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
64#else
65#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
66#endif
67
ffb4593c
NT
68/* LEX_* are values for PL_lex_state, the state of the lexer.
69 * They are arranged oddly so that the guard on the switch statement
79072805
LW
70 * can get by with a single comparison (if the compiler is smart enough).
71 */
72
fb73857a 73/* #define LEX_NOTPARSING 11 is done in perl.h. */
74
b6007c36
DM
75#define LEX_NORMAL 10 /* normal code (ie not within "...") */
76#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
77#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
78#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
79#define LEX_INTERPSTART 6 /* expecting the start of a $var */
80
81 /* at end of code, eg "$x" followed by: */
82#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
83#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
84
85#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
86 string or after \E, $foo, etc */
87#define LEX_INTERPCONST 2 /* NOT USED */
88#define LEX_FORMLINE 1 /* expecting a format line */
89#define LEX_KNOWNEXT 0 /* next token known; just return it */
90
79072805 91
bbf60fe6 92#ifdef DEBUGGING
27da23d5 93static const char* const lex_state_names[] = {
bbf60fe6
DM
94 "KNOWNEXT",
95 "FORMLINE",
96 "INTERPCONST",
97 "INTERPCONCAT",
98 "INTERPENDMAYBE",
99 "INTERPEND",
100 "INTERPSTART",
101 "INTERPPUSH",
102 "INTERPCASEMOD",
103 "INTERPNORMAL",
104 "NORMAL"
105};
106#endif
107
79072805
LW
108#ifdef ff_next
109#undef ff_next
d48672a2
LW
110#endif
111
79072805 112#include "keywords.h"
fe14fcc3 113
ffb4593c
NT
114/* CLINE is a macro that ensures PL_copline has a sane value */
115
ae986130
LW
116#ifdef CLINE
117#undef CLINE
118#endif
57843af0 119#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 120
5db06880 121#ifdef PERL_MAD
29595ff2
NC
122# define SKIPSPACE0(s) skipspace0(s)
123# define SKIPSPACE1(s) skipspace1(s)
124# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
125# define PEEKSPACE(s) skipspace2(s,0)
126#else
127# define SKIPSPACE0(s) skipspace(s)
128# define SKIPSPACE1(s) skipspace(s)
129# define SKIPSPACE2(s,tsv) skipspace(s)
130# define PEEKSPACE(s) skipspace(s)
131#endif
132
ffb4593c
NT
133/*
134 * Convenience functions to return different tokens and prime the
9cbb5ea2 135 * lexer for the next token. They all take an argument.
ffb4593c
NT
136 *
137 * TOKEN : generic token (used for '(', DOLSHARP, etc)
138 * OPERATOR : generic operator
139 * AOPERATOR : assignment operator
140 * PREBLOCK : beginning the block after an if, while, foreach, ...
141 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
142 * PREREF : *EXPR where EXPR is not a simple identifier
143 * TERM : expression term
144 * LOOPX : loop exiting command (goto, last, dump, etc)
145 * FTST : file test operator
146 * FUN0 : zero-argument function
2d2e263d 147 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
148 * BOop : bitwise or or xor
149 * BAop : bitwise and
150 * SHop : shift operator
151 * PWop : power operator
9cbb5ea2 152 * PMop : pattern-matching operator
ffb4593c
NT
153 * Aop : addition-level operator
154 * Mop : multiplication-level operator
155 * Eop : equality-testing operator
e5edeb50 156 * Rop : relational operator <= != gt
ffb4593c
NT
157 *
158 * Also see LOP and lop() below.
159 */
160
998054bd 161#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 162# define REPORT(retval) tokereport((I32)retval)
998054bd 163#else
bbf60fe6 164# define REPORT(retval) (retval)
998054bd
SC
165#endif
166
bbf60fe6
DM
167#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
168#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
169#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
170#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
171#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
172#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
173#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
174#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
175#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
176#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
177#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
178#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
179#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
180#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
181#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
182#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
183#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
184#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
185#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
186#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 187
a687059c
LW
188/* This bit of chicanery makes a unary function followed by
189 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
190 * The UNIDOR macro is for unary functions that can be followed by the //
191 * operator (such as C<shift // 0>).
a687059c 192 */
376fcdbf
AL
193#define UNI2(f,x) { \
194 yylval.ival = f; \
195 PL_expect = x; \
196 PL_bufptr = s; \
197 PL_last_uni = PL_oldbufptr; \
198 PL_last_lop_op = f; \
199 if (*s == '(') \
200 return REPORT( (int)FUNC1 ); \
29595ff2 201 s = PEEKSPACE(s); \
376fcdbf
AL
202 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
203 }
6f33ba73
RGS
204#define UNI(f) UNI2(f,XTERM)
205#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 206
376fcdbf
AL
207#define UNIBRACK(f) { \
208 yylval.ival = f; \
209 PL_bufptr = s; \
210 PL_last_uni = PL_oldbufptr; \
211 if (*s == '(') \
212 return REPORT( (int)FUNC1 ); \
29595ff2 213 s = PEEKSPACE(s); \
376fcdbf
AL
214 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
215 }
79072805 216
9f68db38 217/* grandfather return to old style */
3280af22 218#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 219
8fa7f367
JH
220#ifdef DEBUGGING
221
bbf60fe6
DM
222/* how to interpret the yylval associated with the token */
223enum token_type {
224 TOKENTYPE_NONE,
225 TOKENTYPE_IVAL,
226 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
227 TOKENTYPE_PVAL,
228 TOKENTYPE_OPVAL,
229 TOKENTYPE_GVVAL
230};
231
6d4a66ac
NC
232static struct debug_tokens {
233 const int token;
234 enum token_type type;
235 const char *name;
236} const debug_tokens[] =
9041c2e3 237{
bbf60fe6
DM
238 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
239 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
240 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
241 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
242 { ARROW, TOKENTYPE_NONE, "ARROW" },
243 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
244 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
245 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
246 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
247 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 248 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
249 { DO, TOKENTYPE_NONE, "DO" },
250 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
251 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
252 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
253 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
254 { ELSE, TOKENTYPE_NONE, "ELSE" },
255 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
256 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
257 { FOR, TOKENTYPE_IVAL, "FOR" },
258 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
259 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
260 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
261 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
262 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
263 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 264 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
265 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
266 { IF, TOKENTYPE_IVAL, "IF" },
267 { LABEL, TOKENTYPE_PVAL, "LABEL" },
268 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
269 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
270 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
271 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
272 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
273 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
274 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
275 { MY, TOKENTYPE_IVAL, "MY" },
276 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
277 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
278 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
279 { OROP, TOKENTYPE_IVAL, "OROP" },
280 { OROR, TOKENTYPE_NONE, "OROR" },
281 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
282 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
283 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
284 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
285 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
286 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
287 { PREINC, TOKENTYPE_NONE, "PREINC" },
288 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
289 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
290 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
291 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
292 { SUB, TOKENTYPE_NONE, "SUB" },
293 { THING, TOKENTYPE_OPVAL, "THING" },
294 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
295 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
296 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
297 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
298 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
299 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 300 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
301 { WHILE, TOKENTYPE_IVAL, "WHILE" },
302 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 303 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
304};
305
306/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 307
bbf60fe6 308STATIC int
f5bd084c 309S_tokereport(pTHX_ I32 rv)
bbf60fe6 310{
97aff369 311 dVAR;
bbf60fe6 312 if (DEBUG_T_TEST) {
bd61b366 313 const char *name = NULL;
bbf60fe6 314 enum token_type type = TOKENTYPE_NONE;
f54cb97a 315 const struct debug_tokens *p;
396482e1 316 SV* const report = newSVpvs("<== ");
bbf60fe6 317
f54cb97a 318 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
319 if (p->token == (int)rv) {
320 name = p->name;
321 type = p->type;
322 break;
323 }
324 }
325 if (name)
54667de8 326 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
327 else if ((char)rv > ' ' && (char)rv < '~')
328 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
329 else if (!rv)
396482e1 330 sv_catpvs(report, "EOF");
bbf60fe6
DM
331 else
332 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
333 switch (type) {
334 case TOKENTYPE_NONE:
335 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
336 break;
337 case TOKENTYPE_IVAL:
e4584336 338 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
339 break;
340 case TOKENTYPE_OPNUM:
341 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
342 PL_op_name[yylval.ival]);
343 break;
344 case TOKENTYPE_PVAL:
345 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
346 break;
347 case TOKENTYPE_OPVAL:
b6007c36 348 if (yylval.opval) {
401441c0 349 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 350 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
351 if (yylval.opval->op_type == OP_CONST) {
352 Perl_sv_catpvf(aTHX_ report, " %s",
353 SvPEEK(cSVOPx_sv(yylval.opval)));
354 }
355
356 }
401441c0 357 else
396482e1 358 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
359 break;
360 }
b6007c36 361 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
362 };
363 return (int)rv;
998054bd
SC
364}
365
b6007c36
DM
366
367/* print the buffer with suitable escapes */
368
369STATIC void
370S_printbuf(pTHX_ const char* fmt, const char* s)
371{
396482e1 372 SV* const tmp = newSVpvs("");
b6007c36
DM
373 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
374 SvREFCNT_dec(tmp);
375}
376
8fa7f367
JH
377#endif
378
ffb4593c
NT
379/*
380 * S_ao
381 *
c963b151
BD
382 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
383 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
384 */
385
76e3520e 386STATIC int
cea2e8a9 387S_ao(pTHX_ int toketype)
a0d0e21e 388{
97aff369 389 dVAR;
3280af22
NIS
390 if (*PL_bufptr == '=') {
391 PL_bufptr++;
a0d0e21e
LW
392 if (toketype == ANDAND)
393 yylval.ival = OP_ANDASSIGN;
394 else if (toketype == OROR)
395 yylval.ival = OP_ORASSIGN;
c963b151
BD
396 else if (toketype == DORDOR)
397 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
398 toketype = ASSIGNOP;
399 }
400 return toketype;
401}
402
ffb4593c
NT
403/*
404 * S_no_op
405 * When Perl expects an operator and finds something else, no_op
406 * prints the warning. It always prints "<something> found where
407 * operator expected. It prints "Missing semicolon on previous line?"
408 * if the surprise occurs at the start of the line. "do you need to
409 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
410 * where the compiler doesn't know if foo is a method call or a function.
411 * It prints "Missing operator before end of line" if there's nothing
412 * after the missing operator, or "... before <...>" if there is something
413 * after the missing operator.
414 */
415
76e3520e 416STATIC void
bfed75c6 417S_no_op(pTHX_ const char *what, char *s)
463ee0b2 418{
97aff369 419 dVAR;
9d4ba2ae
AL
420 char * const oldbp = PL_bufptr;
421 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 422
1189a94a
GS
423 if (!s)
424 s = oldbp;
07c798fb 425 else
1189a94a 426 PL_bufptr = s;
cea2e8a9 427 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
428 if (ckWARN_d(WARN_SYNTAX)) {
429 if (is_first)
430 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
431 "\t(Missing semicolon on previous line?)\n");
432 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 433 const char *t;
c35e046a
AL
434 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
435 NOOP;
56da5a46
RGS
436 if (t < PL_bufptr && isSPACE(*t))
437 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
438 "\t(Do you need to predeclare %.*s?)\n",
551405c4 439 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
440 }
441 else {
442 assert(s >= oldbp);
443 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 444 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 445 }
07c798fb 446 }
3280af22 447 PL_bufptr = oldbp;
8990e307
LW
448}
449
ffb4593c
NT
450/*
451 * S_missingterm
452 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 453 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
454 * If we're in a delimited string and the delimiter is a control
455 * character, it's reformatted into a two-char sequence like ^C.
456 * This is fatal.
457 */
458
76e3520e 459STATIC void
cea2e8a9 460S_missingterm(pTHX_ char *s)
8990e307 461{
97aff369 462 dVAR;
8990e307
LW
463 char tmpbuf[3];
464 char q;
465 if (s) {
9d4ba2ae 466 char * const nl = strrchr(s,'\n');
d2719217 467 if (nl)
8990e307
LW
468 *nl = '\0';
469 }
9d116dd7
JH
470 else if (
471#ifdef EBCDIC
472 iscntrl(PL_multi_close)
473#else
474 PL_multi_close < 32 || PL_multi_close == 127
475#endif
476 ) {
8990e307 477 *tmpbuf = '^';
585ec06d 478 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
479 tmpbuf[2] = '\0';
480 s = tmpbuf;
481 }
482 else {
eb160463 483 *tmpbuf = (char)PL_multi_close;
8990e307
LW
484 tmpbuf[1] = '\0';
485 s = tmpbuf;
486 }
487 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 488 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 489}
79072805 490
ef89dcc3 491#define FEATURE_IS_ENABLED(name) \
0d863452 492 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 493 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
494/*
495 * S_feature_is_enabled
496 * Check whether the named feature is enabled.
497 */
498STATIC bool
d4c19fe8 499S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 500{
97aff369 501 dVAR;
0d863452 502 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 503 char he_name[32] = "feature_";
6fca0082 504 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 505
7b9ef140 506 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
507}
508
ffb4593c
NT
509/*
510 * Perl_deprecate
ffb4593c
NT
511 */
512
79072805 513void
bfed75c6 514Perl_deprecate(pTHX_ const char *s)
a0d0e21e 515{
599cee73 516 if (ckWARN(WARN_DEPRECATED))
9014280d 517 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
518}
519
12bcd1a6 520void
bfed75c6 521Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
522{
523 /* This function should NOT be called for any new deprecated warnings */
524 /* Use Perl_deprecate instead */
525 /* */
526 /* It is here to maintain backward compatibility with the pre-5.8 */
527 /* warnings category hierarchy. The "deprecated" category used to */
528 /* live under the "syntax" category. It is now a top-level category */
529 /* in its own right. */
530
531 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 532 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
533 "Use of %s is deprecated", s);
534}
535
ffb4593c 536/*
9cbb5ea2
GS
537 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
538 * utf16-to-utf8-reversed.
ffb4593c
NT
539 */
540
c39cd008
GS
541#ifdef PERL_CR_FILTER
542static void
543strip_return(SV *sv)
544{
95a20fc0 545 register const char *s = SvPVX_const(sv);
9d4ba2ae 546 register const char * const e = s + SvCUR(sv);
c39cd008
GS
547 /* outer loop optimized to do nothing if there are no CR-LFs */
548 while (s < e) {
549 if (*s++ == '\r' && *s == '\n') {
550 /* hit a CR-LF, need to copy the rest */
551 register char *d = s - 1;
552 *d++ = *s++;
553 while (s < e) {
554 if (*s == '\r' && s[1] == '\n')
555 s++;
556 *d++ = *s++;
557 }
558 SvCUR(sv) -= s - d;
559 return;
560 }
561 }
562}
a868473f 563
76e3520e 564STATIC I32
c39cd008 565S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 566{
f54cb97a 567 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
568 if (count > 0 && !maxlen)
569 strip_return(sv);
570 return count;
a868473f
NIS
571}
572#endif
573
ffb4593c
NT
574/*
575 * Perl_lex_start
9cbb5ea2
GS
576 * Initialize variables. Uses the Perl save_stack to save its state (for
577 * recursive calls to the parser).
ffb4593c
NT
578 */
579
a0d0e21e 580void
864dbfa3 581Perl_lex_start(pTHX_ SV *line)
79072805 582{
97aff369 583 dVAR;
6ef55633 584 const char *s = NULL;
8990e307 585 STRLEN len;
acdf0a21
DM
586 yy_parser *parser;
587
588 /* create and initialise a parser */
589
590 Newx(parser, 1, yy_parser);
591 parser->old_parser = PL_parser;
592 PL_parser = parser;
593
594 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
595 parser->ps = parser->stack;
596 parser->stack_size = YYINITDEPTH;
597
598 parser->stack->state = 0;
599 parser->yyerrstatus = 0;
600 parser->yychar = YYEMPTY; /* Cause a token to be read. */
601
602 /* initialise lexer state */
8990e307 603
3280af22
NIS
604 SAVEI32(PL_lex_dojoin);
605 SAVEI32(PL_lex_brackets);
3280af22
NIS
606 SAVEI32(PL_lex_casemods);
607 SAVEI32(PL_lex_starts);
608 SAVEI32(PL_lex_state);
7766f137 609 SAVEVPTR(PL_lex_inpat);
3280af22 610 SAVEI32(PL_lex_inwhat);
5db06880
NC
611#ifdef PERL_MAD
612 if (PL_lex_state == LEX_KNOWNEXT) {
613 I32 toke = PL_lasttoke;
614 while (--toke >= 0) {
615 SAVEI32(PL_nexttoke[toke].next_type);
616 SAVEVPTR(PL_nexttoke[toke].next_val);
617 if (PL_madskills)
618 SAVEVPTR(PL_nexttoke[toke].next_mad);
619 }
620 SAVEI32(PL_lasttoke);
621 }
02b34bbe
DM
622 SAVESPTR(PL_endwhite);
623 SAVESPTR(PL_thistoken);
624 SAVESPTR(PL_thiswhite);
625 SAVESPTR(PL_nextwhite);
626 SAVESPTR(PL_thisopen);
627 SAVESPTR(PL_thisclose);
628 SAVESPTR(PL_thisstuff);
629 SAVEVPTR(PL_thismad);
630 SAVEI32(PL_realtokenstart);
631 SAVEI32(PL_faketokens);
632 SAVESPTR(PL_skipwhite);
cd81e915 633 SAVEI32(PL_curforce);
5db06880 634#else
18b09519
GS
635 if (PL_lex_state == LEX_KNOWNEXT) {
636 I32 toke = PL_nexttoke;
637 while (--toke >= 0) {
638 SAVEI32(PL_nexttype[toke]);
639 SAVEVPTR(PL_nextval[toke]);
640 }
641 SAVEI32(PL_nexttoke);
18b09519 642 }
5db06880 643#endif
57843af0 644 SAVECOPLINE(PL_curcop);
3280af22
NIS
645 SAVEPPTR(PL_bufptr);
646 SAVEPPTR(PL_bufend);
647 SAVEPPTR(PL_oldbufptr);
648 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
649 SAVEPPTR(PL_last_lop);
650 SAVEPPTR(PL_last_uni);
3280af22
NIS
651 SAVEPPTR(PL_linestart);
652 SAVESPTR(PL_linestr);
8edd5f42
RGS
653 SAVEGENERICPV(PL_lex_brackstack);
654 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 655 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
656 SAVESPTR(PL_lex_stuff);
657 SAVEI32(PL_lex_defer);
09bef843 658 SAVEI32(PL_sublex_info.sub_inwhat);
02b34bbe
DM
659 SAVEI32(PL_sublex_info.super_state);
660 SAVEVPTR(PL_sublex_info.sub_op);
661 SAVEPPTR(PL_sublex_info.super_bufptr);
662 SAVEPPTR(PL_sublex_info.super_bufend);
3280af22 663 SAVESPTR(PL_lex_repl);
bebdddfc
GS
664 SAVEINT(PL_expect);
665 SAVEINT(PL_lex_expect);
02b34bbe
DM
666 SAVEI32(PL_lex_formbrack);
667 SAVEVPTR(PL_lex_op);
668 SAVEI32(PL_multi_close);
669 SAVEI32(PL_multi_open);
670 SAVEI32(PL_multi_start);
671 SAVEI8(PL_pending_ident);
672 SAVEBOOL(PL_preambled);
3280af22
NIS
673
674 PL_lex_state = LEX_NORMAL;
675 PL_lex_defer = 0;
676 PL_expect = XSTATE;
677 PL_lex_brackets = 0;
a02a5408
JC
678 Newx(PL_lex_brackstack, 120, char);
679 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
680 PL_lex_casemods = 0;
681 *PL_lex_casestack = '\0';
682 PL_lex_dojoin = 0;
683 PL_lex_starts = 0;
a0714e2c
SS
684 PL_lex_stuff = NULL;
685 PL_lex_repl = NULL;
3280af22 686 PL_lex_inpat = 0;
5db06880
NC
687#ifdef PERL_MAD
688 PL_lasttoke = 0;
02b34bbe
DM
689 PL_endwhite = NULL;
690 PL_faketokens = 0;
691 PL_nextwhite = NULL;
692 PL_realtokenstart = 0;
693 PL_skipwhite = NULL;
694 PL_thisclose = NULL;
695 PL_thisopen = NULL;
696 PL_thisstuff = NULL;
697 PL_thistoken = NULL;
698 PL_thiswhite = NULL;
699 PL_thismad = NULL;
5db06880 700#else
76be56bc 701 PL_nexttoke = 0;
5db06880 702#endif
3280af22 703 PL_lex_inwhat = 0;
09bef843 704 PL_sublex_info.sub_inwhat = 0;
02b34bbe
DM
705 PL_sublex_info.super_state = 0;
706 PL_sublex_info.sub_op = NULL;
707 PL_sublex_info.super_bufptr = NULL;
708 PL_sublex_info.super_bufend = NULL;
709 PL_lex_expect = 0;
710 PL_lex_formbrack = 0;
711 PL_lex_op = NULL;
712 PL_multi_close = 0;
713 PL_multi_open = 0;
714 PL_multi_start = 0;
715 PL_pending_ident = '\0';
716 PL_preambled = FALSE;
717
10efb74f
NC
718 if (line) {
719 s = SvPV_const(line, len);
720 } else {
721 len = 0;
722 }
723 if (!len) {
724 PL_linestr = newSVpvs("\n;");
725 } else if (SvREADONLY(line) || s[len-1] != ';') {
726 PL_linestr = newSVsv(line);
727 if (s[len-1] != ';')
0eb20fa2 728 sv_catpvs(PL_linestr, "\n;");
6c5ce11d
NC
729 } else {
730 SvTEMP_off(line);
731 SvREFCNT_inc_simple_void_NN(line);
732 PL_linestr = line;
8990e307 733 }
db4997f0
NC
734 /* PL_linestr needs to survive until end of scope, not just the next
735 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
db4997f0 736 SAVEFREESV(PL_linestr);
3280af22
NIS
737 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
738 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 739 PL_last_lop = PL_last_uni = NULL;
3280af22 740 PL_rsfp = 0;
79072805 741}
a687059c 742
ffb4593c
NT
743/*
744 * Perl_lex_end
9cbb5ea2
GS
745 * Finalizer for lexing operations. Must be called when the parser is
746 * done with the lexer.
ffb4593c
NT
747 */
748
463ee0b2 749void
864dbfa3 750Perl_lex_end(pTHX)
463ee0b2 751{
97aff369 752 dVAR;
3280af22 753 PL_doextract = FALSE;
463ee0b2
LW
754}
755
ffb4593c
NT
756/*
757 * S_incline
758 * This subroutine has nothing to do with tilting, whether at windmills
759 * or pinball tables. Its name is short for "increment line". It
57843af0 760 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 761 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
762 * # line 500 "foo.pm"
763 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
764 */
765
76e3520e 766STATIC void
cea2e8a9 767S_incline(pTHX_ char *s)
463ee0b2 768{
97aff369 769 dVAR;
463ee0b2
LW
770 char *t;
771 char *n;
73659bf1 772 char *e;
463ee0b2 773 char ch;
463ee0b2 774
57843af0 775 CopLINE_inc(PL_curcop);
463ee0b2
LW
776 if (*s++ != '#')
777 return;
d4c19fe8
AL
778 while (SPACE_OR_TAB(*s))
779 s++;
73659bf1
GS
780 if (strnEQ(s, "line", 4))
781 s += 4;
782 else
783 return;
084592ab 784 if (SPACE_OR_TAB(*s))
73659bf1 785 s++;
4e553d73 786 else
73659bf1 787 return;
d4c19fe8
AL
788 while (SPACE_OR_TAB(*s))
789 s++;
463ee0b2
LW
790 if (!isDIGIT(*s))
791 return;
d4c19fe8 792
463ee0b2
LW
793 n = s;
794 while (isDIGIT(*s))
795 s++;
bf4acbe4 796 while (SPACE_OR_TAB(*s))
463ee0b2 797 s++;
73659bf1 798 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 799 s++;
73659bf1
GS
800 e = t + 1;
801 }
463ee0b2 802 else {
c35e046a
AL
803 t = s;
804 while (!isSPACE(*t))
805 t++;
73659bf1 806 e = t;
463ee0b2 807 }
bf4acbe4 808 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
809 e++;
810 if (*e != '\n' && *e != '\0')
811 return; /* false alarm */
812
463ee0b2
LW
813 ch = *t;
814 *t = '\0';
f4dd75d9 815 if (t - s > 0) {
8a5ee598 816#ifndef USE_ITHREADS
c4420975 817 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
818 STRLEN tmplen = cf ? strlen(cf) : 0;
819 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
820 /* must copy *{"::_<(eval N)[oldfilename:L]"}
821 * to *{"::_<newfilename"} */
822 char smallbuf[256], smallbuf2[256];
823 char *tmpbuf, *tmpbuf2;
8a5ee598 824 GV **gvp, *gv2;
e66cf94c
RGS
825 STRLEN tmplen2 = strlen(s);
826 if (tmplen + 3 < sizeof smallbuf)
827 tmpbuf = smallbuf;
828 else
829 Newx(tmpbuf, tmplen + 3, char);
830 if (tmplen2 + 3 < sizeof smallbuf2)
831 tmpbuf2 = smallbuf2;
832 else
833 Newx(tmpbuf2, tmplen2 + 3, char);
834 tmpbuf[0] = tmpbuf2[0] = '_';
835 tmpbuf[1] = tmpbuf2[1] = '<';
836 memcpy(tmpbuf + 2, cf, ++tmplen);
837 memcpy(tmpbuf2 + 2, s, ++tmplen2);
838 ++tmplen; ++tmplen2;
8a5ee598
RGS
839 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
840 if (gvp) {
841 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 842 if (!isGV(gv2)) {
8a5ee598 843 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
844 /* adjust ${"::_<newfilename"} to store the new file name */
845 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
846 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
847 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
848 }
8a5ee598 849 }
e66cf94c
RGS
850 if (tmpbuf != smallbuf) Safefree(tmpbuf);
851 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
852 }
8a5ee598 853#endif
05ec9bb3 854 CopFILE_free(PL_curcop);
57843af0 855 CopFILE_set(PL_curcop, s);
f4dd75d9 856 }
463ee0b2 857 *t = ch;
57843af0 858 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
859}
860
29595ff2 861#ifdef PERL_MAD
cd81e915 862/* skip space before PL_thistoken */
29595ff2
NC
863
864STATIC char *
865S_skipspace0(pTHX_ register char *s)
866{
867 s = skipspace(s);
868 if (!PL_madskills)
869 return s;
cd81e915
NC
870 if (PL_skipwhite) {
871 if (!PL_thiswhite)
6b29d1f5 872 PL_thiswhite = newSVpvs("");
cd81e915
NC
873 sv_catsv(PL_thiswhite, PL_skipwhite);
874 sv_free(PL_skipwhite);
875 PL_skipwhite = 0;
876 }
877 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
878 return s;
879}
880
cd81e915 881/* skip space after PL_thistoken */
29595ff2
NC
882
883STATIC char *
884S_skipspace1(pTHX_ register char *s)
885{
d4c19fe8 886 const char *start = s;
29595ff2
NC
887 I32 startoff = start - SvPVX(PL_linestr);
888
889 s = skipspace(s);
890 if (!PL_madskills)
891 return s;
892 start = SvPVX(PL_linestr) + startoff;
cd81e915 893 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 894 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
895 PL_thistoken = newSVpvn(tstart, start - tstart);
896 }
897 PL_realtokenstart = -1;
898 if (PL_skipwhite) {
899 if (!PL_nextwhite)
6b29d1f5 900 PL_nextwhite = newSVpvs("");
cd81e915
NC
901 sv_catsv(PL_nextwhite, PL_skipwhite);
902 sv_free(PL_skipwhite);
903 PL_skipwhite = 0;
29595ff2
NC
904 }
905 return s;
906}
907
908STATIC char *
909S_skipspace2(pTHX_ register char *s, SV **svp)
910{
c35e046a
AL
911 char *start;
912 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
913 const I32 startoff = s - SvPVX(PL_linestr);
914
29595ff2
NC
915 s = skipspace(s);
916 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
917 if (!PL_madskills || !svp)
918 return s;
919 start = SvPVX(PL_linestr) + startoff;
cd81e915 920 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 921 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
922 PL_thistoken = newSVpvn(tstart, start - tstart);
923 PL_realtokenstart = -1;
29595ff2 924 }
cd81e915 925 if (PL_skipwhite) {
29595ff2 926 if (!*svp)
6b29d1f5 927 *svp = newSVpvs("");
cd81e915
NC
928 sv_setsv(*svp, PL_skipwhite);
929 sv_free(PL_skipwhite);
930 PL_skipwhite = 0;
29595ff2
NC
931 }
932
933 return s;
934}
935#endif
936
80a702cd
RGS
937STATIC void
938S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
939{
940 AV *av = CopFILEAVx(PL_curcop);
941 if (av) {
942 SV * const sv = newSV(0);
943 sv_upgrade(sv, SVt_PVMG);
944 sv_setpvn(sv, buf, len);
945 (void)SvIOK_on(sv);
946 SvIV_set(sv, 0);
947 av_store(av, (I32)CopLINE(PL_curcop), sv);
948 }
949}
950
951STATIC void
952S_update_debugger_info_sv(pTHX_ SV *orig_sv)
953{
954 AV *av = CopFILEAVx(PL_curcop);
955 if (av) {
956 SV * const sv = newSV(0);
957 sv_upgrade(sv, SVt_PVMG);
958 sv_setsv(sv, orig_sv);
959 (void)SvIOK_on(sv);
960 SvIV_set(sv, 0);
961 av_store(av, (I32)CopLINE(PL_curcop), sv);
962 }
963}
964
ffb4593c
NT
965/*
966 * S_skipspace
967 * Called to gobble the appropriate amount and type of whitespace.
968 * Skips comments as well.
969 */
970
76e3520e 971STATIC char *
cea2e8a9 972S_skipspace(pTHX_ register char *s)
a687059c 973{
97aff369 974 dVAR;
5db06880
NC
975#ifdef PERL_MAD
976 int curoff;
977 int startoff = s - SvPVX(PL_linestr);
978
cd81e915
NC
979 if (PL_skipwhite) {
980 sv_free(PL_skipwhite);
981 PL_skipwhite = 0;
5db06880
NC
982 }
983#endif
984
3280af22 985 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 986 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 987 s++;
5db06880
NC
988#ifdef PERL_MAD
989 goto done;
990#else
463ee0b2 991 return s;
5db06880 992#endif
463ee0b2
LW
993 }
994 for (;;) {
fd049845 995 STRLEN prevlen;
09bef843 996 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 997 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
998 while (s < PL_bufend && isSPACE(*s)) {
999 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1000 incline(s);
1001 }
ffb4593c
NT
1002
1003 /* comment */
3280af22
NIS
1004 if (s < PL_bufend && *s == '#') {
1005 while (s < PL_bufend && *s != '\n')
463ee0b2 1006 s++;
60e6418e 1007 if (s < PL_bufend) {
463ee0b2 1008 s++;
60e6418e
GS
1009 if (PL_in_eval && !PL_rsfp) {
1010 incline(s);
1011 continue;
1012 }
1013 }
463ee0b2 1014 }
ffb4593c
NT
1015
1016 /* only continue to recharge the buffer if we're at the end
1017 * of the buffer, we're not reading from a source filter, and
1018 * we're in normal lexing mode
1019 */
09bef843
SB
1020 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1021 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1022#ifdef PERL_MAD
1023 goto done;
1024#else
463ee0b2 1025 return s;
5db06880 1026#endif
ffb4593c
NT
1027
1028 /* try to recharge the buffer */
5db06880
NC
1029#ifdef PERL_MAD
1030 curoff = s - SvPVX(PL_linestr);
1031#endif
1032
9cbb5ea2 1033 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1034 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1035 {
5db06880
NC
1036#ifdef PERL_MAD
1037 if (PL_madskills && curoff != startoff) {
cd81e915 1038 if (!PL_skipwhite)
6b29d1f5 1039 PL_skipwhite = newSVpvs("");
cd81e915 1040 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1041 curoff - startoff);
1042 }
1043
1044 /* mustn't throw out old stuff yet if madpropping */
1045 SvCUR(PL_linestr) = curoff;
1046 s = SvPVX(PL_linestr) + curoff;
1047 *s = 0;
1048 if (curoff && s[-1] == '\n')
1049 s[-1] = ' ';
1050#endif
1051
9cbb5ea2 1052 /* end of file. Add on the -p or -n magic */
cd81e915 1053 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1054 if (PL_minus_p) {
5db06880
NC
1055#ifdef PERL_MAD
1056 sv_catpv(PL_linestr,
1057 ";}continue{print or die qq(-p destination: $!\\n);}");
1058#else
01a19ab0
NC
1059 sv_setpv(PL_linestr,
1060 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1061#endif
3280af22 1062 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1063 }
01a19ab0 1064 else if (PL_minus_n) {
5db06880
NC
1065#ifdef PERL_MAD
1066 sv_catpvn(PL_linestr, ";}", 2);
1067#else
01a19ab0 1068 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1069#endif
01a19ab0
NC
1070 PL_minus_n = 0;
1071 }
a0d0e21e 1072 else
5db06880
NC
1073#ifdef PERL_MAD
1074 sv_catpvn(PL_linestr,";", 1);
1075#else
4147a61b 1076 sv_setpvn(PL_linestr,";", 1);
5db06880 1077#endif
ffb4593c
NT
1078
1079 /* reset variables for next time we lex */
9cbb5ea2 1080 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1081 = SvPVX(PL_linestr)
1082#ifdef PERL_MAD
1083 + curoff
1084#endif
1085 ;
3280af22 1086 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1087 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1088
1089 /* Close the filehandle. Could be from -P preprocessor,
1090 * STDIN, or a regular file. If we were reading code from
1091 * STDIN (because the commandline held no -e or filename)
1092 * then we don't close it, we reset it so the code can
1093 * read from STDIN too.
1094 */
1095
3280af22
NIS
1096 if (PL_preprocess && !PL_in_eval)
1097 (void)PerlProc_pclose(PL_rsfp);
1098 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1099 PerlIO_clearerr(PL_rsfp);
8990e307 1100 else
3280af22 1101 (void)PerlIO_close(PL_rsfp);
4608196e 1102 PL_rsfp = NULL;
463ee0b2
LW
1103 return s;
1104 }
ffb4593c
NT
1105
1106 /* not at end of file, so we only read another line */
09bef843
SB
1107 /* make corresponding updates to old pointers, for yyerror() */
1108 oldprevlen = PL_oldbufptr - PL_bufend;
1109 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1110 if (PL_last_uni)
1111 oldunilen = PL_last_uni - PL_bufend;
1112 if (PL_last_lop)
1113 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1114 PL_linestart = PL_bufptr = s + prevlen;
1115 PL_bufend = s + SvCUR(PL_linestr);
1116 s = PL_bufptr;
09bef843
SB
1117 PL_oldbufptr = s + oldprevlen;
1118 PL_oldoldbufptr = s + oldoldprevlen;
1119 if (PL_last_uni)
1120 PL_last_uni = s + oldunilen;
1121 if (PL_last_lop)
1122 PL_last_lop = s + oldloplen;
a0d0e21e 1123 incline(s);
ffb4593c
NT
1124
1125 /* debugger active and we're not compiling the debugger code,
1126 * so store the line into the debugger's array of lines
1127 */
80a702cd
RGS
1128 if (PERLDB_LINE && PL_curstash != PL_debstash)
1129 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1130 }
5db06880
NC
1131
1132#ifdef PERL_MAD
1133 done:
1134 if (PL_madskills) {
cd81e915 1135 if (!PL_skipwhite)
6b29d1f5 1136 PL_skipwhite = newSVpvs("");
5db06880
NC
1137 curoff = s - SvPVX(PL_linestr);
1138 if (curoff - startoff)
cd81e915 1139 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1140 curoff - startoff);
1141 }
1142 return s;
1143#endif
a687059c 1144}
378cc40b 1145
ffb4593c
NT
1146/*
1147 * S_check_uni
1148 * Check the unary operators to ensure there's no ambiguity in how they're
1149 * used. An ambiguous piece of code would be:
1150 * rand + 5
1151 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1152 * the +5 is its argument.
1153 */
1154
76e3520e 1155STATIC void
cea2e8a9 1156S_check_uni(pTHX)
ba106d47 1157{
97aff369 1158 dVAR;
d4c19fe8
AL
1159 const char *s;
1160 const char *t;
2f3197b3 1161
3280af22 1162 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1163 return;
3280af22
NIS
1164 while (isSPACE(*PL_last_uni))
1165 PL_last_uni++;
c35e046a
AL
1166 s = PL_last_uni;
1167 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1168 s++;
3280af22 1169 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1170 return;
6136c704 1171
0453d815 1172 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1173 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1174 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1175 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1176 }
2f3197b3
LW
1177}
1178
ffb4593c
NT
1179/*
1180 * LOP : macro to build a list operator. Its behaviour has been replaced
1181 * with a subroutine, S_lop() for which LOP is just another name.
1182 */
1183
a0d0e21e
LW
1184#define LOP(f,x) return lop(f,x,s)
1185
ffb4593c
NT
1186/*
1187 * S_lop
1188 * Build a list operator (or something that might be one). The rules:
1189 * - if we have a next token, then it's a list operator [why?]
1190 * - if the next thing is an opening paren, then it's a function
1191 * - else it's a list operator
1192 */
1193
76e3520e 1194STATIC I32
a0be28da 1195S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1196{
97aff369 1197 dVAR;
79072805 1198 yylval.ival = f;
35c8bce7 1199 CLINE;
3280af22
NIS
1200 PL_expect = x;
1201 PL_bufptr = s;
1202 PL_last_lop = PL_oldbufptr;
eb160463 1203 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1204#ifdef PERL_MAD
1205 if (PL_lasttoke)
1206 return REPORT(LSTOP);
1207#else
3280af22 1208 if (PL_nexttoke)
bbf60fe6 1209 return REPORT(LSTOP);
5db06880 1210#endif
79072805 1211 if (*s == '(')
bbf60fe6 1212 return REPORT(FUNC);
29595ff2 1213 s = PEEKSPACE(s);
79072805 1214 if (*s == '(')
bbf60fe6 1215 return REPORT(FUNC);
79072805 1216 else
bbf60fe6 1217 return REPORT(LSTOP);
79072805
LW
1218}
1219
5db06880
NC
1220#ifdef PERL_MAD
1221 /*
1222 * S_start_force
1223 * Sets up for an eventual force_next(). start_force(0) basically does
1224 * an unshift, while start_force(-1) does a push. yylex removes items
1225 * on the "pop" end.
1226 */
1227
1228STATIC void
1229S_start_force(pTHX_ int where)
1230{
1231 int i;
1232
cd81e915 1233 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1234 where = PL_lasttoke;
cd81e915
NC
1235 assert(PL_curforce < 0 || PL_curforce == where);
1236 if (PL_curforce != where) {
5db06880
NC
1237 for (i = PL_lasttoke; i > where; --i) {
1238 PL_nexttoke[i] = PL_nexttoke[i-1];
1239 }
1240 PL_lasttoke++;
1241 }
cd81e915 1242 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1243 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1244 PL_curforce = where;
1245 if (PL_nextwhite) {
5db06880 1246 if (PL_madskills)
6b29d1f5 1247 curmad('^', newSVpvs(""));
cd81e915 1248 CURMAD('_', PL_nextwhite);
5db06880
NC
1249 }
1250}
1251
1252STATIC void
1253S_curmad(pTHX_ char slot, SV *sv)
1254{
1255 MADPROP **where;
1256
1257 if (!sv)
1258 return;
cd81e915
NC
1259 if (PL_curforce < 0)
1260 where = &PL_thismad;
5db06880 1261 else
cd81e915 1262 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1263
cd81e915 1264 if (PL_faketokens)
5db06880
NC
1265 sv_setpvn(sv, "", 0);
1266 else {
1267 if (!IN_BYTES) {
1268 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1269 SvUTF8_on(sv);
1270 else if (PL_encoding) {
1271 sv_recode_to_utf8(sv, PL_encoding);
1272 }
1273 }
1274 }
1275
1276 /* keep a slot open for the head of the list? */
1277 if (slot != '_' && *where && (*where)->mad_key == '^') {
1278 (*where)->mad_key = slot;
1279 sv_free((*where)->mad_val);
1280 (*where)->mad_val = (void*)sv;
1281 }
1282 else
1283 addmad(newMADsv(slot, sv), where, 0);
1284}
1285#else
b3f24c00
MHM
1286# define start_force(where) NOOP
1287# define curmad(slot, sv) NOOP
5db06880
NC
1288#endif
1289
ffb4593c
NT
1290/*
1291 * S_force_next
9cbb5ea2 1292 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1293 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1294 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1295 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1296 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1297 */
1298
4e553d73 1299STATIC void
cea2e8a9 1300S_force_next(pTHX_ I32 type)
79072805 1301{
97aff369 1302 dVAR;
5db06880 1303#ifdef PERL_MAD
cd81e915 1304 if (PL_curforce < 0)
5db06880 1305 start_force(PL_lasttoke);
cd81e915 1306 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1307 if (PL_lex_state != LEX_KNOWNEXT)
1308 PL_lex_defer = PL_lex_state;
1309 PL_lex_state = LEX_KNOWNEXT;
1310 PL_lex_expect = PL_expect;
cd81e915 1311 PL_curforce = -1;
5db06880 1312#else
3280af22
NIS
1313 PL_nexttype[PL_nexttoke] = type;
1314 PL_nexttoke++;
1315 if (PL_lex_state != LEX_KNOWNEXT) {
1316 PL_lex_defer = PL_lex_state;
1317 PL_lex_expect = PL_expect;
1318 PL_lex_state = LEX_KNOWNEXT;
79072805 1319 }
5db06880 1320#endif
79072805
LW
1321}
1322
d0a148a6
NC
1323STATIC SV *
1324S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1325{
97aff369 1326 dVAR;
9d4ba2ae 1327 SV * const sv = newSVpvn(start,len);
bfed75c6 1328 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1329 SvUTF8_on(sv);
1330 return sv;
1331}
1332
ffb4593c
NT
1333/*
1334 * S_force_word
1335 * When the lexer knows the next thing is a word (for instance, it has
1336 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1337 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1338 * lookahead.
ffb4593c
NT
1339 *
1340 * Arguments:
b1b65b59 1341 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1342 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1343 * int check_keyword : if true, Perl checks to make sure the word isn't
1344 * a keyword (do this if the word is a label, e.g. goto FOO)
1345 * int allow_pack : if true, : characters will also be allowed (require,
1346 * use, etc. do this)
9cbb5ea2 1347 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1348 */
1349
76e3520e 1350STATIC char *
cea2e8a9 1351S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1352{
97aff369 1353 dVAR;
463ee0b2
LW
1354 register char *s;
1355 STRLEN len;
4e553d73 1356
29595ff2 1357 start = SKIPSPACE1(start);
463ee0b2 1358 s = start;
7e2040f0 1359 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1360 (allow_pack && *s == ':') ||
15f0808c 1361 (allow_initial_tick && *s == '\'') )
a0d0e21e 1362 {
3280af22 1363 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1364 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1365 return start;
cd81e915 1366 start_force(PL_curforce);
5db06880
NC
1367 if (PL_madskills)
1368 curmad('X', newSVpvn(start,s-start));
463ee0b2 1369 if (token == METHOD) {
29595ff2 1370 s = SKIPSPACE1(s);
463ee0b2 1371 if (*s == '(')
3280af22 1372 PL_expect = XTERM;
463ee0b2 1373 else {
3280af22 1374 PL_expect = XOPERATOR;
463ee0b2 1375 }
79072805 1376 }
9ded7720 1377 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1378 = (OP*)newSVOP(OP_CONST,0,
1379 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1380 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1381 force_next(token);
1382 }
1383 return s;
1384}
1385
ffb4593c
NT
1386/*
1387 * S_force_ident
9cbb5ea2 1388 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1389 * text only contains the "foo" portion. The first argument is a pointer
1390 * to the "foo", and the second argument is the type symbol to prefix.
1391 * Forces the next token to be a "WORD".
9cbb5ea2 1392 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1393 */
1394
76e3520e 1395STATIC void
bfed75c6 1396S_force_ident(pTHX_ register const char *s, int kind)
79072805 1397{
97aff369 1398 dVAR;
c35e046a 1399 if (*s) {
90e5519e
NC
1400 const STRLEN len = strlen(s);
1401 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1402 start_force(PL_curforce);
9ded7720 1403 NEXTVAL_NEXTTOKE.opval = o;
79072805 1404 force_next(WORD);
748a9306 1405 if (kind) {
11343788 1406 o->op_private = OPpCONST_ENTERED;
55497cff 1407 /* XXX see note in pp_entereval() for why we forgo typo
1408 warnings if the symbol must be introduced in an eval.
1409 GSAR 96-10-12 */
90e5519e
NC
1410 gv_fetchpvn_flags(s, len,
1411 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1412 : GV_ADD,
1413 kind == '$' ? SVt_PV :
1414 kind == '@' ? SVt_PVAV :
1415 kind == '%' ? SVt_PVHV :
a0d0e21e 1416 SVt_PVGV
90e5519e 1417 );
748a9306 1418 }
79072805
LW
1419 }
1420}
1421
1571675a
GS
1422NV
1423Perl_str_to_version(pTHX_ SV *sv)
1424{
1425 NV retval = 0.0;
1426 NV nshift = 1.0;
1427 STRLEN len;
cfd0369c 1428 const char *start = SvPV_const(sv,len);
9d4ba2ae 1429 const char * const end = start + len;
504618e9 1430 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1431 while (start < end) {
ba210ebe 1432 STRLEN skip;
1571675a
GS
1433 UV n;
1434 if (utf)
9041c2e3 1435 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1436 else {
1437 n = *(U8*)start;
1438 skip = 1;
1439 }
1440 retval += ((NV)n)/nshift;
1441 start += skip;
1442 nshift *= 1000;
1443 }
1444 return retval;
1445}
1446
4e553d73 1447/*
ffb4593c
NT
1448 * S_force_version
1449 * Forces the next token to be a version number.
e759cc13
RGS
1450 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1451 * and if "guessing" is TRUE, then no new token is created (and the caller
1452 * must use an alternative parsing method).
ffb4593c
NT
1453 */
1454
76e3520e 1455STATIC char *
e759cc13 1456S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1457{
97aff369 1458 dVAR;
5f66b61c 1459 OP *version = NULL;
44dcb63b 1460 char *d;
5db06880
NC
1461#ifdef PERL_MAD
1462 I32 startoff = s - SvPVX(PL_linestr);
1463#endif
89bfa8cd 1464
29595ff2 1465 s = SKIPSPACE1(s);
89bfa8cd 1466
44dcb63b 1467 d = s;
dd629d5b 1468 if (*d == 'v')
44dcb63b 1469 d++;
44dcb63b 1470 if (isDIGIT(*d)) {
e759cc13
RGS
1471 while (isDIGIT(*d) || *d == '_' || *d == '.')
1472 d++;
5db06880
NC
1473#ifdef PERL_MAD
1474 if (PL_madskills) {
cd81e915 1475 start_force(PL_curforce);
5db06880
NC
1476 curmad('X', newSVpvn(s,d-s));
1477 }
1478#endif
9f3d182e 1479 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1480 SV *ver;
b73d6f50 1481 s = scan_num(s, &yylval);
89bfa8cd 1482 version = yylval.opval;
dd629d5b
GS
1483 ver = cSVOPx(version)->op_sv;
1484 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1485 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1486 SvNV_set(ver, str_to_version(ver));
1571675a 1487 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1488 }
89bfa8cd 1489 }
5db06880
NC
1490 else if (guessing) {
1491#ifdef PERL_MAD
1492 if (PL_madskills) {
cd81e915
NC
1493 sv_free(PL_nextwhite); /* let next token collect whitespace */
1494 PL_nextwhite = 0;
5db06880
NC
1495 s = SvPVX(PL_linestr) + startoff;
1496 }
1497#endif
e759cc13 1498 return s;
5db06880 1499 }
89bfa8cd 1500 }
1501
5db06880
NC
1502#ifdef PERL_MAD
1503 if (PL_madskills && !version) {
cd81e915
NC
1504 sv_free(PL_nextwhite); /* let next token collect whitespace */
1505 PL_nextwhite = 0;
5db06880
NC
1506 s = SvPVX(PL_linestr) + startoff;
1507 }
1508#endif
89bfa8cd 1509 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1510 start_force(PL_curforce);
9ded7720 1511 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1512 force_next(WORD);
89bfa8cd 1513
e759cc13 1514 return s;
89bfa8cd 1515}
1516
ffb4593c
NT
1517/*
1518 * S_tokeq
1519 * Tokenize a quoted string passed in as an SV. It finds the next
1520 * chunk, up to end of string or a backslash. It may make a new
1521 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1522 * turns \\ into \.
1523 */
1524
76e3520e 1525STATIC SV *
cea2e8a9 1526S_tokeq(pTHX_ SV *sv)
79072805 1527{
97aff369 1528 dVAR;
79072805
LW
1529 register char *s;
1530 register char *send;
1531 register char *d;
b3ac6de7
IZ
1532 STRLEN len = 0;
1533 SV *pv = sv;
79072805
LW
1534
1535 if (!SvLEN(sv))
b3ac6de7 1536 goto finish;
79072805 1537
a0d0e21e 1538 s = SvPV_force(sv, len);
21a311ee 1539 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1540 goto finish;
463ee0b2 1541 send = s + len;
79072805
LW
1542 while (s < send && *s != '\\')
1543 s++;
1544 if (s == send)
b3ac6de7 1545 goto finish;
79072805 1546 d = s;
be4731d2 1547 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1548 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1549 if (SvUTF8(sv))
1550 SvUTF8_on(pv);
1551 }
79072805
LW
1552 while (s < send) {
1553 if (*s == '\\') {
a0d0e21e 1554 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1555 s++; /* all that, just for this */
1556 }
1557 *d++ = *s++;
1558 }
1559 *d = '\0';
95a20fc0 1560 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1561 finish:
3280af22 1562 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1563 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1564 return sv;
1565}
1566
ffb4593c
NT
1567/*
1568 * Now come three functions related to double-quote context,
1569 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1570 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1571 * interact with PL_lex_state, and create fake ( ... ) argument lists
1572 * to handle functions and concatenation.
1573 * They assume that whoever calls them will be setting up a fake
1574 * join call, because each subthing puts a ',' after it. This lets
1575 * "lower \luPpEr"
1576 * become
1577 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1578 *
1579 * (I'm not sure whether the spurious commas at the end of lcfirst's
1580 * arguments and join's arguments are created or not).
1581 */
1582
1583/*
1584 * S_sublex_start
1585 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1586 *
1587 * Pattern matching will set PL_lex_op to the pattern-matching op to
1588 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1589 *
1590 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1591 *
1592 * Everything else becomes a FUNC.
1593 *
1594 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1595 * had an OP_CONST or OP_READLINE). This just sets us up for a
1596 * call to S_sublex_push().
1597 */
1598
76e3520e 1599STATIC I32
cea2e8a9 1600S_sublex_start(pTHX)
79072805 1601{
97aff369 1602 dVAR;
0d46e09a 1603 register const I32 op_type = yylval.ival;
79072805
LW
1604
1605 if (op_type == OP_NULL) {
3280af22 1606 yylval.opval = PL_lex_op;
5f66b61c 1607 PL_lex_op = NULL;
79072805
LW
1608 return THING;
1609 }
1610 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1611 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1612
1613 if (SvTYPE(sv) == SVt_PVIV) {
1614 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1615 STRLEN len;
96a5add6 1616 const char * const p = SvPV_const(sv, len);
f54cb97a 1617 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1618 if (SvUTF8(sv))
1619 SvUTF8_on(nsv);
b3ac6de7
IZ
1620 SvREFCNT_dec(sv);
1621 sv = nsv;
4e553d73 1622 }
b3ac6de7 1623 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1624 PL_lex_stuff = NULL;
6f33ba73
RGS
1625 /* Allow <FH> // "foo" */
1626 if (op_type == OP_READLINE)
1627 PL_expect = XTERMORDORDOR;
79072805
LW
1628 return THING;
1629 }
e3f73d4e
RGS
1630 else if (op_type == OP_BACKTICK && PL_lex_op) {
1631 /* readpipe() vas overriden */
1632 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1633 yylval.opval = PL_lex_op;
9b201d7d 1634 PL_lex_op = NULL;
e3f73d4e
RGS
1635 PL_lex_stuff = NULL;
1636 return THING;
1637 }
79072805 1638
3280af22
NIS
1639 PL_sublex_info.super_state = PL_lex_state;
1640 PL_sublex_info.sub_inwhat = op_type;
1641 PL_sublex_info.sub_op = PL_lex_op;
1642 PL_lex_state = LEX_INTERPPUSH;
55497cff 1643
3280af22
NIS
1644 PL_expect = XTERM;
1645 if (PL_lex_op) {
1646 yylval.opval = PL_lex_op;
5f66b61c 1647 PL_lex_op = NULL;
55497cff 1648 return PMFUNC;
1649 }
1650 else
1651 return FUNC;
1652}
1653
ffb4593c
NT
1654/*
1655 * S_sublex_push
1656 * Create a new scope to save the lexing state. The scope will be
1657 * ended in S_sublex_done. Returns a '(', starting the function arguments
1658 * to the uc, lc, etc. found before.
1659 * Sets PL_lex_state to LEX_INTERPCONCAT.
1660 */
1661
76e3520e 1662STATIC I32
cea2e8a9 1663S_sublex_push(pTHX)
55497cff 1664{
27da23d5 1665 dVAR;
f46d017c 1666 ENTER;
55497cff 1667
3280af22
NIS
1668 PL_lex_state = PL_sublex_info.super_state;
1669 SAVEI32(PL_lex_dojoin);
1670 SAVEI32(PL_lex_brackets);
3280af22
NIS
1671 SAVEI32(PL_lex_casemods);
1672 SAVEI32(PL_lex_starts);
1673 SAVEI32(PL_lex_state);
7766f137 1674 SAVEVPTR(PL_lex_inpat);
3280af22 1675 SAVEI32(PL_lex_inwhat);
57843af0 1676 SAVECOPLINE(PL_curcop);
3280af22 1677 SAVEPPTR(PL_bufptr);
8452ff4b 1678 SAVEPPTR(PL_bufend);
3280af22
NIS
1679 SAVEPPTR(PL_oldbufptr);
1680 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1681 SAVEPPTR(PL_last_lop);
1682 SAVEPPTR(PL_last_uni);
3280af22
NIS
1683 SAVEPPTR(PL_linestart);
1684 SAVESPTR(PL_linestr);
8edd5f42
RGS
1685 SAVEGENERICPV(PL_lex_brackstack);
1686 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1687
1688 PL_linestr = PL_lex_stuff;
a0714e2c 1689 PL_lex_stuff = NULL;
3280af22 1690
9cbb5ea2
GS
1691 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1692 = SvPVX(PL_linestr);
3280af22 1693 PL_bufend += SvCUR(PL_linestr);
bd61b366 1694 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1695 SAVEFREESV(PL_linestr);
1696
1697 PL_lex_dojoin = FALSE;
1698 PL_lex_brackets = 0;
a02a5408
JC
1699 Newx(PL_lex_brackstack, 120, char);
1700 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1701 PL_lex_casemods = 0;
1702 *PL_lex_casestack = '\0';
1703 PL_lex_starts = 0;
1704 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1705 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1706
1707 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1708 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1709 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1710 else
5f66b61c 1711 PL_lex_inpat = NULL;
79072805 1712
55497cff 1713 return '(';
79072805
LW
1714}
1715
ffb4593c
NT
1716/*
1717 * S_sublex_done
1718 * Restores lexer state after a S_sublex_push.
1719 */
1720
76e3520e 1721STATIC I32
cea2e8a9 1722S_sublex_done(pTHX)
79072805 1723{
27da23d5 1724 dVAR;
3280af22 1725 if (!PL_lex_starts++) {
396482e1 1726 SV * const sv = newSVpvs("");
9aa983d2
JH
1727 if (SvUTF8(PL_linestr))
1728 SvUTF8_on(sv);
3280af22 1729 PL_expect = XOPERATOR;
9aa983d2 1730 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1731 return THING;
1732 }
1733
3280af22
NIS
1734 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1735 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1736 return yylex();
79072805
LW
1737 }
1738
ffb4593c 1739 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1740 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1741 PL_linestr = PL_lex_repl;
1742 PL_lex_inpat = 0;
1743 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1744 PL_bufend += SvCUR(PL_linestr);
bd61b366 1745 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1746 SAVEFREESV(PL_linestr);
1747 PL_lex_dojoin = FALSE;
1748 PL_lex_brackets = 0;
3280af22
NIS
1749 PL_lex_casemods = 0;
1750 *PL_lex_casestack = '\0';
1751 PL_lex_starts = 0;
25da4f38 1752 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1753 PL_lex_state = LEX_INTERPNORMAL;
1754 PL_lex_starts++;
e9fa98b2
HS
1755 /* we don't clear PL_lex_repl here, so that we can check later
1756 whether this is an evalled subst; that means we rely on the
1757 logic to ensure sublex_done() is called again only via the
1758 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1759 }
e9fa98b2 1760 else {
3280af22 1761 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1762 PL_lex_repl = NULL;
e9fa98b2 1763 }
79072805 1764 return ',';
ffed7fef
LW
1765 }
1766 else {
5db06880
NC
1767#ifdef PERL_MAD
1768 if (PL_madskills) {
cd81e915
NC
1769 if (PL_thiswhite) {
1770 if (!PL_endwhite)
6b29d1f5 1771 PL_endwhite = newSVpvs("");
cd81e915
NC
1772 sv_catsv(PL_endwhite, PL_thiswhite);
1773 PL_thiswhite = 0;
1774 }
1775 if (PL_thistoken)
1776 sv_setpvn(PL_thistoken,"",0);
5db06880 1777 else
cd81e915 1778 PL_realtokenstart = -1;
5db06880
NC
1779 }
1780#endif
f46d017c 1781 LEAVE;
3280af22
NIS
1782 PL_bufend = SvPVX(PL_linestr);
1783 PL_bufend += SvCUR(PL_linestr);
1784 PL_expect = XOPERATOR;
09bef843 1785 PL_sublex_info.sub_inwhat = 0;
79072805 1786 return ')';
ffed7fef
LW
1787 }
1788}
1789
02aa26ce
NT
1790/*
1791 scan_const
1792
1793 Extracts a pattern, double-quoted string, or transliteration. This
1794 is terrifying code.
1795
94def140 1796 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1797 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1798 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1799
94def140
TS
1800 Returns a pointer to the character scanned up to. If this is
1801 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1802 successfully parsed), will leave an OP for the substring scanned
1803 in yylval. Caller must intuit reason for not parsing further
1804 by looking at the next characters herself.
1805
02aa26ce
NT
1806 In patterns:
1807 backslashes:
1808 double-quoted style: \r and \n
1809 regexp special ones: \D \s
94def140
TS
1810 constants: \x31
1811 backrefs: \1
02aa26ce
NT
1812 case and quoting: \U \Q \E
1813 stops on @ and $, but not for $ as tail anchor
1814
1815 In transliterations:
1816 characters are VERY literal, except for - not at the start or end
94def140
TS
1817 of the string, which indicates a range. If the range is in bytes,
1818 scan_const expands the range to the full set of intermediate
1819 characters. If the range is in utf8, the hyphen is replaced with
1820 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1821
1822 In double-quoted strings:
1823 backslashes:
1824 double-quoted style: \r and \n
94def140
TS
1825 constants: \x31
1826 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1827 case and quoting: \U \Q \E
1828 stops on @ and $
1829
1830 scan_const does *not* construct ops to handle interpolated strings.
1831 It stops processing as soon as it finds an embedded $ or @ variable
1832 and leaves it to the caller to work out what's going on.
1833
94def140
TS
1834 embedded arrays (whether in pattern or not) could be:
1835 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1836
1837 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1838
1839 $ in pattern could be $foo or could be tail anchor. Assumption:
1840 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1841 followed by one of "()| \r\n\t"
02aa26ce
NT
1842
1843 \1 (backreferences) are turned into $1
1844
1845 The structure of the code is
1846 while (there's a character to process) {
94def140
TS
1847 handle transliteration ranges
1848 skip regexp comments /(?#comment)/ and codes /(?{code})/
1849 skip #-initiated comments in //x patterns
1850 check for embedded arrays
02aa26ce
NT
1851 check for embedded scalars
1852 if (backslash) {
94def140
TS
1853 leave intact backslashes from leaveit (below)
1854 deprecate \1 in substitution replacements
02aa26ce
NT
1855 handle string-changing backslashes \l \U \Q \E, etc.
1856 switch (what was escaped) {
94def140
TS
1857 handle \- in a transliteration (becomes a literal -)
1858 handle \132 (octal characters)
1859 handle \x15 and \x{1234} (hex characters)
1860 handle \N{name} (named characters)
1861 handle \cV (control characters)
1862 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1863 } (end switch)
1864 } (end if backslash)
1865 } (end while character to read)
4e553d73 1866
02aa26ce
NT
1867*/
1868
76e3520e 1869STATIC char *
cea2e8a9 1870S_scan_const(pTHX_ char *start)
79072805 1871{
97aff369 1872 dVAR;
3280af22 1873 register char *send = PL_bufend; /* end of the constant */
561b68a9 1874 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1875 register char *s = start; /* start of the constant */
1876 register char *d = SvPVX(sv); /* destination for copies */
1877 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1878 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1879 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1880 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1881 UV uv;
4c3a8340
TS
1882#ifdef EBCDIC
1883 UV literal_endpoint = 0;
e294cc5d 1884 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1885#endif
012bcf8d 1886
2b9d42f0
NIS
1887 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1888 /* If we are doing a trans and we know we want UTF8 set expectation */
1889 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1890 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1891 }
1892
1893
79072805 1894 while (s < send || dorange) {
02aa26ce 1895 /* get transliterations out of the way (they're most literal) */
3280af22 1896 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1897 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1898 if (dorange) {
1ba5c669
JH
1899 I32 i; /* current expanded character */
1900 I32 min; /* first character in range */
1901 I32 max; /* last character in range */
02aa26ce 1902
e294cc5d
JH
1903#ifdef EBCDIC
1904 UV uvmax = 0;
1905#endif
1906
1907 if (has_utf8
1908#ifdef EBCDIC
1909 && !native_range
1910#endif
1911 ) {
9d4ba2ae 1912 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1913 char *e = d++;
1914 while (e-- > c)
1915 *(e + 1) = *e;
25716404 1916 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1917 /* mark the range as done, and continue */
1918 dorange = FALSE;
1919 didrange = TRUE;
1920 continue;
1921 }
2b9d42f0 1922
95a20fc0 1923 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1924#ifdef EBCDIC
1925 SvGROW(sv,
1926 SvLEN(sv) + (has_utf8 ?
1927 (512 - UTF_CONTINUATION_MARK +
1928 UNISKIP(0x100))
1929 : 256));
1930 /* How many two-byte within 0..255: 128 in UTF-8,
1931 * 96 in UTF-8-mod. */
1932#else
9cbb5ea2 1933 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1934#endif
9cbb5ea2 1935 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1936#ifdef EBCDIC
1937 if (has_utf8) {
1938 int j;
1939 for (j = 0; j <= 1; j++) {
1940 char * const c = (char*)utf8_hop((U8*)d, -1);
1941 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1942 if (j)
1943 min = (U8)uv;
1944 else if (uv < 256)
1945 max = (U8)uv;
1946 else {
1947 max = (U8)0xff; /* only to \xff */
1948 uvmax = uv; /* \x{100} to uvmax */
1949 }
1950 d = c; /* eat endpoint chars */
1951 }
1952 }
1953 else {
1954#endif
1955 d -= 2; /* eat the first char and the - */
1956 min = (U8)*d; /* first char in range */
1957 max = (U8)d[1]; /* last char in range */
1958#ifdef EBCDIC
1959 }
1960#endif
8ada0baa 1961
c2e66d9e 1962 if (min > max) {
01ec43d0 1963 Perl_croak(aTHX_
d1573ac7 1964 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1965 (char)min, (char)max);
c2e66d9e
GS
1966 }
1967
c7f1f016 1968#ifdef EBCDIC
4c3a8340
TS
1969 if (literal_endpoint == 2 &&
1970 ((isLOWER(min) && isLOWER(max)) ||
1971 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1972 if (isLOWER(min)) {
1973 for (i = min; i <= max; i++)
1974 if (isLOWER(i))
db42d148 1975 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1976 } else {
1977 for (i = min; i <= max; i++)
1978 if (isUPPER(i))
db42d148 1979 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1980 }
1981 }
1982 else
1983#endif
1984 for (i = min; i <= max; i++)
e294cc5d
JH
1985#ifdef EBCDIC
1986 if (has_utf8) {
1987 const U8 ch = (U8)NATIVE_TO_UTF(i);
1988 if (UNI_IS_INVARIANT(ch))
1989 *d++ = (U8)i;
1990 else {
1991 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1992 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1993 }
1994 }
1995 else
1996#endif
1997 *d++ = (char)i;
1998
1999#ifdef EBCDIC
2000 if (uvmax) {
2001 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2002 if (uvmax > 0x101)
2003 *d++ = (char)UTF_TO_NATIVE(0xff);
2004 if (uvmax > 0x100)
2005 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2006 }
2007#endif
02aa26ce
NT
2008
2009 /* mark the range as done, and continue */
79072805 2010 dorange = FALSE;
01ec43d0 2011 didrange = TRUE;
4c3a8340
TS
2012#ifdef EBCDIC
2013 literal_endpoint = 0;
2014#endif
79072805 2015 continue;
4e553d73 2016 }
02aa26ce
NT
2017
2018 /* range begins (ignore - as first or last char) */
79072805 2019 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2020 if (didrange) {
1fafa243 2021 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2022 }
e294cc5d
JH
2023 if (has_utf8
2024#ifdef EBCDIC
2025 && !native_range
2026#endif
2027 ) {
25716404 2028 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2029 s++;
2030 continue;
2031 }
79072805
LW
2032 dorange = TRUE;
2033 s++;
01ec43d0
GS
2034 }
2035 else {
2036 didrange = FALSE;
4c3a8340
TS
2037#ifdef EBCDIC
2038 literal_endpoint = 0;
e294cc5d 2039 native_range = TRUE;
4c3a8340 2040#endif
01ec43d0 2041 }
79072805 2042 }
02aa26ce
NT
2043
2044 /* if we get here, we're not doing a transliteration */
2045
0f5d15d6
IZ
2046 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2047 except for the last char, which will be done separately. */
3280af22 2048 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2049 if (s[2] == '#') {
e994fd66 2050 while (s+1 < send && *s != ')')
db42d148 2051 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2052 }
2053 else if (s[2] == '{' /* This should match regcomp.c */
2054 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2055 {
cc6b7395 2056 I32 count = 1;
0f5d15d6 2057 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2058 char c;
2059
d9f97599
GS
2060 while (count && (c = *regparse)) {
2061 if (c == '\\' && regparse[1])
2062 regparse++;
4e553d73 2063 else if (c == '{')
cc6b7395 2064 count++;
4e553d73 2065 else if (c == '}')
cc6b7395 2066 count--;
d9f97599 2067 regparse++;
cc6b7395 2068 }
e994fd66 2069 if (*regparse != ')')
5bdf89e7 2070 regparse--; /* Leave one char for continuation. */
0f5d15d6 2071 while (s < regparse)
db42d148 2072 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2073 }
748a9306 2074 }
02aa26ce
NT
2075
2076 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2077 else if (*s == '#' && PL_lex_inpat &&
2078 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2079 while (s+1 < send && *s != '\n')
db42d148 2080 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2081 }
02aa26ce 2082
5d1d4326 2083 /* check for embedded arrays
da6eedaa 2084 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2085 */
1749ea0d
TS
2086 else if (*s == '@' && s[1]) {
2087 if (isALNUM_lazy_if(s+1,UTF))
2088 break;
2089 if (strchr(":'{$", s[1]))
2090 break;
2091 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2092 break; /* in regexp, neither @+ nor @- are interpolated */
2093 }
02aa26ce
NT
2094
2095 /* check for embedded scalars. only stop if we're sure it's a
2096 variable.
2097 */
79072805 2098 else if (*s == '$') {
3280af22 2099 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2100 break;
6002328a 2101 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2102 break; /* in regexp, $ might be tail anchor */
2103 }
02aa26ce 2104
2b9d42f0
NIS
2105 /* End of else if chain - OP_TRANS rejoin rest */
2106
02aa26ce 2107 /* backslashes */
79072805
LW
2108 if (*s == '\\' && s+1 < send) {
2109 s++;
02aa26ce 2110
02aa26ce 2111 /* deprecate \1 in strings and substitution replacements */
3280af22 2112 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2113 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2114 {
599cee73 2115 if (ckWARN(WARN_SYNTAX))
9014280d 2116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2117 *--s = '$';
2118 break;
2119 }
02aa26ce
NT
2120
2121 /* string-change backslash escapes */
3280af22 2122 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2123 --s;
2124 break;
2125 }
cc74c5bd
TS
2126 /* skip any other backslash escapes in a pattern */
2127 else if (PL_lex_inpat) {
2128 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2129 goto default_action;
2130 }
02aa26ce
NT
2131
2132 /* if we get here, it's either a quoted -, or a digit */
79072805 2133 switch (*s) {
02aa26ce
NT
2134
2135 /* quoted - in transliterations */
79072805 2136 case '-':
3280af22 2137 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2138 *d++ = *s++;
2139 continue;
2140 }
2141 /* FALL THROUGH */
2142 default:
11b8faa4 2143 {
86f97054 2144 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2145 ckWARN(WARN_MISC))
9014280d 2146 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2147 "Unrecognized escape \\%c passed through",
2148 *s);
11b8faa4 2149 /* default action is to copy the quoted character */
f9a63242 2150 goto default_action;
11b8faa4 2151 }
02aa26ce
NT
2152
2153 /* \132 indicates an octal constant */
79072805
LW
2154 case '0': case '1': case '2': case '3':
2155 case '4': case '5': case '6': case '7':
ba210ebe 2156 {
53305cf1
NC
2157 I32 flags = 0;
2158 STRLEN len = 3;
2159 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2160 s += len;
2161 }
012bcf8d 2162 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2163
2164 /* \x24 indicates a hex constant */
79072805 2165 case 'x':
a0ed51b3
LW
2166 ++s;
2167 if (*s == '{') {
9d4ba2ae 2168 char* const e = strchr(s, '}');
a4c04bdc
NC
2169 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2170 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2171 STRLEN len;
355860ce 2172
53305cf1 2173 ++s;
adaeee49 2174 if (!e) {
a0ed51b3 2175 yyerror("Missing right brace on \\x{}");
355860ce 2176 continue;
ba210ebe 2177 }
53305cf1
NC
2178 len = e - s;
2179 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2180 s = e + 1;
a0ed51b3
LW
2181 }
2182 else {
ba210ebe 2183 {
53305cf1 2184 STRLEN len = 2;
a4c04bdc 2185 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2186 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2187 s += len;
2188 }
012bcf8d
GS
2189 }
2190
2191 NUM_ESCAPE_INSERT:
2192 /* Insert oct or hex escaped character.
301d3d20 2193 * There will always enough room in sv since such
db42d148 2194 * escapes will be longer than any UTF-8 sequence
301d3d20 2195 * they can end up as. */
ba7cea30 2196
c7f1f016
NIS
2197 /* We need to map to chars to ASCII before doing the tests
2198 to cover EBCDIC
2199 */
c4d5f83a 2200 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2201 if (!has_utf8 && uv > 255) {
301d3d20
JH
2202 /* Might need to recode whatever we have
2203 * accumulated so far if it contains any
2204 * hibit chars.
2205 *
2206 * (Can't we keep track of that and avoid
2207 * this rescan? --jhi)
012bcf8d 2208 */
c7f1f016 2209 int hicount = 0;
63cd0674
NIS
2210 U8 *c;
2211 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2212 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2213 hicount++;
db42d148 2214 }
012bcf8d 2215 }
63cd0674 2216 if (hicount) {
9d4ba2ae 2217 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2218 U8 *src, *dst;
2219 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2220 src = (U8 *)d - 1;
2221 dst = src+hicount;
2222 d += hicount;
cfd0369c 2223 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2224 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2225 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2226 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2227 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2228 }
2229 else {
63cd0674 2230 *dst-- = *src;
012bcf8d 2231 }
c7f1f016 2232 src--;
012bcf8d
GS
2233 }
2234 }
2235 }
2236
9aa983d2 2237 if (has_utf8 || uv > 255) {
9041c2e3 2238 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2239 has_utf8 = TRUE;
f9a63242
JH
2240 if (PL_lex_inwhat == OP_TRANS &&
2241 PL_sublex_info.sub_op) {
2242 PL_sublex_info.sub_op->op_private |=
2243 (PL_lex_repl ? OPpTRANS_FROM_UTF
2244 : OPpTRANS_TO_UTF);
f9a63242 2245 }
e294cc5d
JH
2246#ifdef EBCDIC
2247 if (uv > 255 && !dorange)
2248 native_range = FALSE;
2249#endif
012bcf8d 2250 }
a0ed51b3 2251 else {
012bcf8d 2252 *d++ = (char)uv;
a0ed51b3 2253 }
012bcf8d
GS
2254 }
2255 else {
c4d5f83a 2256 *d++ = (char) uv;
a0ed51b3 2257 }
79072805 2258 continue;
02aa26ce 2259
b239daa5 2260 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2261 case 'N':
55eda711 2262 ++s;
423cee85
JH
2263 if (*s == '{') {
2264 char* e = strchr(s, '}');
155aba94 2265 SV *res;
423cee85 2266 STRLEN len;
cfd0369c 2267 const char *str;
fc8cd66c 2268 SV *type;
4e553d73 2269
423cee85 2270 if (!e) {
5777a3f7 2271 yyerror("Missing right brace on \\N{}");
423cee85
JH
2272 e = s - 1;
2273 goto cont_scan;
2274 }
dbc0d4f2
JH
2275 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2276 /* \N{U+...} */
2277 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2278 PERL_SCAN_DISALLOW_PREFIX;
2279 s += 3;
2280 len = e - s;
2281 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2282 if ( e > s && len != (STRLEN)(e - s) ) {
2283 uv = 0xFFFD;
fc8cd66c 2284 }
dbc0d4f2
JH
2285 s = e + 1;
2286 goto NUM_ESCAPE_INSERT;
2287 }
55eda711 2288 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2289 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2290 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2291 res, NULL, SvPVX(type) );
2292 SvREFCNT_dec(type);
f9a63242
JH
2293 if (has_utf8)
2294 sv_utf8_upgrade(res);
cfd0369c 2295 str = SvPV_const(res,len);
1c47067b
JH
2296#ifdef EBCDIC_NEVER_MIND
2297 /* charnames uses pack U and that has been
2298 * recently changed to do the below uni->native
2299 * mapping, so this would be redundant (and wrong,
2300 * the code point would be doubly converted).
2301 * But leave this in just in case the pack U change
2302 * gets revoked, but the semantics is still
2303 * desireable for charnames. --jhi */
cddc7ef4 2304 {
cfd0369c 2305 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2306
2307 if (uv < 0x100) {
89ebb4a3 2308 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2309
2310 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2311 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2312 str = SvPV_const(res, len);
cddc7ef4
JH
2313 }
2314 }
2315#endif
89491803 2316 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2317 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2318 SvCUR_set(sv, d - ostart);
2319 SvPOK_on(sv);
e4f3eed8 2320 *d = '\0';
f08d6ad9 2321 sv_utf8_upgrade(sv);
d2f449dd 2322 /* this just broke our allocation above... */
eb160463 2323 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2324 d = SvPVX(sv) + SvCUR(sv);
89491803 2325 has_utf8 = TRUE;
f08d6ad9 2326 }
eb160463 2327 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2328 const char * const odest = SvPVX_const(sv);
423cee85 2329
8973db79 2330 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2331 d = SvPVX(sv) + (d - odest);
2332 }
e294cc5d
JH
2333#ifdef EBCDIC
2334 if (!dorange)
2335 native_range = FALSE; /* \N{} is guessed to be Unicode */
2336#endif
423cee85
JH
2337 Copy(str, d, len, char);
2338 d += len;
2339 SvREFCNT_dec(res);
2340 cont_scan:
2341 s = e + 1;
2342 }
2343 else
5777a3f7 2344 yyerror("Missing braces on \\N{}");
423cee85
JH
2345 continue;
2346
02aa26ce 2347 /* \c is a control character */
79072805
LW
2348 case 'c':
2349 s++;
961ce445 2350 if (s < send) {
ba210ebe 2351 U8 c = *s++;
c7f1f016
NIS
2352#ifdef EBCDIC
2353 if (isLOWER(c))
2354 c = toUPPER(c);
2355#endif
db42d148 2356 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2357 }
961ce445
RGS
2358 else {
2359 yyerror("Missing control char name in \\c");
2360 }
79072805 2361 continue;
02aa26ce
NT
2362
2363 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2364 case 'b':
db42d148 2365 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2366 break;
2367 case 'n':
db42d148 2368 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2369 break;
2370 case 'r':
db42d148 2371 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2372 break;
2373 case 'f':
db42d148 2374 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2375 break;
2376 case 't':
db42d148 2377 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2378 break;
34a3fe2a 2379 case 'e':
db42d148 2380 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2381 break;
2382 case 'a':
db42d148 2383 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2384 break;
02aa26ce
NT
2385 } /* end switch */
2386
79072805
LW
2387 s++;
2388 continue;
02aa26ce 2389 } /* end if (backslash) */
4c3a8340
TS
2390#ifdef EBCDIC
2391 else
2392 literal_endpoint++;
2393#endif
02aa26ce 2394
f9a63242 2395 default_action:
2b9d42f0
NIS
2396 /* If we started with encoded form, or already know we want it
2397 and then encode the next character */
2398 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2399 STRLEN len = 1;
5f66b61c
AL
2400 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2401 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2402 s += len;
2403 if (need > len) {
2404 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2405 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2406 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2407 }
5f66b61c 2408 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2409 has_utf8 = TRUE;
e294cc5d
JH
2410#ifdef EBCDIC
2411 if (uv > 255 && !dorange)
2412 native_range = FALSE;
2413#endif
2b9d42f0
NIS
2414 }
2415 else {
2416 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2417 }
02aa26ce
NT
2418 } /* while loop to process each character */
2419
2420 /* terminate the string and set up the sv */
79072805 2421 *d = '\0';
95a20fc0 2422 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2423 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2424 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2425
79072805 2426 SvPOK_on(sv);
9f4817db 2427 if (PL_encoding && !has_utf8) {
d0063567
DK
2428 sv_recode_to_utf8(sv, PL_encoding);
2429 if (SvUTF8(sv))
2430 has_utf8 = TRUE;
9f4817db 2431 }
2b9d42f0 2432 if (has_utf8) {
7e2040f0 2433 SvUTF8_on(sv);
2b9d42f0 2434 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2435 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2436 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2437 }
2438 }
79072805 2439
02aa26ce 2440 /* shrink the sv if we allocated more than we used */
79072805 2441 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2442 SvPV_shrink_to_cur(sv);
79072805 2443 }
02aa26ce 2444
9b599b2a 2445 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2446 if (s > PL_bufptr) {
2447 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2448 sv = new_constant(start, s - start,
2449 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2450 sv, NULL,
10edeb5d
JH
2451 (const char *)
2452 (( PL_lex_inwhat == OP_TRANS
2453 ? "tr"
2454 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2455 ? "s"
2456 : "qq"))));
79072805 2457 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2458 } else
8990e307 2459 SvREFCNT_dec(sv);
79072805
LW
2460 return s;
2461}
2462
ffb4593c
NT
2463/* S_intuit_more
2464 * Returns TRUE if there's more to the expression (e.g., a subscript),
2465 * FALSE otherwise.
ffb4593c
NT
2466 *
2467 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2468 *
2469 * ->[ and ->{ return TRUE
2470 * { and [ outside a pattern are always subscripts, so return TRUE
2471 * if we're outside a pattern and it's not { or [, then return FALSE
2472 * if we're in a pattern and the first char is a {
2473 * {4,5} (any digits around the comma) returns FALSE
2474 * if we're in a pattern and the first char is a [
2475 * [] returns FALSE
2476 * [SOMETHING] has a funky algorithm to decide whether it's a
2477 * character class or not. It has to deal with things like
2478 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2479 * anything else returns TRUE
2480 */
2481
9cbb5ea2
GS
2482/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2483
76e3520e 2484STATIC int
cea2e8a9 2485S_intuit_more(pTHX_ register char *s)
79072805 2486{
97aff369 2487 dVAR;
3280af22 2488 if (PL_lex_brackets)
79072805
LW
2489 return TRUE;
2490 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2491 return TRUE;
2492 if (*s != '{' && *s != '[')
2493 return FALSE;
3280af22 2494 if (!PL_lex_inpat)
79072805
LW
2495 return TRUE;
2496
2497 /* In a pattern, so maybe we have {n,m}. */
2498 if (*s == '{') {
2499 s++;
2500 if (!isDIGIT(*s))
2501 return TRUE;
2502 while (isDIGIT(*s))
2503 s++;
2504 if (*s == ',')
2505 s++;
2506 while (isDIGIT(*s))
2507 s++;
2508 if (*s == '}')
2509 return FALSE;
2510 return TRUE;
2511
2512 }
2513
2514 /* On the other hand, maybe we have a character class */
2515
2516 s++;
2517 if (*s == ']' || *s == '^')
2518 return FALSE;
2519 else {
ffb4593c 2520 /* this is terrifying, and it works */
79072805
LW
2521 int weight = 2; /* let's weigh the evidence */
2522 char seen[256];
f27ffc4a 2523 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2524 const char * const send = strchr(s,']');
3280af22 2525 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2526
2527 if (!send) /* has to be an expression */
2528 return TRUE;
2529
2530 Zero(seen,256,char);
2531 if (*s == '$')
2532 weight -= 3;
2533 else if (isDIGIT(*s)) {
2534 if (s[1] != ']') {
2535 if (isDIGIT(s[1]) && s[2] == ']')
2536 weight -= 10;
2537 }
2538 else
2539 weight -= 100;
2540 }
2541 for (; s < send; s++) {
2542 last_un_char = un_char;
2543 un_char = (unsigned char)*s;
2544 switch (*s) {
2545 case '@':
2546 case '&':
2547 case '$':
2548 weight -= seen[un_char] * 10;
7e2040f0 2549 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2550 int len;
8903cb82 2551 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2552 len = (int)strlen(tmpbuf);
2553 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2554 weight -= 100;
2555 else
2556 weight -= 10;
2557 }
2558 else if (*s == '$' && s[1] &&
93a17b20
LW
2559 strchr("[#!%*<>()-=",s[1])) {
2560 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2561 weight -= 10;
2562 else
2563 weight -= 1;
2564 }
2565 break;
2566 case '\\':
2567 un_char = 254;
2568 if (s[1]) {
93a17b20 2569 if (strchr("wds]",s[1]))
79072805 2570 weight += 100;
10edeb5d 2571 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2572 weight += 1;
93a17b20 2573 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2574 weight += 40;
2575 else if (isDIGIT(s[1])) {
2576 weight += 40;
2577 while (s[1] && isDIGIT(s[1]))
2578 s++;
2579 }
2580 }
2581 else
2582 weight += 100;
2583 break;
2584 case '-':
2585 if (s[1] == '\\')
2586 weight += 50;
93a17b20 2587 if (strchr("aA01! ",last_un_char))
79072805 2588 weight += 30;
93a17b20 2589 if (strchr("zZ79~",s[1]))
79072805 2590 weight += 30;
f27ffc4a
GS
2591 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2592 weight -= 5; /* cope with negative subscript */
79072805
LW
2593 break;
2594 default:
3792a11b
NC
2595 if (!isALNUM(last_un_char)
2596 && !(last_un_char == '$' || last_un_char == '@'
2597 || last_un_char == '&')
2598 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2599 char *d = tmpbuf;
2600 while (isALPHA(*s))
2601 *d++ = *s++;
2602 *d = '\0';
5458a98a 2603 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2604 weight -= 150;
2605 }
2606 if (un_char == last_un_char + 1)
2607 weight += 5;
2608 weight -= seen[un_char];
2609 break;
2610 }
2611 seen[un_char]++;
2612 }
2613 if (weight >= 0) /* probably a character class */
2614 return FALSE;
2615 }
2616
2617 return TRUE;
2618}
ffed7fef 2619
ffb4593c
NT
2620/*
2621 * S_intuit_method
2622 *
2623 * Does all the checking to disambiguate
2624 * foo bar
2625 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2626 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2627 *
2628 * First argument is the stuff after the first token, e.g. "bar".
2629 *
2630 * Not a method if bar is a filehandle.
2631 * Not a method if foo is a subroutine prototyped to take a filehandle.
2632 * Not a method if it's really "Foo $bar"
2633 * Method if it's "foo $bar"
2634 * Not a method if it's really "print foo $bar"
2635 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2636 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2637 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2638 * =>
2639 */
2640
76e3520e 2641STATIC int
62d55b22 2642S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2643{
97aff369 2644 dVAR;
a0d0e21e 2645 char *s = start + (*start == '$');
3280af22 2646 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2647 STRLEN len;
2648 GV* indirgv;
5db06880
NC
2649#ifdef PERL_MAD
2650 int soff;
2651#endif
a0d0e21e
LW
2652
2653 if (gv) {
62d55b22 2654 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2655 return 0;
62d55b22
NC
2656 if (cv) {
2657 if (SvPOK(cv)) {
2658 const char *proto = SvPVX_const(cv);
2659 if (proto) {
2660 if (*proto == ';')
2661 proto++;
2662 if (*proto == '*')
2663 return 0;
2664 }
b6c543e3
IZ
2665 }
2666 } else
c35e046a 2667 gv = NULL;
a0d0e21e 2668 }
8903cb82 2669 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2670 /* start is the beginning of the possible filehandle/object,
2671 * and s is the end of it
2672 * tmpbuf is a copy of it
2673 */
2674
a0d0e21e 2675 if (*start == '$') {
3280af22 2676 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2677 return 0;
5db06880
NC
2678#ifdef PERL_MAD
2679 len = start - SvPVX(PL_linestr);
2680#endif
29595ff2 2681 s = PEEKSPACE(s);
f0092767 2682#ifdef PERL_MAD
5db06880
NC
2683 start = SvPVX(PL_linestr) + len;
2684#endif
3280af22
NIS
2685 PL_bufptr = start;
2686 PL_expect = XREF;
a0d0e21e
LW
2687 return *s == '(' ? FUNCMETH : METHOD;
2688 }
5458a98a 2689 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2690 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2691 len -= 2;
2692 tmpbuf[len] = '\0';
5db06880
NC
2693#ifdef PERL_MAD
2694 soff = s - SvPVX(PL_linestr);
2695#endif
c3e0f903
GS
2696 goto bare_package;
2697 }
90e5519e 2698 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2699 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2700 return 0;
2701 /* filehandle or package name makes it a method */
89bfa8cd 2702 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2703#ifdef PERL_MAD
2704 soff = s - SvPVX(PL_linestr);
2705#endif
29595ff2 2706 s = PEEKSPACE(s);
3280af22 2707 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2708 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2709 bare_package:
cd81e915 2710 start_force(PL_curforce);
9ded7720 2711 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2712 newSVpvn(tmpbuf,len));
9ded7720 2713 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2714 if (PL_madskills)
2715 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2716 PL_expect = XTERM;
a0d0e21e 2717 force_next(WORD);
3280af22 2718 PL_bufptr = s;
5db06880
NC
2719#ifdef PERL_MAD
2720 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2721#endif
a0d0e21e
LW
2722 return *s == '(' ? FUNCMETH : METHOD;
2723 }
2724 }
2725 return 0;
2726}
2727
ffb4593c
NT
2728/*
2729 * S_incl_perldb
2730 * Return a string of Perl code to load the debugger. If PERL5DB
2731 * is set, it will return the contents of that, otherwise a
2732 * compile-time require of perl5db.pl.
2733 */
2734
bfed75c6 2735STATIC const char*
cea2e8a9 2736S_incl_perldb(pTHX)
a0d0e21e 2737{
97aff369 2738 dVAR;
3280af22 2739 if (PL_perldb) {
9d4ba2ae 2740 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2741
2742 if (pdb)
2743 return pdb;
93189314 2744 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2745 return "BEGIN { require 'perl5db.pl' }";
2746 }
2747 return "";
2748}
2749
2750
16d20bd9 2751/* Encoded script support. filter_add() effectively inserts a
4e553d73 2752 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2753 * Note that the filter function only applies to the current source file
2754 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2755 *
2756 * The datasv parameter (which may be NULL) can be used to pass
2757 * private data to this instance of the filter. The filter function
2758 * can recover the SV using the FILTER_DATA macro and use it to
2759 * store private buffers and state information.
2760 *
2761 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2762 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2763 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2764 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2765 * private use must be set using malloc'd pointers.
2766 */
16d20bd9
AD
2767
2768SV *
864dbfa3 2769Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2770{
97aff369 2771 dVAR;
f4c556ac 2772 if (!funcp)
a0714e2c 2773 return NULL;
f4c556ac 2774
3280af22
NIS
2775 if (!PL_rsfp_filters)
2776 PL_rsfp_filters = newAV();
16d20bd9 2777 if (!datasv)
561b68a9 2778 datasv = newSV(0);
862a34c6 2779 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2780 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2781 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2782 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2783 FPTR2DPTR(void *, IoANY(datasv)),
2784 SvPV_nolen(datasv)));
3280af22
NIS
2785 av_unshift(PL_rsfp_filters, 1);
2786 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2787 return(datasv);
2788}
4e553d73 2789
16d20bd9
AD
2790
2791/* Delete most recently added instance of this filter function. */
a0d0e21e 2792void
864dbfa3 2793Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2794{
97aff369 2795 dVAR;
e0c19803 2796 SV *datasv;
24801a4b 2797
33073adb 2798#ifdef DEBUGGING
55662e27
JH
2799 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2800 FPTR2DPTR(void*, funcp)));
33073adb 2801#endif
3280af22 2802 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2803 return;
2804 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2805 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2806 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2807 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2808 IoANY(datasv) = (void *)NULL;
3280af22 2809 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2810
16d20bd9
AD
2811 return;
2812 }
2813 /* we need to search for the correct entry and clear it */
cea2e8a9 2814 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2815}
2816
2817
1de9afcd
RGS
2818/* Invoke the idxth filter function for the current rsfp. */
2819/* maxlen 0 = read one text line */
16d20bd9 2820I32
864dbfa3 2821Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2822{
97aff369 2823 dVAR;
16d20bd9
AD
2824 filter_t funcp;
2825 SV *datasv = NULL;
f482118e
NC
2826 /* This API is bad. It should have been using unsigned int for maxlen.
2827 Not sure if we want to change the API, but if not we should sanity
2828 check the value here. */
39cd7a59
NC
2829 const unsigned int correct_length
2830 = maxlen < 0 ?
2831#ifdef PERL_MICRO
2832 0x7FFFFFFF
2833#else
2834 INT_MAX
2835#endif
2836 : maxlen;
e50aee73 2837
3280af22 2838 if (!PL_rsfp_filters)
16d20bd9 2839 return -1;
1de9afcd 2840 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2841 /* Provide a default input filter to make life easy. */
2842 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2843 DEBUG_P(PerlIO_printf(Perl_debug_log,
2844 "filter_read %d: from rsfp\n", idx));
f482118e 2845 if (correct_length) {
16d20bd9
AD
2846 /* Want a block */
2847 int len ;
f54cb97a 2848 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2849
2850 /* ensure buf_sv is large enough */
f482118e
NC
2851 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2852 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2853 correct_length)) <= 0) {
3280af22 2854 if (PerlIO_error(PL_rsfp))
37120919
AD
2855 return -1; /* error */
2856 else
2857 return 0 ; /* end of file */
2858 }
16d20bd9
AD
2859 SvCUR_set(buf_sv, old_len + len) ;
2860 } else {
2861 /* Want a line */
3280af22
NIS
2862 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2863 if (PerlIO_error(PL_rsfp))
37120919
AD
2864 return -1; /* error */
2865 else
2866 return 0 ; /* end of file */
2867 }
16d20bd9
AD
2868 }
2869 return SvCUR(buf_sv);
2870 }
2871 /* Skip this filter slot if filter has been deleted */
1de9afcd 2872 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2873 DEBUG_P(PerlIO_printf(Perl_debug_log,
2874 "filter_read %d: skipped (filter deleted)\n",
2875 idx));
f482118e 2876 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2877 }
2878 /* Get function pointer hidden within datasv */
8141890a 2879 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2880 DEBUG_P(PerlIO_printf(Perl_debug_log,
2881 "filter_read %d: via function %p (%s)\n",
ca0270c4 2882 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2883 /* Call function. The function is expected to */
2884 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2885 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2886 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2887}
2888
76e3520e 2889STATIC char *
cea2e8a9 2890S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2891{
97aff369 2892 dVAR;
c39cd008 2893#ifdef PERL_CR_FILTER
3280af22 2894 if (!PL_rsfp_filters) {
c39cd008 2895 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2896 }
2897#endif
3280af22 2898 if (PL_rsfp_filters) {
55497cff 2899 if (!append)
2900 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2901 if (FILTER_READ(0, sv, 0) > 0)
2902 return ( SvPVX(sv) ) ;
2903 else
bd61b366 2904 return NULL ;
16d20bd9 2905 }
9d116dd7 2906 else
fd049845 2907 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2908}
2909
01ec43d0 2910STATIC HV *
7fc63493 2911S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2912{
97aff369 2913 dVAR;
def3634b
GS
2914 GV *gv;
2915
01ec43d0 2916 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2917 return PL_curstash;
2918
2919 if (len > 2 &&
2920 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2921 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2922 {
2923 return GvHV(gv); /* Foo:: */
def3634b
GS
2924 }
2925
2926 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2927 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2928 if (gv && GvCV(gv)) {
2929 SV * const sv = cv_const_sv(GvCV(gv));
2930 if (sv)
83003860 2931 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2932 }
2933
2934 return gv_stashpv(pkgname, FALSE);
2935}
a0d0e21e 2936
e3f73d4e
RGS
2937/*
2938 * S_readpipe_override
2939 * Check whether readpipe() is overriden, and generates the appropriate
2940 * optree, provided sublex_start() is called afterwards.
2941 */
2942STATIC void
1d51329b 2943S_readpipe_override(pTHX)
e3f73d4e
RGS
2944{
2945 GV **gvp;
2946 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2947 yylval.ival = OP_BACKTICK;
2948 if ((gv_readpipe
2949 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2950 ||
2951 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2952 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2953 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2954 {
2955 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2956 append_elem(OP_LIST,
2957 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2958 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2959 }
2960 else {
2961 set_csh();
2962 }
2963}
2964
5db06880
NC
2965#ifdef PERL_MAD
2966 /*
2967 * Perl_madlex
2968 * The intent of this yylex wrapper is to minimize the changes to the
2969 * tokener when we aren't interested in collecting madprops. It remains
2970 * to be seen how successful this strategy will be...
2971 */
2972
2973int
2974Perl_madlex(pTHX)
2975{
2976 int optype;
2977 char *s = PL_bufptr;
2978
cd81e915
NC
2979 /* make sure PL_thiswhite is initialized */
2980 PL_thiswhite = 0;
2981 PL_thismad = 0;
5db06880 2982
cd81e915 2983 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2984 if (PL_pending_ident)
2985 return S_pending_ident(aTHX);
2986
2987 /* previous token ate up our whitespace? */
cd81e915
NC
2988 if (!PL_lasttoke && PL_nextwhite) {
2989 PL_thiswhite = PL_nextwhite;
2990 PL_nextwhite = 0;
5db06880
NC
2991 }
2992
2993 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2994 PL_realtokenstart = -1;
2995 PL_thistoken = 0;
5db06880
NC
2996 optype = yylex();
2997 s = PL_bufptr;
cd81e915 2998 assert(PL_curforce < 0);
5db06880 2999
cd81e915
NC
3000 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3001 if (!PL_thistoken) {
3002 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3003 PL_thistoken = newSVpvs("");
5db06880 3004 else {
c35e046a 3005 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3006 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3007 }
3008 }
cd81e915
NC
3009 if (PL_thismad) /* install head */
3010 CURMAD('X', PL_thistoken);
5db06880
NC
3011 }
3012
3013 /* last whitespace of a sublex? */
cd81e915
NC
3014 if (optype == ')' && PL_endwhite) {
3015 CURMAD('X', PL_endwhite);
5db06880
NC
3016 }
3017
cd81e915 3018 if (!PL_thismad) {
5db06880
NC
3019
3020 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3021 if (!PL_thiswhite && !PL_endwhite && !optype) {
3022 sv_free(PL_thistoken);
3023 PL_thistoken = 0;
5db06880
NC
3024 return 0;
3025 }
3026
3027 /* put off final whitespace till peg */
3028 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3029 PL_nextwhite = PL_thiswhite;
3030 PL_thiswhite = 0;
5db06880 3031 }
cd81e915
NC
3032 else if (PL_thisopen) {
3033 CURMAD('q', PL_thisopen);
3034 if (PL_thistoken)
3035 sv_free(PL_thistoken);
3036 PL_thistoken = 0;
5db06880
NC
3037 }
3038 else {
3039 /* Store actual token text as madprop X */
cd81e915 3040 CURMAD('X', PL_thistoken);
5db06880
NC
3041 }
3042
cd81e915 3043 if (PL_thiswhite) {
5db06880 3044 /* add preceding whitespace as madprop _ */
cd81e915 3045 CURMAD('_', PL_thiswhite);
5db06880
NC
3046 }
3047
cd81e915 3048 if (PL_thisstuff) {
5db06880 3049 /* add quoted material as madprop = */
cd81e915 3050 CURMAD('=', PL_thisstuff);
5db06880
NC
3051 }
3052
cd81e915 3053 if (PL_thisclose) {
5db06880 3054 /* add terminating quote as madprop Q */
cd81e915 3055 CURMAD('Q', PL_thisclose);
5db06880
NC
3056 }
3057 }
3058
3059 /* special processing based on optype */
3060
3061 switch (optype) {
3062
3063 /* opval doesn't need a TOKEN since it can already store mp */
3064 case WORD:
3065 case METHOD:
3066 case FUNCMETH:
3067 case THING:
3068 case PMFUNC:
3069 case PRIVATEREF:
3070 case FUNC0SUB:
3071 case UNIOPSUB:
3072 case LSTOPSUB:
3073 if (yylval.opval)
cd81e915
NC
3074 append_madprops(PL_thismad, yylval.opval, 0);
3075 PL_thismad = 0;
5db06880
NC
3076 return optype;
3077
3078 /* fake EOF */
3079 case 0:
3080 optype = PEG;
cd81e915
NC
3081 if (PL_endwhite) {
3082 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3083 PL_endwhite = 0;
5db06880
NC
3084 }
3085 break;
3086
3087 case ']':
3088 case '}':
cd81e915 3089 if (PL_faketokens)
5db06880
NC
3090 break;
3091 /* remember any fake bracket that lexer is about to discard */
3092 if (PL_lex_brackets == 1 &&
3093 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3094 {
3095 s = PL_bufptr;
3096 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3097 s++;
3098 if (*s == '}') {
cd81e915
NC
3099 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3100 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3101 PL_thiswhite = 0;
5db06880
NC
3102 PL_bufptr = s - 1;
3103 break; /* don't bother looking for trailing comment */
3104 }
3105 else
3106 s = PL_bufptr;
3107 }
3108 if (optype == ']')
3109 break;
3110 /* FALLTHROUGH */
3111
3112 /* attach a trailing comment to its statement instead of next token */
3113 case ';':
cd81e915 3114 if (PL_faketokens)
5db06880
NC
3115 break;
3116 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3117 s = PL_bufptr;
3118 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3119 s++;
3120 if (*s == '\n' || *s == '#') {
3121 while (s < PL_bufend && *s != '\n')
3122 s++;
3123 if (s < PL_bufend)
3124 s++;
cd81e915
NC
3125 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3126 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3127 PL_thiswhite = 0;
5db06880
NC
3128 PL_bufptr = s;
3129 }
3130 }
3131 break;
3132
3133 /* pval */
3134 case LABEL:
3135 break;
3136
3137 /* ival */
3138 default:
3139 break;
3140
3141 }
3142
3143 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3144 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3145 PL_thismad = 0;
5db06880
NC
3146 return optype;
3147}
3148#endif
3149
468aa647 3150STATIC char *
cc6ed77d 3151S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3152 dVAR;
468aa647
RGS
3153 if (PL_expect != XSTATE)
3154 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3155 is_use ? "use" : "no"));
29595ff2 3156 s = SKIPSPACE1(s);
468aa647
RGS
3157 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3158 s = force_version(s, TRUE);
29595ff2 3159 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3160 start_force(PL_curforce);
9ded7720 3161 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3162 force_next(WORD);
3163 }
3164 else if (*s == 'v') {
3165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3166 s = force_version(s, FALSE);
3167 }
3168 }
3169 else {
3170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3171 s = force_version(s, FALSE);
3172 }
3173 yylval.ival = is_use;
3174 return s;
3175}
748a9306 3176#ifdef DEBUGGING
27da23d5 3177 static const char* const exp_name[] =
09bef843 3178 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3179 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3180 };
748a9306 3181#endif
463ee0b2 3182
02aa26ce
NT
3183/*
3184 yylex
3185
3186 Works out what to call the token just pulled out of the input
3187 stream. The yacc parser takes care of taking the ops we return and
3188 stitching them into a tree.
3189
3190 Returns:
3191 PRIVATEREF
3192
3193 Structure:
3194 if read an identifier
3195 if we're in a my declaration
3196 croak if they tried to say my($foo::bar)
3197 build the ops for a my() declaration
3198 if it's an access to a my() variable
3199 are we in a sort block?
3200 croak if my($a); $a <=> $b
3201 build ops for access to a my() variable
3202 if in a dq string, and they've said @foo and we can't find @foo
3203 croak
3204 build ops for a bareword
3205 if we already built the token before, use it.
3206*/
3207
20141f0e 3208
dba4d153
JH
3209#ifdef __SC__
3210#pragma segment Perl_yylex
3211#endif
dba4d153 3212int
dba4d153 3213Perl_yylex(pTHX)
20141f0e 3214{
97aff369 3215 dVAR;
3afc138a 3216 register char *s = PL_bufptr;
378cc40b 3217 register char *d;
463ee0b2 3218 STRLEN len;
aa7440fb 3219 bool bof = FALSE;
a687059c 3220
10edeb5d
JH
3221 /* orig_keyword, gvp, and gv are initialized here because
3222 * jump to the label just_a_word_zero can bypass their
3223 * initialization later. */
3224 I32 orig_keyword = 0;
3225 GV *gv = NULL;
3226 GV **gvp = NULL;
3227
bbf60fe6 3228 DEBUG_T( {
396482e1 3229 SV* tmp = newSVpvs("");
b6007c36
DM
3230 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3231 (IV)CopLINE(PL_curcop),
3232 lex_state_names[PL_lex_state],
3233 exp_name[PL_expect],
3234 pv_display(tmp, s, strlen(s), 0, 60));
3235 SvREFCNT_dec(tmp);
bbf60fe6 3236 } );
02aa26ce 3237 /* check if there's an identifier for us to look at */
ba979b31 3238 if (PL_pending_ident)
bbf60fe6 3239 return REPORT(S_pending_ident(aTHX));
bbce6d69 3240
02aa26ce
NT
3241 /* no identifier pending identification */
3242
3280af22 3243 switch (PL_lex_state) {
79072805
LW
3244#ifdef COMMENTARY
3245 case LEX_NORMAL: /* Some compilers will produce faster */
3246 case LEX_INTERPNORMAL: /* code if we comment these out. */
3247 break;
3248#endif
3249
09bef843 3250 /* when we've already built the next token, just pull it out of the queue */
79072805 3251 case LEX_KNOWNEXT:
5db06880
NC
3252#ifdef PERL_MAD
3253 PL_lasttoke--;
3254 yylval = PL_nexttoke[PL_lasttoke].next_val;
3255 if (PL_madskills) {
cd81e915 3256 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3257 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3258 if (PL_thismad && PL_thismad->mad_key == '_') {
3259 PL_thiswhite = (SV*)PL_thismad->mad_val;
3260 PL_thismad->mad_val = 0;
3261 mad_free(PL_thismad);
3262 PL_thismad = 0;
5db06880
NC
3263 }
3264 }
3265 if (!PL_lasttoke) {
3266 PL_lex_state = PL_lex_defer;
3267 PL_expect = PL_lex_expect;
3268 PL_lex_defer = LEX_NORMAL;
3269 if (!PL_nexttoke[PL_lasttoke].next_type)
3270 return yylex();
3271 }
3272#else
3280af22 3273 PL_nexttoke--;
5db06880 3274 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3275 if (!PL_nexttoke) {
3276 PL_lex_state = PL_lex_defer;
3277 PL_expect = PL_lex_expect;
3278 PL_lex_defer = LEX_NORMAL;
463ee0b2 3279 }
5db06880
NC
3280#endif
3281#ifdef PERL_MAD
3282 /* FIXME - can these be merged? */
3283 return(PL_nexttoke[PL_lasttoke].next_type);
3284#else
bbf60fe6 3285 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3286#endif
79072805 3287
02aa26ce 3288 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3289 when we get here, PL_bufptr is at the \
02aa26ce 3290 */
79072805
LW
3291 case LEX_INTERPCASEMOD:
3292#ifdef DEBUGGING
3280af22 3293 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3294 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3295#endif
02aa26ce 3296 /* handle \E or end of string */
3280af22 3297 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3298 /* if at a \E */
3280af22 3299 if (PL_lex_casemods) {
f54cb97a 3300 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3301 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3302
3792a11b
NC
3303 if (PL_bufptr != PL_bufend
3304 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3305 PL_bufptr += 2;
3306 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3307#ifdef PERL_MAD
3308 if (PL_madskills)
6b29d1f5 3309 PL_thistoken = newSVpvs("\\E");
5db06880 3310#endif
a0d0e21e 3311 }
bbf60fe6 3312 return REPORT(')');
79072805 3313 }
5db06880
NC
3314#ifdef PERL_MAD
3315 while (PL_bufptr != PL_bufend &&
3316 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3317 if (!PL_thiswhite)
6b29d1f5 3318 PL_thiswhite = newSVpvs("");
cd81e915 3319 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3320 PL_bufptr += 2;
3321 }
3322#else
3280af22
NIS
3323 if (PL_bufptr != PL_bufend)
3324 PL_bufptr += 2;
5db06880 3325#endif
3280af22 3326 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3327 return yylex();
79072805
LW
3328 }
3329 else {
607df283 3330 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3331 "### Saw case modifier\n"); });
3280af22 3332 s = PL_bufptr + 1;
6e909404 3333 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3334#ifdef PERL_MAD
cd81e915 3335 if (!PL_thiswhite)
6b29d1f5 3336 PL_thiswhite = newSVpvs("");
cd81e915 3337 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3338#endif
89122651 3339 PL_bufptr = s + 3;
6e909404
JH
3340 PL_lex_state = LEX_INTERPCONCAT;
3341 return yylex();
a0d0e21e 3342 }
6e909404 3343 else {
90771dc0 3344 I32 tmp;
5db06880
NC
3345 if (!PL_madskills) /* when just compiling don't need correct */
3346 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3347 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3348 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3349 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3350 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3351 return REPORT(')');
6e909404
JH
3352 }
3353 if (PL_lex_casemods > 10)
3354 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3355 PL_lex_casestack[PL_lex_casemods++] = *s;
3356 PL_lex_casestack[PL_lex_casemods] = '\0';
3357 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3358 start_force(PL_curforce);
9ded7720 3359 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3360 force_next('(');
cd81e915 3361 start_force(PL_curforce);
6e909404 3362 if (*s == 'l')
9ded7720 3363 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3364 else if (*s == 'u')
9ded7720 3365 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3366 else if (*s == 'L')
9ded7720 3367 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3368 else if (*s == 'U')
9ded7720 3369 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3370 else if (*s == 'Q')
9ded7720 3371 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3372 else
3373 Perl_croak(aTHX_ "panic: yylex");
5db06880 3374 if (PL_madskills) {
6b29d1f5 3375 SV* const tmpsv = newSVpvs("");
5db06880
NC
3376 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3377 curmad('_', tmpsv);
3378 }
6e909404 3379 PL_bufptr = s + 1;
a0d0e21e 3380 }
79072805 3381 force_next(FUNC);
3280af22
NIS
3382 if (PL_lex_starts) {
3383 s = PL_bufptr;
3384 PL_lex_starts = 0;
5db06880
NC
3385#ifdef PERL_MAD
3386 if (PL_madskills) {
cd81e915
NC
3387 if (PL_thistoken)
3388 sv_free(PL_thistoken);
6b29d1f5 3389 PL_thistoken = newSVpvs("");
5db06880
NC
3390 }
3391#endif
131b3ad0
DM
3392 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3393 if (PL_lex_casemods == 1 && PL_lex_inpat)
3394 OPERATOR(',');
3395 else
3396 Aop(OP_CONCAT);
79072805
LW
3397 }
3398 else
cea2e8a9 3399 return yylex();
79072805
LW
3400 }
3401
55497cff 3402 case LEX_INTERPPUSH:
bbf60fe6 3403 return REPORT(sublex_push());
55497cff 3404
79072805 3405 case LEX_INTERPSTART:
3280af22 3406 if (PL_bufptr == PL_bufend)
bbf60fe6 3407 return REPORT(sublex_done());
607df283 3408 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3409 "### Interpolated variable\n"); });
3280af22
NIS
3410 PL_expect = XTERM;
3411 PL_lex_dojoin = (*PL_bufptr == '@');
3412 PL_lex_state = LEX_INTERPNORMAL;
3413 if (PL_lex_dojoin) {
cd81e915 3414 start_force(PL_curforce);
9ded7720 3415 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3416 force_next(',');
cd81e915 3417 start_force(PL_curforce);
a0d0e21e 3418 force_ident("\"", '$');
cd81e915 3419 start_force(PL_curforce);
9ded7720 3420 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3421 force_next('$');
cd81e915 3422 start_force(PL_curforce);
9ded7720 3423 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3424 force_next('(');
cd81e915 3425 start_force(PL_curforce);
9ded7720 3426 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3427 force_next(FUNC);
3428 }
3280af22
NIS
3429 if (PL_lex_starts++) {
3430 s = PL_bufptr;
5db06880
NC
3431#ifdef PERL_MAD
3432 if (PL_madskills) {
cd81e915
NC
3433 if (PL_thistoken)
3434 sv_free(PL_thistoken);
6b29d1f5 3435 PL_thistoken = newSVpvs("");
5db06880
NC
3436 }
3437#endif
131b3ad0
DM
3438 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3439 if (!PL_lex_casemods && PL_lex_inpat)
3440 OPERATOR(',');
3441 else
3442 Aop(OP_CONCAT);
79072805 3443 }
cea2e8a9 3444 return yylex();
79072805
LW
3445
3446 case LEX_INTERPENDMAYBE:
3280af22
NIS
3447 if (intuit_more(PL_bufptr)) {
3448 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3449 break;
3450 }
3451 /* FALL THROUGH */
3452
3453 case LEX_INTERPEND:
3280af22
NIS
3454 if (PL_lex_dojoin) {
3455 PL_lex_dojoin = FALSE;
3456 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3457#ifdef PERL_MAD
3458 if (PL_madskills) {
cd81e915
NC
3459 if (PL_thistoken)
3460 sv_free(PL_thistoken);
6b29d1f5 3461 PL_thistoken = newSVpvs("");
5db06880
NC
3462 }
3463#endif
bbf60fe6 3464 return REPORT(')');
79072805 3465 }
43a16006 3466 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3467 && SvEVALED(PL_lex_repl))
43a16006 3468 {
e9fa98b2 3469 if (PL_bufptr != PL_bufend)
cea2e8a9 3470 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3471 PL_lex_repl = NULL;
e9fa98b2 3472 }
79072805
LW
3473 /* FALLTHROUGH */
3474 case LEX_INTERPCONCAT:
3475#ifdef DEBUGGING
3280af22 3476 if (PL_lex_brackets)
cea2e8a9 3477 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3478#endif
3280af22 3479 if (PL_bufptr == PL_bufend)
bbf60fe6 3480 return REPORT(sublex_done());
79072805 3481
3280af22
NIS
3482 if (SvIVX(PL_linestr) == '\'') {
3483 SV *sv = newSVsv(PL_linestr);
3484 if (!PL_lex_inpat)
76e3520e 3485 sv = tokeq(sv);
3280af22 3486 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3487 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3488 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3489 s = PL_bufend;
79072805
LW
3490 }
3491 else {
3280af22 3492 s = scan_const(PL_bufptr);
79072805 3493 if (*s == '\\')
3280af22 3494 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3495 else
3280af22 3496 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3497 }
3498
3280af22 3499 if (s != PL_bufptr) {
cd81e915 3500 start_force(PL_curforce);
5db06880
NC
3501 if (PL_madskills) {
3502 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3503 }
9ded7720 3504 NEXTVAL_NEXTTOKE = yylval;
3280af22 3505 PL_expect = XTERM;
79072805 3506 force_next(THING);
131b3ad0 3507 if (PL_lex_starts++) {
5db06880
NC
3508#ifdef PERL_MAD
3509 if (PL_madskills) {
cd81e915
NC
3510 if (PL_thistoken)
3511 sv_free(PL_thistoken);
6b29d1f5 3512 PL_thistoken = newSVpvs("");
5db06880
NC
3513 }
3514#endif
131b3ad0
DM
3515 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3516 if (!PL_lex_casemods && PL_lex_inpat)
3517 OPERATOR(',');
3518 else
3519 Aop(OP_CONCAT);
3520 }
79072805 3521 else {
3280af22 3522 PL_bufptr = s;
cea2e8a9 3523 return yylex();
79072805
LW
3524 }
3525 }
3526
cea2e8a9 3527 return yylex();
a0d0e21e 3528 case LEX_FORMLINE:
3280af22
NIS
3529 PL_lex_state = LEX_NORMAL;
3530 s = scan_formline(PL_bufptr);
3531 if (!PL_lex_formbrack)
a0d0e21e
LW
3532 goto rightbracket;
3533 OPERATOR(';');
79072805
LW
3534 }
3535
3280af22
NIS
3536 s = PL_bufptr;
3537 PL_oldoldbufptr = PL_oldbufptr;
3538 PL_oldbufptr = s;
463ee0b2
LW
3539
3540 retry:
5db06880 3541#ifdef PERL_MAD
cd81e915
NC
3542 if (PL_thistoken) {
3543 sv_free(PL_thistoken);
3544 PL_thistoken = 0;
5db06880 3545 }
cd81e915 3546 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3547#endif
378cc40b
LW
3548 switch (*s) {
3549 default:
7e2040f0 3550 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3551 goto keylookup;
cea2e8a9 3552 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3553 case 4:
3554 case 26:
3555 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3556 case 0:
5db06880
NC
3557#ifdef PERL_MAD
3558 if (PL_madskills)
cd81e915 3559 PL_faketokens = 0;
5db06880 3560#endif
3280af22
NIS
3561 if (!PL_rsfp) {
3562 PL_last_uni = 0;
3563 PL_last_lop = 0;
c5ee2135 3564 if (PL_lex_brackets) {
10edeb5d
JH
3565 yyerror((const char *)
3566 (PL_lex_formbrack
3567 ? "Format not terminated"
3568 : "Missing right curly or square bracket"));
c5ee2135 3569 }