This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlfunc.pod - unite gmtime and localtime
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a
PP
71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 301 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
c35e046a
AL
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 NOOP;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 501 char he_name[32] = "feature_";
6fca0082 502 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
664 PL_linestr = line;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 667 s = SvPV_const(PL_linestr, len);
6f27f9a7 668 if (!len || s[len-1] != ';') {
3280af22
NIS
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 671 sv_catpvs(PL_linestr, "\n;");
8990e307 672 }
3280af22
NIS
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 676 PL_last_lop = PL_last_uni = NULL;
3280af22 677 PL_rsfp = 0;
79072805 678}
a687059c 679
ffb4593c
NT
680/*
681 * Perl_lex_end
9cbb5ea2
GS
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
ffb4593c
NT
684 */
685
463ee0b2 686void
864dbfa3 687Perl_lex_end(pTHX)
463ee0b2 688{
97aff369 689 dVAR;
3280af22 690 PL_doextract = FALSE;
463ee0b2
LW
691}
692
ffb4593c
NT
693/*
694 * S_incline
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
57843af0 697 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 698 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_incline(pTHX_ char *s)
463ee0b2 705{
97aff369 706 dVAR;
463ee0b2
LW
707 char *t;
708 char *n;
73659bf1 709 char *e;
463ee0b2 710 char ch;
463ee0b2 711
57843af0 712 CopLINE_inc(PL_curcop);
463ee0b2
LW
713 if (*s++ != '#')
714 return;
d4c19fe8
AL
715 while (SPACE_OR_TAB(*s))
716 s++;
73659bf1
GS
717 if (strnEQ(s, "line", 4))
718 s += 4;
719 else
720 return;
084592ab 721 if (SPACE_OR_TAB(*s))
73659bf1 722 s++;
4e553d73 723 else
73659bf1 724 return;
d4c19fe8
AL
725 while (SPACE_OR_TAB(*s))
726 s++;
463ee0b2
LW
727 if (!isDIGIT(*s))
728 return;
d4c19fe8 729
463ee0b2
LW
730 n = s;
731 while (isDIGIT(*s))
732 s++;
bf4acbe4 733 while (SPACE_OR_TAB(*s))
463ee0b2 734 s++;
73659bf1 735 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 736 s++;
73659bf1
GS
737 e = t + 1;
738 }
463ee0b2 739 else {
c35e046a
AL
740 t = s;
741 while (!isSPACE(*t))
742 t++;
73659bf1 743 e = t;
463ee0b2 744 }
bf4acbe4 745 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
746 e++;
747 if (*e != '\n' && *e != '\0')
748 return; /* false alarm */
749
463ee0b2
LW
750 ch = *t;
751 *t = '\0';
f4dd75d9 752 if (t - s > 0) {
8a5ee598 753#ifndef USE_ITHREADS
c4420975 754 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
755 STRLEN tmplen = cf ? strlen(cf) : 0;
756 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
757 /* must copy *{"::_<(eval N)[oldfilename:L]"}
758 * to *{"::_<newfilename"} */
759 char smallbuf[256], smallbuf2[256];
760 char *tmpbuf, *tmpbuf2;
8a5ee598 761 GV **gvp, *gv2;
e66cf94c
RGS
762 STRLEN tmplen2 = strlen(s);
763 if (tmplen + 3 < sizeof smallbuf)
764 tmpbuf = smallbuf;
765 else
766 Newx(tmpbuf, tmplen + 3, char);
767 if (tmplen2 + 3 < sizeof smallbuf2)
768 tmpbuf2 = smallbuf2;
769 else
770 Newx(tmpbuf2, tmplen2 + 3, char);
771 tmpbuf[0] = tmpbuf2[0] = '_';
772 tmpbuf[1] = tmpbuf2[1] = '<';
773 memcpy(tmpbuf + 2, cf, ++tmplen);
774 memcpy(tmpbuf2 + 2, s, ++tmplen2);
775 ++tmplen; ++tmplen2;
8a5ee598
RGS
776 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
777 if (gvp) {
778 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
779 if (!isGV(gv2))
780 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
781 /* adjust ${"::_<newfilename"} to store the new file name */
782 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
783 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
784 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
785 }
e66cf94c
RGS
786 if (tmpbuf != smallbuf) Safefree(tmpbuf);
787 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
788 }
8a5ee598 789#endif
05ec9bb3 790 CopFILE_free(PL_curcop);
57843af0 791 CopFILE_set(PL_curcop, s);
f4dd75d9 792 }
463ee0b2 793 *t = ch;
57843af0 794 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
795}
796
29595ff2 797#ifdef PERL_MAD
cd81e915 798/* skip space before PL_thistoken */
29595ff2
NC
799
800STATIC char *
801S_skipspace0(pTHX_ register char *s)
802{
803 s = skipspace(s);
804 if (!PL_madskills)
805 return s;
cd81e915
NC
806 if (PL_skipwhite) {
807 if (!PL_thiswhite)
808 PL_thiswhite = newSVpvn("",0);
809 sv_catsv(PL_thiswhite, PL_skipwhite);
810 sv_free(PL_skipwhite);
811 PL_skipwhite = 0;
812 }
813 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
814 return s;
815}
816
cd81e915 817/* skip space after PL_thistoken */
29595ff2
NC
818
819STATIC char *
820S_skipspace1(pTHX_ register char *s)
821{
d4c19fe8 822 const char *start = s;
29595ff2
NC
823 I32 startoff = start - SvPVX(PL_linestr);
824
825 s = skipspace(s);
826 if (!PL_madskills)
827 return s;
828 start = SvPVX(PL_linestr) + startoff;
cd81e915 829 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 830 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
831 PL_thistoken = newSVpvn(tstart, start - tstart);
832 }
833 PL_realtokenstart = -1;
834 if (PL_skipwhite) {
835 if (!PL_nextwhite)
836 PL_nextwhite = newSVpvn("",0);
837 sv_catsv(PL_nextwhite, PL_skipwhite);
838 sv_free(PL_skipwhite);
839 PL_skipwhite = 0;
29595ff2
NC
840 }
841 return s;
842}
843
844STATIC char *
845S_skipspace2(pTHX_ register char *s, SV **svp)
846{
c35e046a
AL
847 char *start;
848 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
849 const I32 startoff = s - SvPVX(PL_linestr);
850
29595ff2
NC
851 s = skipspace(s);
852 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
853 if (!PL_madskills || !svp)
854 return s;
855 start = SvPVX(PL_linestr) + startoff;
cd81e915 856 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 857 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
858 PL_thistoken = newSVpvn(tstart, start - tstart);
859 PL_realtokenstart = -1;
29595ff2 860 }
cd81e915 861 if (PL_skipwhite) {
29595ff2
NC
862 if (!*svp)
863 *svp = newSVpvn("",0);
cd81e915
NC
864 sv_setsv(*svp, PL_skipwhite);
865 sv_free(PL_skipwhite);
866 PL_skipwhite = 0;
29595ff2
NC
867 }
868
869 return s;
870}
871#endif
872
ffb4593c
NT
873/*
874 * S_skipspace
875 * Called to gobble the appropriate amount and type of whitespace.
876 * Skips comments as well.
877 */
878
76e3520e 879STATIC char *
cea2e8a9 880S_skipspace(pTHX_ register char *s)
a687059c 881{
97aff369 882 dVAR;
5db06880
NC
883#ifdef PERL_MAD
884 int curoff;
885 int startoff = s - SvPVX(PL_linestr);
886
cd81e915
NC
887 if (PL_skipwhite) {
888 sv_free(PL_skipwhite);
889 PL_skipwhite = 0;
5db06880
NC
890 }
891#endif
892
3280af22 893 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 894 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 895 s++;
5db06880
NC
896#ifdef PERL_MAD
897 goto done;
898#else
463ee0b2 899 return s;
5db06880 900#endif
463ee0b2
LW
901 }
902 for (;;) {
fd049845 903 STRLEN prevlen;
09bef843 904 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 905 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
906 while (s < PL_bufend && isSPACE(*s)) {
907 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
908 incline(s);
909 }
ffb4593c
NT
910
911 /* comment */
3280af22
NIS
912 if (s < PL_bufend && *s == '#') {
913 while (s < PL_bufend && *s != '\n')
463ee0b2 914 s++;
60e6418e 915 if (s < PL_bufend) {
463ee0b2 916 s++;
60e6418e
GS
917 if (PL_in_eval && !PL_rsfp) {
918 incline(s);
919 continue;
920 }
921 }
463ee0b2 922 }
ffb4593c
NT
923
924 /* only continue to recharge the buffer if we're at the end
925 * of the buffer, we're not reading from a source filter, and
926 * we're in normal lexing mode
927 */
09bef843
SB
928 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
929 PL_lex_state == LEX_FORMLINE)
5db06880
NC
930#ifdef PERL_MAD
931 goto done;
932#else
463ee0b2 933 return s;
5db06880 934#endif
ffb4593c
NT
935
936 /* try to recharge the buffer */
5db06880
NC
937#ifdef PERL_MAD
938 curoff = s - SvPVX(PL_linestr);
939#endif
940
9cbb5ea2 941 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 942 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 943 {
5db06880
NC
944#ifdef PERL_MAD
945 if (PL_madskills && curoff != startoff) {
cd81e915
NC
946 if (!PL_skipwhite)
947 PL_skipwhite = newSVpvn("",0);
948 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
949 curoff - startoff);
950 }
951
952 /* mustn't throw out old stuff yet if madpropping */
953 SvCUR(PL_linestr) = curoff;
954 s = SvPVX(PL_linestr) + curoff;
955 *s = 0;
956 if (curoff && s[-1] == '\n')
957 s[-1] = ' ';
958#endif
959
9cbb5ea2 960 /* end of file. Add on the -p or -n magic */
cd81e915 961 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 962 if (PL_minus_p) {
5db06880
NC
963#ifdef PERL_MAD
964 sv_catpv(PL_linestr,
965 ";}continue{print or die qq(-p destination: $!\\n);}");
966#else
01a19ab0
NC
967 sv_setpv(PL_linestr,
968 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 969#endif
3280af22 970 PL_minus_n = PL_minus_p = 0;
a0d0e21e 971 }
01a19ab0 972 else if (PL_minus_n) {
5db06880
NC
973#ifdef PERL_MAD
974 sv_catpvn(PL_linestr, ";}", 2);
975#else
01a19ab0 976 sv_setpvn(PL_linestr, ";}", 2);
5db06880 977#endif
01a19ab0
NC
978 PL_minus_n = 0;
979 }
a0d0e21e 980 else
5db06880
NC
981#ifdef PERL_MAD
982 sv_catpvn(PL_linestr,";", 1);
983#else
4147a61b 984 sv_setpvn(PL_linestr,";", 1);
5db06880 985#endif
ffb4593c
NT
986
987 /* reset variables for next time we lex */
9cbb5ea2 988 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
989 = SvPVX(PL_linestr)
990#ifdef PERL_MAD
991 + curoff
992#endif
993 ;
3280af22 994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 995 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
996
997 /* Close the filehandle. Could be from -P preprocessor,
998 * STDIN, or a regular file. If we were reading code from
999 * STDIN (because the commandline held no -e or filename)
1000 * then we don't close it, we reset it so the code can
1001 * read from STDIN too.
1002 */
1003
3280af22
NIS
1004 if (PL_preprocess && !PL_in_eval)
1005 (void)PerlProc_pclose(PL_rsfp);
1006 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1007 PerlIO_clearerr(PL_rsfp);
8990e307 1008 else
3280af22 1009 (void)PerlIO_close(PL_rsfp);
4608196e 1010 PL_rsfp = NULL;
463ee0b2
LW
1011 return s;
1012 }
ffb4593c
NT
1013
1014 /* not at end of file, so we only read another line */
09bef843
SB
1015 /* make corresponding updates to old pointers, for yyerror() */
1016 oldprevlen = PL_oldbufptr - PL_bufend;
1017 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1018 if (PL_last_uni)
1019 oldunilen = PL_last_uni - PL_bufend;
1020 if (PL_last_lop)
1021 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1022 PL_linestart = PL_bufptr = s + prevlen;
1023 PL_bufend = s + SvCUR(PL_linestr);
1024 s = PL_bufptr;
09bef843
SB
1025 PL_oldbufptr = s + oldprevlen;
1026 PL_oldoldbufptr = s + oldoldprevlen;
1027 if (PL_last_uni)
1028 PL_last_uni = s + oldunilen;
1029 if (PL_last_lop)
1030 PL_last_lop = s + oldloplen;
a0d0e21e 1031 incline(s);
ffb4593c
NT
1032
1033 /* debugger active and we're not compiling the debugger code,
1034 * so store the line into the debugger's array of lines
1035 */
3280af22 1036 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1037 SV * const sv = newSV(0);
8990e307
LW
1038
1039 sv_upgrade(sv, SVt_PVMG);
3280af22 1040 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1041 (void)SvIOK_on(sv);
45977657 1042 SvIV_set(sv, 0);
36c7798d 1043 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1044 }
463ee0b2 1045 }
5db06880
NC
1046
1047#ifdef PERL_MAD
1048 done:
1049 if (PL_madskills) {
cd81e915
NC
1050 if (!PL_skipwhite)
1051 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1052 curoff = s - SvPVX(PL_linestr);
1053 if (curoff - startoff)
cd81e915 1054 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1055 curoff - startoff);
1056 }
1057 return s;
1058#endif
a687059c 1059}
378cc40b 1060
ffb4593c
NT
1061/*
1062 * S_check_uni
1063 * Check the unary operators to ensure there's no ambiguity in how they're
1064 * used. An ambiguous piece of code would be:
1065 * rand + 5
1066 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1067 * the +5 is its argument.
1068 */
1069
76e3520e 1070STATIC void
cea2e8a9 1071S_check_uni(pTHX)
ba106d47 1072{
97aff369 1073 dVAR;
d4c19fe8
AL
1074 const char *s;
1075 const char *t;
2f3197b3 1076
3280af22 1077 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1078 return;
3280af22
NIS
1079 while (isSPACE(*PL_last_uni))
1080 PL_last_uni++;
c35e046a
AL
1081 s = PL_last_uni;
1082 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1083 s++;
3280af22 1084 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1085 return;
6136c704 1086
0453d815 1087 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1088 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1089 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1090 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1091 }
2f3197b3
LW
1092}
1093
ffb4593c
NT
1094/*
1095 * LOP : macro to build a list operator. Its behaviour has been replaced
1096 * with a subroutine, S_lop() for which LOP is just another name.
1097 */
1098
a0d0e21e
LW
1099#define LOP(f,x) return lop(f,x,s)
1100
ffb4593c
NT
1101/*
1102 * S_lop
1103 * Build a list operator (or something that might be one). The rules:
1104 * - if we have a next token, then it's a list operator [why?]
1105 * - if the next thing is an opening paren, then it's a function
1106 * - else it's a list operator
1107 */
1108
76e3520e 1109STATIC I32
a0be28da 1110S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1111{
97aff369 1112 dVAR;
79072805 1113 yylval.ival = f;
35c8bce7 1114 CLINE;
3280af22
NIS
1115 PL_expect = x;
1116 PL_bufptr = s;
1117 PL_last_lop = PL_oldbufptr;
eb160463 1118 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1119#ifdef PERL_MAD
1120 if (PL_lasttoke)
1121 return REPORT(LSTOP);
1122#else
3280af22 1123 if (PL_nexttoke)
bbf60fe6 1124 return REPORT(LSTOP);
5db06880 1125#endif
79072805 1126 if (*s == '(')
bbf60fe6 1127 return REPORT(FUNC);
29595ff2 1128 s = PEEKSPACE(s);
79072805 1129 if (*s == '(')
bbf60fe6 1130 return REPORT(FUNC);
79072805 1131 else
bbf60fe6 1132 return REPORT(LSTOP);
79072805
LW
1133}
1134
5db06880
NC
1135#ifdef PERL_MAD
1136 /*
1137 * S_start_force
1138 * Sets up for an eventual force_next(). start_force(0) basically does
1139 * an unshift, while start_force(-1) does a push. yylex removes items
1140 * on the "pop" end.
1141 */
1142
1143STATIC void
1144S_start_force(pTHX_ int where)
1145{
1146 int i;
1147
cd81e915 1148 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1149 where = PL_lasttoke;
cd81e915
NC
1150 assert(PL_curforce < 0 || PL_curforce == where);
1151 if (PL_curforce != where) {
5db06880
NC
1152 for (i = PL_lasttoke; i > where; --i) {
1153 PL_nexttoke[i] = PL_nexttoke[i-1];
1154 }
1155 PL_lasttoke++;
1156 }
cd81e915 1157 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1158 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1159 PL_curforce = where;
1160 if (PL_nextwhite) {
5db06880
NC
1161 if (PL_madskills)
1162 curmad('^', newSVpvn("",0));
cd81e915 1163 CURMAD('_', PL_nextwhite);
5db06880
NC
1164 }
1165}
1166
1167STATIC void
1168S_curmad(pTHX_ char slot, SV *sv)
1169{
1170 MADPROP **where;
1171
1172 if (!sv)
1173 return;
cd81e915
NC
1174 if (PL_curforce < 0)
1175 where = &PL_thismad;
5db06880 1176 else
cd81e915 1177 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1178
cd81e915 1179 if (PL_faketokens)
5db06880
NC
1180 sv_setpvn(sv, "", 0);
1181 else {
1182 if (!IN_BYTES) {
1183 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1184 SvUTF8_on(sv);
1185 else if (PL_encoding) {
1186 sv_recode_to_utf8(sv, PL_encoding);
1187 }
1188 }
1189 }
1190
1191 /* keep a slot open for the head of the list? */
1192 if (slot != '_' && *where && (*where)->mad_key == '^') {
1193 (*where)->mad_key = slot;
1194 sv_free((*where)->mad_val);
1195 (*where)->mad_val = (void*)sv;
1196 }
1197 else
1198 addmad(newMADsv(slot, sv), where, 0);
1199}
1200#else
b3f24c00
MHM
1201# define start_force(where) NOOP
1202# define curmad(slot, sv) NOOP
5db06880
NC
1203#endif
1204
ffb4593c
NT
1205/*
1206 * S_force_next
9cbb5ea2 1207 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1208 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1209 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1210 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1211 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1212 */
1213
4e553d73 1214STATIC void
cea2e8a9 1215S_force_next(pTHX_ I32 type)
79072805 1216{
97aff369 1217 dVAR;
5db06880 1218#ifdef PERL_MAD
cd81e915 1219 if (PL_curforce < 0)
5db06880 1220 start_force(PL_lasttoke);
cd81e915 1221 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1222 if (PL_lex_state != LEX_KNOWNEXT)
1223 PL_lex_defer = PL_lex_state;
1224 PL_lex_state = LEX_KNOWNEXT;
1225 PL_lex_expect = PL_expect;
cd81e915 1226 PL_curforce = -1;
5db06880 1227#else
3280af22
NIS
1228 PL_nexttype[PL_nexttoke] = type;
1229 PL_nexttoke++;
1230 if (PL_lex_state != LEX_KNOWNEXT) {
1231 PL_lex_defer = PL_lex_state;
1232 PL_lex_expect = PL_expect;
1233 PL_lex_state = LEX_KNOWNEXT;
79072805 1234 }
5db06880 1235#endif
79072805
LW
1236}
1237
d0a148a6
NC
1238STATIC SV *
1239S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1240{
97aff369 1241 dVAR;
9d4ba2ae 1242 SV * const sv = newSVpvn(start,len);
bfed75c6 1243 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1244 SvUTF8_on(sv);
1245 return sv;
1246}
1247
ffb4593c
NT
1248/*
1249 * S_force_word
1250 * When the lexer knows the next thing is a word (for instance, it has
1251 * just seen -> and it knows that the next char is a word char, then
1252 * it calls S_force_word to stick the next word into the PL_next lookahead.
1253 *
1254 * Arguments:
b1b65b59 1255 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1256 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1257 * int check_keyword : if true, Perl checks to make sure the word isn't
1258 * a keyword (do this if the word is a label, e.g. goto FOO)
1259 * int allow_pack : if true, : characters will also be allowed (require,
1260 * use, etc. do this)
9cbb5ea2 1261 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1262 */
1263
76e3520e 1264STATIC char *
cea2e8a9 1265S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1266{
97aff369 1267 dVAR;
463ee0b2
LW
1268 register char *s;
1269 STRLEN len;
4e553d73 1270
29595ff2 1271 start = SKIPSPACE1(start);
463ee0b2 1272 s = start;
7e2040f0 1273 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1274 (allow_pack && *s == ':') ||
15f0808c 1275 (allow_initial_tick && *s == '\'') )
a0d0e21e 1276 {
3280af22
NIS
1277 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1278 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1279 return start;
cd81e915 1280 start_force(PL_curforce);
5db06880
NC
1281 if (PL_madskills)
1282 curmad('X', newSVpvn(start,s-start));
463ee0b2 1283 if (token == METHOD) {
29595ff2 1284 s = SKIPSPACE1(s);
463ee0b2 1285 if (*s == '(')
3280af22 1286 PL_expect = XTERM;
463ee0b2 1287 else {
3280af22 1288 PL_expect = XOPERATOR;
463ee0b2 1289 }
79072805 1290 }
9ded7720 1291 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1292 = (OP*)newSVOP(OP_CONST,0,
1293 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1294 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1295 force_next(token);
1296 }
1297 return s;
1298}
1299
ffb4593c
NT
1300/*
1301 * S_force_ident
9cbb5ea2 1302 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1303 * text only contains the "foo" portion. The first argument is a pointer
1304 * to the "foo", and the second argument is the type symbol to prefix.
1305 * Forces the next token to be a "WORD".
9cbb5ea2 1306 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1307 */
1308
76e3520e 1309STATIC void
bfed75c6 1310S_force_ident(pTHX_ register const char *s, int kind)
79072805 1311{
97aff369 1312 dVAR;
c35e046a 1313 if (*s) {
90e5519e
NC
1314 const STRLEN len = strlen(s);
1315 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1316 start_force(PL_curforce);
9ded7720 1317 NEXTVAL_NEXTTOKE.opval = o;
79072805 1318 force_next(WORD);
748a9306 1319 if (kind) {
11343788 1320 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1321 /* XXX see note in pp_entereval() for why we forgo typo
1322 warnings if the symbol must be introduced in an eval.
1323 GSAR 96-10-12 */
90e5519e
NC
1324 gv_fetchpvn_flags(s, len,
1325 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1326 : GV_ADD,
1327 kind == '$' ? SVt_PV :
1328 kind == '@' ? SVt_PVAV :
1329 kind == '%' ? SVt_PVHV :
a0d0e21e 1330 SVt_PVGV
90e5519e 1331 );
748a9306 1332 }
79072805
LW
1333 }
1334}
1335
1571675a
GS
1336NV
1337Perl_str_to_version(pTHX_ SV *sv)
1338{
1339 NV retval = 0.0;
1340 NV nshift = 1.0;
1341 STRLEN len;
cfd0369c 1342 const char *start = SvPV_const(sv,len);
9d4ba2ae 1343 const char * const end = start + len;
504618e9 1344 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1345 while (start < end) {
ba210ebe 1346 STRLEN skip;
1571675a
GS
1347 UV n;
1348 if (utf)
9041c2e3 1349 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1350 else {
1351 n = *(U8*)start;
1352 skip = 1;
1353 }
1354 retval += ((NV)n)/nshift;
1355 start += skip;
1356 nshift *= 1000;
1357 }
1358 return retval;
1359}
1360
4e553d73 1361/*
ffb4593c
NT
1362 * S_force_version
1363 * Forces the next token to be a version number.
e759cc13
RGS
1364 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1365 * and if "guessing" is TRUE, then no new token is created (and the caller
1366 * must use an alternative parsing method).
ffb4593c
NT
1367 */
1368
76e3520e 1369STATIC char *
e759cc13 1370S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1371{
97aff369 1372 dVAR;
5f66b61c 1373 OP *version = NULL;
44dcb63b 1374 char *d;
5db06880
NC
1375#ifdef PERL_MAD
1376 I32 startoff = s - SvPVX(PL_linestr);
1377#endif
89bfa8cd 1378
29595ff2 1379 s = SKIPSPACE1(s);
89bfa8cd 1380
44dcb63b 1381 d = s;
dd629d5b 1382 if (*d == 'v')
44dcb63b 1383 d++;
44dcb63b 1384 if (isDIGIT(*d)) {
e759cc13
RGS
1385 while (isDIGIT(*d) || *d == '_' || *d == '.')
1386 d++;
5db06880
NC
1387#ifdef PERL_MAD
1388 if (PL_madskills) {
cd81e915 1389 start_force(PL_curforce);
5db06880
NC
1390 curmad('X', newSVpvn(s,d-s));
1391 }
1392#endif
9f3d182e 1393 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1394 SV *ver;
b73d6f50 1395 s = scan_num(s, &yylval);
89bfa8cd 1396 version = yylval.opval;
dd629d5b
GS
1397 ver = cSVOPx(version)->op_sv;
1398 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1399 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1400 SvNV_set(ver, str_to_version(ver));
1571675a 1401 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1402 }
89bfa8cd 1403 }
5db06880
NC
1404 else if (guessing) {
1405#ifdef PERL_MAD
1406 if (PL_madskills) {
cd81e915
NC
1407 sv_free(PL_nextwhite); /* let next token collect whitespace */
1408 PL_nextwhite = 0;
5db06880
NC
1409 s = SvPVX(PL_linestr) + startoff;
1410 }
1411#endif
e759cc13 1412 return s;
5db06880 1413 }
89bfa8cd
PP
1414 }
1415
5db06880
NC
1416#ifdef PERL_MAD
1417 if (PL_madskills && !version) {
cd81e915
NC
1418 sv_free(PL_nextwhite); /* let next token collect whitespace */
1419 PL_nextwhite = 0;
5db06880
NC
1420 s = SvPVX(PL_linestr) + startoff;
1421 }
1422#endif
89bfa8cd 1423 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1424 start_force(PL_curforce);
9ded7720 1425 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1426 force_next(WORD);
89bfa8cd 1427
e759cc13 1428 return s;
89bfa8cd
PP
1429}
1430
ffb4593c
NT
1431/*
1432 * S_tokeq
1433 * Tokenize a quoted string passed in as an SV. It finds the next
1434 * chunk, up to end of string or a backslash. It may make a new
1435 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1436 * turns \\ into \.
1437 */
1438
76e3520e 1439STATIC SV *
cea2e8a9 1440S_tokeq(pTHX_ SV *sv)
79072805 1441{
97aff369 1442 dVAR;
79072805
LW
1443 register char *s;
1444 register char *send;
1445 register char *d;
b3ac6de7
IZ
1446 STRLEN len = 0;
1447 SV *pv = sv;
79072805
LW
1448
1449 if (!SvLEN(sv))
b3ac6de7 1450 goto finish;
79072805 1451
a0d0e21e 1452 s = SvPV_force(sv, len);
21a311ee 1453 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1454 goto finish;
463ee0b2 1455 send = s + len;
79072805
LW
1456 while (s < send && *s != '\\')
1457 s++;
1458 if (s == send)
b3ac6de7 1459 goto finish;
79072805 1460 d = s;
be4731d2 1461 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1462 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1463 if (SvUTF8(sv))
1464 SvUTF8_on(pv);
1465 }
79072805
LW
1466 while (s < send) {
1467 if (*s == '\\') {
a0d0e21e 1468 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1469 s++; /* all that, just for this */
1470 }
1471 *d++ = *s++;
1472 }
1473 *d = '\0';
95a20fc0 1474 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1475 finish:
3280af22 1476 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1477 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1478 return sv;
1479}
1480
ffb4593c
NT
1481/*
1482 * Now come three functions related to double-quote context,
1483 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1484 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1485 * interact with PL_lex_state, and create fake ( ... ) argument lists
1486 * to handle functions and concatenation.
1487 * They assume that whoever calls them will be setting up a fake
1488 * join call, because each subthing puts a ',' after it. This lets
1489 * "lower \luPpEr"
1490 * become
1491 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1492 *
1493 * (I'm not sure whether the spurious commas at the end of lcfirst's
1494 * arguments and join's arguments are created or not).
1495 */
1496
1497/*
1498 * S_sublex_start
1499 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1500 *
1501 * Pattern matching will set PL_lex_op to the pattern-matching op to
1502 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1503 *
1504 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1505 *
1506 * Everything else becomes a FUNC.
1507 *
1508 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1509 * had an OP_CONST or OP_READLINE). This just sets us up for a
1510 * call to S_sublex_push().
1511 */
1512
76e3520e 1513STATIC I32
cea2e8a9 1514S_sublex_start(pTHX)
79072805 1515{
97aff369 1516 dVAR;
0d46e09a 1517 register const I32 op_type = yylval.ival;
79072805
LW
1518
1519 if (op_type == OP_NULL) {
3280af22 1520 yylval.opval = PL_lex_op;
5f66b61c 1521 PL_lex_op = NULL;
79072805
LW
1522 return THING;
1523 }
1524 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1525 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1526
1527 if (SvTYPE(sv) == SVt_PVIV) {
1528 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1529 STRLEN len;
96a5add6 1530 const char * const p = SvPV_const(sv, len);
f54cb97a 1531 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1532 if (SvUTF8(sv))
1533 SvUTF8_on(nsv);
b3ac6de7
IZ
1534 SvREFCNT_dec(sv);
1535 sv = nsv;
4e553d73 1536 }
b3ac6de7 1537 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1538 PL_lex_stuff = NULL;
6f33ba73
RGS
1539 /* Allow <FH> // "foo" */
1540 if (op_type == OP_READLINE)
1541 PL_expect = XTERMORDORDOR;
79072805
LW
1542 return THING;
1543 }
1544
3280af22
NIS
1545 PL_sublex_info.super_state = PL_lex_state;
1546 PL_sublex_info.sub_inwhat = op_type;
1547 PL_sublex_info.sub_op = PL_lex_op;
1548 PL_lex_state = LEX_INTERPPUSH;
55497cff 1549
3280af22
NIS
1550 PL_expect = XTERM;
1551 if (PL_lex_op) {
1552 yylval.opval = PL_lex_op;
5f66b61c 1553 PL_lex_op = NULL;
55497cff
PP
1554 return PMFUNC;
1555 }
1556 else
1557 return FUNC;
1558}
1559
ffb4593c
NT
1560/*
1561 * S_sublex_push
1562 * Create a new scope to save the lexing state. The scope will be
1563 * ended in S_sublex_done. Returns a '(', starting the function arguments
1564 * to the uc, lc, etc. found before.
1565 * Sets PL_lex_state to LEX_INTERPCONCAT.
1566 */
1567
76e3520e 1568STATIC I32
cea2e8a9 1569S_sublex_push(pTHX)
55497cff 1570{
27da23d5 1571 dVAR;
f46d017c 1572 ENTER;
55497cff 1573
3280af22
NIS
1574 PL_lex_state = PL_sublex_info.super_state;
1575 SAVEI32(PL_lex_dojoin);
1576 SAVEI32(PL_lex_brackets);
3280af22
NIS
1577 SAVEI32(PL_lex_casemods);
1578 SAVEI32(PL_lex_starts);
1579 SAVEI32(PL_lex_state);
7766f137 1580 SAVEVPTR(PL_lex_inpat);
3280af22 1581 SAVEI32(PL_lex_inwhat);
57843af0 1582 SAVECOPLINE(PL_curcop);
3280af22 1583 SAVEPPTR(PL_bufptr);
8452ff4b 1584 SAVEPPTR(PL_bufend);
3280af22
NIS
1585 SAVEPPTR(PL_oldbufptr);
1586 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1587 SAVEPPTR(PL_last_lop);
1588 SAVEPPTR(PL_last_uni);
3280af22
NIS
1589 SAVEPPTR(PL_linestart);
1590 SAVESPTR(PL_linestr);
8edd5f42
RGS
1591 SAVEGENERICPV(PL_lex_brackstack);
1592 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1593
1594 PL_linestr = PL_lex_stuff;
a0714e2c 1595 PL_lex_stuff = NULL;
3280af22 1596
9cbb5ea2
GS
1597 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1598 = SvPVX(PL_linestr);
3280af22 1599 PL_bufend += SvCUR(PL_linestr);
bd61b366 1600 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1601 SAVEFREESV(PL_linestr);
1602
1603 PL_lex_dojoin = FALSE;
1604 PL_lex_brackets = 0;
a02a5408
JC
1605 Newx(PL_lex_brackstack, 120, char);
1606 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1607 PL_lex_casemods = 0;
1608 *PL_lex_casestack = '\0';
1609 PL_lex_starts = 0;
1610 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1611 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1612
1613 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1614 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1615 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1616 else
5f66b61c 1617 PL_lex_inpat = NULL;
79072805 1618
55497cff 1619 return '(';
79072805
LW
1620}
1621
ffb4593c
NT
1622/*
1623 * S_sublex_done
1624 * Restores lexer state after a S_sublex_push.
1625 */
1626
76e3520e 1627STATIC I32
cea2e8a9 1628S_sublex_done(pTHX)
79072805 1629{
27da23d5 1630 dVAR;
3280af22 1631 if (!PL_lex_starts++) {
396482e1 1632 SV * const sv = newSVpvs("");
9aa983d2
JH
1633 if (SvUTF8(PL_linestr))
1634 SvUTF8_on(sv);
3280af22 1635 PL_expect = XOPERATOR;
9aa983d2 1636 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1637 return THING;
1638 }
1639
3280af22
NIS
1640 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1641 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1642 return yylex();
79072805
LW
1643 }
1644
ffb4593c 1645 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1646 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1647 PL_linestr = PL_lex_repl;
1648 PL_lex_inpat = 0;
1649 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1650 PL_bufend += SvCUR(PL_linestr);
bd61b366 1651 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1652 SAVEFREESV(PL_linestr);
1653 PL_lex_dojoin = FALSE;
1654 PL_lex_brackets = 0;
3280af22
NIS
1655 PL_lex_casemods = 0;
1656 *PL_lex_casestack = '\0';
1657 PL_lex_starts = 0;
25da4f38 1658 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1659 PL_lex_state = LEX_INTERPNORMAL;
1660 PL_lex_starts++;
e9fa98b2
HS
1661 /* we don't clear PL_lex_repl here, so that we can check later
1662 whether this is an evalled subst; that means we rely on the
1663 logic to ensure sublex_done() is called again only via the
1664 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1665 }
e9fa98b2 1666 else {
3280af22 1667 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1668 PL_lex_repl = NULL;
e9fa98b2 1669 }
79072805 1670 return ',';
ffed7fef
LW
1671 }
1672 else {
5db06880
NC
1673#ifdef PERL_MAD
1674 if (PL_madskills) {
cd81e915
NC
1675 if (PL_thiswhite) {
1676 if (!PL_endwhite)
1677 PL_endwhite = newSVpvn("",0);
1678 sv_catsv(PL_endwhite, PL_thiswhite);
1679 PL_thiswhite = 0;
1680 }
1681 if (PL_thistoken)
1682 sv_setpvn(PL_thistoken,"",0);
5db06880 1683 else
cd81e915 1684 PL_realtokenstart = -1;
5db06880
NC
1685 }
1686#endif
f46d017c 1687 LEAVE;
3280af22
NIS
1688 PL_bufend = SvPVX(PL_linestr);
1689 PL_bufend += SvCUR(PL_linestr);
1690 PL_expect = XOPERATOR;
09bef843 1691 PL_sublex_info.sub_inwhat = 0;
79072805 1692 return ')';
ffed7fef
LW
1693 }
1694}
1695
02aa26ce
NT
1696/*
1697 scan_const
1698
1699 Extracts a pattern, double-quoted string, or transliteration. This
1700 is terrifying code.
1701
94def140 1702 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1703 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1704 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1705
94def140
ST
1706 Returns a pointer to the character scanned up to. If this is
1707 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1708 successfully parsed), will leave an OP for the substring scanned
1709 in yylval. Caller must intuit reason for not parsing further
1710 by looking at the next characters herself.
1711
02aa26ce
NT
1712 In patterns:
1713 backslashes:
1714 double-quoted style: \r and \n
1715 regexp special ones: \D \s
94def140
ST
1716 constants: \x31
1717 backrefs: \1
02aa26ce
NT
1718 case and quoting: \U \Q \E
1719 stops on @ and $, but not for $ as tail anchor
1720
1721 In transliterations:
1722 characters are VERY literal, except for - not at the start or end
94def140
ST
1723 of the string, which indicates a range. If the range is in bytes,
1724 scan_const expands the range to the full set of intermediate
1725 characters. If the range is in utf8, the hyphen is replaced with
1726 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1727
1728 In double-quoted strings:
1729 backslashes:
1730 double-quoted style: \r and \n
94def140
ST
1731 constants: \x31
1732 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1733 case and quoting: \U \Q \E
1734 stops on @ and $
1735
1736 scan_const does *not* construct ops to handle interpolated strings.
1737 It stops processing as soon as it finds an embedded $ or @ variable
1738 and leaves it to the caller to work out what's going on.
1739
94def140
ST
1740 embedded arrays (whether in pattern or not) could be:
1741 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1742
1743 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1744
1745 $ in pattern could be $foo or could be tail anchor. Assumption:
1746 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1747 followed by one of "()| \r\n\t"
02aa26ce
NT
1748
1749 \1 (backreferences) are turned into $1
1750
1751 The structure of the code is
1752 while (there's a character to process) {
94def140
ST
1753 handle transliteration ranges
1754 skip regexp comments /(?#comment)/ and codes /(?{code})/
1755 skip #-initiated comments in //x patterns
1756 check for embedded arrays
02aa26ce
NT
1757 check for embedded scalars
1758 if (backslash) {
94def140
ST
1759 leave intact backslashes from leaveit (below)
1760 deprecate \1 in substitution replacements
02aa26ce
NT
1761 handle string-changing backslashes \l \U \Q \E, etc.
1762 switch (what was escaped) {
94def140
ST
1763 handle \- in a transliteration (becomes a literal -)
1764 handle \132 (octal characters)
1765 handle \x15 and \x{1234} (hex characters)
1766 handle \N{name} (named characters)
1767 handle \cV (control characters)
1768 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1769 } (end switch)
1770 } (end if backslash)
1771 } (end while character to read)
4e553d73 1772
02aa26ce
NT
1773*/
1774
76e3520e 1775STATIC char *
cea2e8a9 1776S_scan_const(pTHX_ char *start)
79072805 1777{
97aff369 1778 dVAR;
3280af22 1779 register char *send = PL_bufend; /* end of the constant */
561b68a9 1780 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1781 register char *s = start; /* start of the constant */
1782 register char *d = SvPVX(sv); /* destination for copies */
1783 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1784 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1785 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1786 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1787 UV uv;
4c3a8340
ST
1788#ifdef EBCDIC
1789 UV literal_endpoint = 0;
e294cc5d 1790 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1791#endif
012bcf8d 1792
d4c19fe8 1793 const char * const leaveit = /* set of acceptably-backslashed characters */
3280af22 1794 PL_lex_inpat
25f684f7 1795 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1796 : "";
79072805 1797
2b9d42f0
NIS
1798 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1799 /* If we are doing a trans and we know we want UTF8 set expectation */
1800 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1801 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1802 }
1803
1804
79072805 1805 while (s < send || dorange) {
02aa26ce 1806 /* get transliterations out of the way (they're most literal) */
3280af22 1807 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1808 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1809 if (dorange) {
1ba5c669
JH
1810 I32 i; /* current expanded character */
1811 I32 min; /* first character in range */
1812 I32 max; /* last character in range */
02aa26ce 1813
e294cc5d
JH
1814#ifdef EBCDIC
1815 UV uvmax = 0;
1816#endif
1817
1818 if (has_utf8
1819#ifdef EBCDIC
1820 && !native_range
1821#endif
1822 ) {
9d4ba2ae 1823 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1824 char *e = d++;
1825 while (e-- > c)
1826 *(e + 1) = *e;
25716404 1827 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1828 /* mark the range as done, and continue */
1829 dorange = FALSE;
1830 didrange = TRUE;
1831 continue;
1832 }
2b9d42f0 1833
95a20fc0 1834 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1835#ifdef EBCDIC
1836 SvGROW(sv,
1837 SvLEN(sv) + (has_utf8 ?
1838 (512 - UTF_CONTINUATION_MARK +
1839 UNISKIP(0x100))
1840 : 256));
1841 /* How many two-byte within 0..255: 128 in UTF-8,
1842 * 96 in UTF-8-mod. */
1843#else
9cbb5ea2 1844 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1845#endif
9cbb5ea2 1846 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1847#ifdef EBCDIC
1848 if (has_utf8) {
1849 int j;
1850 for (j = 0; j <= 1; j++) {
1851 char * const c = (char*)utf8_hop((U8*)d, -1);
1852 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1853 if (j)
1854 min = (U8)uv;
1855 else if (uv < 256)
1856 max = (U8)uv;
1857 else {
1858 max = (U8)0xff; /* only to \xff */
1859 uvmax = uv; /* \x{100} to uvmax */
1860 }
1861 d = c; /* eat endpoint chars */
1862 }
1863 }
1864 else {
1865#endif
1866 d -= 2; /* eat the first char and the - */
1867 min = (U8)*d; /* first char in range */
1868 max = (U8)d[1]; /* last char in range */
1869#ifdef EBCDIC
1870 }
1871#endif
8ada0baa 1872
c2e66d9e 1873 if (min > max) {
01ec43d0 1874 Perl_croak(aTHX_
d1573ac7 1875 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1876 (char)min, (char)max);
c2e66d9e
GS
1877 }
1878
c7f1f016 1879#ifdef EBCDIC
4c3a8340
ST
1880 if (literal_endpoint == 2 &&
1881 ((isLOWER(min) && isLOWER(max)) ||
1882 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1883 if (isLOWER(min)) {
1884 for (i = min; i <= max; i++)
1885 if (isLOWER(i))
db42d148 1886 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1887 } else {
1888 for (i = min; i <= max; i++)
1889 if (isUPPER(i))
db42d148 1890 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1891 }
1892 }
1893 else
1894#endif
1895 for (i = min; i <= max; i++)
e294cc5d
JH
1896#ifdef EBCDIC
1897 if (has_utf8) {
1898 const U8 ch = (U8)NATIVE_TO_UTF(i);
1899 if (UNI_IS_INVARIANT(ch))
1900 *d++ = (U8)i;
1901 else {
1902 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1903 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1904 }
1905 }
1906 else
1907#endif
1908 *d++ = (char)i;
1909
1910#ifdef EBCDIC
1911 if (uvmax) {
1912 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1913 if (uvmax > 0x101)
1914 *d++ = (char)UTF_TO_NATIVE(0xff);
1915 if (uvmax > 0x100)
1916 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1917 }
1918#endif
02aa26ce
NT
1919
1920 /* mark the range as done, and continue */
79072805 1921 dorange = FALSE;
01ec43d0 1922 didrange = TRUE;
4c3a8340
ST
1923#ifdef EBCDIC
1924 literal_endpoint = 0;
1925#endif
79072805 1926 continue;
4e553d73 1927 }
02aa26ce
NT
1928
1929 /* range begins (ignore - as first or last char) */
79072805 1930 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1931 if (didrange) {
1fafa243 1932 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1933 }
e294cc5d
JH
1934 if (has_utf8
1935#ifdef EBCDIC
1936 && !native_range
1937#endif
1938 ) {
25716404 1939 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1940 s++;
1941 continue;
1942 }
79072805
LW
1943 dorange = TRUE;
1944 s++;
01ec43d0
GS
1945 }
1946 else {
1947 didrange = FALSE;
4c3a8340
ST
1948#ifdef EBCDIC
1949 literal_endpoint = 0;
e294cc5d 1950 native_range = TRUE;
4c3a8340 1951#endif
01ec43d0 1952 }
79072805 1953 }
02aa26ce
NT
1954
1955 /* if we get here, we're not doing a transliteration */
1956
0f5d15d6
IZ
1957 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1958 except for the last char, which will be done separately. */
3280af22 1959 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1960 if (s[2] == '#') {
e994fd66 1961 while (s+1 < send && *s != ')')
db42d148 1962 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1963 }
1964 else if (s[2] == '{' /* This should match regcomp.c */
1965 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1966 {
cc6b7395 1967 I32 count = 1;
0f5d15d6 1968 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1969 char c;
1970
d9f97599
GS
1971 while (count && (c = *regparse)) {
1972 if (c == '\\' && regparse[1])
1973 regparse++;
4e553d73 1974 else if (c == '{')
cc6b7395 1975 count++;
4e553d73 1976 else if (c == '}')
cc6b7395 1977 count--;
d9f97599 1978 regparse++;
cc6b7395 1979 }
e994fd66 1980 if (*regparse != ')')
5bdf89e7 1981 regparse--; /* Leave one char for continuation. */
0f5d15d6 1982 while (s < regparse)
db42d148 1983 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1984 }
748a9306 1985 }
02aa26ce
NT
1986
1987 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1988 else if (*s == '#' && PL_lex_inpat &&
1989 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1990 while (s+1 < send && *s != '\n')
db42d148 1991 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1992 }
02aa26ce 1993
5d1d4326 1994 /* check for embedded arrays
da6eedaa 1995 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1996 */
1749ea0d
ST
1997 else if (*s == '@' && s[1]) {
1998 if (isALNUM_lazy_if(s+1,UTF))
1999 break;
2000 if (strchr(":'{$", s[1]))
2001 break;
2002 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2003 break; /* in regexp, neither @+ nor @- are interpolated */
2004 }
02aa26ce
NT
2005
2006 /* check for embedded scalars. only stop if we're sure it's a
2007 variable.
2008 */
79072805 2009 else if (*s == '$') {
3280af22 2010 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2011 break;
6002328a 2012 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2013 break; /* in regexp, $ might be tail anchor */
2014 }
02aa26ce 2015
2b9d42f0
NIS
2016 /* End of else if chain - OP_TRANS rejoin rest */
2017
02aa26ce 2018 /* backslashes */
79072805
LW
2019 if (*s == '\\' && s+1 < send) {
2020 s++;
02aa26ce
NT
2021
2022 /* some backslashes we leave behind */
c9f97d15 2023 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
2024 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2025 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
2026 continue;
2027 }
02aa26ce
NT
2028
2029 /* deprecate \1 in strings and substitution replacements */
3280af22 2030 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2031 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2032 {
599cee73 2033 if (ckWARN(WARN_SYNTAX))
9014280d 2034 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2035 *--s = '$';
2036 break;
2037 }
02aa26ce
NT
2038
2039 /* string-change backslash escapes */
3280af22 2040 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2041 --s;
2042 break;
2043 }
02aa26ce
NT
2044
2045 /* if we get here, it's either a quoted -, or a digit */
79072805 2046 switch (*s) {
02aa26ce
NT
2047
2048 /* quoted - in transliterations */
79072805 2049 case '-':
3280af22 2050 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2051 *d++ = *s++;
2052 continue;
2053 }
2054 /* FALL THROUGH */
2055 default:
11b8faa4 2056 {
86f97054 2057 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2058 ckWARN(WARN_MISC))
9014280d 2059 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2060 "Unrecognized escape \\%c passed through",
2061 *s);
11b8faa4 2062 /* default action is to copy the quoted character */
f9a63242 2063 goto default_action;
11b8faa4 2064 }
02aa26ce
NT
2065
2066 /* \132 indicates an octal constant */
79072805
LW
2067 case '0': case '1': case '2': case '3':
2068 case '4': case '5': case '6': case '7':
ba210ebe 2069 {
53305cf1
NC
2070 I32 flags = 0;
2071 STRLEN len = 3;
2072 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2073 s += len;
2074 }
012bcf8d 2075 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2076
2077 /* \x24 indicates a hex constant */
79072805 2078 case 'x':
a0ed51b3
LW
2079 ++s;
2080 if (*s == '{') {
9d4ba2ae 2081 char* const e = strchr(s, '}');
a4c04bdc
NC
2082 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2083 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2084 STRLEN len;
355860ce 2085
53305cf1 2086 ++s;
adaeee49 2087 if (!e) {
a0ed51b3 2088 yyerror("Missing right brace on \\x{}");
355860ce 2089 continue;
ba210ebe 2090 }
53305cf1
NC
2091 len = e - s;
2092 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2093 s = e + 1;
a0ed51b3
LW
2094 }
2095 else {
ba210ebe 2096 {
53305cf1 2097 STRLEN len = 2;
a4c04bdc 2098 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2099 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2100 s += len;
2101 }
012bcf8d
GS
2102 }
2103
2104 NUM_ESCAPE_INSERT:
2105 /* Insert oct or hex escaped character.
301d3d20 2106 * There will always enough room in sv since such
db42d148 2107 * escapes will be longer than any UTF-8 sequence
301d3d20 2108 * they can end up as. */
ba7cea30 2109
c7f1f016
NIS
2110 /* We need to map to chars to ASCII before doing the tests
2111 to cover EBCDIC
2112 */
c4d5f83a 2113 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2114 if (!has_utf8 && uv > 255) {
301d3d20
JH
2115 /* Might need to recode whatever we have
2116 * accumulated so far if it contains any
2117 * hibit chars.
2118 *
2119 * (Can't we keep track of that and avoid
2120 * this rescan? --jhi)
012bcf8d 2121 */
c7f1f016 2122 int hicount = 0;
63cd0674
NIS
2123 U8 *c;
2124 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2125 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2126 hicount++;
db42d148 2127 }
012bcf8d 2128 }
63cd0674 2129 if (hicount) {
9d4ba2ae 2130 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2131 U8 *src, *dst;
2132 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2133 src = (U8 *)d - 1;
2134 dst = src+hicount;
2135 d += hicount;
cfd0369c 2136 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2137 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2138 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2139 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2140 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2141 }
2142 else {
63cd0674 2143 *dst-- = *src;
012bcf8d 2144 }
c7f1f016 2145 src--;
012bcf8d
GS
2146 }
2147 }
2148 }
2149
9aa983d2 2150 if (has_utf8 || uv > 255) {
9041c2e3 2151 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2152 has_utf8 = TRUE;
f9a63242
JH
2153 if (PL_lex_inwhat == OP_TRANS &&
2154 PL_sublex_info.sub_op) {
2155 PL_sublex_info.sub_op->op_private |=
2156 (PL_lex_repl ? OPpTRANS_FROM_UTF
2157 : OPpTRANS_TO_UTF);
f9a63242 2158 }
e294cc5d
JH
2159#ifdef EBCDIC
2160 if (uv > 255 && !dorange)
2161 native_range = FALSE;
2162#endif
012bcf8d 2163 }
a0ed51b3 2164 else {
012bcf8d 2165 *d++ = (char)uv;
a0ed51b3 2166 }
012bcf8d
GS
2167 }
2168 else {
c4d5f83a 2169 *d++ = (char) uv;
a0ed51b3 2170 }
79072805 2171 continue;
02aa26ce 2172
b239daa5 2173 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2174 case 'N':
55eda711 2175 ++s;
423cee85
JH
2176 if (*s == '{') {
2177 char* e = strchr(s, '}');
155aba94 2178 SV *res;
423cee85 2179 STRLEN len;
cfd0369c 2180 const char *str;
4e553d73 2181
423cee85 2182 if (!e) {
5777a3f7 2183 yyerror("Missing right brace on \\N{}");
423cee85
JH
2184 e = s - 1;
2185 goto cont_scan;
2186 }
dbc0d4f2
JH
2187 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2188 /* \N{U+...} */
2189 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2190 PERL_SCAN_DISALLOW_PREFIX;
2191 s += 3;
2192 len = e - s;
2193 uv = grok_hex(s, &len, &flags, NULL);
2194 s = e + 1;
2195 goto NUM_ESCAPE_INSERT;
2196 }
55eda711 2197 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2198 res = new_constant( NULL, 0, "charnames",
a0714e2c 2199 res, NULL, "\\N{...}" );
f9a63242
JH
2200 if (has_utf8)
2201 sv_utf8_upgrade(res);
cfd0369c 2202 str = SvPV_const(res,len);
1c47067b
JH
2203#ifdef EBCDIC_NEVER_MIND
2204 /* charnames uses pack U and that has been
2205 * recently changed to do the below uni->native
2206 * mapping, so this would be redundant (and wrong,
2207 * the code point would be doubly converted).
2208 * But leave this in just in case the pack U change
2209 * gets revoked, but the semantics is still
2210 * desireable for charnames. --jhi */
cddc7ef4 2211 {
cfd0369c 2212 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2213
2214 if (uv < 0x100) {
89ebb4a3 2215 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2216
2217 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2218 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2219 str = SvPV_const(res, len);
cddc7ef4
JH
2220 }
2221 }
2222#endif
89491803 2223 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2224 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2225 SvCUR_set(sv, d - ostart);
2226 SvPOK_on(sv);
e4f3eed8 2227 *d = '\0';
f08d6ad9 2228 sv_utf8_upgrade(sv);
d2f449dd 2229 /* this just broke our allocation above... */
eb160463 2230 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2231 d = SvPVX(sv) + SvCUR(sv);
89491803 2232 has_utf8 = TRUE;
f08d6ad9 2233 }
eb160463 2234 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2235 const char * const odest = SvPVX_const(sv);
423cee85 2236
8973db79 2237 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2238 d = SvPVX(sv) + (d - odest);
2239 }
e294cc5d
JH
2240#ifdef EBCDIC
2241 if (!dorange)
2242 native_range = FALSE; /* \N{} is guessed to be Unicode */
2243#endif
423cee85
JH
2244 Copy(str, d, len, char);
2245 d += len;
2246 SvREFCNT_dec(res);
2247 cont_scan:
2248 s = e + 1;
2249 }
2250 else
5777a3f7 2251 yyerror("Missing braces on \\N{}");
423cee85
JH
2252 continue;
2253
02aa26ce 2254 /* \c is a control character */
79072805
LW
2255 case 'c':
2256 s++;
961ce445 2257 if (s < send) {
ba210ebe 2258 U8 c = *s++;
c7f1f016
NIS
2259#ifdef EBCDIC
2260 if (isLOWER(c))
2261 c = toUPPER(c);
2262#endif
db42d148 2263 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2264 }
961ce445
RGS
2265 else {
2266 yyerror("Missing control char name in \\c");
2267 }
79072805 2268 continue;
02aa26ce
NT
2269
2270 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2271 case 'b':
db42d148 2272 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2273 break;
2274 case 'n':
db42d148 2275 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2276 break;
2277 case 'r':
db42d148 2278 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2279 break;
2280 case 'f':
db42d148 2281 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2282 break;
2283 case 't':
db42d148 2284 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2285 break;
34a3fe2a 2286 case 'e':
db42d148 2287 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2288 break;
2289 case 'a':
db42d148 2290 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2291 break;
02aa26ce
NT
2292 } /* end switch */
2293
79072805
LW
2294 s++;
2295 continue;
02aa26ce 2296 } /* end if (backslash) */
4c3a8340
ST
2297#ifdef EBCDIC
2298 else
2299 literal_endpoint++;
2300#endif
02aa26ce 2301
f9a63242 2302 default_action:
2b9d42f0
NIS
2303 /* If we started with encoded form, or already know we want it
2304 and then encode the next character */
2305 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2306 STRLEN len = 1;
5f66b61c
AL
2307 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2308 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2309 s += len;
2310 if (need > len) {
2311 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2312 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2313 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2314 }
5f66b61c 2315 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2316 has_utf8 = TRUE;
e294cc5d
JH
2317#ifdef EBCDIC
2318 if (uv > 255 && !dorange)
2319 native_range = FALSE;
2320#endif
2b9d42f0
NIS
2321 }
2322 else {
2323 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2324 }
02aa26ce
NT
2325 } /* while loop to process each character */
2326
2327 /* terminate the string and set up the sv */
79072805 2328 *d = '\0';
95a20fc0 2329 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2330 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2331 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2332
79072805 2333 SvPOK_on(sv);
9f4817db 2334 if (PL_encoding && !has_utf8) {
d0063567
DK
2335 sv_recode_to_utf8(sv, PL_encoding);
2336 if (SvUTF8(sv))
2337 has_utf8 = TRUE;
9f4817db 2338 }
2b9d42f0 2339 if (has_utf8) {
7e2040f0 2340 SvUTF8_on(sv);
2b9d42f0 2341 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2342 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2343 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2344 }
2345 }
79072805 2346
02aa26ce 2347 /* shrink the sv if we allocated more than we used */
79072805 2348 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2349 SvPV_shrink_to_cur(sv);
79072805 2350 }
02aa26ce 2351
9b599b2a 2352 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2353 if (s > PL_bufptr) {
2354 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2355 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2356 sv, NULL,
4e553d73 2357 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2358 ? "tr"
3280af22 2359 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2360 ? "s"
2361 : "qq")));
79072805 2362 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2363 } else
8990e307 2364 SvREFCNT_dec(sv);
79072805
LW
2365 return s;
2366}
2367
ffb4593c
NT
2368/* S_intuit_more
2369 * Returns TRUE if there's more to the expression (e.g., a subscript),
2370 * FALSE otherwise.
ffb4593c
NT
2371 *
2372 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2373 *
2374 * ->[ and ->{ return TRUE
2375 * { and [ outside a pattern are always subscripts, so return TRUE
2376 * if we're outside a pattern and it's not { or [, then return FALSE
2377 * if we're in a pattern and the first char is a {
2378 * {4,5} (any digits around the comma) returns FALSE
2379 * if we're in a pattern and the first char is a [
2380 * [] returns FALSE
2381 * [SOMETHING] has a funky algorithm to decide whether it's a
2382 * character class or not. It has to deal with things like
2383 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2384 * anything else returns TRUE
2385 */
2386
9cbb5ea2
GS
2387/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2388
76e3520e 2389STATIC int
cea2e8a9 2390S_intuit_more(pTHX_ register char *s)
79072805 2391{
97aff369 2392 dVAR;
3280af22 2393 if (PL_lex_brackets)
79072805
LW
2394 return TRUE;
2395 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2396 return TRUE;
2397 if (*s != '{' && *s != '[')
2398 return FALSE;
3280af22 2399 if (!PL_lex_inpat)
79072805
LW
2400 return TRUE;
2401
2402 /* In a pattern, so maybe we have {n,m}. */
2403 if (*s == '{') {
2404 s++;
2405 if (!isDIGIT(*s))
2406 return TRUE;
2407 while (isDIGIT(*s))
2408 s++;
2409 if (*s == ',')
2410 s++;
2411 while (isDIGIT(*s))
2412 s++;
2413 if (*s == '}')
2414 return FALSE;
2415 return TRUE;
2416
2417 }
2418
2419 /* On the other hand, maybe we have a character class */
2420
2421 s++;
2422 if (*s == ']' || *s == '^')
2423 return FALSE;
2424 else {
ffb4593c 2425 /* this is terrifying, and it works */
79072805
LW
2426 int weight = 2; /* let's weigh the evidence */
2427 char seen[256];
f27ffc4a 2428 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2429 const char * const send = strchr(s,']');
3280af22 2430 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2431
2432 if (!send) /* has to be an expression */
2433 return TRUE;
2434
2435 Zero(seen,256,char);
2436 if (*s == '$')
2437 weight -= 3;
2438 else if (isDIGIT(*s)) {
2439 if (s[1] != ']') {
2440 if (isDIGIT(s[1]) && s[2] == ']')
2441 weight -= 10;
2442 }
2443 else
2444 weight -= 100;
2445 }
2446 for (; s < send; s++) {
2447 last_un_char = un_char;
2448 un_char = (unsigned char)*s;
2449 switch (*s) {
2450 case '@':
2451 case '&':
2452 case '$':
2453 weight -= seen[un_char] * 10;
7e2040f0 2454 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2455 int len;
8903cb82 2456 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2457 len = (int)strlen(tmpbuf);
2458 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2459 weight -= 100;
2460 else
2461 weight -= 10;
2462 }
2463 else if (*s == '$' && s[1] &&
93a17b20
LW
2464 strchr("[#!%*<>()-=",s[1])) {
2465 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2466 weight -= 10;
2467 else
2468 weight -= 1;
2469 }
2470 break;
2471 case '\\':
2472 un_char = 254;
2473 if (s[1]) {
93a17b20 2474 if (strchr("wds]",s[1]))
79072805
LW
2475 weight += 100;
2476 else if (seen['\''] || seen['"'])
2477 weight += 1;
93a17b20 2478 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2479 weight += 40;
2480 else if (isDIGIT(s[1])) {
2481 weight += 40;
2482 while (s[1] && isDIGIT(s[1]))
2483 s++;
2484 }
2485 }
2486 else
2487 weight += 100;
2488 break;
2489 case '-':
2490 if (s[1] == '\\')
2491 weight += 50;
93a17b20 2492 if (strchr("aA01! ",last_un_char))
79072805 2493 weight += 30;
93a17b20 2494 if (strchr("zZ79~",s[1]))
79072805 2495 weight += 30;
f27ffc4a
GS
2496 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2497 weight -= 5; /* cope with negative subscript */
79072805
LW
2498 break;
2499 default:
3792a11b
NC
2500 if (!isALNUM(last_un_char)
2501 && !(last_un_char == '$' || last_un_char == '@'
2502 || last_un_char == '&')
2503 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2504 char *d = tmpbuf;
2505 while (isALPHA(*s))
2506 *d++ = *s++;
2507 *d = '\0';
2508 if (keyword(tmpbuf, d - tmpbuf))
2509 weight -= 150;
2510 }
2511 if (un_char == last_un_char + 1)
2512 weight += 5;
2513 weight -= seen[un_char];
2514 break;
2515 }
2516 seen[un_char]++;
2517 }
2518 if (weight >= 0) /* probably a character class */
2519 return FALSE;
2520 }
2521
2522 return TRUE;
2523}
ffed7fef 2524
ffb4593c
NT
2525/*
2526 * S_intuit_method
2527 *
2528 * Does all the checking to disambiguate
2529 * foo bar
2530 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2531 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2532 *
2533 * First argument is the stuff after the first token, e.g. "bar".
2534 *
2535 * Not a method if bar is a filehandle.
2536 * Not a method if foo is a subroutine prototyped to take a filehandle.
2537 * Not a method if it's really "Foo $bar"
2538 * Method if it's "foo $bar"
2539 * Not a method if it's really "print foo $bar"
2540 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2541 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2542 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2543 * =>
2544 */
2545
76e3520e 2546STATIC int
62d55b22 2547S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2548{
97aff369 2549 dVAR;
a0d0e21e 2550 char *s = start + (*start == '$');
3280af22 2551 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2552 STRLEN len;
2553 GV* indirgv;
5db06880
NC
2554#ifdef PERL_MAD
2555 int soff;
2556#endif
a0d0e21e
LW
2557
2558 if (gv) {
62d55b22 2559 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2560 return 0;
62d55b22
NC
2561 if (cv) {
2562 if (SvPOK(cv)) {
2563 const char *proto = SvPVX_const(cv);
2564 if (proto) {
2565 if (*proto == ';')
2566 proto++;
2567 if (*proto == '*')
2568 return 0;
2569 }
b6c543e3
IZ
2570 }
2571 } else
c35e046a 2572 gv = NULL;
a0d0e21e 2573 }
8903cb82 2574 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2575 /* start is the beginning of the possible filehandle/object,
2576 * and s is the end of it
2577 * tmpbuf is a copy of it
2578 */
2579
a0d0e21e 2580 if (*start == '$') {
3280af22 2581 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2582 return 0;
5db06880
NC
2583#ifdef PERL_MAD
2584 len = start - SvPVX(PL_linestr);
2585#endif
29595ff2 2586 s = PEEKSPACE(s);
5db06880
NC
2587#ifdef PERLMAD
2588 start = SvPVX(PL_linestr) + len;
2589#endif
3280af22
NIS
2590 PL_bufptr = start;
2591 PL_expect = XREF;
a0d0e21e
LW
2592 return *s == '(' ? FUNCMETH : METHOD;
2593 }
2594 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2595 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2596 len -= 2;
2597 tmpbuf[len] = '\0';
5db06880
NC
2598#ifdef PERL_MAD
2599 soff = s - SvPVX(PL_linestr);
2600#endif
c3e0f903
GS
2601 goto bare_package;
2602 }
90e5519e 2603 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2604 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2605 return 0;
2606 /* filehandle or package name makes it a method */
89bfa8cd 2607 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2608#ifdef PERL_MAD
2609 soff = s - SvPVX(PL_linestr);
2610#endif
29595ff2 2611 s = PEEKSPACE(s);
3280af22 2612 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2613 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2614 bare_package:
cd81e915 2615 start_force(PL_curforce);
9ded7720 2616 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2617 newSVpvn(tmpbuf,len));
9ded7720 2618 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2619 if (PL_madskills)
2620 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2621 PL_expect = XTERM;
a0d0e21e 2622 force_next(WORD);
3280af22 2623 PL_bufptr = s;
5db06880
NC
2624#ifdef PERL_MAD
2625 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2626#endif
a0d0e21e
LW
2627 return *s == '(' ? FUNCMETH : METHOD;
2628 }
2629 }
2630 return 0;
2631}
2632
ffb4593c
NT
2633/*
2634 * S_incl_perldb
2635 * Return a string of Perl code to load the debugger. If PERL5DB
2636 * is set, it will return the contents of that, otherwise a
2637 * compile-time require of perl5db.pl.
2638 */
2639
bfed75c6 2640STATIC const char*
cea2e8a9 2641S_incl_perldb(pTHX)
a0d0e21e 2642{
97aff369 2643 dVAR;
3280af22 2644 if (PL_perldb) {
9d4ba2ae 2645 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2646
2647 if (pdb)
2648 return pdb;
93189314 2649 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2650 return "BEGIN { require 'perl5db.pl' }";
2651 }
2652 return "";
2653}
2654
2655
16d20bd9 2656/* Encoded script support. filter_add() effectively inserts a
4e553d73 2657 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2658 * Note that the filter function only applies to the current source file
2659 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2660 *
2661 * The datasv parameter (which may be NULL) can be used to pass
2662 * private data to this instance of the filter. The filter function
2663 * can recover the SV using the FILTER_DATA macro and use it to
2664 * store private buffers and state information.
2665 *
2666 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2667 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2668 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2669 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2670 * private use must be set using malloc'd pointers.
2671 */
16d20bd9
AD
2672
2673SV *
864dbfa3 2674Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2675{
97aff369 2676 dVAR;
f4c556ac 2677 if (!funcp)
a0714e2c 2678 return NULL;
f4c556ac 2679
3280af22
NIS
2680 if (!PL_rsfp_filters)
2681 PL_rsfp_filters = newAV();
16d20bd9 2682 if (!datasv)
561b68a9 2683 datasv = newSV(0);
862a34c6 2684 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2685 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2686 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2687 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2688 FPTR2DPTR(void *, IoANY(datasv)),
2689 SvPV_nolen(datasv)));
3280af22
NIS
2690 av_unshift(PL_rsfp_filters, 1);
2691 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2692 return(datasv);
2693}
4e553d73 2694
16d20bd9
AD
2695
2696/* Delete most recently added instance of this filter function. */
a0d0e21e 2697void
864dbfa3 2698Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2699{
97aff369 2700 dVAR;
e0c19803 2701 SV *datasv;
24801a4b 2702
33073adb 2703#ifdef DEBUGGING
55662e27
JH
2704 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2705 FPTR2DPTR(void*, funcp)));
33073adb 2706#endif
3280af22 2707 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2708 return;
2709 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2710 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2711 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2712 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2713 IoANY(datasv) = (void *)NULL;
3280af22 2714 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2715
16d20bd9
AD
2716 return;
2717 }
2718 /* we need to search for the correct entry and clear it */
cea2e8a9 2719 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2720}
2721
2722
1de9afcd
RGS
2723/* Invoke the idxth filter function for the current rsfp. */
2724/* maxlen 0 = read one text line */
16d20bd9 2725I32
864dbfa3 2726Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2727{
97aff369 2728 dVAR;
16d20bd9
AD
2729 filter_t funcp;
2730 SV *datasv = NULL;
f482118e
NC
2731 /* This API is bad. It should have been using unsigned int for maxlen.
2732 Not sure if we want to change the API, but if not we should sanity
2733 check the value here. */
39cd7a59
NC
2734 const unsigned int correct_length
2735 = maxlen < 0 ?
2736#ifdef PERL_MICRO
2737 0x7FFFFFFF
2738#else
2739 INT_MAX
2740#endif
2741 : maxlen;
e50aee73 2742
3280af22 2743 if (!PL_rsfp_filters)
16d20bd9 2744 return -1;
1de9afcd 2745 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2746 /* Provide a default input filter to make life easy. */
2747 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2748 DEBUG_P(PerlIO_printf(Perl_debug_log,
2749 "filter_read %d: from rsfp\n", idx));
f482118e 2750 if (correct_length) {
16d20bd9
AD
2751 /* Want a block */
2752 int len ;
f54cb97a 2753 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2754
2755 /* ensure buf_sv is large enough */
f482118e
NC
2756 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2757 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2758 correct_length)) <= 0) {
3280af22 2759 if (PerlIO_error(PL_rsfp))
37120919
AD
2760 return -1; /* error */
2761 else
2762 return 0 ; /* end of file */
2763 }
16d20bd9
AD
2764 SvCUR_set(buf_sv, old_len + len) ;
2765 } else {
2766 /* Want a line */
3280af22
NIS
2767 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2768 if (PerlIO_error(PL_rsfp))
37120919
AD
2769 return -1; /* error */
2770 else
2771 return 0 ; /* end of file */
2772 }
16d20bd9
AD
2773 }
2774 return SvCUR(buf_sv);
2775 }
2776 /* Skip this filter slot if filter has been deleted */
1de9afcd 2777 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2778 DEBUG_P(PerlIO_printf(Perl_debug_log,
2779 "filter_read %d: skipped (filter deleted)\n",
2780 idx));
f482118e 2781 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2782 }
2783 /* Get function pointer hidden within datasv */
8141890a 2784 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2785 DEBUG_P(PerlIO_printf(Perl_debug_log,
2786 "filter_read %d: via function %p (%s)\n",
ca0270c4 2787 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2788 /* Call function. The function is expected to */
2789 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2790 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2791 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2792}
2793
76e3520e 2794STATIC char *
cea2e8a9 2795S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2796{
97aff369 2797 dVAR;
c39cd008 2798#ifdef PERL_CR_FILTER
3280af22 2799 if (!PL_rsfp_filters) {
c39cd008 2800 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2801 }
2802#endif
3280af22 2803 if (PL_rsfp_filters) {
55497cff
PP
2804 if (!append)
2805 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2806 if (FILTER_READ(0, sv, 0) > 0)
2807 return ( SvPVX(sv) ) ;
2808 else
bd61b366 2809 return NULL ;
16d20bd9 2810 }
9d116dd7 2811 else
fd049845 2812 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2813}
2814
01ec43d0 2815STATIC HV *
7fc63493 2816S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2817{
97aff369 2818 dVAR;
def3634b
GS
2819 GV *gv;
2820
01ec43d0 2821 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2822 return PL_curstash;
2823
2824 if (len > 2 &&
2825 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2826 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2827 {
2828 return GvHV(gv); /* Foo:: */
def3634b
GS
2829 }
2830
2831 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2832 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2833 if (gv && GvCV(gv)) {
2834 SV * const sv = cv_const_sv(GvCV(gv));
2835 if (sv)
83003860 2836 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2837 }
2838
2839 return gv_stashpv(pkgname, FALSE);
2840}
a0d0e21e 2841
5db06880
NC
2842#ifdef PERL_MAD
2843 /*
2844 * Perl_madlex
2845 * The intent of this yylex wrapper is to minimize the changes to the
2846 * tokener when we aren't interested in collecting madprops. It remains
2847 * to be seen how successful this strategy will be...
2848 */
2849
2850int
2851Perl_madlex(pTHX)
2852{
2853 int optype;
2854 char *s = PL_bufptr;
2855
cd81e915
NC
2856 /* make sure PL_thiswhite is initialized */
2857 PL_thiswhite = 0;
2858 PL_thismad = 0;
5db06880 2859
cd81e915 2860 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2861 if (PL_pending_ident)
2862 return S_pending_ident(aTHX);
2863
2864 /* previous token ate up our whitespace? */
cd81e915
NC
2865 if (!PL_lasttoke && PL_nextwhite) {
2866 PL_thiswhite = PL_nextwhite;
2867 PL_nextwhite = 0;
5db06880
NC
2868 }
2869
2870 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2871 PL_realtokenstart = -1;
2872 PL_thistoken = 0;
5db06880
NC
2873 optype = yylex();
2874 s = PL_bufptr;
cd81e915 2875 assert(PL_curforce < 0);
5db06880 2876
cd81e915
NC
2877 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2878 if (!PL_thistoken) {
2879 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2880 PL_thistoken = newSVpvn("",0);
5db06880 2881 else {
c35e046a 2882 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2883 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2884 }
2885 }
cd81e915
NC
2886 if (PL_thismad) /* install head */
2887 CURMAD('X', PL_thistoken);
5db06880
NC
2888 }
2889
2890 /* last whitespace of a sublex? */
cd81e915
NC
2891 if (optype == ')' && PL_endwhite) {
2892 CURMAD('X', PL_endwhite);
5db06880
NC
2893 }
2894
cd81e915 2895 if (!PL_thismad) {
5db06880
NC
2896
2897 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2898 if (!PL_thiswhite && !PL_endwhite && !optype) {
2899 sv_free(PL_thistoken);
2900 PL_thistoken = 0;
5db06880
NC
2901 return 0;
2902 }
2903
2904 /* put off final whitespace till peg */
2905 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2906 PL_nextwhite = PL_thiswhite;
2907 PL_thiswhite = 0;
5db06880 2908 }
cd81e915
NC
2909 else if (PL_thisopen) {
2910 CURMAD('q', PL_thisopen);
2911 if (PL_thistoken)
2912 sv_free(PL_thistoken);
2913 PL_thistoken = 0;
5db06880
NC
2914 }
2915 else {
2916 /* Store actual token text as madprop X */
cd81e915 2917 CURMAD('X', PL_thistoken);
5db06880
NC
2918 }
2919
cd81e915 2920 if (PL_thiswhite) {
5db06880 2921 /* add preceding whitespace as madprop _ */
cd81e915 2922 CURMAD('_', PL_thiswhite);
5db06880
NC
2923 }
2924
cd81e915 2925 if (PL_thisstuff) {
5db06880 2926 /* add quoted material as madprop = */
cd81e915 2927 CURMAD('=', PL_thisstuff);
5db06880
NC
2928 }
2929
cd81e915 2930 if (PL_thisclose) {
5db06880 2931 /* add terminating quote as madprop Q */
cd81e915 2932 CURMAD('Q', PL_thisclose);
5db06880
NC
2933 }
2934 }
2935
2936 /* special processing based on optype */
2937
2938 switch (optype) {
2939
2940 /* opval doesn't need a TOKEN since it can already store mp */
2941 case WORD:
2942 case METHOD:
2943 case FUNCMETH:
2944 case THING:
2945 case PMFUNC:
2946 case PRIVATEREF:
2947 case FUNC0SUB:
2948 case UNIOPSUB:
2949 case LSTOPSUB:
2950 if (yylval.opval)
cd81e915
NC
2951 append_madprops(PL_thismad, yylval.opval, 0);
2952 PL_thismad = 0;
5db06880
NC
2953 return optype;
2954
2955 /* fake EOF */
2956 case 0:
2957 optype = PEG;
cd81e915
NC
2958 if (PL_endwhite) {
2959 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2960 PL_endwhite = 0;
5db06880
NC
2961 }
2962 break;
2963
2964 case ']':
2965 case '}':
cd81e915 2966 if (PL_faketokens)
5db06880
NC
2967 break;
2968 /* remember any fake bracket that lexer is about to discard */
2969 if (PL_lex_brackets == 1 &&
2970 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2971 {
2972 s = PL_bufptr;
2973 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2974 s++;
2975 if (*s == '}') {
cd81e915
NC
2976 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2977 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2978 PL_thiswhite = 0;
5db06880
NC
2979 PL_bufptr = s - 1;
2980 break; /* don't bother looking for trailing comment */
2981 }
2982 else
2983 s = PL_bufptr;
2984 }
2985 if (optype == ']')
2986 break;
2987 /* FALLTHROUGH */
2988
2989 /* attach a trailing comment to its statement instead of next token */
2990 case ';':
cd81e915 2991 if (PL_faketokens)
5db06880
NC
2992 break;
2993 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2994 s = PL_bufptr;
2995 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2996 s++;
2997 if (*s == '\n' || *s == '#') {
2998 while (s < PL_bufend && *s != '\n')
2999 s++;
3000 if (s < PL_bufend)
3001 s++;
cd81e915
NC
3002 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3003 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3004 PL_thiswhite = 0;
5db06880
NC
3005 PL_bufptr = s;
3006 }
3007 }
3008 break;
3009
3010 /* pval */
3011 case LABEL:
3012 break;
3013
3014 /* ival */
3015 default:
3016 break;
3017
3018 }
3019
3020 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3021 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3022 PL_thismad = 0;
5db06880
NC
3023 return optype;
3024}
3025#endif
3026
468aa647 3027STATIC char *
cc6ed77d 3028S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3029 dVAR;
468aa647
RGS
3030 if (PL_expect != XSTATE)
3031 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3032 is_use ? "use" : "no"));
29595ff2 3033 s = SKIPSPACE1(s);
468aa647
RGS
3034 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3035 s = force_version(s, TRUE);
29595ff2 3036 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3037 start_force(PL_curforce);
9ded7720 3038 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3039 force_next(WORD);
3040 }
3041 else if (*s == 'v') {
3042 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3043 s = force_version(s, FALSE);
3044 }
3045 }
3046 else {
3047 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3048 s = force_version(s, FALSE);
3049 }
3050 yylval.ival = is_use;
3051 return s;
3052}
748a9306 3053#ifdef DEBUGGING
27da23d5 3054 static const char* const exp_name[] =
09bef843 3055 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3056 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3057 };
748a9306 3058#endif
463ee0b2 3059
02aa26ce
NT
3060/*
3061 yylex
3062
3063 Works out what to call the token just pulled out of the input
3064 stream. The yacc parser takes care of taking the ops we return and
3065 stitching them into a tree.
3066
3067 Returns:
3068 PRIVATEREF
3069
3070 Structure:
3071 if read an identifier
3072 if we're in a my declaration
3073 croak if they tried to say my($foo::bar)
3074 build the ops for a my() declaration
3075 if it's an access to a my() variable
3076 are we in a sort block?
3077 croak if my($a); $a <=> $b
3078 build ops for access to a my() variable
3079 if in a dq string, and they've said @foo and we can't find @foo
3080 croak
3081 build ops for a bareword
3082 if we already built the token before, use it.
3083*/
3084
20141f0e 3085
dba4d153
JH
3086#ifdef __SC__
3087#pragma segment Perl_yylex
3088#endif
dba4d153 3089int
dba4d153 3090Perl_yylex(pTHX)
20141f0e 3091{
97aff369 3092 dVAR;
3afc138a 3093 register char *s = PL_bufptr;
378cc40b 3094 register char *d;
463ee0b2 3095 STRLEN len;
aa7440fb 3096 bool bof = FALSE;
a687059c 3097
bbf60fe6 3098 DEBUG_T( {
396482e1 3099 SV* tmp = newSVpvs("");
b6007c36
DM
3100 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3101 (IV)CopLINE(PL_curcop),
3102 lex_state_names[PL_lex_state],
3103 exp_name[PL_expect],
3104 pv_display(tmp, s, strlen(s), 0, 60));
3105 SvREFCNT_dec(tmp);
bbf60fe6 3106 } );
02aa26ce 3107 /* check if there's an identifier for us to look at */
ba979b31 3108 if (PL_pending_ident)
bbf60fe6 3109 return REPORT(S_pending_ident(aTHX));
bbce6d69 3110
02aa26ce
NT
3111 /* no identifier pending identification */
3112
3280af22 3113 switch (PL_lex_state) {
79072805
LW
3114#ifdef COMMENTARY
3115 case LEX_NORMAL: /* Some compilers will produce faster */
3116 case LEX_INTERPNORMAL: /* code if we comment these out. */
3117 break;
3118#endif
3119
09bef843 3120 /* when we've already built the next token, just pull it out of the queue */
79072805 3121 case LEX_KNOWNEXT:
5db06880
NC
3122#ifdef PERL_MAD
3123 PL_lasttoke--;
3124 yylval = PL_nexttoke[PL_lasttoke].next_val;
3125 if (PL_madskills) {
cd81e915 3126 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3127 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3128 if (PL_thismad && PL_thismad->mad_key == '_') {
3129 PL_thiswhite = (SV*)PL_thismad->mad_val;
3130 PL_thismad->mad_val = 0;
3131 mad_free(PL_thismad);
3132 PL_thismad = 0;
5db06880
NC
3133 }
3134 }
3135 if (!PL_lasttoke) {
3136 PL_lex_state = PL_lex_defer;
3137 PL_expect = PL_lex_expect;
3138 PL_lex_defer = LEX_NORMAL;
3139 if (!PL_nexttoke[PL_lasttoke].next_type)
3140 return yylex();
3141 }
3142#else
3280af22 3143 PL_nexttoke--;
5db06880 3144 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3145 if (!PL_nexttoke) {
3146 PL_lex_state = PL_lex_defer;
3147 PL_expect = PL_lex_expect;
3148 PL_lex_defer = LEX_NORMAL;
463ee0b2 3149 }
5db06880
NC
3150#endif
3151#ifdef PERL_MAD
3152 /* FIXME - can these be merged? */
3153 return(PL_nexttoke[PL_lasttoke].next_type);
3154#else
bbf60fe6 3155 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3156#endif
79072805 3157
02aa26ce 3158 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3159 when we get here, PL_bufptr is at the \
02aa26ce 3160 */
79072805
LW
3161 case LEX_INTERPCASEMOD:
3162#ifdef DEBUGGING
3280af22 3163 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3164 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3165#endif
02aa26ce 3166 /* handle \E or end of string */
3280af22 3167 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3168 /* if at a \E */
3280af22 3169 if (PL_lex_casemods) {
f54cb97a 3170 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3171 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3172
3792a11b
NC
3173 if (PL_bufptr != PL_bufend
3174 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3175 PL_bufptr += 2;
3176 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3177#ifdef PERL_MAD
3178 if (PL_madskills)
cd81e915 3179 PL_thistoken = newSVpvn("\\E",2);
5db06880 3180#endif
a0d0e21e 3181 }
bbf60fe6 3182 return REPORT(')');
79072805 3183 }
5db06880
NC
3184#ifdef PERL_MAD
3185 while (PL_bufptr != PL_bufend &&
3186 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3187 if (!PL_thiswhite)
3188 PL_thiswhite = newSVpvn("",0);
3189 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3190 PL_bufptr += 2;
3191 }
3192#else
3280af22
NIS
3193 if (PL_bufptr != PL_bufend)
3194 PL_bufptr += 2;
5db06880 3195#endif
3280af22 3196 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3197 return yylex();
79072805
LW
3198 }
3199 else {
607df283 3200 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3201 "### Saw case modifier\n"); });
3280af22 3202 s = PL_bufptr + 1;
6e909404 3203 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3204#ifdef PERL_MAD
cd81e915
NC
3205 if (!PL_thiswhite)
3206 PL_thiswhite = newSVpvn("",0);
3207 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3208#endif
89122651 3209 PL_bufptr = s + 3;
6e909404
JH
3210 PL_lex_state = LEX_INTERPCONCAT;
3211 return yylex();
a0d0e21e 3212 }
6e909404 3213 else {
90771dc0 3214 I32 tmp;
5db06880
NC
3215 if (!PL_madskills) /* when just compiling don't need correct */
3216 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3217 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3218 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3219 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3220 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3221 return REPORT(')');
6e909404
JH
3222 }
3223 if (PL_lex_casemods > 10)
3224 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3225 PL_lex_casestack[PL_lex_casemods++] = *s;
3226 PL_lex_casestack[PL_lex_casemods] = '\0';
3227 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3228 start_force(PL_curforce);
9ded7720 3229 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3230 force_next('(');
cd81e915 3231 start_force(PL_curforce);
6e909404 3232 if (*s == 'l')
9ded7720 3233 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3234 else if (*s == 'u')
9ded7720 3235 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3236 else if (*s == 'L')
9ded7720 3237 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3238 else if (*s == 'U')
9ded7720 3239 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3240 else if (*s == 'Q')
9ded7720 3241 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3242 else
3243 Perl_croak(aTHX_ "panic: yylex");
5db06880 3244 if (PL_madskills) {
d4c19fe8 3245 SV* const tmpsv = newSVpvn("",0);
5db06880
NC
3246 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3247 curmad('_', tmpsv);
3248 }
6e909404 3249 PL_bufptr = s + 1;
a0d0e21e 3250 }
79072805 3251 force_next(FUNC);
3280af22
NIS
3252 if (PL_lex_starts) {
3253 s = PL_bufptr;
3254 PL_lex_starts = 0;
5db06880
NC
3255#ifdef PERL_MAD
3256 if (PL_madskills) {
cd81e915
NC
3257 if (PL_thistoken)
3258 sv_free(PL_thistoken);
3259 PL_thistoken = newSVpvn("",0);
5db06880
NC
3260 }
3261#endif
131b3ad0
DM
3262 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3263 if (PL_lex_casemods == 1 && PL_lex_inpat)
3264 OPERATOR(',');
3265 else
3266 Aop(OP_CONCAT);
79072805
LW
3267 }
3268 else
cea2e8a9 3269 return yylex();
79072805
LW
3270 }
3271
55497cff 3272 case LEX_INTERPPUSH:
bbf60fe6 3273 return REPORT(sublex_push());
55497cff 3274
79072805 3275 case LEX_INTERPSTART:
3280af22 3276 if (PL_bufptr == PL_bufend)
bbf60fe6 3277 return REPORT(sublex_done());
607df283 3278 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3279 "### Interpolated variable\n"); });
3280af22
NIS
3280 PL_expect = XTERM;
3281 PL_lex_dojoin = (*PL_bufptr == '@');
3282 PL_lex_state = LEX_INTERPNORMAL;
3283 if (PL_lex_dojoin) {
cd81e915 3284 start_force(PL_curforce);
9ded7720 3285 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3286 force_next(',');
cd81e915 3287 start_force(PL_curforce);
a0d0e21e 3288 force_ident("\"", '$');
cd81e915 3289 start_force(PL_curforce);
9ded7720 3290 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3291 force_next('$');
cd81e915 3292 start_force(PL_curforce);
9ded7720 3293 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3294 force_next('(');
cd81e915 3295 start_force(PL_curforce);
9ded7720 3296 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3297 force_next(FUNC);
3298 }
3280af22
NIS
3299 if (PL_lex_starts++) {
3300 s = PL_bufptr;
5db06880
NC
3301#ifdef PERL_MAD
3302 if (PL_madskills) {
cd81e915
NC
3303 if (PL_thistoken)
3304 sv_free(PL_thistoken);
3305 PL_thistoken = newSVpvn("",0);
5db06880
NC
3306 }
3307#endif
131b3ad0
DM
3308 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3309 if (!PL_lex_casemods && PL_lex_inpat)
3310 OPERATOR(',');
3311 else
3312 Aop(OP_CONCAT);
79072805 3313 }
cea2e8a9 3314 return yylex();
79072805
LW
3315
3316 case LEX_INTERPENDMAYBE:
3280af22
NIS
3317 if (intuit_more(PL_bufptr)) {
3318 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3319 break;
3320 }
3321 /* FALL THROUGH */
3322
3323 case LEX_INTERPEND:
3280af22
NIS
3324 if (PL_lex_dojoin) {
3325 PL_lex_dojoin = FALSE;
3326 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3327#ifdef PERL_MAD
3328 if (PL_madskills) {
cd81e915
NC
3329 if (PL_thistoken)
3330 sv_free(PL_thistoken);
3331 PL_thistoken = newSVpvn("",0);
5db06880
NC
3332 }
3333#endif
bbf60fe6 3334 return REPORT(')');
79072805 3335 }
43a16006 3336 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3337 && SvEVALED(PL_lex_repl))
43a16006 3338 {
e9fa98b2 3339 if (PL_bufptr != PL_bufend)
cea2e8a9 3340 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3341 PL_lex_repl = NULL;
e9fa98b2 3342 }
79072805
LW
3343 /* FALLTHROUGH */
3344 case LEX_INTERPCONCAT:
3345#ifdef DEBUGGING
3280af22 3346 if (PL_lex_brackets)
cea2e8a9 3347 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3348#endif
3280af22 3349 if (PL_bufptr == PL_bufend)
bbf60fe6 3350 return REPORT(sublex_done());
79072805 3351
3280af22
NIS
3352 if (SvIVX(PL_linestr) == '\'') {
3353 SV *sv = newSVsv(PL_linestr);
3354 if (!PL_lex_inpat)
76e3520e 3355 sv = tokeq(sv);
3280af22 3356 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3357 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3358 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3359 s = PL_bufend;
79072805
LW
3360 }
3361 else {
3280af22 3362 s = scan_const(PL_bufptr);
79072805 3363 if (*s == '\\')
3280af22 3364 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3365 else
3280af22 3366 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3367 }
3368
3280af22 3369 if (s != PL_bufptr) {
cd81e915 3370 start_force(PL_curforce);
5db06880
NC
3371 if (PL_madskills) {
3372 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3373 }
9ded7720 3374 NEXTVAL_NEXTTOKE = yylval;
3280af22 3375 PL_expect = XTERM;
79072805 3376 force_next(THING);
131b3ad0 3377 if (PL_lex_starts++) {
5db06880
NC
3378#ifdef PERL_MAD
3379 if (PL_madskills) {
cd81e915
NC
3380 if (PL_thistoken)
3381 sv_free(PL_thistoken);
3382 PL_thistoken = newSVpvn("",0);
5db06880
NC
3383 }
3384#endif
131b3ad0
DM
3385 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3386 if (!PL_lex_casemods && PL_lex_inpat)
3387 OPERATOR(',');
3388 else
3389 Aop(OP_CONCAT);
3390 }
79072805 3391 else {
3280af22 3392 PL_bufptr = s;
cea2e8a9 3393 return yylex();
79072805
LW
3394 }
3395 }
3396
cea2e8a9 3397 return yylex();
a0d0e21e 3398 case LEX_FORMLINE:
3280af22
NIS
3399 PL_lex_state = LEX_NORMAL;
3400 s = scan_formline(PL_bufptr);
3401 if (!PL_lex_formbrack)
a0d0e21e
LW
3402 goto rightbracket;
3403 OPERATOR(';');
79072805
LW
3404 }
3405
3280af22
NIS
3406 s = PL_bufptr;
3407 PL_oldoldbufptr = PL_oldbufptr;
3408 PL_oldbufptr = s;
463ee0b2
LW
3409
3410 retry:
5db06880 3411#ifdef PERL_MAD
cd81e915
NC
3412 if (PL_thistoken) {
3413 sv_free(PL_thistoken);
3414 PL_thistoken = 0;
5db06880 3415 }
cd81e915 3416 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3417#endif
378cc40b
LW
3418 switch (*s) {
3419 default:
7e2040f0 3420 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3421 goto keylookup;
cea2e8a9 3422 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3423 case 4:
3424 case 26:
3425 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3426 case 0:
5db06880
NC
3427#ifdef PERL_MAD
3428 if (PL_madskills)
cd81e915 3429 PL_faketokens = 0;
5db06880 3430#endif
3280af22
NIS
3431 if (!PL_rsfp) {
3432 PL_last_uni = 0;
3433 PL_last_lop = 0;
c5ee2135 3434 if (PL_lex_brackets) {
0bd48802
AL
3435 yyerror(PL_lex_formbrack
3436 ? "Format not terminated"
3437 : "Missing right curly or square bracket");
c5ee2135 3438 }
4e553d73 3439 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3440 "### Tokener got EOF\n");
5f80b19c 3441 } );
79072805 3442 TOKEN(0);
463ee0b2 3443 }
3280af22 3444 if (s++ < PL_bufend)
a687059c 3445 goto retry; /* ignore stray nulls */
3280af22
NIS
3446 PL_last_uni = 0;
3447 PL_last_lop = 0;
3448 if (!PL_in_eval && !PL_preambled) {
3449 PL_preambled = TRUE;
5db06880
NC
3450#ifdef PERL_MAD
3451 if (PL_madskills)
cd81e915 3452 PL_faketokens = 1;
5db06880 3453#endif
3280af22
NIS
3454 sv_setpv(PL_linestr,incl_perldb());
3455 if (SvCUR(PL_linestr))
396482e1 3456 sv_catpvs(PL_linestr,";");
3280af22
NIS
3457 if (PL_preambleav){
3458 while(AvFILLp(PL_preambleav) >= 0) {
3459 SV *tmpsv = av_shift(PL_preambleav);
3460 sv_catsv(PL_linestr, tmpsv);
396482e1 3461 sv_catpvs(PL_linestr, ";");
91b7def8
PP
3462 sv_free(tmpsv);
3463 }
3280af22
NIS
3464 sv_free((SV*)PL_preambleav);
3465 PL_preambleav = NULL;
91b7def8 3466 }
3280af22 3467 if (PL_minus_n || PL_minus_p) {
396482e1 3468 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3469 if (PL_minus_l)
396482e1 3470 sv_catpvs(PL_linestr,"chomp;");
3280af22 3471 if (PL_minus_a) {
3280af22 3472 if (PL_minus_F) {
3792a11b
NC
3473 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3474 || *PL_splitstr == '"')
3280af22 3475 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3476 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3477 else {
c8ef6a4b
NC
3478 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3479 bytes can be used as quoting characters. :-) */
dd374669 3480 const char *splits = PL_splitstr;
91d456ae 3481 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3482 do {
3483 /* Need to \ \s */
dd374669
AL
3484 if (*splits == '\\')
3485 sv_catpvn(PL_linestr, splits, 1);
3486 sv_catpvn(PL_linestr, splits, 1);
3487 } while (*splits++);
48c4c863
NC
3488 /* This loop will embed the trailing NUL of
3489 PL_linestr as the last thing it does before
3490 terminating. */
396482e1 3491 sv_catpvs(PL_linestr, ");");
54310121 3492 }
2304df62
AD
3493 }
3494 else
396482e1 3495 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3496 }
79072805 3497 }
bc9b29db 3498 if (PL_minus_E)
396482e1
GA
3499 sv_catpvs(PL_linestr,"use feature ':5.10';");
3500 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3501 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3502 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3503 PL_last_lop = PL_last_uni = NULL;
3280af22 3504 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3505 SV * const sv = newSV(0);
a0d0e21e
LW
3506
3507 sv_upgrade(sv, SVt_PVMG);
3280af22 3508 sv_setsv(sv,PL_linestr);
0ac0412a 3509 (void)SvIOK_on(sv);
45977657 3510 SvIV_set(sv, 0);
36c7798d 3511 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3512 }
79072805 3513 goto retry;
a687059c 3514 }
e929a76b 3515 do {
aa7440fb 3516 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3517 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3518 fake_eof:
5db06880 3519#ifdef PERL_MAD
cd81e915 3520 PL_realtokenstart = -1;
5db06880 3521#endif
7e28d3af
JH
3522 if (PL_rsfp) {
3523 if (PL_preprocess && !PL_in_eval)
3524 (void)PerlProc_pclose(PL_rsfp);
3525 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3526 PerlIO_clearerr(PL_rsfp);
3527 else
3528 (void)PerlIO_close(PL_rsfp);
4608196e 3529 PL_rsfp = NULL;
7e28d3af
JH
3530 PL_doextract = FALSE;
3531 }
3532 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3533#ifdef PERL_MAD
3534 if (PL_madskills)
cd81e915 3535 PL_faketokens = 1;
5db06880 3536#endif
a23c4656
NC
3537 sv_setpv(PL_linestr,PL_minus_p
3538 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3539 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3540 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3541 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH