This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: \N{...} in regular expression [PATCH]
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a
PP
71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 301 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
c35e046a
AL
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 NOOP;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 501 char he_name[32] = "feature_";
6fca0082 502 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
664 PL_linestr = line;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 667 s = SvPV_const(PL_linestr, len);
6f27f9a7 668 if (!len || s[len-1] != ';') {
3280af22
NIS
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 671 sv_catpvs(PL_linestr, "\n;");
8990e307 672 }
3280af22
NIS
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 676 PL_last_lop = PL_last_uni = NULL;
3280af22 677 PL_rsfp = 0;
79072805 678}
a687059c 679
ffb4593c
NT
680/*
681 * Perl_lex_end
9cbb5ea2
GS
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
ffb4593c
NT
684 */
685
463ee0b2 686void
864dbfa3 687Perl_lex_end(pTHX)
463ee0b2 688{
97aff369 689 dVAR;
3280af22 690 PL_doextract = FALSE;
463ee0b2
LW
691}
692
ffb4593c
NT
693/*
694 * S_incline
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
57843af0 697 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 698 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_incline(pTHX_ char *s)
463ee0b2 705{
97aff369 706 dVAR;
463ee0b2
LW
707 char *t;
708 char *n;
73659bf1 709 char *e;
463ee0b2 710 char ch;
463ee0b2 711
57843af0 712 CopLINE_inc(PL_curcop);
463ee0b2
LW
713 if (*s++ != '#')
714 return;
d4c19fe8
AL
715 while (SPACE_OR_TAB(*s))
716 s++;
73659bf1
GS
717 if (strnEQ(s, "line", 4))
718 s += 4;
719 else
720 return;
084592ab 721 if (SPACE_OR_TAB(*s))
73659bf1 722 s++;
4e553d73 723 else
73659bf1 724 return;
d4c19fe8
AL
725 while (SPACE_OR_TAB(*s))
726 s++;
463ee0b2
LW
727 if (!isDIGIT(*s))
728 return;
d4c19fe8 729
463ee0b2
LW
730 n = s;
731 while (isDIGIT(*s))
732 s++;
bf4acbe4 733 while (SPACE_OR_TAB(*s))
463ee0b2 734 s++;
73659bf1 735 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 736 s++;
73659bf1
GS
737 e = t + 1;
738 }
463ee0b2 739 else {
c35e046a
AL
740 t = s;
741 while (!isSPACE(*t))
742 t++;
73659bf1 743 e = t;
463ee0b2 744 }
bf4acbe4 745 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
746 e++;
747 if (*e != '\n' && *e != '\0')
748 return; /* false alarm */
749
463ee0b2
LW
750 ch = *t;
751 *t = '\0';
f4dd75d9 752 if (t - s > 0) {
8a5ee598 753#ifndef USE_ITHREADS
c4420975 754 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
755 STRLEN tmplen = cf ? strlen(cf) : 0;
756 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
757 /* must copy *{"::_<(eval N)[oldfilename:L]"}
758 * to *{"::_<newfilename"} */
759 char smallbuf[256], smallbuf2[256];
760 char *tmpbuf, *tmpbuf2;
8a5ee598 761 GV **gvp, *gv2;
e66cf94c
RGS
762 STRLEN tmplen2 = strlen(s);
763 if (tmplen + 3 < sizeof smallbuf)
764 tmpbuf = smallbuf;
765 else
766 Newx(tmpbuf, tmplen + 3, char);
767 if (tmplen2 + 3 < sizeof smallbuf2)
768 tmpbuf2 = smallbuf2;
769 else
770 Newx(tmpbuf2, tmplen2 + 3, char);
771 tmpbuf[0] = tmpbuf2[0] = '_';
772 tmpbuf[1] = tmpbuf2[1] = '<';
773 memcpy(tmpbuf + 2, cf, ++tmplen);
774 memcpy(tmpbuf2 + 2, s, ++tmplen2);
775 ++tmplen; ++tmplen2;
8a5ee598
RGS
776 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
777 if (gvp) {
778 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
779 if (!isGV(gv2))
780 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
781 /* adjust ${"::_<newfilename"} to store the new file name */
782 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
783 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
784 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
785 }
e66cf94c
RGS
786 if (tmpbuf != smallbuf) Safefree(tmpbuf);
787 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
788 }
8a5ee598 789#endif
05ec9bb3 790 CopFILE_free(PL_curcop);
57843af0 791 CopFILE_set(PL_curcop, s);
f4dd75d9 792 }
463ee0b2 793 *t = ch;
57843af0 794 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
795}
796
29595ff2 797#ifdef PERL_MAD
cd81e915 798/* skip space before PL_thistoken */
29595ff2
NC
799
800STATIC char *
801S_skipspace0(pTHX_ register char *s)
802{
803 s = skipspace(s);
804 if (!PL_madskills)
805 return s;
cd81e915
NC
806 if (PL_skipwhite) {
807 if (!PL_thiswhite)
808 PL_thiswhite = newSVpvn("",0);
809 sv_catsv(PL_thiswhite, PL_skipwhite);
810 sv_free(PL_skipwhite);
811 PL_skipwhite = 0;
812 }
813 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
814 return s;
815}
816
cd81e915 817/* skip space after PL_thistoken */
29595ff2
NC
818
819STATIC char *
820S_skipspace1(pTHX_ register char *s)
821{
d4c19fe8 822 const char *start = s;
29595ff2
NC
823 I32 startoff = start - SvPVX(PL_linestr);
824
825 s = skipspace(s);
826 if (!PL_madskills)
827 return s;
828 start = SvPVX(PL_linestr) + startoff;
cd81e915 829 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 830 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
831 PL_thistoken = newSVpvn(tstart, start - tstart);
832 }
833 PL_realtokenstart = -1;
834 if (PL_skipwhite) {
835 if (!PL_nextwhite)
836 PL_nextwhite = newSVpvn("",0);
837 sv_catsv(PL_nextwhite, PL_skipwhite);
838 sv_free(PL_skipwhite);
839 PL_skipwhite = 0;
29595ff2
NC
840 }
841 return s;
842}
843
844STATIC char *
845S_skipspace2(pTHX_ register char *s, SV **svp)
846{
c35e046a
AL
847 char *start;
848 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
849 const I32 startoff = s - SvPVX(PL_linestr);
850
29595ff2
NC
851 s = skipspace(s);
852 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
853 if (!PL_madskills || !svp)
854 return s;
855 start = SvPVX(PL_linestr) + startoff;
cd81e915 856 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 857 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
858 PL_thistoken = newSVpvn(tstart, start - tstart);
859 PL_realtokenstart = -1;
29595ff2 860 }
cd81e915 861 if (PL_skipwhite) {
29595ff2
NC
862 if (!*svp)
863 *svp = newSVpvn("",0);
cd81e915
NC
864 sv_setsv(*svp, PL_skipwhite);
865 sv_free(PL_skipwhite);
866 PL_skipwhite = 0;
29595ff2
NC
867 }
868
869 return s;
870}
871#endif
872
ffb4593c
NT
873/*
874 * S_skipspace
875 * Called to gobble the appropriate amount and type of whitespace.
876 * Skips comments as well.
877 */
878
76e3520e 879STATIC char *
cea2e8a9 880S_skipspace(pTHX_ register char *s)
a687059c 881{
97aff369 882 dVAR;
5db06880
NC
883#ifdef PERL_MAD
884 int curoff;
885 int startoff = s - SvPVX(PL_linestr);
886
cd81e915
NC
887 if (PL_skipwhite) {
888 sv_free(PL_skipwhite);
889 PL_skipwhite = 0;
5db06880
NC
890 }
891#endif
892
3280af22 893 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 894 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 895 s++;
5db06880
NC
896#ifdef PERL_MAD
897 goto done;
898#else
463ee0b2 899 return s;
5db06880 900#endif
463ee0b2
LW
901 }
902 for (;;) {
fd049845 903 STRLEN prevlen;
09bef843 904 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 905 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
906 while (s < PL_bufend && isSPACE(*s)) {
907 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
908 incline(s);
909 }
ffb4593c
NT
910
911 /* comment */
3280af22
NIS
912 if (s < PL_bufend && *s == '#') {
913 while (s < PL_bufend && *s != '\n')
463ee0b2 914 s++;
60e6418e 915 if (s < PL_bufend) {
463ee0b2 916 s++;
60e6418e
GS
917 if (PL_in_eval && !PL_rsfp) {
918 incline(s);
919 continue;
920 }
921 }
463ee0b2 922 }
ffb4593c
NT
923
924 /* only continue to recharge the buffer if we're at the end
925 * of the buffer, we're not reading from a source filter, and
926 * we're in normal lexing mode
927 */
09bef843
SB
928 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
929 PL_lex_state == LEX_FORMLINE)
5db06880
NC
930#ifdef PERL_MAD
931 goto done;
932#else
463ee0b2 933 return s;
5db06880 934#endif
ffb4593c
NT
935
936 /* try to recharge the buffer */
5db06880
NC
937#ifdef PERL_MAD
938 curoff = s - SvPVX(PL_linestr);
939#endif
940
9cbb5ea2 941 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 942 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 943 {
5db06880
NC
944#ifdef PERL_MAD
945 if (PL_madskills && curoff != startoff) {
cd81e915
NC
946 if (!PL_skipwhite)
947 PL_skipwhite = newSVpvn("",0);
948 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
949 curoff - startoff);
950 }
951
952 /* mustn't throw out old stuff yet if madpropping */
953 SvCUR(PL_linestr) = curoff;
954 s = SvPVX(PL_linestr) + curoff;
955 *s = 0;
956 if (curoff && s[-1] == '\n')
957 s[-1] = ' ';
958#endif
959
9cbb5ea2 960 /* end of file. Add on the -p or -n magic */
cd81e915 961 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 962 if (PL_minus_p) {
5db06880
NC
963#ifdef PERL_MAD
964 sv_catpv(PL_linestr,
965 ";}continue{print or die qq(-p destination: $!\\n);}");
966#else
01a19ab0
NC
967 sv_setpv(PL_linestr,
968 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 969#endif
3280af22 970 PL_minus_n = PL_minus_p = 0;
a0d0e21e 971 }
01a19ab0 972 else if (PL_minus_n) {
5db06880
NC
973#ifdef PERL_MAD
974 sv_catpvn(PL_linestr, ";}", 2);
975#else
01a19ab0 976 sv_setpvn(PL_linestr, ";}", 2);
5db06880 977#endif
01a19ab0
NC
978 PL_minus_n = 0;
979 }
a0d0e21e 980 else
5db06880
NC
981#ifdef PERL_MAD
982 sv_catpvn(PL_linestr,";", 1);
983#else
4147a61b 984 sv_setpvn(PL_linestr,";", 1);
5db06880 985#endif
ffb4593c
NT
986
987 /* reset variables for next time we lex */
9cbb5ea2 988 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
989 = SvPVX(PL_linestr)
990#ifdef PERL_MAD
991 + curoff
992#endif
993 ;
3280af22 994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 995 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
996
997 /* Close the filehandle. Could be from -P preprocessor,
998 * STDIN, or a regular file. If we were reading code from
999 * STDIN (because the commandline held no -e or filename)
1000 * then we don't close it, we reset it so the code can
1001 * read from STDIN too.
1002 */
1003
3280af22
NIS
1004 if (PL_preprocess && !PL_in_eval)
1005 (void)PerlProc_pclose(PL_rsfp);
1006 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1007 PerlIO_clearerr(PL_rsfp);
8990e307 1008 else
3280af22 1009 (void)PerlIO_close(PL_rsfp);
4608196e 1010 PL_rsfp = NULL;
463ee0b2
LW
1011 return s;
1012 }
ffb4593c
NT
1013
1014 /* not at end of file, so we only read another line */
09bef843
SB
1015 /* make corresponding updates to old pointers, for yyerror() */
1016 oldprevlen = PL_oldbufptr - PL_bufend;
1017 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1018 if (PL_last_uni)
1019 oldunilen = PL_last_uni - PL_bufend;
1020 if (PL_last_lop)
1021 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1022 PL_linestart = PL_bufptr = s + prevlen;
1023 PL_bufend = s + SvCUR(PL_linestr);
1024 s = PL_bufptr;
09bef843
SB
1025 PL_oldbufptr = s + oldprevlen;
1026 PL_oldoldbufptr = s + oldoldprevlen;
1027 if (PL_last_uni)
1028 PL_last_uni = s + oldunilen;
1029 if (PL_last_lop)
1030 PL_last_lop = s + oldloplen;
a0d0e21e 1031 incline(s);
ffb4593c
NT
1032
1033 /* debugger active and we're not compiling the debugger code,
1034 * so store the line into the debugger's array of lines
1035 */
3280af22 1036 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1037 SV * const sv = newSV(0);
8990e307
LW
1038
1039 sv_upgrade(sv, SVt_PVMG);
3280af22 1040 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1041 (void)SvIOK_on(sv);
45977657 1042 SvIV_set(sv, 0);
36c7798d 1043 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1044 }
463ee0b2 1045 }
5db06880
NC
1046
1047#ifdef PERL_MAD
1048 done:
1049 if (PL_madskills) {
cd81e915
NC
1050 if (!PL_skipwhite)
1051 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1052 curoff = s - SvPVX(PL_linestr);
1053 if (curoff - startoff)
cd81e915 1054 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1055 curoff - startoff);
1056 }
1057 return s;
1058#endif
a687059c 1059}
378cc40b 1060
ffb4593c
NT
1061/*
1062 * S_check_uni
1063 * Check the unary operators to ensure there's no ambiguity in how they're
1064 * used. An ambiguous piece of code would be:
1065 * rand + 5
1066 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1067 * the +5 is its argument.
1068 */
1069
76e3520e 1070STATIC void
cea2e8a9 1071S_check_uni(pTHX)
ba106d47 1072{
97aff369 1073 dVAR;
d4c19fe8
AL
1074 const char *s;
1075 const char *t;
2f3197b3 1076
3280af22 1077 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1078 return;
3280af22
NIS
1079 while (isSPACE(*PL_last_uni))
1080 PL_last_uni++;
c35e046a
AL
1081 s = PL_last_uni;
1082 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1083 s++;
3280af22 1084 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1085 return;
6136c704 1086
0453d815 1087 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1088 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1089 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1090 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1091 }
2f3197b3
LW
1092}
1093
ffb4593c
NT
1094/*
1095 * LOP : macro to build a list operator. Its behaviour has been replaced
1096 * with a subroutine, S_lop() for which LOP is just another name.
1097 */
1098
a0d0e21e
LW
1099#define LOP(f,x) return lop(f,x,s)
1100
ffb4593c
NT
1101/*
1102 * S_lop
1103 * Build a list operator (or something that might be one). The rules:
1104 * - if we have a next token, then it's a list operator [why?]
1105 * - if the next thing is an opening paren, then it's a function
1106 * - else it's a list operator
1107 */
1108
76e3520e 1109STATIC I32
a0be28da 1110S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1111{
97aff369 1112 dVAR;
79072805 1113 yylval.ival = f;
35c8bce7 1114 CLINE;
3280af22
NIS
1115 PL_expect = x;
1116 PL_bufptr = s;
1117 PL_last_lop = PL_oldbufptr;
eb160463 1118 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1119#ifdef PERL_MAD
1120 if (PL_lasttoke)
1121 return REPORT(LSTOP);
1122#else
3280af22 1123 if (PL_nexttoke)
bbf60fe6 1124 return REPORT(LSTOP);
5db06880 1125#endif
79072805 1126 if (*s == '(')
bbf60fe6 1127 return REPORT(FUNC);
29595ff2 1128 s = PEEKSPACE(s);
79072805 1129 if (*s == '(')
bbf60fe6 1130 return REPORT(FUNC);
79072805 1131 else
bbf60fe6 1132 return REPORT(LSTOP);
79072805
LW
1133}
1134
5db06880
NC
1135#ifdef PERL_MAD
1136 /*
1137 * S_start_force
1138 * Sets up for an eventual force_next(). start_force(0) basically does
1139 * an unshift, while start_force(-1) does a push. yylex removes items
1140 * on the "pop" end.
1141 */
1142
1143STATIC void
1144S_start_force(pTHX_ int where)
1145{
1146 int i;
1147
cd81e915 1148 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1149 where = PL_lasttoke;
cd81e915
NC
1150 assert(PL_curforce < 0 || PL_curforce == where);
1151 if (PL_curforce != where) {
5db06880
NC
1152 for (i = PL_lasttoke; i > where; --i) {
1153 PL_nexttoke[i] = PL_nexttoke[i-1];
1154 }
1155 PL_lasttoke++;
1156 }
cd81e915 1157 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1158 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1159 PL_curforce = where;
1160 if (PL_nextwhite) {
5db06880
NC
1161 if (PL_madskills)
1162 curmad('^', newSVpvn("",0));
cd81e915 1163 CURMAD('_', PL_nextwhite);
5db06880
NC
1164 }
1165}
1166
1167STATIC void
1168S_curmad(pTHX_ char slot, SV *sv)
1169{
1170 MADPROP **where;
1171
1172 if (!sv)
1173 return;
cd81e915
NC
1174 if (PL_curforce < 0)
1175 where = &PL_thismad;
5db06880 1176 else
cd81e915 1177 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1178
cd81e915 1179 if (PL_faketokens)
5db06880
NC
1180 sv_setpvn(sv, "", 0);
1181 else {
1182 if (!IN_BYTES) {
1183 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1184 SvUTF8_on(sv);
1185 else if (PL_encoding) {
1186 sv_recode_to_utf8(sv, PL_encoding);
1187 }
1188 }
1189 }
1190
1191 /* keep a slot open for the head of the list? */
1192 if (slot != '_' && *where && (*where)->mad_key == '^') {
1193 (*where)->mad_key = slot;
1194 sv_free((*where)->mad_val);
1195 (*where)->mad_val = (void*)sv;
1196 }
1197 else
1198 addmad(newMADsv(slot, sv), where, 0);
1199}
1200#else
b3f24c00
MHM
1201# define start_force(where) NOOP
1202# define curmad(slot, sv) NOOP
5db06880
NC
1203#endif
1204
ffb4593c
NT
1205/*
1206 * S_force_next
9cbb5ea2 1207 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1208 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1209 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1210 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1211 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1212 */
1213
4e553d73 1214STATIC void
cea2e8a9 1215S_force_next(pTHX_ I32 type)
79072805 1216{
97aff369 1217 dVAR;
5db06880 1218#ifdef PERL_MAD
cd81e915 1219 if (PL_curforce < 0)
5db06880 1220 start_force(PL_lasttoke);
cd81e915 1221 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1222 if (PL_lex_state != LEX_KNOWNEXT)
1223 PL_lex_defer = PL_lex_state;
1224 PL_lex_state = LEX_KNOWNEXT;
1225 PL_lex_expect = PL_expect;
cd81e915 1226 PL_curforce = -1;
5db06880 1227#else
3280af22
NIS
1228 PL_nexttype[PL_nexttoke] = type;
1229 PL_nexttoke++;
1230 if (PL_lex_state != LEX_KNOWNEXT) {
1231 PL_lex_defer = PL_lex_state;
1232 PL_lex_expect = PL_expect;
1233 PL_lex_state = LEX_KNOWNEXT;
79072805 1234 }
5db06880 1235#endif
79072805
LW
1236}
1237
d0a148a6
NC
1238STATIC SV *
1239S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1240{
97aff369 1241 dVAR;
9d4ba2ae 1242 SV * const sv = newSVpvn(start,len);
bfed75c6 1243 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1244 SvUTF8_on(sv);
1245 return sv;
1246}
1247
ffb4593c
NT
1248/*
1249 * S_force_word
1250 * When the lexer knows the next thing is a word (for instance, it has
1251 * just seen -> and it knows that the next char is a word char, then
1252 * it calls S_force_word to stick the next word into the PL_next lookahead.
1253 *
1254 * Arguments:
b1b65b59 1255 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1256 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1257 * int check_keyword : if true, Perl checks to make sure the word isn't
1258 * a keyword (do this if the word is a label, e.g. goto FOO)
1259 * int allow_pack : if true, : characters will also be allowed (require,
1260 * use, etc. do this)
9cbb5ea2 1261 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1262 */
1263
76e3520e 1264STATIC char *
cea2e8a9 1265S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1266{
97aff369 1267 dVAR;
463ee0b2
LW
1268 register char *s;
1269 STRLEN len;
4e553d73 1270
29595ff2 1271 start = SKIPSPACE1(start);
463ee0b2 1272 s = start;
7e2040f0 1273 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1274 (allow_pack && *s == ':') ||
15f0808c 1275 (allow_initial_tick && *s == '\'') )
a0d0e21e 1276 {
3280af22
NIS
1277 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1278 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1279 return start;
cd81e915 1280 start_force(PL_curforce);
5db06880
NC
1281 if (PL_madskills)
1282 curmad('X', newSVpvn(start,s-start));
463ee0b2 1283 if (token == METHOD) {
29595ff2 1284 s = SKIPSPACE1(s);
463ee0b2 1285 if (*s == '(')
3280af22 1286 PL_expect = XTERM;
463ee0b2 1287 else {
3280af22 1288 PL_expect = XOPERATOR;
463ee0b2 1289 }
79072805 1290 }
9ded7720 1291 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1292 = (OP*)newSVOP(OP_CONST,0,
1293 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1294 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1295 force_next(token);
1296 }
1297 return s;
1298}
1299
ffb4593c
NT
1300/*
1301 * S_force_ident
9cbb5ea2 1302 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1303 * text only contains the "foo" portion. The first argument is a pointer
1304 * to the "foo", and the second argument is the type symbol to prefix.
1305 * Forces the next token to be a "WORD".
9cbb5ea2 1306 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1307 */
1308
76e3520e 1309STATIC void
bfed75c6 1310S_force_ident(pTHX_ register const char *s, int kind)
79072805 1311{
97aff369 1312 dVAR;
c35e046a 1313 if (*s) {
90e5519e
NC
1314 const STRLEN len = strlen(s);
1315 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1316 start_force(PL_curforce);
9ded7720 1317 NEXTVAL_NEXTTOKE.opval = o;
79072805 1318 force_next(WORD);
748a9306 1319 if (kind) {
11343788 1320 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1321 /* XXX see note in pp_entereval() for why we forgo typo
1322 warnings if the symbol must be introduced in an eval.
1323 GSAR 96-10-12 */
90e5519e
NC
1324 gv_fetchpvn_flags(s, len,
1325 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1326 : GV_ADD,
1327 kind == '$' ? SVt_PV :
1328 kind == '@' ? SVt_PVAV :
1329 kind == '%' ? SVt_PVHV :
a0d0e21e 1330 SVt_PVGV
90e5519e 1331 );
748a9306 1332 }
79072805
LW
1333 }
1334}
1335
1571675a
GS
1336NV
1337Perl_str_to_version(pTHX_ SV *sv)
1338{
1339 NV retval = 0.0;
1340 NV nshift = 1.0;
1341 STRLEN len;
cfd0369c 1342 const char *start = SvPV_const(sv,len);
9d4ba2ae 1343 const char * const end = start + len;
504618e9 1344 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1345 while (start < end) {
ba210ebe 1346 STRLEN skip;
1571675a
GS
1347 UV n;
1348 if (utf)
9041c2e3 1349 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1350 else {
1351 n = *(U8*)start;
1352 skip = 1;
1353 }
1354 retval += ((NV)n)/nshift;
1355 start += skip;
1356 nshift *= 1000;
1357 }
1358 return retval;
1359}
1360
4e553d73 1361/*
ffb4593c
NT
1362 * S_force_version
1363 * Forces the next token to be a version number.
e759cc13
RGS
1364 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1365 * and if "guessing" is TRUE, then no new token is created (and the caller
1366 * must use an alternative parsing method).
ffb4593c
NT
1367 */
1368
76e3520e 1369STATIC char *
e759cc13 1370S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1371{
97aff369 1372 dVAR;
5f66b61c 1373 OP *version = NULL;
44dcb63b 1374 char *d;
5db06880
NC
1375#ifdef PERL_MAD
1376 I32 startoff = s - SvPVX(PL_linestr);
1377#endif
89bfa8cd 1378
29595ff2 1379 s = SKIPSPACE1(s);
89bfa8cd 1380
44dcb63b 1381 d = s;
dd629d5b 1382 if (*d == 'v')
44dcb63b 1383 d++;
44dcb63b 1384 if (isDIGIT(*d)) {
e759cc13
RGS
1385 while (isDIGIT(*d) || *d == '_' || *d == '.')
1386 d++;
5db06880
NC
1387#ifdef PERL_MAD
1388 if (PL_madskills) {
cd81e915 1389 start_force(PL_curforce);
5db06880
NC
1390 curmad('X', newSVpvn(s,d-s));
1391 }
1392#endif
9f3d182e 1393 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1394 SV *ver;
b73d6f50 1395 s = scan_num(s, &yylval);
89bfa8cd 1396 version = yylval.opval;
dd629d5b
GS
1397 ver = cSVOPx(version)->op_sv;
1398 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1399 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1400 SvNV_set(ver, str_to_version(ver));
1571675a 1401 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1402 }
89bfa8cd 1403 }
5db06880
NC
1404 else if (guessing) {
1405#ifdef PERL_MAD
1406 if (PL_madskills) {
cd81e915
NC
1407 sv_free(PL_nextwhite); /* let next token collect whitespace */
1408 PL_nextwhite = 0;
5db06880
NC
1409 s = SvPVX(PL_linestr) + startoff;
1410 }
1411#endif
e759cc13 1412 return s;
5db06880 1413 }
89bfa8cd
PP
1414 }
1415
5db06880
NC
1416#ifdef PERL_MAD
1417 if (PL_madskills && !version) {
cd81e915
NC
1418 sv_free(PL_nextwhite); /* let next token collect whitespace */
1419 PL_nextwhite = 0;
5db06880
NC
1420 s = SvPVX(PL_linestr) + startoff;
1421 }
1422#endif
89bfa8cd 1423 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1424 start_force(PL_curforce);
9ded7720 1425 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1426 force_next(WORD);
89bfa8cd 1427
e759cc13 1428 return s;
89bfa8cd
PP
1429}
1430
ffb4593c
NT
1431/*
1432 * S_tokeq
1433 * Tokenize a quoted string passed in as an SV. It finds the next
1434 * chunk, up to end of string or a backslash. It may make a new
1435 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1436 * turns \\ into \.
1437 */
1438
76e3520e 1439STATIC SV *
cea2e8a9 1440S_tokeq(pTHX_ SV *sv)
79072805 1441{
97aff369 1442 dVAR;
79072805
LW
1443 register char *s;
1444 register char *send;
1445 register char *d;
b3ac6de7
IZ
1446 STRLEN len = 0;
1447 SV *pv = sv;
79072805
LW
1448
1449 if (!SvLEN(sv))
b3ac6de7 1450 goto finish;
79072805 1451
a0d0e21e 1452 s = SvPV_force(sv, len);
21a311ee 1453 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1454 goto finish;
463ee0b2 1455 send = s + len;
79072805
LW
1456 while (s < send && *s != '\\')
1457 s++;
1458 if (s == send)
b3ac6de7 1459 goto finish;
79072805 1460 d = s;
be4731d2 1461 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1462 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1463 if (SvUTF8(sv))
1464 SvUTF8_on(pv);
1465 }
79072805
LW
1466 while (s < send) {
1467 if (*s == '\\') {
a0d0e21e 1468 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1469 s++; /* all that, just for this */
1470 }
1471 *d++ = *s++;
1472 }
1473 *d = '\0';
95a20fc0 1474 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1475 finish:
3280af22 1476 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1477 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1478 return sv;
1479}
1480
ffb4593c
NT
1481/*
1482 * Now come three functions related to double-quote context,
1483 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1484 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1485 * interact with PL_lex_state, and create fake ( ... ) argument lists
1486 * to handle functions and concatenation.
1487 * They assume that whoever calls them will be setting up a fake
1488 * join call, because each subthing puts a ',' after it. This lets
1489 * "lower \luPpEr"
1490 * become
1491 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1492 *
1493 * (I'm not sure whether the spurious commas at the end of lcfirst's
1494 * arguments and join's arguments are created or not).
1495 */
1496
1497/*
1498 * S_sublex_start
1499 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1500 *
1501 * Pattern matching will set PL_lex_op to the pattern-matching op to
1502 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1503 *
1504 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1505 *
1506 * Everything else becomes a FUNC.
1507 *
1508 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1509 * had an OP_CONST or OP_READLINE). This just sets us up for a
1510 * call to S_sublex_push().
1511 */
1512
76e3520e 1513STATIC I32
cea2e8a9 1514S_sublex_start(pTHX)
79072805 1515{
97aff369 1516 dVAR;
0d46e09a 1517 register const I32 op_type = yylval.ival;
79072805
LW
1518
1519 if (op_type == OP_NULL) {
3280af22 1520 yylval.opval = PL_lex_op;
5f66b61c 1521 PL_lex_op = NULL;
79072805
LW
1522 return THING;
1523 }
1524 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1525 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1526
1527 if (SvTYPE(sv) == SVt_PVIV) {
1528 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1529 STRLEN len;
96a5add6 1530 const char * const p = SvPV_const(sv, len);
f54cb97a 1531 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1532 if (SvUTF8(sv))
1533 SvUTF8_on(nsv);
b3ac6de7
IZ
1534 SvREFCNT_dec(sv);
1535 sv = nsv;
4e553d73 1536 }
b3ac6de7 1537 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1538 PL_lex_stuff = NULL;
6f33ba73
RGS
1539 /* Allow <FH> // "foo" */
1540 if (op_type == OP_READLINE)
1541 PL_expect = XTERMORDORDOR;
79072805
LW
1542 return THING;
1543 }
1544
3280af22
NIS
1545 PL_sublex_info.super_state = PL_lex_state;
1546 PL_sublex_info.sub_inwhat = op_type;
1547 PL_sublex_info.sub_op = PL_lex_op;
1548 PL_lex_state = LEX_INTERPPUSH;
55497cff 1549
3280af22
NIS
1550 PL_expect = XTERM;
1551 if (PL_lex_op) {
1552 yylval.opval = PL_lex_op;
5f66b61c 1553 PL_lex_op = NULL;
55497cff
PP
1554 return PMFUNC;
1555 }
1556 else
1557 return FUNC;
1558}
1559
ffb4593c
NT
1560/*
1561 * S_sublex_push
1562 * Create a new scope to save the lexing state. The scope will be
1563 * ended in S_sublex_done. Returns a '(', starting the function arguments
1564 * to the uc, lc, etc. found before.
1565 * Sets PL_lex_state to LEX_INTERPCONCAT.
1566 */
1567
76e3520e 1568STATIC I32
cea2e8a9 1569S_sublex_push(pTHX)
55497cff 1570{
27da23d5 1571 dVAR;
f46d017c 1572 ENTER;
55497cff 1573
3280af22
NIS
1574 PL_lex_state = PL_sublex_info.super_state;
1575 SAVEI32(PL_lex_dojoin);
1576 SAVEI32(PL_lex_brackets);
3280af22
NIS
1577 SAVEI32(PL_lex_casemods);
1578 SAVEI32(PL_lex_starts);
1579 SAVEI32(PL_lex_state);
7766f137 1580 SAVEVPTR(PL_lex_inpat);
3280af22 1581 SAVEI32(PL_lex_inwhat);
57843af0 1582 SAVECOPLINE(PL_curcop);
3280af22 1583 SAVEPPTR(PL_bufptr);
8452ff4b 1584 SAVEPPTR(PL_bufend);
3280af22
NIS
1585 SAVEPPTR(PL_oldbufptr);
1586 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1587 SAVEPPTR(PL_last_lop);
1588 SAVEPPTR(PL_last_uni);
3280af22
NIS
1589 SAVEPPTR(PL_linestart);
1590 SAVESPTR(PL_linestr);
8edd5f42
RGS
1591 SAVEGENERICPV(PL_lex_brackstack);
1592 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1593
1594 PL_linestr = PL_lex_stuff;
a0714e2c 1595 PL_lex_stuff = NULL;
3280af22 1596
9cbb5ea2
GS
1597 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1598 = SvPVX(PL_linestr);
3280af22 1599 PL_bufend += SvCUR(PL_linestr);
bd61b366 1600 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1601 SAVEFREESV(PL_linestr);
1602
1603 PL_lex_dojoin = FALSE;
1604 PL_lex_brackets = 0;
a02a5408
JC
1605 Newx(PL_lex_brackstack, 120, char);
1606 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1607 PL_lex_casemods = 0;
1608 *PL_lex_casestack = '\0';
1609 PL_lex_starts = 0;
1610 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1611 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1612
1613 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1614 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1615 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1616 else
5f66b61c 1617 PL_lex_inpat = NULL;
79072805 1618
55497cff 1619 return '(';
79072805
LW
1620}
1621
ffb4593c
NT
1622/*
1623 * S_sublex_done
1624 * Restores lexer state after a S_sublex_push.
1625 */
1626
76e3520e 1627STATIC I32
cea2e8a9 1628S_sublex_done(pTHX)
79072805 1629{
27da23d5 1630 dVAR;
3280af22 1631 if (!PL_lex_starts++) {
396482e1 1632 SV * const sv = newSVpvs("");
9aa983d2
JH
1633 if (SvUTF8(PL_linestr))
1634 SvUTF8_on(sv);
3280af22 1635 PL_expect = XOPERATOR;
9aa983d2 1636 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1637 return THING;
1638 }
1639
3280af22
NIS
1640 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1641 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1642 return yylex();
79072805
LW
1643 }
1644
ffb4593c 1645 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1646 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1647 PL_linestr = PL_lex_repl;
1648 PL_lex_inpat = 0;
1649 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1650 PL_bufend += SvCUR(PL_linestr);
bd61b366 1651 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1652 SAVEFREESV(PL_linestr);
1653 PL_lex_dojoin = FALSE;
1654 PL_lex_brackets = 0;
3280af22
NIS
1655 PL_lex_casemods = 0;
1656 *PL_lex_casestack = '\0';
1657 PL_lex_starts = 0;
25da4f38 1658 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1659 PL_lex_state = LEX_INTERPNORMAL;
1660 PL_lex_starts++;
e9fa98b2
HS
1661 /* we don't clear PL_lex_repl here, so that we can check later
1662 whether this is an evalled subst; that means we rely on the
1663 logic to ensure sublex_done() is called again only via the
1664 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1665 }
e9fa98b2 1666 else {
3280af22 1667 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1668 PL_lex_repl = NULL;
e9fa98b2 1669 }
79072805 1670 return ',';
ffed7fef
LW
1671 }
1672 else {
5db06880
NC
1673#ifdef PERL_MAD
1674 if (PL_madskills) {
cd81e915
NC
1675 if (PL_thiswhite) {
1676 if (!PL_endwhite)
1677 PL_endwhite = newSVpvn("",0);
1678 sv_catsv(PL_endwhite, PL_thiswhite);
1679 PL_thiswhite = 0;
1680 }
1681 if (PL_thistoken)
1682 sv_setpvn(PL_thistoken,"",0);
5db06880 1683 else
cd81e915 1684 PL_realtokenstart = -1;
5db06880
NC
1685 }
1686#endif
f46d017c 1687 LEAVE;
3280af22
NIS
1688 PL_bufend = SvPVX(PL_linestr);
1689 PL_bufend += SvCUR(PL_linestr);
1690 PL_expect = XOPERATOR;
09bef843 1691 PL_sublex_info.sub_inwhat = 0;
79072805 1692 return ')';
ffed7fef
LW
1693 }
1694}
1695
02aa26ce
NT
1696/*
1697 scan_const
1698
1699 Extracts a pattern, double-quoted string, or transliteration. This
1700 is terrifying code.
1701
94def140 1702 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1703 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1704 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1705
94def140
ST
1706 Returns a pointer to the character scanned up to. If this is
1707 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1708 successfully parsed), will leave an OP for the substring scanned
1709 in yylval. Caller must intuit reason for not parsing further
1710 by looking at the next characters herself.
1711
02aa26ce
NT
1712 In patterns:
1713 backslashes:
1714 double-quoted style: \r and \n
1715 regexp special ones: \D \s
94def140
ST
1716 constants: \x31
1717 backrefs: \1
02aa26ce
NT
1718 case and quoting: \U \Q \E
1719 stops on @ and $, but not for $ as tail anchor
1720
1721 In transliterations:
1722 characters are VERY literal, except for - not at the start or end
94def140
ST
1723 of the string, which indicates a range. If the range is in bytes,
1724 scan_const expands the range to the full set of intermediate
1725 characters. If the range is in utf8, the hyphen is replaced with
1726 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1727
1728 In double-quoted strings:
1729 backslashes:
1730 double-quoted style: \r and \n
94def140
ST
1731 constants: \x31
1732 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1733 case and quoting: \U \Q \E
1734 stops on @ and $
1735
1736 scan_const does *not* construct ops to handle interpolated strings.
1737 It stops processing as soon as it finds an embedded $ or @ variable
1738 and leaves it to the caller to work out what's going on.
1739
94def140
ST
1740 embedded arrays (whether in pattern or not) could be:
1741 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1742
1743 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1744
1745 $ in pattern could be $foo or could be tail anchor. Assumption:
1746 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1747 followed by one of "()| \r\n\t"
02aa26ce
NT
1748
1749 \1 (backreferences) are turned into $1
1750
1751 The structure of the code is
1752 while (there's a character to process) {
94def140
ST
1753 handle transliteration ranges
1754 skip regexp comments /(?#comment)/ and codes /(?{code})/
1755 skip #-initiated comments in //x patterns
1756 check for embedded arrays
02aa26ce
NT
1757 check for embedded scalars
1758 if (backslash) {
94def140
ST
1759 leave intact backslashes from leaveit (below)
1760 deprecate \1 in substitution replacements
02aa26ce
NT
1761 handle string-changing backslashes \l \U \Q \E, etc.
1762 switch (what was escaped) {
94def140
ST
1763 handle \- in a transliteration (becomes a literal -)
1764 handle \132 (octal characters)
1765 handle \x15 and \x{1234} (hex characters)
1766 handle \N{name} (named characters)
1767 handle \cV (control characters)
1768 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1769 } (end switch)
1770 } (end if backslash)
1771 } (end while character to read)
4e553d73 1772
02aa26ce
NT
1773*/
1774
76e3520e 1775STATIC char *
cea2e8a9 1776S_scan_const(pTHX_ char *start)
79072805 1777{
97aff369 1778 dVAR;
3280af22 1779 register char *send = PL_bufend; /* end of the constant */
561b68a9 1780 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1781 register char *s = start; /* start of the constant */
1782 register char *d = SvPVX(sv); /* destination for copies */
1783 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1784 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1785 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1786 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1787 UV uv;
4c3a8340
ST
1788#ifdef EBCDIC
1789 UV literal_endpoint = 0;
e294cc5d 1790 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1791#endif
012bcf8d 1792
d4c19fe8 1793 const char * const leaveit = /* set of acceptably-backslashed characters */
10edeb5d
JH
1794 (const char *)
1795 (PL_lex_inpat
fc8cd66c 1796 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-Nnrtfeaxcz0123456789[{]} \t\n\r\f\v#"
10edeb5d 1797 : "");
79072805 1798
2b9d42f0
NIS
1799 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1800 /* If we are doing a trans and we know we want UTF8 set expectation */
1801 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1802 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1803 }
1804
1805
79072805 1806 while (s < send || dorange) {
02aa26ce 1807 /* get transliterations out of the way (they're most literal) */
3280af22 1808 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1809 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1810 if (dorange) {
1ba5c669
JH
1811 I32 i; /* current expanded character */
1812 I32 min; /* first character in range */
1813 I32 max; /* last character in range */
02aa26ce 1814
e294cc5d
JH
1815#ifdef EBCDIC
1816 UV uvmax = 0;
1817#endif
1818
1819 if (has_utf8
1820#ifdef EBCDIC
1821 && !native_range
1822#endif
1823 ) {
9d4ba2ae 1824 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1825 char *e = d++;
1826 while (e-- > c)
1827 *(e + 1) = *e;
25716404 1828 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1829 /* mark the range as done, and continue */
1830 dorange = FALSE;
1831 didrange = TRUE;
1832 continue;
1833 }
2b9d42f0 1834
95a20fc0 1835 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1836#ifdef EBCDIC
1837 SvGROW(sv,
1838 SvLEN(sv) + (has_utf8 ?
1839 (512 - UTF_CONTINUATION_MARK +
1840 UNISKIP(0x100))
1841 : 256));
1842 /* How many two-byte within 0..255: 128 in UTF-8,
1843 * 96 in UTF-8-mod. */
1844#else
9cbb5ea2 1845 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1846#endif
9cbb5ea2 1847 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1848#ifdef EBCDIC
1849 if (has_utf8) {
1850 int j;
1851 for (j = 0; j <= 1; j++) {
1852 char * const c = (char*)utf8_hop((U8*)d, -1);
1853 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1854 if (j)
1855 min = (U8)uv;
1856 else if (uv < 256)
1857 max = (U8)uv;
1858 else {
1859 max = (U8)0xff; /* only to \xff */
1860 uvmax = uv; /* \x{100} to uvmax */
1861 }
1862 d = c; /* eat endpoint chars */
1863 }
1864 }
1865 else {
1866#endif
1867 d -= 2; /* eat the first char and the - */
1868 min = (U8)*d; /* first char in range */
1869 max = (U8)d[1]; /* last char in range */
1870#ifdef EBCDIC
1871 }
1872#endif
8ada0baa 1873
c2e66d9e 1874 if (min > max) {
01ec43d0 1875 Perl_croak(aTHX_
d1573ac7 1876 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1877 (char)min, (char)max);
c2e66d9e
GS
1878 }
1879
c7f1f016 1880#ifdef EBCDIC
4c3a8340
ST
1881 if (literal_endpoint == 2 &&
1882 ((isLOWER(min) && isLOWER(max)) ||
1883 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1884 if (isLOWER(min)) {
1885 for (i = min; i <= max; i++)
1886 if (isLOWER(i))
db42d148 1887 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1888 } else {
1889 for (i = min; i <= max; i++)
1890 if (isUPPER(i))
db42d148 1891 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1892 }
1893 }
1894 else
1895#endif
1896 for (i = min; i <= max; i++)
e294cc5d
JH
1897#ifdef EBCDIC
1898 if (has_utf8) {
1899 const U8 ch = (U8)NATIVE_TO_UTF(i);
1900 if (UNI_IS_INVARIANT(ch))
1901 *d++ = (U8)i;
1902 else {
1903 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1904 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1905 }
1906 }
1907 else
1908#endif
1909 *d++ = (char)i;
1910
1911#ifdef EBCDIC
1912 if (uvmax) {
1913 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1914 if (uvmax > 0x101)
1915 *d++ = (char)UTF_TO_NATIVE(0xff);
1916 if (uvmax > 0x100)
1917 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1918 }
1919#endif
02aa26ce
NT
1920
1921 /* mark the range as done, and continue */
79072805 1922 dorange = FALSE;
01ec43d0 1923 didrange = TRUE;
4c3a8340
ST
1924#ifdef EBCDIC
1925 literal_endpoint = 0;
1926#endif
79072805 1927 continue;
4e553d73 1928 }
02aa26ce
NT
1929
1930 /* range begins (ignore - as first or last char) */
79072805 1931 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1932 if (didrange) {
1fafa243 1933 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1934 }
e294cc5d
JH
1935 if (has_utf8
1936#ifdef EBCDIC
1937 && !native_range
1938#endif
1939 ) {
25716404 1940 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1941 s++;
1942 continue;
1943 }
79072805
LW
1944 dorange = TRUE;
1945 s++;
01ec43d0
GS
1946 }
1947 else {
1948 didrange = FALSE;
4c3a8340
ST
1949#ifdef EBCDIC
1950 literal_endpoint = 0;
e294cc5d 1951 native_range = TRUE;
4c3a8340 1952#endif
01ec43d0 1953 }
79072805 1954 }
02aa26ce
NT
1955
1956 /* if we get here, we're not doing a transliteration */
1957
0f5d15d6
IZ
1958 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1959 except for the last char, which will be done separately. */
3280af22 1960 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1961 if (s[2] == '#') {
e994fd66 1962 while (s+1 < send && *s != ')')
db42d148 1963 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1964 }
1965 else if (s[2] == '{' /* This should match regcomp.c */
1966 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1967 {
cc6b7395 1968 I32 count = 1;
0f5d15d6 1969 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1970 char c;
1971
d9f97599
GS
1972 while (count && (c = *regparse)) {
1973 if (c == '\\' && regparse[1])
1974 regparse++;
4e553d73 1975 else if (c == '{')
cc6b7395 1976 count++;
4e553d73 1977 else if (c == '}')
cc6b7395 1978 count--;
d9f97599 1979 regparse++;
cc6b7395 1980 }
e994fd66 1981 if (*regparse != ')')
5bdf89e7 1982 regparse--; /* Leave one char for continuation. */
0f5d15d6 1983 while (s < regparse)
db42d148 1984 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1985 }
748a9306 1986 }
02aa26ce
NT
1987
1988 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1989 else if (*s == '#' && PL_lex_inpat &&
1990 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1991 while (s+1 < send && *s != '\n')
db42d148 1992 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1993 }
02aa26ce 1994
5d1d4326 1995 /* check for embedded arrays
da6eedaa 1996 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1997 */
1749ea0d
ST
1998 else if (*s == '@' && s[1]) {
1999 if (isALNUM_lazy_if(s+1,UTF))
2000 break;
2001 if (strchr(":'{$", s[1]))
2002 break;
2003 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2004 break; /* in regexp, neither @+ nor @- are interpolated */
2005 }
02aa26ce
NT
2006
2007 /* check for embedded scalars. only stop if we're sure it's a
2008 variable.
2009 */
79072805 2010 else if (*s == '$') {
3280af22 2011 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2012 break;
6002328a 2013 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2014 break; /* in regexp, $ might be tail anchor */
2015 }
02aa26ce 2016
2b9d42f0
NIS
2017 /* End of else if chain - OP_TRANS rejoin rest */
2018
02aa26ce 2019 /* backslashes */
79072805
LW
2020 if (*s == '\\' && s+1 < send) {
2021 s++;
02aa26ce
NT
2022
2023 /* some backslashes we leave behind */
c9f97d15 2024 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
2025 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2026 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
2027 continue;
2028 }
02aa26ce
NT
2029
2030 /* deprecate \1 in strings and substitution replacements */
3280af22 2031 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2032 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2033 {
599cee73 2034 if (ckWARN(WARN_SYNTAX))
9014280d 2035 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2036 *--s = '$';
2037 break;
2038 }
02aa26ce
NT
2039
2040 /* string-change backslash escapes */
3280af22 2041 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2042 --s;
2043 break;
2044 }
02aa26ce
NT
2045
2046 /* if we get here, it's either a quoted -, or a digit */
79072805 2047 switch (*s) {
02aa26ce
NT
2048
2049 /* quoted - in transliterations */
79072805 2050 case '-':
3280af22 2051 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2052 *d++ = *s++;
2053 continue;
2054 }
2055 /* FALL THROUGH */
2056 default:
11b8faa4 2057 {
86f97054 2058 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2059 ckWARN(WARN_MISC))
9014280d 2060 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2061 "Unrecognized escape \\%c passed through",
2062 *s);
11b8faa4 2063 /* default action is to copy the quoted character */
f9a63242 2064 goto default_action;
11b8faa4 2065 }
02aa26ce
NT
2066
2067 /* \132 indicates an octal constant */
79072805
LW
2068 case '0': case '1': case '2': case '3':
2069 case '4': case '5': case '6': case '7':
ba210ebe 2070 {
53305cf1
NC
2071 I32 flags = 0;
2072 STRLEN len = 3;
2073 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2074 s += len;
2075 }
012bcf8d 2076 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2077
2078 /* \x24 indicates a hex constant */
79072805 2079 case 'x':
a0ed51b3
LW
2080 ++s;
2081 if (*s == '{') {
9d4ba2ae 2082 char* const e = strchr(s, '}');
a4c04bdc
NC
2083 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2084 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2085 STRLEN len;
355860ce 2086
53305cf1 2087 ++s;
adaeee49 2088 if (!e) {
a0ed51b3 2089 yyerror("Missing right brace on \\x{}");
355860ce 2090 continue;
ba210ebe 2091 }
53305cf1
NC
2092 len = e - s;
2093 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2094 s = e + 1;
a0ed51b3
LW
2095 }
2096 else {
ba210ebe 2097 {
53305cf1 2098 STRLEN len = 2;
a4c04bdc 2099 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2100 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2101 s += len;
2102 }
012bcf8d
GS
2103 }
2104
2105 NUM_ESCAPE_INSERT:
2106 /* Insert oct or hex escaped character.
301d3d20 2107 * There will always enough room in sv since such
db42d148 2108 * escapes will be longer than any UTF-8 sequence
301d3d20 2109 * they can end up as. */
ba7cea30 2110
c7f1f016
NIS
2111 /* We need to map to chars to ASCII before doing the tests
2112 to cover EBCDIC
2113 */
c4d5f83a 2114 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2115 if (!has_utf8 && uv > 255) {
301d3d20
JH
2116 /* Might need to recode whatever we have
2117 * accumulated so far if it contains any
2118 * hibit chars.
2119 *
2120 * (Can't we keep track of that and avoid
2121 * this rescan? --jhi)
012bcf8d 2122 */
c7f1f016 2123 int hicount = 0;
63cd0674
NIS
2124 U8 *c;
2125 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2126 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2127 hicount++;
db42d148 2128 }
012bcf8d 2129 }
63cd0674 2130 if (hicount) {
9d4ba2ae 2131 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2132 U8 *src, *dst;
2133 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2134 src = (U8 *)d - 1;
2135 dst = src+hicount;
2136 d += hicount;
cfd0369c 2137 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2138 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2139 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2140 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2141 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2142 }
2143 else {
63cd0674 2144 *dst-- = *src;
012bcf8d 2145 }
c7f1f016 2146 src--;
012bcf8d
GS
2147 }
2148 }
2149 }
2150
9aa983d2 2151 if (has_utf8 || uv > 255) {
9041c2e3 2152 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2153 has_utf8 = TRUE;
f9a63242
JH
2154 if (PL_lex_inwhat == OP_TRANS &&
2155 PL_sublex_info.sub_op) {
2156 PL_sublex_info.sub_op->op_private |=
2157 (PL_lex_repl ? OPpTRANS_FROM_UTF
2158 : OPpTRANS_TO_UTF);
f9a63242 2159 }
e294cc5d
JH
2160#ifdef EBCDIC
2161 if (uv > 255 && !dorange)
2162 native_range = FALSE;
2163#endif
012bcf8d 2164 }
a0ed51b3 2165 else {
012bcf8d 2166 *d++ = (char)uv;
a0ed51b3 2167 }
012bcf8d
GS
2168 }
2169 else {
c4d5f83a 2170 *d++ = (char) uv;
a0ed51b3 2171 }
79072805 2172 continue;
02aa26ce 2173
b239daa5 2174 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2175 case 'N':
55eda711 2176 ++s;
423cee85
JH
2177 if (*s == '{') {
2178 char* e = strchr(s, '}');
155aba94 2179 SV *res;
423cee85 2180 STRLEN len;
cfd0369c 2181 const char *str;
fc8cd66c 2182 SV *type;
4e553d73 2183
423cee85 2184 if (!e) {
5777a3f7 2185 yyerror("Missing right brace on \\N{}");
423cee85
JH
2186 e = s - 1;
2187 goto cont_scan;
2188 }
dbc0d4f2
JH
2189 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2190 /* \N{U+...} */
2191 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2192 PERL_SCAN_DISALLOW_PREFIX;
2193 s += 3;
2194 len = e - s;
2195 uv = grok_hex(s, &len, &flags, NULL);
fc8cd66c
YO
2196 if ( len != e - s ) {
2197 uv=0xFFFD;
2198 }
dbc0d4f2
JH
2199 s = e + 1;
2200 goto NUM_ESCAPE_INSERT;
2201 }
55eda711 2202 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2203 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2204 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2205 res, NULL, SvPVX(type) );
2206 SvREFCNT_dec(type);
f9a63242
JH
2207 if (has_utf8)
2208 sv_utf8_upgrade(res);
cfd0369c 2209 str = SvPV_const(res,len);
1c47067b
JH
2210#ifdef EBCDIC_NEVER_MIND
2211 /* charnames uses pack U and that has been
2212 * recently changed to do the below uni->native
2213 * mapping, so this would be redundant (and wrong,
2214 * the code point would be doubly converted).
2215 * But leave this in just in case the pack U change
2216 * gets revoked, but the semantics is still
2217 * desireable for charnames. --jhi */
cddc7ef4 2218 {
cfd0369c 2219 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2220
2221 if (uv < 0x100) {
89ebb4a3 2222 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2223
2224 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2225 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2226 str = SvPV_const(res, len);
cddc7ef4
JH
2227 }
2228 }
2229#endif
89491803 2230 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2231 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2232 SvCUR_set(sv, d - ostart);
2233 SvPOK_on(sv);
e4f3eed8 2234 *d = '\0';
f08d6ad9 2235 sv_utf8_upgrade(sv);
d2f449dd 2236 /* this just broke our allocation above... */
eb160463 2237 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2238 d = SvPVX(sv) + SvCUR(sv);
89491803 2239 has_utf8 = TRUE;
f08d6ad9 2240 }
eb160463 2241 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2242 const char * const odest = SvPVX_const(sv);
423cee85 2243
8973db79 2244 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2245 d = SvPVX(sv) + (d - odest);
2246 }
e294cc5d
JH
2247#ifdef EBCDIC
2248 if (!dorange)
2249 native_range = FALSE; /* \N{} is guessed to be Unicode */
2250#endif
423cee85
JH
2251 Copy(str, d, len, char);
2252 d += len;
2253 SvREFCNT_dec(res);
2254 cont_scan:
2255 s = e + 1;
2256 }
2257 else
5777a3f7 2258 yyerror("Missing braces on \\N{}");
423cee85
JH
2259 continue;
2260
02aa26ce 2261 /* \c is a control character */
79072805
LW
2262 case 'c':
2263 s++;
961ce445 2264 if (s < send) {
ba210ebe 2265 U8 c = *s++;
c7f1f016
NIS
2266#ifdef EBCDIC
2267 if (isLOWER(c))
2268 c = toUPPER(c);
2269#endif
db42d148 2270 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2271 }
961ce445
RGS
2272 else {
2273 yyerror("Missing control char name in \\c");
2274 }
79072805 2275 continue;
02aa26ce
NT
2276
2277 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2278 case 'b':
db42d148 2279 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2280 break;
2281 case 'n':
db42d148 2282 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2283 break;
2284 case 'r':
db42d148 2285 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2286 break;
2287 case 'f':
db42d148 2288 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2289 break;
2290 case 't':
db42d148 2291 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2292 break;
34a3fe2a 2293 case 'e':
db42d148 2294 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2295 break;
2296 case 'a':
db42d148 2297 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2298 break;
02aa26ce
NT
2299 } /* end switch */
2300
79072805
LW
2301 s++;
2302 continue;
02aa26ce 2303 } /* end if (backslash) */
4c3a8340
ST
2304#ifdef EBCDIC
2305 else
2306 literal_endpoint++;
2307#endif
02aa26ce 2308
f9a63242 2309 default_action:
2b9d42f0
NIS
2310 /* If we started with encoded form, or already know we want it
2311 and then encode the next character */
2312 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2313 STRLEN len = 1;
5f66b61c
AL
2314 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2315 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2316 s += len;
2317 if (need > len) {
2318 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2319 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2320 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2321 }
5f66b61c 2322 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2323 has_utf8 = TRUE;
e294cc5d
JH
2324#ifdef EBCDIC
2325 if (uv > 255 && !dorange)
2326 native_range = FALSE;
2327#endif
2b9d42f0
NIS
2328 }
2329 else {
2330 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2331 }
02aa26ce
NT
2332 } /* while loop to process each character */
2333
2334 /* terminate the string and set up the sv */
79072805 2335 *d = '\0';
95a20fc0 2336 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2337 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2338 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2339
79072805 2340 SvPOK_on(sv);
9f4817db 2341 if (PL_encoding && !has_utf8) {
d0063567
DK
2342 sv_recode_to_utf8(sv, PL_encoding);
2343 if (SvUTF8(sv))
2344 has_utf8 = TRUE;
9f4817db 2345 }
2b9d42f0 2346 if (has_utf8) {
7e2040f0 2347 SvUTF8_on(sv);
2b9d42f0 2348 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2349 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2350 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2351 }
2352 }
79072805 2353
02aa26ce 2354 /* shrink the sv if we allocated more than we used */
79072805 2355 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2356 SvPV_shrink_to_cur(sv);
79072805 2357 }
02aa26ce 2358
9b599b2a 2359 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2360 if (s > PL_bufptr) {
2361 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2362 sv = new_constant(start, s - start,
2363 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2364 sv, NULL,
10edeb5d
JH
2365 (const char *)
2366 (( PL_lex_inwhat == OP_TRANS
2367 ? "tr"
2368 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2369 ? "s"
2370 : "qq"))));
79072805 2371 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2372 } else
8990e307 2373 SvREFCNT_dec(sv);
79072805
LW
2374 return s;
2375}
2376
ffb4593c
NT
2377/* S_intuit_more
2378 * Returns TRUE if there's more to the expression (e.g., a subscript),
2379 * FALSE otherwise.
ffb4593c
NT
2380 *
2381 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2382 *
2383 * ->[ and ->{ return TRUE
2384 * { and [ outside a pattern are always subscripts, so return TRUE
2385 * if we're outside a pattern and it's not { or [, then return FALSE
2386 * if we're in a pattern and the first char is a {
2387 * {4,5} (any digits around the comma) returns FALSE
2388 * if we're in a pattern and the first char is a [
2389 * [] returns FALSE
2390 * [SOMETHING] has a funky algorithm to decide whether it's a
2391 * character class or not. It has to deal with things like
2392 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2393 * anything else returns TRUE
2394 */
2395
9cbb5ea2
GS
2396/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2397
76e3520e 2398STATIC int
cea2e8a9 2399S_intuit_more(pTHX_ register char *s)
79072805 2400{
97aff369 2401 dVAR;
3280af22 2402 if (PL_lex_brackets)
79072805
LW
2403 return TRUE;
2404 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2405 return TRUE;
2406 if (*s != '{' && *s != '[')
2407 return FALSE;
3280af22 2408 if (!PL_lex_inpat)
79072805
LW
2409 return TRUE;
2410
2411 /* In a pattern, so maybe we have {n,m}. */
2412 if (*s == '{') {
2413 s++;
2414 if (!isDIGIT(*s))
2415 return TRUE;
2416 while (isDIGIT(*s))
2417 s++;
2418 if (*s == ',')
2419 s++;
2420 while (isDIGIT(*s))
2421 s++;
2422 if (*s == '}')
2423 return FALSE;
2424 return TRUE;
2425
2426 }
2427
2428 /* On the other hand, maybe we have a character class */
2429
2430 s++;
2431 if (*s == ']' || *s == '^')
2432 return FALSE;
2433 else {
ffb4593c 2434 /* this is terrifying, and it works */
79072805
LW
2435 int weight = 2; /* let's weigh the evidence */
2436 char seen[256];
f27ffc4a 2437 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2438 const char * const send = strchr(s,']');
3280af22 2439 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2440
2441 if (!send) /* has to be an expression */
2442 return TRUE;
2443
2444 Zero(seen,256,char);
2445 if (*s == '$')
2446 weight -= 3;
2447 else if (isDIGIT(*s)) {
2448 if (s[1] != ']') {
2449 if (isDIGIT(s[1]) && s[2] == ']')
2450 weight -= 10;
2451 }
2452 else
2453 weight -= 100;
2454 }
2455 for (; s < send; s++) {
2456 last_un_char = un_char;
2457 un_char = (unsigned char)*s;
2458 switch (*s) {
2459 case '@':
2460 case '&':
2461 case '$':
2462 weight -= seen[un_char] * 10;
7e2040f0 2463 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2464 int len;
8903cb82 2465 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2466 len = (int)strlen(tmpbuf);
2467 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2468 weight -= 100;
2469 else
2470 weight -= 10;
2471 }
2472 else if (*s == '$' && s[1] &&
93a17b20
LW
2473 strchr("[#!%*<>()-=",s[1])) {
2474 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2475 weight -= 10;
2476 else
2477 weight -= 1;
2478 }
2479 break;
2480 case '\\':
2481 un_char = 254;
2482 if (s[1]) {
93a17b20 2483 if (strchr("wds]",s[1]))
79072805 2484 weight += 100;
10edeb5d 2485 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2486 weight += 1;
93a17b20 2487 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2488 weight += 40;
2489 else if (isDIGIT(s[1])) {
2490 weight += 40;
2491 while (s[1] && isDIGIT(s[1]))
2492 s++;
2493 }
2494 }
2495 else
2496 weight += 100;
2497 break;
2498 case '-':
2499 if (s[1] == '\\')
2500 weight += 50;
93a17b20 2501 if (strchr("aA01! ",last_un_char))
79072805 2502 weight += 30;
93a17b20 2503 if (strchr("zZ79~",s[1]))
79072805 2504 weight += 30;
f27ffc4a
GS
2505 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2506 weight -= 5; /* cope with negative subscript */
79072805
LW
2507 break;
2508 default:
3792a11b
NC
2509 if (!isALNUM(last_un_char)
2510 && !(last_un_char == '$' || last_un_char == '@'
2511 || last_un_char == '&')
2512 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2513 char *d = tmpbuf;
2514 while (isALPHA(*s))
2515 *d++ = *s++;
2516 *d = '\0';
2517 if (keyword(tmpbuf, d - tmpbuf))
2518 weight -= 150;
2519 }
2520 if (un_char == last_un_char + 1)
2521 weight += 5;
2522 weight -= seen[un_char];
2523 break;
2524 }
2525 seen[un_char]++;
2526 }
2527 if (weight >= 0) /* probably a character class */
2528 return FALSE;
2529 }
2530
2531 return TRUE;
2532}
ffed7fef 2533
ffb4593c
NT
2534/*
2535 * S_intuit_method
2536 *
2537 * Does all the checking to disambiguate
2538 * foo bar
2539 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2540 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2541 *
2542 * First argument is the stuff after the first token, e.g. "bar".
2543 *
2544 * Not a method if bar is a filehandle.
2545 * Not a method if foo is a subroutine prototyped to take a filehandle.
2546 * Not a method if it's really "Foo $bar"
2547 * Method if it's "foo $bar"
2548 * Not a method if it's really "print foo $bar"
2549 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2550 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2551 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2552 * =>
2553 */
2554
76e3520e 2555STATIC int
62d55b22 2556S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2557{
97aff369 2558 dVAR;
a0d0e21e 2559 char *s = start + (*start == '$');
3280af22 2560 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2561 STRLEN len;
2562 GV* indirgv;
5db06880
NC
2563#ifdef PERL_MAD
2564 int soff;
2565#endif
a0d0e21e
LW
2566
2567 if (gv) {
62d55b22 2568 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2569 return 0;
62d55b22
NC
2570 if (cv) {
2571 if (SvPOK(cv)) {
2572 const char *proto = SvPVX_const(cv);
2573 if (proto) {
2574 if (*proto == ';')
2575 proto++;
2576 if (*proto == '*')
2577 return 0;
2578 }
b6c543e3
IZ
2579 }
2580 } else
c35e046a 2581 gv = NULL;
a0d0e21e 2582 }
8903cb82 2583 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2584 /* start is the beginning of the possible filehandle/object,
2585 * and s is the end of it
2586 * tmpbuf is a copy of it
2587 */
2588
a0d0e21e 2589 if (*start == '$') {
3280af22 2590 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2591 return 0;
5db06880
NC
2592#ifdef PERL_MAD
2593 len = start - SvPVX(PL_linestr);
2594#endif
29595ff2 2595 s = PEEKSPACE(s);
f0092767 2596#ifdef PERL_MAD
5db06880
NC
2597 start = SvPVX(PL_linestr) + len;
2598#endif
3280af22
NIS
2599 PL_bufptr = start;
2600 PL_expect = XREF;
a0d0e21e
LW
2601 return *s == '(' ? FUNCMETH : METHOD;
2602 }
2603 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2604 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2605 len -= 2;
2606 tmpbuf[len] = '\0';
5db06880
NC
2607#ifdef PERL_MAD
2608 soff = s - SvPVX(PL_linestr);
2609#endif
c3e0f903
GS
2610 goto bare_package;
2611 }
90e5519e 2612 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2613 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2614 return 0;
2615 /* filehandle or package name makes it a method */
89bfa8cd 2616 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2617#ifdef PERL_MAD
2618 soff = s - SvPVX(PL_linestr);
2619#endif
29595ff2 2620 s = PEEKSPACE(s);
3280af22 2621 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2622 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2623 bare_package:
cd81e915 2624 start_force(PL_curforce);
9ded7720 2625 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2626 newSVpvn(tmpbuf,len));
9ded7720 2627 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2628 if (PL_madskills)
2629 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2630 PL_expect = XTERM;
a0d0e21e 2631 force_next(WORD);
3280af22 2632 PL_bufptr = s;
5db06880
NC
2633#ifdef PERL_MAD
2634 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2635#endif
a0d0e21e
LW
2636 return *s == '(' ? FUNCMETH : METHOD;
2637 }
2638 }
2639 return 0;
2640}
2641
ffb4593c
NT
2642/*
2643 * S_incl_perldb
2644 * Return a string of Perl code to load the debugger. If PERL5DB
2645 * is set, it will return the contents of that, otherwise a
2646 * compile-time require of perl5db.pl.
2647 */
2648
bfed75c6 2649STATIC const char*
cea2e8a9 2650S_incl_perldb(pTHX)
a0d0e21e 2651{
97aff369 2652 dVAR;
3280af22 2653 if (PL_perldb) {
9d4ba2ae 2654 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2655
2656 if (pdb)
2657 return pdb;
93189314 2658 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2659 return "BEGIN { require 'perl5db.pl' }";
2660 }
2661 return "";
2662}
2663
2664
16d20bd9 2665/* Encoded script support. filter_add() effectively inserts a
4e553d73 2666 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2667 * Note that the filter function only applies to the current source file
2668 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2669 *
2670 * The datasv parameter (which may be NULL) can be used to pass
2671 * private data to this instance of the filter. The filter function
2672 * can recover the SV using the FILTER_DATA macro and use it to
2673 * store private buffers and state information.
2674 *
2675 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2676 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2677 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2678 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2679 * private use must be set using malloc'd pointers.
2680 */
16d20bd9
AD
2681
2682SV *
864dbfa3 2683Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2684{
97aff369 2685 dVAR;
f4c556ac 2686 if (!funcp)
a0714e2c 2687 return NULL;
f4c556ac 2688
3280af22
NIS
2689 if (!PL_rsfp_filters)
2690 PL_rsfp_filters = newAV();
16d20bd9 2691 if (!datasv)
561b68a9 2692 datasv = newSV(0);
862a34c6 2693 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2694 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2695 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2696 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2697 FPTR2DPTR(void *, IoANY(datasv)),
2698 SvPV_nolen(datasv)));
3280af22
NIS
2699 av_unshift(PL_rsfp_filters, 1);
2700 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2701 return(datasv);
2702}
4e553d73 2703
16d20bd9
AD
2704
2705/* Delete most recently added instance of this filter function. */
a0d0e21e 2706void
864dbfa3 2707Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2708{
97aff369 2709 dVAR;
e0c19803 2710 SV *datasv;
24801a4b 2711
33073adb 2712#ifdef DEBUGGING
55662e27
JH
2713 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2714 FPTR2DPTR(void*, funcp)));
33073adb 2715#endif
3280af22 2716 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2717 return;
2718 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2719 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2720 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2721 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2722 IoANY(datasv) = (void *)NULL;
3280af22 2723 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2724
16d20bd9
AD
2725 return;
2726 }
2727 /* we need to search for the correct entry and clear it */
cea2e8a9 2728 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2729}
2730
2731
1de9afcd
RGS
2732/* Invoke the idxth filter function for the current rsfp. */
2733/* maxlen 0 = read one text line */
16d20bd9 2734I32
864dbfa3 2735Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2736{
97aff369 2737 dVAR;
16d20bd9
AD
2738 filter_t funcp;
2739 SV *datasv = NULL;
f482118e
NC
2740 /* This API is bad. It should have been using unsigned int for maxlen.
2741 Not sure if we want to change the API, but if not we should sanity
2742 check the value here. */
39cd7a59
NC
2743 const unsigned int correct_length
2744 = maxlen < 0 ?
2745#ifdef PERL_MICRO
2746 0x7FFFFFFF
2747#else
2748 INT_MAX
2749#endif
2750 : maxlen;
e50aee73 2751
3280af22 2752 if (!PL_rsfp_filters)
16d20bd9 2753 return -1;
1de9afcd 2754 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2755 /* Provide a default input filter to make life easy. */
2756 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2757 DEBUG_P(PerlIO_printf(Perl_debug_log,
2758 "filter_read %d: from rsfp\n", idx));
f482118e 2759 if (correct_length) {
16d20bd9
AD
2760 /* Want a block */
2761 int len ;
f54cb97a 2762 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2763
2764 /* ensure buf_sv is large enough */
f482118e
NC
2765 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2766 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2767 correct_length)) <= 0) {
3280af22 2768 if (PerlIO_error(PL_rsfp))
37120919
AD
2769 return -1; /* error */
2770 else
2771 return 0 ; /* end of file */
2772 }
16d20bd9
AD
2773 SvCUR_set(buf_sv, old_len + len) ;
2774 } else {
2775 /* Want a line */
3280af22
NIS
2776 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2777 if (PerlIO_error(PL_rsfp))
37120919
AD
2778 return -1; /* error */
2779 else
2780 return 0 ; /* end of file */
2781 }
16d20bd9
AD
2782 }
2783 return SvCUR(buf_sv);
2784 }
2785 /* Skip this filter slot if filter has been deleted */
1de9afcd 2786 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2787 DEBUG_P(PerlIO_printf(Perl_debug_log,
2788 "filter_read %d: skipped (filter deleted)\n",
2789 idx));
f482118e 2790 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2791 }
2792 /* Get function pointer hidden within datasv */
8141890a 2793 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2794 DEBUG_P(PerlIO_printf(Perl_debug_log,
2795 "filter_read %d: via function %p (%s)\n",
ca0270c4 2796 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2797 /* Call function. The function is expected to */
2798 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2799 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2800 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2801}
2802
76e3520e 2803STATIC char *
cea2e8a9 2804S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2805{
97aff369 2806 dVAR;
c39cd008 2807#ifdef PERL_CR_FILTER
3280af22 2808 if (!PL_rsfp_filters) {
c39cd008 2809 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2810 }
2811#endif
3280af22 2812 if (PL_rsfp_filters) {
55497cff
PP
2813 if (!append)
2814 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2815 if (FILTER_READ(0, sv, 0) > 0)
2816 return ( SvPVX(sv) ) ;
2817 else
bd61b366 2818 return NULL ;
16d20bd9 2819 }
9d116dd7 2820 else
fd049845 2821 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2822}
2823
01ec43d0 2824STATIC HV *
7fc63493 2825S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2826{
97aff369 2827 dVAR;
def3634b
GS
2828 GV *gv;
2829
01ec43d0 2830 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2831 return PL_curstash;
2832
2833 if (len > 2 &&
2834 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2835 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2836 {
2837 return GvHV(gv); /* Foo:: */
def3634b
GS
2838 }
2839
2840 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2841 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2842 if (gv && GvCV(gv)) {
2843 SV * const sv = cv_const_sv(GvCV(gv));
2844 if (sv)
83003860 2845 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2846 }
2847
2848 return gv_stashpv(pkgname, FALSE);
2849}
a0d0e21e 2850
5db06880
NC
2851#ifdef PERL_MAD
2852 /*
2853 * Perl_madlex
2854 * The intent of this yylex wrapper is to minimize the changes to the
2855 * tokener when we aren't interested in collecting madprops. It remains
2856 * to be seen how successful this strategy will be...
2857 */
2858
2859int
2860Perl_madlex(pTHX)
2861{
2862 int optype;
2863 char *s = PL_bufptr;
2864
cd81e915
NC
2865 /* make sure PL_thiswhite is initialized */
2866 PL_thiswhite = 0;
2867 PL_thismad = 0;
5db06880 2868
cd81e915 2869 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2870 if (PL_pending_ident)
2871 return S_pending_ident(aTHX);
2872
2873 /* previous token ate up our whitespace? */
cd81e915
NC
2874 if (!PL_lasttoke && PL_nextwhite) {
2875 PL_thiswhite = PL_nextwhite;
2876 PL_nextwhite = 0;
5db06880
NC
2877 }
2878
2879 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2880 PL_realtokenstart = -1;
2881 PL_thistoken = 0;
5db06880
NC
2882 optype = yylex();
2883 s = PL_bufptr;
cd81e915 2884 assert(PL_curforce < 0);
5db06880 2885
cd81e915
NC
2886 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2887 if (!PL_thistoken) {
2888 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2889 PL_thistoken = newSVpvn("",0);
5db06880 2890 else {
c35e046a 2891 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2892 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2893 }
2894 }
cd81e915
NC
2895 if (PL_thismad) /* install head */
2896 CURMAD('X', PL_thistoken);
5db06880
NC
2897 }
2898
2899 /* last whitespace of a sublex? */
cd81e915
NC
2900 if (optype == ')' && PL_endwhite) {
2901 CURMAD('X', PL_endwhite);
5db06880
NC
2902 }
2903
cd81e915 2904 if (!PL_thismad) {
5db06880
NC
2905
2906 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2907 if (!PL_thiswhite && !PL_endwhite && !optype) {
2908 sv_free(PL_thistoken);
2909 PL_thistoken = 0;
5db06880
NC
2910 return 0;
2911 }
2912
2913 /* put off final whitespace till peg */
2914 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2915 PL_nextwhite = PL_thiswhite;
2916 PL_thiswhite = 0;
5db06880 2917 }
cd81e915
NC
2918 else if (PL_thisopen) {
2919 CURMAD('q', PL_thisopen);
2920 if (PL_thistoken)
2921 sv_free(PL_thistoken);
2922 PL_thistoken = 0;
5db06880
NC
2923 }
2924 else {
2925 /* Store actual token text as madprop X */
cd81e915 2926 CURMAD('X', PL_thistoken);
5db06880
NC
2927 }
2928
cd81e915 2929 if (PL_thiswhite) {
5db06880 2930 /* add preceding whitespace as madprop _ */
cd81e915 2931 CURMAD('_', PL_thiswhite);
5db06880
NC
2932 }
2933
cd81e915 2934 if (PL_thisstuff) {
5db06880 2935 /* add quoted material as madprop = */
cd81e915 2936 CURMAD('=', PL_thisstuff);
5db06880
NC
2937 }
2938
cd81e915 2939 if (PL_thisclose) {
5db06880 2940 /* add terminating quote as madprop Q */
cd81e915 2941 CURMAD('Q', PL_thisclose);
5db06880
NC
2942 }
2943 }
2944
2945 /* special processing based on optype */
2946
2947 switch (optype) {
2948
2949 /* opval doesn't need a TOKEN since it can already store mp */
2950 case WORD:
2951 case METHOD:
2952 case FUNCMETH:
2953 case THING:
2954 case PMFUNC:
2955 case PRIVATEREF:
2956 case FUNC0SUB:
2957 case UNIOPSUB:
2958 case LSTOPSUB:
2959 if (yylval.opval)
cd81e915
NC
2960 append_madprops(PL_thismad, yylval.opval, 0);
2961 PL_thismad = 0;
5db06880
NC
2962 return optype;
2963
2964 /* fake EOF */
2965 case 0:
2966 optype = PEG;
cd81e915
NC
2967 if (PL_endwhite) {
2968 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2969 PL_endwhite = 0;
5db06880
NC
2970 }
2971 break;
2972
2973 case ']':
2974 case '}':
cd81e915 2975 if (PL_faketokens)
5db06880
NC
2976 break;
2977 /* remember any fake bracket that lexer is about to discard */
2978 if (PL_lex_brackets == 1 &&
2979 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2980 {
2981 s = PL_bufptr;
2982 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2983 s++;
2984 if (*s == '}') {
cd81e915
NC
2985 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2986 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2987 PL_thiswhite = 0;
5db06880
NC
2988 PL_bufptr = s - 1;
2989 break; /* don't bother looking for trailing comment */
2990 }
2991 else
2992 s = PL_bufptr;
2993 }
2994 if (optype == ']')
2995 break;
2996 /* FALLTHROUGH */
2997
2998 /* attach a trailing comment to its statement instead of next token */
2999 case ';':
cd81e915 3000 if (PL_faketokens)
5db06880
NC
3001 break;
3002 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3003 s = PL_bufptr;
3004 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3005 s++;
3006 if (*s == '\n' || *s == '#') {
3007 while (s < PL_bufend && *s != '\n')
3008 s++;
3009 if (s < PL_bufend)
3010 s++;
cd81e915
NC
3011 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3012 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3013 PL_thiswhite = 0;
5db06880
NC
3014 PL_bufptr = s;
3015 }
3016 }
3017 break;
3018
3019 /* pval */
3020 case LABEL:
3021 break;
3022
3023 /* ival */
3024 default:
3025 break;
3026
3027 }
3028
3029 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3030 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3031 PL_thismad = 0;
5db06880
NC
3032 return optype;
3033}
3034#endif
3035
468aa647 3036STATIC char *
cc6ed77d 3037S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3038 dVAR;
468aa647
RGS
3039 if (PL_expect != XSTATE)
3040 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3041 is_use ? "use" : "no"));
29595ff2 3042 s = SKIPSPACE1(s);
468aa647
RGS
3043 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3044 s = force_version(s, TRUE);
29595ff2 3045 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3046 start_force(PL_curforce);
9ded7720 3047 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3048 force_next(WORD);
3049 }
3050 else if (*s == 'v') {
3051 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3052 s = force_version(s, FALSE);
3053 }
3054 }
3055 else {
3056 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3057 s = force_version(s, FALSE);
3058 }
3059 yylval.ival = is_use;
3060 return s;
3061}
748a9306 3062#ifdef DEBUGGING
27da23d5 3063 static const char* const exp_name[] =
09bef843 3064 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3065 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3066 };
748a9306 3067#endif
463ee0b2 3068
02aa26ce
NT
3069/*
3070 yylex
3071
3072 Works out what to call the token just pulled out of the input
3073 stream. The yacc parser takes care of taking the ops we return and
3074 stitching them into a tree.
3075
3076 Returns:
3077 PRIVATEREF
3078
3079 Structure:
3080 if read an identifier
3081 if we're in a my declaration
3082 croak if they tried to say my($foo::bar)
3083 build the ops for a my() declaration
3084 if it's an access to a my() variable
3085 are we in a sort block?
3086 croak if my($a); $a <=> $b
3087 build ops for access to a my() variable
3088 if in a dq string, and they've said @foo and we can't find @foo
3089 croak
3090 build ops for a bareword
3091 if we already built the token before, use it.
3092*/
3093
20141f0e 3094
dba4d153
JH
3095#ifdef __SC__
3096#pragma segment Perl_yylex
3097#endif
dba4d153 3098int
dba4d153 3099Perl_yylex(pTHX)
20141f0e 3100{
97aff369 3101 dVAR;
3afc138a 3102 register char *s = PL_bufptr;
378cc40b 3103 register char *d;
463ee0b2 3104 STRLEN len;
aa7440fb 3105 bool bof = FALSE;
a687059c 3106
10edeb5d
JH
3107 /* orig_keyword, gvp, and gv are initialized here because
3108 * jump to the label just_a_word_zero can bypass their
3109 * initialization later. */
3110 I32 orig_keyword = 0;
3111 GV *gv = NULL;
3112 GV **gvp = NULL;
3113
bbf60fe6 3114 DEBUG_T( {
396482e1 3115 SV* tmp = newSVpvs("");
b6007c36
DM
3116 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3117 (IV)CopLINE(PL_curcop),
3118 lex_state_names[PL_lex_state],
3119 exp_name[PL_expect],
3120 pv_display(tmp, s, strlen(s), 0, 60));
3121 SvREFCNT_dec(tmp);
bbf60fe6 3122 } );
02aa26ce 3123 /* check if there's an identifier for us to look at */
ba979b31 3124 if (PL_pending_ident)
bbf60fe6 3125 return REPORT(S_pending_ident(aTHX));
bbce6d69 3126
02aa26ce
NT
3127 /* no identifier pending identification */
3128
3280af22 3129 switch (PL_lex_state) {
79072805
LW
3130#ifdef COMMENTARY
3131 case LEX_NORMAL: /* Some compilers will produce faster */
3132 case LEX_INTERPNORMAL: /* code if we comment these out. */
3133 break;
3134#endif
3135
09bef843 3136 /* when we've already built the next token, just pull it out of the queue */
79072805 3137 case LEX_KNOWNEXT:
5db06880
NC
3138#ifdef PERL_MAD
3139 PL_lasttoke--;
3140 yylval = PL_nexttoke[PL_lasttoke].next_val;
3141 if (PL_madskills) {
cd81e915 3142 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3143 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3144 if (PL_thismad && PL_thismad->mad_key == '_') {
3145 PL_thiswhite = (SV*)PL_thismad->mad_val;
3146 PL_thismad->mad_val = 0;
3147 mad_free(PL_thismad);
3148 PL_thismad = 0;
5db06880
NC
3149 }
3150 }
3151 if (!PL_lasttoke) {
3152 PL_lex_state = PL_lex_defer;
3153 PL_expect = PL_lex_expect;
3154 PL_lex_defer = LEX_NORMAL;
3155 if (!PL_nexttoke[PL_lasttoke].next_type)
3156 return yylex();
3157 }
3158#else
3280af22 3159 PL_nexttoke--;
5db06880 3160 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3161 if (!PL_nexttoke) {
3162 PL_lex_state = PL_lex_defer;
3163 PL_expect = PL_lex_expect;
3164 PL_lex_defer = LEX_NORMAL;
463ee0b2 3165 }
5db06880
NC
3166#endif
3167#ifdef PERL_MAD
3168 /* FIXME - can these be merged? */
3169 return(PL_nexttoke[PL_lasttoke].next_type);
3170#else
bbf60fe6 3171 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3172#endif
79072805 3173
02aa26ce 3174 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3175 when we get here, PL_bufptr is at the \
02aa26ce 3176 */
79072805
LW
3177 case LEX_INTERPCASEMOD:
3178#ifdef DEBUGGING
3280af22 3179 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3180 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3181#endif
02aa26ce 3182 /* handle \E or end of string */
3280af22 3183 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3184 /* if at a \E */
3280af22 3185 if (PL_lex_casemods) {
f54cb97a 3186 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3187 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3188
3792a11b
NC
3189 if (PL_bufptr != PL_bufend
3190 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3191 PL_bufptr += 2;
3192 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3193#ifdef PERL_MAD
3194 if (PL_madskills)
cd81e915 3195 PL_thistoken = newSVpvn("\\E",2);
5db06880 3196#endif
a0d0e21e 3197 }
bbf60fe6 3198 return REPORT(')');
79072805 3199 }
5db06880
NC
3200#ifdef PERL_MAD
3201 while (PL_bufptr != PL_bufend &&
3202 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3203 if (!PL_thiswhite)
3204 PL_thiswhite = newSVpvn("",0);
3205 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3206 PL_bufptr += 2;
3207 }
3208#else
3280af22
NIS
3209 if (PL_bufptr != PL_bufend)
3210 PL_bufptr += 2;
5db06880 3211#endif
3280af22 3212 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3213 return yylex();
79072805
LW
3214 }
3215 else {
607df283 3216 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3217 "### Saw case modifier\n"); });
3280af22 3218 s = PL_bufptr + 1;
6e909404 3219 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3220#ifdef PERL_MAD
cd81e915
NC
3221 if (!PL_thiswhite)
3222 PL_thiswhite = newSVpvn("",0);
3223 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3224#endif
89122651 3225 PL_bufptr = s + 3;
6e909404
JH
3226 PL_lex_state = LEX_INTERPCONCAT;
3227 return yylex();
a0d0e21e 3228 }
6e909404 3229 else {
90771dc0 3230 I32 tmp;
5db06880
NC
3231 if (!PL_madskills) /* when just compiling don't need correct */
3232 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3233 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3234 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3235 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3236 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3237 return REPORT(')');
6e909404
JH
3238 }
3239 if (PL_lex_casemods > 10)
3240 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3241 PL_lex_casestack[PL_lex_casemods++] = *s;
3242 PL_lex_casestack[PL_lex_casemods] = '\0';
3243 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3244 start_force(PL_curforce);
9ded7720 3245 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3246 force_next('(');
cd81e915 3247 start_force(PL_curforce);
6e909404 3248 if (*s == 'l')
9ded7720 3249 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3250 else if (*s == 'u')
9ded7720 3251 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3252 else if (*s == 'L')
9ded7720 3253 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3254 else if (*s == 'U')
9ded7720 3255 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3256 else if (*s == 'Q')
9ded7720 3257 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3258 else
3259 Perl_croak(aTHX_ "panic: yylex");
5db06880 3260 if (PL_madskills) {
d4c19fe8 3261 SV* const tmpsv = newSVpvn("",0);
5db06880
NC
3262 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3263 curmad('_', tmpsv);
3264 }
6e909404 3265 PL_bufptr = s + 1;
a0d0e21e 3266 }
79072805 3267 force_next(FUNC);
3280af22
NIS
3268 if (PL_lex_starts) {
3269 s = PL_bufptr;
3270 PL_lex_starts = 0;
5db06880
NC
3271#ifdef PERL_MAD
3272 if (PL_madskills) {
cd81e915
NC
3273 if (PL_thistoken)
3274 sv_free(PL_thistoken);
3275 PL_thistoken = newSVpvn("",0);
5db06880
NC
3276 }
3277#endif
131b3ad0
DM
3278 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3279 if (PL_lex_casemods == 1 && PL_lex_inpat)
3280 OPERATOR(',');
3281 else
3282 Aop(OP_CONCAT);
79072805
LW
3283 }
3284 else
cea2e8a9 3285 return yylex();
79072805
LW
3286 }
3287
55497cff 3288 case LEX_INTERPPUSH:
bbf60fe6 3289 return REPORT(sublex_push());
55497cff 3290
79072805 3291 case LEX_INTERPSTART:
3280af22 3292 if (PL_bufptr == PL_bufend)
bbf60fe6 3293 return REPORT(sublex_done());
607df283 3294 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3295 "### Interpolated variable\n"); });
3280af22
NIS
3296 PL_expect = XTERM;
3297 PL_lex_dojoin = (*PL_bufptr == '@');
3298 PL_lex_state = LEX_INTERPNORMAL;
3299 if (PL_lex_dojoin) {
cd81e915 3300 start_force(PL_curforce);
9ded7720 3301 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3302 force_next(',');
cd81e915 3303 start_force(PL_curforce);
a0d0e21e 3304 force_ident("\"", '$');
cd81e915 3305 start_force(PL_curforce);
9ded7720 3306 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3307 force_next('$');
cd81e915 3308 start_force(PL_curforce);
9ded7720 3309 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3310 force_next('(');
cd81e915 3311 start_force(PL_curforce);
9ded7720 3312 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3313 force_next(FUNC);
3314 }
3280af22
NIS
3315 if (PL_lex_starts++) {
3316 s = PL_bufptr;
5db06880
NC
3317#ifdef PERL_MAD
3318 if (PL_madskills) {
cd81e915
NC
3319 if (PL_thistoken)
3320 sv_free(PL_thistoken);
3321 PL_thistoken = newSVpvn("",0);
5db06880
NC
3322 }
3323#endif
131b3ad0
DM
3324 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3325 if (!PL_lex_casemods && PL_lex_inpat)
3326 OPERATOR(',');
3327 else
3328 Aop(OP_CONCAT);
79072805 3329 }
cea2e8a9 3330 return yylex();
79072805
LW
3331
3332 case LEX_INTERPENDMAYBE:
3280af22
NIS
3333 if (intuit_more(PL_bufptr)) {
3334 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3335 break;
3336 }
3337 /* FALL THROUGH */
3338
3339 case LEX_INTERPEND:
3280af22
NIS
3340 if (PL_lex_dojoin) {
3341 PL_lex_dojoin = FALSE;
3342 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3343#ifdef PERL_MAD
3344 if (PL_madskills) {
cd81e915
NC
3345 if (PL_thistoken)
3346 sv_free(PL_thistoken);
3347 PL_thistoken = newSVpvn("",0);
5db06880
NC
3348 }
3349#endif
bbf60fe6 3350 return REPORT(')');
79072805 3351 }
43a16006 3352 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3353 && SvEVALED(PL_lex_repl))
43a16006 3354 {
e9fa98b2 3355 if (PL_bufptr != PL_bufend)
cea2e8a9 3356 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3357 PL_lex_repl = NULL;
e9fa98b2 3358 }
79072805
LW
3359 /* FALLTHROUGH */
3360 case LEX_INTERPCONCAT:
3361#ifdef DEBUGGING
3280af22 3362 if (PL_lex_brackets)
cea2e8a9 3363 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3364#endif
3280af22 3365 if (PL_bufptr == PL_bufend)
bbf60fe6 3366 return REPORT(sublex_done());
79072805 3367
3280af22
NIS
3368 if (SvIVX(PL_linestr) == '\'') {
3369 SV *sv = newSVsv(PL_linestr);
3370 if (!PL_lex_inpat)
76e3520e 3371 sv = tokeq(sv);
3280af22 3372 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3373 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3374 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3375 s = PL_bufend;
79072805
LW
3376 }
3377 else {
3280af22 3378 s = scan_const(PL_bufptr);
79072805 3379 if (*s == '\\')
3280af22 3380 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3381 else
3280af22 3382 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3383 }
3384
3280af22 3385 if (s != PL_bufptr) {
cd81e915 3386 start_force(PL_curforce);
5db06880
NC
3387 if (PL_madskills) {
3388 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3389 }
9ded7720 3390 NEXTVAL_NEXTTOKE = yylval;
3280af22 3391 PL_expect = XTERM;
79072805 3392 force_next(THING);
131b3ad0 3393 if (PL_lex_starts++) {
5db06880
NC
3394#ifdef PERL_MAD
3395 if (PL_madskills) {
cd81e915
NC
3396 if (PL_thistoken)
3397 sv_free(PL_thistoken);
3398 PL_thistoken = newSVpvn("",0);
5db06880
NC
3399 }
3400#endif
131b3ad0
DM
3401 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3402 if (!PL_lex_casemods && PL_lex_inpat)
3403 OPERATOR(',');
3404 else
3405 Aop(OP_CONCAT);
3406 }
79072805 3407 else {
3280af22 3408 PL_bufptr = s;
cea2e8a9 3409 return yylex();
79072805
LW
3410 }
3411 }
3412
cea2e8a9 3413 return yylex();
a0d0e21e 3414 case LEX_FORMLINE:
3280af22
NIS
3415 PL_lex_state = LEX_NORMAL;
3416 s = scan_formline(PL_bufptr);
3417 if (!PL_lex_formbrack)
a0d0e21e
LW
3418 goto rightbracket;
3419 OPERATOR(';');
79072805
LW
3420 }
3421
3280af22
NIS
3422 s = PL_bufptr;
3423 PL_oldoldbufptr = PL_oldbufptr;
3424 PL_oldbufptr = s;
463ee0b2
LW
3425
3426 retry:
5db06880 3427#ifdef PERL_MAD
cd81e915
NC
3428 if (PL_thistoken) {
3429 sv_free(PL_thistoken);
3430 PL_thistoken = 0;
5db06880 3431 }
cd81e915 3432 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3433#endif
378cc40b
LW
3434 switch (*s) {
3435 default:
7e2040f0 3436 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3437 goto keylookup;
cea2e8a9 3438 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3439 case 4:
3440 case 26:
3441 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3442 case 0:
5db06880
NC
3443#ifdef PERL_MAD
3444 if (PL_madskills)
cd81e915 3445 PL_faketokens = 0;
5db06880 3446#endif
3280af22
NIS
3447 if (!PL_rsfp) {
3448 PL_last_uni = 0;
3449 PL_last_lop = 0;
c5ee2135 3450 if (PL_lex_brackets) {
10edeb5d
JH
3451 yyerror((const char *)
3452 (PL_lex_formbrack
3453 ? "Format not terminated"
3454 : "Missing right curly or square bracket"));
c5ee2135 3455 }
4e553d73 3456 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3457 "### Tokener got EOF\n");
5f80b19c 3458 } );
79072805 3459 TOKEN(0);
463ee0b2 3460 }
3280af22 3461 if (s++ < PL_bufend)
a687059c 3462 goto retry; /* ignore stray nulls */
3280af22
NIS
3463 PL_last_uni = 0;
3464 PL_last_lop = 0;
3465 if (!PL_in_eval && !PL_preambled) {
3466 PL_preambled = TRUE;
5db06880
NC
3467#ifdef PERL_MAD
3468 if (PL_madskills)
cd81e915 3469 PL_faketokens = 1;
5db06880 3470#endif
3280af22
NIS
3471 sv_setpv(PL_linestr,incl_perldb());
3472 if (SvCUR(PL_linestr))
396482e1 3473 sv_catpvs(PL_linestr,";");
3280af22
NIS
3474 if (PL_preambleav){
3475 while(AvFILLp(PL_preambleav) >= 0) {
3476 SV *tmpsv = av_shift(PL_preambleav);
3477 sv_catsv(PL_linestr, tmpsv);
396482e1 3478 sv_catpvs(PL_linestr, ";");
91b7def8
PP
3479 sv_free(tmpsv);
3480 }
3280af22
NIS
3481 sv_free((SV*)PL_preambleav);
3482 PL_preambleav = NULL;
91b7def8 3483 }
3280af22 3484 if (PL_minus_n || PL_minus_p) {
396482e1 3485 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3486 if (PL_minus_l)
396482e1 3487 sv_catpvs(PL_linestr,"chomp;");
3280af22 3488 if (PL_minus_a) {
3280af22 3489 if (PL_minus_F) {
3792a11b
NC
3490 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3491 || *PL_splitstr == '"')
3280af22 3492 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3493 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3494 else {
c8ef6a4b
NC
3495 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3496 bytes can be used as quoting characters. :-) */
dd374669 3497 const char *splits = PL_splitstr;
91d456ae 3498 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3499 do {
3500 /* Need to \ \s */
dd374669
AL
3501 if (*splits == '\\')
3502 sv_catpvn(PL_linestr, splits, 1);
3503 sv_catpvn(PL_linestr, splits, 1);
3504 } while (*splits++);
48c4c863
NC
3505 /* This loop will embed the trailing NUL of
3506 PL_linestr as the last thing it does before
3507 terminating. */
396482e1 3508 sv_catpvs(PL_linestr, ");");
54310121 3509 }
2304df62
AD
3510 }
3511 else
396482e1 3512 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3513 }
79072805 3514 }
bc9b29db 3515 if (PL_minus_E)
396482e1
GA
3516 sv_catpvs(PL_linestr,"use feature ':5.10';");
3517 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3518 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3519 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3520 PL_last_lop = PL_last_uni = NULL;
3280af22 3521 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3522 SV * const sv = newSV(0);
a0d0e21e
LW
3523
3524 sv_upgrade(sv, SVt_PVMG);
3280af22 3525 sv_setsv(sv,PL_linestr);
0ac0412a 3526 (void)SvIOK_on(sv);
45977657 3527 SvIV_set(sv, 0);
36c7798d 3528 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3529 }
79072805 3530 goto retry;
a687059c 3531 }
e929a76b 3532 do {
aa7440fb 3533 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3534 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3535 fake_eof:
5db06880 3536#ifdef PERL_MAD
cd81e915 3537 PL_realtokenstart = -1;
5db06880 3538#endif
7e28d3af
JH
3539 if (PL_rsfp) {
3540 if (PL_preprocess && !PL_in_eval)
3541 (void)PerlProc_pclose(PL_rsfp);
3542 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3543 PerlIO_clearerr(PL_rsfp);
3544 else
3545 (void)PerlIO_close(PL_rsfp);