This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make lc/uc/lcfirst/ucfirst warn when passed undef.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
eb0d8d16
NC
26#define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
28
6154021b 29#define pl_yylval (PL_parser->yylval)
d3b6f988 30
acdf0a21
DM
31/* YYINITDEPTH -- initial size of the parser's stacks. */
32#define YYINITDEPTH 200
33
199e78b7
DM
34/* XXX temporary backwards compatibility */
35#define PL_lex_brackets (PL_parser->lex_brackets)
36#define PL_lex_brackstack (PL_parser->lex_brackstack)
37#define PL_lex_casemods (PL_parser->lex_casemods)
38#define PL_lex_casestack (PL_parser->lex_casestack)
39#define PL_lex_defer (PL_parser->lex_defer)
40#define PL_lex_dojoin (PL_parser->lex_dojoin)
41#define PL_lex_expect (PL_parser->lex_expect)
42#define PL_lex_formbrack (PL_parser->lex_formbrack)
43#define PL_lex_inpat (PL_parser->lex_inpat)
44#define PL_lex_inwhat (PL_parser->lex_inwhat)
45#define PL_lex_op (PL_parser->lex_op)
46#define PL_lex_repl (PL_parser->lex_repl)
47#define PL_lex_starts (PL_parser->lex_starts)
48#define PL_lex_stuff (PL_parser->lex_stuff)
49#define PL_multi_start (PL_parser->multi_start)
50#define PL_multi_open (PL_parser->multi_open)
51#define PL_multi_close (PL_parser->multi_close)
52#define PL_pending_ident (PL_parser->pending_ident)
53#define PL_preambled (PL_parser->preambled)
54#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 55#define PL_linestr (PL_parser->linestr)
c2598295
DM
56#define PL_expect (PL_parser->expect)
57#define PL_copline (PL_parser->copline)
f06b5848
DM
58#define PL_bufptr (PL_parser->bufptr)
59#define PL_oldbufptr (PL_parser->oldbufptr)
60#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61#define PL_linestart (PL_parser->linestart)
62#define PL_bufend (PL_parser->bufend)
63#define PL_last_uni (PL_parser->last_uni)
64#define PL_last_lop (PL_parser->last_lop)
65#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 66#define PL_lex_state (PL_parser->lex_state)
2f9285f8 67#define PL_rsfp (PL_parser->rsfp)
5486870f 68#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
69#define PL_in_my (PL_parser->in_my)
70#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 71#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 72#define PL_multi_end (PL_parser->multi_end)
13765c85 73#define PL_error_count (PL_parser->error_count)
199e78b7
DM
74
75#ifdef PERL_MAD
76# define PL_endwhite (PL_parser->endwhite)
77# define PL_faketokens (PL_parser->faketokens)
78# define PL_lasttoke (PL_parser->lasttoke)
79# define PL_nextwhite (PL_parser->nextwhite)
80# define PL_realtokenstart (PL_parser->realtokenstart)
81# define PL_skipwhite (PL_parser->skipwhite)
82# define PL_thisclose (PL_parser->thisclose)
83# define PL_thismad (PL_parser->thismad)
84# define PL_thisopen (PL_parser->thisopen)
85# define PL_thisstuff (PL_parser->thisstuff)
86# define PL_thistoken (PL_parser->thistoken)
87# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
88# define PL_thiswhite (PL_parser->thiswhite)
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_curforce (PL_parser->curforce)
91#else
92# define PL_nexttoke (PL_parser->nexttoke)
93# define PL_nexttype (PL_parser->nexttype)
94# define PL_nextval (PL_parser->nextval)
199e78b7
DM
95#endif
96
3cbf51f5
DM
97static int
98S_pending_ident(pTHX);
199e78b7 99
0bd48802 100static const char ident_too_long[] = "Identifier too long";
c445ea15 101static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 102
6e3aabd6 103#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
104static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 106#endif
51371543 107
29595ff2 108#ifdef PERL_MAD
29595ff2 109# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 110# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 111#else
5db06880 112# define CURMAD(slot,sv)
9ded7720 113# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
114#endif
115
9059aa12
LW
116#define XFAKEBRACK 128
117#define XENUMMASK 127
118
39e02b42
JH
119#ifdef USE_UTF8_SCRIPTS
120# define UTF (!IN_BYTES)
2b9d42f0 121#else
746b446a 122# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 123#endif
a0ed51b3 124
61f0cdd9 125/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
126 * 1999-02-27 mjd-perl-patch@plover.com */
127#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128
bf4acbe4
GS
129/* On MacOS, respect nonbreaking spaces */
130#ifdef MACOS_TRADITIONAL
131#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
132#else
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
134#endif
135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a 141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 230# define REPORT(retval) tokereport((I32)retval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
242#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf 261#define UNI2(f,x) { \
6154021b 262 pl_yylval.ival = f; \
376fcdbf
AL
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf 275#define UNIBRACK(f) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
6154021b 286#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
6154021b 290/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
6154021b 294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 371 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
372};
373
6154021b 374/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 375
bbf60fe6 376STATIC int
f5bd084c 377S_tokereport(pTHX_ I32 rv)
bbf60fe6 378{
97aff369 379 dVAR;
bbf60fe6 380 if (DEBUG_T_TEST) {
bd61b366 381 const char *name = NULL;
bbf60fe6 382 enum token_type type = TOKENTYPE_NONE;
f54cb97a 383 const struct debug_tokens *p;
396482e1 384 SV* const report = newSVpvs("<== ");
bbf60fe6 385
f54cb97a 386 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
387 if (p->token == (int)rv) {
388 name = p->name;
389 type = p->type;
390 break;
391 }
392 }
393 if (name)
54667de8 394 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
397 else if (!rv)
396482e1 398 sv_catpvs(report, "EOF");
bbf60fe6
DM
399 else
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
401 switch (type) {
402 case TOKENTYPE_NONE:
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
404 break;
405 case TOKENTYPE_IVAL:
6154021b 406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)pl_yylval.ival);
bbf60fe6
DM
407 break;
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
6154021b 410 PL_op_name[pl_yylval.ival]);
bbf60fe6
DM
411 break;
412 case TOKENTYPE_PVAL:
6154021b 413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", pl_yylval.pval);
bbf60fe6
DM
414 break;
415 case TOKENTYPE_OPVAL:
6154021b 416 if (pl_yylval.opval) {
401441c0 417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
6154021b
RGS
418 PL_op_name[pl_yylval.opval->op_type]);
419 if (pl_yylval.opval->op_type == OP_CONST) {
b6007c36 420 Perl_sv_catpvf(aTHX_ report, " %s",
6154021b 421 SvPEEK(cSVOPx_sv(pl_yylval.opval)));
b6007c36
DM
422 }
423
424 }
401441c0 425 else
396482e1 426 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
427 break;
428 }
b6007c36 429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
430 };
431 return (int)rv;
998054bd
SC
432}
433
b6007c36
DM
434
435/* print the buffer with suitable escapes */
436
437STATIC void
438S_printbuf(pTHX_ const char* fmt, const char* s)
439{
396482e1 440 SV* const tmp = newSVpvs("");
b6007c36
DM
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
442 SvREFCNT_dec(tmp);
443}
444
8fa7f367
JH
445#endif
446
ffb4593c
NT
447/*
448 * S_ao
449 *
c963b151
BD
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
452 */
453
76e3520e 454STATIC int
cea2e8a9 455S_ao(pTHX_ int toketype)
a0d0e21e 456{
97aff369 457 dVAR;
3280af22
NIS
458 if (*PL_bufptr == '=') {
459 PL_bufptr++;
a0d0e21e 460 if (toketype == ANDAND)
6154021b 461 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 462 else if (toketype == OROR)
6154021b 463 pl_yylval.ival = OP_ORASSIGN;
c963b151 464 else if (toketype == DORDOR)
6154021b 465 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
466 toketype = ASSIGNOP;
467 }
468 return toketype;
469}
470
ffb4593c
NT
471/*
472 * S_no_op
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
482 */
483
76e3520e 484STATIC void
bfed75c6 485S_no_op(pTHX_ const char *what, char *s)
463ee0b2 486{
97aff369 487 dVAR;
9d4ba2ae
AL
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 490
1189a94a
GS
491 if (!s)
492 s = oldbp;
07c798fb 493 else
1189a94a 494 PL_bufptr = s;
cea2e8a9 495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
496 if (ckWARN_d(WARN_SYNTAX)) {
497 if (is_first)
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 501 const char *t;
c35e046a
AL
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
503 NOOP;
56da5a46
RGS
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
551405c4 507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
508 }
509 else {
510 assert(s >= oldbp);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 513 }
07c798fb 514 }
3280af22 515 PL_bufptr = oldbp;
8990e307
LW
516}
517
ffb4593c
NT
518/*
519 * S_missingterm
520 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 521 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
524 * This is fatal.
525 */
526
76e3520e 527STATIC void
cea2e8a9 528S_missingterm(pTHX_ char *s)
8990e307 529{
97aff369 530 dVAR;
8990e307
LW
531 char tmpbuf[3];
532 char q;
533 if (s) {
9d4ba2ae 534 char * const nl = strrchr(s,'\n');
d2719217 535 if (nl)
8990e307
LW
536 *nl = '\0';
537 }
9d116dd7
JH
538 else if (
539#ifdef EBCDIC
540 iscntrl(PL_multi_close)
541#else
542 PL_multi_close < 32 || PL_multi_close == 127
543#endif
544 ) {
8990e307 545 *tmpbuf = '^';
585ec06d 546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
547 tmpbuf[2] = '\0';
548 s = tmpbuf;
549 }
550 else {
eb160463 551 *tmpbuf = (char)PL_multi_close;
8990e307
LW
552 tmpbuf[1] = '\0';
553 s = tmpbuf;
554 }
555 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 557}
79072805 558
ef89dcc3 559#define FEATURE_IS_ENABLED(name) \
0d863452 560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
562/* The longest string we pass in. */
563#define MAX_FEATURE_LEN (sizeof("switch")-1)
564
0d863452
RH
565/*
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
568 */
569STATIC bool
d4c19fe8 570S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 571{
97aff369 572 dVAR;
0d863452 573 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b
NC
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
d4c19fe8 577
7b9ef140 578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
579}
580
ffb4593c
NT
581/*
582 * Perl_deprecate
ffb4593c
NT
583 */
584
79072805 585void
bfed75c6 586Perl_deprecate(pTHX_ const char *s)
a0d0e21e 587{
599cee73 588 if (ckWARN(WARN_DEPRECATED))
9014280d 589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
590}
591
12bcd1a6 592void
bfed75c6 593Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
594{
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
597 /* */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
602
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
605 "Use of %s is deprecated", s);
606}
607
ffb4593c 608/*
9cbb5ea2
GS
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
ffb4593c
NT
611 */
612
c39cd008
GS
613#ifdef PERL_CR_FILTER
614static void
615strip_return(SV *sv)
616{
95a20fc0 617 register const char *s = SvPVX_const(sv);
9d4ba2ae 618 register const char * const e = s + SvCUR(sv);
c39cd008
GS
619 /* outer loop optimized to do nothing if there are no CR-LFs */
620 while (s < e) {
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
624 *d++ = *s++;
625 while (s < e) {
626 if (*s == '\r' && s[1] == '\n')
627 s++;
628 *d++ = *s++;
629 }
630 SvCUR(sv) -= s - d;
631 return;
632 }
633 }
634}
a868473f 635
76e3520e 636STATIC I32
c39cd008 637S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 638{
f54cb97a 639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
640 if (count > 0 && !maxlen)
641 strip_return(sv);
642 return count;
a868473f
NIS
643}
644#endif
645
199e78b7
DM
646
647
ffb4593c
NT
648/*
649 * Perl_lex_start
5486870f 650 *
e3abe207 651 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
652 *
653 * rsfp is the opened file handle to read from (if any),
654 *
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
657 *
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
ffb4593c
NT
660 */
661
a0d0e21e 662void
5486870f 663Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 664{
97aff369 665 dVAR;
6ef55633 666 const char *s = NULL;
8990e307 667 STRLEN len;
5486870f 668 yy_parser *parser, *oparser;
acdf0a21
DM
669
670 /* create and initialise a parser */
671
199e78b7 672 Newxz(parser, 1, yy_parser);
5486870f 673 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
674 PL_parser = parser;
675
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
679
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
683
e3abe207
DM
684 /* on scope exit, free this parser and restore any outer one */
685 SAVEPARSER(parser);
7c4baf47 686 parser->saved_curcop = PL_curcop;
e3abe207 687
acdf0a21 688 /* initialise lexer state */
8990e307 689
fb205e7a
DM
690#ifdef PERL_MAD
691 parser->curforce = -1;
692#else
693 parser->nexttoke = 0;
694#endif
c2598295 695 parser->copline = NOLINE;
5afb0a62 696 parser->lex_state = LEX_NORMAL;
c2598295 697 parser->expect = XSTATE;
2f9285f8 698 parser->rsfp = rsfp;
56b27c9a 699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
5486870f 700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
2f9285f8 701
199e78b7
DM
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
02b34bbe 705
10efb74f
NC
706 if (line) {
707 s = SvPV_const(line, len);
708 } else {
709 len = 0;
710 }
bdc0bf6f 711
10efb74f 712 if (!len) {
bdc0bf6f 713 parser->linestr = newSVpvs("\n;");
10efb74f 714 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 715 parser->linestr = newSVsv(line);
10efb74f 716 if (s[len-1] != ';')
bdc0bf6f 717 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
718 } else {
719 SvTEMP_off(line);
720 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 721 parser->linestr = line;
8990e307 722 }
f06b5848
DM
723 parser->oldoldbufptr =
724 parser->oldbufptr =
725 parser->bufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
79072805 729}
a687059c 730
e3abe207
DM
731
732/* delete a parser object */
733
734void
735Perl_parser_free(pTHX_ const yy_parser *parser)
736{
7c4baf47 737 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
738 SvREFCNT_dec(parser->linestr);
739
2f9285f8
DM
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
5486870f 745 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 746
e3abe207
DM
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
751 Safefree(parser);
752}
753
754
ffb4593c
NT
755/*
756 * Perl_lex_end
9cbb5ea2
GS
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
ffb4593c
NT
759 */
760
463ee0b2 761void
864dbfa3 762Perl_lex_end(pTHX)
463ee0b2 763{
97aff369 764 dVAR;
3280af22 765 PL_doextract = FALSE;
463ee0b2
LW
766}
767
ffb4593c
NT
768/*
769 * S_incline
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
57843af0 772 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 773 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
776 */
777
76e3520e 778STATIC void
d9095cec 779S_incline(pTHX_ const char *s)
463ee0b2 780{
97aff369 781 dVAR;
d9095cec
NC
782 const char *t;
783 const char *n;
784 const char *e;
463ee0b2 785
57843af0 786 CopLINE_inc(PL_curcop);
463ee0b2
LW
787 if (*s++ != '#')
788 return;
d4c19fe8
AL
789 while (SPACE_OR_TAB(*s))
790 s++;
73659bf1
GS
791 if (strnEQ(s, "line", 4))
792 s += 4;
793 else
794 return;
084592ab 795 if (SPACE_OR_TAB(*s))
73659bf1 796 s++;
4e553d73 797 else
73659bf1 798 return;
d4c19fe8
AL
799 while (SPACE_OR_TAB(*s))
800 s++;
463ee0b2
LW
801 if (!isDIGIT(*s))
802 return;
d4c19fe8 803
463ee0b2
LW
804 n = s;
805 while (isDIGIT(*s))
806 s++;
bf4acbe4 807 while (SPACE_OR_TAB(*s))
463ee0b2 808 s++;
73659bf1 809 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 810 s++;
73659bf1
GS
811 e = t + 1;
812 }
463ee0b2 813 else {
c35e046a
AL
814 t = s;
815 while (!isSPACE(*t))
816 t++;
73659bf1 817 e = t;
463ee0b2 818 }
bf4acbe4 819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
820 e++;
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
823
f4dd75d9 824 if (t - s > 0) {
d9095cec 825 const STRLEN len = t - s;
8a5ee598 826#ifndef USE_ITHREADS
19bad673
NC
827 SV *const temp_sv = CopFILESV(PL_curcop);
828 const char *cf;
829 STRLEN tmplen;
830
831 if (temp_sv) {
832 cf = SvPVX(temp_sv);
833 tmplen = SvCUR(temp_sv);
834 } else {
835 cf = NULL;
836 tmplen = 0;
837 }
838
42d9b98d 839 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
840 /* must copy *{"::_<(eval N)[oldfilename:L]"}
841 * to *{"::_<newfilename"} */
44867030
NC
842 /* However, the long form of evals is only turned on by the
843 debugger - usually they're "(eval %lu)" */
844 char smallbuf[128];
845 char *tmpbuf;
846 GV **gvp;
d9095cec 847 STRLEN tmplen2 = len;
798b63bc 848 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
849 tmpbuf = smallbuf;
850 else
2ae0db35 851 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
852 tmpbuf[0] = '_';
853 tmpbuf[1] = '<';
2ae0db35 854 memcpy(tmpbuf + 2, cf, tmplen);
44867030 855 tmplen += 2;
8a5ee598
RGS
856 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
857 if (gvp) {
44867030
NC
858 char *tmpbuf2;
859 GV *gv2;
860
861 if (tmplen2 + 2 <= sizeof smallbuf)
862 tmpbuf2 = smallbuf;
863 else
864 Newx(tmpbuf2, tmplen2 + 2, char);
865
866 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
867 /* Either they malloc'd it, or we malloc'd it,
868 so no prefix is present in ours. */
869 tmpbuf2[0] = '_';
870 tmpbuf2[1] = '<';
871 }
872
873 memcpy(tmpbuf2 + 2, s, tmplen2);
874 tmplen2 += 2;
875
8a5ee598 876 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 877 if (!isGV(gv2)) {
8a5ee598 878 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
879 /* adjust ${"::_<newfilename"} to store the new file name */
880 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
881 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
882 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
883 }
44867030
NC
884
885 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 886 }
e66cf94c 887 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 888 }
8a5ee598 889#endif
05ec9bb3 890 CopFILE_free(PL_curcop);
d9095cec 891 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 892 }
57843af0 893 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
894}
895
29595ff2 896#ifdef PERL_MAD
cd81e915 897/* skip space before PL_thistoken */
29595ff2
NC
898
899STATIC char *
900S_skipspace0(pTHX_ register char *s)
901{
902 s = skipspace(s);
903 if (!PL_madskills)
904 return s;
cd81e915
NC
905 if (PL_skipwhite) {
906 if (!PL_thiswhite)
6b29d1f5 907 PL_thiswhite = newSVpvs("");
cd81e915
NC
908 sv_catsv(PL_thiswhite, PL_skipwhite);
909 sv_free(PL_skipwhite);
910 PL_skipwhite = 0;
911 }
912 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
913 return s;
914}
915
cd81e915 916/* skip space after PL_thistoken */
29595ff2
NC
917
918STATIC char *
919S_skipspace1(pTHX_ register char *s)
920{
d4c19fe8 921 const char *start = s;
29595ff2
NC
922 I32 startoff = start - SvPVX(PL_linestr);
923
924 s = skipspace(s);
925 if (!PL_madskills)
926 return s;
927 start = SvPVX(PL_linestr) + startoff;
cd81e915 928 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 929 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
930 PL_thistoken = newSVpvn(tstart, start - tstart);
931 }
932 PL_realtokenstart = -1;
933 if (PL_skipwhite) {
934 if (!PL_nextwhite)
6b29d1f5 935 PL_nextwhite = newSVpvs("");
cd81e915
NC
936 sv_catsv(PL_nextwhite, PL_skipwhite);
937 sv_free(PL_skipwhite);
938 PL_skipwhite = 0;
29595ff2
NC
939 }
940 return s;
941}
942
943STATIC char *
944S_skipspace2(pTHX_ register char *s, SV **svp)
945{
c35e046a
AL
946 char *start;
947 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
948 const I32 startoff = s - SvPVX(PL_linestr);
949
29595ff2
NC
950 s = skipspace(s);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
953 return s;
954 start = SvPVX(PL_linestr) + startoff;
cd81e915 955 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
29595ff2 959 }
cd81e915 960 if (PL_skipwhite) {
29595ff2 961 if (!*svp)
6b29d1f5 962 *svp = newSVpvs("");
cd81e915
NC
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
965 PL_skipwhite = 0;
29595ff2
NC
966 }
967
968 return s;
969}
970#endif
971
80a702cd 972STATIC void
5fa550fb 973S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
974{
975 AV *av = CopFILEAVx(PL_curcop);
976 if (av) {
b9f83d2f 977 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
978 if (orig_sv)
979 sv_setsv(sv, orig_sv);
980 else
981 sv_setpvn(sv, buf, len);
80a702cd
RGS
982 (void)SvIOK_on(sv);
983 SvIV_set(sv, 0);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
985 }
986}
987
ffb4593c
NT
988/*
989 * S_skipspace
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
992 */
993
76e3520e 994STATIC char *
cea2e8a9 995S_skipspace(pTHX_ register char *s)
a687059c 996{
97aff369 997 dVAR;
5db06880
NC
998#ifdef PERL_MAD
999 int curoff;
1000 int startoff = s - SvPVX(PL_linestr);
1001
cd81e915
NC
1002 if (PL_skipwhite) {
1003 sv_free(PL_skipwhite);
1004 PL_skipwhite = 0;
5db06880
NC
1005 }
1006#endif
1007
3280af22 1008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1009 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1010 s++;
5db06880
NC
1011#ifdef PERL_MAD
1012 goto done;
1013#else
463ee0b2 1014 return s;
5db06880 1015#endif
463ee0b2
LW
1016 }
1017 for (;;) {
fd049845 1018 STRLEN prevlen;
09bef843 1019 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1020 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1021 while (s < PL_bufend && isSPACE(*s)) {
1022 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1023 incline(s);
1024 }
ffb4593c
NT
1025
1026 /* comment */
3280af22
NIS
1027 if (s < PL_bufend && *s == '#') {
1028 while (s < PL_bufend && *s != '\n')
463ee0b2 1029 s++;
60e6418e 1030 if (s < PL_bufend) {
463ee0b2 1031 s++;
60e6418e
GS
1032 if (PL_in_eval && !PL_rsfp) {
1033 incline(s);
1034 continue;
1035 }
1036 }
463ee0b2 1037 }
ffb4593c
NT
1038
1039 /* only continue to recharge the buffer if we're at the end
1040 * of the buffer, we're not reading from a source filter, and
1041 * we're in normal lexing mode
1042 */
09bef843
SB
1043 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1044 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1045#ifdef PERL_MAD
1046 goto done;
1047#else
463ee0b2 1048 return s;
5db06880 1049#endif
ffb4593c
NT
1050
1051 /* try to recharge the buffer */
5db06880
NC
1052#ifdef PERL_MAD
1053 curoff = s - SvPVX(PL_linestr);
1054#endif
1055
9cbb5ea2 1056 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1057 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1058 {
5db06880
NC
1059#ifdef PERL_MAD
1060 if (PL_madskills && curoff != startoff) {
cd81e915 1061 if (!PL_skipwhite)
6b29d1f5 1062 PL_skipwhite = newSVpvs("");
cd81e915 1063 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1064 curoff - startoff);
1065 }
1066
1067 /* mustn't throw out old stuff yet if madpropping */
1068 SvCUR(PL_linestr) = curoff;
1069 s = SvPVX(PL_linestr) + curoff;
1070 *s = 0;
1071 if (curoff && s[-1] == '\n')
1072 s[-1] = ' ';
1073#endif
1074
9cbb5ea2 1075 /* end of file. Add on the -p or -n magic */
cd81e915 1076 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1077 if (PL_minus_p) {
5db06880 1078#ifdef PERL_MAD
6502358f 1079 sv_catpvs(PL_linestr,
5db06880
NC
1080 ";}continue{print or die qq(-p destination: $!\\n);}");
1081#else
6502358f 1082 sv_setpvs(PL_linestr,
01a19ab0 1083 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1084#endif
3280af22 1085 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1086 }
01a19ab0 1087 else if (PL_minus_n) {
5db06880
NC
1088#ifdef PERL_MAD
1089 sv_catpvn(PL_linestr, ";}", 2);
1090#else
01a19ab0 1091 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1092#endif
01a19ab0
NC
1093 PL_minus_n = 0;
1094 }
a0d0e21e 1095 else
5db06880
NC
1096#ifdef PERL_MAD
1097 sv_catpvn(PL_linestr,";", 1);
1098#else
4147a61b 1099 sv_setpvn(PL_linestr,";", 1);
5db06880 1100#endif
ffb4593c
NT
1101
1102 /* reset variables for next time we lex */
9cbb5ea2 1103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1104 = SvPVX(PL_linestr)
1105#ifdef PERL_MAD
1106 + curoff
1107#endif
1108 ;
3280af22 1109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1110 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1111
4c84d7f2 1112 /* Close the filehandle. Could be from
ffb4593c
NT
1113 * STDIN, or a regular file. If we were reading code from
1114 * STDIN (because the commandline held no -e or filename)
1115 * then we don't close it, we reset it so the code can
1116 * read from STDIN too.
1117 */
1118
4c84d7f2 1119 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1120 PerlIO_clearerr(PL_rsfp);
8990e307 1121 else
3280af22 1122 (void)PerlIO_close(PL_rsfp);
4608196e 1123 PL_rsfp = NULL;
463ee0b2
LW
1124 return s;
1125 }
ffb4593c
NT
1126
1127 /* not at end of file, so we only read another line */
09bef843
SB
1128 /* make corresponding updates to old pointers, for yyerror() */
1129 oldprevlen = PL_oldbufptr - PL_bufend;
1130 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1131 if (PL_last_uni)
1132 oldunilen = PL_last_uni - PL_bufend;
1133 if (PL_last_lop)
1134 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1135 PL_linestart = PL_bufptr = s + prevlen;
1136 PL_bufend = s + SvCUR(PL_linestr);
1137 s = PL_bufptr;
09bef843
SB
1138 PL_oldbufptr = s + oldprevlen;
1139 PL_oldoldbufptr = s + oldoldprevlen;
1140 if (PL_last_uni)
1141 PL_last_uni = s + oldunilen;
1142 if (PL_last_lop)
1143 PL_last_lop = s + oldloplen;
a0d0e21e 1144 incline(s);
ffb4593c
NT
1145
1146 /* debugger active and we're not compiling the debugger code,
1147 * so store the line into the debugger's array of lines
1148 */
80a702cd 1149 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1150 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1151 }
5db06880
NC
1152
1153#ifdef PERL_MAD
1154 done:
1155 if (PL_madskills) {
cd81e915 1156 if (!PL_skipwhite)
6b29d1f5 1157 PL_skipwhite = newSVpvs("");
5db06880
NC
1158 curoff = s - SvPVX(PL_linestr);
1159 if (curoff - startoff)
cd81e915 1160 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1161 curoff - startoff);
1162 }
1163 return s;
1164#endif
a687059c 1165}
378cc40b 1166
ffb4593c
NT
1167/*
1168 * S_check_uni
1169 * Check the unary operators to ensure there's no ambiguity in how they're
1170 * used. An ambiguous piece of code would be:
1171 * rand + 5
1172 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1173 * the +5 is its argument.
1174 */
1175
76e3520e 1176STATIC void
cea2e8a9 1177S_check_uni(pTHX)
ba106d47 1178{
97aff369 1179 dVAR;
d4c19fe8
AL
1180 const char *s;
1181 const char *t;
2f3197b3 1182
3280af22 1183 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1184 return;
3280af22
NIS
1185 while (isSPACE(*PL_last_uni))
1186 PL_last_uni++;
c35e046a
AL
1187 s = PL_last_uni;
1188 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1189 s++;
3280af22 1190 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1191 return;
6136c704 1192
0453d815 1193 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1194 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1195 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1196 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1197 }
2f3197b3
LW
1198}
1199
ffb4593c
NT
1200/*
1201 * LOP : macro to build a list operator. Its behaviour has been replaced
1202 * with a subroutine, S_lop() for which LOP is just another name.
1203 */
1204
a0d0e21e
LW
1205#define LOP(f,x) return lop(f,x,s)
1206
ffb4593c
NT
1207/*
1208 * S_lop
1209 * Build a list operator (or something that might be one). The rules:
1210 * - if we have a next token, then it's a list operator [why?]
1211 * - if the next thing is an opening paren, then it's a function
1212 * - else it's a list operator
1213 */
1214
76e3520e 1215STATIC I32
a0be28da 1216S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1217{
97aff369 1218 dVAR;
6154021b 1219 pl_yylval.ival = f;
35c8bce7 1220 CLINE;
3280af22
NIS
1221 PL_expect = x;
1222 PL_bufptr = s;
1223 PL_last_lop = PL_oldbufptr;
eb160463 1224 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1225#ifdef PERL_MAD
1226 if (PL_lasttoke)
1227 return REPORT(LSTOP);
1228#else
3280af22 1229 if (PL_nexttoke)
bbf60fe6 1230 return REPORT(LSTOP);
5db06880 1231#endif
79072805 1232 if (*s == '(')
bbf60fe6 1233 return REPORT(FUNC);
29595ff2 1234 s = PEEKSPACE(s);
79072805 1235 if (*s == '(')
bbf60fe6 1236 return REPORT(FUNC);
79072805 1237 else
bbf60fe6 1238 return REPORT(LSTOP);
79072805
LW
1239}
1240
5db06880
NC
1241#ifdef PERL_MAD
1242 /*
1243 * S_start_force
1244 * Sets up for an eventual force_next(). start_force(0) basically does
1245 * an unshift, while start_force(-1) does a push. yylex removes items
1246 * on the "pop" end.
1247 */
1248
1249STATIC void
1250S_start_force(pTHX_ int where)
1251{
1252 int i;
1253
cd81e915 1254 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1255 where = PL_lasttoke;
cd81e915
NC
1256 assert(PL_curforce < 0 || PL_curforce == where);
1257 if (PL_curforce != where) {
5db06880
NC
1258 for (i = PL_lasttoke; i > where; --i) {
1259 PL_nexttoke[i] = PL_nexttoke[i-1];
1260 }
1261 PL_lasttoke++;
1262 }
cd81e915 1263 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1264 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1265 PL_curforce = where;
1266 if (PL_nextwhite) {
5db06880 1267 if (PL_madskills)
6b29d1f5 1268 curmad('^', newSVpvs(""));
cd81e915 1269 CURMAD('_', PL_nextwhite);
5db06880
NC
1270 }
1271}
1272
1273STATIC void
1274S_curmad(pTHX_ char slot, SV *sv)
1275{
1276 MADPROP **where;
1277
1278 if (!sv)
1279 return;
cd81e915
NC
1280 if (PL_curforce < 0)
1281 where = &PL_thismad;
5db06880 1282 else
cd81e915 1283 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1284
cd81e915 1285 if (PL_faketokens)
5db06880
NC
1286 sv_setpvn(sv, "", 0);
1287 else {
1288 if (!IN_BYTES) {
1289 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1290 SvUTF8_on(sv);
1291 else if (PL_encoding) {
1292 sv_recode_to_utf8(sv, PL_encoding);
1293 }
1294 }
1295 }
1296
1297 /* keep a slot open for the head of the list? */
1298 if (slot != '_' && *where && (*where)->mad_key == '^') {
1299 (*where)->mad_key = slot;
035e2bcc 1300 sv_free((SV*)((*where)->mad_val));
5db06880
NC
1301 (*where)->mad_val = (void*)sv;
1302 }
1303 else
1304 addmad(newMADsv(slot, sv), where, 0);
1305}
1306#else
b3f24c00
MHM
1307# define start_force(where) NOOP
1308# define curmad(slot, sv) NOOP
5db06880
NC
1309#endif
1310
ffb4593c
NT
1311/*
1312 * S_force_next
9cbb5ea2 1313 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1314 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1315 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1316 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1317 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1318 */
1319
4e553d73 1320STATIC void
cea2e8a9 1321S_force_next(pTHX_ I32 type)
79072805 1322{
97aff369 1323 dVAR;
5db06880 1324#ifdef PERL_MAD
cd81e915 1325 if (PL_curforce < 0)
5db06880 1326 start_force(PL_lasttoke);
cd81e915 1327 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1328 if (PL_lex_state != LEX_KNOWNEXT)
1329 PL_lex_defer = PL_lex_state;
1330 PL_lex_state = LEX_KNOWNEXT;
1331 PL_lex_expect = PL_expect;
cd81e915 1332 PL_curforce = -1;
5db06880 1333#else
3280af22
NIS
1334 PL_nexttype[PL_nexttoke] = type;
1335 PL_nexttoke++;
1336 if (PL_lex_state != LEX_KNOWNEXT) {
1337 PL_lex_defer = PL_lex_state;
1338 PL_lex_expect = PL_expect;
1339 PL_lex_state = LEX_KNOWNEXT;
79072805 1340 }
5db06880 1341#endif
79072805
LW
1342}
1343
d0a148a6
NC
1344STATIC SV *
1345S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1346{
97aff369 1347 dVAR;
740cce10
NC
1348 SV * const sv = newSVpvn_utf8(start, len,
1349 UTF && !IN_BYTES
1350 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1351 return sv;
1352}
1353
ffb4593c
NT
1354/*
1355 * S_force_word
1356 * When the lexer knows the next thing is a word (for instance, it has
1357 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1358 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1359 * lookahead.
ffb4593c
NT
1360 *
1361 * Arguments:
b1b65b59 1362 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1363 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1364 * int check_keyword : if true, Perl checks to make sure the word isn't
1365 * a keyword (do this if the word is a label, e.g. goto FOO)
1366 * int allow_pack : if true, : characters will also be allowed (require,
1367 * use, etc. do this)
9cbb5ea2 1368 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1369 */
1370
76e3520e 1371STATIC char *
cea2e8a9 1372S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1373{
97aff369 1374 dVAR;
463ee0b2
LW
1375 register char *s;
1376 STRLEN len;
4e553d73 1377
29595ff2 1378 start = SKIPSPACE1(start);
463ee0b2 1379 s = start;
7e2040f0 1380 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1381 (allow_pack && *s == ':') ||
15f0808c 1382 (allow_initial_tick && *s == '\'') )
a0d0e21e 1383 {
3280af22 1384 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1385 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1386 return start;
cd81e915 1387 start_force(PL_curforce);
5db06880
NC
1388 if (PL_madskills)
1389 curmad('X', newSVpvn(start,s-start));
463ee0b2 1390 if (token == METHOD) {
29595ff2 1391 s = SKIPSPACE1(s);
463ee0b2 1392 if (*s == '(')
3280af22 1393 PL_expect = XTERM;
463ee0b2 1394 else {
3280af22 1395 PL_expect = XOPERATOR;
463ee0b2 1396 }
79072805 1397 }
e74e6b3d 1398 if (PL_madskills)
63575281 1399 curmad('g', newSVpvs( "forced" ));
9ded7720 1400 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1401 = (OP*)newSVOP(OP_CONST,0,
1402 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1403 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1404 force_next(token);
1405 }
1406 return s;
1407}
1408
ffb4593c
NT
1409/*
1410 * S_force_ident
9cbb5ea2 1411 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1412 * text only contains the "foo" portion. The first argument is a pointer
1413 * to the "foo", and the second argument is the type symbol to prefix.
1414 * Forces the next token to be a "WORD".
9cbb5ea2 1415 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1416 */
1417
76e3520e 1418STATIC void
bfed75c6 1419S_force_ident(pTHX_ register const char *s, int kind)
79072805 1420{
97aff369 1421 dVAR;
c35e046a 1422 if (*s) {
90e5519e
NC
1423 const STRLEN len = strlen(s);
1424 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1425 start_force(PL_curforce);
9ded7720 1426 NEXTVAL_NEXTTOKE.opval = o;
79072805 1427 force_next(WORD);
748a9306 1428 if (kind) {
11343788 1429 o->op_private = OPpCONST_ENTERED;
55497cff 1430 /* XXX see note in pp_entereval() for why we forgo typo
1431 warnings if the symbol must be introduced in an eval.
1432 GSAR 96-10-12 */
90e5519e
NC
1433 gv_fetchpvn_flags(s, len,
1434 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1435 : GV_ADD,
1436 kind == '$' ? SVt_PV :
1437 kind == '@' ? SVt_PVAV :
1438 kind == '%' ? SVt_PVHV :
a0d0e21e 1439 SVt_PVGV
90e5519e 1440 );
748a9306 1441 }
79072805
LW
1442 }
1443}
1444
1571675a
GS
1445NV
1446Perl_str_to_version(pTHX_ SV *sv)
1447{
1448 NV retval = 0.0;
1449 NV nshift = 1.0;
1450 STRLEN len;
cfd0369c 1451 const char *start = SvPV_const(sv,len);
9d4ba2ae 1452 const char * const end = start + len;
504618e9 1453 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1454 while (start < end) {
ba210ebe 1455 STRLEN skip;
1571675a
GS
1456 UV n;
1457 if (utf)
9041c2e3 1458 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1459 else {
1460 n = *(U8*)start;
1461 skip = 1;
1462 }
1463 retval += ((NV)n)/nshift;
1464 start += skip;
1465 nshift *= 1000;
1466 }
1467 return retval;
1468}
1469
4e553d73 1470/*
ffb4593c
NT
1471 * S_force_version
1472 * Forces the next token to be a version number.
e759cc13
RGS
1473 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1474 * and if "guessing" is TRUE, then no new token is created (and the caller
1475 * must use an alternative parsing method).
ffb4593c
NT
1476 */
1477
76e3520e 1478STATIC char *
e759cc13 1479S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1480{
97aff369 1481 dVAR;
5f66b61c 1482 OP *version = NULL;
44dcb63b 1483 char *d;
5db06880
NC
1484#ifdef PERL_MAD
1485 I32 startoff = s - SvPVX(PL_linestr);
1486#endif
89bfa8cd 1487
29595ff2 1488 s = SKIPSPACE1(s);
89bfa8cd 1489
44dcb63b 1490 d = s;
dd629d5b 1491 if (*d == 'v')
44dcb63b 1492 d++;
44dcb63b 1493 if (isDIGIT(*d)) {
e759cc13
RGS
1494 while (isDIGIT(*d) || *d == '_' || *d == '.')
1495 d++;
5db06880
NC
1496#ifdef PERL_MAD
1497 if (PL_madskills) {
cd81e915 1498 start_force(PL_curforce);
5db06880
NC
1499 curmad('X', newSVpvn(s,d-s));
1500 }
1501#endif
9f3d182e 1502 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1503 SV *ver;
6154021b
RGS
1504 s = scan_num(s, &pl_yylval);
1505 version = pl_yylval.opval;
dd629d5b
GS
1506 ver = cSVOPx(version)->op_sv;
1507 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1508 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1509 SvNV_set(ver, str_to_version(ver));
1571675a 1510 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1511 }
89bfa8cd 1512 }
5db06880
NC
1513 else if (guessing) {
1514#ifdef PERL_MAD
1515 if (PL_madskills) {
cd81e915
NC
1516 sv_free(PL_nextwhite); /* let next token collect whitespace */
1517 PL_nextwhite = 0;
5db06880
NC
1518 s = SvPVX(PL_linestr) + startoff;
1519 }
1520#endif
e759cc13 1521 return s;
5db06880 1522 }
89bfa8cd 1523 }
1524
5db06880
NC
1525#ifdef PERL_MAD
1526 if (PL_madskills && !version) {
cd81e915
NC
1527 sv_free(PL_nextwhite); /* let next token collect whitespace */
1528 PL_nextwhite = 0;
5db06880
NC
1529 s = SvPVX(PL_linestr) + startoff;
1530 }
1531#endif
89bfa8cd 1532 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1533 start_force(PL_curforce);
9ded7720 1534 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1535 force_next(WORD);
89bfa8cd 1536
e759cc13 1537 return s;
89bfa8cd 1538}
1539
ffb4593c
NT
1540/*
1541 * S_tokeq
1542 * Tokenize a quoted string passed in as an SV. It finds the next
1543 * chunk, up to end of string or a backslash. It may make a new
1544 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1545 * turns \\ into \.
1546 */
1547
76e3520e 1548STATIC SV *
cea2e8a9 1549S_tokeq(pTHX_ SV *sv)
79072805 1550{
97aff369 1551 dVAR;
79072805
LW
1552 register char *s;
1553 register char *send;
1554 register char *d;
b3ac6de7
IZ
1555 STRLEN len = 0;
1556 SV *pv = sv;
79072805
LW
1557
1558 if (!SvLEN(sv))
b3ac6de7 1559 goto finish;
79072805 1560
a0d0e21e 1561 s = SvPV_force(sv, len);
21a311ee 1562 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1563 goto finish;
463ee0b2 1564 send = s + len;
79072805
LW
1565 while (s < send && *s != '\\')
1566 s++;
1567 if (s == send)
b3ac6de7 1568 goto finish;
79072805 1569 d = s;
be4731d2 1570 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1571 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1572 }
79072805
LW
1573 while (s < send) {
1574 if (*s == '\\') {
a0d0e21e 1575 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1576 s++; /* all that, just for this */
1577 }
1578 *d++ = *s++;
1579 }
1580 *d = '\0';
95a20fc0 1581 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1582 finish:
3280af22 1583 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1584 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1585 return sv;
1586}
1587
ffb4593c
NT
1588/*
1589 * Now come three functions related to double-quote context,
1590 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1591 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1592 * interact with PL_lex_state, and create fake ( ... ) argument lists
1593 * to handle functions and concatenation.
1594 * They assume that whoever calls them will be setting up a fake
1595 * join call, because each subthing puts a ',' after it. This lets
1596 * "lower \luPpEr"
1597 * become
1598 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1599 *
1600 * (I'm not sure whether the spurious commas at the end of lcfirst's
1601 * arguments and join's arguments are created or not).
1602 */
1603
1604/*
1605 * S_sublex_start
6154021b 1606 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1607 *
1608 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1609 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1610 *
1611 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1612 *
1613 * Everything else becomes a FUNC.
1614 *
1615 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1616 * had an OP_CONST or OP_READLINE). This just sets us up for a
1617 * call to S_sublex_push().
1618 */
1619
76e3520e 1620STATIC I32
cea2e8a9 1621S_sublex_start(pTHX)
79072805 1622{
97aff369 1623 dVAR;
6154021b 1624 register const I32 op_type = pl_yylval.ival;
79072805
LW
1625
1626 if (op_type == OP_NULL) {
6154021b 1627 pl_yylval.opval = PL_lex_op;
5f66b61c 1628 PL_lex_op = NULL;
79072805
LW
1629 return THING;
1630 }
1631 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1632 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1633
1634 if (SvTYPE(sv) == SVt_PVIV) {
1635 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1636 STRLEN len;
96a5add6 1637 const char * const p = SvPV_const(sv, len);
740cce10 1638 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1639 SvREFCNT_dec(sv);
1640 sv = nsv;
4e553d73 1641 }
6154021b 1642 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1643 PL_lex_stuff = NULL;
6f33ba73
RGS
1644 /* Allow <FH> // "foo" */
1645 if (op_type == OP_READLINE)
1646 PL_expect = XTERMORDORDOR;
79072805
LW
1647 return THING;
1648 }
e3f73d4e
RGS
1649 else if (op_type == OP_BACKTICK && PL_lex_op) {
1650 /* readpipe() vas overriden */
1651 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1652 pl_yylval.opval = PL_lex_op;
9b201d7d 1653 PL_lex_op = NULL;
e3f73d4e
RGS
1654 PL_lex_stuff = NULL;
1655 return THING;
1656 }
79072805 1657
3280af22 1658 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1659 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1660 PL_sublex_info.sub_op = PL_lex_op;
1661 PL_lex_state = LEX_INTERPPUSH;
55497cff 1662
3280af22
NIS
1663 PL_expect = XTERM;
1664 if (PL_lex_op) {
6154021b 1665 pl_yylval.opval = PL_lex_op;
5f66b61c 1666 PL_lex_op = NULL;
55497cff 1667 return PMFUNC;
1668 }
1669 else
1670 return FUNC;
1671}
1672
ffb4593c
NT
1673/*
1674 * S_sublex_push
1675 * Create a new scope to save the lexing state. The scope will be
1676 * ended in S_sublex_done. Returns a '(', starting the function arguments
1677 * to the uc, lc, etc. found before.
1678 * Sets PL_lex_state to LEX_INTERPCONCAT.
1679 */
1680
76e3520e 1681STATIC I32
cea2e8a9 1682S_sublex_push(pTHX)
55497cff 1683{
27da23d5 1684 dVAR;
f46d017c 1685 ENTER;
55497cff 1686
3280af22 1687 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1688 SAVEBOOL(PL_lex_dojoin);
3280af22 1689 SAVEI32(PL_lex_brackets);
3280af22
NIS
1690 SAVEI32(PL_lex_casemods);
1691 SAVEI32(PL_lex_starts);
651b5b28 1692 SAVEI8(PL_lex_state);
7766f137 1693 SAVEVPTR(PL_lex_inpat);
98246f1e 1694 SAVEI16(PL_lex_inwhat);
57843af0 1695 SAVECOPLINE(PL_curcop);
3280af22 1696 SAVEPPTR(PL_bufptr);
8452ff4b 1697 SAVEPPTR(PL_bufend);
3280af22
NIS
1698 SAVEPPTR(PL_oldbufptr);
1699 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1700 SAVEPPTR(PL_last_lop);
1701 SAVEPPTR(PL_last_uni);
3280af22
NIS
1702 SAVEPPTR(PL_linestart);
1703 SAVESPTR(PL_linestr);
8edd5f42
RGS
1704 SAVEGENERICPV(PL_lex_brackstack);
1705 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1706
1707 PL_linestr = PL_lex_stuff;
a0714e2c 1708 PL_lex_stuff = NULL;
3280af22 1709
9cbb5ea2
GS
1710 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1711 = SvPVX(PL_linestr);
3280af22 1712 PL_bufend += SvCUR(PL_linestr);
bd61b366 1713 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1714 SAVEFREESV(PL_linestr);
1715
1716 PL_lex_dojoin = FALSE;
1717 PL_lex_brackets = 0;
a02a5408
JC
1718 Newx(PL_lex_brackstack, 120, char);
1719 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1720 PL_lex_casemods = 0;
1721 *PL_lex_casestack = '\0';
1722 PL_lex_starts = 0;
1723 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1724 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1725
1726 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1727 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1728 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1729 else
5f66b61c 1730 PL_lex_inpat = NULL;
79072805 1731
55497cff 1732 return '(';
79072805
LW
1733}
1734
ffb4593c
NT
1735/*
1736 * S_sublex_done
1737 * Restores lexer state after a S_sublex_push.
1738 */
1739
76e3520e 1740STATIC I32
cea2e8a9 1741S_sublex_done(pTHX)
79072805 1742{
27da23d5 1743 dVAR;
3280af22 1744 if (!PL_lex_starts++) {
396482e1 1745 SV * const sv = newSVpvs("");
9aa983d2
JH
1746 if (SvUTF8(PL_linestr))
1747 SvUTF8_on(sv);
3280af22 1748 PL_expect = XOPERATOR;
6154021b 1749 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1750 return THING;
1751 }
1752
3280af22
NIS
1753 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1754 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1755 return yylex();
79072805
LW
1756 }
1757
ffb4593c 1758 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1759 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1760 PL_linestr = PL_lex_repl;
1761 PL_lex_inpat = 0;
1762 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1763 PL_bufend += SvCUR(PL_linestr);
bd61b366 1764 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1765 SAVEFREESV(PL_linestr);
1766 PL_lex_dojoin = FALSE;
1767 PL_lex_brackets = 0;
3280af22
NIS
1768 PL_lex_casemods = 0;
1769 *PL_lex_casestack = '\0';
1770 PL_lex_starts = 0;
25da4f38 1771 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1772 PL_lex_state = LEX_INTERPNORMAL;
1773 PL_lex_starts++;
e9fa98b2
HS
1774 /* we don't clear PL_lex_repl here, so that we can check later
1775 whether this is an evalled subst; that means we rely on the
1776 logic to ensure sublex_done() is called again only via the
1777 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1778 }
e9fa98b2 1779 else {
3280af22 1780 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1781 PL_lex_repl = NULL;
e9fa98b2 1782 }
79072805 1783 return ',';
ffed7fef
LW
1784 }
1785 else {
5db06880
NC
1786#ifdef PERL_MAD
1787 if (PL_madskills) {
cd81e915
NC
1788 if (PL_thiswhite) {
1789 if (!PL_endwhite)
6b29d1f5 1790 PL_endwhite = newSVpvs("");
cd81e915
NC
1791 sv_catsv(PL_endwhite, PL_thiswhite);
1792 PL_thiswhite = 0;
1793 }
1794 if (PL_thistoken)
1795 sv_setpvn(PL_thistoken,"",0);
5db06880 1796 else
cd81e915 1797 PL_realtokenstart = -1;
5db06880
NC
1798 }
1799#endif
f46d017c 1800 LEAVE;
3280af22
NIS
1801 PL_bufend = SvPVX(PL_linestr);
1802 PL_bufend += SvCUR(PL_linestr);
1803 PL_expect = XOPERATOR;
09bef843 1804 PL_sublex_info.sub_inwhat = 0;
79072805 1805 return ')';
ffed7fef
LW
1806 }
1807}
1808
02aa26ce
NT
1809/*
1810 scan_const
1811
1812 Extracts a pattern, double-quoted string, or transliteration. This
1813 is terrifying code.
1814
94def140 1815 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1816 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1817 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1818
94def140
TS
1819 Returns a pointer to the character scanned up to. If this is
1820 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1821 successfully parsed), will leave an OP for the substring scanned
6154021b 1822 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1823 by looking at the next characters herself.
1824
02aa26ce
NT
1825 In patterns:
1826 backslashes:
1827 double-quoted style: \r and \n
1828 regexp special ones: \D \s
94def140
TS
1829 constants: \x31
1830 backrefs: \1
02aa26ce
NT
1831 case and quoting: \U \Q \E
1832 stops on @ and $, but not for $ as tail anchor
1833
1834 In transliterations:
1835 characters are VERY literal, except for - not at the start or end
94def140
TS
1836 of the string, which indicates a range. If the range is in bytes,
1837 scan_const expands the range to the full set of intermediate
1838 characters. If the range is in utf8, the hyphen is replaced with
1839 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1840
1841 In double-quoted strings:
1842 backslashes:
1843 double-quoted style: \r and \n
94def140
TS
1844 constants: \x31
1845 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1846 case and quoting: \U \Q \E
1847 stops on @ and $
1848
1849 scan_const does *not* construct ops to handle interpolated strings.
1850 It stops processing as soon as it finds an embedded $ or @ variable
1851 and leaves it to the caller to work out what's going on.
1852
94def140
TS
1853 embedded arrays (whether in pattern or not) could be:
1854 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1855
1856 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1857
1858 $ in pattern could be $foo or could be tail anchor. Assumption:
1859 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1860 followed by one of "()| \r\n\t"
02aa26ce
NT
1861
1862 \1 (backreferences) are turned into $1
1863
1864 The structure of the code is
1865 while (there's a character to process) {
94def140
TS
1866 handle transliteration ranges
1867 skip regexp comments /(?#comment)/ and codes /(?{code})/
1868 skip #-initiated comments in //x patterns
1869 check for embedded arrays
02aa26ce
NT
1870 check for embedded scalars
1871 if (backslash) {
94def140
TS
1872 leave intact backslashes from leaveit (below)
1873 deprecate \1 in substitution replacements
02aa26ce
NT
1874 handle string-changing backslashes \l \U \Q \E, etc.
1875 switch (what was escaped) {
94def140
TS
1876 handle \- in a transliteration (becomes a literal -)
1877 handle \132 (octal characters)
1878 handle \x15 and \x{1234} (hex characters)
1879 handle \N{name} (named characters)
1880 handle \cV (control characters)
1881 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1882 } (end switch)
1883 } (end if backslash)
1884 } (end while character to read)
4e553d73 1885
02aa26ce
NT
1886*/
1887
76e3520e 1888STATIC char *
cea2e8a9 1889S_scan_const(pTHX_ char *start)
79072805 1890{
97aff369 1891 dVAR;
3280af22 1892 register char *send = PL_bufend; /* end of the constant */
561b68a9 1893 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1894 register char *s = start; /* start of the constant */
1895 register char *d = SvPVX(sv); /* destination for copies */
1896 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1897 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1898 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1899 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1900 UV uv;
4c3a8340
TS
1901#ifdef EBCDIC
1902 UV literal_endpoint = 0;
e294cc5d 1903 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1904#endif
012bcf8d 1905
2b9d42f0
NIS
1906 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1907 /* If we are doing a trans and we know we want UTF8 set expectation */
1908 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1909 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1910 }
1911
1912
79072805 1913 while (s < send || dorange) {
02aa26ce 1914 /* get transliterations out of the way (they're most literal) */
3280af22 1915 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1916 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1917 if (dorange) {
1ba5c669
JH
1918 I32 i; /* current expanded character */
1919 I32 min; /* first character in range */
1920 I32 max; /* last character in range */
02aa26ce 1921
e294cc5d
JH
1922#ifdef EBCDIC
1923 UV uvmax = 0;
1924#endif
1925
1926 if (has_utf8
1927#ifdef EBCDIC
1928 && !native_range
1929#endif
1930 ) {
9d4ba2ae 1931 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1932 char *e = d++;
1933 while (e-- > c)
1934 *(e + 1) = *e;
25716404 1935 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1936 /* mark the range as done, and continue */
1937 dorange = FALSE;
1938 didrange = TRUE;
1939 continue;
1940 }
2b9d42f0 1941
95a20fc0 1942 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1943#ifdef EBCDIC
1944 SvGROW(sv,
1945 SvLEN(sv) + (has_utf8 ?
1946 (512 - UTF_CONTINUATION_MARK +
1947 UNISKIP(0x100))
1948 : 256));
1949 /* How many two-byte within 0..255: 128 in UTF-8,
1950 * 96 in UTF-8-mod. */
1951#else
9cbb5ea2 1952 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1953#endif
9cbb5ea2 1954 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1955#ifdef EBCDIC
1956 if (has_utf8) {
1957 int j;
1958 for (j = 0; j <= 1; j++) {
1959 char * const c = (char*)utf8_hop((U8*)d, -1);
1960 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1961 if (j)
1962 min = (U8)uv;
1963 else if (uv < 256)
1964 max = (U8)uv;
1965 else {
1966 max = (U8)0xff; /* only to \xff */
1967 uvmax = uv; /* \x{100} to uvmax */
1968 }
1969 d = c; /* eat endpoint chars */
1970 }
1971 }
1972 else {
1973#endif
1974 d -= 2; /* eat the first char and the - */
1975 min = (U8)*d; /* first char in range */
1976 max = (U8)d[1]; /* last char in range */
1977#ifdef EBCDIC
1978 }
1979#endif
8ada0baa 1980
c2e66d9e 1981 if (min > max) {
01ec43d0 1982 Perl_croak(aTHX_
d1573ac7 1983 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1984 (char)min, (char)max);
c2e66d9e
GS
1985 }
1986
c7f1f016 1987#ifdef EBCDIC
4c3a8340
TS
1988 if (literal_endpoint == 2 &&
1989 ((isLOWER(min) && isLOWER(max)) ||
1990 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1991 if (isLOWER(min)) {
1992 for (i = min; i <= max; i++)
1993 if (isLOWER(i))
db42d148 1994 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1995 } else {
1996 for (i = min; i <= max; i++)
1997 if (isUPPER(i))
db42d148 1998 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1999 }
2000 }
2001 else
2002#endif
2003 for (i = min; i <= max; i++)
e294cc5d
JH
2004#ifdef EBCDIC
2005 if (has_utf8) {
2006 const U8 ch = (U8)NATIVE_TO_UTF(i);
2007 if (UNI_IS_INVARIANT(ch))
2008 *d++ = (U8)i;
2009 else {
2010 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2011 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2012 }
2013 }
2014 else
2015#endif
2016 *d++ = (char)i;
2017
2018#ifdef EBCDIC
2019 if (uvmax) {
2020 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2021 if (uvmax > 0x101)
2022 *d++ = (char)UTF_TO_NATIVE(0xff);
2023 if (uvmax > 0x100)
2024 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2025 }
2026#endif
02aa26ce
NT
2027
2028 /* mark the range as done, and continue */
79072805 2029 dorange = FALSE;
01ec43d0 2030 didrange = TRUE;
4c3a8340
TS
2031#ifdef EBCDIC
2032 literal_endpoint = 0;
2033#endif
79072805 2034 continue;
4e553d73 2035 }
02aa26ce
NT
2036
2037 /* range begins (ignore - as first or last char) */
79072805 2038 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2039 if (didrange) {
1fafa243 2040 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2041 }
e294cc5d
JH
2042 if (has_utf8
2043#ifdef EBCDIC
2044 && !native_range
2045#endif
2046 ) {
25716404 2047 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2048 s++;
2049 continue;
2050 }
79072805
LW
2051 dorange = TRUE;
2052 s++;
01ec43d0
GS
2053 }
2054 else {
2055 didrange = FALSE;
4c3a8340
TS
2056#ifdef EBCDIC
2057 literal_endpoint = 0;
e294cc5d 2058 native_range = TRUE;
4c3a8340 2059#endif
01ec43d0 2060 }
79072805 2061 }
02aa26ce
NT
2062
2063 /* if we get here, we're not doing a transliteration */
2064
0f5d15d6
IZ
2065 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2066 except for the last char, which will be done separately. */
3280af22 2067 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2068 if (s[2] == '#') {
e994fd66 2069 while (s+1 < send && *s != ')')
db42d148 2070 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2071 }
2072 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2073 || (s[2] == '?' && s[3] == '{'))
155aba94 2074 {
cc6b7395 2075 I32 count = 1;
0f5d15d6 2076 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2077 char c;
2078
d9f97599
GS
2079 while (count && (c = *regparse)) {
2080 if (c == '\\' && regparse[1])
2081 regparse++;
4e553d73 2082 else if (c == '{')
cc6b7395 2083 count++;
4e553d73 2084 else if (c == '}')
cc6b7395 2085 count--;
d9f97599 2086 regparse++;
cc6b7395 2087 }
e994fd66 2088 if (*regparse != ')')
5bdf89e7 2089 regparse--; /* Leave one char for continuation. */
0f5d15d6 2090 while (s < regparse)
db42d148 2091 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2092 }
748a9306 2093 }
02aa26ce
NT
2094
2095 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2096 else if (*s == '#' && PL_lex_inpat &&
2097 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2098 while (s+1 < send && *s != '\n')
db42d148 2099 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2100 }
02aa26ce 2101
5d1d4326 2102 /* check for embedded arrays
da6eedaa 2103 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2104 */
1749ea0d
TS
2105 else if (*s == '@' && s[1]) {
2106 if (isALNUM_lazy_if(s+1,UTF))
2107 break;
2108 if (strchr(":'{$", s[1]))
2109 break;
2110 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2111 break; /* in regexp, neither @+ nor @- are interpolated */
2112 }
02aa26ce
NT
2113
2114 /* check for embedded scalars. only stop if we're sure it's a
2115 variable.
2116 */
79072805 2117 else if (*s == '$') {
3280af22 2118 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2119 break;
6002328a 2120 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2121 break; /* in regexp, $ might be tail anchor */
2122 }
02aa26ce 2123
2b9d42f0
NIS
2124 /* End of else if chain - OP_TRANS rejoin rest */
2125
02aa26ce 2126 /* backslashes */
79072805
LW
2127 if (*s == '\\' && s+1 < send) {
2128 s++;
02aa26ce 2129
02aa26ce 2130 /* deprecate \1 in strings and substitution replacements */
3280af22 2131 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2132 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2133 {
599cee73 2134 if (ckWARN(WARN_SYNTAX))
9014280d 2135 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2136 *--s = '$';
2137 break;
2138 }
02aa26ce
NT
2139
2140 /* string-change backslash escapes */
3280af22 2141 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2142 --s;
2143 break;
2144 }
cc74c5bd
TS
2145 /* skip any other backslash escapes in a pattern */
2146 else if (PL_lex_inpat) {
2147 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2148 goto default_action;
2149 }
02aa26ce
NT
2150
2151 /* if we get here, it's either a quoted -, or a digit */
79072805 2152 switch (*s) {
02aa26ce
NT
2153
2154 /* quoted - in transliterations */
79072805 2155 case '-':
3280af22 2156 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2157 *d++ = *s++;
2158 continue;
2159 }
2160 /* FALL THROUGH */
2161 default:
11b8faa4 2162 {
86f97054 2163 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2164 ckWARN(WARN_MISC))
9014280d 2165 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2166 "Unrecognized escape \\%c passed through",
2167 *s);
11b8faa4 2168 /* default action is to copy the quoted character */
f9a63242 2169 goto default_action;
11b8faa4 2170 }
02aa26ce
NT
2171
2172 /* \132 indicates an octal constant */
79072805
LW
2173 case '0': case '1': case '2': case '3':
2174 case '4': case '5': case '6': case '7':
ba210ebe 2175 {
53305cf1
NC
2176 I32 flags = 0;
2177 STRLEN len = 3;
2178 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2179 s += len;
2180 }
012bcf8d 2181 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2182
2183 /* \x24 indicates a hex constant */
79072805 2184 case 'x':
a0ed51b3
LW
2185 ++s;
2186 if (*s == '{') {
9d4ba2ae 2187 char* const e = strchr(s, '}');
a4c04bdc
NC
2188 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2189 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2190 STRLEN len;
355860ce 2191
53305cf1 2192 ++s;
adaeee49 2193 if (!e) {
a0ed51b3 2194 yyerror("Missing right brace on \\x{}");
355860ce 2195 continue;
ba210ebe 2196 }
53305cf1
NC
2197 len = e - s;
2198 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2199 s = e + 1;
a0ed51b3
LW
2200 }
2201 else {
ba210ebe 2202 {
53305cf1 2203 STRLEN len = 2;
a4c04bdc 2204 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2205 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2206 s += len;
2207 }
012bcf8d
GS
2208 }
2209
2210 NUM_ESCAPE_INSERT:
2211 /* Insert oct or hex escaped character.
301d3d20 2212 * There will always enough room in sv since such
db42d148 2213 * escapes will be longer than any UTF-8 sequence
301d3d20 2214 * they can end up as. */
ba7cea30 2215
c7f1f016
NIS
2216 /* We need to map to chars to ASCII before doing the tests
2217 to cover EBCDIC
2218 */
c4d5f83a 2219 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2220 if (!has_utf8 && uv > 255) {
301d3d20
JH
2221 /* Might need to recode whatever we have
2222 * accumulated so far if it contains any
2223 * hibit chars.
2224 *
2225 * (Can't we keep track of that and avoid
2226 * this rescan? --jhi)
012bcf8d 2227 */
c7f1f016 2228 int hicount = 0;
63cd0674
NIS
2229 U8 *c;
2230 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2231 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2232 hicount++;
db42d148 2233 }
012bcf8d 2234 }
63cd0674 2235 if (hicount) {
9d4ba2ae 2236 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2237 U8 *src, *dst;
2238 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2239 src = (U8 *)d - 1;
2240 dst = src+hicount;
2241 d += hicount;
cfd0369c 2242 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2243 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2244 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2245 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2246 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2247 }
2248 else {
63cd0674 2249 *dst-- = *src;
012bcf8d 2250 }
c7f1f016 2251 src--;
012bcf8d
GS
2252 }
2253 }
2254 }
2255
9aa983d2 2256 if (has_utf8 || uv > 255) {
9041c2e3 2257 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2258 has_utf8 = TRUE;
f9a63242
JH
2259 if (PL_lex_inwhat == OP_TRANS &&
2260 PL_sublex_info.sub_op) {
2261 PL_sublex_info.sub_op->op_private |=
2262 (PL_lex_repl ? OPpTRANS_FROM_UTF
2263 : OPpTRANS_TO_UTF);
f9a63242 2264 }
e294cc5d
JH
2265#ifdef EBCDIC
2266 if (uv > 255 && !dorange)
2267 native_range = FALSE;
2268#endif
012bcf8d 2269 }
a0ed51b3 2270 else {
012bcf8d 2271 *d++ = (char)uv;
a0ed51b3 2272 }
012bcf8d
GS
2273 }
2274 else {
c4d5f83a 2275 *d++ = (char) uv;
a0ed51b3 2276 }
79072805 2277 continue;
02aa26ce 2278
b239daa5 2279 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2280 case 'N':
55eda711 2281 ++s;
423cee85
JH
2282 if (*s == '{') {
2283 char* e = strchr(s, '}');
155aba94 2284 SV *res;
423cee85 2285 STRLEN len;
cfd0369c 2286 const char *str;
4e553d73 2287
423cee85 2288 if (!e) {
5777a3f7 2289 yyerror("Missing right brace on \\N{}");
423cee85
JH
2290 e = s - 1;
2291 goto cont_scan;
2292 }
dbc0d4f2
JH
2293 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2294 /* \N{U+...} */
2295 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2296 PERL_SCAN_DISALLOW_PREFIX;
2297 s += 3;
2298 len = e - s;
2299 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2300 if ( e > s && len != (STRLEN)(e - s) ) {
2301 uv = 0xFFFD;
fc8cd66c 2302 }
dbc0d4f2
JH
2303 s = e + 1;
2304 goto NUM_ESCAPE_INSERT;
2305 }
55eda711 2306 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2307 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2308 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2309 if (has_utf8)
2310 sv_utf8_upgrade(res);
cfd0369c 2311 str = SvPV_const(res,len);
1c47067b
JH
2312#ifdef EBCDIC_NEVER_MIND
2313 /* charnames uses pack U and that has been
2314 * recently changed to do the below uni->native
2315 * mapping, so this would be redundant (and wrong,
2316 * the code point would be doubly converted).
2317 * But leave this in just in case the pack U change
2318 * gets revoked, but the semantics is still
2319 * desireable for charnames. --jhi */
cddc7ef4 2320 {
cfd0369c 2321 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2322
2323 if (uv < 0x100) {
89ebb4a3 2324 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2325
2326 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2327 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2328 str = SvPV_const(res, len);
cddc7ef4
JH
2329 }
2330 }
2331#endif
89491803 2332 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2333 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2334 SvCUR_set(sv, d - ostart);
2335 SvPOK_on(sv);
e4f3eed8 2336 *d = '\0';
f08d6ad9 2337 sv_utf8_upgrade(sv);
d2f449dd 2338 /* this just broke our allocation above... */
eb160463 2339 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2340 d = SvPVX(sv) + SvCUR(sv);
89491803 2341 has_utf8 = TRUE;
f08d6ad9 2342 }
eb160463 2343 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2344 const char * const odest = SvPVX_const(sv);
423cee85 2345
8973db79 2346 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2347 d = SvPVX(sv) + (d - odest);
2348 }
e294cc5d
JH
2349#ifdef EBCDIC
2350 if (!dorange)
2351 native_range = FALSE; /* \N{} is guessed to be Unicode */
2352#endif
423cee85
JH
2353 Copy(str, d, len, char);
2354 d += len;
2355 SvREFCNT_dec(res);
2356 cont_scan:
2357 s = e + 1;
2358 }
2359 else
5777a3f7 2360 yyerror("Missing braces on \\N{}");
423cee85
JH
2361 continue;
2362
02aa26ce 2363 /* \c is a control character */
79072805
LW
2364 case 'c':
2365 s++;
961ce445 2366 if (s < send) {
ba210ebe 2367 U8 c = *s++;
c7f1f016
NIS
2368#ifdef EBCDIC
2369 if (isLOWER(c))
2370 c = toUPPER(c);
2371#endif
db42d148 2372 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2373 }
961ce445
RGS
2374 else {
2375 yyerror("Missing control char name in \\c");
2376 }
79072805 2377 continue;
02aa26ce
NT
2378
2379 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2380 case 'b':
db42d148 2381 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2382 break;
2383 case 'n':
db42d148 2384 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2385 break;
2386 case 'r':
db42d148 2387 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2388 break;
2389 case 'f':
db42d148 2390 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2391 break;
2392 case 't':
db42d148 2393 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2394 break;
34a3fe2a 2395 case 'e':
db42d148 2396 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2397 break;
2398 case 'a':
db42d148 2399 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2400 break;
02aa26ce
NT
2401 } /* end switch */
2402
79072805
LW
2403 s++;
2404 continue;
02aa26ce 2405 } /* end if (backslash) */
4c3a8340
TS
2406#ifdef EBCDIC
2407 else
2408 literal_endpoint++;
2409#endif
02aa26ce 2410
f9a63242 2411 default_action:
2b9d42f0
NIS
2412 /* If we started with encoded form, or already know we want it
2413 and then encode the next character */
2414 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2415 STRLEN len = 1;
5f66b61c
AL
2416 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2417 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2418 s += len;
2419 if (need > len) {
2420 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2421 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2422 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2423 }
5f66b61c 2424 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2425 has_utf8 = TRUE;
e294cc5d
JH
2426#ifdef EBCDIC
2427 if (uv > 255 && !dorange)
2428 native_range = FALSE;
2429#endif
2b9d42f0
NIS
2430 }
2431 else {
2432 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2433 }
02aa26ce
NT
2434 } /* while loop to process each character */
2435
2436 /* terminate the string and set up the sv */
79072805 2437 *d = '\0';
95a20fc0 2438 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2439 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2440 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2441
79072805 2442 SvPOK_on(sv);
9f4817db 2443 if (PL_encoding && !has_utf8) {
d0063567
DK
2444 sv_recode_to_utf8(sv, PL_encoding);
2445 if (SvUTF8(sv))
2446 has_utf8 = TRUE;
9f4817db 2447 }
2b9d42f0 2448 if (has_utf8) {
7e2040f0 2449 SvUTF8_on(sv);
2b9d42f0 2450 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2451 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2452 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2453 }
2454 }
79072805 2455
02aa26ce 2456 /* shrink the sv if we allocated more than we used */
79072805 2457 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2458 SvPV_shrink_to_cur(sv);
79072805 2459 }
02aa26ce 2460
6154021b 2461 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2462 if (s > PL_bufptr) {
eb0d8d16
NC
2463 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2464 const char *const key = PL_lex_inpat ? "qr" : "q";
2465 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2466 const char *type;
2467 STRLEN typelen;
2468
2469 if (PL_lex_inwhat == OP_TRANS) {
2470 type = "tr";
2471 typelen = 2;
2472 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2473 type = "s";
2474 typelen = 1;
2475 } else {
2476 type = "qq";
2477 typelen = 2;
2478 }
2479
2480 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2481 type, typelen);
2482 }
6154021b 2483 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2484 } else
8990e307 2485 SvREFCNT_dec(sv);
79072805
LW
2486 return s;
2487}
2488
ffb4593c
NT
2489/* S_intuit_more
2490 * Returns TRUE if there's more to the expression (e.g., a subscript),
2491 * FALSE otherwise.
ffb4593c
NT
2492 *
2493 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2494 *
2495 * ->[ and ->{ return TRUE
2496 * { and [ outside a pattern are always subscripts, so return TRUE
2497 * if we're outside a pattern and it's not { or [, then return FALSE
2498 * if we're in a pattern and the first char is a {
2499 * {4,5} (any digits around the comma) returns FALSE
2500 * if we're in a pattern and the first char is a [
2501 * [] returns FALSE
2502 * [SOMETHING] has a funky algorithm to decide whether it's a
2503 * character class or not. It has to deal with things like
2504 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2505 * anything else returns TRUE
2506 */
2507
9cbb5ea2
GS
2508/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2509
76e3520e 2510STATIC int
cea2e8a9 2511S_intuit_more(pTHX_ register char *s)
79072805 2512{
97aff369 2513 dVAR;
3280af22 2514 if (PL_lex_brackets)
79072805
LW
2515 return TRUE;
2516 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2517 return TRUE;
2518 if (*s != '{' && *s != '[')
2519 return FALSE;
3280af22 2520 if (!PL_lex_inpat)
79072805
LW
2521 return TRUE;
2522
2523 /* In a pattern, so maybe we have {n,m}. */
2524 if (*s == '{') {
2525 s++;
2526 if (!isDIGIT(*s))
2527 return TRUE;
2528 while (isDIGIT(*s))
2529 s++;
2530 if (*s == ',')
2531 s++;
2532 while (isDIGIT(*s))
2533 s++;
2534 if (*s == '}')
2535 return FALSE;
2536 return TRUE;
2537
2538 }
2539
2540 /* On the other hand, maybe we have a character class */
2541
2542 s++;
2543 if (*s == ']' || *s == '^')
2544 return FALSE;
2545 else {
ffb4593c 2546 /* this is terrifying, and it works */
79072805
LW
2547 int weight = 2; /* let's weigh the evidence */
2548 char seen[256];
f27ffc4a 2549 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2550 const char * const send = strchr(s,']');
3280af22 2551 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2552
2553 if (!send) /* has to be an expression */
2554 return TRUE;
2555
2556 Zero(seen,256,char);
2557 if (*s == '$')
2558 weight -= 3;
2559 else if (isDIGIT(*s)) {
2560 if (s[1] != ']') {
2561 if (isDIGIT(s[1]) && s[2] == ']')
2562 weight -= 10;
2563 }
2564 else
2565 weight -= 100;
2566 }
2567 for (; s < send; s++) {
2568 last_un_char = un_char;
2569 un_char = (unsigned char)*s;
2570 switch (*s) {
2571 case '@':
2572 case '&':
2573 case '$':
2574 weight -= seen[un_char] * 10;
7e2040f0 2575 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2576 int len;
8903cb82 2577 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2578 len = (int)strlen(tmpbuf);
2579 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2580 weight -= 100;
2581 else
2582 weight -= 10;
2583 }
2584 else if (*s == '$' && s[1] &&
93a17b20
LW
2585 strchr("[#!%*<>()-=",s[1])) {
2586 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2587 weight -= 10;
2588 else
2589 weight -= 1;
2590 }
2591 break;
2592 case '\\':
2593 un_char = 254;
2594 if (s[1]) {
93a17b20 2595 if (strchr("wds]",s[1]))
79072805 2596 weight += 100;
10edeb5d 2597 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2598 weight += 1;
93a17b20 2599 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2600 weight += 40;
2601 else if (isDIGIT(s[1])) {
2602 weight += 40;
2603 while (s[1] && isDIGIT(s[1]))
2604 s++;
2605 }
2606 }
2607 else
2608 weight += 100;
2609 break;
2610 case '-':
2611 if (s[1] == '\\')
2612 weight += 50;
93a17b20 2613 if (strchr("aA01! ",last_un_char))
79072805 2614 weight += 30;
93a17b20 2615 if (strchr("zZ79~",s[1]))
79072805 2616 weight += 30;
f27ffc4a
GS
2617 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2618 weight -= 5; /* cope with negative subscript */
79072805
LW
2619 break;
2620 default:
3792a11b
NC
2621 if (!isALNUM(last_un_char)
2622 && !(last_un_char == '$' || last_un_char == '@'
2623 || last_un_char == '&')
2624 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2625 char *d = tmpbuf;
2626 while (isALPHA(*s))
2627 *d++ = *s++;
2628 *d = '\0';
5458a98a 2629 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2630 weight -= 150;
2631 }
2632 if (un_char == last_un_char + 1)
2633 weight += 5;
2634 weight -= seen[un_char];
2635 break;
2636 }
2637 seen[un_char]++;
2638 }
2639 if (weight >= 0) /* probably a character class */
2640 return FALSE;
2641 }
2642
2643 return TRUE;
2644}
ffed7fef 2645
ffb4593c
NT
2646/*
2647 * S_intuit_method
2648 *
2649 * Does all the checking to disambiguate
2650 * foo bar
2651 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2652 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2653 *
2654 * First argument is the stuff after the first token, e.g. "bar".
2655 *
2656 * Not a method if bar is a filehandle.
2657 * Not a method if foo is a subroutine prototyped to take a filehandle.
2658 * Not a method if it's really "Foo $bar"
2659 * Method if it's "foo $bar"
2660 * Not a method if it's really "print foo $bar"
2661 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2662 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2663 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2664 * =>
2665 */
2666
76e3520e 2667STATIC int
62d55b22 2668S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2669{
97aff369 2670 dVAR;
a0d0e21e 2671 char *s = start + (*start == '$');
3280af22 2672 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2673 STRLEN len;
2674 GV* indirgv;
5db06880
NC
2675#ifdef PERL_MAD
2676 int soff;
2677#endif
a0d0e21e
LW
2678
2679 if (gv) {
62d55b22 2680 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2681 return 0;
62d55b22
NC
2682 if (cv) {
2683 if (SvPOK(cv)) {
2684 const char *proto = SvPVX_const(cv);
2685 if (proto) {
2686 if (*proto == ';')
2687 proto++;
2688 if (*proto == '*')
2689 return 0;
2690 }
b6c543e3
IZ
2691 }
2692 } else
c35e046a 2693 gv = NULL;
a0d0e21e 2694 }
8903cb82 2695 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2696 /* start is the beginning of the possible filehandle/object,
2697 * and s is the end of it
2698 * tmpbuf is a copy of it
2699 */
2700
a0d0e21e 2701 if (*start == '$') {
3ef1310e
RGS
2702 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2703 isUPPER(*PL_tokenbuf))
a0d0e21e 2704 return 0;
5db06880
NC
2705#ifdef PERL_MAD
2706 len = start - SvPVX(PL_linestr);
2707#endif
29595ff2 2708 s = PEEKSPACE(s);
f0092767 2709#ifdef PERL_MAD
5db06880
NC
2710 start = SvPVX(PL_linestr) + len;
2711#endif
3280af22
NIS
2712 PL_bufptr = start;
2713 PL_expect = XREF;
a0d0e21e
LW
2714 return *s == '(' ? FUNCMETH : METHOD;
2715 }
5458a98a 2716 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2717 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2718 len -= 2;
2719 tmpbuf[len] = '\0';
5db06880
NC
2720#ifdef PERL_MAD
2721 soff = s - SvPVX(PL_linestr);
2722#endif
c3e0f903
GS
2723 goto bare_package;
2724 }
90e5519e 2725 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2726 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2727 return 0;
2728 /* filehandle or package name makes it a method */
da51bb9b 2729 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2730#ifdef PERL_MAD
2731 soff = s - SvPVX(PL_linestr);
2732#endif
29595ff2 2733 s = PEEKSPACE(s);
3280af22 2734 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2735 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2736 bare_package:
cd81e915 2737 start_force(PL_curforce);
9ded7720 2738 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2739 newSVpvn(tmpbuf,len));
9ded7720 2740 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2741 if (PL_madskills)
2742 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2743 PL_expect = XTERM;
a0d0e21e 2744 force_next(WORD);
3280af22 2745 PL_bufptr = s;
5db06880
NC
2746#ifdef PERL_MAD
2747 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2748#endif
a0d0e21e
LW
2749 return *s == '(' ? FUNCMETH : METHOD;
2750 }
2751 }
2752 return 0;
2753}
2754
16d20bd9 2755/* Encoded script support. filter_add() effectively inserts a
4e553d73 2756 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2757 * Note that the filter function only applies to the current source file
2758 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2759 *
2760 * The datasv parameter (which may be NULL) can be used to pass
2761 * private data to this instance of the filter. The filter function
2762 * can recover the SV using the FILTER_DATA macro and use it to
2763 * store private buffers and state information.
2764 *
2765 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2766 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2767 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2768 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2769 * private use must be set using malloc'd pointers.
2770 */
16d20bd9
AD
2771
2772SV *
864dbfa3 2773Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2774{
97aff369 2775 dVAR;
f4c556ac 2776 if (!funcp)
a0714e2c 2777 return NULL;
f4c556ac 2778
5486870f
DM
2779 if (!PL_parser)
2780 return NULL;
2781
3280af22
NIS
2782 if (!PL_rsfp_filters)
2783 PL_rsfp_filters = newAV();
16d20bd9 2784 if (!datasv)
561b68a9 2785 datasv = newSV(0);
862a34c6 2786 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2787 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2788 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2789 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2790 FPTR2DPTR(void *, IoANY(datasv)),
2791 SvPV_nolen(datasv)));
3280af22
NIS
2792 av_unshift(PL_rsfp_filters, 1);
2793 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2794 return(datasv);
2795}
4e553d73 2796
16d20bd9
AD
2797
2798/* Delete most recently added instance of this filter function. */
a0d0e21e 2799void
864dbfa3 2800Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2801{
97aff369 2802 dVAR;
e0c19803 2803 SV *datasv;
24801a4b 2804
33073adb 2805#ifdef DEBUGGING
55662e27
JH
2806 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2807 FPTR2DPTR(void*, funcp)));
33073adb 2808#endif
5486870f 2809 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2810 return;
2811 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2812 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2813 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2814 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2815 IoANY(datasv) = (void *)NULL;
3280af22 2816 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2817
16d20bd9
AD
2818 return;
2819 }
2820 /* we need to search for the correct entry and clear it */
cea2e8a9 2821 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2822}
2823
2824
1de9afcd
RGS
2825/* Invoke the idxth filter function for the current rsfp. */
2826/* maxlen 0 = read one text line */
16d20bd9 2827I32
864dbfa3 2828Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2829{
97aff369 2830 dVAR;
16d20bd9
AD
2831 filter_t funcp;
2832 SV *datasv = NULL;
f482118e
NC
2833 /* This API is bad. It should have been using unsigned int for maxlen.
2834 Not sure if we want to change the API, but if not we should sanity
2835 check the value here. */
39cd7a59
NC
2836 const unsigned int correct_length
2837 = maxlen < 0 ?
2838#ifdef PERL_MICRO
2839 0x7FFFFFFF
2840#else
2841 INT_MAX
2842#endif
2843 : maxlen;
e50aee73 2844
5486870f 2845 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2846 return -1;
1de9afcd 2847 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2848 /* Provide a default input filter to make life easy. */
2849 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2850 DEBUG_P(PerlIO_printf(Perl_debug_log,
2851 "filter_read %d: from rsfp\n", idx));
f482118e 2852 if (correct_length) {
16d20bd9
AD
2853 /* Want a block */
2854 int len ;
f54cb97a 2855 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2856
2857 /* ensure buf_sv is large enough */
f482118e
NC
2858 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2859 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2860 correct_length)) <= 0) {
3280af22 2861 if (PerlIO_error(PL_rsfp))
37120919
AD
2862 return -1; /* error */
2863 else
2864 return 0 ; /* end of file */
2865 }
16d20bd9
AD
2866 SvCUR_set(buf_sv, old_len + len) ;
2867 } else {
2868 /* Want a line */
3280af22
NIS
2869 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2870 if (PerlIO_error(PL_rsfp))
37120919
AD
2871 return -1; /* error */
2872 else
2873 return 0 ; /* end of file */
2874 }
16d20bd9
AD
2875 }
2876 return SvCUR(buf_sv);
2877 }
2878 /* Skip this filter slot if filter has been deleted */
1de9afcd 2879 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2880 DEBUG_P(PerlIO_printf(Perl_debug_log,
2881 "filter_read %d: skipped (filter deleted)\n",
2882 idx));
f482118e 2883 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2884 }
2885 /* Get function pointer hidden within datasv */
8141890a 2886 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2887 DEBUG_P(PerlIO_printf(Perl_debug_log,
2888 "filter_read %d: via function %p (%s)\n",
ca0270c4 2889 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2890 /* Call function. The function is expected to */
2891 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2892 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2893 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2894}
2895
76e3520e 2896STATIC char *
cea2e8a9 2897S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2898{
97aff369 2899 dVAR;
c39cd008 2900#ifdef PERL_CR_FILTER
3280af22 2901 if (!PL_rsfp_filters) {
c39cd008 2902 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2903 }
2904#endif
3280af22 2905 if (PL_rsfp_filters) {
55497cff 2906 if (!append)
2907 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2908 if (FILTER_READ(0, sv, 0) > 0)
2909 return ( SvPVX(sv) ) ;
2910 else
bd61b366 2911 return NULL ;
16d20bd9 2912 }
9d116dd7 2913 else
fd049845 2914 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2915}
2916
01ec43d0 2917STATIC HV *
9bde8eb0 2918S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2919{
97aff369 2920 dVAR;
def3634b
GS
2921 GV *gv;
2922
01ec43d0 2923 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2924 return PL_curstash;
2925
2926 if (len > 2 &&
2927 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2928 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2929 {
2930 return GvHV(gv); /* Foo:: */
def3634b
GS
2931 }
2932
2933 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2934 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2935 if (gv && GvCV(gv)) {
2936 SV * const sv = cv_const_sv(GvCV(gv));
2937 if (sv)
9bde8eb0 2938 pkgname = SvPV_const(sv, len);
def3634b
GS
2939 }
2940
9bde8eb0 2941 return gv_stashpvn(pkgname, len, 0);
def3634b 2942}
a0d0e21e 2943
e3f73d4e
RGS
2944/*
2945 * S_readpipe_override
2946 * Check whether readpipe() is overriden, and generates the appropriate
2947 * optree, provided sublex_start() is called afterwards.
2948 */
2949STATIC void
1d51329b 2950S_readpipe_override(pTHX)
e3f73d4e
RGS
2951{
2952 GV **gvp;
2953 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 2954 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
2955 if ((gv_readpipe
2956 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2957 ||
2958 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2959 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2960 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2961 {
2962 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2963 append_elem(OP_LIST,
2964 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2965 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2966 }
e3f73d4e
RGS
2967}
2968
5db06880
NC
2969#ifdef PERL_MAD
2970 /*
2971 * Perl_madlex
2972 * The intent of this yylex wrapper is to minimize the changes to the
2973 * tokener when we aren't interested in collecting madprops. It remains
2974 * to be seen how successful this strategy will be...
2975 */
2976
2977int
2978Perl_madlex(pTHX)
2979{
2980 int optype;
2981 char *s = PL_bufptr;
2982
cd81e915
NC
2983 /* make sure PL_thiswhite is initialized */
2984 PL_thiswhite = 0;
2985 PL_thismad = 0;
5db06880 2986
cd81e915 2987 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2988 if (PL_pending_ident)
2989 return S_pending_ident(aTHX);
2990
2991 /* previous token ate up our whitespace? */
cd81e915
NC
2992 if (!PL_lasttoke && PL_nextwhite) {
2993 PL_thiswhite = PL_nextwhite;
2994 PL_nextwhite = 0;
5db06880
NC
2995 }
2996
2997 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2998 PL_realtokenstart = -1;
2999 PL_thistoken = 0;
5db06880
NC
3000 optype = yylex();
3001 s = PL_bufptr;
cd81e915 3002 assert(PL_curforce < 0);
5db06880 3003
cd81e915
NC
3004 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3005 if (!PL_thistoken) {
3006 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3007 PL_thistoken = newSVpvs("");
5db06880 3008 else {
c35e046a 3009 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3010 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3011 }
3012 }
cd81e915
NC
3013 if (PL_thismad) /* install head */
3014 CURMAD('X', PL_thistoken);
5db06880
NC
3015 }
3016
3017 /* last whitespace of a sublex? */
cd81e915
NC
3018 if (optype == ')' && PL_endwhite) {
3019 CURMAD('X', PL_endwhite);
5db06880
NC
3020 }
3021
cd81e915 3022 if (!PL_thismad) {
5db06880
NC
3023
3024 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3025 if (!PL_thiswhite && !PL_endwhite && !optype) {
3026 sv_free(PL_thistoken);
3027 PL_thistoken = 0;
5db06880
NC
3028 return 0;
3029 }
3030
3031 /* put off final whitespace till peg */
3032 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3033 PL_nextwhite = PL_thiswhite;
3034 PL_thiswhite = 0;
5db06880 3035 }
cd81e915
NC
3036 else if (PL_thisopen) {
3037 CURMAD('q', PL_thisopen);
3038 if (PL_thistoken)
3039 sv_free(PL_thistoken);
3040 PL_thistoken = 0;
5db06880
NC
3041 }
3042 else {
3043 /* Store actual token text as madprop X */
cd81e915 3044 CURMAD('X', PL_thistoken);
5db06880
NC
3045 }
3046
cd81e915 3047 if (PL_thiswhite) {
5db06880 3048 /* add preceding whitespace as madprop _ */
cd81e915 3049 CURMAD('_', PL_thiswhite);
5db06880
NC
3050 }
3051
cd81e915 3052 if (PL_thisstuff) {
5db06880 3053 /* add quoted material as madprop = */
cd81e915 3054 CURMAD('=', PL_thisstuff);
5db06880
NC
3055 }
3056
cd81e915 3057 if (PL_thisclose) {
5db06880 3058 /* add terminating quote as madprop Q */
cd81e915 3059 CURMAD('Q', PL_thisclose);
5db06880
NC
3060 }
3061 }
3062
3063 /* special processing based on optype */
3064
3065 switch (optype) {
3066
3067 /* opval doesn't need a TOKEN since it can already store mp */
3068 case WORD:
3069 case METHOD:
3070 case FUNCMETH:
3071 case THING:
3072 case PMFUNC:
3073 case PRIVATEREF:
3074 case FUNC0SUB:
3075 case UNIOPSUB:
3076 case LSTOPSUB:
6154021b
RGS
3077 if (pl_yylval.opval)
3078 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3079 PL_thismad = 0;
5db06880
NC
3080 return optype;
3081
3082 /* fake EOF */
3083 case 0:
3084 optype = PEG;
cd81e915
NC
3085 if (PL_endwhite) {
3086 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3087 PL_endwhite = 0;
5db06880
NC
3088 }
3089 break;
3090
3091 case ']':
3092 case '}':
cd81e915 3093 if (PL_faketokens)
5db06880
NC
3094 break;
3095 /* remember any fake bracket that lexer is about to discard */
3096 if (PL_lex_brackets == 1 &&
3097 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3098 {
3099 s = PL_bufptr;
3100 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3101 s++;
3102 if (*s == '}') {
cd81e915
NC
3103 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3104 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3105 PL_thiswhite = 0;
5db06880
NC
3106 PL_bufptr = s - 1;
3107 break; /* don't bother looking for trailing comment */
3108 }
3109 else
3110 s = PL_bufptr;
3111 }
3112 if (optype == ']')
3113 break;
3114 /* FALLTHROUGH */
3115
3116 /* attach a trailing comment to its statement instead of next token */
3117 case ';':
cd81e915 3118 if (PL_faketokens)
5db06880
NC
3119 break;
3120 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3121 s = PL_bufptr;
3122 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3123 s++;
3124 if (*s == '\n' || *s == '#') {
3125 while (s < PL_bufend && *s != '\n')
3126 s++;
3127 if (s < PL_bufend)
3128 s++;
cd81e915
NC
3129 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3130 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3131 PL_thiswhite = 0;
5db06880
NC
3132 PL_bufptr = s;
3133 }
3134 }
3135 break;
3136
3137 /* pval */
3138 case LABEL:
3139 break;
3140
3141 /* ival */
3142 default:
3143 break;
3144
3145 }
3146
3147 /* Create new token struct. Note: opvals return early above. */
6154021b 3148 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3149 PL_thismad = 0;
5db06880
NC
3150 return optype;
3151}
3152#endif
3153
468aa647 3154STATIC char *
cc6ed77d 3155S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3156 dVAR;
468aa647
RGS
3157 if (PL_expect != XSTATE)
3158 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3159 is_use ? "use" : "no"));
29595ff2 3160 s = SKIPSPACE1(s);
468aa647
RGS
3161 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3162 s = force_version(s, TRUE);
29595ff2 3163 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3164 start_force(PL_curforce);
9ded7720 3165 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3166 force_next(WORD);
3167 }
3168 else if (*s == 'v') {
3169 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3170 s = force_version(s, FALSE);
3171 }
3172 }
3173 else {
3174 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3175 s = force_version(s, FALSE);
3176 }
6154021b 3177 pl_yylval.ival = is_use;
468aa647
RGS
3178 return s;
3179}
748a9306 3180#ifdef DEBUGGING
27da23d5 3181 static const char* const exp_name[] =
09bef843 3182 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3183 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3184 };
748a9306 3185#endif
463ee0b2 3186
02aa26ce
NT
3187/*
3188 yylex
3189
3190 Works out what to call the token just pulled out of the input
3191 stream. The yacc parser takes care of taking the ops we return and
3192 stitching them into a tree.
3193
3194 Returns:
3195 PRIVATEREF
3196
3197 Structure:
3198 if read an identifier
3199 if we're in a my declaration
3200 croak if they tried to say my($foo::bar)
3201 build the ops for a my() declaration
3202 if it's an access to a my() variable
3203 are we in a sort block?
3204 croak if my($a); $a <=> $b
3205 build ops for access to a my() variable
3206 if in a dq string, and they've said @foo and we can't find @foo
3207 croak
3208 build ops for a bareword
3209 if we already built the token before, use it.
3210*/
3211
20141f0e 3212
dba4d153
JH
3213#ifdef __SC__
3214#pragma segment Perl_yylex
3215#endif
dba4d153 3216int
dba4d153 3217Perl_yylex(pTHX)
20141f0e 3218{
97aff369 3219 dVAR;
3afc138a 3220 register char *s = PL_bufptr;
378cc40b 3221 register char *d;
463ee0b2 3222 STRLEN len;
aa7440fb 3223 bool bof = FALSE;
a687059c 3224
10edeb5d
JH
3225 /* orig_keyword, gvp, and gv are initialized here because
3226 * jump to the label just_a_word_zero can bypass their
3227 * initialization later. */
3228 I32 orig_keyword = 0;
3229 GV *gv = NULL;
3230 GV **gvp = NULL;
3231
bbf60fe6 3232 DEBUG_T( {
396482e1 3233 SV* tmp = newSVpvs("");
b6007c36
DM
3234 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3235 (IV)CopLINE(PL_curcop),
3236 lex_state_names[PL_lex_state],
3237 exp_name[PL_expect],
3238 pv_display(tmp, s, strlen(s), 0, 60));
3239 SvREFCNT_dec(tmp);
bbf60fe6 3240 } );
02aa26ce 3241 /* check if there's an identifier for us to look at */
ba979b31 3242 if (PL_pending_ident)
bbf60fe6 3243 return REPORT(S_pending_ident(aTHX));
bbce6d69 3244
02aa26ce
NT
3245 /* no identifier pending identification */
3246
3280af22 3247 switch (PL_lex_state) {
79072805
LW
3248#ifdef COMMENTARY
3249 case LEX_NORMAL: /* Some compilers will produce faster */
3250 case LEX_INTERPNORMAL: /* code if we comment these out. */
3251 break;
3252#endif
3253
09bef843 3254 /* when we've already built the next token, just pull it out of the queue */
79072805 3255 case LEX_KNOWNEXT:
5db06880
NC
3256#ifdef PERL_MAD
3257 PL_lasttoke--;
6154021b 3258 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3259 if (PL_madskills) {
cd81e915 3260 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3261 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3262 if (PL_thismad && PL_thismad->mad_key == '_') {
3263 PL_thiswhite = (SV*)PL_thismad->mad_val;
3264 PL_thismad->mad_val = 0;
3265 mad_free(PL_thismad);
3266 PL_thismad = 0;
5db06880
NC
3267 }
3268 }
3269 if (!PL_lasttoke) {
3270 PL_lex_state = PL_lex_defer;
3271 PL_expect = PL_lex_expect;
3272 PL_lex_defer = LEX_NORMAL;
3273 if (!PL_nexttoke[PL_lasttoke].next_type)
3274 return yylex();
3275 }
3276#else
3280af22 3277 PL_nexttoke--;
6154021b 3278 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3279 if (!PL_nexttoke) {
3280 PL_lex_state = PL_lex_defer;
3281 PL_expect = PL_lex_expect;
3282 PL_lex_defer = LEX_NORMAL;
463ee0b2 3283 }
5db06880
NC
3284#endif
3285#ifdef PERL_MAD
3286 /* FIXME - can these be merged? */
3287 return(PL_nexttoke[PL_lasttoke].next_type);
3288#else
bbf60fe6 3289 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3290#endif
79072805 3291
02aa26ce 3292 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3293 when we get here, PL_bufptr is at the \
02aa26ce 3294 */
79072805
LW
3295 case LEX_INTERPCASEMOD:
3296#ifdef DEBUGGING
3280af22 3297 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3298 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3299#endif
02aa26ce 3300 /* handle \E or end of string */
3280af22 3301 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3302 /* if at a \E */
3280af22 3303 if (PL_lex_casemods) {
f54cb97a 3304 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3305 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3306
3792a11b
NC
3307 if (PL_bufptr != PL_bufend
3308 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3309 PL_bufptr += 2;
3310 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3311#ifdef PERL_MAD
3312 if (PL_madskills)
6b29d1f5 3313 PL_thistoken = newSVpvs("\\E");
5db06880 3314#endif
a0d0e21e 3315 }
bbf60fe6 3316 return REPORT(')');
79072805 3317 }
5db06880
NC
3318#ifdef PERL_MAD
3319 while (PL_bufptr != PL_bufend &&
3320 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3321 if (!PL_thiswhite)
6b29d1f5 3322 PL_thiswhite = newSVpvs("");
cd81e915 3323 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3324 PL_bufptr += 2;
3325 }
3326#else
3280af22
NIS
3327 if (PL_bufptr != PL_bufend)
3328 PL_bufptr += 2;
5db06880 3329#endif
3280af22 3330 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3331 return yylex();
79072805
LW
3332 }
3333 else {
607df283 3334 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3335 "### Saw case modifier\n"); });
3280af22 3336 s = PL_bufptr + 1;
6e909404 3337 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3338#ifdef PERL_MAD
cd81e915 3339 if (!PL_thiswhite)
6b29d1f5 3340 PL_thiswhite = newSVpvs("");
cd81e915 3341 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3342#endif
89122651 3343 PL_bufptr = s + 3;
6e909404
JH
3344 PL_lex_state = LEX_INTERPCONCAT;
3345 return yylex();
a0d0e21e 3346 }
6e909404 3347 else {
90771dc0 3348 I32 tmp;
5db06880
NC
3349 if (!PL_madskills) /* when just compiling don't need correct */
3350 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3351 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3352 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3353 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3354 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3355 return REPORT(')');
6e909404
JH
3356 }
3357 if (PL_lex_casemods > 10)
3358 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3359 PL_lex_casestack[PL_lex_casemods++] = *s;
3360 PL_lex_casestack[PL_lex_casemods] = '\0';
3361 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3362 start_force(PL_curforce);
9ded7720 3363 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3364 force_next('(');
cd81e915 3365 start_force(PL_curforce);
6e909404 3366 if (*s == 'l')
9ded7720 3367 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3368 else if (*s == 'u')
9ded7720 3369 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3370 else if (*s == 'L')
9ded7720 3371 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3372 else if (*s == 'U')
9ded7720 3373 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3374 else if (*s == 'Q')
9ded7720 3375 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3376 else
3377 Perl_croak(aTHX_ "panic: yylex");
5db06880 3378 if (PL_madskills) {
a5849ce5
NC
3379 SV* const tmpsv = newSVpvs("\\ ");
3380 /* replace the space with the character we want to escape
3381 */
3382 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3383 curmad('_', tmpsv);
3384 }
6e909404 3385 PL_bufptr = s + 1;
a0d0e21e 3386 }
79072805 3387 force_next(FUNC);
3280af22
NIS
3388 if (PL_lex_starts) {
3389 s = PL_bufptr;
3390 PL_lex_starts = 0;
5db06880
NC
3391#ifdef PERL_MAD
3392 if (PL_madskills) {
cd81e915
NC
3393 if (PL_thistoken)
3394 sv_free(PL_thistoken);
6b29d1f5 3395 PL_thistoken = newSVpvs("");
5db06880
NC
3396 }
3397#endif
131b3ad0
DM
3398 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3399 if (PL_lex_casemods == 1 && PL_lex_inpat)
3400 OPERATOR(',');
3401 else
3402 Aop(OP_CONCAT);
79072805
LW
3403 }
3404 else
cea2e8a9 3405 return yylex();
79072805
LW
3406 }
3407
55497cff 3408 case LEX_INTERPPUSH:
bbf60fe6 3409 return REPORT(sublex_push());
55497cff 3410
79072805 3411 case LEX_INTERPSTART:
3280af22 3412 if (PL_bufptr == PL_bufend)
bbf60fe6 3413 return REPORT(sublex_done());
607df283 3414 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3415 "### Interpolated variable\n"); });
3280af22
NIS
3416 PL_expect = XTERM;
3417 PL_lex_dojoin = (*PL_bufptr == '@');
3418 PL_lex_state = LEX_INTERPNORMAL;
3419 if (PL_lex_dojoin) {
cd81e915 3420 start_force(PL_curforce);
9ded7720 3421 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3422 force_next(',');
cd81e915 3423 start_force(PL_curforce);
a0d0e21e 3424 force_ident("\"", '$');
cd81e915 3425 start_force(PL_curforce);
9ded7720 3426 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3427 force_next('$');
cd81e915 3428 start_force(PL_curforce);
9ded7720 3429 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3430 force_next('(');
cd81e915 3431 start_force(PL_curforce);
9ded7720 3432 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3433 force_next(FUNC);
3434 }
3280af22
NIS
3435 if (PL_lex_starts++) {
3436 s = PL_bufptr;
5db06880
NC
3437#ifdef PERL_MAD
3438 if (PL_madskills) {
cd81e915
NC
3439 if (PL_thistoken)
3440 sv_free(PL_thistoken);
6b29d1f5 3441 PL_thistoken = newSVpvs("");
5db06880
NC
3442 }
3443#endif
131b3ad0
DM
3444 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3445 if (!PL_lex_casemods && PL_lex_inpat)
3446 OPERATOR(',');
3447 else
3448 Aop(OP_CONCAT);
79072805 3449 }
cea2e8a9 3450 return yylex();
79072805
LW
3451
3452 case LEX_INTERPENDMAYBE:
3280af22
NIS
3453 if (intuit_more(PL_bufptr)) {
3454 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3455 break;
3456 }
3457 /* FALL THROUGH */
3458
3459 case LEX_INTERPEND:
3280af22
NIS
3460 if (PL_lex_dojoin) {
3461 PL_lex_dojoin = FALSE;
3462 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3463#ifdef PERL_MAD
3464 if (PL_madskills) {
cd81e915
NC
3465 if (PL_thistoken)
3466 sv_free(PL_thistoken);
6b29d1f5 3467 PL_thistoken = newSVpvs("");
5db06880
NC
3468 }
3469#endif
bbf60fe6 3470 return REPORT(')');
79072805 3471 }
43a16006 3472 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3473 && SvEVALED(PL_lex_repl))
43a16006 3474 {
e9fa98b2 3475 if (PL_bufptr != PL_bufend)
cea2e8a9 3476 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3477 PL_lex_repl = NULL;
e9fa98b2 3478 }
79072805
LW
3479 /* FALLTHROUGH */
3480 case LEX_INTERPCONCAT:
3481#ifdef DEBUGGING
3280af22 3482 if (PL_lex_brackets)
cea2e8a9 3483 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3484#endif
3280af22 3485 if (PL_bufptr == PL_bufend)
bbf60fe6 3486 return REPORT(sublex_done());
79072805 3487
3280af22
NIS
3488 if (SvIVX(PL_linestr) == '\'') {
3489 SV *sv = newSVsv(PL_linestr);
3490 if (!PL_lex_inpat)
76e3520e 3491 sv = tokeq(sv);
3280af22 3492 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 3493 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 3494 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3495 s = PL_bufend;
79072805
LW
3496 }
3497 else {
3280af22 3498 s = scan_const(PL_bufptr);
79072805 3499 if (*s == '\\')
3280af22 3500 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3501 else
3280af22 3502 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3503 }
3504
3280af22 3505 if (s != PL_bufptr) {
cd81e915 3506 start_force(PL_curforce);
5db06880
NC
3507 if (PL_madskills) {
3508 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3509 }
6154021b 3510 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 3511 PL_expect = XTERM;
79072805 3512 force_next(THING);
131b3ad0 3513 if (PL_lex_starts++) {
5db06880
NC
3514#ifdef PERL_MAD
3515 if (PL_madskills) {
cd81e915
NC
3516 if (PL_thistoken)
3517 sv_free(PL_thistoken);
6b29d1f5 3518 PL_thistoken = newSVpvs("");
5db06880
NC
3519 }
3520#endif
131b3ad0
DM
3521 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3522 if (!PL_lex_casemods && PL_lex_inpat)
3523 OPERATOR(',');
3524 else
3525 Aop(OP_CONCAT);
3526 }
79072805 3527 else {
3280af22 3528 PL_bufptr = s;
cea2e8a9 3529 return yylex();
79072805
LW
3530 }
3531 }