This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't call strlen() on CopFILE() for the unthreaded case, because the
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
eb0d8d16
NC
26#define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
28
5912531f 29#define yylval (PL_parser->yylval)
d3b6f988 30
acdf0a21
DM
31/* YYINITDEPTH -- initial size of the parser's stacks. */
32#define YYINITDEPTH 200
33
199e78b7
DM
34/* XXX temporary backwards compatibility */
35#define PL_lex_brackets (PL_parser->lex_brackets)
36#define PL_lex_brackstack (PL_parser->lex_brackstack)
37#define PL_lex_casemods (PL_parser->lex_casemods)
38#define PL_lex_casestack (PL_parser->lex_casestack)
39#define PL_lex_defer (PL_parser->lex_defer)
40#define PL_lex_dojoin (PL_parser->lex_dojoin)
41#define PL_lex_expect (PL_parser->lex_expect)
42#define PL_lex_formbrack (PL_parser->lex_formbrack)
43#define PL_lex_inpat (PL_parser->lex_inpat)
44#define PL_lex_inwhat (PL_parser->lex_inwhat)
45#define PL_lex_op (PL_parser->lex_op)
46#define PL_lex_repl (PL_parser->lex_repl)
47#define PL_lex_starts (PL_parser->lex_starts)
48#define PL_lex_stuff (PL_parser->lex_stuff)
49#define PL_multi_start (PL_parser->multi_start)
50#define PL_multi_open (PL_parser->multi_open)
51#define PL_multi_close (PL_parser->multi_close)
52#define PL_pending_ident (PL_parser->pending_ident)
53#define PL_preambled (PL_parser->preambled)
54#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 55#define PL_linestr (PL_parser->linestr)
c2598295
DM
56#define PL_expect (PL_parser->expect)
57#define PL_copline (PL_parser->copline)
f06b5848
DM
58#define PL_bufptr (PL_parser->bufptr)
59#define PL_oldbufptr (PL_parser->oldbufptr)
60#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61#define PL_linestart (PL_parser->linestart)
62#define PL_bufend (PL_parser->bufend)
63#define PL_last_uni (PL_parser->last_uni)
64#define PL_last_lop (PL_parser->last_lop)
65#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 66#define PL_lex_state (PL_parser->lex_state)
2f9285f8 67#define PL_rsfp (PL_parser->rsfp)
5486870f 68#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
69#define PL_in_my (PL_parser->in_my)
70#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 71#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 72#define PL_multi_end (PL_parser->multi_end)
13765c85 73#define PL_error_count (PL_parser->error_count)
199e78b7
DM
74
75#ifdef PERL_MAD
76# define PL_endwhite (PL_parser->endwhite)
77# define PL_faketokens (PL_parser->faketokens)
78# define PL_lasttoke (PL_parser->lasttoke)
79# define PL_nextwhite (PL_parser->nextwhite)
80# define PL_realtokenstart (PL_parser->realtokenstart)
81# define PL_skipwhite (PL_parser->skipwhite)
82# define PL_thisclose (PL_parser->thisclose)
83# define PL_thismad (PL_parser->thismad)
84# define PL_thisopen (PL_parser->thisopen)
85# define PL_thisstuff (PL_parser->thisstuff)
86# define PL_thistoken (PL_parser->thistoken)
87# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
88# define PL_thiswhite (PL_parser->thiswhite)
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_curforce (PL_parser->curforce)
91#else
92# define PL_nexttoke (PL_parser->nexttoke)
93# define PL_nexttype (PL_parser->nexttype)
94# define PL_nextval (PL_parser->nextval)
199e78b7
DM
95#endif
96
3cbf51f5
DM
97static int
98S_pending_ident(pTHX);
199e78b7 99
0bd48802 100static const char ident_too_long[] = "Identifier too long";
c445ea15 101static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 102
6e3aabd6 103#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
104static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 106#endif
51371543 107
29595ff2 108#ifdef PERL_MAD
29595ff2 109# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 110# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 111#else
5db06880 112# define CURMAD(slot,sv)
9ded7720 113# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
114#endif
115
9059aa12
LW
116#define XFAKEBRACK 128
117#define XENUMMASK 127
118
39e02b42
JH
119#ifdef USE_UTF8_SCRIPTS
120# define UTF (!IN_BYTES)
2b9d42f0 121#else
746b446a 122# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 123#endif
a0ed51b3 124
61f0cdd9 125/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
126 * 1999-02-27 mjd-perl-patch@plover.com */
127#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128
bf4acbe4
GS
129/* On MacOS, respect nonbreaking spaces */
130#ifdef MACOS_TRADITIONAL
131#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
132#else
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
134#endif
135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a
PP
141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 230# define REPORT(retval) tokereport((I32)retval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf
AL
261#define UNI2(f,x) { \
262 yylval.ival = f; \
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf
AL
275#define UNIBRACK(f) { \
276 yylval.ival = f; \
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
3280af22 286#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
bbf60fe6
DM
290/* how to interpret the yylval associated with the token */
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
294 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 371 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
372};
373
374/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 375
bbf60fe6 376STATIC int
f5bd084c 377S_tokereport(pTHX_ I32 rv)
bbf60fe6 378{
97aff369 379 dVAR;
bbf60fe6 380 if (DEBUG_T_TEST) {
bd61b366 381 const char *name = NULL;
bbf60fe6 382 enum token_type type = TOKENTYPE_NONE;
f54cb97a 383 const struct debug_tokens *p;
396482e1 384 SV* const report = newSVpvs("<== ");
bbf60fe6 385
f54cb97a 386 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
387 if (p->token == (int)rv) {
388 name = p->name;
389 type = p->type;
390 break;
391 }
392 }
393 if (name)
54667de8 394 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
397 else if (!rv)
396482e1 398 sv_catpvs(report, "EOF");
bbf60fe6
DM
399 else
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
401 switch (type) {
402 case TOKENTYPE_NONE:
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
404 break;
405 case TOKENTYPE_IVAL:
e4584336 406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
407 break;
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[yylval.ival]);
411 break;
412 case TOKENTYPE_PVAL:
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
414 break;
415 case TOKENTYPE_OPVAL:
b6007c36 416 if (yylval.opval) {
401441c0 417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 418 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
419 if (yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(yylval.opval)));
422 }
423
424 }
401441c0 425 else
396482e1 426 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
427 break;
428 }
b6007c36 429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
430 };
431 return (int)rv;
998054bd
SC
432}
433
b6007c36
DM
434
435/* print the buffer with suitable escapes */
436
437STATIC void
438S_printbuf(pTHX_ const char* fmt, const char* s)
439{
396482e1 440 SV* const tmp = newSVpvs("");
b6007c36
DM
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
442 SvREFCNT_dec(tmp);
443}
444
8fa7f367
JH
445#endif
446
ffb4593c
NT
447/*
448 * S_ao
449 *
c963b151
BD
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
452 */
453
76e3520e 454STATIC int
cea2e8a9 455S_ao(pTHX_ int toketype)
a0d0e21e 456{
97aff369 457 dVAR;
3280af22
NIS
458 if (*PL_bufptr == '=') {
459 PL_bufptr++;
a0d0e21e
LW
460 if (toketype == ANDAND)
461 yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 yylval.ival = OP_ORASSIGN;
c963b151
BD
464 else if (toketype == DORDOR)
465 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
466 toketype = ASSIGNOP;
467 }
468 return toketype;
469}
470
ffb4593c
NT
471/*
472 * S_no_op
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
482 */
483
76e3520e 484STATIC void
bfed75c6 485S_no_op(pTHX_ const char *what, char *s)
463ee0b2 486{
97aff369 487 dVAR;
9d4ba2ae
AL
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 490
1189a94a
GS
491 if (!s)
492 s = oldbp;
07c798fb 493 else
1189a94a 494 PL_bufptr = s;
cea2e8a9 495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
496 if (ckWARN_d(WARN_SYNTAX)) {
497 if (is_first)
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 501 const char *t;
c35e046a
AL
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
503 NOOP;
56da5a46
RGS
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
551405c4 507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
508 }
509 else {
510 assert(s >= oldbp);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 513 }
07c798fb 514 }
3280af22 515 PL_bufptr = oldbp;
8990e307
LW
516}
517
ffb4593c
NT
518/*
519 * S_missingterm
520 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 521 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
524 * This is fatal.
525 */
526
76e3520e 527STATIC void
cea2e8a9 528S_missingterm(pTHX_ char *s)
8990e307 529{
97aff369 530 dVAR;
8990e307
LW
531 char tmpbuf[3];
532 char q;
533 if (s) {
9d4ba2ae 534 char * const nl = strrchr(s,'\n');
d2719217 535 if (nl)
8990e307
LW
536 *nl = '\0';
537 }
9d116dd7
JH
538 else if (
539#ifdef EBCDIC
540 iscntrl(PL_multi_close)
541#else
542 PL_multi_close < 32 || PL_multi_close == 127
543#endif
544 ) {
8990e307 545 *tmpbuf = '^';
585ec06d 546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
547 tmpbuf[2] = '\0';
548 s = tmpbuf;
549 }
550 else {
eb160463 551 *tmpbuf = (char)PL_multi_close;
8990e307
LW
552 tmpbuf[1] = '\0';
553 s = tmpbuf;
554 }
555 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 557}
79072805 558
ef89dcc3 559#define FEATURE_IS_ENABLED(name) \
0d863452 560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
562/* The longest string we pass in. */
563#define MAX_FEATURE_LEN (sizeof("switch")-1)
564
0d863452
RH
565/*
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
568 */
569STATIC bool
d4c19fe8 570S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 571{
97aff369 572 dVAR;
0d863452 573 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b
NC
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
d4c19fe8 577
7b9ef140 578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
579}
580
ffb4593c
NT
581/*
582 * Perl_deprecate
ffb4593c
NT
583 */
584
79072805 585void
bfed75c6 586Perl_deprecate(pTHX_ const char *s)
a0d0e21e 587{
599cee73 588 if (ckWARN(WARN_DEPRECATED))
9014280d 589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
590}
591
12bcd1a6 592void
bfed75c6 593Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
594{
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
597 /* */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
602
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
605 "Use of %s is deprecated", s);
606}
607
ffb4593c 608/*
9cbb5ea2
GS
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
ffb4593c
NT
611 */
612
c39cd008
GS
613#ifdef PERL_CR_FILTER
614static void
615strip_return(SV *sv)
616{
95a20fc0 617 register const char *s = SvPVX_const(sv);
9d4ba2ae 618 register const char * const e = s + SvCUR(sv);
c39cd008
GS
619 /* outer loop optimized to do nothing if there are no CR-LFs */
620 while (s < e) {
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
624 *d++ = *s++;
625 while (s < e) {
626 if (*s == '\r' && s[1] == '\n')
627 s++;
628 *d++ = *s++;
629 }
630 SvCUR(sv) -= s - d;
631 return;
632 }
633 }
634}
a868473f 635
76e3520e 636STATIC I32
c39cd008 637S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 638{
f54cb97a 639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
640 if (count > 0 && !maxlen)
641 strip_return(sv);
642 return count;
a868473f
NIS
643}
644#endif
645
199e78b7
DM
646
647
ffb4593c
NT
648/*
649 * Perl_lex_start
5486870f 650 *
e3abe207 651 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
652 *
653 * rsfp is the opened file handle to read from (if any),
654 *
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
657 *
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
ffb4593c
NT
660 */
661
a0d0e21e 662void
5486870f 663Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 664{
97aff369 665 dVAR;
6ef55633 666 const char *s = NULL;
8990e307 667 STRLEN len;
5486870f 668 yy_parser *parser, *oparser;
acdf0a21
DM
669
670 /* create and initialise a parser */
671
199e78b7 672 Newxz(parser, 1, yy_parser);
5486870f 673 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
674 PL_parser = parser;
675
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
679
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
683
e3abe207
DM
684 /* on scope exit, free this parser and restore any outer one */
685 SAVEPARSER(parser);
7c4baf47 686 parser->saved_curcop = PL_curcop;
e3abe207 687
acdf0a21 688 /* initialise lexer state */
8990e307 689
fb205e7a
DM
690#ifdef PERL_MAD
691 parser->curforce = -1;
692#else
693 parser->nexttoke = 0;
694#endif
c2598295 695 parser->copline = NOLINE;
5afb0a62 696 parser->lex_state = LEX_NORMAL;
c2598295 697 parser->expect = XSTATE;
2f9285f8 698 parser->rsfp = rsfp;
56b27c9a 699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
5486870f 700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
2f9285f8 701
199e78b7
DM
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
02b34bbe 705
10efb74f
NC
706 if (line) {
707 s = SvPV_const(line, len);
708 } else {
709 len = 0;
710 }
bdc0bf6f 711
10efb74f 712 if (!len) {
bdc0bf6f 713 parser->linestr = newSVpvs("\n;");
10efb74f 714 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 715 parser->linestr = newSVsv(line);
10efb74f 716 if (s[len-1] != ';')
bdc0bf6f 717 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
718 } else {
719 SvTEMP_off(line);
720 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 721 parser->linestr = line;
8990e307 722 }
f06b5848
DM
723 parser->oldoldbufptr =
724 parser->oldbufptr =
725 parser->bufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
79072805 729}
a687059c 730
e3abe207
DM
731
732/* delete a parser object */
733
734void
735Perl_parser_free(pTHX_ const yy_parser *parser)
736{
7c4baf47 737 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
738 SvREFCNT_dec(parser->linestr);
739
2f9285f8
DM
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
5486870f 745 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 746
e3abe207
DM
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
751 Safefree(parser);
752}
753
754
ffb4593c
NT
755/*
756 * Perl_lex_end
9cbb5ea2
GS
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
ffb4593c
NT
759 */
760
463ee0b2 761void
864dbfa3 762Perl_lex_end(pTHX)
463ee0b2 763{
97aff369 764 dVAR;
3280af22 765 PL_doextract = FALSE;
463ee0b2
LW
766}
767
ffb4593c
NT
768/*
769 * S_incline
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
57843af0 772 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 773 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
776 */
777
76e3520e 778STATIC void
d9095cec 779S_incline(pTHX_ const char *s)
463ee0b2 780{
97aff369 781 dVAR;
d9095cec
NC
782 const char *t;
783 const char *n;
784 const char *e;
463ee0b2 785
57843af0 786 CopLINE_inc(PL_curcop);
463ee0b2
LW
787 if (*s++ != '#')
788 return;
d4c19fe8
AL
789 while (SPACE_OR_TAB(*s))
790 s++;
73659bf1
GS
791 if (strnEQ(s, "line", 4))
792 s += 4;
793 else
794 return;
084592ab 795 if (SPACE_OR_TAB(*s))
73659bf1 796 s++;
4e553d73 797 else
73659bf1 798 return;
d4c19fe8
AL
799 while (SPACE_OR_TAB(*s))
800 s++;
463ee0b2
LW
801 if (!isDIGIT(*s))
802 return;
d4c19fe8 803
463ee0b2
LW
804 n = s;
805 while (isDIGIT(*s))
806 s++;
bf4acbe4 807 while (SPACE_OR_TAB(*s))
463ee0b2 808 s++;
73659bf1 809 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 810 s++;
73659bf1
GS
811 e = t + 1;
812 }
463ee0b2 813 else {
c35e046a
AL
814 t = s;
815 while (!isSPACE(*t))
816 t++;
73659bf1 817 e = t;
463ee0b2 818 }
bf4acbe4 819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
820 e++;
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
823
f4dd75d9 824 if (t - s > 0) {
d9095cec 825 const STRLEN len = t - s;
8a5ee598 826#ifndef USE_ITHREADS
19bad673
NC
827 SV *const temp_sv = CopFILESV(PL_curcop);
828 const char *cf;
829 STRLEN tmplen;
830
831 if (temp_sv) {
832 cf = SvPVX(temp_sv);
833 tmplen = SvCUR(temp_sv);
834 } else {
835 cf = NULL;
836 tmplen = 0;
837 }
838
42d9b98d 839 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
840 /* must copy *{"::_<(eval N)[oldfilename:L]"}
841 * to *{"::_<newfilename"} */
44867030
NC
842 /* However, the long form of evals is only turned on by the
843 debugger - usually they're "(eval %lu)" */
844 char smallbuf[128];
845 char *tmpbuf;
846 GV **gvp;
d9095cec 847 STRLEN tmplen2 = len;
798b63bc 848 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
849 tmpbuf = smallbuf;
850 else
2ae0db35 851 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
852 tmpbuf[0] = '_';
853 tmpbuf[1] = '<';
2ae0db35 854 memcpy(tmpbuf + 2, cf, tmplen);
44867030 855 tmplen += 2;
8a5ee598
RGS
856 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
857 if (gvp) {
44867030
NC
858 char *tmpbuf2;
859 GV *gv2;
860
861 if (tmplen2 + 2 <= sizeof smallbuf)
862 tmpbuf2 = smallbuf;
863 else
864 Newx(tmpbuf2, tmplen2 + 2, char);
865
866 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
867 /* Either they malloc'd it, or we malloc'd it,
868 so no prefix is present in ours. */
869 tmpbuf2[0] = '_';
870 tmpbuf2[1] = '<';
871 }
872
873 memcpy(tmpbuf2 + 2, s, tmplen2);
874 tmplen2 += 2;
875
8a5ee598 876 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 877 if (!isGV(gv2)) {
8a5ee598 878 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
879 /* adjust ${"::_<newfilename"} to store the new file name */
880 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
881 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
882 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
883 }
44867030
NC
884
885 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 886 }
e66cf94c 887 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 888 }
8a5ee598 889#endif
05ec9bb3 890 CopFILE_free(PL_curcop);
d9095cec 891 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 892 }
57843af0 893 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
894}
895
29595ff2 896#ifdef PERL_MAD
cd81e915 897/* skip space before PL_thistoken */
29595ff2
NC
898
899STATIC char *
900S_skipspace0(pTHX_ register char *s)
901{
902 s = skipspace(s);
903 if (!PL_madskills)
904 return s;
cd81e915
NC
905 if (PL_skipwhite) {
906 if (!PL_thiswhite)
6b29d1f5 907 PL_thiswhite = newSVpvs("");
cd81e915
NC
908 sv_catsv(PL_thiswhite, PL_skipwhite);
909 sv_free(PL_skipwhite);
910 PL_skipwhite = 0;
911 }
912 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
913 return s;
914}
915
cd81e915 916/* skip space after PL_thistoken */
29595ff2
NC
917
918STATIC char *
919S_skipspace1(pTHX_ register char *s)
920{
d4c19fe8 921 const char *start = s;
29595ff2
NC
922 I32 startoff = start - SvPVX(PL_linestr);
923
924 s = skipspace(s);
925 if (!PL_madskills)
926 return s;
927 start = SvPVX(PL_linestr) + startoff;
cd81e915 928 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 929 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
930 PL_thistoken = newSVpvn(tstart, start - tstart);
931 }
932 PL_realtokenstart = -1;
933 if (PL_skipwhite) {
934 if (!PL_nextwhite)
6b29d1f5 935 PL_nextwhite = newSVpvs("");
cd81e915
NC
936 sv_catsv(PL_nextwhite, PL_skipwhite);
937 sv_free(PL_skipwhite);
938 PL_skipwhite = 0;
29595ff2
NC
939 }
940 return s;
941}
942
943STATIC char *
944S_skipspace2(pTHX_ register char *s, SV **svp)
945{
c35e046a
AL
946 char *start;
947 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
948 const I32 startoff = s - SvPVX(PL_linestr);
949
29595ff2
NC
950 s = skipspace(s);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
953 return s;
954 start = SvPVX(PL_linestr) + startoff;
cd81e915 955 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
29595ff2 959 }
cd81e915 960 if (PL_skipwhite) {
29595ff2 961 if (!*svp)
6b29d1f5 962 *svp = newSVpvs("");
cd81e915
NC
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
965 PL_skipwhite = 0;
29595ff2
NC
966 }
967
968 return s;
969}
970#endif
971
80a702cd 972STATIC void
5fa550fb 973S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
974{
975 AV *av = CopFILEAVx(PL_curcop);
976 if (av) {
b9f83d2f 977 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
978 if (orig_sv)
979 sv_setsv(sv, orig_sv);
980 else
981 sv_setpvn(sv, buf, len);
80a702cd
RGS
982 (void)SvIOK_on(sv);
983 SvIV_set(sv, 0);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
985 }
986}
987
ffb4593c
NT
988/*
989 * S_skipspace
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
992 */
993
76e3520e 994STATIC char *
cea2e8a9 995S_skipspace(pTHX_ register char *s)
a687059c 996{
97aff369 997 dVAR;
5db06880
NC
998#ifdef PERL_MAD
999 int curoff;
1000 int startoff = s - SvPVX(PL_linestr);
1001
cd81e915
NC
1002 if (PL_skipwhite) {
1003 sv_free(PL_skipwhite);
1004 PL_skipwhite = 0;
5db06880
NC
1005 }
1006#endif
1007
3280af22 1008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1009 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1010 s++;
5db06880
NC
1011#ifdef PERL_MAD
1012 goto done;
1013#else
463ee0b2 1014 return s;
5db06880 1015#endif
463ee0b2
LW
1016 }
1017 for (;;) {
fd049845 1018 STRLEN prevlen;
09bef843 1019 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1020 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1021 while (s < PL_bufend && isSPACE(*s)) {
1022 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1023 incline(s);
1024 }
ffb4593c
NT
1025
1026 /* comment */
3280af22
NIS
1027 if (s < PL_bufend && *s == '#') {
1028 while (s < PL_bufend && *s != '\n')
463ee0b2 1029 s++;
60e6418e 1030 if (s < PL_bufend) {
463ee0b2 1031 s++;
60e6418e
GS
1032 if (PL_in_eval && !PL_rsfp) {
1033 incline(s);
1034 continue;
1035 }
1036 }
463ee0b2 1037 }
ffb4593c
NT
1038
1039 /* only continue to recharge the buffer if we're at the end
1040 * of the buffer, we're not reading from a source filter, and
1041 * we're in normal lexing mode
1042 */
09bef843
SB
1043 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1044 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1045#ifdef PERL_MAD
1046 goto done;
1047#else
463ee0b2 1048 return s;
5db06880 1049#endif
ffb4593c
NT
1050
1051 /* try to recharge the buffer */
5db06880
NC
1052#ifdef PERL_MAD
1053 curoff = s - SvPVX(PL_linestr);
1054#endif
1055
9cbb5ea2 1056 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1057 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1058 {
5db06880
NC
1059#ifdef PERL_MAD
1060 if (PL_madskills && curoff != startoff) {
cd81e915 1061 if (!PL_skipwhite)
6b29d1f5 1062 PL_skipwhite = newSVpvs("");
cd81e915 1063 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1064 curoff - startoff);
1065 }
1066
1067 /* mustn't throw out old stuff yet if madpropping */
1068 SvCUR(PL_linestr) = curoff;
1069 s = SvPVX(PL_linestr) + curoff;
1070 *s = 0;
1071 if (curoff && s[-1] == '\n')
1072 s[-1] = ' ';
1073#endif
1074
9cbb5ea2 1075 /* end of file. Add on the -p or -n magic */
cd81e915 1076 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1077 if (PL_minus_p) {
5db06880 1078#ifdef PERL_MAD
6502358f 1079 sv_catpvs(PL_linestr,
5db06880
NC
1080 ";}continue{print or die qq(-p destination: $!\\n);}");
1081#else
6502358f 1082 sv_setpvs(PL_linestr,
01a19ab0 1083 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1084#endif
3280af22 1085 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1086 }
01a19ab0 1087 else if (PL_minus_n) {
5db06880
NC
1088#ifdef PERL_MAD
1089 sv_catpvn(PL_linestr, ";}", 2);
1090#else
01a19ab0 1091 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1092#endif
01a19ab0
NC
1093 PL_minus_n = 0;
1094 }
a0d0e21e 1095 else
5db06880
NC
1096#ifdef PERL_MAD
1097 sv_catpvn(PL_linestr,";", 1);
1098#else
4147a61b 1099 sv_setpvn(PL_linestr,";", 1);
5db06880 1100#endif
ffb4593c
NT
1101
1102 /* reset variables for next time we lex */
9cbb5ea2 1103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1104 = SvPVX(PL_linestr)
1105#ifdef PERL_MAD
1106 + curoff
1107#endif
1108 ;
3280af22 1109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1110 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1111
1112 /* Close the filehandle. Could be from -P preprocessor,
1113 * STDIN, or a regular file. If we were reading code from
1114 * STDIN (because the commandline held no -e or filename)
1115 * then we don't close it, we reset it so the code can
1116 * read from STDIN too.
1117 */
1118
3280af22
NIS
1119 if (PL_preprocess && !PL_in_eval)
1120 (void)PerlProc_pclose(PL_rsfp);
1121 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1122 PerlIO_clearerr(PL_rsfp);
8990e307 1123 else
3280af22 1124 (void)PerlIO_close(PL_rsfp);
4608196e 1125 PL_rsfp = NULL;
463ee0b2
LW
1126 return s;
1127 }
ffb4593c
NT
1128
1129 /* not at end of file, so we only read another line */
09bef843
SB
1130 /* make corresponding updates to old pointers, for yyerror() */
1131 oldprevlen = PL_oldbufptr - PL_bufend;
1132 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1133 if (PL_last_uni)
1134 oldunilen = PL_last_uni - PL_bufend;
1135 if (PL_last_lop)
1136 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1137 PL_linestart = PL_bufptr = s + prevlen;
1138 PL_bufend = s + SvCUR(PL_linestr);
1139 s = PL_bufptr;
09bef843
SB
1140 PL_oldbufptr = s + oldprevlen;
1141 PL_oldoldbufptr = s + oldoldprevlen;
1142 if (PL_last_uni)
1143 PL_last_uni = s + oldunilen;
1144 if (PL_last_lop)
1145 PL_last_lop = s + oldloplen;
a0d0e21e 1146 incline(s);
ffb4593c
NT
1147
1148 /* debugger active and we're not compiling the debugger code,
1149 * so store the line into the debugger's array of lines
1150 */
80a702cd 1151 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1152 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1153 }
5db06880
NC
1154
1155#ifdef PERL_MAD
1156 done:
1157 if (PL_madskills) {
cd81e915 1158 if (!PL_skipwhite)
6b29d1f5 1159 PL_skipwhite = newSVpvs("");
5db06880
NC
1160 curoff = s - SvPVX(PL_linestr);
1161 if (curoff - startoff)
cd81e915 1162 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1163 curoff - startoff);
1164 }
1165 return s;
1166#endif
a687059c 1167}
378cc40b 1168
ffb4593c
NT
1169/*
1170 * S_check_uni
1171 * Check the unary operators to ensure there's no ambiguity in how they're
1172 * used. An ambiguous piece of code would be:
1173 * rand + 5
1174 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1175 * the +5 is its argument.
1176 */
1177
76e3520e 1178STATIC void
cea2e8a9 1179S_check_uni(pTHX)
ba106d47 1180{
97aff369 1181 dVAR;
d4c19fe8
AL
1182 const char *s;
1183 const char *t;
2f3197b3 1184
3280af22 1185 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1186 return;
3280af22
NIS
1187 while (isSPACE(*PL_last_uni))
1188 PL_last_uni++;
c35e046a
AL
1189 s = PL_last_uni;
1190 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1191 s++;
3280af22 1192 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1193 return;
6136c704 1194
0453d815 1195 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1196 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1197 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1198 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1199 }
2f3197b3
LW
1200}
1201
ffb4593c
NT
1202/*
1203 * LOP : macro to build a list operator. Its behaviour has been replaced
1204 * with a subroutine, S_lop() for which LOP is just another name.
1205 */
1206
a0d0e21e
LW
1207#define LOP(f,x) return lop(f,x,s)
1208
ffb4593c
NT
1209/*
1210 * S_lop
1211 * Build a list operator (or something that might be one). The rules:
1212 * - if we have a next token, then it's a list operator [why?]
1213 * - if the next thing is an opening paren, then it's a function
1214 * - else it's a list operator
1215 */
1216
76e3520e 1217STATIC I32
a0be28da 1218S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1219{
97aff369 1220 dVAR;
79072805 1221 yylval.ival = f;
35c8bce7 1222 CLINE;
3280af22
NIS
1223 PL_expect = x;
1224 PL_bufptr = s;
1225 PL_last_lop = PL_oldbufptr;
eb160463 1226 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1227#ifdef PERL_MAD
1228 if (PL_lasttoke)
1229 return REPORT(LSTOP);
1230#else
3280af22 1231 if (PL_nexttoke)
bbf60fe6 1232 return REPORT(LSTOP);
5db06880 1233#endif
79072805 1234 if (*s == '(')
bbf60fe6 1235 return REPORT(FUNC);
29595ff2 1236 s = PEEKSPACE(s);
79072805 1237 if (*s == '(')
bbf60fe6 1238 return REPORT(FUNC);
79072805 1239 else
bbf60fe6 1240 return REPORT(LSTOP);
79072805
LW
1241}
1242
5db06880
NC
1243#ifdef PERL_MAD
1244 /*
1245 * S_start_force
1246 * Sets up for an eventual force_next(). start_force(0) basically does
1247 * an unshift, while start_force(-1) does a push. yylex removes items
1248 * on the "pop" end.
1249 */
1250
1251STATIC void
1252S_start_force(pTHX_ int where)
1253{
1254 int i;
1255
cd81e915 1256 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1257 where = PL_lasttoke;
cd81e915
NC
1258 assert(PL_curforce < 0 || PL_curforce == where);
1259 if (PL_curforce != where) {
5db06880
NC
1260 for (i = PL_lasttoke; i > where; --i) {
1261 PL_nexttoke[i] = PL_nexttoke[i-1];
1262 }
1263 PL_lasttoke++;
1264 }
cd81e915 1265 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1266 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1267 PL_curforce = where;
1268 if (PL_nextwhite) {
5db06880 1269 if (PL_madskills)
6b29d1f5 1270 curmad('^', newSVpvs(""));
cd81e915 1271 CURMAD('_', PL_nextwhite);
5db06880
NC
1272 }
1273}
1274
1275STATIC void
1276S_curmad(pTHX_ char slot, SV *sv)
1277{
1278 MADPROP **where;
1279
1280 if (!sv)
1281 return;
cd81e915
NC
1282 if (PL_curforce < 0)
1283 where = &PL_thismad;
5db06880 1284 else
cd81e915 1285 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1286
cd81e915 1287 if (PL_faketokens)
5db06880
NC
1288 sv_setpvn(sv, "", 0);
1289 else {
1290 if (!IN_BYTES) {
1291 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1292 SvUTF8_on(sv);
1293 else if (PL_encoding) {
1294 sv_recode_to_utf8(sv, PL_encoding);
1295 }
1296 }
1297 }
1298
1299 /* keep a slot open for the head of the list? */
1300 if (slot != '_' && *where && (*where)->mad_key == '^') {
1301 (*where)->mad_key = slot;
035e2bcc 1302 sv_free((SV*)((*where)->mad_val));
5db06880
NC
1303 (*where)->mad_val = (void*)sv;
1304 }
1305 else
1306 addmad(newMADsv(slot, sv), where, 0);
1307}
1308#else
b3f24c00
MHM
1309# define start_force(where) NOOP
1310# define curmad(slot, sv) NOOP
5db06880
NC
1311#endif
1312
ffb4593c
NT
1313/*
1314 * S_force_next
9cbb5ea2 1315 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1316 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1317 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1318 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1319 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1320 */
1321
4e553d73 1322STATIC void
cea2e8a9 1323S_force_next(pTHX_ I32 type)
79072805 1324{
97aff369 1325 dVAR;
5db06880 1326#ifdef PERL_MAD
cd81e915 1327 if (PL_curforce < 0)
5db06880 1328 start_force(PL_lasttoke);
cd81e915 1329 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1330 if (PL_lex_state != LEX_KNOWNEXT)
1331 PL_lex_defer = PL_lex_state;
1332 PL_lex_state = LEX_KNOWNEXT;
1333 PL_lex_expect = PL_expect;
cd81e915 1334 PL_curforce = -1;
5db06880 1335#else
3280af22
NIS
1336 PL_nexttype[PL_nexttoke] = type;
1337 PL_nexttoke++;
1338 if (PL_lex_state != LEX_KNOWNEXT) {
1339 PL_lex_defer = PL_lex_state;
1340 PL_lex_expect = PL_expect;
1341 PL_lex_state = LEX_KNOWNEXT;
79072805 1342 }
5db06880 1343#endif
79072805
LW
1344}
1345
d0a148a6
NC
1346STATIC SV *
1347S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1348{
97aff369 1349 dVAR;
9d4ba2ae 1350 SV * const sv = newSVpvn(start,len);
bfed75c6 1351 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1352 SvUTF8_on(sv);
1353 return sv;
1354}
1355
ffb4593c
NT
1356/*
1357 * S_force_word
1358 * When the lexer knows the next thing is a word (for instance, it has
1359 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1360 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1361 * lookahead.
ffb4593c
NT
1362 *
1363 * Arguments:
b1b65b59 1364 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1365 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1366 * int check_keyword : if true, Perl checks to make sure the word isn't
1367 * a keyword (do this if the word is a label, e.g. goto FOO)
1368 * int allow_pack : if true, : characters will also be allowed (require,
1369 * use, etc. do this)
9cbb5ea2 1370 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1371 */
1372
76e3520e 1373STATIC char *
cea2e8a9 1374S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1375{
97aff369 1376 dVAR;
463ee0b2
LW
1377 register char *s;
1378 STRLEN len;
4e553d73 1379
29595ff2 1380 start = SKIPSPACE1(start);
463ee0b2 1381 s = start;
7e2040f0 1382 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1383 (allow_pack && *s == ':') ||
15f0808c 1384 (allow_initial_tick && *s == '\'') )
a0d0e21e 1385 {
3280af22 1386 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1387 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1388 return start;
cd81e915 1389 start_force(PL_curforce);
5db06880
NC
1390 if (PL_madskills)
1391 curmad('X', newSVpvn(start,s-start));
463ee0b2 1392 if (token == METHOD) {
29595ff2 1393 s = SKIPSPACE1(s);
463ee0b2 1394 if (*s == '(')
3280af22 1395 PL_expect = XTERM;
463ee0b2 1396 else {
3280af22 1397 PL_expect = XOPERATOR;
463ee0b2 1398 }
79072805 1399 }
e74e6b3d 1400 if (PL_madskills)
63575281 1401 curmad('g', newSVpvs( "forced" ));
9ded7720 1402 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1403 = (OP*)newSVOP(OP_CONST,0,
1404 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1405 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1406 force_next(token);
1407 }
1408 return s;
1409}
1410
ffb4593c
NT
1411/*
1412 * S_force_ident
9cbb5ea2 1413 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1414 * text only contains the "foo" portion. The first argument is a pointer
1415 * to the "foo", and the second argument is the type symbol to prefix.
1416 * Forces the next token to be a "WORD".
9cbb5ea2 1417 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1418 */
1419
76e3520e 1420STATIC void
bfed75c6 1421S_force_ident(pTHX_ register const char *s, int kind)
79072805 1422{
97aff369 1423 dVAR;
c35e046a 1424 if (*s) {
90e5519e
NC
1425 const STRLEN len = strlen(s);
1426 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1427 start_force(PL_curforce);
9ded7720 1428 NEXTVAL_NEXTTOKE.opval = o;
79072805 1429 force_next(WORD);
748a9306 1430 if (kind) {
11343788 1431 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1432 /* XXX see note in pp_entereval() for why we forgo typo
1433 warnings if the symbol must be introduced in an eval.
1434 GSAR 96-10-12 */
90e5519e
NC
1435 gv_fetchpvn_flags(s, len,
1436 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1437 : GV_ADD,
1438 kind == '$' ? SVt_PV :
1439 kind == '@' ? SVt_PVAV :
1440 kind == '%' ? SVt_PVHV :
a0d0e21e 1441 SVt_PVGV
90e5519e 1442 );
748a9306 1443 }
79072805
LW
1444 }
1445}
1446
1571675a
GS
1447NV
1448Perl_str_to_version(pTHX_ SV *sv)
1449{
1450 NV retval = 0.0;
1451 NV nshift = 1.0;
1452 STRLEN len;
cfd0369c 1453 const char *start = SvPV_const(sv,len);
9d4ba2ae 1454 const char * const end = start + len;
504618e9 1455 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1456 while (start < end) {
ba210ebe 1457 STRLEN skip;
1571675a
GS
1458 UV n;
1459 if (utf)
9041c2e3 1460 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1461 else {
1462 n = *(U8*)start;
1463 skip = 1;
1464 }
1465 retval += ((NV)n)/nshift;
1466 start += skip;
1467 nshift *= 1000;
1468 }
1469 return retval;
1470}
1471
4e553d73 1472/*
ffb4593c
NT
1473 * S_force_version
1474 * Forces the next token to be a version number.
e759cc13
RGS
1475 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1476 * and if "guessing" is TRUE, then no new token is created (and the caller
1477 * must use an alternative parsing method).
ffb4593c
NT
1478 */
1479
76e3520e 1480STATIC char *
e759cc13 1481S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1482{
97aff369 1483 dVAR;
5f66b61c 1484 OP *version = NULL;
44dcb63b 1485 char *d;
5db06880
NC
1486#ifdef PERL_MAD
1487 I32 startoff = s - SvPVX(PL_linestr);
1488#endif
89bfa8cd 1489
29595ff2 1490 s = SKIPSPACE1(s);
89bfa8cd 1491
44dcb63b 1492 d = s;
dd629d5b 1493 if (*d == 'v')
44dcb63b 1494 d++;
44dcb63b 1495 if (isDIGIT(*d)) {
e759cc13
RGS
1496 while (isDIGIT(*d) || *d == '_' || *d == '.')
1497 d++;
5db06880
NC
1498#ifdef PERL_MAD
1499 if (PL_madskills) {
cd81e915 1500 start_force(PL_curforce);
5db06880
NC
1501 curmad('X', newSVpvn(s,d-s));
1502 }
1503#endif
9f3d182e 1504 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1505 SV *ver;
b73d6f50 1506 s = scan_num(s, &yylval);
89bfa8cd 1507 version = yylval.opval;
dd629d5b
GS
1508 ver = cSVOPx(version)->op_sv;
1509 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1510 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1511 SvNV_set(ver, str_to_version(ver));
1571675a 1512 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1513 }
89bfa8cd 1514 }
5db06880
NC
1515 else if (guessing) {
1516#ifdef PERL_MAD
1517 if (PL_madskills) {
cd81e915
NC
1518 sv_free(PL_nextwhite); /* let next token collect whitespace */
1519 PL_nextwhite = 0;
5db06880
NC
1520 s = SvPVX(PL_linestr) + startoff;
1521 }
1522#endif
e759cc13 1523 return s;
5db06880 1524 }
89bfa8cd
PP
1525 }
1526
5db06880
NC
1527#ifdef PERL_MAD
1528 if (PL_madskills && !version) {
cd81e915
NC
1529 sv_free(PL_nextwhite); /* let next token collect whitespace */
1530 PL_nextwhite = 0;
5db06880
NC
1531 s = SvPVX(PL_linestr) + startoff;
1532 }
1533#endif
89bfa8cd 1534 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1535 start_force(PL_curforce);
9ded7720 1536 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1537 force_next(WORD);
89bfa8cd 1538
e759cc13 1539 return s;
89bfa8cd
PP
1540}
1541
ffb4593c
NT
1542/*
1543 * S_tokeq
1544 * Tokenize a quoted string passed in as an SV. It finds the next
1545 * chunk, up to end of string or a backslash. It may make a new
1546 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1547 * turns \\ into \.
1548 */
1549
76e3520e 1550STATIC SV *
cea2e8a9 1551S_tokeq(pTHX_ SV *sv)
79072805 1552{
97aff369 1553 dVAR;
79072805
LW
1554 register char *s;
1555 register char *send;
1556 register char *d;
b3ac6de7
IZ
1557 STRLEN len = 0;
1558 SV *pv = sv;
79072805
LW
1559
1560 if (!SvLEN(sv))
b3ac6de7 1561 goto finish;
79072805 1562
a0d0e21e 1563 s = SvPV_force(sv, len);
21a311ee 1564 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1565 goto finish;
463ee0b2 1566 send = s + len;
79072805
LW
1567 while (s < send && *s != '\\')
1568 s++;
1569 if (s == send)
b3ac6de7 1570 goto finish;
79072805 1571 d = s;
be4731d2 1572 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1573 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1574 if (SvUTF8(sv))
1575 SvUTF8_on(pv);
1576 }
79072805
LW
1577 while (s < send) {
1578 if (*s == '\\') {
a0d0e21e 1579 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1580 s++; /* all that, just for this */
1581 }
1582 *d++ = *s++;
1583 }
1584 *d = '\0';
95a20fc0 1585 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1586 finish:
3280af22 1587 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1588 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1589 return sv;
1590}
1591
ffb4593c
NT
1592/*
1593 * Now come three functions related to double-quote context,
1594 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1595 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1596 * interact with PL_lex_state, and create fake ( ... ) argument lists
1597 * to handle functions and concatenation.
1598 * They assume that whoever calls them will be setting up a fake
1599 * join call, because each subthing puts a ',' after it. This lets
1600 * "lower \luPpEr"
1601 * become
1602 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1603 *
1604 * (I'm not sure whether the spurious commas at the end of lcfirst's
1605 * arguments and join's arguments are created or not).
1606 */
1607
1608/*
1609 * S_sublex_start
1610 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1611 *
1612 * Pattern matching will set PL_lex_op to the pattern-matching op to
1613 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1614 *
1615 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1616 *
1617 * Everything else becomes a FUNC.
1618 *
1619 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1620 * had an OP_CONST or OP_READLINE). This just sets us up for a
1621 * call to S_sublex_push().
1622 */
1623
76e3520e 1624STATIC I32
cea2e8a9 1625S_sublex_start(pTHX)
79072805 1626{
97aff369 1627 dVAR;
0d46e09a 1628 register const I32 op_type = yylval.ival;
79072805
LW
1629
1630 if (op_type == OP_NULL) {
3280af22 1631 yylval.opval = PL_lex_op;
5f66b61c 1632 PL_lex_op = NULL;
79072805
LW
1633 return THING;
1634 }
1635 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1636 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1637
1638 if (SvTYPE(sv) == SVt_PVIV) {
1639 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1640 STRLEN len;
96a5add6 1641 const char * const p = SvPV_const(sv, len);
f54cb97a 1642 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1643 if (SvUTF8(sv))
1644 SvUTF8_on(nsv);
b3ac6de7
IZ
1645 SvREFCNT_dec(sv);
1646 sv = nsv;
4e553d73 1647 }
b3ac6de7 1648 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1649 PL_lex_stuff = NULL;
6f33ba73
RGS
1650 /* Allow <FH> // "foo" */
1651 if (op_type == OP_READLINE)
1652 PL_expect = XTERMORDORDOR;
79072805
LW
1653 return THING;
1654 }
e3f73d4e
RGS
1655 else if (op_type == OP_BACKTICK && PL_lex_op) {
1656 /* readpipe() vas overriden */
1657 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1658 yylval.opval = PL_lex_op;
9b201d7d 1659 PL_lex_op = NULL;
e3f73d4e
RGS
1660 PL_lex_stuff = NULL;
1661 return THING;
1662 }
79072805 1663
3280af22 1664 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1665 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1666 PL_sublex_info.sub_op = PL_lex_op;
1667 PL_lex_state = LEX_INTERPPUSH;
55497cff 1668
3280af22
NIS
1669 PL_expect = XTERM;
1670 if (PL_lex_op) {
1671 yylval.opval = PL_lex_op;
5f66b61c 1672 PL_lex_op = NULL;
55497cff
PP
1673 return PMFUNC;
1674 }
1675 else
1676 return FUNC;
1677}
1678
ffb4593c
NT
1679/*
1680 * S_sublex_push
1681 * Create a new scope to save the lexing state. The scope will be
1682 * ended in S_sublex_done. Returns a '(', starting the function arguments
1683 * to the uc, lc, etc. found before.
1684 * Sets PL_lex_state to LEX_INTERPCONCAT.
1685 */
1686
76e3520e 1687STATIC I32
cea2e8a9 1688S_sublex_push(pTHX)
55497cff 1689{
27da23d5 1690 dVAR;
f46d017c 1691 ENTER;
55497cff 1692
3280af22 1693 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1694 SAVEBOOL(PL_lex_dojoin);
3280af22 1695 SAVEI32(PL_lex_brackets);
3280af22
NIS
1696 SAVEI32(PL_lex_casemods);
1697 SAVEI32(PL_lex_starts);
651b5b28 1698 SAVEI8(PL_lex_state);
7766f137 1699 SAVEVPTR(PL_lex_inpat);
98246f1e 1700 SAVEI16(PL_lex_inwhat);
57843af0 1701 SAVECOPLINE(PL_curcop);
3280af22 1702 SAVEPPTR(PL_bufptr);
8452ff4b 1703 SAVEPPTR(PL_bufend);
3280af22
NIS
1704 SAVEPPTR(PL_oldbufptr);
1705 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1706 SAVEPPTR(PL_last_lop);
1707 SAVEPPTR(PL_last_uni);
3280af22
NIS
1708 SAVEPPTR(PL_linestart);
1709 SAVESPTR(PL_linestr);
8edd5f42
RGS
1710 SAVEGENERICPV(PL_lex_brackstack);
1711 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1712
1713 PL_linestr = PL_lex_stuff;
a0714e2c 1714 PL_lex_stuff = NULL;
3280af22 1715
9cbb5ea2
GS
1716 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1717 = SvPVX(PL_linestr);
3280af22 1718 PL_bufend += SvCUR(PL_linestr);
bd61b366 1719 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1720 SAVEFREESV(PL_linestr);
1721
1722 PL_lex_dojoin = FALSE;
1723 PL_lex_brackets = 0;
a02a5408
JC
1724 Newx(PL_lex_brackstack, 120, char);
1725 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1726 PL_lex_casemods = 0;
1727 *PL_lex_casestack = '\0';
1728 PL_lex_starts = 0;
1729 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1730 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1731
1732 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1733 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1734 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1735 else
5f66b61c 1736 PL_lex_inpat = NULL;
79072805 1737
55497cff 1738 return '(';
79072805
LW
1739}
1740
ffb4593c
NT
1741/*
1742 * S_sublex_done
1743 * Restores lexer state after a S_sublex_push.
1744 */
1745
76e3520e 1746STATIC I32
cea2e8a9 1747S_sublex_done(pTHX)
79072805 1748{
27da23d5 1749 dVAR;
3280af22 1750 if (!PL_lex_starts++) {
396482e1 1751 SV * const sv = newSVpvs("");
9aa983d2
JH
1752 if (SvUTF8(PL_linestr))
1753 SvUTF8_on(sv);
3280af22 1754 PL_expect = XOPERATOR;
9aa983d2 1755 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1756 return THING;
1757 }
1758
3280af22
NIS
1759 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1760 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1761 return yylex();
79072805
LW
1762 }
1763
ffb4593c 1764 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1765 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1766 PL_linestr = PL_lex_repl;
1767 PL_lex_inpat = 0;
1768 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1769 PL_bufend += SvCUR(PL_linestr);
bd61b366 1770 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1771 SAVEFREESV(PL_linestr);
1772 PL_lex_dojoin = FALSE;
1773 PL_lex_brackets = 0;
3280af22
NIS
1774 PL_lex_casemods = 0;
1775 *PL_lex_casestack = '\0';
1776 PL_lex_starts = 0;
25da4f38 1777 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1778 PL_lex_state = LEX_INTERPNORMAL;
1779 PL_lex_starts++;
e9fa98b2
HS
1780 /* we don't clear PL_lex_repl here, so that we can check later
1781 whether this is an evalled subst; that means we rely on the
1782 logic to ensure sublex_done() is called again only via the
1783 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1784 }
e9fa98b2 1785 else {
3280af22 1786 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1787 PL_lex_repl = NULL;
e9fa98b2 1788 }
79072805 1789 return ',';
ffed7fef
LW
1790 }
1791 else {
5db06880
NC
1792#ifdef PERL_MAD
1793 if (PL_madskills) {
cd81e915
NC
1794 if (PL_thiswhite) {
1795 if (!PL_endwhite)
6b29d1f5 1796 PL_endwhite = newSVpvs("");
cd81e915
NC
1797 sv_catsv(PL_endwhite, PL_thiswhite);
1798 PL_thiswhite = 0;
1799 }
1800 if (PL_thistoken)
1801 sv_setpvn(PL_thistoken,"",0);
5db06880 1802 else
cd81e915 1803 PL_realtokenstart = -1;
5db06880
NC
1804 }
1805#endif
f46d017c 1806 LEAVE;
3280af22
NIS
1807 PL_bufend = SvPVX(PL_linestr);
1808 PL_bufend += SvCUR(PL_linestr);
1809 PL_expect = XOPERATOR;
09bef843 1810 PL_sublex_info.sub_inwhat = 0;
79072805 1811 return ')';
ffed7fef
LW
1812 }
1813}
1814
02aa26ce
NT
1815/*
1816 scan_const
1817
1818 Extracts a pattern, double-quoted string, or transliteration. This
1819 is terrifying code.
1820
94def140 1821 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1822 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1823 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1824
94def140
ST
1825 Returns a pointer to the character scanned up to. If this is
1826 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1827 successfully parsed), will leave an OP for the substring scanned
1828 in yylval. Caller must intuit reason for not parsing further
1829 by looking at the next characters herself.
1830
02aa26ce
NT
1831 In patterns:
1832 backslashes:
1833 double-quoted style: \r and \n
1834 regexp special ones: \D \s
94def140
ST
1835 constants: \x31
1836 backrefs: \1
02aa26ce
NT
1837 case and quoting: \U \Q \E
1838 stops on @ and $, but not for $ as tail anchor
1839
1840 In transliterations:
1841 characters are VERY literal, except for - not at the start or end
94def140
ST
1842 of the string, which indicates a range. If the range is in bytes,
1843 scan_const expands the range to the full set of intermediate
1844 characters. If the range is in utf8, the hyphen is replaced with
1845 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1846
1847 In double-quoted strings:
1848 backslashes:
1849 double-quoted style: \r and \n
94def140
ST
1850 constants: \x31
1851 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1852 case and quoting: \U \Q \E
1853 stops on @ and $
1854
1855 scan_const does *not* construct ops to handle interpolated strings.
1856 It stops processing as soon as it finds an embedded $ or @ variable
1857 and leaves it to the caller to work out what's going on.
1858
94def140
ST
1859 embedded arrays (whether in pattern or not) could be:
1860 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1861
1862 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1863
1864 $ in pattern could be $foo or could be tail anchor. Assumption:
1865 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1866 followed by one of "()| \r\n\t"
02aa26ce
NT
1867
1868 \1 (backreferences) are turned into $1
1869
1870 The structure of the code is
1871 while (there's a character to process) {
94def140
ST
1872 handle transliteration ranges
1873 skip regexp comments /(?#comment)/ and codes /(?{code})/
1874 skip #-initiated comments in //x patterns
1875 check for embedded arrays
02aa26ce
NT
1876 check for embedded scalars
1877 if (backslash) {
94def140
ST
1878 leave intact backslashes from leaveit (below)
1879 deprecate \1 in substitution replacements
02aa26ce
NT
1880 handle string-changing backslashes \l \U \Q \E, etc.
1881 switch (what was escaped) {
94def140
ST
1882 handle \- in a transliteration (becomes a literal -)
1883 handle \132 (octal characters)
1884 handle \x15 and \x{1234} (hex characters)
1885 handle \N{name} (named characters)
1886 handle \cV (control characters)
1887 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1888 } (end switch)
1889 } (end if backslash)
1890 } (end while character to read)
4e553d73 1891
02aa26ce
NT
1892*/
1893
76e3520e 1894STATIC char *
cea2e8a9 1895S_scan_const(pTHX_ char *start)
79072805 1896{
97aff369 1897 dVAR;
3280af22 1898 register char *send = PL_bufend; /* end of the constant */
561b68a9 1899 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1900 register char *s = start; /* start of the constant */
1901 register char *d = SvPVX(sv); /* destination for copies */
1902 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1903 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1904 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1905 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1906 UV uv;
4c3a8340
ST
1907#ifdef EBCDIC
1908 UV literal_endpoint = 0;
e294cc5d 1909 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1910#endif
012bcf8d 1911
2b9d42f0
NIS
1912 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1913 /* If we are doing a trans and we know we want UTF8 set expectation */
1914 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1915 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1916 }
1917
1918
79072805 1919 while (s < send || dorange) {
02aa26ce 1920 /* get transliterations out of the way (they're most literal) */
3280af22 1921 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1922 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1923 if (dorange) {
1ba5c669
JH
1924 I32 i; /* current expanded character */
1925 I32 min; /* first character in range */
1926 I32 max; /* last character in range */
02aa26ce 1927
e294cc5d
JH
1928#ifdef EBCDIC
1929 UV uvmax = 0;
1930#endif
1931
1932 if (has_utf8
1933#ifdef EBCDIC
1934 && !native_range
1935#endif
1936 ) {
9d4ba2ae 1937 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1938 char *e = d++;
1939 while (e-- > c)
1940 *(e + 1) = *e;
25716404 1941 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1942 /* mark the range as done, and continue */
1943 dorange = FALSE;
1944 didrange = TRUE;
1945 continue;
1946 }
2b9d42f0 1947
95a20fc0 1948 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1949#ifdef EBCDIC
1950 SvGROW(sv,
1951 SvLEN(sv) + (has_utf8 ?
1952 (512 - UTF_CONTINUATION_MARK +
1953 UNISKIP(0x100))
1954 : 256));
1955 /* How many two-byte within 0..255: 128 in UTF-8,
1956 * 96 in UTF-8-mod. */
1957#else
9cbb5ea2 1958 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1959#endif
9cbb5ea2 1960 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1961#ifdef EBCDIC
1962 if (has_utf8) {
1963 int j;
1964 for (j = 0; j <= 1; j++) {
1965 char * const c = (char*)utf8_hop((U8*)d, -1);
1966 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1967 if (j)
1968 min = (U8)uv;
1969 else if (uv < 256)
1970 max = (U8)uv;
1971 else {
1972 max = (U8)0xff; /* only to \xff */
1973 uvmax = uv; /* \x{100} to uvmax */
1974 }
1975 d = c; /* eat endpoint chars */
1976 }
1977 }
1978 else {
1979#endif
1980 d -= 2; /* eat the first char and the - */
1981 min = (U8)*d; /* first char in range */
1982 max = (U8)d[1]; /* last char in range */
1983#ifdef EBCDIC
1984 }
1985#endif
8ada0baa 1986
c2e66d9e 1987 if (min > max) {
01ec43d0 1988 Perl_croak(aTHX_
d1573ac7 1989 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1990 (char)min, (char)max);
c2e66d9e
GS
1991 }
1992
c7f1f016 1993#ifdef EBCDIC
4c3a8340
ST
1994 if (literal_endpoint == 2 &&
1995 ((isLOWER(min) && isLOWER(max)) ||
1996 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1997 if (isLOWER(min)) {
1998 for (i = min; i <= max; i++)
1999 if (isLOWER(i))
db42d148 2000 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2001 } else {
2002 for (i = min; i <= max; i++)
2003 if (isUPPER(i))
db42d148 2004 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2005 }
2006 }
2007 else
2008#endif
2009 for (i = min; i <= max; i++)
e294cc5d
JH
2010#ifdef EBCDIC
2011 if (has_utf8) {
2012 const U8 ch = (U8)NATIVE_TO_UTF(i);
2013 if (UNI_IS_INVARIANT(ch))
2014 *d++ = (U8)i;
2015 else {
2016 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2017 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2018 }
2019 }
2020 else
2021#endif
2022 *d++ = (char)i;
2023
2024#ifdef EBCDIC
2025 if (uvmax) {
2026 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2027 if (uvmax > 0x101)
2028 *d++ = (char)UTF_TO_NATIVE(0xff);
2029 if (uvmax > 0x100)
2030 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2031 }
2032#endif
02aa26ce
NT
2033
2034 /* mark the range as done, and continue */
79072805 2035 dorange = FALSE;
01ec43d0 2036 didrange = TRUE;
4c3a8340
ST
2037#ifdef EBCDIC
2038 literal_endpoint = 0;
2039#endif
79072805 2040 continue;
4e553d73 2041 }
02aa26ce
NT
2042
2043 /* range begins (ignore - as first or last char) */
79072805 2044 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2045 if (didrange) {
1fafa243 2046 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2047 }
e294cc5d
JH
2048 if (has_utf8
2049#ifdef EBCDIC
2050 && !native_range
2051#endif
2052 ) {
25716404 2053 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2054 s++;
2055 continue;
2056 }
79072805
LW
2057 dorange = TRUE;
2058 s++;
01ec43d0
GS
2059 }
2060 else {
2061 didrange = FALSE;
4c3a8340
ST
2062#ifdef EBCDIC
2063 literal_endpoint = 0;
e294cc5d 2064 native_range = TRUE;
4c3a8340 2065#endif
01ec43d0 2066 }
79072805 2067 }
02aa26ce
NT
2068
2069 /* if we get here, we're not doing a transliteration */
2070
0f5d15d6
IZ
2071 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2072 except for the last char, which will be done separately. */
3280af22 2073 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2074 if (s[2] == '#') {
e994fd66 2075 while (s+1 < send && *s != ')')
db42d148 2076 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2077 }
2078 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2079 || (s[2] == '?' && s[3] == '{'))
155aba94 2080 {
cc6b7395 2081 I32 count = 1;
0f5d15d6 2082 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2083 char c;
2084
d9f97599
GS
2085 while (count && (c = *regparse)) {
2086 if (c == '\\' && regparse[1])
2087 regparse++;
4e553d73 2088 else if (c == '{')
cc6b7395 2089 count++;
4e553d73 2090 else if (c == '}')
cc6b7395 2091 count--;
d9f97599 2092 regparse++;
cc6b7395 2093 }
e994fd66 2094 if (*regparse != ')')
5bdf89e7 2095 regparse--; /* Leave one char for continuation. */
0f5d15d6 2096 while (s < regparse)
db42d148 2097 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2098 }
748a9306 2099 }
02aa26ce
NT
2100
2101 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2102 else if (*s == '#' && PL_lex_inpat &&
2103 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2104 while (s+1 < send && *s != '\n')
db42d148 2105 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2106 }
02aa26ce 2107
5d1d4326 2108 /* check for embedded arrays
da6eedaa 2109 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2110 */
1749ea0d
ST
2111 else if (*s == '@' && s[1]) {
2112 if (isALNUM_lazy_if(s+1,UTF))
2113 break;
2114 if (strchr(":'{$", s[1]))
2115 break;
2116 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2117 break; /* in regexp, neither @+ nor @- are interpolated */
2118 }
02aa26ce
NT
2119
2120 /* check for embedded scalars. only stop if we're sure it's a
2121 variable.
2122 */
79072805 2123 else if (*s == '$') {
3280af22 2124 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2125 break;
6002328a 2126 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2127 break; /* in regexp, $ might be tail anchor */
2128 }
02aa26ce 2129
2b9d42f0
NIS
2130 /* End of else if chain - OP_TRANS rejoin rest */
2131
02aa26ce 2132 /* backslashes */
79072805
LW
2133 if (*s == '\\' && s+1 < send) {
2134 s++;
02aa26ce 2135
02aa26ce 2136 /* deprecate \1 in strings and substitution replacements */
3280af22 2137 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2138 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2139 {
599cee73 2140 if (ckWARN(WARN_SYNTAX))
9014280d 2141 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2142 *--s = '$';
2143 break;
2144 }
02aa26ce
NT
2145
2146 /* string-change backslash escapes */
3280af22 2147 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2148 --s;
2149 break;
2150 }
cc74c5bd
ST
2151 /* skip any other backslash escapes in a pattern */
2152 else if (PL_lex_inpat) {
2153 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2154 goto default_action;
2155 }
02aa26ce
NT
2156
2157 /* if we get here, it's either a quoted -, or a digit */
79072805 2158 switch (*s) {
02aa26ce
NT
2159
2160 /* quoted - in transliterations */
79072805 2161 case '-':
3280af22 2162 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2163 *d++ = *s++;
2164 continue;
2165 }
2166 /* FALL THROUGH */
2167 default:
11b8faa4 2168 {
86f97054 2169 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2170 ckWARN(WARN_MISC))
9014280d 2171 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2172 "Unrecognized escape \\%c passed through",
2173 *s);
11b8faa4 2174 /* default action is to copy the quoted character */
f9a63242 2175 goto default_action;
11b8faa4 2176 }
02aa26ce
NT
2177
2178 /* \132 indicates an octal constant */
79072805
LW
2179 case '0': case '1': case '2': case '3':
2180 case '4': case '5': case '6': case '7':
ba210ebe 2181 {
53305cf1
NC
2182 I32 flags = 0;
2183 STRLEN len = 3;
2184 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2185 s += len;
2186 }
012bcf8d 2187 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2188
2189 /* \x24 indicates a hex constant */
79072805 2190 case 'x':
a0ed51b3
LW
2191 ++s;
2192 if (*s == '{') {
9d4ba2ae 2193 char* const e = strchr(s, '}');
a4c04bdc
NC
2194 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2195 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2196 STRLEN len;
355860ce 2197
53305cf1 2198 ++s;
adaeee49 2199 if (!e) {
a0ed51b3 2200 yyerror("Missing right brace on \\x{}");
355860ce 2201 continue;
ba210ebe 2202 }
53305cf1
NC
2203 len = e - s;
2204 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2205 s = e + 1;
a0ed51b3
LW
2206 }
2207 else {
ba210ebe 2208 {
53305cf1 2209 STRLEN len = 2;
a4c04bdc 2210 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2211 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2212 s += len;
2213 }
012bcf8d
GS
2214 }
2215
2216 NUM_ESCAPE_INSERT:
2217 /* Insert oct or hex escaped character.
301d3d20 2218 * There will always enough room in sv since such
db42d148 2219 * escapes will be longer than any UTF-8 sequence
301d3d20 2220 * they can end up as. */
ba7cea30 2221
c7f1f016
NIS
2222 /* We need to map to chars to ASCII before doing the tests
2223 to cover EBCDIC
2224 */
c4d5f83a 2225 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2226 if (!has_utf8 && uv > 255) {
301d3d20
JH
2227 /* Might need to recode whatever we have
2228 * accumulated so far if it contains any
2229 * hibit chars.
2230 *
2231 * (Can't we keep track of that and avoid
2232 * this rescan? --jhi)
012bcf8d 2233 */
c7f1f016 2234 int hicount = 0;
63cd0674
NIS
2235 U8 *c;
2236 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2237 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2238 hicount++;
db42d148 2239 }
012bcf8d 2240 }
63cd0674 2241 if (hicount) {
9d4ba2ae 2242 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2243 U8 *src, *dst;
2244 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2245 src = (U8 *)d - 1;
2246 dst = src+hicount;
2247 d += hicount;
cfd0369c 2248 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2249 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2250 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2251 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2252 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2253 }
2254 else {
63cd0674 2255 *dst-- = *src;
012bcf8d 2256 }
c7f1f016 2257 src--;
012bcf8d
GS
2258 }
2259 }
2260 }
2261
9aa983d2 2262 if (has_utf8 || uv > 255) {
9041c2e3 2263 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2264 has_utf8 = TRUE;
f9a63242
JH
2265 if (PL_lex_inwhat == OP_TRANS &&
2266 PL_sublex_info.sub_op) {
2267 PL_sublex_info.sub_op->op_private |=
2268 (PL_lex_repl ? OPpTRANS_FROM_UTF
2269 : OPpTRANS_TO_UTF);
f9a63242 2270 }
e294cc5d
JH
2271#ifdef EBCDIC
2272 if (uv > 255 && !dorange)
2273 native_range = FALSE;
2274#endif
012bcf8d 2275 }
a0ed51b3 2276 else {
012bcf8d 2277 *d++ = (char)uv;
a0ed51b3 2278 }
012bcf8d
GS
2279 }
2280 else {
c4d5f83a 2281 *d++ = (char) uv;
a0ed51b3 2282 }
79072805 2283 continue;
02aa26ce 2284
b239daa5 2285 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2286 case 'N':
55eda711 2287 ++s;
423cee85
JH
2288 if (*s == '{') {
2289 char* e = strchr(s, '}');
155aba94 2290 SV *res;
423cee85 2291 STRLEN len;
cfd0369c 2292 const char *str;
4e553d73 2293
423cee85 2294 if (!e) {
5777a3f7 2295 yyerror("Missing right brace on \\N{}");
423cee85
JH
2296 e = s - 1;
2297 goto cont_scan;
2298 }
dbc0d4f2
JH
2299 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2300 /* \N{U+...} */
2301 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2302 PERL_SCAN_DISALLOW_PREFIX;
2303 s += 3;
2304 len = e - s;
2305 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2306 if ( e > s && len != (STRLEN)(e - s) ) {
2307 uv = 0xFFFD;
fc8cd66c 2308 }
dbc0d4f2
JH
2309 s = e + 1;
2310 goto NUM_ESCAPE_INSERT;
2311 }
55eda711 2312 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2313 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2314 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2315 if (has_utf8)
2316 sv_utf8_upgrade(res);
cfd0369c 2317 str = SvPV_const(res,len);
1c47067b
JH
2318#ifdef EBCDIC_NEVER_MIND
2319 /* charnames uses pack U and that has been
2320 * recently changed to do the below uni->native
2321 * mapping, so this would be redundant (and wrong,
2322 * the code point would be doubly converted).
2323 * But leave this in just in case the pack U change
2324 * gets revoked, but the semantics is still
2325 * desireable for charnames. --jhi */
cddc7ef4 2326 {
cfd0369c 2327 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2328
2329 if (uv < 0x100) {
89ebb4a3 2330 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2331
2332 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2333 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2334 str = SvPV_const(res, len);
cddc7ef4
JH
2335 }
2336 }
2337#endif
89491803 2338 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2339 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2340 SvCUR_set(sv, d - ostart);
2341 SvPOK_on(sv);
e4f3eed8 2342 *d = '\0';
f08d6ad9 2343 sv_utf8_upgrade(sv);
d2f449dd 2344 /* this just broke our allocation above... */
eb160463 2345 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2346 d = SvPVX(sv) + SvCUR(sv);
89491803 2347 has_utf8 = TRUE;
f08d6ad9 2348 }
eb160463 2349 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2350 const char * const odest = SvPVX_const(sv);
423cee85 2351
8973db79 2352 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2353 d = SvPVX(sv) + (d - odest);
2354 }
e294cc5d
JH
2355#ifdef EBCDIC
2356 if (!dorange)
2357 native_range = FALSE; /* \N{} is guessed to be Unicode */
2358#endif
423cee85
JH
2359 Copy(str, d, len, char);
2360 d += len;
2361 SvREFCNT_dec(res);
2362 cont_scan:
2363 s = e + 1;
2364 }
2365 else
5777a3f7 2366 yyerror("Missing braces on \\N{}");
423cee85
JH
2367 continue;
2368
02aa26ce 2369 /* \c is a control character */
79072805
LW
2370 case 'c':
2371 s++;
961ce445 2372 if (s < send) {
ba210ebe 2373 U8 c = *s++;
c7f1f016
NIS
2374#ifdef EBCDIC
2375 if (isLOWER(c))
2376 c = toUPPER(c);
2377#endif
db42d148 2378 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2379 }
961ce445
RGS
2380 else {
2381 yyerror("Missing control char name in \\c");
2382 }
79072805 2383 continue;
02aa26ce
NT
2384
2385 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2386 case 'b':
db42d148 2387 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2388 break;
2389 case 'n':
db42d148 2390 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2391 break;
2392 case 'r':
db42d148 2393 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2394 break;
2395 case 'f':
db42d148 2396 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2397 break;
2398 case 't':
db42d148 2399 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2400 break;
34a3fe2a 2401 case 'e':
db42d148 2402 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2403 break;
2404 case 'a':
db42d148 2405 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2406 break;
02aa26ce
NT
2407 } /* end switch */
2408
79072805
LW
2409 s++;
2410 continue;
02aa26ce 2411 } /* end if (backslash) */
4c3a8340
ST
2412#ifdef EBCDIC
2413 else
2414 literal_endpoint++;
2415#endif
02aa26ce 2416
f9a63242 2417 default_action:
2b9d42f0
NIS
2418 /* If we started with encoded form, or already know we want it
2419 and then encode the next character */
2420 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2421 STRLEN len = 1;
5f66b61c
AL
2422 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2423 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2424 s += len;
2425 if (need > len) {
2426 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2427 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2428 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2429 }
5f66b61c 2430 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2431 has_utf8 = TRUE;
e294cc5d
JH
2432#ifdef EBCDIC
2433 if (uv > 255 && !dorange)
2434 native_range = FALSE;
2435#endif
2b9d42f0
NIS
2436 }
2437 else {
2438 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2439 }
02aa26ce
NT
2440 } /* while loop to process each character */
2441
2442 /* terminate the string and set up the sv */
79072805 2443 *d = '\0';
95a20fc0 2444 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2445 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2446 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2447
79072805 2448 SvPOK_on(sv);
9f4817db 2449 if (PL_encoding && !has_utf8) {
d0063567
DK
2450 sv_recode_to_utf8(sv, PL_encoding);
2451 if (SvUTF8(sv))
2452 has_utf8 = TRUE;
9f4817db 2453 }
2b9d42f0 2454 if (has_utf8) {
7e2040f0 2455 SvUTF8_on(sv);
2b9d42f0 2456 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2457 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2458 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2459 }
2460 }
79072805 2461
02aa26ce 2462 /* shrink the sv if we allocated more than we used */
79072805 2463 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2464 SvPV_shrink_to_cur(sv);
79072805 2465 }
02aa26ce 2466
9b599b2a 2467 /* return the substring (via yylval) only if we parsed anything */
3280af22 2468 if (s > PL_bufptr) {
eb0d8d16
NC
2469 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2470 const char *const key = PL_lex_inpat ? "qr" : "q";
2471 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2472 const char *type;
2473 STRLEN typelen;
2474
2475 if (PL_lex_inwhat == OP_TRANS) {
2476 type = "tr";
2477 typelen = 2;
2478 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2479 type = "s";
2480 typelen = 1;
2481 } else {
2482 type = "qq";
2483 typelen = 2;
2484 }
2485
2486 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2487 type, typelen);
2488 }
79072805 2489 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2490 } else
8990e307 2491 SvREFCNT_dec(sv);
79072805
LW
2492 return s;
2493}
2494
ffb4593c
NT
2495/* S_intuit_more
2496 * Returns TRUE if there's more to the expression (e.g., a subscript),
2497 * FALSE otherwise.
ffb4593c
NT
2498 *
2499 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2500 *
2501 * ->[ and ->{ return TRUE
2502 * { and [ outside a pattern are always subscripts, so return TRUE
2503 * if we're outside a pattern and it's not { or [, then return FALSE
2504 * if we're in a pattern and the first char is a {
2505 * {4,5} (any digits around the comma) returns FALSE
2506 * if we're in a pattern and the first char is a [
2507 * [] returns FALSE
2508 * [SOMETHING] has a funky algorithm to decide whether it's a
2509 * character class or not. It has to deal with things like
2510 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2511 * anything else returns TRUE
2512 */
2513
9cbb5ea2
GS
2514/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2515
76e3520e 2516STATIC int
cea2e8a9 2517S_intuit_more(pTHX_ register char *s)
79072805 2518{
97aff369 2519 dVAR;
3280af22 2520 if (PL_lex_brackets)
79072805
LW
2521 return TRUE;
2522 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2523 return TRUE;
2524 if (*s != '{' && *s != '[')
2525 return FALSE;
3280af22 2526 if (!PL_lex_inpat)
79072805
LW
2527 return TRUE;
2528
2529 /* In a pattern, so maybe we have {n,m}. */
2530 if (*s == '{') {
2531 s++;
2532 if (!isDIGIT(*s))
2533 return TRUE;
2534 while (isDIGIT(*s))
2535 s++;
2536 if (*s == ',')
2537 s++;
2538 while (isDIGIT(*s))
2539 s++;
2540 if (*s == '}')
2541 return FALSE;
2542 return TRUE;
2543
2544 }
2545
2546 /* On the other hand, maybe we have a character class */
2547
2548 s++;
2549 if (*s == ']' || *s == '^')
2550 return FALSE;
2551 else {
ffb4593c 2552 /* this is terrifying, and it works */
79072805
LW
2553 int weight = 2; /* let's weigh the evidence */
2554 char seen[256];
f27ffc4a 2555 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2556 const char * const send = strchr(s,']');
3280af22 2557 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2558
2559 if (!send) /* has to be an expression */
2560 return TRUE;
2561
2562 Zero(seen,256,char);
2563 if (*s == '$')
2564 weight -= 3;
2565 else if (isDIGIT(*s)) {
2566 if (s[1] != ']') {
2567 if (isDIGIT(s[1]) && s[2] == ']')
2568 weight -= 10;
2569 }
2570 else
2571 weight -= 100;
2572 }
2573 for (; s < send; s++) {
2574 last_un_char = un_char;
2575 un_char = (unsigned char)*s;
2576 switch (*s) {
2577 case '@':
2578 case '&':
2579 case '$':
2580 weight -= seen[un_char] * 10;
7e2040f0 2581 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2582 int len;
8903cb82 2583 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2584 len = (int)strlen(tmpbuf);
2585 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2586 weight -= 100;
2587 else
2588 weight -= 10;
2589 }
2590 else if (*s == '$' && s[1] &&
93a17b20
LW
2591 strchr("[#!%*<>()-=",s[1])) {
2592 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2593 weight -= 10;
2594 else
2595 weight -= 1;
2596 }
2597 break;
2598 case '\\':
2599 un_char = 254;
2600 if (s[1]) {
93a17b20 2601 if (strchr("wds]",s[1]))
79072805 2602 weight += 100;
10edeb5d 2603 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2604 weight += 1;
93a17b20 2605 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2606 weight += 40;
2607 else if (isDIGIT(s[1])) {
2608 weight += 40;
2609 while (s[1] && isDIGIT(s[1]))
2610 s++;
2611 }
2612 }
2613 else
2614 weight += 100;
2615 break;
2616 case '-':
2617 if (s[1] == '\\')
2618 weight += 50;
93a17b20 2619 if (strchr("aA01! ",last_un_char))
79072805 2620 weight += 30;
93a17b20 2621 if (strchr("zZ79~",s[1]))
79072805 2622 weight += 30;
f27ffc4a
GS
2623 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2624 weight -= 5; /* cope with negative subscript */
79072805
LW
2625 break;
2626 default:
3792a11b
NC
2627 if (!isALNUM(last_un_char)
2628 && !(last_un_char == '$' || last_un_char == '@'
2629 || last_un_char == '&')
2630 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2631 char *d = tmpbuf;
2632 while (isALPHA(*s))
2633 *d++ = *s++;
2634 *d = '\0';
5458a98a 2635 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2636 weight -= 150;
2637 }
2638 if (un_char == last_un_char + 1)
2639 weight += 5;
2640 weight -= seen[un_char];
2641 break;
2642 }
2643 seen[un_char]++;
2644 }
2645 if (weight >= 0) /* probably a character class */
2646 return FALSE;
2647 }
2648
2649 return TRUE;
2650}
ffed7fef 2651
ffb4593c
NT
2652/*
2653 * S_intuit_method
2654 *
2655 * Does all the checking to disambiguate
2656 * foo bar
2657 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2658 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2659 *
2660 * First argument is the stuff after the first token, e.g. "bar".
2661 *
2662 * Not a method if bar is a filehandle.
2663 * Not a method if foo is a subroutine prototyped to take a filehandle.
2664 * Not a method if it's really "Foo $bar"
2665 * Method if it's "foo $bar"
2666 * Not a method if it's really "print foo $bar"
2667 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2668 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2669 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2670 * =>
2671 */
2672
76e3520e 2673STATIC int
62d55b22 2674S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2675{
97aff369 2676 dVAR;
a0d0e21e 2677 char *s = start + (*start == '$');
3280af22 2678 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2679 STRLEN len;
2680 GV* indirgv;
5db06880
NC
2681#ifdef PERL_MAD
2682 int soff;
2683#endif
a0d0e21e
LW
2684
2685 if (gv) {
62d55b22 2686 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2687 return 0;
62d55b22
NC
2688 if (cv) {
2689 if (SvPOK(cv)) {
2690 const char *proto = SvPVX_const(cv);
2691 if (proto) {
2692 if (*proto == ';')
2693 proto++;
2694 if (*proto == '*')
2695 return 0;
2696 }
b6c543e3
IZ
2697 }
2698 } else
c35e046a 2699 gv = NULL;
a0d0e21e 2700 }
8903cb82 2701 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2702 /* start is the beginning of the possible filehandle/object,
2703 * and s is the end of it
2704 * tmpbuf is a copy of it
2705 */
2706
a0d0e21e 2707 if (*start == '$') {
3ef1310e
RGS
2708 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2709 isUPPER(*PL_tokenbuf))
a0d0e21e 2710 return 0;
5db06880
NC
2711#ifdef PERL_MAD
2712 len = start - SvPVX(PL_linestr);
2713#endif
29595ff2 2714 s = PEEKSPACE(s);
f0092767 2715#ifdef PERL_MAD
5db06880
NC
2716 start = SvPVX(PL_linestr) + len;
2717#endif
3280af22
NIS
2718 PL_bufptr = start;
2719 PL_expect = XREF;
a0d0e21e
LW
2720 return *s == '(' ? FUNCMETH : METHOD;
2721 }
5458a98a 2722 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2723 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2724 len -= 2;
2725 tmpbuf[len] = '\0';
5db06880
NC
2726#ifdef PERL_MAD
2727 soff = s - SvPVX(PL_linestr);
2728#endif
c3e0f903
GS
2729 goto bare_package;
2730 }
90e5519e 2731 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2732 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2733 return 0;
2734 /* filehandle or package name makes it a method */
da51bb9b 2735 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2736#ifdef PERL_MAD
2737 soff = s - SvPVX(PL_linestr);
2738#endif
29595ff2 2739 s = PEEKSPACE(s);
3280af22 2740 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2741 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2742 bare_package:
cd81e915 2743 start_force(PL_curforce);
9ded7720 2744 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2745 newSVpvn(tmpbuf,len));
9ded7720 2746 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2747 if (PL_madskills)
2748 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2749 PL_expect = XTERM;
a0d0e21e 2750 force_next(WORD);
3280af22 2751 PL_bufptr = s;
5db06880
NC
2752#ifdef PERL_MAD
2753 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2754#endif
a0d0e21e
LW
2755 return *s == '(' ? FUNCMETH : METHOD;
2756 }
2757 }
2758 return 0;
2759}
2760
ffb4593c
NT
2761/*
2762 * S_incl_perldb
2763 * Return a string of Perl code to load the debugger. If PERL5DB
2764 * is set, it will return the contents of that, otherwise a
2765 * compile-time require of perl5db.pl.
2766 */
2767
bfed75c6 2768STATIC const char*
cea2e8a9 2769S_incl_perldb(pTHX)
a0d0e21e 2770{
97aff369 2771 dVAR;
3280af22 2772 if (PL_perldb) {
9d4ba2ae 2773 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2774
2775 if (pdb)
2776 return pdb;
93189314 2777 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2778 return "BEGIN { require 'perl5db.pl' }";
2779 }
2780 return "";
2781}
2782
2783
16d20bd9 2784/* Encoded script support. filter_add() effectively inserts a
4e553d73 2785 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2786 * Note that the filter function only applies to the current source file
2787 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2788 *
2789 * The datasv parameter (which may be NULL) can be used to pass
2790 * private data to this instance of the filter. The filter function
2791 * can recover the SV using the FILTER_DATA macro and use it to
2792 * store private buffers and state information.
2793 *
2794 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2795 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2796 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2797 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2798 * private use must be set using malloc'd pointers.
2799 */
16d20bd9
AD
2800
2801SV *
864dbfa3 2802Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2803{
97aff369 2804 dVAR;
f4c556ac 2805 if (!funcp)
a0714e2c 2806 return NULL;
f4c556ac 2807
5486870f
DM
2808 if (!PL_parser)
2809 return NULL;
2810
3280af22
NIS
2811 if (!PL_rsfp_filters)
2812 PL_rsfp_filters = newAV();
16d20bd9 2813 if (!datasv)
561b68a9 2814 datasv = newSV(0);
862a34c6 2815 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2816 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2817 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2818 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2819 FPTR2DPTR(void *, IoANY(datasv)),
2820 SvPV_nolen(datasv)));
3280af22
NIS
2821 av_unshift(PL_rsfp_filters, 1);
2822 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2823 return(datasv);
2824}
4e553d73 2825
16d20bd9
AD
2826
2827/* Delete most recently added instance of this filter function. */
a0d0e21e 2828void
864dbfa3 2829Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2830{
97aff369 2831 dVAR;
e0c19803 2832 SV *datasv;
24801a4b 2833
33073adb 2834#ifdef DEBUGGING
55662e27
JH
2835 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2836 FPTR2DPTR(void*, funcp)));
33073adb 2837#endif
5486870f 2838 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2839 return;
2840 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2841 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2842 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2843 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2844 IoANY(datasv) = (void *)NULL;
3280af22 2845 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2846
16d20bd9
AD
2847 return;
2848 }
2849 /* we need to search for the correct entry and clear it */
cea2e8a9 2850 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2851}
2852
2853
1de9afcd
RGS
2854/* Invoke the idxth filter function for the current rsfp. */
2855/* maxlen 0 = read one text line */
16d20bd9 2856I32
864dbfa3 2857Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2858{
97aff369 2859 dVAR;
16d20bd9
AD
2860 filter_t funcp;
2861 SV *datasv = NULL;
f482118e
NC
2862 /* This API is bad. It should have been using unsigned int for maxlen.
2863 Not sure if we want to change the API, but if not we should sanity
2864 check the value here. */
39cd7a59
NC
2865 const unsigned int correct_length
2866 = maxlen < 0 ?
2867#ifdef PERL_MICRO
2868 0x7FFFFFFF
2869#else
2870 INT_MAX
2871#endif
2872 : maxlen;
e50aee73 2873
5486870f 2874 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2875 return -1;
1de9afcd 2876 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2877 /* Provide a default input filter to make life easy. */
2878 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2879 DEBUG_P(PerlIO_printf(Perl_debug_log,
2880 "filter_read %d: from rsfp\n", idx));
f482118e 2881 if (correct_length) {
16d20bd9
AD
2882 /* Want a block */
2883 int len ;
f54cb97a 2884 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2885
2886 /* ensure buf_sv is large enough */
f482118e
NC
2887 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2888 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2889 correct_length)) <= 0) {
3280af22 2890 if (PerlIO_error(PL_rsfp))
37120919
AD
2891 return -1; /* error */
2892 else
2893 return 0 ; /* end of file */
2894 }
16d20bd9
AD
2895 SvCUR_set(buf_sv, old_len + len) ;
2896 } else {
2897 /* Want a line */
3280af22
NIS
2898 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2899 if (PerlIO_error(PL_rsfp))
37120919
AD
2900 return -1; /* error */
2901 else
2902 return 0 ; /* end of file */
2903 }
16d20bd9
AD
2904 }
2905 return SvCUR(buf_sv);
2906 }
2907 /* Skip this filter slot if filter has been deleted */
1de9afcd 2908 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2909 DEBUG_P(PerlIO_printf(Perl_debug_log,
2910 "filter_read %d: skipped (filter deleted)\n",
2911 idx));
f482118e 2912 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2913 }
2914 /* Get function pointer hidden within datasv */
8141890a 2915 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2916 DEBUG_P(PerlIO_printf(Perl_debug_log,
2917 "filter_read %d: via function %p (%s)\n",
ca0270c4 2918 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2919 /* Call function. The function is expected to */
2920 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2921 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2922 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2923}
2924
76e3520e 2925STATIC char *
cea2e8a9 2926S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2927{
97aff369 2928 dVAR;
c39cd008 2929#ifdef PERL_CR_FILTER
3280af22 2930 if (!PL_rsfp_filters) {
c39cd008 2931 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2932 }
2933#endif
3280af22 2934 if (PL_rsfp_filters) {
55497cff
PP
2935 if (!append)
2936 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2937 if (FILTER_READ(0, sv, 0) > 0)
2938 return ( SvPVX(sv) ) ;
2939 else
bd61b366 2940 return NULL ;
16d20bd9 2941 }
9d116dd7 2942 else
fd049845 2943 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2944}
2945
01ec43d0 2946STATIC HV *
9bde8eb0 2947S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2948{
97aff369 2949 dVAR;
def3634b
GS
2950 GV *gv;
2951
01ec43d0 2952 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2953 return PL_curstash;
2954
2955 if (len > 2 &&
2956 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2957 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2958 {
2959 return GvHV(gv); /* Foo:: */
def3634b
GS
2960 }
2961
2962 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2963 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2964 if (gv && GvCV(gv)) {
2965 SV * const sv = cv_const_sv(GvCV(gv));
2966 if (sv)
9bde8eb0 2967 pkgname = SvPV_const(sv, len);
def3634b
GS
2968 }
2969
9bde8eb0 2970 return gv_stashpvn(pkgname, len, 0);
def3634b 2971}
a0d0e21e 2972
e3f73d4e
RGS
2973/*
2974 * S_readpipe_override
2975 * Check whether readpipe() is overriden, and generates the appropriate
2976 * optree, provided sublex_start() is called afterwards.
2977 */
2978STATIC void
1d51329b 2979S_readpipe_override(pTHX)
e3f73d4e
RGS
2980{
2981 GV **gvp;
2982 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2983 yylval.ival = OP_BACKTICK;
2984 if ((gv_readpipe
2985 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2986 ||
2987 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2988 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2989 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2990 {
2991 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2992 append_elem(OP_LIST,
2993 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2994 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2995 }
e3f73d4e
RGS
2996}
2997
5db06880
NC
2998#ifdef PERL_MAD
2999 /*
3000 * Perl_madlex
3001 * The intent of this yylex wrapper is to minimize the changes to the
3002 * tokener when we aren't interested in collecting madprops. It remains
3003 * to be seen how successful this strategy will be...
3004 */
3005
3006int
3007Perl_madlex(pTHX)
3008{
3009 int optype;
3010 char *s = PL_bufptr;
3011
cd81e915
NC
3012 /* make sure PL_thiswhite is initialized */
3013 PL_thiswhite = 0;
3014 PL_thismad = 0;
5db06880 3015
cd81e915 3016 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3017 if (PL_pending_ident)
3018 return S_pending_ident(aTHX);
3019
3020 /* previous token ate up our whitespace? */
cd81e915
NC
3021 if (!PL_lasttoke && PL_nextwhite) {
3022 PL_thiswhite = PL_nextwhite;
3023 PL_nextwhite = 0;
5db06880
NC
3024 }
3025
3026 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3027 PL_realtokenstart = -1;
3028 PL_thistoken = 0;
5db06880
NC
3029 optype = yylex();
3030 s = PL_bufptr;
cd81e915 3031 assert(PL_curforce < 0);
5db06880 3032
cd81e915
NC
3033 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3034 if (!PL_thistoken) {
3035 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3036 PL_thistoken = newSVpvs("");
5db06880 3037 else {
c35e046a 3038 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3039 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3040 }
3041 }
cd81e915
NC
3042 if (PL_thismad) /* install head */
3043 CURMAD('X', PL_thistoken);
5db06880
NC
3044 }
3045
3046 /* last whitespace of a sublex? */
cd81e915
NC
3047 if (optype == ')' && PL_endwhite) {
3048 CURMAD('X', PL_endwhite);
5db06880
NC
3049 }
3050
cd81e915 3051 if (!PL_thismad) {
5db06880
NC
3052
3053 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3054 if (!PL_thiswhite && !PL_endwhite && !optype) {
3055 sv_free(PL_thistoken);
3056 PL_thistoken = 0;
5db06880
NC
3057 return 0;
3058 }
3059
3060 /* put off final whitespace till peg */
3061 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3062 PL_nextwhite = PL_thiswhite;
3063 PL_thiswhite = 0;
5db06880 3064 }
cd81e915
NC
3065 else if (PL_thisopen) {
3066 CURMAD('q', PL_thisopen);
3067 if (PL_thistoken)
3068 sv_free(PL_thistoken);
3069 PL_thistoken = 0;
5db06880
NC
3070 }
3071 else {
3072 /* Store actual token text as madprop X */
cd81e915 3073 CURMAD('X', PL_thistoken);
5db06880
NC
3074 }
3075
cd81e915 3076 if (PL_thiswhite) {
5db06880 3077 /* add preceding whitespace as madprop _ */
cd81e915 3078 CURMAD('_', PL_thiswhite);
5db06880
NC
3079 }
3080
cd81e915 3081 if (PL_thisstuff) {
5db06880 3082 /* add quoted material as madprop = */
cd81e915 3083 CURMAD('=', PL_thisstuff);
5db06880
NC
3084 }
3085
cd81e915 3086 if (PL_thisclose) {
5db06880 3087 /* add terminating quote as madprop Q */
cd81e915 3088 CURMAD('Q', PL_thisclose);
5db06880
NC
3089 }
3090 }
3091
3092 /* special processing based on optype */
3093
3094 switch (optype) {
3095
3096 /* opval doesn't need a TOKEN since it can already store mp */
3097 case WORD:
3098 case METHOD:
3099 case FUNCMETH:
3100 case THING:
3101 case PMFUNC:
3102 case PRIVATEREF:
3103 case FUNC0SUB:
3104 case UNIOPSUB:
3105 case LSTOPSUB:
3106 if (yylval.opval)
cd81e915
NC
3107 append_madprops(PL_thismad, yylval.opval, 0);
3108 PL_thismad = 0;
5db06880
NC
3109 return optype;
3110
3111 /* fake EOF */
3112 case 0:
3113 optype = PEG;
cd81e915
NC
3114 if (PL_endwhite) {
3115 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3116 PL_endwhite = 0;
5db06880
NC
3117 }
3118 break;
3119
3120 case ']':
3121 case '}':
cd81e915 3122 if (PL_faketokens)
5db06880
NC
3123 break;
3124 /* remember any fake bracket that lexer is about to discard */
3125 if (PL_lex_brackets == 1 &&
3126 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3127 {
3128 s = PL_bufptr;
3129 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3130 s++;
3131 if (*s == '}') {
cd81e915
NC
3132 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3133 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3134 PL_thiswhite = 0;
5db06880
NC
3135 PL_bufptr = s - 1;
3136 break; /* don't bother looking for trailing comment */
3137 }
3138 else
3139 s = PL_bufptr;
3140 }
3141 if (optype == ']')
3142 break;
3143 /* FALLTHROUGH */
3144
3145 /* attach a trailing comment to its statement instead of next token */
3146 case ';':
cd81e915 3147 if (PL_faketokens)
5db06880
NC
3148 break;
3149 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3150 s = PL_bufptr;
3151 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3152 s++;
3153 if (*s == '\n' || *s == '#') {
3154 while (s < PL_bufend && *s != '\n')
3155 s++;
3156 if (s < PL_bufend)
3157 s++;
cd81e915
NC
3158 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3159 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3160 PL_thiswhite = 0;
5db06880
NC
3161 PL_bufptr = s;
3162 }
3163 }
3164 break;
3165
3166 /* pval */
3167 case LABEL:
3168 break;
3169
3170 /* ival */
3171 default:
3172 break;
3173
3174 }
3175
3176 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3177 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3178 PL_thismad = 0;
5db06880
NC
3179 return optype;
3180}
3181#endif
3182
468aa647 3183STATIC char *
cc6ed77d 3184S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3185 dVAR;
468aa647
RGS
3186 if (PL_expect != XSTATE)
3187 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3188 is_use ? "use" : "no"));
29595ff2 3189 s = SKIPSPACE1(s);
468aa647
RGS
3190 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3191 s = force_version(s, TRUE);
29595ff2 3192 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3193 start_force(PL_curforce);
9ded7720 3194 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3195 force_next(WORD);
3196 }
3197 else if (*s == 'v') {
3198 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3199 s = force_version(s, FALSE);
3200 }
3201 }
3202 else {
3203 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3204 s = force_version(s, FALSE);
3205 }
3206 yylval.ival = is_use;
3207 return s;
3208}
748a9306 3209#ifdef DEBUGGING
27da23d5 3210 static const char* const exp_name[] =
09bef843 3211 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3212 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3213 };
748a9306 3214#endif
463ee0b2 3215
02aa26ce
NT
3216/*
3217 yylex
3218
3219 Works out what to call the token just pulled out of the input
3220 stream. The yacc parser takes care of taking the ops we return and
3221 stitching them into a tree.
3222
3223 Returns:
3224 PRIVATEREF
3225
3226 Structure:
3227 if read an identifier
3228 if we're in a my declaration
3229 croak if they tried to say my($foo::bar)
3230 build the ops for a my() declaration
3231 if it's an access to a my() variable
3232 are we in a sort block?
3233 croak if my($a); $a <=> $b
3234 build ops for access to a my() variable
3235 if in a dq string, and they've said @foo and we can't find @foo
3236 croak
3237 build ops for a bareword
3238 if we already built the token before, use it.
3239*/
3240
20141f0e 3241
dba4d153
JH
3242#ifdef __SC__
3243#pragma segment Perl_yylex
3244#endif
dba4d153 3245int
dba4d153 3246Perl_yylex(pTHX)
20141f0e 3247{
97aff369 3248 dVAR;
3afc138a 3249 register char *s = PL_bufptr;
378cc40b 3250 register char *d;
463ee0b2 3251 STRLEN len;
aa7440fb 3252 bool bof = FALSE;
a687059c 3253
10edeb5d
JH
3254 /* orig_keyword, gvp, and gv are initialized here because
3255 * jump to the label just_a_word_zero can bypass their
3256 * initialization later. */
3257 I32 orig_keyword = 0;
3258 GV *gv = NULL;
3259 GV **gvp = NULL;
3260
bbf60fe6 3261 DEBUG_T( {
396482e1 3262 SV* tmp = newSVpvs("");
b6007c36
DM
3263 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3264 (IV)CopLINE(PL_curcop),
3265 lex_state_names[PL_lex_state],
3266 exp_name[PL_expect],
3267 pv_display(tmp, s, strlen(s), 0, 60));
3268 SvREFCNT_dec(tmp);
bbf60fe6 3269 } );
02aa26ce 3270 /* check if there's an identifier for us to look at */
ba979b31 3271 if (PL_pending_ident)
bbf60fe6 3272 return REPORT(S_pending_ident(aTHX));
bbce6d69 3273
02aa26ce
NT
3274 /* no identifier pending identification */
3275
3280af22 3276 switch (PL_lex_state) {
79072805
LW
3277#ifdef COMMENTARY
3278 case LEX_NORMAL: /* Some compilers will produce faster */
3279 case LEX_INTERPNORMAL: /* code if we comment these out. */
3280 break;
3281#endif
3282
09bef843 3283 /* when we've already built the next token, just pull it out of the queue */
79072805 3284 case LEX_KNOWNEXT:
5db06880
NC
3285#ifdef PERL_MAD
3286 PL_lasttoke--;
3287 yylval = PL_nexttoke[PL_lasttoke].next_val;
3288 if (PL_madskills) {
cd81e915 3289 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3290 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3291 if (PL_thismad && PL_thismad->mad_key == '_') {
3292 PL_thiswhite = (SV*)PL_thismad->mad_val;
3293 PL_thismad->mad_val = 0;
3294 mad_free(PL_thismad);
3295 PL_thismad = 0;
5db06880
NC
3296 }
3297 }
3298 if (!PL_lasttoke) {
3299 PL_lex_state = PL_lex_defer;
3300 PL_expect = PL_lex_expect;
3301 PL_lex_defer = LEX_NORMAL;
3302 if (!PL_nexttoke[PL_lasttoke].next_type)
3303 return yylex();
3304 }
3305#else
3280af22 3306 PL_nexttoke--;
5db06880 3307 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3308 if (!PL_nexttoke) {
3309 PL_lex_state = PL_lex_defer;
3310 PL_expect = PL_lex_expect;
3311 PL_lex_defer = LEX_NORMAL;
463ee0b2 3312 }
5db06880
NC
3313#endif
3314#ifdef PERL_MAD
3315 /* FIXME - can these be merged? */
3316 return(PL_nexttoke[PL_lasttoke].next_type);
3317#else
bbf60fe6 3318 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3319#endif
79072805 3320
02aa26ce 3321 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3322 when we get here, PL_bufptr is at the \
02aa26ce 3323 */
79072805
LW
3324 case LEX_INTERPCASEMOD:
3325#ifdef DEBUGGING
3280af22 3326 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3327 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3328#endif
02aa26ce 3329 /* handle \E or end of string */
3280af22 3330 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3331 /* if at a \E */
3280af22 3332 if (PL_lex_casemods) {
f54cb97a 3333 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3334 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3335
3792a11b
NC
3336 if (PL_bufptr != PL_bufend
3337 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3338 PL_bufptr += 2;
3339 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3340#ifdef PERL_MAD
3341 if (PL_madskills)
6b29d1f5 3342 PL_thistoken = newSVpvs("\\E");
5db06880 3343#endif
a0d0e21e 3344 }
bbf60fe6 3345 return REPORT(')');
79072805 3346 }
5db06880
NC
3347#ifdef PERL_MAD
3348 while (PL_bufptr != PL_bufend &&
3349 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3350 if (!PL_thiswhite)
6b29d1f5 3351 PL_thiswhite = newSVpvs("");
cd81e915 3352 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3353 PL_bufptr += 2;
3354 }
3355#else
3280af22
NIS
3356 if (PL_bufptr != PL_bufend)
3357 PL_bufptr += 2;
5db06880 3358#endif
3280af22 3359 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3360 return yylex();
79072805
LW
3361 }
3362 else {
607df283 3363 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3364 "### Saw case modifier\n"); });
3280af22 3365 s = PL_bufptr + 1;
6e909404 3366 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3367#ifdef PERL_MAD
cd81e915 3368 if (!PL_thiswhite)
6b29d1f5 3369 PL_thiswhite = newSVpvs("");
cd81e915 3370 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3371#endif
89122651 3372 PL_bufptr = s + 3;
6e909404
JH
3373 PL_lex_state = LEX_INTERPCONCAT;
3374 return yylex();
a0d0e21e 3375 }
6e909404 3376 else {
90771dc0 3377 I32 tmp;
5db06880
NC
3378 if (!PL_madskills) /* when just compiling don't need correct */
3379 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3380 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3381 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3382 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3383 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3384 return REPORT(')');
6e909404
JH
3385 }
3386 if (PL_lex_casemods > 10)
3387 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3388 PL_lex_casestack[PL_lex_casemods++] = *s;
3389 PL_lex_casestack[PL_lex_casemods] = '\0';
3390 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3391 start_force(PL_curforce);
9ded7720 3392 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3393 force_next('(');
cd81e915 3394 start_force(PL_curforce);
6e909404 3395 if (*s == 'l')
9ded7720 3396 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3397 else if (*s == 'u')
9ded7720 3398 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3399 else if (*s == 'L')
9ded7720 3400 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3401 else if (*s == 'U')
9ded7720 3402 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3403 else if (*s == 'Q')
9ded7720 3404 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3405 else
3406 Perl_croak(aTHX_ "panic: yylex");
5db06880 3407 if (PL_madskills) {
a5849ce5
NC
3408 SV* const tmpsv = newSVpvs("\\ ");
3409 /* replace the space with the character we want to escape
3410 */
3411 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3412 curmad('_', tmpsv);
3413 }
6e909404 3414 PL_bufptr = s + 1;
a0d0e21e 3415 }
79072805 3416 force_next(FUNC);
3280af22
NIS
3417 if (PL_lex_starts) {
3418 s = PL_bufptr;
3419 PL_lex_starts = 0;
5db06880
NC
3420#ifdef PERL_MAD
3421 if (PL_madskills) {
cd81e915
NC
3422 if (PL_thistoken)
3423 sv_free(PL_thistoken);
6b29d1f5 3424 PL_thistoken = newSVpvs("");
5db06880
NC
3425 }
3426#endif
131b3ad0
DM
3427 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3428 if (PL_lex_casemods == 1 && PL_lex_inpat)
3429 OPERATOR(',');
3430 else
3431 Aop(OP_CONCAT);
79072805
LW
3432 }
3433 else
cea2e8a9 3434 return yylex();
79072805
LW
3435 }
3436
55497cff 3437 case LEX_INTERPPUSH:
bbf60fe6 3438 return REPORT(sublex_push());
55497cff 3439
79072805 3440 case LEX_INTERPSTART:
3280af22 3441 if (PL_bufptr == PL_bufend)
bbf60fe6 3442 return REPORT(sublex_done());
607df283 3443 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3444 "### Interpolated variable\n"); });
3280af22
NIS
3445 PL_expect = XTERM;
3446 PL_lex_dojoin = (*PL_bufptr == '@');
3447 PL_lex_state = LEX_INTERPNORMAL;
3448 if (PL_lex_dojoin) {
cd81e915 3449 start_force(PL_curforce);
9ded7720 3450 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3451 force_next(',');
cd81e915 3452 start_force(PL_curforce);
a0d0e21e 3453 force_ident("\"", '$');
cd81e915 3454 start_force(PL_curforce);
9ded7720 3455 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3456 force_next('$');
cd81e915 3457 start_force(PL_curforce);
9ded7720 3458 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3459 force_next('(');
cd81e915 3460 start_force(PL_curforce);
9ded7720 3461 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3462 force_next(FUNC);
3463 }
3280af22
NIS
3464 if (PL_lex_starts++) {
3465 s = PL_bufptr;
5db06880
NC
3466#ifdef PERL_MAD
3467 if (PL_madskills) {
cd81e915
NC
3468 if (PL_thistoken)
3469 sv_free(PL_thistoken);
6b29d1f5 3470 PL_thistoken = newSVpvs("");
5db06880
NC
3471 }
3472#endif
131b3ad0
DM
3473 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3474 if (!PL_lex_casemods && PL_lex_inpat)
3475 OPERATOR(',');
3476 else
3477 Aop(OP_CONCAT);
79072805 3478 }
cea2e8a9 3479 return yylex();
79072805
LW
3480
3481 case LEX_INTERPENDMAYBE:
3280af22
NIS
3482 if (intuit_more(PL_bufptr)) {
3483 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3484 break;
3485 }
3486 /* FALL THROUGH */
3487
3488 case LEX_INTERPEND:
3280af22
NIS
3489 if (PL_lex_dojoin) {
3490 PL_lex_dojoin = FALSE;
3491 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3492#ifdef PERL_MAD
3493 if (PL_madskills) {
cd81e915
NC
3494 if (PL_thistoken)
3495 sv_free(PL_thistoken);
6b29d1f5 3496 PL_thistoken = newSVpvs("");
5db06880
NC
3497 }
3498#endif
bbf60fe6 3499 return REPORT(')');
79072805 3500 }
43a16006 3501 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3502 && SvEVALED(PL_lex_repl))
43a16006 3503 {
e9fa98b2 3504 if (PL_bufptr != PL_bufend)
cea2e8a9 3505 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3506 PL_lex_repl = NULL;
e9fa98b2 3507 }
79072805
LW
3508 /* FALLTHROUGH */
3509 case LEX_INTERPCONCAT:
3510#ifdef DEBUGGING
3280af22 3511 if (PL_lex_brackets)
cea2e8a9 3512 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3513#endif
3280af22 3514 if (PL_bufptr == PL_bufend)
bbf60fe6 3515 return REPORT(sublex_done());
79072805 3516
3280af22
NIS
3517 if (SvIVX(PL_linestr) == '\'') {
3518 SV *sv = newSVsv(PL_linestr);
3519 if (!PL_lex_inpat)
76e3520e 3520 sv = tokeq(sv);
3280af22 3521 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 3522 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
79072805 3523 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3524 s = PL_bufend;
79072805
LW
3525 }
3526 else {
3280af22 3527 s = scan_const(PL_bufptr);
79072805 3528 if (*s == '\\')
3280af22 3529 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3530 else
3280af22 3531 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3532 }
3533
3280af22 3534 if (s != PL_bufptr) {
cd81e915 3535 start_force(PL_curforce);
5db06880
NC
3536 if (PL_madskills) {
3537 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3538 }
9ded7720 3539 NEXTVAL_NEXTTOKE = yylval;
3280af22 3540 PL_expect = XTERM;
79072805 3541 force_next(THING);
131b3ad0 3542 if (PL_lex_starts++) {
5db06880
NC
3543#ifdef PERL_MAD
3544 if (PL_madskills) {
cd81e915
NC
3545 if (PL_thistoken)
3546 sv_free(PL_thistoken);
6b29d1f5 3547 PL_thistoken = newSVpvs("");
5db06880
NC
3548 }
3549#endif
131b3ad0
DM
3550 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3551 if (!PL_lex_casemods && PL_lex_inpat)
3552 OPERATOR(',');
3553 else
3554 Aop(OP_CONCAT);