This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Silence a casting warning
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
eb0d8d16
NC
26#define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
28
5912531f 29#define yylval (PL_parser->yylval)
d3b6f988 30
acdf0a21
DM
31/* YYINITDEPTH -- initial size of the parser's stacks. */
32#define YYINITDEPTH 200
33
199e78b7
DM
34/* XXX temporary backwards compatibility */
35#define PL_lex_brackets (PL_parser->lex_brackets)
36#define PL_lex_brackstack (PL_parser->lex_brackstack)
37#define PL_lex_casemods (PL_parser->lex_casemods)
38#define PL_lex_casestack (PL_parser->lex_casestack)
39#define PL_lex_defer (PL_parser->lex_defer)
40#define PL_lex_dojoin (PL_parser->lex_dojoin)
41#define PL_lex_expect (PL_parser->lex_expect)
42#define PL_lex_formbrack (PL_parser->lex_formbrack)
43#define PL_lex_inpat (PL_parser->lex_inpat)
44#define PL_lex_inwhat (PL_parser->lex_inwhat)
45#define PL_lex_op (PL_parser->lex_op)
46#define PL_lex_repl (PL_parser->lex_repl)
47#define PL_lex_starts (PL_parser->lex_starts)
48#define PL_lex_stuff (PL_parser->lex_stuff)
49#define PL_multi_start (PL_parser->multi_start)
50#define PL_multi_open (PL_parser->multi_open)
51#define PL_multi_close (PL_parser->multi_close)
52#define PL_pending_ident (PL_parser->pending_ident)
53#define PL_preambled (PL_parser->preambled)
54#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 55#define PL_linestr (PL_parser->linestr)
c2598295
DM
56#define PL_expect (PL_parser->expect)
57#define PL_copline (PL_parser->copline)
f06b5848
DM
58#define PL_bufptr (PL_parser->bufptr)
59#define PL_oldbufptr (PL_parser->oldbufptr)
60#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61#define PL_linestart (PL_parser->linestart)
62#define PL_bufend (PL_parser->bufend)
63#define PL_last_uni (PL_parser->last_uni)
64#define PL_last_lop (PL_parser->last_lop)
65#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 66#define PL_lex_state (PL_parser->lex_state)
2f9285f8 67#define PL_rsfp (PL_parser->rsfp)
5486870f 68#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
69#define PL_in_my (PL_parser->in_my)
70#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 71#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 72#define PL_multi_end (PL_parser->multi_end)
13765c85 73#define PL_error_count (PL_parser->error_count)
199e78b7
DM
74
75#ifdef PERL_MAD
76# define PL_endwhite (PL_parser->endwhite)
77# define PL_faketokens (PL_parser->faketokens)
78# define PL_lasttoke (PL_parser->lasttoke)
79# define PL_nextwhite (PL_parser->nextwhite)
80# define PL_realtokenstart (PL_parser->realtokenstart)
81# define PL_skipwhite (PL_parser->skipwhite)
82# define PL_thisclose (PL_parser->thisclose)
83# define PL_thismad (PL_parser->thismad)
84# define PL_thisopen (PL_parser->thisopen)
85# define PL_thisstuff (PL_parser->thisstuff)
86# define PL_thistoken (PL_parser->thistoken)
87# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
88# define PL_thiswhite (PL_parser->thiswhite)
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_curforce (PL_parser->curforce)
91#else
92# define PL_nexttoke (PL_parser->nexttoke)
93# define PL_nexttype (PL_parser->nexttype)
94# define PL_nextval (PL_parser->nextval)
199e78b7
DM
95#endif
96
3cbf51f5
DM
97static int
98S_pending_ident(pTHX);
199e78b7 99
0bd48802 100static const char ident_too_long[] = "Identifier too long";
c445ea15 101static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 102
6e3aabd6 103#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
104static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 106#endif
51371543 107
29595ff2 108#ifdef PERL_MAD
29595ff2 109# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 110# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 111#else
5db06880 112# define CURMAD(slot,sv)
9ded7720 113# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
114#endif
115
9059aa12
LW
116#define XFAKEBRACK 128
117#define XENUMMASK 127
118
39e02b42
JH
119#ifdef USE_UTF8_SCRIPTS
120# define UTF (!IN_BYTES)
2b9d42f0 121#else
746b446a 122# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 123#endif
a0ed51b3 124
61f0cdd9 125/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
126 * 1999-02-27 mjd-perl-patch@plover.com */
127#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128
bf4acbe4
GS
129/* On MacOS, respect nonbreaking spaces */
130#ifdef MACOS_TRADITIONAL
131#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
132#else
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
134#endif
135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a
PP
141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 230# define REPORT(retval) tokereport((I32)retval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf
AL
261#define UNI2(f,x) { \
262 yylval.ival = f; \
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf
AL
275#define UNIBRACK(f) { \
276 yylval.ival = f; \
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
3280af22 286#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
bbf60fe6
DM
290/* how to interpret the yylval associated with the token */
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
294 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 371 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
372};
373
374/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 375
bbf60fe6 376STATIC int
f5bd084c 377S_tokereport(pTHX_ I32 rv)
bbf60fe6 378{
97aff369 379 dVAR;
bbf60fe6 380 if (DEBUG_T_TEST) {
bd61b366 381 const char *name = NULL;
bbf60fe6 382 enum token_type type = TOKENTYPE_NONE;
f54cb97a 383 const struct debug_tokens *p;
396482e1 384 SV* const report = newSVpvs("<== ");
bbf60fe6 385
f54cb97a 386 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
387 if (p->token == (int)rv) {
388 name = p->name;
389 type = p->type;
390 break;
391 }
392 }
393 if (name)
54667de8 394 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
397 else if (!rv)
396482e1 398 sv_catpvs(report, "EOF");
bbf60fe6
DM
399 else
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
401 switch (type) {
402 case TOKENTYPE_NONE:
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
404 break;
405 case TOKENTYPE_IVAL:
e4584336 406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
407 break;
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[yylval.ival]);
411 break;
412 case TOKENTYPE_PVAL:
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
414 break;
415 case TOKENTYPE_OPVAL:
b6007c36 416 if (yylval.opval) {
401441c0 417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 418 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
419 if (yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(yylval.opval)));
422 }
423
424 }
401441c0 425 else
396482e1 426 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
427 break;
428 }
b6007c36 429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
430 };
431 return (int)rv;
998054bd
SC
432}
433
b6007c36
DM
434
435/* print the buffer with suitable escapes */
436
437STATIC void
438S_printbuf(pTHX_ const char* fmt, const char* s)
439{
396482e1 440 SV* const tmp = newSVpvs("");
b6007c36
DM
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
442 SvREFCNT_dec(tmp);
443}
444
8fa7f367
JH
445#endif
446
ffb4593c
NT
447/*
448 * S_ao
449 *
c963b151
BD
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
452 */
453
76e3520e 454STATIC int
cea2e8a9 455S_ao(pTHX_ int toketype)
a0d0e21e 456{
97aff369 457 dVAR;
3280af22
NIS
458 if (*PL_bufptr == '=') {
459 PL_bufptr++;
a0d0e21e
LW
460 if (toketype == ANDAND)
461 yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 yylval.ival = OP_ORASSIGN;
c963b151
BD
464 else if (toketype == DORDOR)
465 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
466 toketype = ASSIGNOP;
467 }
468 return toketype;
469}
470
ffb4593c
NT
471/*
472 * S_no_op
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
482 */
483
76e3520e 484STATIC void
bfed75c6 485S_no_op(pTHX_ const char *what, char *s)
463ee0b2 486{
97aff369 487 dVAR;
9d4ba2ae
AL
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 490
1189a94a
GS
491 if (!s)
492 s = oldbp;
07c798fb 493 else
1189a94a 494 PL_bufptr = s;
cea2e8a9 495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
496 if (ckWARN_d(WARN_SYNTAX)) {
497 if (is_first)
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 501 const char *t;
c35e046a
AL
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
503 NOOP;
56da5a46
RGS
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
551405c4 507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
508 }
509 else {
510 assert(s >= oldbp);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 513 }
07c798fb 514 }
3280af22 515 PL_bufptr = oldbp;
8990e307
LW
516}
517
ffb4593c
NT
518/*
519 * S_missingterm
520 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 521 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
524 * This is fatal.
525 */
526
76e3520e 527STATIC void
cea2e8a9 528S_missingterm(pTHX_ char *s)
8990e307 529{
97aff369 530 dVAR;
8990e307
LW
531 char tmpbuf[3];
532 char q;
533 if (s) {
9d4ba2ae 534 char * const nl = strrchr(s,'\n');
d2719217 535 if (nl)
8990e307
LW
536 *nl = '\0';
537 }
9d116dd7
JH
538 else if (
539#ifdef EBCDIC
540 iscntrl(PL_multi_close)
541#else
542 PL_multi_close < 32 || PL_multi_close == 127
543#endif
544 ) {
8990e307 545 *tmpbuf = '^';
585ec06d 546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
547 tmpbuf[2] = '\0';
548 s = tmpbuf;
549 }
550 else {
eb160463 551 *tmpbuf = (char)PL_multi_close;
8990e307
LW
552 tmpbuf[1] = '\0';
553 s = tmpbuf;
554 }
555 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 557}
79072805 558
ef89dcc3 559#define FEATURE_IS_ENABLED(name) \
0d863452 560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
562/* The longest string we pass in. */
563#define MAX_FEATURE_LEN (sizeof("switch")-1)
564
0d863452
RH
565/*
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
568 */
569STATIC bool
d4c19fe8 570S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 571{
97aff369 572 dVAR;
0d863452 573 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b
NC
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
d4c19fe8 577
7b9ef140 578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
579}
580
ffb4593c
NT
581/*
582 * Perl_deprecate
ffb4593c
NT
583 */
584
79072805 585void
bfed75c6 586Perl_deprecate(pTHX_ const char *s)
a0d0e21e 587{
599cee73 588 if (ckWARN(WARN_DEPRECATED))
9014280d 589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
590}
591
12bcd1a6 592void
bfed75c6 593Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
594{
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
597 /* */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
602
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
605 "Use of %s is deprecated", s);
606}
607
ffb4593c 608/*
9cbb5ea2
GS
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
ffb4593c
NT
611 */
612
c39cd008
GS
613#ifdef PERL_CR_FILTER
614static void
615strip_return(SV *sv)
616{
95a20fc0 617 register const char *s = SvPVX_const(sv);
9d4ba2ae 618 register const char * const e = s + SvCUR(sv);
c39cd008
GS
619 /* outer loop optimized to do nothing if there are no CR-LFs */
620 while (s < e) {
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
624 *d++ = *s++;
625 while (s < e) {
626 if (*s == '\r' && s[1] == '\n')
627 s++;
628 *d++ = *s++;
629 }
630 SvCUR(sv) -= s - d;
631 return;
632 }
633 }
634}
a868473f 635
76e3520e 636STATIC I32
c39cd008 637S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 638{
f54cb97a 639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
640 if (count > 0 && !maxlen)
641 strip_return(sv);
642 return count;
a868473f
NIS
643}
644#endif
645
199e78b7
DM
646
647
ffb4593c
NT
648/*
649 * Perl_lex_start
5486870f 650 *
e3abe207 651 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
652 *
653 * rsfp is the opened file handle to read from (if any),
654 *
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
657 *
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
ffb4593c
NT
660 */
661
a0d0e21e 662void
5486870f 663Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 664{
97aff369 665 dVAR;
6ef55633 666 const char *s = NULL;
8990e307 667 STRLEN len;
5486870f 668 yy_parser *parser, *oparser;
acdf0a21
DM
669
670 /* create and initialise a parser */
671
199e78b7 672 Newxz(parser, 1, yy_parser);
5486870f 673 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
674 PL_parser = parser;
675
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
679
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
683
e3abe207
DM
684 /* on scope exit, free this parser and restore any outer one */
685 SAVEPARSER(parser);
7c4baf47 686 parser->saved_curcop = PL_curcop;
e3abe207 687
acdf0a21 688 /* initialise lexer state */
8990e307 689
fb205e7a
DM
690#ifdef PERL_MAD
691 parser->curforce = -1;
692#else
693 parser->nexttoke = 0;
694#endif
c2598295 695 parser->copline = NOLINE;
5afb0a62 696 parser->lex_state = LEX_NORMAL;
c2598295 697 parser->expect = XSTATE;
2f9285f8 698 parser->rsfp = rsfp;
56b27c9a 699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
5486870f 700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
2f9285f8 701
199e78b7
DM
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
02b34bbe 705
10efb74f
NC
706 if (line) {
707 s = SvPV_const(line, len);
708 } else {
709 len = 0;
710 }
bdc0bf6f 711
10efb74f 712 if (!len) {
bdc0bf6f 713 parser->linestr = newSVpvs("\n;");
10efb74f 714 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 715 parser->linestr = newSVsv(line);
10efb74f 716 if (s[len-1] != ';')
bdc0bf6f 717 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
718 } else {
719 SvTEMP_off(line);
720 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 721 parser->linestr = line;
8990e307 722 }
f06b5848
DM
723 parser->oldoldbufptr =
724 parser->oldbufptr =
725 parser->bufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
79072805 729}
a687059c 730
e3abe207
DM
731
732/* delete a parser object */
733
734void
735Perl_parser_free(pTHX_ const yy_parser *parser)
736{
7c4baf47 737 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
738 SvREFCNT_dec(parser->linestr);
739
2f9285f8
DM
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
5486870f 745 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 746
e3abe207
DM
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
751 Safefree(parser);
752}
753
754
ffb4593c
NT
755/*
756 * Perl_lex_end
9cbb5ea2
GS
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
ffb4593c
NT
759 */
760
463ee0b2 761void
864dbfa3 762Perl_lex_end(pTHX)
463ee0b2 763{
97aff369 764 dVAR;
3280af22 765 PL_doextract = FALSE;
463ee0b2
LW
766}
767
ffb4593c
NT
768/*
769 * S_incline
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
57843af0 772 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 773 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
776 */
777
76e3520e 778STATIC void
d9095cec 779S_incline(pTHX_ const char *s)
463ee0b2 780{
97aff369 781 dVAR;
d9095cec
NC
782 const char *t;
783 const char *n;
784 const char *e;
463ee0b2 785
57843af0 786 CopLINE_inc(PL_curcop);
463ee0b2
LW
787 if (*s++ != '#')
788 return;
d4c19fe8
AL
789 while (SPACE_OR_TAB(*s))
790 s++;
73659bf1
GS
791 if (strnEQ(s, "line", 4))
792 s += 4;
793 else
794 return;
084592ab 795 if (SPACE_OR_TAB(*s))
73659bf1 796 s++;
4e553d73 797 else
73659bf1 798 return;
d4c19fe8
AL
799 while (SPACE_OR_TAB(*s))
800 s++;
463ee0b2
LW
801 if (!isDIGIT(*s))
802 return;
d4c19fe8 803
463ee0b2
LW
804 n = s;
805 while (isDIGIT(*s))
806 s++;
bf4acbe4 807 while (SPACE_OR_TAB(*s))
463ee0b2 808 s++;
73659bf1 809 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 810 s++;
73659bf1
GS
811 e = t + 1;
812 }
463ee0b2 813 else {
c35e046a
AL
814 t = s;
815 while (!isSPACE(*t))
816 t++;
73659bf1 817 e = t;
463ee0b2 818 }
bf4acbe4 819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
820 e++;
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
823
f4dd75d9 824 if (t - s > 0) {
d9095cec 825 const STRLEN len = t - s;
8a5ee598 826#ifndef USE_ITHREADS
c4420975 827 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
828 STRLEN tmplen = cf ? strlen(cf) : 0;
829 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
830 /* must copy *{"::_<(eval N)[oldfilename:L]"}
831 * to *{"::_<newfilename"} */
44867030
NC
832 /* However, the long form of evals is only turned on by the
833 debugger - usually they're "(eval %lu)" */
834 char smallbuf[128];
835 char *tmpbuf;
836 GV **gvp;
d9095cec 837 STRLEN tmplen2 = len;
798b63bc 838 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
839 tmpbuf = smallbuf;
840 else
2ae0db35 841 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
842 tmpbuf[0] = '_';
843 tmpbuf[1] = '<';
2ae0db35 844 memcpy(tmpbuf + 2, cf, tmplen);
44867030 845 tmplen += 2;
8a5ee598
RGS
846 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
847 if (gvp) {
44867030
NC
848 char *tmpbuf2;
849 GV *gv2;
850
851 if (tmplen2 + 2 <= sizeof smallbuf)
852 tmpbuf2 = smallbuf;
853 else
854 Newx(tmpbuf2, tmplen2 + 2, char);
855
856 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
857 /* Either they malloc'd it, or we malloc'd it,
858 so no prefix is present in ours. */
859 tmpbuf2[0] = '_';
860 tmpbuf2[1] = '<';
861 }
862
863 memcpy(tmpbuf2 + 2, s, tmplen2);
864 tmplen2 += 2;
865
8a5ee598 866 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 867 if (!isGV(gv2)) {
8a5ee598 868 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
869 /* adjust ${"::_<newfilename"} to store the new file name */
870 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
871 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
872 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
873 }
44867030
NC
874
875 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 876 }
e66cf94c 877 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 878 }
8a5ee598 879#endif
05ec9bb3 880 CopFILE_free(PL_curcop);
d9095cec 881 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 882 }
57843af0 883 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
884}
885
29595ff2 886#ifdef PERL_MAD
cd81e915 887/* skip space before PL_thistoken */
29595ff2
NC
888
889STATIC char *
890S_skipspace0(pTHX_ register char *s)
891{
892 s = skipspace(s);
893 if (!PL_madskills)
894 return s;
cd81e915
NC
895 if (PL_skipwhite) {
896 if (!PL_thiswhite)
6b29d1f5 897 PL_thiswhite = newSVpvs("");
cd81e915
NC
898 sv_catsv(PL_thiswhite, PL_skipwhite);
899 sv_free(PL_skipwhite);
900 PL_skipwhite = 0;
901 }
902 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
903 return s;
904}
905
cd81e915 906/* skip space after PL_thistoken */
29595ff2
NC
907
908STATIC char *
909S_skipspace1(pTHX_ register char *s)
910{
d4c19fe8 911 const char *start = s;
29595ff2
NC
912 I32 startoff = start - SvPVX(PL_linestr);
913
914 s = skipspace(s);
915 if (!PL_madskills)
916 return s;
917 start = SvPVX(PL_linestr) + startoff;
cd81e915 918 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 919 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
920 PL_thistoken = newSVpvn(tstart, start - tstart);
921 }
922 PL_realtokenstart = -1;
923 if (PL_skipwhite) {
924 if (!PL_nextwhite)
6b29d1f5 925 PL_nextwhite = newSVpvs("");
cd81e915
NC
926 sv_catsv(PL_nextwhite, PL_skipwhite);
927 sv_free(PL_skipwhite);
928 PL_skipwhite = 0;
29595ff2
NC
929 }
930 return s;
931}
932
933STATIC char *
934S_skipspace2(pTHX_ register char *s, SV **svp)
935{
c35e046a
AL
936 char *start;
937 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
938 const I32 startoff = s - SvPVX(PL_linestr);
939
29595ff2
NC
940 s = skipspace(s);
941 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
942 if (!PL_madskills || !svp)
943 return s;
944 start = SvPVX(PL_linestr) + startoff;
cd81e915 945 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 946 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
947 PL_thistoken = newSVpvn(tstart, start - tstart);
948 PL_realtokenstart = -1;
29595ff2 949 }
cd81e915 950 if (PL_skipwhite) {
29595ff2 951 if (!*svp)
6b29d1f5 952 *svp = newSVpvs("");
cd81e915
NC
953 sv_setsv(*svp, PL_skipwhite);
954 sv_free(PL_skipwhite);
955 PL_skipwhite = 0;
29595ff2
NC
956 }
957
958 return s;
959}
960#endif
961
80a702cd 962STATIC void
5fa550fb 963S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
80a702cd
RGS
964{
965 AV *av = CopFILEAVx(PL_curcop);
966 if (av) {
b9f83d2f 967 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
968 if (orig_sv)
969 sv_setsv(sv, orig_sv);
970 else
971 sv_setpvn(sv, buf, len);
80a702cd
RGS
972 (void)SvIOK_on(sv);
973 SvIV_set(sv, 0);
974 av_store(av, (I32)CopLINE(PL_curcop), sv);
975 }
976}
977
ffb4593c
NT
978/*
979 * S_skipspace
980 * Called to gobble the appropriate amount and type of whitespace.
981 * Skips comments as well.
982 */
983
76e3520e 984STATIC char *
cea2e8a9 985S_skipspace(pTHX_ register char *s)
a687059c 986{
97aff369 987 dVAR;
5db06880
NC
988#ifdef PERL_MAD
989 int curoff;
990 int startoff = s - SvPVX(PL_linestr);
991
cd81e915
NC
992 if (PL_skipwhite) {
993 sv_free(PL_skipwhite);
994 PL_skipwhite = 0;
5db06880
NC
995 }
996#endif
997
3280af22 998 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 999 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1000 s++;
5db06880
NC
1001#ifdef PERL_MAD
1002 goto done;
1003#else
463ee0b2 1004 return s;
5db06880 1005#endif
463ee0b2
LW
1006 }
1007 for (;;) {
fd049845 1008 STRLEN prevlen;
09bef843 1009 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1010 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1011 while (s < PL_bufend && isSPACE(*s)) {
1012 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1013 incline(s);
1014 }
ffb4593c
NT
1015
1016 /* comment */
3280af22
NIS
1017 if (s < PL_bufend && *s == '#') {
1018 while (s < PL_bufend && *s != '\n')
463ee0b2 1019 s++;
60e6418e 1020 if (s < PL_bufend) {
463ee0b2 1021 s++;
60e6418e
GS
1022 if (PL_in_eval && !PL_rsfp) {
1023 incline(s);
1024 continue;
1025 }
1026 }
463ee0b2 1027 }
ffb4593c
NT
1028
1029 /* only continue to recharge the buffer if we're at the end
1030 * of the buffer, we're not reading from a source filter, and
1031 * we're in normal lexing mode
1032 */
09bef843
SB
1033 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1034 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1035#ifdef PERL_MAD
1036 goto done;
1037#else
463ee0b2 1038 return s;
5db06880 1039#endif
ffb4593c
NT
1040
1041 /* try to recharge the buffer */
5db06880
NC
1042#ifdef PERL_MAD
1043 curoff = s - SvPVX(PL_linestr);
1044#endif
1045
9cbb5ea2 1046 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1047 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1048 {
5db06880
NC
1049#ifdef PERL_MAD
1050 if (PL_madskills && curoff != startoff) {
cd81e915 1051 if (!PL_skipwhite)
6b29d1f5 1052 PL_skipwhite = newSVpvs("");
cd81e915 1053 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1054 curoff - startoff);
1055 }
1056
1057 /* mustn't throw out old stuff yet if madpropping */
1058 SvCUR(PL_linestr) = curoff;
1059 s = SvPVX(PL_linestr) + curoff;
1060 *s = 0;
1061 if (curoff && s[-1] == '\n')
1062 s[-1] = ' ';
1063#endif
1064
9cbb5ea2 1065 /* end of file. Add on the -p or -n magic */
cd81e915 1066 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1067 if (PL_minus_p) {
5db06880 1068#ifdef PERL_MAD
6502358f 1069 sv_catpvs(PL_linestr,
5db06880
NC
1070 ";}continue{print or die qq(-p destination: $!\\n);}");
1071#else
6502358f 1072 sv_setpvs(PL_linestr,
01a19ab0 1073 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1074#endif
3280af22 1075 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1076 }
01a19ab0 1077 else if (PL_minus_n) {
5db06880
NC
1078#ifdef PERL_MAD
1079 sv_catpvn(PL_linestr, ";}", 2);
1080#else
01a19ab0 1081 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1082#endif
01a19ab0
NC
1083 PL_minus_n = 0;
1084 }
a0d0e21e 1085 else
5db06880
NC
1086#ifdef PERL_MAD
1087 sv_catpvn(PL_linestr,";", 1);
1088#else
4147a61b 1089 sv_setpvn(PL_linestr,";", 1);
5db06880 1090#endif
ffb4593c
NT
1091
1092 /* reset variables for next time we lex */
9cbb5ea2 1093 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1094 = SvPVX(PL_linestr)
1095#ifdef PERL_MAD
1096 + curoff
1097#endif
1098 ;
3280af22 1099 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1100 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
1101
1102 /* Close the filehandle. Could be from -P preprocessor,
1103 * STDIN, or a regular file. If we were reading code from
1104 * STDIN (because the commandline held no -e or filename)
1105 * then we don't close it, we reset it so the code can
1106 * read from STDIN too.
1107 */
1108
3280af22
NIS
1109 if (PL_preprocess && !PL_in_eval)
1110 (void)PerlProc_pclose(PL_rsfp);
1111 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1112 PerlIO_clearerr(PL_rsfp);
8990e307 1113 else
3280af22 1114 (void)PerlIO_close(PL_rsfp);
4608196e 1115 PL_rsfp = NULL;
463ee0b2
LW
1116 return s;
1117 }
ffb4593c
NT
1118
1119 /* not at end of file, so we only read another line */
09bef843
SB
1120 /* make corresponding updates to old pointers, for yyerror() */
1121 oldprevlen = PL_oldbufptr - PL_bufend;
1122 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1123 if (PL_last_uni)
1124 oldunilen = PL_last_uni - PL_bufend;
1125 if (PL_last_lop)
1126 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1127 PL_linestart = PL_bufptr = s + prevlen;
1128 PL_bufend = s + SvCUR(PL_linestr);
1129 s = PL_bufptr;
09bef843
SB
1130 PL_oldbufptr = s + oldprevlen;
1131 PL_oldoldbufptr = s + oldoldprevlen;
1132 if (PL_last_uni)
1133 PL_last_uni = s + oldunilen;
1134 if (PL_last_lop)
1135 PL_last_lop = s + oldloplen;
a0d0e21e 1136 incline(s);
ffb4593c
NT
1137
1138 /* debugger active and we're not compiling the debugger code,
1139 * so store the line into the debugger's array of lines
1140 */
80a702cd 1141 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1142 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1143 }
5db06880
NC
1144
1145#ifdef PERL_MAD
1146 done:
1147 if (PL_madskills) {
cd81e915 1148 if (!PL_skipwhite)
6b29d1f5 1149 PL_skipwhite = newSVpvs("");
5db06880
NC
1150 curoff = s - SvPVX(PL_linestr);
1151 if (curoff - startoff)
cd81e915 1152 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1153 curoff - startoff);
1154 }
1155 return s;
1156#endif
a687059c 1157}
378cc40b 1158
ffb4593c
NT
1159/*
1160 * S_check_uni
1161 * Check the unary operators to ensure there's no ambiguity in how they're
1162 * used. An ambiguous piece of code would be:
1163 * rand + 5
1164 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1165 * the +5 is its argument.
1166 */
1167
76e3520e 1168STATIC void
cea2e8a9 1169S_check_uni(pTHX)
ba106d47 1170{
97aff369 1171 dVAR;
d4c19fe8
AL
1172 const char *s;
1173 const char *t;
2f3197b3 1174
3280af22 1175 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1176 return;
3280af22
NIS
1177 while (isSPACE(*PL_last_uni))
1178 PL_last_uni++;
c35e046a
AL
1179 s = PL_last_uni;
1180 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1181 s++;
3280af22 1182 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1183 return;
6136c704 1184
0453d815 1185 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1186 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1187 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1188 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1189 }
2f3197b3
LW
1190}
1191
ffb4593c
NT
1192/*
1193 * LOP : macro to build a list operator. Its behaviour has been replaced
1194 * with a subroutine, S_lop() for which LOP is just another name.
1195 */
1196
a0d0e21e
LW
1197#define LOP(f,x) return lop(f,x,s)
1198
ffb4593c
NT
1199/*
1200 * S_lop
1201 * Build a list operator (or something that might be one). The rules:
1202 * - if we have a next token, then it's a list operator [why?]
1203 * - if the next thing is an opening paren, then it's a function
1204 * - else it's a list operator
1205 */
1206
76e3520e 1207STATIC I32
a0be28da 1208S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1209{
97aff369 1210 dVAR;
79072805 1211 yylval.ival = f;
35c8bce7 1212 CLINE;
3280af22
NIS
1213 PL_expect = x;
1214 PL_bufptr = s;
1215 PL_last_lop = PL_oldbufptr;
eb160463 1216 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1217#ifdef PERL_MAD
1218 if (PL_lasttoke)
1219 return REPORT(LSTOP);
1220#else
3280af22 1221 if (PL_nexttoke)
bbf60fe6 1222 return REPORT(LSTOP);
5db06880 1223#endif
79072805 1224 if (*s == '(')
bbf60fe6 1225 return REPORT(FUNC);
29595ff2 1226 s = PEEKSPACE(s);
79072805 1227 if (*s == '(')
bbf60fe6 1228 return REPORT(FUNC);
79072805 1229 else
bbf60fe6 1230 return REPORT(LSTOP);
79072805
LW
1231}
1232
5db06880
NC
1233#ifdef PERL_MAD
1234 /*
1235 * S_start_force
1236 * Sets up for an eventual force_next(). start_force(0) basically does
1237 * an unshift, while start_force(-1) does a push. yylex removes items
1238 * on the "pop" end.
1239 */
1240
1241STATIC void
1242S_start_force(pTHX_ int where)
1243{
1244 int i;
1245
cd81e915 1246 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1247 where = PL_lasttoke;
cd81e915
NC
1248 assert(PL_curforce < 0 || PL_curforce == where);
1249 if (PL_curforce != where) {
5db06880
NC
1250 for (i = PL_lasttoke; i > where; --i) {
1251 PL_nexttoke[i] = PL_nexttoke[i-1];
1252 }
1253 PL_lasttoke++;
1254 }
cd81e915 1255 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1256 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1257 PL_curforce = where;
1258 if (PL_nextwhite) {
5db06880 1259 if (PL_madskills)
6b29d1f5 1260 curmad('^', newSVpvs(""));
cd81e915 1261 CURMAD('_', PL_nextwhite);
5db06880
NC
1262 }
1263}
1264
1265STATIC void
1266S_curmad(pTHX_ char slot, SV *sv)
1267{
1268 MADPROP **where;
1269
1270 if (!sv)
1271 return;
cd81e915
NC
1272 if (PL_curforce < 0)
1273 where = &PL_thismad;
5db06880 1274 else
cd81e915 1275 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1276
cd81e915 1277 if (PL_faketokens)
5db06880
NC
1278 sv_setpvn(sv, "", 0);
1279 else {
1280 if (!IN_BYTES) {
1281 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1282 SvUTF8_on(sv);
1283 else if (PL_encoding) {
1284 sv_recode_to_utf8(sv, PL_encoding);
1285 }
1286 }
1287 }
1288
1289 /* keep a slot open for the head of the list? */
1290 if (slot != '_' && *where && (*where)->mad_key == '^') {
1291 (*where)->mad_key = slot;
035e2bcc 1292 sv_free((SV*)((*where)->mad_val));
5db06880
NC
1293 (*where)->mad_val = (void*)sv;
1294 }
1295 else
1296 addmad(newMADsv(slot, sv), where, 0);
1297}
1298#else
b3f24c00
MHM
1299# define start_force(where) NOOP
1300# define curmad(slot, sv) NOOP
5db06880
NC
1301#endif
1302
ffb4593c
NT
1303/*
1304 * S_force_next
9cbb5ea2 1305 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1306 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1307 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1308 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1309 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1310 */
1311
4e553d73 1312STATIC void
cea2e8a9 1313S_force_next(pTHX_ I32 type)
79072805 1314{
97aff369 1315 dVAR;
5db06880 1316#ifdef PERL_MAD
cd81e915 1317 if (PL_curforce < 0)
5db06880 1318 start_force(PL_lasttoke);
cd81e915 1319 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1320 if (PL_lex_state != LEX_KNOWNEXT)
1321 PL_lex_defer = PL_lex_state;
1322 PL_lex_state = LEX_KNOWNEXT;
1323 PL_lex_expect = PL_expect;
cd81e915 1324 PL_curforce = -1;
5db06880 1325#else
3280af22
NIS
1326 PL_nexttype[PL_nexttoke] = type;
1327 PL_nexttoke++;
1328 if (PL_lex_state != LEX_KNOWNEXT) {
1329 PL_lex_defer = PL_lex_state;
1330 PL_lex_expect = PL_expect;
1331 PL_lex_state = LEX_KNOWNEXT;
79072805 1332 }
5db06880 1333#endif
79072805
LW
1334}
1335
d0a148a6
NC
1336STATIC SV *
1337S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1338{
97aff369 1339 dVAR;
9d4ba2ae 1340 SV * const sv = newSVpvn(start,len);
bfed75c6 1341 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1342 SvUTF8_on(sv);
1343 return sv;
1344}
1345
ffb4593c
NT
1346/*
1347 * S_force_word
1348 * When the lexer knows the next thing is a word (for instance, it has
1349 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1350 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1351 * lookahead.
ffb4593c
NT
1352 *
1353 * Arguments:
b1b65b59 1354 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1355 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1356 * int check_keyword : if true, Perl checks to make sure the word isn't
1357 * a keyword (do this if the word is a label, e.g. goto FOO)
1358 * int allow_pack : if true, : characters will also be allowed (require,
1359 * use, etc. do this)
9cbb5ea2 1360 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1361 */
1362
76e3520e 1363STATIC char *
cea2e8a9 1364S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1365{
97aff369 1366 dVAR;
463ee0b2
LW
1367 register char *s;
1368 STRLEN len;
4e553d73 1369
29595ff2 1370 start = SKIPSPACE1(start);
463ee0b2 1371 s = start;
7e2040f0 1372 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1373 (allow_pack && *s == ':') ||
15f0808c 1374 (allow_initial_tick && *s == '\'') )
a0d0e21e 1375 {
3280af22 1376 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1377 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1378 return start;
cd81e915 1379 start_force(PL_curforce);
5db06880
NC
1380 if (PL_madskills)
1381 curmad('X', newSVpvn(start,s-start));
463ee0b2 1382 if (token == METHOD) {
29595ff2 1383 s = SKIPSPACE1(s);
463ee0b2 1384 if (*s == '(')
3280af22 1385 PL_expect = XTERM;
463ee0b2 1386 else {
3280af22 1387 PL_expect = XOPERATOR;
463ee0b2 1388 }
79072805 1389 }
e74e6b3d 1390 if (PL_madskills)
63575281 1391 curmad('g', newSVpvs( "forced" ));
9ded7720 1392 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1393 = (OP*)newSVOP(OP_CONST,0,
1394 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1395 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1396 force_next(token);
1397 }
1398 return s;
1399}
1400
ffb4593c
NT
1401/*
1402 * S_force_ident
9cbb5ea2 1403 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1404 * text only contains the "foo" portion. The first argument is a pointer
1405 * to the "foo", and the second argument is the type symbol to prefix.
1406 * Forces the next token to be a "WORD".
9cbb5ea2 1407 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1408 */
1409
76e3520e 1410STATIC void
bfed75c6 1411S_force_ident(pTHX_ register const char *s, int kind)
79072805 1412{
97aff369 1413 dVAR;
c35e046a 1414 if (*s) {
90e5519e
NC
1415 const STRLEN len = strlen(s);
1416 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1417 start_force(PL_curforce);
9ded7720 1418 NEXTVAL_NEXTTOKE.opval = o;
79072805 1419 force_next(WORD);
748a9306 1420 if (kind) {
11343788 1421 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1422 /* XXX see note in pp_entereval() for why we forgo typo
1423 warnings if the symbol must be introduced in an eval.
1424 GSAR 96-10-12 */
90e5519e
NC
1425 gv_fetchpvn_flags(s, len,
1426 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1427 : GV_ADD,
1428 kind == '$' ? SVt_PV :
1429 kind == '@' ? SVt_PVAV :
1430 kind == '%' ? SVt_PVHV :
a0d0e21e 1431 SVt_PVGV
90e5519e 1432 );
748a9306 1433 }
79072805
LW
1434 }
1435}
1436
1571675a
GS
1437NV
1438Perl_str_to_version(pTHX_ SV *sv)
1439{
1440 NV retval = 0.0;
1441 NV nshift = 1.0;
1442 STRLEN len;
cfd0369c 1443 const char *start = SvPV_const(sv,len);
9d4ba2ae 1444 const char * const end = start + len;
504618e9 1445 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1446 while (start < end) {
ba210ebe 1447 STRLEN skip;
1571675a
GS
1448 UV n;
1449 if (utf)
9041c2e3 1450 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1451 else {
1452 n = *(U8*)start;
1453 skip = 1;
1454 }
1455 retval += ((NV)n)/nshift;
1456 start += skip;
1457 nshift *= 1000;
1458 }
1459 return retval;
1460}
1461
4e553d73 1462/*
ffb4593c
NT
1463 * S_force_version
1464 * Forces the next token to be a version number.
e759cc13
RGS
1465 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1466 * and if "guessing" is TRUE, then no new token is created (and the caller
1467 * must use an alternative parsing method).
ffb4593c
NT
1468 */
1469
76e3520e 1470STATIC char *
e759cc13 1471S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1472{
97aff369 1473 dVAR;
5f66b61c 1474 OP *version = NULL;
44dcb63b 1475 char *d;
5db06880
NC
1476#ifdef PERL_MAD
1477 I32 startoff = s - SvPVX(PL_linestr);
1478#endif
89bfa8cd 1479
29595ff2 1480 s = SKIPSPACE1(s);
89bfa8cd 1481
44dcb63b 1482 d = s;
dd629d5b 1483 if (*d == 'v')
44dcb63b 1484 d++;
44dcb63b 1485 if (isDIGIT(*d)) {
e759cc13
RGS
1486 while (isDIGIT(*d) || *d == '_' || *d == '.')
1487 d++;
5db06880
NC
1488#ifdef PERL_MAD
1489 if (PL_madskills) {
cd81e915 1490 start_force(PL_curforce);
5db06880
NC
1491 curmad('X', newSVpvn(s,d-s));
1492 }
1493#endif
9f3d182e 1494 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1495 SV *ver;
b73d6f50 1496 s = scan_num(s, &yylval);
89bfa8cd 1497 version = yylval.opval;
dd629d5b
GS
1498 ver = cSVOPx(version)->op_sv;
1499 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1500 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1501 SvNV_set(ver, str_to_version(ver));
1571675a 1502 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1503 }
89bfa8cd 1504 }
5db06880
NC
1505 else if (guessing) {
1506#ifdef PERL_MAD
1507 if (PL_madskills) {
cd81e915
NC
1508 sv_free(PL_nextwhite); /* let next token collect whitespace */
1509 PL_nextwhite = 0;
5db06880
NC
1510 s = SvPVX(PL_linestr) + startoff;
1511 }
1512#endif
e759cc13 1513 return s;
5db06880 1514 }
89bfa8cd
PP
1515 }
1516
5db06880
NC
1517#ifdef PERL_MAD
1518 if (PL_madskills && !version) {
cd81e915
NC
1519 sv_free(PL_nextwhite); /* let next token collect whitespace */
1520 PL_nextwhite = 0;
5db06880
NC
1521 s = SvPVX(PL_linestr) + startoff;
1522 }
1523#endif
89bfa8cd 1524 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1525 start_force(PL_curforce);
9ded7720 1526 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1527 force_next(WORD);
89bfa8cd 1528
e759cc13 1529 return s;
89bfa8cd
PP
1530}
1531
ffb4593c
NT
1532/*
1533 * S_tokeq
1534 * Tokenize a quoted string passed in as an SV. It finds the next
1535 * chunk, up to end of string or a backslash. It may make a new
1536 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1537 * turns \\ into \.
1538 */
1539
76e3520e 1540STATIC SV *
cea2e8a9 1541S_tokeq(pTHX_ SV *sv)
79072805 1542{
97aff369 1543 dVAR;
79072805
LW
1544 register char *s;
1545 register char *send;
1546 register char *d;
b3ac6de7
IZ
1547 STRLEN len = 0;
1548 SV *pv = sv;
79072805
LW
1549
1550 if (!SvLEN(sv))
b3ac6de7 1551 goto finish;
79072805 1552
a0d0e21e 1553 s = SvPV_force(sv, len);
21a311ee 1554 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1555 goto finish;
463ee0b2 1556 send = s + len;
79072805
LW
1557 while (s < send && *s != '\\')
1558 s++;
1559 if (s == send)
b3ac6de7 1560 goto finish;
79072805 1561 d = s;
be4731d2 1562 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1563 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1564 if (SvUTF8(sv))
1565 SvUTF8_on(pv);
1566 }
79072805
LW
1567 while (s < send) {
1568 if (*s == '\\') {
a0d0e21e 1569 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1570 s++; /* all that, just for this */
1571 }
1572 *d++ = *s++;
1573 }
1574 *d = '\0';
95a20fc0 1575 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1576 finish:
3280af22 1577 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1578 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1579 return sv;
1580}
1581
ffb4593c
NT
1582/*
1583 * Now come three functions related to double-quote context,
1584 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1585 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1586 * interact with PL_lex_state, and create fake ( ... ) argument lists
1587 * to handle functions and concatenation.
1588 * They assume that whoever calls them will be setting up a fake
1589 * join call, because each subthing puts a ',' after it. This lets
1590 * "lower \luPpEr"
1591 * become
1592 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1593 *
1594 * (I'm not sure whether the spurious commas at the end of lcfirst's
1595 * arguments and join's arguments are created or not).
1596 */
1597
1598/*
1599 * S_sublex_start
1600 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1601 *
1602 * Pattern matching will set PL_lex_op to the pattern-matching op to
1603 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1604 *
1605 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1606 *
1607 * Everything else becomes a FUNC.
1608 *
1609 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1610 * had an OP_CONST or OP_READLINE). This just sets us up for a
1611 * call to S_sublex_push().
1612 */
1613
76e3520e 1614STATIC I32
cea2e8a9 1615S_sublex_start(pTHX)
79072805 1616{
97aff369 1617 dVAR;
0d46e09a 1618 register const I32 op_type = yylval.ival;
79072805
LW
1619
1620 if (op_type == OP_NULL) {
3280af22 1621 yylval.opval = PL_lex_op;
5f66b61c 1622 PL_lex_op = NULL;
79072805
LW
1623 return THING;
1624 }
1625 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1626 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1627
1628 if (SvTYPE(sv) == SVt_PVIV) {
1629 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1630 STRLEN len;
96a5add6 1631 const char * const p = SvPV_const(sv, len);
f54cb97a 1632 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1633 if (SvUTF8(sv))
1634 SvUTF8_on(nsv);
b3ac6de7
IZ
1635 SvREFCNT_dec(sv);
1636 sv = nsv;
4e553d73 1637 }
b3ac6de7 1638 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1639 PL_lex_stuff = NULL;
6f33ba73
RGS
1640 /* Allow <FH> // "foo" */
1641 if (op_type == OP_READLINE)
1642 PL_expect = XTERMORDORDOR;
79072805
LW
1643 return THING;
1644 }
e3f73d4e
RGS
1645 else if (op_type == OP_BACKTICK && PL_lex_op) {
1646 /* readpipe() vas overriden */
1647 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1648 yylval.opval = PL_lex_op;
9b201d7d 1649 PL_lex_op = NULL;
e3f73d4e
RGS
1650 PL_lex_stuff = NULL;
1651 return THING;
1652 }
79072805 1653
3280af22 1654 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1655 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1656 PL_sublex_info.sub_op = PL_lex_op;
1657 PL_lex_state = LEX_INTERPPUSH;
55497cff 1658
3280af22
NIS
1659 PL_expect = XTERM;
1660 if (PL_lex_op) {
1661 yylval.opval = PL_lex_op;
5f66b61c 1662 PL_lex_op = NULL;
55497cff
PP
1663 return PMFUNC;
1664 }
1665 else
1666 return FUNC;
1667}
1668
ffb4593c
NT
1669/*
1670 * S_sublex_push
1671 * Create a new scope to save the lexing state. The scope will be
1672 * ended in S_sublex_done. Returns a '(', starting the function arguments
1673 * to the uc, lc, etc. found before.
1674 * Sets PL_lex_state to LEX_INTERPCONCAT.
1675 */
1676
76e3520e 1677STATIC I32
cea2e8a9 1678S_sublex_push(pTHX)
55497cff 1679{
27da23d5 1680 dVAR;
f46d017c 1681 ENTER;
55497cff 1682
3280af22 1683 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1684 SAVEBOOL(PL_lex_dojoin);
3280af22 1685 SAVEI32(PL_lex_brackets);
3280af22
NIS
1686 SAVEI32(PL_lex_casemods);
1687 SAVEI32(PL_lex_starts);
651b5b28 1688 SAVEI8(PL_lex_state);
7766f137 1689 SAVEVPTR(PL_lex_inpat);
98246f1e 1690 SAVEI16(PL_lex_inwhat);
57843af0 1691 SAVECOPLINE(PL_curcop);
3280af22 1692 SAVEPPTR(PL_bufptr);
8452ff4b 1693 SAVEPPTR(PL_bufend);
3280af22
NIS
1694 SAVEPPTR(PL_oldbufptr);
1695 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1696 SAVEPPTR(PL_last_lop);
1697 SAVEPPTR(PL_last_uni);
3280af22
NIS
1698 SAVEPPTR(PL_linestart);
1699 SAVESPTR(PL_linestr);
8edd5f42
RGS
1700 SAVEGENERICPV(PL_lex_brackstack);
1701 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1702
1703 PL_linestr = PL_lex_stuff;
a0714e2c 1704 PL_lex_stuff = NULL;
3280af22 1705
9cbb5ea2
GS
1706 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1707 = SvPVX(PL_linestr);
3280af22 1708 PL_bufend += SvCUR(PL_linestr);
bd61b366 1709 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1710 SAVEFREESV(PL_linestr);
1711
1712 PL_lex_dojoin = FALSE;
1713 PL_lex_brackets = 0;
a02a5408
JC
1714 Newx(PL_lex_brackstack, 120, char);
1715 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1716 PL_lex_casemods = 0;
1717 *PL_lex_casestack = '\0';
1718 PL_lex_starts = 0;
1719 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1720 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1721
1722 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1723 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1724 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1725 else
5f66b61c 1726 PL_lex_inpat = NULL;
79072805 1727
55497cff 1728 return '(';
79072805
LW
1729}
1730
ffb4593c
NT
1731/*
1732 * S_sublex_done
1733 * Restores lexer state after a S_sublex_push.
1734 */
1735
76e3520e 1736STATIC I32
cea2e8a9 1737S_sublex_done(pTHX)
79072805 1738{
27da23d5 1739 dVAR;
3280af22 1740 if (!PL_lex_starts++) {
396482e1 1741 SV * const sv = newSVpvs("");
9aa983d2
JH
1742 if (SvUTF8(PL_linestr))
1743 SvUTF8_on(sv);
3280af22 1744 PL_expect = XOPERATOR;
9aa983d2 1745 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1746 return THING;
1747 }
1748
3280af22
NIS
1749 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1750 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1751 return yylex();
79072805
LW
1752 }
1753
ffb4593c 1754 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1755 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1756 PL_linestr = PL_lex_repl;
1757 PL_lex_inpat = 0;
1758 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1759 PL_bufend += SvCUR(PL_linestr);
bd61b366 1760 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1761 SAVEFREESV(PL_linestr);
1762 PL_lex_dojoin = FALSE;
1763 PL_lex_brackets = 0;
3280af22
NIS
1764 PL_lex_casemods = 0;
1765 *PL_lex_casestack = '\0';
1766 PL_lex_starts = 0;
25da4f38 1767 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1768 PL_lex_state = LEX_INTERPNORMAL;
1769 PL_lex_starts++;
e9fa98b2
HS
1770 /* we don't clear PL_lex_repl here, so that we can check later
1771 whether this is an evalled subst; that means we rely on the
1772 logic to ensure sublex_done() is called again only via the
1773 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1774 }
e9fa98b2 1775 else {
3280af22 1776 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1777 PL_lex_repl = NULL;
e9fa98b2 1778 }
79072805 1779 return ',';
ffed7fef
LW
1780 }
1781 else {
5db06880
NC
1782#ifdef PERL_MAD
1783 if (PL_madskills) {
cd81e915
NC
1784 if (PL_thiswhite) {
1785 if (!PL_endwhite)
6b29d1f5 1786 PL_endwhite = newSVpvs("");
cd81e915
NC
1787 sv_catsv(PL_endwhite, PL_thiswhite);
1788 PL_thiswhite = 0;
1789 }
1790 if (PL_thistoken)
1791 sv_setpvn(PL_thistoken,"",0);
5db06880 1792 else
cd81e915 1793 PL_realtokenstart = -1;
5db06880
NC
1794 }
1795#endif
f46d017c 1796 LEAVE;
3280af22
NIS
1797 PL_bufend = SvPVX(PL_linestr);
1798 PL_bufend += SvCUR(PL_linestr);
1799 PL_expect = XOPERATOR;
09bef843 1800 PL_sublex_info.sub_inwhat = 0;
79072805 1801 return ')';
ffed7fef
LW
1802 }
1803}
1804
02aa26ce
NT
1805/*
1806 scan_const
1807
1808 Extracts a pattern, double-quoted string, or transliteration. This
1809 is terrifying code.
1810
94def140 1811 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1812 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1813 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1814
94def140
ST
1815 Returns a pointer to the character scanned up to. If this is
1816 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1817 successfully parsed), will leave an OP for the substring scanned
1818 in yylval. Caller must intuit reason for not parsing further
1819 by looking at the next characters herself.
1820
02aa26ce
NT
1821 In patterns:
1822 backslashes:
1823 double-quoted style: \r and \n
1824 regexp special ones: \D \s
94def140
ST
1825 constants: \x31
1826 backrefs: \1
02aa26ce
NT
1827 case and quoting: \U \Q \E
1828 stops on @ and $, but not for $ as tail anchor
1829
1830 In transliterations:
1831 characters are VERY literal, except for - not at the start or end
94def140
ST
1832 of the string, which indicates a range. If the range is in bytes,
1833 scan_const expands the range to the full set of intermediate
1834 characters. If the range is in utf8, the hyphen is replaced with
1835 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1836
1837 In double-quoted strings:
1838 backslashes:
1839 double-quoted style: \r and \n
94def140
ST
1840 constants: \x31
1841 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1842 case and quoting: \U \Q \E
1843 stops on @ and $
1844
1845 scan_const does *not* construct ops to handle interpolated strings.
1846 It stops processing as soon as it finds an embedded $ or @ variable
1847 and leaves it to the caller to work out what's going on.
1848
94def140
ST
1849 embedded arrays (whether in pattern or not) could be:
1850 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1851
1852 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1853
1854 $ in pattern could be $foo or could be tail anchor. Assumption:
1855 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1856 followed by one of "()| \r\n\t"
02aa26ce
NT
1857
1858 \1 (backreferences) are turned into $1
1859
1860 The structure of the code is
1861 while (there's a character to process) {
94def140
ST
1862 handle transliteration ranges
1863 skip regexp comments /(?#comment)/ and codes /(?{code})/
1864 skip #-initiated comments in //x patterns
1865 check for embedded arrays
02aa26ce
NT
1866 check for embedded scalars
1867 if (backslash) {
94def140
ST
1868 leave intact backslashes from leaveit (below)
1869 deprecate \1 in substitution replacements
02aa26ce
NT
1870 handle string-changing backslashes \l \U \Q \E, etc.
1871 switch (what was escaped) {
94def140
ST
1872 handle \- in a transliteration (becomes a literal -)
1873 handle \132 (octal characters)
1874 handle \x15 and \x{1234} (hex characters)
1875 handle \N{name} (named characters)
1876 handle \cV (control characters)
1877 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1878 } (end switch)
1879 } (end if backslash)
1880 } (end while character to read)
4e553d73 1881
02aa26ce
NT
1882*/
1883
76e3520e 1884STATIC char *
cea2e8a9 1885S_scan_const(pTHX_ char *start)
79072805 1886{
97aff369 1887 dVAR;
3280af22 1888 register char *send = PL_bufend; /* end of the constant */
561b68a9 1889 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1890 register char *s = start; /* start of the constant */
1891 register char *d = SvPVX(sv); /* destination for copies */
1892 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1893 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1894 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1895 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1896 UV uv;
4c3a8340
ST
1897#ifdef EBCDIC
1898 UV literal_endpoint = 0;
e294cc5d 1899 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1900#endif
012bcf8d 1901
2b9d42f0
NIS
1902 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1903 /* If we are doing a trans and we know we want UTF8 set expectation */
1904 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1905 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1906 }
1907
1908
79072805 1909 while (s < send || dorange) {
02aa26ce 1910 /* get transliterations out of the way (they're most literal) */
3280af22 1911 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1912 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1913 if (dorange) {
1ba5c669
JH
1914 I32 i; /* current expanded character */
1915 I32 min; /* first character in range */
1916 I32 max; /* last character in range */
02aa26ce 1917
e294cc5d
JH
1918#ifdef EBCDIC
1919 UV uvmax = 0;
1920#endif
1921
1922 if (has_utf8
1923#ifdef EBCDIC
1924 && !native_range
1925#endif
1926 ) {
9d4ba2ae 1927 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1928 char *e = d++;
1929 while (e-- > c)
1930 *(e + 1) = *e;
25716404 1931 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1932 /* mark the range as done, and continue */
1933 dorange = FALSE;
1934 didrange = TRUE;
1935 continue;
1936 }
2b9d42f0 1937
95a20fc0 1938 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1939#ifdef EBCDIC
1940 SvGROW(sv,
1941 SvLEN(sv) + (has_utf8 ?
1942 (512 - UTF_CONTINUATION_MARK +
1943 UNISKIP(0x100))
1944 : 256));
1945 /* How many two-byte within 0..255: 128 in UTF-8,
1946 * 96 in UTF-8-mod. */
1947#else
9cbb5ea2 1948 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1949#endif
9cbb5ea2 1950 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1951#ifdef EBCDIC
1952 if (has_utf8) {
1953 int j;
1954 for (j = 0; j <= 1; j++) {
1955 char * const c = (char*)utf8_hop((U8*)d, -1);
1956 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1957 if (j)
1958 min = (U8)uv;
1959 else if (uv < 256)
1960 max = (U8)uv;
1961 else {
1962 max = (U8)0xff; /* only to \xff */
1963 uvmax = uv; /* \x{100} to uvmax */
1964 }
1965 d = c; /* eat endpoint chars */
1966 }
1967 }
1968 else {
1969#endif
1970 d -= 2; /* eat the first char and the - */
1971 min = (U8)*d; /* first char in range */
1972 max = (U8)d[1]; /* last char in range */
1973#ifdef EBCDIC
1974 }
1975#endif
8ada0baa 1976
c2e66d9e 1977 if (min > max) {
01ec43d0 1978 Perl_croak(aTHX_
d1573ac7 1979 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1980 (char)min, (char)max);
c2e66d9e
GS
1981 }
1982
c7f1f016 1983#ifdef EBCDIC
4c3a8340
ST
1984 if (literal_endpoint == 2 &&
1985 ((isLOWER(min) && isLOWER(max)) ||
1986 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1987 if (isLOWER(min)) {
1988 for (i = min; i <= max; i++)
1989 if (isLOWER(i))
db42d148 1990 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1991 } else {
1992 for (i = min; i <= max; i++)
1993 if (isUPPER(i))
db42d148 1994 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1995 }
1996 }
1997 else
1998#endif
1999 for (i = min; i <= max; i++)
e294cc5d
JH
2000#ifdef EBCDIC
2001 if (has_utf8) {
2002 const U8 ch = (U8)NATIVE_TO_UTF(i);
2003 if (UNI_IS_INVARIANT(ch))
2004 *d++ = (U8)i;
2005 else {
2006 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2007 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2008 }
2009 }
2010 else
2011#endif
2012 *d++ = (char)i;
2013
2014#ifdef EBCDIC
2015 if (uvmax) {
2016 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2017 if (uvmax > 0x101)
2018 *d++ = (char)UTF_TO_NATIVE(0xff);
2019 if (uvmax > 0x100)
2020 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2021 }
2022#endif
02aa26ce
NT
2023
2024 /* mark the range as done, and continue */
79072805 2025 dorange = FALSE;
01ec43d0 2026 didrange = TRUE;
4c3a8340
ST
2027#ifdef EBCDIC
2028 literal_endpoint = 0;
2029#endif
79072805 2030 continue;
4e553d73 2031 }
02aa26ce
NT
2032
2033 /* range begins (ignore - as first or last char) */
79072805 2034 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2035 if (didrange) {
1fafa243 2036 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2037 }
e294cc5d
JH
2038 if (has_utf8
2039#ifdef EBCDIC
2040 && !native_range
2041#endif
2042 ) {
25716404 2043 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2044 s++;
2045 continue;
2046 }
79072805
LW
2047 dorange = TRUE;
2048 s++;
01ec43d0
GS
2049 }
2050 else {
2051 didrange = FALSE;
4c3a8340
ST
2052#ifdef EBCDIC
2053 literal_endpoint = 0;
e294cc5d 2054 native_range = TRUE;
4c3a8340 2055#endif
01ec43d0 2056 }
79072805 2057 }
02aa26ce
NT
2058
2059 /* if we get here, we're not doing a transliteration */
2060
0f5d15d6
IZ
2061 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2062 except for the last char, which will be done separately. */
3280af22 2063 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2064 if (s[2] == '#') {
e994fd66 2065 while (s+1 < send && *s != ')')
db42d148 2066 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2067 }
2068 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2069 || (s[2] == '?' && s[3] == '{'))
155aba94 2070 {
cc6b7395 2071 I32 count = 1;
0f5d15d6 2072 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2073 char c;
2074
d9f97599
GS
2075 while (count && (c = *regparse)) {
2076 if (c == '\\' && regparse[1])
2077 regparse++;
4e553d73 2078 else if (c == '{')
cc6b7395 2079 count++;
4e553d73 2080 else if (c == '}')
cc6b7395 2081 count--;
d9f97599 2082 regparse++;
cc6b7395 2083 }
e994fd66 2084 if (*regparse != ')')
5bdf89e7 2085 regparse--; /* Leave one char for continuation. */
0f5d15d6 2086 while (s < regparse)
db42d148 2087 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2088 }
748a9306 2089 }
02aa26ce
NT
2090
2091 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2092 else if (*s == '#' && PL_lex_inpat &&
2093 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2094 while (s+1 < send && *s != '\n')
db42d148 2095 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2096 }
02aa26ce 2097
5d1d4326 2098 /* check for embedded arrays
da6eedaa 2099 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2100 */
1749ea0d
ST
2101 else if (*s == '@' && s[1]) {
2102 if (isALNUM_lazy_if(s+1,UTF))
2103 break;
2104 if (strchr(":'{$", s[1]))
2105 break;
2106 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2107 break; /* in regexp, neither @+ nor @- are interpolated */
2108 }
02aa26ce
NT
2109
2110 /* check for embedded scalars. only stop if we're sure it's a
2111 variable.
2112 */
79072805 2113 else if (*s == '$') {
3280af22 2114 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2115 break;
6002328a 2116 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2117 break; /* in regexp, $ might be tail anchor */
2118 }
02aa26ce 2119
2b9d42f0
NIS
2120 /* End of else if chain - OP_TRANS rejoin rest */
2121
02aa26ce 2122 /* backslashes */
79072805
LW
2123 if (*s == '\\' && s+1 < send) {
2124 s++;
02aa26ce 2125
02aa26ce 2126 /* deprecate \1 in strings and substitution replacements */
3280af22 2127 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2128 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2129 {
599cee73 2130 if (ckWARN(WARN_SYNTAX))
9014280d 2131 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2132 *--s = '$';
2133 break;
2134 }
02aa26ce
NT
2135
2136 /* string-change backslash escapes */
3280af22 2137 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2138 --s;
2139 break;
2140 }
cc74c5bd
ST
2141 /* skip any other backslash escapes in a pattern */
2142 else if (PL_lex_inpat) {
2143 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2144 goto default_action;
2145 }
02aa26ce
NT
2146
2147 /* if we get here, it's either a quoted -, or a digit */
79072805 2148 switch (*s) {
02aa26ce
NT
2149
2150 /* quoted - in transliterations */
79072805 2151 case '-':
3280af22 2152 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2153 *d++ = *s++;
2154 continue;
2155 }
2156 /* FALL THROUGH */
2157 default:
11b8faa4 2158 {
86f97054 2159 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2160 ckWARN(WARN_MISC))
9014280d 2161 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2162 "Unrecognized escape \\%c passed through",
2163 *s);
11b8faa4 2164 /* default action is to copy the quoted character */
f9a63242 2165 goto default_action;
11b8faa4 2166 }
02aa26ce
NT
2167
2168 /* \132 indicates an octal constant */
79072805
LW
2169 case '0': case '1': case '2': case '3':
2170 case '4': case '5': case '6': case '7':
ba210ebe 2171 {
53305cf1
NC
2172 I32 flags = 0;
2173 STRLEN len = 3;
2174 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2175 s += len;
2176 }
012bcf8d 2177 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2178
2179 /* \x24 indicates a hex constant */
79072805 2180 case 'x':
a0ed51b3
LW
2181 ++s;
2182 if (*s == '{') {
9d4ba2ae 2183 char* const e = strchr(s, '}');
a4c04bdc
NC
2184 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2185 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2186 STRLEN len;
355860ce 2187
53305cf1 2188 ++s;
adaeee49 2189 if (!e) {
a0ed51b3 2190 yyerror("Missing right brace on \\x{}");
355860ce 2191 continue;
ba210ebe 2192 }
53305cf1
NC
2193 len = e - s;
2194 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2195 s = e + 1;
a0ed51b3
LW
2196 }
2197 else {
ba210ebe 2198 {
53305cf1 2199 STRLEN len = 2;
a4c04bdc 2200 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2201 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2202 s += len;
2203 }
012bcf8d
GS
2204 }
2205
2206 NUM_ESCAPE_INSERT:
2207 /* Insert oct or hex escaped character.
301d3d20 2208 * There will always enough room in sv since such
db42d148 2209 * escapes will be longer than any UTF-8 sequence
301d3d20 2210 * they can end up as. */
ba7cea30 2211
c7f1f016
NIS
2212 /* We need to map to chars to ASCII before doing the tests
2213 to cover EBCDIC
2214 */
c4d5f83a 2215 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2216 if (!has_utf8 && uv > 255) {
301d3d20
JH
2217 /* Might need to recode whatever we have
2218 * accumulated so far if it contains any
2219 * hibit chars.
2220 *
2221 * (Can't we keep track of that and avoid
2222 * this rescan? --jhi)
012bcf8d 2223 */
c7f1f016 2224 int hicount = 0;
63cd0674
NIS
2225 U8 *c;
2226 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2227 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2228 hicount++;
db42d148 2229 }
012bcf8d 2230 }
63cd0674 2231 if (hicount) {
9d4ba2ae 2232 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2233 U8 *src, *dst;
2234 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2235 src = (U8 *)d - 1;
2236 dst = src+hicount;
2237 d += hicount;
cfd0369c 2238 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2239 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2240 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2241 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2242 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2243 }
2244 else {
63cd0674 2245 *dst-- = *src;
012bcf8d 2246 }
c7f1f016 2247 src--;
012bcf8d
GS
2248 }
2249 }
2250 }
2251
9aa983d2 2252 if (has_utf8 || uv > 255) {
9041c2e3 2253 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2254 has_utf8 = TRUE;
f9a63242
JH
2255 if (PL_lex_inwhat == OP_TRANS &&
2256 PL_sublex_info.sub_op) {
2257 PL_sublex_info.sub_op->op_private |=
2258 (PL_lex_repl ? OPpTRANS_FROM_UTF
2259 : OPpTRANS_TO_UTF);
f9a63242 2260 }
e294cc5d
JH
2261#ifdef EBCDIC
2262 if (uv > 255 && !dorange)
2263 native_range = FALSE;
2264#endif
012bcf8d 2265 }
a0ed51b3 2266 else {
012bcf8d 2267 *d++ = (char)uv;
a0ed51b3 2268 }
012bcf8d
GS
2269 }
2270 else {
c4d5f83a 2271 *d++ = (char) uv;
a0ed51b3 2272 }
79072805 2273 continue;
02aa26ce 2274
b239daa5 2275 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2276 case 'N':
55eda711 2277 ++s;
423cee85
JH
2278 if (*s == '{') {
2279 char* e = strchr(s, '}');
155aba94 2280 SV *res;
423cee85 2281 STRLEN len;
cfd0369c 2282 const char *str;
4e553d73 2283
423cee85 2284 if (!e) {
5777a3f7 2285 yyerror("Missing right brace on \\N{}");
423cee85
JH
2286 e = s - 1;
2287 goto cont_scan;
2288 }
dbc0d4f2
JH
2289 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2290 /* \N{U+...} */
2291 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2292 PERL_SCAN_DISALLOW_PREFIX;
2293 s += 3;
2294 len = e - s;
2295 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2296 if ( e > s && len != (STRLEN)(e - s) ) {
2297 uv = 0xFFFD;
fc8cd66c 2298 }
dbc0d4f2
JH
2299 s = e + 1;
2300 goto NUM_ESCAPE_INSERT;
2301 }
55eda711 2302 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2303 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2304 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2305 if (has_utf8)
2306 sv_utf8_upgrade(res);
cfd0369c 2307 str = SvPV_const(res,len);
1c47067b
JH
2308#ifdef EBCDIC_NEVER_MIND
2309 /* charnames uses pack U and that has been
2310 * recently changed to do the below uni->native
2311 * mapping, so this would be redundant (and wrong,
2312 * the code point would be doubly converted).
2313 * But leave this in just in case the pack U change
2314 * gets revoked, but the semantics is still
2315 * desireable for charnames. --jhi */
cddc7ef4 2316 {
cfd0369c 2317 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2318
2319 if (uv < 0x100) {
89ebb4a3 2320 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2321
2322 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2323 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2324 str = SvPV_const(res, len);
cddc7ef4
JH
2325 }
2326 }
2327#endif
89491803 2328 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2329 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2330 SvCUR_set(sv, d - ostart);
2331 SvPOK_on(sv);
e4f3eed8 2332 *d = '\0';
f08d6ad9 2333 sv_utf8_upgrade(sv);
d2f449dd 2334 /* this just broke our allocation above... */
eb160463 2335 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2336 d = SvPVX(sv) + SvCUR(sv);
89491803 2337 has_utf8 = TRUE;
f08d6ad9 2338 }
eb160463 2339 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2340 const char * const odest = SvPVX_const(sv);
423cee85 2341
8973db79 2342 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2343 d = SvPVX(sv) + (d - odest);
2344 }
e294cc5d
JH
2345#ifdef EBCDIC
2346 if (!dorange)
2347 native_range = FALSE; /* \N{} is guessed to be Unicode */
2348#endif
423cee85
JH
2349 Copy(str, d, len, char);
2350 d += len;
2351 SvREFCNT_dec(res);
2352 cont_scan:
2353 s = e + 1;
2354 }
2355 else
5777a3f7 2356 yyerror("Missing braces on \\N{}");
423cee85
JH
2357 continue;
2358
02aa26ce 2359 /* \c is a control character */
79072805
LW
2360 case 'c':
2361 s++;
961ce445 2362 if (s < send) {
ba210ebe 2363 U8 c = *s++;
c7f1f016
NIS
2364#ifdef EBCDIC
2365 if (isLOWER(c))
2366 c = toUPPER(c);
2367#endif
db42d148 2368 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2369 }
961ce445
RGS
2370 else {
2371 yyerror("Missing control char name in \\c");
2372 }
79072805 2373 continue;
02aa26ce
NT
2374
2375 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2376 case 'b':
db42d148 2377 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2378 break;
2379 case 'n':
db42d148 2380 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2381 break;
2382 case 'r':
db42d148 2383 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2384 break;
2385 case 'f':
db42d148 2386 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2387 break;
2388 case 't':
db42d148 2389 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2390 break;
34a3fe2a 2391 case 'e':
db42d148 2392 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2393 break;
2394 case 'a':
db42d148 2395 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2396 break;
02aa26ce
NT
2397 } /* end switch */
2398
79072805
LW
2399 s++;
2400 continue;
02aa26ce 2401 } /* end if (backslash) */
4c3a8340
ST
2402#ifdef EBCDIC
2403 else
2404 literal_endpoint++;
2405#endif
02aa26ce 2406
f9a63242 2407 default_action:
2b9d42f0
NIS
2408 /* If we started with encoded form, or already know we want it
2409 and then encode the next character */
2410 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2411 STRLEN len = 1;
5f66b61c
AL
2412 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2413 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2414 s += len;
2415 if (need > len) {
2416 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2417 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2418 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2419 }
5f66b61c 2420 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2421 has_utf8 = TRUE;
e294cc5d
JH
2422#ifdef EBCDIC
2423 if (uv > 255 && !dorange)
2424 native_range = FALSE;
2425#endif
2b9d42f0
NIS
2426 }
2427 else {
2428 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2429 }
02aa26ce
NT
2430 } /* while loop to process each character */
2431
2432 /* terminate the string and set up the sv */
79072805 2433 *d = '\0';
95a20fc0 2434 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2435 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2436 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2437
79072805 2438 SvPOK_on(sv);
9f4817db 2439 if (PL_encoding && !has_utf8) {
d0063567
DK
2440 sv_recode_to_utf8(sv, PL_encoding);
2441 if (SvUTF8(sv))
2442 has_utf8 = TRUE;
9f4817db 2443 }
2b9d42f0 2444 if (has_utf8) {
7e2040f0 2445 SvUTF8_on(sv);
2b9d42f0 2446 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2447 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2448 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2449 }
2450 }
79072805 2451
02aa26ce 2452 /* shrink the sv if we allocated more than we used */
79072805 2453 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2454 SvPV_shrink_to_cur(sv);
79072805 2455 }
02aa26ce 2456
9b599b2a 2457 /* return the substring (via yylval) only if we parsed anything */
3280af22 2458 if (s > PL_bufptr) {
eb0d8d16
NC
2459 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2460 const char *const key = PL_lex_inpat ? "qr" : "q";
2461 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2462 const char *type;
2463 STRLEN typelen;
2464
2465 if (PL_lex_inwhat == OP_TRANS) {
2466 type = "tr";
2467 typelen = 2;
2468 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2469 type = "s";
2470 typelen = 1;
2471 } else {
2472 type = "qq";
2473 typelen = 2;
2474 }
2475
2476 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2477 type, typelen);
2478 }
79072805 2479 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2480 } else
8990e307 2481 SvREFCNT_dec(sv);
79072805
LW
2482 return s;
2483}
2484
ffb4593c
NT
2485/* S_intuit_more
2486 * Returns TRUE if there's more to the expression (e.g., a subscript),
2487 * FALSE otherwise.
ffb4593c
NT
2488 *
2489 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2490 *
2491 * ->[ and ->{ return TRUE
2492 * { and [ outside a pattern are always subscripts, so return TRUE
2493 * if we're outside a pattern and it's not { or [, then return FALSE
2494 * if we're in a pattern and the first char is a {
2495 * {4,5} (any digits around the comma) returns FALSE
2496 * if we're in a pattern and the first char is a [
2497 * [] returns FALSE
2498 * [SOMETHING] has a funky algorithm to decide whether it's a
2499 * character class or not. It has to deal with things like
2500 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2501 * anything else returns TRUE
2502 */
2503
9cbb5ea2
GS
2504/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2505
76e3520e 2506STATIC int
cea2e8a9 2507S_intuit_more(pTHX_ register char *s)
79072805 2508{
97aff369 2509 dVAR;
3280af22 2510 if (PL_lex_brackets)
79072805
LW
2511 return TRUE;
2512 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2513 return TRUE;
2514 if (*s != '{' && *s != '[')
2515 return FALSE;
3280af22 2516 if (!PL_lex_inpat)
79072805
LW
2517 return TRUE;
2518
2519 /* In a pattern, so maybe we have {n,m}. */
2520 if (*s == '{') {
2521 s++;
2522 if (!isDIGIT(*s))
2523 return TRUE;
2524 while (isDIGIT(*s))
2525 s++;
2526 if (*s == ',')
2527 s++;
2528 while (isDIGIT(*s))
2529 s++;
2530 if (*s == '}')
2531 return FALSE;
2532 return TRUE;
2533
2534 }
2535
2536 /* On the other hand, maybe we have a character class */
2537
2538 s++;
2539 if (*s == ']' || *s == '^')
2540 return FALSE;
2541 else {
ffb4593c 2542 /* this is terrifying, and it works */
79072805
LW
2543 int weight = 2; /* let's weigh the evidence */
2544 char seen[256];
f27ffc4a 2545 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2546 const char * const send = strchr(s,']');
3280af22 2547 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2548
2549 if (!send) /* has to be an expression */
2550 return TRUE;
2551
2552 Zero(seen,256,char);
2553 if (*s == '$')
2554 weight -= 3;
2555 else if (isDIGIT(*s)) {
2556 if (s[1] != ']') {
2557 if (isDIGIT(s[1]) && s[2] == ']')
2558 weight -= 10;
2559 }
2560 else
2561 weight -= 100;
2562 }
2563 for (; s < send; s++) {
2564 last_un_char = un_char;
2565 un_char = (unsigned char)*s;
2566 switch (*s) {
2567 case '@':
2568 case '&':
2569 case '$':
2570 weight -= seen[un_char] * 10;
7e2040f0 2571 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2572 int len;
8903cb82 2573 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2574 len = (int)strlen(tmpbuf);
2575 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2576 weight -= 100;
2577 else
2578 weight -= 10;
2579 }
2580 else if (*s == '$' && s[1] &&
93a17b20
LW
2581 strchr("[#!%*<>()-=",s[1])) {
2582 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2583 weight -= 10;
2584 else
2585 weight -= 1;
2586 }
2587 break;
2588 case '\\':
2589 un_char = 254;
2590 if (s[1]) {
93a17b20 2591 if (strchr("wds]",s[1]))
79072805 2592 weight += 100;
10edeb5d 2593 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2594 weight += 1;
93a17b20 2595 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2596 weight += 40;
2597 else if (isDIGIT(s[1])) {
2598 weight += 40;
2599 while (s[1] && isDIGIT(s[1]))
2600 s++;
2601 }
2602 }
2603 else
2604 weight += 100;
2605 break;
2606 case '-':
2607 if (s[1] == '\\')
2608 weight += 50;
93a17b20 2609 if (strchr("aA01! ",last_un_char))
79072805 2610 weight += 30;
93a17b20 2611 if (strchr("zZ79~",s[1]))
79072805 2612 weight += 30;
f27ffc4a
GS
2613 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2614 weight -= 5; /* cope with negative subscript */
79072805
LW
2615 break;
2616 default:
3792a11b
NC
2617 if (!isALNUM(last_un_char)
2618 && !(last_un_char == '$' || last_un_char == '@'
2619 || last_un_char == '&')
2620 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2621 char *d = tmpbuf;
2622 while (isALPHA(*s))
2623 *d++ = *s++;
2624 *d = '\0';
5458a98a 2625 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2626 weight -= 150;
2627 }
2628 if (un_char == last_un_char + 1)
2629 weight += 5;
2630 weight -= seen[un_char];
2631 break;
2632 }
2633 seen[un_char]++;
2634 }
2635 if (weight >= 0) /* probably a character class */
2636 return FALSE;
2637 }
2638
2639 return TRUE;
2640}
ffed7fef 2641
ffb4593c
NT
2642/*
2643 * S_intuit_method
2644 *
2645 * Does all the checking to disambiguate
2646 * foo bar
2647 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2648 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2649 *
2650 * First argument is the stuff after the first token, e.g. "bar".
2651 *
2652 * Not a method if bar is a filehandle.
2653 * Not a method if foo is a subroutine prototyped to take a filehandle.
2654 * Not a method if it's really "Foo $bar"
2655 * Method if it's "foo $bar"
2656 * Not a method if it's really "print foo $bar"
2657 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2658 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2659 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2660 * =>
2661 */
2662
76e3520e 2663STATIC int
62d55b22 2664S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2665{
97aff369 2666 dVAR;
a0d0e21e 2667 char *s = start + (*start == '$');
3280af22 2668 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2669 STRLEN len;
2670 GV* indirgv;
5db06880
NC
2671#ifdef PERL_MAD
2672 int soff;
2673#endif
a0d0e21e
LW
2674
2675 if (gv) {
62d55b22 2676 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2677 return 0;
62d55b22
NC
2678 if (cv) {
2679 if (SvPOK(cv)) {
2680 const char *proto = SvPVX_const(cv);
2681 if (proto) {
2682 if (*proto == ';')
2683 proto++;
2684 if (*proto == '*')
2685 return 0;
2686 }
b6c543e3
IZ
2687 }
2688 } else
c35e046a 2689 gv = NULL;
a0d0e21e 2690 }
8903cb82 2691 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2692 /* start is the beginning of the possible filehandle/object,
2693 * and s is the end of it
2694 * tmpbuf is a copy of it
2695 */
2696
a0d0e21e 2697 if (*start == '$') {
3ef1310e
RGS
2698 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2699 isUPPER(*PL_tokenbuf))
a0d0e21e 2700 return 0;
5db06880
NC
2701#ifdef PERL_MAD
2702 len = start - SvPVX(PL_linestr);
2703#endif
29595ff2 2704 s = PEEKSPACE(s);
f0092767 2705#ifdef PERL_MAD
5db06880
NC
2706 start = SvPVX(PL_linestr) + len;
2707#endif
3280af22
NIS
2708 PL_bufptr = start;
2709 PL_expect = XREF;
a0d0e21e
LW
2710 return *s == '(' ? FUNCMETH : METHOD;
2711 }
5458a98a 2712 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2713 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2714 len -= 2;
2715 tmpbuf[len] = '\0';
5db06880
NC
2716#ifdef PERL_MAD
2717 soff = s - SvPVX(PL_linestr);
2718#endif
c3e0f903
GS
2719 goto bare_package;
2720 }
90e5519e 2721 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2722 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2723 return 0;
2724 /* filehandle or package name makes it a method */
da51bb9b 2725 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2726#ifdef PERL_MAD
2727 soff = s - SvPVX(PL_linestr);
2728#endif
29595ff2 2729 s = PEEKSPACE(s);
3280af22 2730 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2731 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2732 bare_package:
cd81e915 2733 start_force(PL_curforce);
9ded7720 2734 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2735 newSVpvn(tmpbuf,len));
9ded7720 2736 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2737 if (PL_madskills)
2738 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2739 PL_expect = XTERM;
a0d0e21e 2740 force_next(WORD);
3280af22 2741 PL_bufptr = s;
5db06880
NC
2742#ifdef PERL_MAD
2743 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2744#endif
a0d0e21e
LW
2745 return *s == '(' ? FUNCMETH : METHOD;
2746 }
2747 }
2748 return 0;
2749}
2750
ffb4593c
NT
2751/*
2752 * S_incl_perldb
2753 * Return a string of Perl code to load the debugger. If PERL5DB
2754 * is set, it will return the contents of that, otherwise a
2755 * compile-time require of perl5db.pl.
2756 */
2757
bfed75c6 2758STATIC const char*
cea2e8a9 2759S_incl_perldb(pTHX)
a0d0e21e 2760{
97aff369 2761 dVAR;
3280af22 2762 if (PL_perldb) {
9d4ba2ae 2763 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2764
2765 if (pdb)
2766 return pdb;
93189314 2767 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2768 return "BEGIN { require 'perl5db.pl' }";
2769 }
2770 return "";
2771}
2772
2773
16d20bd9 2774/* Encoded script support. filter_add() effectively inserts a
4e553d73 2775 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2776 * Note that the filter function only applies to the current source file
2777 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2778 *
2779 * The datasv parameter (which may be NULL) can be used to pass
2780 * private data to this instance of the filter. The filter function
2781 * can recover the SV using the FILTER_DATA macro and use it to
2782 * store private buffers and state information.
2783 *
2784 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2785 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2786 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2787 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2788 * private use must be set using malloc'd pointers.
2789 */
16d20bd9
AD
2790
2791SV *
864dbfa3 2792Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2793{
97aff369 2794 dVAR;
f4c556ac 2795 if (!funcp)
a0714e2c 2796 return NULL;
f4c556ac 2797
5486870f
DM
2798 if (!PL_parser)
2799 return NULL;
2800
3280af22
NIS
2801 if (!PL_rsfp_filters)
2802 PL_rsfp_filters = newAV();
16d20bd9 2803 if (!datasv)
561b68a9 2804 datasv = newSV(0);
862a34c6 2805 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2806 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2807 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2808 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2809 FPTR2DPTR(void *, IoANY(datasv)),
2810 SvPV_nolen(datasv)));
3280af22
NIS
2811 av_unshift(PL_rsfp_filters, 1);
2812 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2813 return(datasv);
2814}
4e553d73 2815
16d20bd9
AD
2816
2817/* Delete most recently added instance of this filter function. */
a0d0e21e 2818void
864dbfa3 2819Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2820{
97aff369 2821 dVAR;
e0c19803 2822 SV *datasv;
24801a4b 2823
33073adb 2824#ifdef DEBUGGING
55662e27
JH
2825 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2826 FPTR2DPTR(void*, funcp)));
33073adb 2827#endif
5486870f 2828 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2829 return;
2830 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2831 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2832 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2833 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2834 IoANY(datasv) = (void *)NULL;
3280af22 2835 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2836
16d20bd9
AD
2837 return;
2838 }
2839 /* we need to search for the correct entry and clear it */
cea2e8a9 2840 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2841}
2842
2843
1de9afcd
RGS
2844/* Invoke the idxth filter function for the current rsfp. */
2845/* maxlen 0 = read one text line */
16d20bd9 2846I32
864dbfa3 2847Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2848{
97aff369 2849 dVAR;
16d20bd9
AD
2850 filter_t funcp;
2851 SV *datasv = NULL;
f482118e
NC
2852 /* This API is bad. It should have been using unsigned int for maxlen.
2853 Not sure if we want to change the API, but if not we should sanity
2854 check the value here. */
39cd7a59
NC
2855 const unsigned int correct_length
2856 = maxlen < 0 ?
2857#ifdef PERL_MICRO
2858 0x7FFFFFFF
2859#else
2860 INT_MAX
2861#endif
2862 : maxlen;
e50aee73 2863
5486870f 2864 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2865 return -1;
1de9afcd 2866 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2867 /* Provide a default input filter to make life easy. */
2868 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2869 DEBUG_P(PerlIO_printf(Perl_debug_log,
2870 "filter_read %d: from rsfp\n", idx));
f482118e 2871 if (correct_length) {
16d20bd9
AD
2872 /* Want a block */
2873 int len ;
f54cb97a 2874 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2875
2876 /* ensure buf_sv is large enough */
f482118e
NC
2877 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2878 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2879 correct_length)) <= 0) {
3280af22 2880 if (PerlIO_error(PL_rsfp))
37120919
AD
2881 return -1; /* error */
2882 else
2883 return 0 ; /* end of file */
2884 }
16d20bd9
AD
2885 SvCUR_set(buf_sv, old_len + len) ;
2886 } else {
2887 /* Want a line */
3280af22
NIS
2888 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2889 if (PerlIO_error(PL_rsfp))
37120919
AD
2890 return -1; /* error */
2891 else
2892 return 0 ; /* end of file */
2893 }
16d20bd9
AD
2894 }
2895 return SvCUR(buf_sv);
2896 }
2897 /* Skip this filter slot if filter has been deleted */
1de9afcd 2898 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2899 DEBUG_P(PerlIO_printf(Perl_debug_log,
2900 "filter_read %d: skipped (filter deleted)\n",
2901 idx));
f482118e 2902 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2903 }
2904 /* Get function pointer hidden within datasv */
8141890a 2905 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2906 DEBUG_P(PerlIO_printf(Perl_debug_log,
2907 "filter_read %d: via function %p (%s)\n",
ca0270c4 2908 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2909 /* Call function. The function is expected to */
2910 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2911 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2912 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2913}
2914
76e3520e 2915STATIC char *
cea2e8a9 2916S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2917{
97aff369 2918 dVAR;
c39cd008 2919#ifdef PERL_CR_FILTER
3280af22 2920 if (!PL_rsfp_filters) {
c39cd008 2921 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2922 }
2923#endif
3280af22 2924 if (PL_rsfp_filters) {
55497cff
PP
2925 if (!append)
2926 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2927 if (FILTER_READ(0, sv, 0) > 0)
2928 return ( SvPVX(sv) ) ;
2929 else
bd61b366 2930 return NULL ;
16d20bd9 2931 }
9d116dd7 2932 else
fd049845 2933 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2934}
2935
01ec43d0 2936STATIC HV *
9bde8eb0 2937S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2938{
97aff369 2939 dVAR;
def3634b
GS
2940 GV *gv;
2941
01ec43d0 2942 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2943 return PL_curstash;
2944
2945 if (len > 2 &&
2946 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2947 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2948 {
2949 return GvHV(gv); /* Foo:: */
def3634b
GS
2950 }
2951
2952 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2953 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2954 if (gv && GvCV(gv)) {
2955 SV * const sv = cv_const_sv(GvCV(gv));
2956 if (sv)
9bde8eb0 2957 pkgname = SvPV_const(sv, len);
def3634b
GS
2958 }
2959
9bde8eb0 2960 return gv_stashpvn(pkgname, len, 0);
def3634b 2961}
a0d0e21e 2962
e3f73d4e
RGS
2963/*
2964 * S_readpipe_override
2965 * Check whether readpipe() is overriden, and generates the appropriate
2966 * optree, provided sublex_start() is called afterwards.
2967 */
2968STATIC void
1d51329b 2969S_readpipe_override(pTHX)
e3f73d4e
RGS
2970{
2971 GV **gvp;
2972 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2973 yylval.ival = OP_BACKTICK;
2974 if ((gv_readpipe
2975 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2976 ||
2977 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2978 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2979 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2980 {
2981 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2982 append_elem(OP_LIST,
2983 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2984 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2985 }
e3f73d4e
RGS
2986}
2987
5db06880
NC
2988#ifdef PERL_MAD
2989 /*
2990 * Perl_madlex
2991 * The intent of this yylex wrapper is to minimize the changes to the
2992 * tokener when we aren't interested in collecting madprops. It remains
2993 * to be seen how successful this strategy will be...
2994 */
2995
2996int
2997Perl_madlex(pTHX)
2998{
2999 int optype;
3000 char *s = PL_bufptr;
3001
cd81e915
NC
3002 /* make sure PL_thiswhite is initialized */
3003 PL_thiswhite = 0;
3004 PL_thismad = 0;
5db06880 3005
cd81e915 3006 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3007 if (PL_pending_ident)
3008 return S_pending_ident(aTHX);
3009
3010 /* previous token ate up our whitespace? */
cd81e915
NC
3011 if (!PL_lasttoke && PL_nextwhite) {
3012 PL_thiswhite = PL_nextwhite;
3013 PL_nextwhite = 0;
5db06880
NC
3014 }
3015
3016 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3017 PL_realtokenstart = -1;
3018 PL_thistoken = 0;
5db06880
NC
3019 optype = yylex();
3020 s = PL_bufptr;
cd81e915 3021 assert(PL_curforce < 0);
5db06880 3022
cd81e915
NC
3023 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3024 if (!PL_thistoken) {
3025 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3026 PL_thistoken = newSVpvs("");
5db06880 3027 else {
c35e046a 3028 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3029 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3030 }
3031 }
cd81e915
NC
3032 if (PL_thismad) /* install head */
3033 CURMAD('X', PL_thistoken);
5db06880
NC
3034 }
3035
3036 /* last whitespace of a sublex? */
cd81e915
NC
3037 if (optype == ')' && PL_endwhite) {
3038 CURMAD('X', PL_endwhite);
5db06880
NC
3039 }
3040
cd81e915 3041 if (!PL_thismad) {
5db06880
NC
3042
3043 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3044 if (!PL_thiswhite && !PL_endwhite && !optype) {
3045 sv_free(PL_thistoken);
3046 PL_thistoken = 0;
5db06880
NC
3047 return 0;
3048 }
3049
3050 /* put off final whitespace till peg */
3051 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3052 PL_nextwhite = PL_thiswhite;
3053 PL_thiswhite = 0;
5db06880 3054 }
cd81e915
NC
3055 else if (PL_thisopen) {
3056 CURMAD('q', PL_thisopen);
3057 if (PL_thistoken)
3058 sv_free(PL_thistoken);
3059 PL_thistoken = 0;
5db06880
NC
3060 }
3061 else {
3062 /* Store actual token text as madprop X */
cd81e915 3063 CURMAD('X', PL_thistoken);
5db06880
NC
3064 }
3065
cd81e915 3066 if (PL_thiswhite) {
5db06880 3067 /* add preceding whitespace as madprop _ */
cd81e915 3068 CURMAD('_', PL_thiswhite);
5db06880
NC
3069 }
3070
cd81e915 3071 if (PL_thisstuff) {
5db06880 3072 /* add quoted material as madprop = */
cd81e915 3073 CURMAD('=', PL_thisstuff);
5db06880
NC
3074 }
3075
cd81e915 3076 if (PL_thisclose) {
5db06880 3077 /* add terminating quote as madprop Q */
cd81e915 3078 CURMAD('Q', PL_thisclose);
5db06880
NC
3079 }
3080 }
3081
3082 /* special processing based on optype */
3083
3084 switch (optype) {
3085
3086 /* opval doesn't need a TOKEN since it can already store mp */
3087 case WORD:
3088 case METHOD:
3089 case FUNCMETH:
3090 case THING:
3091 case PMFUNC:
3092 case PRIVATEREF:
3093 case FUNC0SUB:
3094 case UNIOPSUB:
3095 case LSTOPSUB:
3096 if (yylval.opval)
cd81e915
NC
3097 append_madprops(PL_thismad, yylval.opval, 0);
3098 PL_thismad = 0;
5db06880
NC
3099 return optype;
3100
3101 /* fake EOF */
3102 case 0:
3103 optype = PEG;
cd81e915
NC
3104 if (PL_endwhite) {
3105 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3106 PL_endwhite = 0;
5db06880
NC
3107 }
3108 break;
3109
3110 case ']':
3111 case '}':
cd81e915 3112 if (PL_faketokens)
5db06880
NC
3113 break;
3114 /* remember any fake bracket that lexer is about to discard */
3115 if (PL_lex_brackets == 1 &&
3116 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3117 {
3118 s = PL_bufptr;
3119 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3120 s++;
3121 if (*s == '}') {
cd81e915
NC
3122 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3123 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3124 PL_thiswhite = 0;
5db06880
NC
3125 PL_bufptr = s - 1;
3126 break; /* don't bother looking for trailing comment */
3127 }
3128 else
3129 s = PL_bufptr;
3130 }
3131 if (optype == ']')
3132 break;
3133 /* FALLTHROUGH */
3134
3135 /* attach a trailing comment to its statement instead of next token */
3136 case ';':
cd81e915 3137 if (PL_faketokens)
5db06880
NC
3138 break;
3139 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3140 s = PL_bufptr;
3141 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3142 s++;
3143 if (*s == '\n' || *s == '#') {
3144 while (s < PL_bufend && *s != '\n')
3145 s++;
3146 if (s < PL_bufend)
3147 s++;
cd81e915
NC
3148 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3149 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3150 PL_thiswhite = 0;
5db06880
NC
3151 PL_bufptr = s;
3152 }
3153 }
3154 break;
3155
3156 /* pval */
3157 case LABEL:
3158 break;
3159
3160 /* ival */
3161 default:
3162 break;
3163
3164 }
3165
3166 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3167 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3168 PL_thismad = 0;
5db06880
NC
3169 return optype;
3170}
3171#endif
3172
468aa647 3173STATIC char *
cc6ed77d 3174S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3175 dVAR;
468aa647
RGS
3176 if (PL_expect != XSTATE)
3177 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3178 is_use ? "use" : "no"));
29595ff2 3179 s = SKIPSPACE1(s);
468aa647
RGS
3180 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3181 s = force_version(s, TRUE);
29595ff2 3182 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3183 start_force(PL_curforce);
9ded7720 3184 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3185 force_next(WORD);
3186 }
3187 else if (*s == 'v') {
3188 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3189 s = force_version(s, FALSE);
3190 }
3191 }
3192 else {
3193 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3194 s = force_version(s, FALSE);
3195 }
3196 yylval.ival = is_use;
3197 return s;
3198}
748a9306 3199#ifdef DEBUGGING
27da23d5 3200 static const char* const exp_name[] =
09bef843 3201 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3202 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3203 };
748a9306 3204#endif
463ee0b2 3205
02aa26ce
NT
3206/*
3207 yylex
3208
3209 Works out what to call the token just pulled out of the input
3210 stream. The yacc parser takes care of taking the ops we return and
3211 stitching them into a tree.
3212
3213 Returns:
3214 PRIVATEREF
3215
3216 Structure:
3217 if read an identifier
3218 if we're in a my declaration
3219 croak if they tried to say my($foo::bar)
3220 build the ops for a my() declaration
3221 if it's an access to a my() variable
3222 are we in a sort block?
3223 croak if my($a); $a <=> $b
3224 build ops for access to a my() variable
3225 if in a dq string, and they've said @foo and we can't find @foo
3226 croak
3227 build ops for a bareword
3228 if we already built the token before, use it.
3229*/
3230
20141f0e 3231
dba4d153
JH
3232#ifdef __SC__
3233#pragma segment Perl_yylex
3234#endif
dba4d153 3235int
dba4d153 3236Perl_yylex(pTHX)
20141f0e 3237{
97aff369 3238 dVAR;
3afc138a 3239 register char *s = PL_bufptr;
378cc40b 3240 register char *d;
463ee0b2 3241 STRLEN len;
aa7440fb 3242 bool bof = FALSE;
a687059c 3243
10edeb5d
JH
3244 /* orig_keyword, gvp, and gv are initialized here because
3245 * jump to the label just_a_word_zero can bypass their
3246 * initialization later. */
3247 I32 orig_keyword = 0;
3248 GV *gv = NULL;
3249 GV **gvp = NULL;
3250
bbf60fe6 3251 DEBUG_T( {
396482e1 3252 SV* tmp = newSVpvs("");
b6007c36
DM
3253 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3254 (IV)CopLINE(PL_curcop),
3255 lex_state_names[PL_lex_state],
3256 exp_name[PL_expect],
3257 pv_display(tmp, s, strlen(s), 0, 60));
3258 SvREFCNT_dec(tmp);
bbf60fe6 3259 } );
02aa26ce 3260 /* check if there's an identifier for us to look at */
ba979b31 3261 if (PL_pending_ident)
bbf60fe6 3262 return REPORT(S_pending_ident(aTHX));
bbce6d69 3263
02aa26ce
NT
3264 /* no identifier pending identification */
3265
3280af22 3266 switch (PL_lex_state) {
79072805
LW
3267#ifdef COMMENTARY
3268 case LEX_NORMAL: /* Some compilers will produce faster */
3269 case LEX_INTERPNORMAL: /* code if we comment these out. */
3270 break;
3271#endif
3272
09bef843 3273 /* when we've already built the next token, just pull it out of the queue */
79072805 3274 case LEX_KNOWNEXT:
5db06880
NC
3275#ifdef PERL_MAD
3276 PL_lasttoke--;
3277 yylval = PL_nexttoke[PL_lasttoke].next_val;
3278 if (PL_madskills) {
cd81e915 3279 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3280 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3281 if (PL_thismad && PL_thismad->mad_key == '_') {
3282 PL_thiswhite = (SV*)PL_thismad->mad_val;
3283 PL_thismad->mad_val = 0;
3284 mad_free(PL_thismad);
3285 PL_thismad = 0;
5db06880
NC
3286 }
3287 }
3288 if (!PL_lasttoke) {
3289 PL_lex_state = PL_lex_defer;
3290 PL_expect = PL_lex_expect;
3291 PL_lex_defer = LEX_NORMAL;
3292 if (!PL_nexttoke[PL_lasttoke].next_type)
3293 return yylex();
3294 }
3295#else
3280af22 3296 PL_nexttoke--;
5db06880 3297 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3298 if (!PL_nexttoke) {
3299 PL_lex_state = PL_lex_defer;
3300 PL_expect = PL_lex_expect;
3301 PL_lex_defer = LEX_NORMAL;
463ee0b2 3302 }
5db06880
NC
3303#endif
3304#ifdef PERL_MAD
3305 /* FIXME - can these be merged? */
3306 return(PL_nexttoke[PL_lasttoke].next_type);
3307#else
bbf60fe6 3308 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3309#endif
79072805 3310
02aa26ce 3311 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3312 when we get here, PL_bufptr is at the \
02aa26ce 3313 */
79072805
LW
3314 case LEX_INTERPCASEMOD:
3315#ifdef DEBUGGING
3280af22 3316 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3317 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3318#endif
02aa26ce 3319 /* handle \E or end of string */
3280af22 3320 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3321 /* if at a \E */
3280af22 3322 if (PL_lex_casemods) {
f54cb97a 3323 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3324 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3325
3792a11b
NC
3326 if (PL_bufptr != PL_bufend
3327 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3328 PL_bufptr += 2;
3329 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3330#ifdef PERL_MAD
3331 if (PL_madskills)
6b29d1f5 3332 PL_thistoken = newSVpvs("\\E");
5db06880 3333#endif
a0d0e21e 3334 }
bbf60fe6 3335 return REPORT(')');
79072805 3336 }
5db06880
NC
3337#ifdef PERL_MAD
3338 while (PL_bufptr != PL_bufend &&
3339 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3340 if (!PL_thiswhite)
6b29d1f5 3341 PL_thiswhite = newSVpvs("");
cd81e915 3342 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3343 PL_bufptr += 2;
3344 }
3345#else
3280af22
NIS
3346 if (PL_bufptr != PL_bufend)
3347 PL_bufptr += 2;
5db06880 3348#endif
3280af22 3349 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3350 return yylex();
79072805
LW
3351 }
3352 else {
607df283 3353 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3354 "### Saw case modifier\n"); });
3280af22 3355 s = PL_bufptr + 1;
6e909404 3356 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3357#ifdef PERL_MAD
cd81e915 3358 if (!PL_thiswhite)
6b29d1f5 3359 PL_thiswhite = newSVpvs("");
cd81e915 3360 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3361#endif
89122651 3362 PL_bufptr = s + 3;
6e909404
JH
3363 PL_lex_state = LEX_INTERPCONCAT;
3364 return yylex();
a0d0e21e 3365 }
6e909404 3366 else {
90771dc0 3367 I32 tmp;
5db06880
NC
3368 if (!PL_madskills) /* when just compiling don't need correct */
3369 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3370 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3371 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3372 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3373 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3374 return REPORT(')');
6e909404
JH
3375 }
3376 if (PL_lex_casemods > 10)
3377 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3378 PL_lex_casestack[PL_lex_casemods++] = *s;
3379 PL_lex_casestack[PL_lex_casemods] = '\0';
3380 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3381 start_force(PL_curforce);
9ded7720 3382 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3383 force_next('(');
cd81e915 3384 start_force(PL_curforce);
6e909404 3385 if (*s == 'l')
9ded7720 3386 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3387 else if (*s == 'u')
9ded7720 3388 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3389 else if (*s == 'L')
9ded7720 3390 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3391 else if (*s == 'U')
9ded7720 3392 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3393 else if (*s == 'Q')
9ded7720 3394 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3395 else
3396 Perl_croak(aTHX_ "panic: yylex");
5db06880 3397 if (PL_madskills) {
a5849ce5
NC
3398 SV* const tmpsv = newSVpvs("\\ ");
3399 /* replace the space with the character we want to escape
3400 */
3401 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3402 curmad('_', tmpsv);
3403 }
6e909404 3404 PL_bufptr = s + 1;
a0d0e21e 3405 }
79072805 3406 force_next(FUNC);
3280af22
NIS
3407 if (PL_lex_starts) {
3408 s = PL_bufptr;
3409 PL_lex_starts = 0;
5db06880
NC
3410#ifdef PERL_MAD
3411 if (PL_madskills) {
cd81e915
NC
3412 if (PL_thistoken)
3413 sv_free(PL_thistoken);
6b29d1f5 3414 PL_thistoken = newSVpvs("");
5db06880
NC
3415 }
3416#endif
131b3ad0
DM
3417 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3418 if (PL_lex_casemods == 1 && PL_lex_inpat)
3419 OPERATOR(',');
3420 else
3421 Aop(OP_CONCAT);
79072805
LW
3422 }
3423 else
cea2e8a9 3424 return yylex();
79072805
LW
3425 }
3426
55497cff 3427 case LEX_INTERPPUSH:
bbf60fe6 3428 return REPORT(sublex_push());
55497cff 3429
79072805 3430 case LEX_INTERPSTART:
3280af22 3431 if (PL_bufptr == PL_bufend)
bbf60fe6 3432 return REPORT(sublex_done());
607df283 3433 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3434 "### Interpolated variable\n"); });
3280af22
NIS
3435 PL_expect = XTERM;
3436 PL_lex_dojoin = (*PL_bufptr == '@');
3437 PL_lex_state = LEX_INTERPNORMAL;
3438 if (PL_lex_dojoin) {
cd81e915 3439 start_force(PL_curforce);
9ded7720 3440 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3441 force_next(',');
cd81e915 3442 start_force(PL_curforce);
a0d0e21e 3443 force_ident("\"", '$');
cd81e915 3444 start_force(PL_curforce);
9ded7720 3445 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3446 force_next('$');
cd81e915 3447 start_force(PL_curforce);
9ded7720 3448 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3449 force_next('(');
cd81e915 3450 start_force(PL_curforce);
9ded7720 3451 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3452 force_next(FUNC);
3453 }
3280af22
NIS
3454 if (PL_lex_starts++) {
3455 s = PL_bufptr;
5db06880
NC
3456#ifdef PERL_MAD
3457 if (PL_madskills) {
cd81e915
NC
3458 if (PL_thistoken)
3459 sv_free(PL_thistoken);
6b29d1f5 3460 PL_thistoken = newSVpvs("");
5db06880
NC
3461 }
3462#endif
131b3ad0
DM
3463 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3464 if (!PL_lex_casemods && PL_lex_inpat)
3465 OPERATOR(',');
3466 else
3467 Aop(OP_CONCAT);
79072805 3468 }
cea2e8a9 3469 return yylex();
79072805
LW
3470
3471 case LEX_INTERPENDMAYBE:
3280af22
NIS
3472 if (intuit_more(PL_bufptr)) {
3473 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3474 break;
3475 }
3476 /* FALL THROUGH */
3477
3478 case LEX_INTERPEND:
3280af22
NIS
3479 if (PL_lex_dojoin) {
3480 PL_lex_dojoin = FALSE;
3481 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3482#ifdef PERL_MAD
3483 if (PL_madskills) {
cd81e915
NC
3484 if (PL_thistoken)
3485 sv_free(PL_thistoken);
6b29d1f5 3486 PL_thistoken = newSVpvs("");
5db06880
NC
3487 }
3488#endif
bbf60fe6 3489 return REPORT(')');
79072805 3490 }
43a16006 3491 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3492 && SvEVALED(PL_lex_repl))
43a16006 3493 {
e9fa98b2 3494 if (PL_bufptr != PL_bufend)
cea2e8a9 3495 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3496 PL_lex_repl = NULL;
e9fa98b2 3497 }
79072805
LW
3498 /* FALLTHROUGH */
3499 case LEX_INTERPCONCAT:
3500#ifdef DEBUGGING
3280af22 3501 if (PL_lex_brackets)
cea2e8a9 3502 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3503#endif
3280af22 3504 if (PL_bufptr == PL_bufend)
bbf60fe6 3505 return REPORT(sublex_done());
79072805 3506
3280af22
NIS
3507 if (SvIVX(PL_linestr) == '\'') {
3508 SV *sv = newSVsv(PL_linestr);
3509 if (!PL_lex_inpat)
76e3520e 3510 sv = tokeq(sv);
3280af22 3511 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 3512 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
79072805 3513 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3514 s = PL_bufend;
79072805
LW
3515 }
3516 else {
3280af22 3517 s = scan_const(PL_bufptr);
79072805 3518 if (*s == '\\')
3280af22 3519 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3520 else
3280af22 3521 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3522 }
3523
3280af22 3524 if (s != PL_bufptr) {
cd81e915 3525 start_force(PL_curforce);
5db06880
NC
3526 if (PL_madskills) {
3527 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3528 }
9ded7720 3529 NEXTVAL_NEXTTOKE = yylval;
3280af22 3530 PL_expect = XTERM;
79072805 3531 force_next(THING);
131b3ad0 3532 if (PL_lex_starts++) {
5db06880
NC
3533#ifdef PERL_MAD
3534 if (PL_madskills) {
cd81e915
NC
3535 if (PL_thistoken)
3536 sv_free(PL_thistoken);
6b29d1f5 3537 PL_thistoken = newSVpvs("");
5db06880
NC
3538 }
3539#endif
131b3ad0
DM
3540 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3541 if (!PL_lex_casemods && PL_lex_inpat)
3542 OPERATOR(',');
3543 else
3544 Aop(OP_CONCAT);
3545 }
79072805 3546 else {
3280af22 3547 PL_bufptr = s;
cea2e8a9 3548 return yylex();
79072805
LW
3549 }
3550 }
3551