This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #56526] m/a{1,0}/ compiles but doesn't match a literal string
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
eb0d8d16
NC
26#define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
28
6154021b 29#define pl_yylval (PL_parser->yylval)
d3b6f988 30
acdf0a21
DM
31/* YYINITDEPTH -- initial size of the parser's stacks. */
32#define YYINITDEPTH 200
33
199e78b7
DM
34/* XXX temporary backwards compatibility */
35#define PL_lex_brackets (PL_parser->lex_brackets)
36#define PL_lex_brackstack (PL_parser->lex_brackstack)
37#define PL_lex_casemods (PL_parser->lex_casemods)
38#define PL_lex_casestack (PL_parser->lex_casestack)
39#define PL_lex_defer (PL_parser->lex_defer)
40#define PL_lex_dojoin (PL_parser->lex_dojoin)
41#define PL_lex_expect (PL_parser->lex_expect)
42#define PL_lex_formbrack (PL_parser->lex_formbrack)
43#define PL_lex_inpat (PL_parser->lex_inpat)
44#define PL_lex_inwhat (PL_parser->lex_inwhat)
45#define PL_lex_op (PL_parser->lex_op)
46#define PL_lex_repl (PL_parser->lex_repl)
47#define PL_lex_starts (PL_parser->lex_starts)
48#define PL_lex_stuff (PL_parser->lex_stuff)
49#define PL_multi_start (PL_parser->multi_start)
50#define PL_multi_open (PL_parser->multi_open)
51#define PL_multi_close (PL_parser->multi_close)
52#define PL_pending_ident (PL_parser->pending_ident)
53#define PL_preambled (PL_parser->preambled)
54#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 55#define PL_linestr (PL_parser->linestr)
c2598295
DM
56#define PL_expect (PL_parser->expect)
57#define PL_copline (PL_parser->copline)
f06b5848
DM
58#define PL_bufptr (PL_parser->bufptr)
59#define PL_oldbufptr (PL_parser->oldbufptr)
60#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61#define PL_linestart (PL_parser->linestart)
62#define PL_bufend (PL_parser->bufend)
63#define PL_last_uni (PL_parser->last_uni)
64#define PL_last_lop (PL_parser->last_lop)
65#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 66#define PL_lex_state (PL_parser->lex_state)
2f9285f8 67#define PL_rsfp (PL_parser->rsfp)
5486870f 68#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
69#define PL_in_my (PL_parser->in_my)
70#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 71#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 72#define PL_multi_end (PL_parser->multi_end)
13765c85 73#define PL_error_count (PL_parser->error_count)
199e78b7
DM
74
75#ifdef PERL_MAD
76# define PL_endwhite (PL_parser->endwhite)
77# define PL_faketokens (PL_parser->faketokens)
78# define PL_lasttoke (PL_parser->lasttoke)
79# define PL_nextwhite (PL_parser->nextwhite)
80# define PL_realtokenstart (PL_parser->realtokenstart)
81# define PL_skipwhite (PL_parser->skipwhite)
82# define PL_thisclose (PL_parser->thisclose)
83# define PL_thismad (PL_parser->thismad)
84# define PL_thisopen (PL_parser->thisopen)
85# define PL_thisstuff (PL_parser->thisstuff)
86# define PL_thistoken (PL_parser->thistoken)
87# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
88# define PL_thiswhite (PL_parser->thiswhite)
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_curforce (PL_parser->curforce)
91#else
92# define PL_nexttoke (PL_parser->nexttoke)
93# define PL_nexttype (PL_parser->nexttype)
94# define PL_nextval (PL_parser->nextval)
199e78b7
DM
95#endif
96
3cbf51f5
DM
97static int
98S_pending_ident(pTHX);
199e78b7 99
0bd48802 100static const char ident_too_long[] = "Identifier too long";
c445ea15 101static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 102
6e3aabd6 103#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
104static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 106#endif
51371543 107
29595ff2 108#ifdef PERL_MAD
29595ff2 109# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 110# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 111#else
5db06880 112# define CURMAD(slot,sv)
9ded7720 113# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
114#endif
115
9059aa12
LW
116#define XFAKEBRACK 128
117#define XENUMMASK 127
118
39e02b42
JH
119#ifdef USE_UTF8_SCRIPTS
120# define UTF (!IN_BYTES)
2b9d42f0 121#else
746b446a 122# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 123#endif
a0ed51b3 124
61f0cdd9 125/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
126 * 1999-02-27 mjd-perl-patch@plover.com */
127#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128
bf4acbe4
GS
129/* On MacOS, respect nonbreaking spaces */
130#ifdef MACOS_TRADITIONAL
131#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
132#else
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
134#endif
135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a 141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
704d4215 230# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
242#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf 261#define UNI2(f,x) { \
6154021b 262 pl_yylval.ival = f; \
376fcdbf
AL
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf 275#define UNIBRACK(f) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
6154021b 286#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
6154021b 290/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
6154021b 294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 371 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 372 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
373};
374
6154021b 375/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 376
bbf60fe6 377STATIC int
704d4215 378S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 379{
97aff369 380 dVAR;
7918f24d
NC
381
382 PERL_ARGS_ASSERT_TOKEREPORT;
383
bbf60fe6 384 if (DEBUG_T_TEST) {
bd61b366 385 const char *name = NULL;
bbf60fe6 386 enum token_type type = TOKENTYPE_NONE;
f54cb97a 387 const struct debug_tokens *p;
396482e1 388 SV* const report = newSVpvs("<== ");
bbf60fe6 389
f54cb97a 390 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
391 if (p->token == (int)rv) {
392 name = p->name;
393 type = p->type;
394 break;
395 }
396 }
397 if (name)
54667de8 398 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
399 else if ((char)rv > ' ' && (char)rv < '~')
400 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
401 else if (!rv)
396482e1 402 sv_catpvs(report, "EOF");
bbf60fe6
DM
403 else
404 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
405 switch (type) {
406 case TOKENTYPE_NONE:
407 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
408 break;
409 case TOKENTYPE_IVAL:
704d4215 410 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
411 break;
412 case TOKENTYPE_OPNUM:
413 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 414 PL_op_name[lvalp->ival]);
bbf60fe6
DM
415 break;
416 case TOKENTYPE_PVAL:
704d4215 417 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
418 break;
419 case TOKENTYPE_OPVAL:
704d4215 420 if (lvalp->opval) {
401441c0 421 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
422 PL_op_name[lvalp->opval->op_type]);
423 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 424 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 425 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
426 }
427
428 }
401441c0 429 else
396482e1 430 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
431 break;
432 }
b6007c36 433 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
434 };
435 return (int)rv;
998054bd
SC
436}
437
b6007c36
DM
438
439/* print the buffer with suitable escapes */
440
441STATIC void
15f169a1 442S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 443{
396482e1 444 SV* const tmp = newSVpvs("");
7918f24d
NC
445
446 PERL_ARGS_ASSERT_PRINTBUF;
447
b6007c36
DM
448 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
449 SvREFCNT_dec(tmp);
450}
451
8fa7f367
JH
452#endif
453
ffb4593c
NT
454/*
455 * S_ao
456 *
c963b151
BD
457 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
458 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
459 */
460
76e3520e 461STATIC int
cea2e8a9 462S_ao(pTHX_ int toketype)
a0d0e21e 463{
97aff369 464 dVAR;
3280af22
NIS
465 if (*PL_bufptr == '=') {
466 PL_bufptr++;
a0d0e21e 467 if (toketype == ANDAND)
6154021b 468 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 469 else if (toketype == OROR)
6154021b 470 pl_yylval.ival = OP_ORASSIGN;
c963b151 471 else if (toketype == DORDOR)
6154021b 472 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
473 toketype = ASSIGNOP;
474 }
475 return toketype;
476}
477
ffb4593c
NT
478/*
479 * S_no_op
480 * When Perl expects an operator and finds something else, no_op
481 * prints the warning. It always prints "<something> found where
482 * operator expected. It prints "Missing semicolon on previous line?"
483 * if the surprise occurs at the start of the line. "do you need to
484 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
485 * where the compiler doesn't know if foo is a method call or a function.
486 * It prints "Missing operator before end of line" if there's nothing
487 * after the missing operator, or "... before <...>" if there is something
488 * after the missing operator.
489 */
490
76e3520e 491STATIC void
15f169a1 492S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 493{
97aff369 494 dVAR;
9d4ba2ae
AL
495 char * const oldbp = PL_bufptr;
496 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 497
7918f24d
NC
498 PERL_ARGS_ASSERT_NO_OP;
499
1189a94a
GS
500 if (!s)
501 s = oldbp;
07c798fb 502 else
1189a94a 503 PL_bufptr = s;
cea2e8a9 504 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
505 if (ckWARN_d(WARN_SYNTAX)) {
506 if (is_first)
507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
508 "\t(Missing semicolon on previous line?)\n");
509 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 510 const char *t;
c35e046a
AL
511 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
512 NOOP;
56da5a46
RGS
513 if (t < PL_bufptr && isSPACE(*t))
514 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
515 "\t(Do you need to predeclare %.*s?)\n",
551405c4 516 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
517 }
518 else {
519 assert(s >= oldbp);
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 521 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 522 }
07c798fb 523 }
3280af22 524 PL_bufptr = oldbp;
8990e307
LW
525}
526
ffb4593c
NT
527/*
528 * S_missingterm
529 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 530 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
531 * If we're in a delimited string and the delimiter is a control
532 * character, it's reformatted into a two-char sequence like ^C.
533 * This is fatal.
534 */
535
76e3520e 536STATIC void
cea2e8a9 537S_missingterm(pTHX_ char *s)
8990e307 538{
97aff369 539 dVAR;
8990e307
LW
540 char tmpbuf[3];
541 char q;
542 if (s) {
9d4ba2ae 543 char * const nl = strrchr(s,'\n');
d2719217 544 if (nl)
8990e307
LW
545 *nl = '\0';
546 }
9d116dd7
JH
547 else if (
548#ifdef EBCDIC
549 iscntrl(PL_multi_close)
550#else
551 PL_multi_close < 32 || PL_multi_close == 127
552#endif
553 ) {
8990e307 554 *tmpbuf = '^';
585ec06d 555 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
556 tmpbuf[2] = '\0';
557 s = tmpbuf;
558 }
559 else {
eb160463 560 *tmpbuf = (char)PL_multi_close;
8990e307
LW
561 tmpbuf[1] = '\0';
562 s = tmpbuf;
563 }
564 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 565 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 566}
79072805 567
ef89dcc3 568#define FEATURE_IS_ENABLED(name) \
0d863452 569 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 570 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
571/* The longest string we pass in. */
572#define MAX_FEATURE_LEN (sizeof("switch")-1)
573
0d863452
RH
574/*
575 * S_feature_is_enabled
576 * Check whether the named feature is enabled.
577 */
578STATIC bool
15f169a1 579S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 580{
97aff369 581 dVAR;
0d863452 582 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 583 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
584
585 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
586
4a731d7b
NC
587 assert(namelen <= MAX_FEATURE_LEN);
588 memcpy(&he_name[8], name, namelen);
d4c19fe8 589
7b9ef140 590 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
591}
592
ffb4593c
NT
593/*
594 * Perl_deprecate
ffb4593c
NT
595 */
596
79072805 597void
15f169a1 598Perl_deprecate(pTHX_ const char *const s)
a0d0e21e 599{
7918f24d
NC
600 PERL_ARGS_ASSERT_DEPRECATE;
601
599cee73 602 if (ckWARN(WARN_DEPRECATED))
9014280d 603 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
604}
605
12bcd1a6 606void
15f169a1 607Perl_deprecate_old(pTHX_ const char *const s)
12bcd1a6
PM
608{
609 /* This function should NOT be called for any new deprecated warnings */
610 /* Use Perl_deprecate instead */
611 /* */
612 /* It is here to maintain backward compatibility with the pre-5.8 */
613 /* warnings category hierarchy. The "deprecated" category used to */
614 /* live under the "syntax" category. It is now a top-level category */
615 /* in its own right. */
616
7918f24d
NC
617 PERL_ARGS_ASSERT_DEPRECATE_OLD;
618
12bcd1a6 619 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 620 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
621 "Use of %s is deprecated", s);
622}
623
ffb4593c 624/*
9cbb5ea2
GS
625 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
626 * utf16-to-utf8-reversed.
ffb4593c
NT
627 */
628
c39cd008
GS
629#ifdef PERL_CR_FILTER
630static void
631strip_return(SV *sv)
632{
95a20fc0 633 register const char *s = SvPVX_const(sv);
9d4ba2ae 634 register const char * const e = s + SvCUR(sv);
7918f24d
NC
635
636 PERL_ARGS_ASSERT_STRIP_RETURN;
637
c39cd008
GS
638 /* outer loop optimized to do nothing if there are no CR-LFs */
639 while (s < e) {
640 if (*s++ == '\r' && *s == '\n') {
641 /* hit a CR-LF, need to copy the rest */
642 register char *d = s - 1;
643 *d++ = *s++;
644 while (s < e) {
645 if (*s == '\r' && s[1] == '\n')
646 s++;
647 *d++ = *s++;
648 }
649 SvCUR(sv) -= s - d;
650 return;
651 }
652 }
653}
a868473f 654
76e3520e 655STATIC I32
c39cd008 656S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 657{
f54cb97a 658 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
659 if (count > 0 && !maxlen)
660 strip_return(sv);
661 return count;
a868473f
NIS
662}
663#endif
664
199e78b7
DM
665
666
ffb4593c
NT
667/*
668 * Perl_lex_start
5486870f 669 *
e3abe207 670 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
671 *
672 * rsfp is the opened file handle to read from (if any),
673 *
674 * line holds any initial content already read from the file (or in
675 * the case of no file, such as an eval, the whole contents);
676 *
677 * new_filter indicates that this is a new file and it shouldn't inherit
678 * the filters from the current parser (ie require).
ffb4593c
NT
679 */
680
a0d0e21e 681void
5486870f 682Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 683{
97aff369 684 dVAR;
6ef55633 685 const char *s = NULL;
8990e307 686 STRLEN len;
5486870f 687 yy_parser *parser, *oparser;
acdf0a21
DM
688
689 /* create and initialise a parser */
690
199e78b7 691 Newxz(parser, 1, yy_parser);
5486870f 692 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
693 PL_parser = parser;
694
695 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
696 parser->ps = parser->stack;
697 parser->stack_size = YYINITDEPTH;
698
699 parser->stack->state = 0;
700 parser->yyerrstatus = 0;
701 parser->yychar = YYEMPTY; /* Cause a token to be read. */
702
e3abe207
DM
703 /* on scope exit, free this parser and restore any outer one */
704 SAVEPARSER(parser);
7c4baf47 705 parser->saved_curcop = PL_curcop;
e3abe207 706
acdf0a21 707 /* initialise lexer state */
8990e307 708
fb205e7a
DM
709#ifdef PERL_MAD
710 parser->curforce = -1;
711#else
712 parser->nexttoke = 0;
713#endif
ca4cfd28 714 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 715 parser->copline = NOLINE;
5afb0a62 716 parser->lex_state = LEX_NORMAL;
c2598295 717 parser->expect = XSTATE;
2f9285f8 718 parser->rsfp = rsfp;
56b27c9a 719 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 720 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 721
199e78b7
DM
722 Newx(parser->lex_brackstack, 120, char);
723 Newx(parser->lex_casestack, 12, char);
724 *parser->lex_casestack = '\0';
02b34bbe 725
10efb74f
NC
726 if (line) {
727 s = SvPV_const(line, len);
728 } else {
729 len = 0;
730 }
bdc0bf6f 731
10efb74f 732 if (!len) {
bdc0bf6f 733 parser->linestr = newSVpvs("\n;");
10efb74f 734 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 735 parser->linestr = newSVsv(line);
10efb74f 736 if (s[len-1] != ';')
bdc0bf6f 737 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
738 } else {
739 SvTEMP_off(line);
740 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 741 parser->linestr = line;
8990e307 742 }
f06b5848
DM
743 parser->oldoldbufptr =
744 parser->oldbufptr =
745 parser->bufptr =
746 parser->linestart = SvPVX(parser->linestr);
747 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
748 parser->last_lop = parser->last_uni = NULL;
79072805 749}
a687059c 750
e3abe207
DM
751
752/* delete a parser object */
753
754void
755Perl_parser_free(pTHX_ const yy_parser *parser)
756{
7918f24d
NC
757 PERL_ARGS_ASSERT_PARSER_FREE;
758
7c4baf47 759 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
760 SvREFCNT_dec(parser->linestr);
761
2f9285f8
DM
762 if (parser->rsfp == PerlIO_stdin())
763 PerlIO_clearerr(parser->rsfp);
799361c3
SH
764 else if (parser->rsfp && (!parser->old_parser ||
765 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 766 PerlIO_close(parser->rsfp);
5486870f 767 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 768
e3abe207
DM
769 Safefree(parser->stack);
770 Safefree(parser->lex_brackstack);
771 Safefree(parser->lex_casestack);
772 PL_parser = parser->old_parser;
773 Safefree(parser);
774}
775
776
ffb4593c
NT
777/*
778 * Perl_lex_end
9cbb5ea2
GS
779 * Finalizer for lexing operations. Must be called when the parser is
780 * done with the lexer.
ffb4593c
NT
781 */
782
463ee0b2 783void
864dbfa3 784Perl_lex_end(pTHX)
463ee0b2 785{
97aff369 786 dVAR;
3280af22 787 PL_doextract = FALSE;
463ee0b2
LW
788}
789
ffb4593c
NT
790/*
791 * S_incline
792 * This subroutine has nothing to do with tilting, whether at windmills
793 * or pinball tables. Its name is short for "increment line". It
57843af0 794 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 795 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
796 * # line 500 "foo.pm"
797 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
798 */
799
76e3520e 800STATIC void
d9095cec 801S_incline(pTHX_ const char *s)
463ee0b2 802{
97aff369 803 dVAR;
d9095cec
NC
804 const char *t;
805 const char *n;
806 const char *e;
463ee0b2 807
7918f24d
NC
808 PERL_ARGS_ASSERT_INCLINE;
809
57843af0 810 CopLINE_inc(PL_curcop);
463ee0b2
LW
811 if (*s++ != '#')
812 return;
d4c19fe8
AL
813 while (SPACE_OR_TAB(*s))
814 s++;
73659bf1
GS
815 if (strnEQ(s, "line", 4))
816 s += 4;
817 else
818 return;
084592ab 819 if (SPACE_OR_TAB(*s))
73659bf1 820 s++;
4e553d73 821 else
73659bf1 822 return;
d4c19fe8
AL
823 while (SPACE_OR_TAB(*s))
824 s++;
463ee0b2
LW
825 if (!isDIGIT(*s))
826 return;
d4c19fe8 827
463ee0b2
LW
828 n = s;
829 while (isDIGIT(*s))
830 s++;
bf4acbe4 831 while (SPACE_OR_TAB(*s))
463ee0b2 832 s++;
73659bf1 833 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 834 s++;
73659bf1
GS
835 e = t + 1;
836 }
463ee0b2 837 else {
c35e046a
AL
838 t = s;
839 while (!isSPACE(*t))
840 t++;
73659bf1 841 e = t;
463ee0b2 842 }
bf4acbe4 843 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
844 e++;
845 if (*e != '\n' && *e != '\0')
846 return; /* false alarm */
847
f4dd75d9 848 if (t - s > 0) {
d9095cec 849 const STRLEN len = t - s;
8a5ee598 850#ifndef USE_ITHREADS
19bad673
NC
851 SV *const temp_sv = CopFILESV(PL_curcop);
852 const char *cf;
853 STRLEN tmplen;
854
855 if (temp_sv) {
856 cf = SvPVX(temp_sv);
857 tmplen = SvCUR(temp_sv);
858 } else {
859 cf = NULL;
860 tmplen = 0;
861 }
862
42d9b98d 863 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
864 /* must copy *{"::_<(eval N)[oldfilename:L]"}
865 * to *{"::_<newfilename"} */
44867030
NC
866 /* However, the long form of evals is only turned on by the
867 debugger - usually they're "(eval %lu)" */
868 char smallbuf[128];
869 char *tmpbuf;
870 GV **gvp;
d9095cec 871 STRLEN tmplen2 = len;
798b63bc 872 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
873 tmpbuf = smallbuf;
874 else
2ae0db35 875 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
876 tmpbuf[0] = '_';
877 tmpbuf[1] = '<';
2ae0db35 878 memcpy(tmpbuf + 2, cf, tmplen);
44867030 879 tmplen += 2;
8a5ee598
RGS
880 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
881 if (gvp) {
44867030
NC
882 char *tmpbuf2;
883 GV *gv2;
884
885 if (tmplen2 + 2 <= sizeof smallbuf)
886 tmpbuf2 = smallbuf;
887 else
888 Newx(tmpbuf2, tmplen2 + 2, char);
889
890 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
891 /* Either they malloc'd it, or we malloc'd it,
892 so no prefix is present in ours. */
893 tmpbuf2[0] = '_';
894 tmpbuf2[1] = '<';
895 }
896
897 memcpy(tmpbuf2 + 2, s, tmplen2);
898 tmplen2 += 2;
899
8a5ee598 900 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 901 if (!isGV(gv2)) {
8a5ee598 902 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
903 /* adjust ${"::_<newfilename"} to store the new file name */
904 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
905 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
906 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 907 }
44867030
NC
908
909 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 910 }
e66cf94c 911 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 912 }
8a5ee598 913#endif
05ec9bb3 914 CopFILE_free(PL_curcop);
d9095cec 915 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 916 }
57843af0 917 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
918}
919
29595ff2 920#ifdef PERL_MAD
cd81e915 921/* skip space before PL_thistoken */
29595ff2
NC
922
923STATIC char *
924S_skipspace0(pTHX_ register char *s)
925{
7918f24d
NC
926 PERL_ARGS_ASSERT_SKIPSPACE0;
927
29595ff2
NC
928 s = skipspace(s);
929 if (!PL_madskills)
930 return s;
cd81e915
NC
931 if (PL_skipwhite) {
932 if (!PL_thiswhite)
6b29d1f5 933 PL_thiswhite = newSVpvs("");
cd81e915
NC
934 sv_catsv(PL_thiswhite, PL_skipwhite);
935 sv_free(PL_skipwhite);
936 PL_skipwhite = 0;
937 }
938 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
939 return s;
940}
941
cd81e915 942/* skip space after PL_thistoken */
29595ff2
NC
943
944STATIC char *
945S_skipspace1(pTHX_ register char *s)
946{
d4c19fe8 947 const char *start = s;
29595ff2
NC
948 I32 startoff = start - SvPVX(PL_linestr);
949
7918f24d
NC
950 PERL_ARGS_ASSERT_SKIPSPACE1;
951
29595ff2
NC
952 s = skipspace(s);
953 if (!PL_madskills)
954 return s;
955 start = SvPVX(PL_linestr) + startoff;
cd81e915 956 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 957 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
958 PL_thistoken = newSVpvn(tstart, start - tstart);
959 }
960 PL_realtokenstart = -1;
961 if (PL_skipwhite) {
962 if (!PL_nextwhite)
6b29d1f5 963 PL_nextwhite = newSVpvs("");
cd81e915
NC
964 sv_catsv(PL_nextwhite, PL_skipwhite);
965 sv_free(PL_skipwhite);
966 PL_skipwhite = 0;
29595ff2
NC
967 }
968 return s;
969}
970
971STATIC char *
972S_skipspace2(pTHX_ register char *s, SV **svp)
973{
c35e046a
AL
974 char *start;
975 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
976 const I32 startoff = s - SvPVX(PL_linestr);
977
7918f24d
NC
978 PERL_ARGS_ASSERT_SKIPSPACE2;
979
29595ff2
NC
980 s = skipspace(s);
981 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
982 if (!PL_madskills || !svp)
983 return s;
984 start = SvPVX(PL_linestr) + startoff;
cd81e915 985 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 986 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
987 PL_thistoken = newSVpvn(tstart, start - tstart);
988 PL_realtokenstart = -1;
29595ff2 989 }
cd81e915 990 if (PL_skipwhite) {
29595ff2 991 if (!*svp)
6b29d1f5 992 *svp = newSVpvs("");
cd81e915
NC
993 sv_setsv(*svp, PL_skipwhite);
994 sv_free(PL_skipwhite);
995 PL_skipwhite = 0;
29595ff2
NC
996 }
997
998 return s;
999}
1000#endif
1001
80a702cd 1002STATIC void
15f169a1 1003S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1004{
1005 AV *av = CopFILEAVx(PL_curcop);
1006 if (av) {
b9f83d2f 1007 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1008 if (orig_sv)
1009 sv_setsv(sv, orig_sv);
1010 else
1011 sv_setpvn(sv, buf, len);
80a702cd
RGS
1012 (void)SvIOK_on(sv);
1013 SvIV_set(sv, 0);
1014 av_store(av, (I32)CopLINE(PL_curcop), sv);
1015 }
1016}
1017
ffb4593c
NT
1018/*
1019 * S_skipspace
1020 * Called to gobble the appropriate amount and type of whitespace.
1021 * Skips comments as well.
1022 */
1023
76e3520e 1024STATIC char *
cea2e8a9 1025S_skipspace(pTHX_ register char *s)
a687059c 1026{
97aff369 1027 dVAR;
5db06880
NC
1028#ifdef PERL_MAD
1029 int curoff;
1030 int startoff = s - SvPVX(PL_linestr);
1031
7918f24d
NC
1032 PERL_ARGS_ASSERT_SKIPSPACE;
1033
cd81e915
NC
1034 if (PL_skipwhite) {
1035 sv_free(PL_skipwhite);
1036 PL_skipwhite = 0;
5db06880
NC
1037 }
1038#endif
7918f24d 1039 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1040
3280af22 1041 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1042 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1043 s++;
5db06880
NC
1044#ifdef PERL_MAD
1045 goto done;
1046#else
463ee0b2 1047 return s;
5db06880 1048#endif
463ee0b2
LW
1049 }
1050 for (;;) {
fd049845 1051 STRLEN prevlen;
09bef843 1052 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1053 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1054 while (s < PL_bufend && isSPACE(*s)) {
1055 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1056 incline(s);
1057 }
ffb4593c
NT
1058
1059 /* comment */
3280af22
NIS
1060 if (s < PL_bufend && *s == '#') {
1061 while (s < PL_bufend && *s != '\n')
463ee0b2 1062 s++;
60e6418e 1063 if (s < PL_bufend) {
463ee0b2 1064 s++;
60e6418e
GS
1065 if (PL_in_eval && !PL_rsfp) {
1066 incline(s);
1067 continue;
1068 }
1069 }
463ee0b2 1070 }
ffb4593c
NT
1071
1072 /* only continue to recharge the buffer if we're at the end
1073 * of the buffer, we're not reading from a source filter, and
1074 * we're in normal lexing mode
1075 */
09bef843
SB
1076 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1077 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1078#ifdef PERL_MAD
1079 goto done;
1080#else
463ee0b2 1081 return s;
5db06880 1082#endif
ffb4593c
NT
1083
1084 /* try to recharge the buffer */
5db06880
NC
1085#ifdef PERL_MAD
1086 curoff = s - SvPVX(PL_linestr);
1087#endif
1088
9cbb5ea2 1089 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1090 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1091 {
5db06880
NC
1092#ifdef PERL_MAD
1093 if (PL_madskills && curoff != startoff) {
cd81e915 1094 if (!PL_skipwhite)
6b29d1f5 1095 PL_skipwhite = newSVpvs("");
cd81e915 1096 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1097 curoff - startoff);
1098 }
1099
1100 /* mustn't throw out old stuff yet if madpropping */
1101 SvCUR(PL_linestr) = curoff;
1102 s = SvPVX(PL_linestr) + curoff;
1103 *s = 0;
1104 if (curoff && s[-1] == '\n')
1105 s[-1] = ' ';
1106#endif
1107
9cbb5ea2 1108 /* end of file. Add on the -p or -n magic */
cd81e915 1109 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1110 if (PL_minus_p) {
5db06880 1111#ifdef PERL_MAD
6502358f 1112 sv_catpvs(PL_linestr,
5db06880
NC
1113 ";}continue{print or die qq(-p destination: $!\\n);}");
1114#else
6502358f 1115 sv_setpvs(PL_linestr,
01a19ab0 1116 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1117#endif
3280af22 1118 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1119 }
01a19ab0 1120 else if (PL_minus_n) {
5db06880 1121#ifdef PERL_MAD
76f68e9b 1122 sv_catpvs(PL_linestr, ";}");
5db06880 1123#else
76f68e9b 1124 sv_setpvs(PL_linestr, ";}");
5db06880 1125#endif
01a19ab0
NC
1126 PL_minus_n = 0;
1127 }
a0d0e21e 1128 else
5db06880 1129#ifdef PERL_MAD
76f68e9b 1130 sv_catpvs(PL_linestr,";");
5db06880 1131#else
76f68e9b 1132 sv_setpvs(PL_linestr,";");
5db06880 1133#endif
ffb4593c
NT
1134
1135 /* reset variables for next time we lex */
9cbb5ea2 1136 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1137 = SvPVX(PL_linestr)
1138#ifdef PERL_MAD
1139 + curoff
1140#endif
1141 ;
3280af22 1142 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1143 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1144
4c84d7f2 1145 /* Close the filehandle. Could be from
ffb4593c
NT
1146 * STDIN, or a regular file. If we were reading code from
1147 * STDIN (because the commandline held no -e or filename)
1148 * then we don't close it, we reset it so the code can
1149 * read from STDIN too.
1150 */
1151
4c84d7f2 1152 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1153 PerlIO_clearerr(PL_rsfp);
8990e307 1154 else
3280af22 1155 (void)PerlIO_close(PL_rsfp);
4608196e 1156 PL_rsfp = NULL;
463ee0b2
LW
1157 return s;
1158 }
ffb4593c
NT
1159
1160 /* not at end of file, so we only read another line */
09bef843
SB
1161 /* make corresponding updates to old pointers, for yyerror() */
1162 oldprevlen = PL_oldbufptr - PL_bufend;
1163 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1164 if (PL_last_uni)
1165 oldunilen = PL_last_uni - PL_bufend;
1166 if (PL_last_lop)
1167 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1168 PL_linestart = PL_bufptr = s + prevlen;
1169 PL_bufend = s + SvCUR(PL_linestr);
1170 s = PL_bufptr;
09bef843
SB
1171 PL_oldbufptr = s + oldprevlen;
1172 PL_oldoldbufptr = s + oldoldprevlen;
1173 if (PL_last_uni)
1174 PL_last_uni = s + oldunilen;
1175 if (PL_last_lop)
1176 PL_last_lop = s + oldloplen;
a0d0e21e 1177 incline(s);
ffb4593c
NT
1178
1179 /* debugger active and we're not compiling the debugger code,
1180 * so store the line into the debugger's array of lines
1181 */
80a702cd 1182 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1183 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1184 }
5db06880
NC
1185
1186#ifdef PERL_MAD
1187 done:
1188 if (PL_madskills) {
cd81e915 1189 if (!PL_skipwhite)
6b29d1f5 1190 PL_skipwhite = newSVpvs("");
5db06880
NC
1191 curoff = s - SvPVX(PL_linestr);
1192 if (curoff - startoff)
cd81e915 1193 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1194 curoff - startoff);
1195 }
1196 return s;
1197#endif
a687059c 1198}
378cc40b 1199
ffb4593c
NT
1200/*
1201 * S_check_uni
1202 * Check the unary operators to ensure there's no ambiguity in how they're
1203 * used. An ambiguous piece of code would be:
1204 * rand + 5
1205 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1206 * the +5 is its argument.
1207 */
1208
76e3520e 1209STATIC void
cea2e8a9 1210S_check_uni(pTHX)
ba106d47 1211{
97aff369 1212 dVAR;
d4c19fe8
AL
1213 const char *s;
1214 const char *t;
2f3197b3 1215
3280af22 1216 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1217 return;
3280af22
NIS
1218 while (isSPACE(*PL_last_uni))
1219 PL_last_uni++;
c35e046a
AL
1220 s = PL_last_uni;
1221 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1222 s++;
3280af22 1223 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1224 return;
6136c704 1225
0453d815 1226 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1227 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1228 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1229 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1230 }
2f3197b3
LW
1231}
1232
ffb4593c
NT
1233/*
1234 * LOP : macro to build a list operator. Its behaviour has been replaced
1235 * with a subroutine, S_lop() for which LOP is just another name.
1236 */
1237
a0d0e21e
LW
1238#define LOP(f,x) return lop(f,x,s)
1239
ffb4593c
NT
1240/*
1241 * S_lop
1242 * Build a list operator (or something that might be one). The rules:
1243 * - if we have a next token, then it's a list operator [why?]
1244 * - if the next thing is an opening paren, then it's a function
1245 * - else it's a list operator
1246 */
1247
76e3520e 1248STATIC I32
a0be28da 1249S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1250{
97aff369 1251 dVAR;
7918f24d
NC
1252
1253 PERL_ARGS_ASSERT_LOP;
1254
6154021b 1255 pl_yylval.ival = f;
35c8bce7 1256 CLINE;
3280af22
NIS
1257 PL_expect = x;
1258 PL_bufptr = s;
1259 PL_last_lop = PL_oldbufptr;
eb160463 1260 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1261#ifdef PERL_MAD
1262 if (PL_lasttoke)
1263 return REPORT(LSTOP);
1264#else
3280af22 1265 if (PL_nexttoke)
bbf60fe6 1266 return REPORT(LSTOP);
5db06880 1267#endif
79072805 1268 if (*s == '(')
bbf60fe6 1269 return REPORT(FUNC);
29595ff2 1270 s = PEEKSPACE(s);
79072805 1271 if (*s == '(')
bbf60fe6 1272 return REPORT(FUNC);
79072805 1273 else
bbf60fe6 1274 return REPORT(LSTOP);
79072805
LW
1275}
1276
5db06880
NC
1277#ifdef PERL_MAD
1278 /*
1279 * S_start_force
1280 * Sets up for an eventual force_next(). start_force(0) basically does
1281 * an unshift, while start_force(-1) does a push. yylex removes items
1282 * on the "pop" end.
1283 */
1284
1285STATIC void
1286S_start_force(pTHX_ int where)
1287{
1288 int i;
1289
cd81e915 1290 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1291 where = PL_lasttoke;
cd81e915
NC
1292 assert(PL_curforce < 0 || PL_curforce == where);
1293 if (PL_curforce != where) {
5db06880
NC
1294 for (i = PL_lasttoke; i > where; --i) {
1295 PL_nexttoke[i] = PL_nexttoke[i-1];
1296 }
1297 PL_lasttoke++;
1298 }
cd81e915 1299 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1300 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1301 PL_curforce = where;
1302 if (PL_nextwhite) {
5db06880 1303 if (PL_madskills)
6b29d1f5 1304 curmad('^', newSVpvs(""));
cd81e915 1305 CURMAD('_', PL_nextwhite);
5db06880
NC
1306 }
1307}
1308
1309STATIC void
1310S_curmad(pTHX_ char slot, SV *sv)
1311{
1312 MADPROP **where;
1313
1314 if (!sv)
1315 return;
cd81e915
NC
1316 if (PL_curforce < 0)
1317 where = &PL_thismad;
5db06880 1318 else
cd81e915 1319 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1320
cd81e915 1321 if (PL_faketokens)
76f68e9b 1322 sv_setpvs(sv, "");
5db06880
NC
1323 else {
1324 if (!IN_BYTES) {
1325 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1326 SvUTF8_on(sv);
1327 else if (PL_encoding) {
1328 sv_recode_to_utf8(sv, PL_encoding);
1329 }
1330 }
1331 }
1332
1333 /* keep a slot open for the head of the list? */
1334 if (slot != '_' && *where && (*where)->mad_key == '^') {
1335 (*where)->mad_key = slot;
daba3364 1336 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1337 (*where)->mad_val = (void*)sv;
1338 }
1339 else
1340 addmad(newMADsv(slot, sv), where, 0);
1341}
1342#else
b3f24c00
MHM
1343# define start_force(where) NOOP
1344# define curmad(slot, sv) NOOP
5db06880
NC
1345#endif
1346
ffb4593c
NT
1347/*
1348 * S_force_next
9cbb5ea2 1349 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1350 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1351 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1352 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1353 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1354 */
1355
4e553d73 1356STATIC void
cea2e8a9 1357S_force_next(pTHX_ I32 type)
79072805 1358{
97aff369 1359 dVAR;
704d4215
GG
1360#ifdef DEBUGGING
1361 if (DEBUG_T_TEST) {
1362 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1363 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1364 }
1365#endif
5db06880 1366#ifdef PERL_MAD
cd81e915 1367 if (PL_curforce < 0)
5db06880 1368 start_force(PL_lasttoke);
cd81e915 1369 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1370 if (PL_lex_state != LEX_KNOWNEXT)
1371 PL_lex_defer = PL_lex_state;
1372 PL_lex_state = LEX_KNOWNEXT;
1373 PL_lex_expect = PL_expect;
cd81e915 1374 PL_curforce = -1;
5db06880 1375#else
3280af22
NIS
1376 PL_nexttype[PL_nexttoke] = type;
1377 PL_nexttoke++;
1378 if (PL_lex_state != LEX_KNOWNEXT) {
1379 PL_lex_defer = PL_lex_state;
1380 PL_lex_expect = PL_expect;
1381 PL_lex_state = LEX_KNOWNEXT;
79072805 1382 }
5db06880 1383#endif
79072805
LW
1384}
1385
d0a148a6 1386STATIC SV *
15f169a1 1387S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1388{
97aff369 1389 dVAR;
740cce10
NC
1390 SV * const sv = newSVpvn_utf8(start, len,
1391 UTF && !IN_BYTES
1392 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1393 return sv;
1394}
1395
ffb4593c
NT
1396/*
1397 * S_force_word
1398 * When the lexer knows the next thing is a word (for instance, it has
1399 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1400 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1401 * lookahead.
ffb4593c
NT
1402 *
1403 * Arguments:
b1b65b59 1404 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1405 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1406 * int check_keyword : if true, Perl checks to make sure the word isn't
1407 * a keyword (do this if the word is a label, e.g. goto FOO)
1408 * int allow_pack : if true, : characters will also be allowed (require,
1409 * use, etc. do this)
9cbb5ea2 1410 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1411 */
1412
76e3520e 1413STATIC char *
cea2e8a9 1414S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1415{
97aff369 1416 dVAR;
463ee0b2
LW
1417 register char *s;
1418 STRLEN len;
4e553d73 1419
7918f24d
NC
1420 PERL_ARGS_ASSERT_FORCE_WORD;
1421
29595ff2 1422 start = SKIPSPACE1(start);
463ee0b2 1423 s = start;
7e2040f0 1424 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1425 (allow_pack && *s == ':') ||
15f0808c 1426 (allow_initial_tick && *s == '\'') )
a0d0e21e 1427 {
3280af22 1428 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1429 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1430 return start;
cd81e915 1431 start_force(PL_curforce);
5db06880
NC
1432 if (PL_madskills)
1433 curmad('X', newSVpvn(start,s-start));
463ee0b2 1434 if (token == METHOD) {
29595ff2 1435 s = SKIPSPACE1(s);
463ee0b2 1436 if (*s == '(')
3280af22 1437 PL_expect = XTERM;
463ee0b2 1438 else {
3280af22 1439 PL_expect = XOPERATOR;
463ee0b2 1440 }
79072805 1441 }
e74e6b3d 1442 if (PL_madskills)
63575281 1443 curmad('g', newSVpvs( "forced" ));
9ded7720 1444 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1445 = (OP*)newSVOP(OP_CONST,0,
1446 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1447 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1448 force_next(token);
1449 }
1450 return s;
1451}
1452
ffb4593c
NT
1453/*
1454 * S_force_ident
9cbb5ea2 1455 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1456 * text only contains the "foo" portion. The first argument is a pointer
1457 * to the "foo", and the second argument is the type symbol to prefix.
1458 * Forces the next token to be a "WORD".
9cbb5ea2 1459 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1460 */
1461
76e3520e 1462STATIC void
bfed75c6 1463S_force_ident(pTHX_ register const char *s, int kind)
79072805 1464{
97aff369 1465 dVAR;
7918f24d
NC
1466
1467 PERL_ARGS_ASSERT_FORCE_IDENT;
1468
c35e046a 1469 if (*s) {
90e5519e
NC
1470 const STRLEN len = strlen(s);
1471 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1472 start_force(PL_curforce);
9ded7720 1473 NEXTVAL_NEXTTOKE.opval = o;
79072805 1474 force_next(WORD);
748a9306 1475 if (kind) {
11343788 1476 o->op_private = OPpCONST_ENTERED;
55497cff 1477 /* XXX see note in pp_entereval() for why we forgo typo
1478 warnings if the symbol must be introduced in an eval.
1479 GSAR 96-10-12 */
90e5519e
NC
1480 gv_fetchpvn_flags(s, len,
1481 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1482 : GV_ADD,
1483 kind == '$' ? SVt_PV :
1484 kind == '@' ? SVt_PVAV :
1485 kind == '%' ? SVt_PVHV :
a0d0e21e 1486 SVt_PVGV
90e5519e 1487 );
748a9306 1488 }
79072805
LW
1489 }
1490}
1491
1571675a
GS
1492NV
1493Perl_str_to_version(pTHX_ SV *sv)
1494{
1495 NV retval = 0.0;
1496 NV nshift = 1.0;
1497 STRLEN len;
cfd0369c 1498 const char *start = SvPV_const(sv,len);
9d4ba2ae 1499 const char * const end = start + len;
504618e9 1500 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1501
1502 PERL_ARGS_ASSERT_STR_TO_VERSION;
1503
1571675a 1504 while (start < end) {
ba210ebe 1505 STRLEN skip;
1571675a
GS
1506 UV n;
1507 if (utf)
9041c2e3 1508 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1509 else {
1510 n = *(U8*)start;
1511 skip = 1;
1512 }
1513 retval += ((NV)n)/nshift;
1514 start += skip;
1515 nshift *= 1000;
1516 }
1517 return retval;
1518}
1519
4e553d73 1520/*
ffb4593c
NT
1521 * S_force_version
1522 * Forces the next token to be a version number.
e759cc13
RGS
1523 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1524 * and if "guessing" is TRUE, then no new token is created (and the caller
1525 * must use an alternative parsing method).
ffb4593c
NT
1526 */
1527
76e3520e 1528STATIC char *
e759cc13 1529S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1530{
97aff369 1531 dVAR;
5f66b61c 1532 OP *version = NULL;
44dcb63b 1533 char *d;
5db06880
NC
1534#ifdef PERL_MAD
1535 I32 startoff = s - SvPVX(PL_linestr);
1536#endif
89bfa8cd 1537
7918f24d
NC
1538 PERL_ARGS_ASSERT_FORCE_VERSION;
1539
29595ff2 1540 s = SKIPSPACE1(s);
89bfa8cd 1541
44dcb63b 1542 d = s;
dd629d5b 1543 if (*d == 'v')
44dcb63b 1544 d++;
44dcb63b 1545 if (isDIGIT(*d)) {
e759cc13
RGS
1546 while (isDIGIT(*d) || *d == '_' || *d == '.')
1547 d++;
5db06880
NC
1548#ifdef PERL_MAD
1549 if (PL_madskills) {
cd81e915 1550 start_force(PL_curforce);
5db06880
NC
1551 curmad('X', newSVpvn(s,d-s));
1552 }
1553#endif
9f3d182e 1554 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1555 SV *ver;
6154021b
RGS
1556 s = scan_num(s, &pl_yylval);
1557 version = pl_yylval.opval;
dd629d5b
GS
1558 ver = cSVOPx(version)->op_sv;
1559 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1560 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1561 SvNV_set(ver, str_to_version(ver));
1571675a 1562 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1563 }
89bfa8cd 1564 }
5db06880
NC
1565 else if (guessing) {
1566#ifdef PERL_MAD
1567 if (PL_madskills) {
cd81e915
NC
1568 sv_free(PL_nextwhite); /* let next token collect whitespace */
1569 PL_nextwhite = 0;
5db06880
NC
1570 s = SvPVX(PL_linestr) + startoff;
1571 }
1572#endif
e759cc13 1573 return s;
5db06880 1574 }
89bfa8cd 1575 }
1576
5db06880
NC
1577#ifdef PERL_MAD
1578 if (PL_madskills && !version) {
cd81e915
NC
1579 sv_free(PL_nextwhite); /* let next token collect whitespace */
1580 PL_nextwhite = 0;
5db06880
NC
1581 s = SvPVX(PL_linestr) + startoff;
1582 }
1583#endif
89bfa8cd 1584 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1585 start_force(PL_curforce);
9ded7720 1586 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1587 force_next(WORD);
89bfa8cd 1588
e759cc13 1589 return s;
89bfa8cd 1590}
1591
ffb4593c
NT
1592/*
1593 * S_tokeq
1594 * Tokenize a quoted string passed in as an SV. It finds the next
1595 * chunk, up to end of string or a backslash. It may make a new
1596 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1597 * turns \\ into \.
1598 */
1599
76e3520e 1600STATIC SV *
cea2e8a9 1601S_tokeq(pTHX_ SV *sv)
79072805 1602{
97aff369 1603 dVAR;
79072805
LW
1604 register char *s;
1605 register char *send;
1606 register char *d;
b3ac6de7
IZ
1607 STRLEN len = 0;
1608 SV *pv = sv;
79072805 1609
7918f24d
NC
1610 PERL_ARGS_ASSERT_TOKEQ;
1611
79072805 1612 if (!SvLEN(sv))
b3ac6de7 1613 goto finish;
79072805 1614
a0d0e21e 1615 s = SvPV_force(sv, len);
21a311ee 1616 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1617 goto finish;
463ee0b2 1618 send = s + len;
79072805
LW
1619 while (s < send && *s != '\\')
1620 s++;
1621 if (s == send)
b3ac6de7 1622 goto finish;
79072805 1623 d = s;
be4731d2 1624 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1625 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1626 }
79072805
LW
1627 while (s < send) {
1628 if (*s == '\\') {
a0d0e21e 1629 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1630 s++; /* all that, just for this */
1631 }
1632 *d++ = *s++;
1633 }
1634 *d = '\0';
95a20fc0 1635 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1636 finish:
3280af22 1637 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1638 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1639 return sv;
1640}
1641
ffb4593c
NT
1642/*
1643 * Now come three functions related to double-quote context,
1644 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1645 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1646 * interact with PL_lex_state, and create fake ( ... ) argument lists
1647 * to handle functions and concatenation.
1648 * They assume that whoever calls them will be setting up a fake
1649 * join call, because each subthing puts a ',' after it. This lets
1650 * "lower \luPpEr"
1651 * become
1652 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1653 *
1654 * (I'm not sure whether the spurious commas at the end of lcfirst's
1655 * arguments and join's arguments are created or not).
1656 */
1657
1658/*
1659 * S_sublex_start
6154021b 1660 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1661 *
1662 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1663 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1664 *
1665 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1666 *
1667 * Everything else becomes a FUNC.
1668 *
1669 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1670 * had an OP_CONST or OP_READLINE). This just sets us up for a
1671 * call to S_sublex_push().
1672 */
1673
76e3520e 1674STATIC I32
cea2e8a9 1675S_sublex_start(pTHX)
79072805 1676{
97aff369 1677 dVAR;
6154021b 1678 register const I32 op_type = pl_yylval.ival;
79072805
LW
1679
1680 if (op_type == OP_NULL) {
6154021b 1681 pl_yylval.opval = PL_lex_op;
5f66b61c 1682 PL_lex_op = NULL;
79072805
LW
1683 return THING;
1684 }
1685 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1686 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1687
1688 if (SvTYPE(sv) == SVt_PVIV) {
1689 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1690 STRLEN len;
96a5add6 1691 const char * const p = SvPV_const(sv, len);
740cce10 1692 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1693 SvREFCNT_dec(sv);
1694 sv = nsv;
4e553d73 1695 }
6154021b 1696 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1697 PL_lex_stuff = NULL;
6f33ba73
RGS
1698 /* Allow <FH> // "foo" */
1699 if (op_type == OP_READLINE)
1700 PL_expect = XTERMORDORDOR;
79072805
LW
1701 return THING;
1702 }
e3f73d4e
RGS
1703 else if (op_type == OP_BACKTICK && PL_lex_op) {
1704 /* readpipe() vas overriden */
1705 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1706 pl_yylval.opval = PL_lex_op;
9b201d7d 1707 PL_lex_op = NULL;
e3f73d4e
RGS
1708 PL_lex_stuff = NULL;
1709 return THING;
1710 }
79072805 1711
3280af22 1712 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1713 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1714 PL_sublex_info.sub_op = PL_lex_op;
1715 PL_lex_state = LEX_INTERPPUSH;
55497cff 1716
3280af22
NIS
1717 PL_expect = XTERM;
1718 if (PL_lex_op) {
6154021b 1719 pl_yylval.opval = PL_lex_op;
5f66b61c 1720 PL_lex_op = NULL;
55497cff 1721 return PMFUNC;
1722 }
1723 else
1724 return FUNC;
1725}
1726
ffb4593c
NT
1727/*
1728 * S_sublex_push
1729 * Create a new scope to save the lexing state. The scope will be
1730 * ended in S_sublex_done. Returns a '(', starting the function arguments
1731 * to the uc, lc, etc. found before.
1732 * Sets PL_lex_state to LEX_INTERPCONCAT.
1733 */
1734
76e3520e 1735STATIC I32
cea2e8a9 1736S_sublex_push(pTHX)
55497cff 1737{
27da23d5 1738 dVAR;
f46d017c 1739 ENTER;
55497cff 1740
3280af22 1741 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1742 SAVEBOOL(PL_lex_dojoin);
3280af22 1743 SAVEI32(PL_lex_brackets);
3280af22
NIS
1744 SAVEI32(PL_lex_casemods);
1745 SAVEI32(PL_lex_starts);
651b5b28 1746 SAVEI8(PL_lex_state);
7766f137 1747 SAVEVPTR(PL_lex_inpat);
98246f1e 1748 SAVEI16(PL_lex_inwhat);
57843af0 1749 SAVECOPLINE(PL_curcop);
3280af22 1750 SAVEPPTR(PL_bufptr);
8452ff4b 1751 SAVEPPTR(PL_bufend);
3280af22
NIS
1752 SAVEPPTR(PL_oldbufptr);
1753 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1754 SAVEPPTR(PL_last_lop);
1755 SAVEPPTR(PL_last_uni);
3280af22
NIS
1756 SAVEPPTR(PL_linestart);
1757 SAVESPTR(PL_linestr);
8edd5f42
RGS
1758 SAVEGENERICPV(PL_lex_brackstack);
1759 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1760
1761 PL_linestr = PL_lex_stuff;
a0714e2c 1762 PL_lex_stuff = NULL;
3280af22 1763
9cbb5ea2
GS
1764 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1765 = SvPVX(PL_linestr);
3280af22 1766 PL_bufend += SvCUR(PL_linestr);
bd61b366 1767 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1768 SAVEFREESV(PL_linestr);
1769
1770 PL_lex_dojoin = FALSE;
1771 PL_lex_brackets = 0;
a02a5408
JC
1772 Newx(PL_lex_brackstack, 120, char);
1773 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1774 PL_lex_casemods = 0;
1775 *PL_lex_casestack = '\0';
1776 PL_lex_starts = 0;
1777 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1778 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1779
1780 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1781 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1782 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1783 else
5f66b61c 1784 PL_lex_inpat = NULL;
79072805 1785
55497cff 1786 return '(';
79072805
LW
1787}
1788
ffb4593c
NT
1789/*
1790 * S_sublex_done
1791 * Restores lexer state after a S_sublex_push.
1792 */
1793
76e3520e 1794STATIC I32
cea2e8a9 1795S_sublex_done(pTHX)
79072805 1796{
27da23d5 1797 dVAR;
3280af22 1798 if (!PL_lex_starts++) {
396482e1 1799 SV * const sv = newSVpvs("");
9aa983d2
JH
1800 if (SvUTF8(PL_linestr))
1801 SvUTF8_on(sv);
3280af22 1802 PL_expect = XOPERATOR;
6154021b 1803 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1804 return THING;
1805 }
1806
3280af22
NIS
1807 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1808 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1809 return yylex();
79072805
LW
1810 }
1811
ffb4593c 1812 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1813 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1814 PL_linestr = PL_lex_repl;
1815 PL_lex_inpat = 0;
1816 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1817 PL_bufend += SvCUR(PL_linestr);
bd61b366 1818 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1819 SAVEFREESV(PL_linestr);
1820 PL_lex_dojoin = FALSE;
1821 PL_lex_brackets = 0;
3280af22
NIS
1822 PL_lex_casemods = 0;
1823 *PL_lex_casestack = '\0';
1824 PL_lex_starts = 0;
25da4f38 1825 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1826 PL_lex_state = LEX_INTERPNORMAL;
1827 PL_lex_starts++;
e9fa98b2
HS
1828 /* we don't clear PL_lex_repl here, so that we can check later
1829 whether this is an evalled subst; that means we rely on the
1830 logic to ensure sublex_done() is called again only via the
1831 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1832 }
e9fa98b2 1833 else {
3280af22 1834 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1835 PL_lex_repl = NULL;
e9fa98b2 1836 }
79072805 1837 return ',';
ffed7fef
LW
1838 }
1839 else {
5db06880
NC
1840#ifdef PERL_MAD
1841 if (PL_madskills) {
cd81e915
NC
1842 if (PL_thiswhite) {
1843 if (!PL_endwhite)
6b29d1f5 1844 PL_endwhite = newSVpvs("");
cd81e915
NC
1845 sv_catsv(PL_endwhite, PL_thiswhite);
1846 PL_thiswhite = 0;
1847 }
1848 if (PL_thistoken)
76f68e9b 1849 sv_setpvs(PL_thistoken,"");
5db06880 1850 else
cd81e915 1851 PL_realtokenstart = -1;
5db06880
NC
1852 }
1853#endif
f46d017c 1854 LEAVE;
3280af22
NIS
1855 PL_bufend = SvPVX(PL_linestr);
1856 PL_bufend += SvCUR(PL_linestr);
1857 PL_expect = XOPERATOR;
09bef843 1858 PL_sublex_info.sub_inwhat = 0;
79072805 1859 return ')';
ffed7fef
LW
1860 }
1861}
1862
02aa26ce
NT
1863/*
1864 scan_const
1865
1866 Extracts a pattern, double-quoted string, or transliteration. This
1867 is terrifying code.
1868
94def140 1869 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1870 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1871 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1872
94def140
TS
1873 Returns a pointer to the character scanned up to. If this is
1874 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1875 successfully parsed), will leave an OP for the substring scanned
6154021b 1876 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1877 by looking at the next characters herself.
1878
02aa26ce
NT
1879 In patterns:
1880 backslashes:
1881 double-quoted style: \r and \n
1882 regexp special ones: \D \s
94def140
TS
1883 constants: \x31
1884 backrefs: \1
02aa26ce
NT
1885 case and quoting: \U \Q \E
1886 stops on @ and $, but not for $ as tail anchor
1887
1888 In transliterations:
1889 characters are VERY literal, except for - not at the start or end
94def140
TS
1890 of the string, which indicates a range. If the range is in bytes,
1891 scan_const expands the range to the full set of intermediate
1892 characters. If the range is in utf8, the hyphen is replaced with
1893 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1894
1895 In double-quoted strings:
1896 backslashes:
1897 double-quoted style: \r and \n
94def140
TS
1898 constants: \x31
1899 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1900 case and quoting: \U \Q \E
1901 stops on @ and $
1902
1903 scan_const does *not* construct ops to handle interpolated strings.
1904 It stops processing as soon as it finds an embedded $ or @ variable
1905 and leaves it to the caller to work out what's going on.
1906
94def140
TS
1907 embedded arrays (whether in pattern or not) could be:
1908 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1909
1910 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1911
1912 $ in pattern could be $foo or could be tail anchor. Assumption:
1913 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1914 followed by one of "()| \r\n\t"
02aa26ce
NT
1915
1916 \1 (backreferences) are turned into $1
1917
1918 The structure of the code is
1919 while (there's a character to process) {
94def140
TS
1920 handle transliteration ranges
1921 skip regexp comments /(?#comment)/ and codes /(?{code})/
1922 skip #-initiated comments in //x patterns
1923 check for embedded arrays
02aa26ce
NT
1924 check for embedded scalars
1925 if (backslash) {
94def140
TS
1926 leave intact backslashes from leaveit (below)
1927 deprecate \1 in substitution replacements
02aa26ce
NT
1928 handle string-changing backslashes \l \U \Q \E, etc.
1929 switch (what was escaped) {
94def140
TS
1930 handle \- in a transliteration (becomes a literal -)
1931 handle \132 (octal characters)
1932 handle \x15 and \x{1234} (hex characters)
1933 handle \N{name} (named characters)
1934 handle \cV (control characters)
1935 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1936 } (end switch)
1937 } (end if backslash)
1938 } (end while character to read)
4e553d73 1939
02aa26ce
NT
1940*/
1941
76e3520e 1942STATIC char *
cea2e8a9 1943S_scan_const(pTHX_ char *start)
79072805 1944{
97aff369 1945 dVAR;
3280af22 1946 register char *send = PL_bufend; /* end of the constant */
561b68a9 1947 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1948 register char *s = start; /* start of the constant */
1949 register char *d = SvPVX(sv); /* destination for copies */
1950 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1951 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1952 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1953 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1954 UV uv;
4c3a8340
TS
1955#ifdef EBCDIC
1956 UV literal_endpoint = 0;
e294cc5d 1957 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1958#endif
012bcf8d 1959
7918f24d
NC
1960 PERL_ARGS_ASSERT_SCAN_CONST;
1961
2b9d42f0
NIS
1962 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1963 /* If we are doing a trans and we know we want UTF8 set expectation */
1964 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1965 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1966 }
1967
1968
79072805 1969 while (s < send || dorange) {
02aa26ce 1970 /* get transliterations out of the way (they're most literal) */
3280af22 1971 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1972 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1973 if (dorange) {
1ba5c669
JH
1974 I32 i; /* current expanded character */
1975 I32 min; /* first character in range */
1976 I32 max; /* last character in range */
02aa26ce 1977
e294cc5d
JH
1978#ifdef EBCDIC
1979 UV uvmax = 0;
1980#endif
1981
1982 if (has_utf8
1983#ifdef EBCDIC
1984 && !native_range
1985#endif
1986 ) {
9d4ba2ae 1987 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1988 char *e = d++;
1989 while (e-- > c)
1990 *(e + 1) = *e;
25716404 1991 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1992 /* mark the range as done, and continue */
1993 dorange = FALSE;
1994 didrange = TRUE;
1995 continue;
1996 }
2b9d42f0 1997
95a20fc0 1998 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1999#ifdef EBCDIC
2000 SvGROW(sv,
2001 SvLEN(sv) + (has_utf8 ?
2002 (512 - UTF_CONTINUATION_MARK +
2003 UNISKIP(0x100))
2004 : 256));
2005 /* How many two-byte within 0..255: 128 in UTF-8,
2006 * 96 in UTF-8-mod. */
2007#else
9cbb5ea2 2008 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2009#endif
9cbb5ea2 2010 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2011#ifdef EBCDIC
2012 if (has_utf8) {
2013 int j;
2014 for (j = 0; j <= 1; j++) {
2015 char * const c = (char*)utf8_hop((U8*)d, -1);
2016 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2017 if (j)
2018 min = (U8)uv;
2019 else if (uv < 256)
2020 max = (U8)uv;
2021 else {
2022 max = (U8)0xff; /* only to \xff */
2023 uvmax = uv; /* \x{100} to uvmax */
2024 }
2025 d = c; /* eat endpoint chars */
2026 }
2027 }
2028 else {
2029#endif
2030 d -= 2; /* eat the first char and the - */
2031 min = (U8)*d; /* first char in range */
2032 max = (U8)d[1]; /* last char in range */
2033#ifdef EBCDIC
2034 }
2035#endif
8ada0baa 2036
c2e66d9e 2037 if (min > max) {
01ec43d0 2038 Perl_croak(aTHX_
d1573ac7 2039 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2040 (char)min, (char)max);
c2e66d9e
GS
2041 }
2042
c7f1f016 2043#ifdef EBCDIC
4c3a8340
TS
2044 if (literal_endpoint == 2 &&
2045 ((isLOWER(min) && isLOWER(max)) ||
2046 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2047 if (isLOWER(min)) {
2048 for (i = min; i <= max; i++)
2049 if (isLOWER(i))
db42d148 2050 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2051 } else {
2052 for (i = min; i <= max; i++)
2053 if (isUPPER(i))
db42d148 2054 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2055 }
2056 }
2057 else
2058#endif
2059 for (i = min; i <= max; i++)
e294cc5d
JH
2060#ifdef EBCDIC
2061 if (has_utf8) {
2062 const U8 ch = (U8)NATIVE_TO_UTF(i);
2063 if (UNI_IS_INVARIANT(ch))
2064 *d++ = (U8)i;
2065 else {
2066 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2067 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2068 }
2069 }
2070 else
2071#endif
2072 *d++ = (char)i;
2073
2074#ifdef EBCDIC
2075 if (uvmax) {
2076 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2077 if (uvmax > 0x101)
2078 *d++ = (char)UTF_TO_NATIVE(0xff);
2079 if (uvmax > 0x100)
2080 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2081 }
2082#endif
02aa26ce
NT
2083
2084 /* mark the range as done, and continue */
79072805 2085 dorange = FALSE;
01ec43d0 2086 didrange = TRUE;
4c3a8340
TS
2087#ifdef EBCDIC
2088 literal_endpoint = 0;
2089#endif
79072805 2090 continue;
4e553d73 2091 }
02aa26ce
NT
2092
2093 /* range begins (ignore - as first or last char) */
79072805 2094 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2095 if (didrange) {
1fafa243 2096 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2097 }
e294cc5d
JH
2098 if (has_utf8
2099#ifdef EBCDIC
2100 && !native_range
2101#endif
2102 ) {
25716404 2103 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2104 s++;
2105 continue;
2106 }
79072805
LW
2107 dorange = TRUE;
2108 s++;
01ec43d0
GS
2109 }
2110 else {
2111 didrange = FALSE;
4c3a8340
TS
2112#ifdef EBCDIC
2113 literal_endpoint = 0;
e294cc5d 2114 native_range = TRUE;
4c3a8340 2115#endif
01ec43d0 2116 }
79072805 2117 }
02aa26ce
NT
2118
2119 /* if we get here, we're not doing a transliteration */
2120
0f5d15d6
IZ
2121 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2122 except for the last char, which will be done separately. */
3280af22 2123 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2124 if (s[2] == '#') {
e994fd66 2125 while (s+1 < send && *s != ')')
db42d148 2126 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2127 }
2128 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2129 || (s[2] == '?' && s[3] == '{'))
155aba94 2130 {
cc6b7395 2131 I32 count = 1;
0f5d15d6 2132 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2133 char c;
2134
d9f97599
GS
2135 while (count && (c = *regparse)) {
2136 if (c == '\\' && regparse[1])
2137 regparse++;
4e553d73 2138 else if (c == '{')
cc6b7395 2139 count++;
4e553d73 2140 else if (c == '}')
cc6b7395 2141 count--;
d9f97599 2142 regparse++;
cc6b7395 2143 }
e994fd66 2144 if (*regparse != ')')
5bdf89e7 2145 regparse--; /* Leave one char for continuation. */
0f5d15d6 2146 while (s < regparse)
db42d148 2147 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2148 }
748a9306 2149 }
02aa26ce
NT
2150
2151 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2152 else if (*s == '#' && PL_lex_inpat &&
2153 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2154 while (s+1 < send && *s != '\n')
db42d148 2155 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2156 }
02aa26ce 2157
5d1d4326 2158 /* check for embedded arrays
da6eedaa 2159 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2160 */
1749ea0d
TS
2161 else if (*s == '@' && s[1]) {
2162 if (isALNUM_lazy_if(s+1,UTF))
2163 break;
2164 if (strchr(":'{$", s[1]))
2165 break;
2166 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2167 break; /* in regexp, neither @+ nor @- are interpolated */
2168 }
02aa26ce
NT
2169
2170 /* check for embedded scalars. only stop if we're sure it's a
2171 variable.
2172 */
79072805 2173 else if (*s == '$') {
3280af22 2174 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2175 break;
77772344
B
2176 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2177 if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) {
2178 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2179 "Possible unintended interpolation of $\\ in regex");
2180 }
79072805 2181 break; /* in regexp, $ might be tail anchor */
77772344 2182 }
79072805 2183 }
02aa26ce 2184
2b9d42f0
NIS
2185 /* End of else if chain - OP_TRANS rejoin rest */
2186
02aa26ce 2187 /* backslashes */
79072805
LW
2188 if (*s == '\\' && s+1 < send) {
2189 s++;
02aa26ce 2190
02aa26ce 2191 /* deprecate \1 in strings and substitution replacements */
3280af22 2192 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2193 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2194 {
599cee73 2195 if (ckWARN(WARN_SYNTAX))
9014280d 2196 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2197 *--s = '$';
2198 break;
2199 }
02aa26ce
NT
2200
2201 /* string-change backslash escapes */
3280af22 2202 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2203 --s;
2204 break;
2205 }
cc74c5bd
TS
2206 /* skip any other backslash escapes in a pattern */
2207 else if (PL_lex_inpat) {
2208 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2209 goto default_action;
2210 }
02aa26ce
NT
2211
2212 /* if we get here, it's either a quoted -, or a digit */
79072805 2213 switch (*s) {
02aa26ce
NT
2214
2215 /* quoted - in transliterations */
79072805 2216 case '-':
3280af22 2217 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2218 *d++ = *s++;
2219 continue;
2220 }
2221 /* FALL THROUGH */
2222 default:
11b8faa4 2223 {
86f97054 2224 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2225 ckWARN(WARN_MISC))
9014280d 2226 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2227 "Unrecognized escape \\%c passed through",
2228 *s);
11b8faa4 2229 /* default action is to copy the quoted character */
f9a63242 2230 goto default_action;
11b8faa4 2231 }
02aa26ce
NT
2232
2233 /* \132 indicates an octal constant */
79072805
LW
2234 case '0': case '1': case '2': case '3':
2235 case '4': case '5': case '6': case '7':
ba210ebe 2236 {
53305cf1
NC
2237 I32 flags = 0;
2238 STRLEN len = 3;
2239 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2240 s += len;
2241 }
012bcf8d 2242 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2243
2244 /* \x24 indicates a hex constant */
79072805 2245 case 'x':
a0ed51b3
LW
2246 ++s;
2247 if (*s == '{') {
9d4ba2ae 2248 char* const e = strchr(s, '}');
a4c04bdc
NC
2249 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2250 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2251 STRLEN len;
355860ce 2252
53305cf1 2253 ++s;
adaeee49 2254 if (!e) {
a0ed51b3 2255 yyerror("Missing right brace on \\x{}");
355860ce 2256 continue;
ba210ebe 2257 }
53305cf1
NC
2258 len = e - s;
2259 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2260 s = e + 1;
a0ed51b3
LW
2261 }
2262 else {
ba210ebe 2263 {
53305cf1 2264 STRLEN len = 2;
a4c04bdc 2265 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2266 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2267 s += len;
2268 }
012bcf8d
GS
2269 }
2270
2271 NUM_ESCAPE_INSERT:
2272 /* Insert oct or hex escaped character.
301d3d20 2273 * There will always enough room in sv since such
db42d148 2274 * escapes will be longer than any UTF-8 sequence
301d3d20 2275 * they can end up as. */
ba7cea30 2276
c7f1f016
NIS
2277 /* We need to map to chars to ASCII before doing the tests
2278 to cover EBCDIC
2279 */
c4d5f83a 2280 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2281 if (!has_utf8 && uv > 255) {
301d3d20
JH
2282 /* Might need to recode whatever we have
2283 * accumulated so far if it contains any
2284 * hibit chars.
2285 *
2286 * (Can't we keep track of that and avoid
2287 * this rescan? --jhi)
012bcf8d 2288 */
c7f1f016 2289 int hicount = 0;
63cd0674
NIS
2290 U8 *c;
2291 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2292 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2293 hicount++;
db42d148 2294 }
012bcf8d 2295 }
63cd0674 2296 if (hicount) {
9d4ba2ae 2297 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2298 U8 *src, *dst;
2299 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2300 src = (U8 *)d - 1;
2301 dst = src+hicount;
2302 d += hicount;
cfd0369c 2303 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2304 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2305 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2306 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2307 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2308 }
2309 else {
63cd0674 2310 *dst-- = *src;
012bcf8d 2311 }
c7f1f016 2312 src--;
012bcf8d
GS
2313 }
2314 }
2315 }
2316
9aa983d2 2317 if (has_utf8 || uv > 255) {
9041c2e3 2318 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2319 has_utf8 = TRUE;
f9a63242
JH
2320 if (PL_lex_inwhat == OP_TRANS &&
2321 PL_sublex_info.sub_op) {
2322 PL_sublex_info.sub_op->op_private |=
2323 (PL_lex_repl ? OPpTRANS_FROM_UTF
2324 : OPpTRANS_TO_UTF);
f9a63242 2325 }
e294cc5d
JH
2326#ifdef EBCDIC
2327 if (uv > 255 && !dorange)
2328 native_range = FALSE;
2329#endif
012bcf8d 2330 }
a0ed51b3 2331 else {
012bcf8d 2332 *d++ = (char)uv;
a0ed51b3 2333 }
012bcf8d
GS
2334 }
2335 else {
c4d5f83a 2336 *d++ = (char) uv;
a0ed51b3 2337 }
79072805 2338 continue;
02aa26ce 2339
b239daa5 2340 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2341 case 'N':
55eda711 2342 ++s;
423cee85
JH
2343 if (*s == '{') {
2344 char* e = strchr(s, '}');
155aba94 2345 SV *res;
423cee85 2346 STRLEN len;
cfd0369c 2347 const char *str;
4e553d73 2348
423cee85 2349 if (!e) {
5777a3f7 2350 yyerror("Missing right brace on \\N{}");
423cee85
JH
2351 e = s - 1;
2352 goto cont_scan;
2353 }
dbc0d4f2
JH
2354 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2355 /* \N{U+...} */
2356 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2357 PERL_SCAN_DISALLOW_PREFIX;
2358 s += 3;
2359 len = e - s;
2360 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2361 if ( e > s && len != (STRLEN)(e - s) ) {
2362 uv = 0xFFFD;
fc8cd66c 2363 }
dbc0d4f2
JH
2364 s = e + 1;
2365 goto NUM_ESCAPE_INSERT;
2366 }
55eda711 2367 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2368 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2369 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2370 if (has_utf8)
2371 sv_utf8_upgrade(res);
cfd0369c 2372 str = SvPV_const(res,len);
1c47067b
JH
2373#ifdef EBCDIC_NEVER_MIND
2374 /* charnames uses pack U and that has been
2375 * recently changed to do the below uni->native
2376 * mapping, so this would be redundant (and wrong,
2377 * the code point would be doubly converted).
2378 * But leave this in just in case the pack U change
2379 * gets revoked, but the semantics is still
2380 * desireable for charnames. --jhi */
cddc7ef4 2381 {
cfd0369c 2382 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2383
2384 if (uv < 0x100) {
89ebb4a3 2385 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2386
2387 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2388 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2389 str = SvPV_const(res, len);
cddc7ef4
JH
2390 }
2391 }
2392#endif
89491803 2393 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2394 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2395 SvCUR_set(sv, d - ostart);
2396 SvPOK_on(sv);
e4f3eed8 2397 *d = '\0';
f08d6ad9 2398 sv_utf8_upgrade(sv);
d2f449dd 2399 /* this just broke our allocation above... */
eb160463 2400 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2401 d = SvPVX(sv) + SvCUR(sv);
89491803 2402 has_utf8 = TRUE;
f08d6ad9 2403 }
eb160463 2404 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2405 const char * const odest = SvPVX_const(sv);
423cee85 2406
8973db79 2407 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2408 d = SvPVX(sv) + (d - odest);
2409 }
e294cc5d
JH
2410#ifdef EBCDIC
2411 if (!dorange)
2412 native_range = FALSE; /* \N{} is guessed to be Unicode */
2413#endif
423cee85
JH
2414 Copy(str, d, len, char);
2415 d += len;
2416 SvREFCNT_dec(res);
2417 cont_scan:
2418 s = e + 1;
2419 }
2420 else
5777a3f7 2421 yyerror("Missing braces on \\N{}");
423cee85
JH
2422 continue;
2423
02aa26ce 2424 /* \c is a control character */
79072805
LW
2425 case 'c':
2426 s++;
961ce445 2427 if (s < send) {
ba210ebe 2428 U8 c = *s++;
c7f1f016
NIS
2429#ifdef EBCDIC
2430 if (isLOWER(c))
2431 c = toUPPER(c);
2432#endif
db42d148 2433 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2434 }
961ce445
RGS
2435 else {
2436 yyerror("Missing control char name in \\c");
2437 }
79072805 2438 continue;
02aa26ce
NT
2439
2440 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2441 case 'b':
db42d148 2442 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2443 break;
2444 case 'n':
db42d148 2445 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2446 break;
2447 case 'r':
db42d148 2448 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2449 break;
2450 case 'f':
db42d148 2451 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2452 break;
2453 case 't':
db42d148 2454 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2455 break;
34a3fe2a 2456 case 'e':
db42d148 2457 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2458 break;
2459 case 'a':
db42d148 2460 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2461 break;
02aa26ce
NT
2462 } /* end switch */
2463
79072805
LW
2464 s++;
2465 continue;
02aa26ce 2466 } /* end if (backslash) */
4c3a8340
TS
2467#ifdef EBCDIC
2468 else
2469 literal_endpoint++;
2470#endif
02aa26ce 2471
f9a63242 2472 default_action:
2b9d42f0
NIS
2473 /* If we started with encoded form, or already know we want it
2474 and then encode the next character */
2475 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2476 STRLEN len = 1;
5f66b61c
AL
2477 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2478 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2479 s += len;
2480 if (need > len) {
2481 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2482 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2483 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2484 }
5f66b61c 2485 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2486 has_utf8 = TRUE;
e294cc5d
JH
2487#ifdef EBCDIC
2488 if (uv > 255 && !dorange)
2489 native_range = FALSE;
2490#endif
2b9d42f0
NIS
2491 }
2492 else {
2493 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2494 }
02aa26ce
NT
2495 } /* while loop to process each character */
2496
2497 /* terminate the string and set up the sv */
79072805 2498 *d = '\0';
95a20fc0 2499 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2500 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2501 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2502
79072805 2503 SvPOK_on(sv);
9f4817db 2504 if (PL_encoding && !has_utf8) {
d0063567
DK
2505 sv_recode_to_utf8(sv, PL_encoding);
2506 if (SvUTF8(sv))
2507 has_utf8 = TRUE;
9f4817db 2508 }
2b9d42f0 2509 if (has_utf8) {
7e2040f0 2510 SvUTF8_on(sv);
2b9d42f0 2511 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2512 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2513 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2514 }
2515 }
79072805 2516
02aa26ce 2517 /* shrink the sv if we allocated more than we used */
79072805 2518 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2519 SvPV_shrink_to_cur(sv);
79072805 2520 }
02aa26ce 2521
6154021b 2522 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2523 if (s > PL_bufptr) {
eb0d8d16
NC
2524 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2525 const char *const key = PL_lex_inpat ? "qr" : "q";
2526 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2527 const char *type;
2528 STRLEN typelen;
2529
2530 if (PL_lex_inwhat == OP_TRANS) {
2531 type = "tr";
2532 typelen = 2;
2533 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2534 type = "s";
2535 typelen = 1;
2536 } else {
2537 type = "qq";
2538 typelen = 2;
2539 }
2540
2541 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2542 type, typelen);
2543 }
6154021b 2544 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2545 } else
8990e307 2546 SvREFCNT_dec(sv);
79072805
LW
2547 return s;
2548}
2549
ffb4593c
NT
2550/* S_intuit_more
2551 * Returns TRUE if there's more to the expression (e.g., a subscript),
2552 * FALSE otherwise.
ffb4593c
NT
2553 *
2554 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2555 *
2556 * ->[ and ->{ return TRUE
2557 * { and [ outside a pattern are always subscripts, so return TRUE
2558 * if we're outside a pattern and it's not { or [, then return FALSE
2559 * if we're in a pattern and the first char is a {
2560 * {4,5} (any digits around the comma) returns FALSE
2561 * if we're in a pattern and the first char is a [
2562 * [] returns FALSE
2563 * [SOMETHING] has a funky algorithm to decide whether it's a
2564 * character class or not. It has to deal with things like
2565 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2566 * anything else returns TRUE
2567 */
2568
9cbb5ea2
GS
2569/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2570
76e3520e 2571STATIC int
cea2e8a9 2572S_intuit_more(pTHX_ register char *s)
79072805 2573{
97aff369 2574 dVAR;
7918f24d
NC
2575
2576 PERL_ARGS_ASSERT_INTUIT_MORE;
2577
3280af22 2578 if (PL_lex_brackets)
79072805
LW
2579 return TRUE;
2580 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2581 return TRUE;
2582 if (*s != '{' && *s != '[')
2583 return FALSE;
3280af22 2584 if (!PL_lex_inpat)
79072805
LW
2585 return TRUE;
2586
2587 /* In a pattern, so maybe we have {n,m}. */
2588 if (*s == '{') {
2589 s++;
2590 if (!isDIGIT(*s))
2591 return TRUE;
2592 while (isDIGIT(*s))
2593 s++;
2594 if (*s == ',')
2595 s++;
2596 while (isDIGIT(*s))
2597 s++;
2598 if (*s == '}')
2599 return FALSE;
2600 return TRUE;
2601
2602 }
2603
2604 /* On the other hand, maybe we have a character class */
2605
2606 s++;
2607 if (*s == ']' || *s == '^')
2608 return FALSE;
2609 else {
ffb4593c 2610 /* this is terrifying, and it works */
79072805
LW
2611 int weight = 2; /* let's weigh the evidence */
2612 char seen[256];
f27ffc4a 2613 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2614 const char * const send = strchr(s,']');
3280af22 2615 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2616
2617 if (!send) /* has to be an expression */
2618 return TRUE;
2619
2620 Zero(seen,256,char);
2621 if (*s == '$')
2622 weight -= 3;
2623 else if (isDIGIT(*s)) {
2624 if (s[1] != ']') {
2625 if (isDIGIT(s[1]) && s[2] == ']')
2626 weight -= 10;
2627 }
2628 else
2629 weight -= 100;
2630 }
2631 for (; s < send; s++) {
2632 last_un_char = un_char;
2633 un_char = (unsigned char)*s;
2634 switch (*s) {
2635 case '@':
2636 case '&':
2637 case '$':
2638 weight -= seen[un_char] * 10;
7e2040f0 2639 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2640 int len;
8903cb82 2641 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2642 len = (int)strlen(tmpbuf);
2643 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2644 weight -= 100;
2645 else
2646 weight -= 10;
2647 }
2648 else if (*s == '$' && s[1] &&
93a17b20
LW
2649 strchr("[#!%*<>()-=",s[1])) {
2650 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2651 weight -= 10;
2652 else
2653 weight -= 1;
2654 }
2655 break;
2656 case '\\':
2657 un_char = 254;
2658 if (s[1]) {
93a17b20 2659 if (strchr("wds]",s[1]))
79072805 2660 weight += 100;
10edeb5d 2661 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2662 weight += 1;
93a17b20 2663 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2664 weight += 40;
2665 else if (isDIGIT(s[1])) {
2666 weight += 40;
2667 while (s[1] && isDIGIT(s[1]))
2668 s++;
2669 }
2670 }
2671 else
2672 weight += 100;
2673 break;
2674 case '-':
2675 if (s[1] == '\\')
2676 weight += 50;
93a17b20 2677 if (strchr("aA01! ",last_un_char))
79072805 2678 weight += 30;
93a17b20 2679 if (strchr("zZ79~",s[1]))
79072805 2680 weight += 30;
f27ffc4a
GS
2681 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2682 weight -= 5; /* cope with negative subscript */
79072805
LW
2683 break;
2684 default:
3792a11b
NC
2685 if (!isALNUM(last_un_char)
2686 && !(last_un_char == '$' || last_un_char == '@'
2687 || last_un_char == '&')
2688 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2689 char *d = tmpbuf;
2690 while (isALPHA(*s))
2691 *d++ = *s++;
2692 *d = '\0';
5458a98a 2693 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2694 weight -= 150;
2695 }
2696 if (un_char == last_un_char + 1)
2697 weight += 5;
2698 weight -= seen[un_char];
2699 break;
2700 }
2701 seen[un_char]++;
2702 }
2703 if (weight >= 0) /* probably a character class */
2704 return FALSE;
2705 }
2706
2707 return TRUE;
2708}
ffed7fef 2709
ffb4593c
NT
2710/*
2711 * S_intuit_method
2712 *
2713 * Does all the checking to disambiguate
2714 * foo bar
2715 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2716 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2717 *
2718 * First argument is the stuff after the first token, e.g. "bar".
2719 *
2720 * Not a method if bar is a filehandle.
2721 * Not a method if foo is a subroutine prototyped to take a filehandle.
2722 * Not a method if it's really "Foo $bar"
2723 * Method if it's "foo $bar"
2724 * Not a method if it's really "print foo $bar"
2725 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2726 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2727 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2728 * =>
2729 */
2730
76e3520e 2731STATIC int
62d55b22 2732S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2733{
97aff369 2734 dVAR;
a0d0e21e 2735 char *s = start + (*start == '$');
3280af22 2736 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2737 STRLEN len;
2738 GV* indirgv;
5db06880
NC
2739#ifdef PERL_MAD
2740 int soff;
2741#endif
a0d0e21e 2742
7918f24d
NC
2743 PERL_ARGS_ASSERT_INTUIT_METHOD;
2744
a0d0e21e 2745 if (gv) {
62d55b22 2746 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2747 return 0;
62d55b22
NC
2748 if (cv) {
2749 if (SvPOK(cv)) {
2750 const char *proto = SvPVX_const(cv);
2751 if (proto) {
2752 if (*proto == ';')
2753 proto++;
2754 if (*proto == '*')
2755 return 0;
2756 }
b6c543e3
IZ
2757 }
2758 } else
c35e046a 2759 gv = NULL;
a0d0e21e 2760 }
8903cb82 2761 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2762 /* start is the beginning of the possible filehandle/object,
2763 * and s is the end of it
2764 * tmpbuf is a copy of it
2765 */
2766
a0d0e21e 2767 if (*start == '$') {
3ef1310e
RGS
2768 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2769 isUPPER(*PL_tokenbuf))
a0d0e21e 2770 return 0;
5db06880
NC
2771#ifdef PERL_MAD
2772 len = start - SvPVX(PL_linestr);
2773#endif
29595ff2 2774 s = PEEKSPACE(s);
f0092767 2775#ifdef PERL_MAD
5db06880
NC
2776 start = SvPVX(PL_linestr) + len;
2777#endif
3280af22
NIS
2778 PL_bufptr = start;
2779 PL_expect = XREF;
a0d0e21e
LW
2780 return *s == '(' ? FUNCMETH : METHOD;
2781 }
5458a98a 2782 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2783 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2784 len -= 2;
2785 tmpbuf[len] = '\0';
5db06880
NC
2786#ifdef PERL_MAD
2787 soff = s - SvPVX(PL_linestr);
2788#endif
c3e0f903
GS
2789 goto bare_package;
2790 }
90e5519e 2791 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2792 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2793 return 0;
2794 /* filehandle or package name makes it a method */
da51bb9b 2795 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2796#ifdef PERL_MAD
2797 soff = s - SvPVX(PL_linestr);
2798#endif
29595ff2 2799 s = PEEKSPACE(s);
3280af22 2800 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2801 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2802 bare_package:
cd81e915 2803 start_force(PL_curforce);
9ded7720 2804 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2805 newSVpvn(tmpbuf,len));
9ded7720 2806 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2807 if (PL_madskills)
2808 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2809 PL_expect = XTERM;
a0d0e21e 2810 force_next(WORD);
3280af22 2811 PL_bufptr = s;
5db06880
NC
2812#ifdef PERL_MAD
2813 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2814#endif
a0d0e21e
LW
2815 return *s == '(' ? FUNCMETH : METHOD;
2816 }
2817 }
2818 return 0;
2819}
2820
16d20bd9 2821/* Encoded script support. filter_add() effectively inserts a
4e553d73 2822 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2823 * Note that the filter function only applies to the current source file
2824 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2825 *
2826 * The datasv parameter (which may be NULL) can be used to pass
2827 * private data to this instance of the filter. The filter function
2828 * can recover the SV using the FILTER_DATA macro and use it to
2829 * store private buffers and state information.
2830 *
2831 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2832 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2833 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2834 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2835 * private use must be set using malloc'd pointers.
2836 */
16d20bd9
AD
2837
2838SV *
864dbfa3 2839Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2840{
97aff369 2841 dVAR;
f4c556ac 2842 if (!funcp)
a0714e2c 2843 return NULL;
f4c556ac 2844
5486870f
DM
2845 if (!PL_parser)
2846 return NULL;
2847
3280af22
NIS
2848 if (!PL_rsfp_filters)
2849 PL_rsfp_filters = newAV();
16d20bd9 2850 if (!datasv)
561b68a9 2851 datasv = newSV(0);
862a34c6 2852 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2853 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2854 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2855 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2856 FPTR2DPTR(void *, IoANY(datasv)),
2857 SvPV_nolen(datasv)));
3280af22
NIS
2858 av_unshift(PL_rsfp_filters, 1);
2859 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2860 return(datasv);
2861}
4e553d73 2862
16d20bd9
AD
2863
2864/* Delete most recently added instance of this filter function. */
a0d0e21e 2865void
864dbfa3 2866Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2867{
97aff369 2868 dVAR;
e0c19803 2869 SV *datasv;
24801a4b 2870
7918f24d
NC
2871 PERL_ARGS_ASSERT_FILTER_DEL;
2872
33073adb 2873#ifdef DEBUGGING
55662e27
JH
2874 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2875 FPTR2DPTR(void*, funcp)));
33073adb 2876#endif
5486870f 2877 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2878 return;
2879 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2880 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2881 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2882 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2883 IoANY(datasv) = (void *)NULL;
3280af22 2884 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2885
16d20bd9
AD
2886 return;
2887 }
2888 /* we need to search for the correct entry and clear it */
cea2e8a9 2889 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2890}
2891
2892
1de9afcd
RGS
2893/* Invoke the idxth filter function for the current rsfp. */
2894/* maxlen 0 = read one text line */
16d20bd9 2895I32
864dbfa3 2896Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2897{
97aff369 2898 dVAR;
16d20bd9
AD
2899 filter_t funcp;
2900 SV *datasv = NULL;
f482118e
NC
2901 /* This API is bad. It should have been using unsigned int for maxlen.
2902 Not sure if we want to change the API, but if not we should sanity
2903 check the value here. */
39cd7a59
NC
2904 const unsigned int correct_length
2905 = maxlen < 0 ?
2906#ifdef PERL_MICRO
2907 0x7FFFFFFF
2908#else
2909 INT_MAX
2910#endif
2911 : maxlen;
e50aee73 2912
7918f24d
NC
2913 PERL_ARGS_ASSERT_FILTER_READ;
2914
5486870f 2915 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2916 return -1;
1de9afcd 2917 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2918 /* Provide a default input filter to make life easy. */
2919 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2920 DEBUG_P(PerlIO_printf(Perl_debug_log,
2921 "filter_read %d: from rsfp\n", idx));
f482118e 2922 if (correct_length) {
16d20bd9
AD
2923 /* Want a block */
2924 int len ;
f54cb97a 2925 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2926
2927 /* ensure buf_sv is large enough */
f482118e
NC
2928 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2929 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2930 correct_length)) <= 0) {
3280af22 2931 if (PerlIO_error(PL_rsfp))
37120919
AD
2932 return -1; /* error */
2933 else
2934 return 0 ; /* end of file */
2935 }
16d20bd9
AD
2936 SvCUR_set(buf_sv, old_len + len) ;
2937 } else {
2938 /* Want a line */
3280af22
NIS
2939 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2940 if (PerlIO_error(PL_rsfp))
37120919
AD
2941 return -1; /* error */
2942 else
2943 return 0 ; /* end of file */
2944 }
16d20bd9
AD
2945 }
2946 return SvCUR(buf_sv);
2947 }
2948 /* Skip this filter slot if filter has been deleted */
1de9afcd 2949 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2950 DEBUG_P(PerlIO_printf(Perl_debug_log,
2951 "filter_read %d: skipped (filter deleted)\n",
2952 idx));
f482118e 2953 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2954 }
2955 /* Get function pointer hidden within datasv */
8141890a 2956 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2957 DEBUG_P(PerlIO_printf(Perl_debug_log,
2958 "filter_read %d: via function %p (%s)\n",
ca0270c4 2959 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2960 /* Call function. The function is expected to */
2961 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2962 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2963 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2964}
2965
76e3520e 2966STATIC char *
cea2e8a9 2967S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2968{
97aff369 2969 dVAR;
7918f24d
NC
2970
2971 PERL_ARGS_ASSERT_FILTER_GETS;
2972
c39cd008 2973#ifdef PERL_CR_FILTER
3280af22 2974 if (!PL_rsfp_filters) {
c39cd008 2975 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2976 }
2977#endif
3280af22 2978 if (PL_rsfp_filters) {
55497cff 2979 if (!append)
2980 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2981 if (FILTER_READ(0, sv, 0) > 0)
2982 return ( SvPVX(sv) ) ;
2983 else
bd61b366 2984 return NULL ;
16d20bd9 2985 }
9d116dd7 2986 else
fd049845 2987 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2988}
2989
01ec43d0 2990STATIC HV *
9bde8eb0 2991S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2992{
97aff369 2993 dVAR;
def3634b
GS
2994 GV *gv;
2995
7918f24d
NC
2996 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2997
01ec43d0 2998 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2999 return PL_curstash;
3000
3001 if (len > 2 &&
3002 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3003 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3004 {
3005 return GvHV(gv); /* Foo:: */
def3634b
GS
3006 }
3007
3008 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3009 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3010 if (gv && GvCV(gv)) {
3011 SV * const sv = cv_const_sv(GvCV(gv));
3012 if (sv)
9bde8eb0 3013 pkgname = SvPV_const(sv, len);
def3634b
GS
3014 }
3015
9bde8eb0 3016 return gv_stashpvn(pkgname, len, 0);
def3634b 3017}
a0d0e21e 3018
e3f73d4e
RGS
3019/*
3020 * S_readpipe_override
3021 * Check whether readpipe() is overriden, and generates the appropriate
3022 * optree, provided sublex_start() is called afterwards.
3023 */
3024STATIC void
1d51329b 3025S_readpipe_override(pTHX)
e3f73d4e
RGS
3026{
3027 GV **gvp;
3028 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3029 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3030 if ((gv_readpipe
3031 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3032 ||
3033 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3034 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3035 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3036 {
3037 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3038 append_elem(OP_LIST,
3039 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3040 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3041 }
e3f73d4e
RGS
3042}
3043
5db06880
NC
3044#ifdef PERL_MAD
3045 /*
3046 * Perl_madlex
3047 * The intent of this yylex wrapper is to minimize the changes to the
3048 * tokener when we aren't interested in collecting madprops. It remains
3049 * to be seen how successful this strategy will be...
3050 */
3051
3052int
3053Perl_madlex(pTHX)
3054{
3055 int optype;
3056 char *s = PL_bufptr;
3057
cd81e915
NC
3058 /* make sure PL_thiswhite is initialized */
3059 PL_thiswhite = 0;
3060 PL_thismad = 0;
5db06880 3061
cd81e915 3062 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3063 if (PL_pending_ident)
3064 return S_pending_ident(aTHX);
3065
3066 /* previous token ate up our whitespace? */
cd81e915
NC
3067 if (!PL_lasttoke && PL_nextwhite) {
3068 PL_thiswhite = PL_nextwhite;
3069 PL_nextwhite = 0;
5db06880
NC
3070 }
3071
3072 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3073 PL_realtokenstart = -1;
3074 PL_thistoken = 0;
5db06880
NC
3075 optype = yylex();
3076 s = PL_bufptr;
cd81e915 3077 assert(PL_curforce < 0);
5db06880 3078
cd81e915
NC
3079 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3080 if (!PL_thistoken) {
3081 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3082 PL_thistoken = newSVpvs("");
5db06880 3083 else {
c35e046a 3084 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3085 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3086 }
3087 }
cd81e915
NC
3088 if (PL_thismad) /* install head */
3089 CURMAD('X', PL_thistoken);
5db06880
NC
3090 }
3091
3092 /* last whitespace of a sublex? */
cd81e915
NC
3093 if (optype == ')' && PL_endwhite) {
3094 CURMAD('X', PL_endwhite);
5db06880
NC
3095 }
3096
cd81e915 3097 if (!PL_thismad) {
5db06880
NC
3098
3099 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3100 if (!PL_thiswhite && !PL_endwhite && !optype) {
3101 sv_free(PL_thistoken);
3102 PL_thistoken = 0;
5db06880
NC
3103 return 0;
3104 }
3105
3106 /* put off final whitespace till peg */
3107 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3108 PL_nextwhite = PL_thiswhite;
3109 PL_thiswhite = 0;
5db06880 3110 }
cd81e915
NC
3111 else if (PL_thisopen) {
3112 CURMAD('q', PL_thisopen);
3113 if (PL_thistoken)
3114 sv_free(PL_thistoken);
3115 PL_thistoken = 0;
5db06880
NC
3116 }
3117 else {
3118 /* Store actual token text as madprop X */
cd81e915 3119 CURMAD('X', PL_thistoken);
5db06880
NC
3120 }
3121
cd81e915 3122 if (PL_thiswhite) {
5db06880 3123 /* add preceding whitespace as madprop _ */
cd81e915 3124 CURMAD('_', PL_thiswhite);
5db06880
NC
3125 }
3126
cd81e915 3127 if (PL_thisstuff) {
5db06880 3128 /* add quoted material as madprop = */
cd81e915 3129 CURMAD('=', PL_thisstuff);
5db06880
NC
3130 }
3131
cd81e915 3132 if (PL_thisclose) {
5db06880 3133 /* add terminating quote as madprop Q */
cd81e915 3134 CURMAD('Q', PL_thisclose);
5db06880
NC
3135 }
3136 }
3137
3138 /* special processing based on optype */
3139
3140 switch (optype) {
3141
3142 /* opval doesn't need a TOKEN since it can already store mp */
3143 case WORD:
3144 case METHOD:
3145 case FUNCMETH:
3146 case THING:
3147 case PMFUNC:
3148 case PRIVATEREF:
3149 case FUNC0SUB:
3150 case UNIOPSUB:
3151 case LSTOPSUB:
6154021b
RGS
3152 if (pl_yylval.opval)
3153 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3154 PL_thismad = 0;
5db06880
NC
3155 return optype;
3156
3157 /* fake EOF */
3158 case 0:
3159 optype = PEG;
cd81e915
NC
3160 if (PL_endwhite) {
3161 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3162 PL_endwhite = 0;
5db06880
NC
3163 }
3164 break;
3165
3166 case ']':
3167 case '}':
cd81e915 3168 if (PL_faketokens)
5db06880
NC
3169 break;
3170 /* remember any fake bracket that lexer is about to discard */
3171 if (PL_lex_brackets == 1 &&
3172 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3173 {
3174 s = PL_bufptr;
3175 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3176 s++;
3177 if (*s == '}') {
cd81e915
NC
3178 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3179 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3180 PL_thiswhite = 0;
5db06880
NC
3181 PL_bufptr = s - 1;
3182 break; /* don't bother looking for trailing comment */
3183 }
3184 else
3185 s = PL_bufptr;
3186 }
3187 if (optype == ']')
3188 break;
3189 /* FALLTHROUGH */
3190
3191 /* attach a trailing comment to its statement instead of next token */
3192 case ';':
cd81e915 3193 if (PL_faketokens)
5db06880
NC
3194 break;
3195 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3196 s = PL_bufptr;
3197 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3198 s++;
3199 if (*s == '\n' || *s == '#') {
3200 while (s < PL_bufend && *s != '\n')
3201 s++;
3202 if (s < PL_bufend)
3203 s++;
cd81e915
NC
3204 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3205 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3206 PL_thiswhite = 0;
5db06880
NC
3207 PL_bufptr = s;
3208 }
3209 }
3210 break;
3211
3212 /* pval */
3213 case LABEL:
3214 break;
3215
3216 /* ival */
3217 default:
3218 break;
3219
3220 }
3221
3222 /* Create new token struct. Note: opvals return early above. */
6154021b 3223 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3224 PL_thismad = 0;
5db06880
NC
3225 return optype;
3226}
3227#endif
3228
468aa647 3229STATIC char *
cc6ed77d 3230S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3231 dVAR;
7918f24d
NC
3232
3233 PERL_ARGS_ASSERT_TOKENIZE_USE;
3234
468aa647
RGS
3235 if (PL_expect != XSTATE)
3236 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3237 is_use ? "use" : "no"));
29595ff2 3238 s = SKIPSPACE1(s);
468aa647
RGS
3239 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3240 s = force_version(s, TRUE);
29595ff2 3241 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3242 start_force(PL_curforce);
9ded7720 3243 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3244 force_next(WORD);
3245 }
3246 else if (*s == 'v') {
3247 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3248 s = force_version(s, FALSE);
3249 }
3250 }
3251 else {
3252 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3253 s = force_version(s, FALSE);
3254 }
6154021b 3255 pl_yylval.ival = is_use;
468aa647
RGS
3256 return s;
3257}
748a9306 3258#ifdef DEBUGGING
27da23d5 3259 static const char* const exp_name[] =
09bef843 3260 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3261 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3262 };
748a9306 3263#endif
463ee0b2 3264
02aa26ce
NT
3265/*
3266 yylex
3267
3268 Works out what to call the token just pulled out of the input
3269 stream. The yacc parser takes care of taking the ops we return and
3270 stitching them into a tree.
3271
3272 Returns:
3273 PRIVATEREF
3274
3275 Structure:
3276 if read an identifier
3277 if we're in a my declaration
3278 croak if they tried to say my($foo::bar)
3279 build the ops for a my() declaration
3280 if it's an access to a my() variable
3281 are we in a sort block?
3282 croak if my($a); $a <=> $b
3283 build ops for access to a my() variable
3284 if in a dq string, and they've said @foo and we can't find @foo
3285 croak
3286 build ops for a bareword
3287 if we already built the token before, use it.
3288*/
3289
20141f0e 3290
dba4d153
JH
3291#ifdef __SC__
3292#pragma segment Perl_yylex
3293#endif
dba4d153 3294int
dba4d153 3295Perl_yylex(pTHX)
20141f0e 3296{
97aff369 3297 dVAR;
3afc138a 3298 register char *s = PL_bufptr;
378cc40b 3299 register char *d;
463ee0b2 3300 STRLEN len;
aa7440fb 3301 bool bof = FALSE;
a687059c 3302
10edeb5d
JH
3303 /* orig_keyword, gvp, and gv are initialized here because
3304 * jump to the label just_a_word_zero can bypass their
3305 * initialization later. */
3306 I32 orig_keyword = 0;
3307 GV *gv = NULL;
3308 GV **gvp = NULL;
3309
bbf60fe6 3310 DEBUG_T( {
396482e1 3311 SV* tmp = newSVpvs("");
b6007c36
DM
3312 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3313 (IV)CopLINE(PL_curcop),
3314 lex_state_names[PL_lex_state],
3315 exp_name[PL_expect],
3316 pv_display(tmp, s, strlen(s), 0, 60));
3317 SvREFCNT_dec(tmp);
bbf60fe6 3318 } );
02aa26ce 3319 /* check if there's an identifier for us to look at */
ba979b31 3320 if (PL_pending_ident)
bbf60fe6 3321 return REPORT(S_pending_ident(aTHX));
bbce6d69 3322
02aa26ce
NT
3323 /* no identifier pending identification */
3324
3280af22 3325 switch (PL_lex_state) {
79072805
LW
3326#ifdef COMMENTARY
3327 case LEX_NORMAL: /* Some compilers will produce faster */
3328 case LEX_INTERPNORMAL: /* code if we comment these out. */
3329 break;
3330#endif
3331
09bef843 3332 /* when we've already built the next token, just pull it out of the queue */
79072805 3333 case LEX_KNOWNEXT:
5db06880
NC
3334#ifdef PERL_MAD
3335 PL_lasttoke--;
6154021b 3336 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3337 if (PL_madskills) {
cd81e915 3338 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3339 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3340 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3341 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3342 PL_thismad->mad_val = 0;
3343 mad_free(PL_thismad);
3344 PL_thismad = 0;
5db06880
NC
3345 }
3346 }
3347 if (!PL_lasttoke) {
3348 PL_lex_state = PL_lex_defer;
3349 PL_expect = PL_lex_expect;
3350 PL_lex_defer = LEX_NORMAL;
3351 if (!PL_nexttoke[PL_lasttoke].next_type)
3352 return yylex();
3353 }
3354#else
3280af22 3355 PL_nexttoke--;
6154021b 3356 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3357 if (!PL_nexttoke) {
3358 PL_lex_state = PL_lex_defer;
3359 PL_expect = PL_lex_expect;
3360 PL_lex_defer = LEX_NORMAL;
463ee0b2 3361 }
5db06880
NC
3362#endif
3363#ifdef PERL_MAD
3364 /* FIXME - can these be merged? */
3365 return(PL_nexttoke[PL_lasttoke].next_type);
3366#else
bbf60fe6 3367 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3368#endif
79072805 3369
02aa26ce 3370 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3371 when we get here, PL_bufptr is at the \
02aa26ce 3372 */
79072805
LW
3373 case LEX_INTERPCASEMOD:
3374#ifdef DEBUGGING
3280af22 3375 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3376 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3377#endif
02aa26ce 3378 /* handle \E or end of string */
3280af22 3379 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3380 /* if at a \E */
3280af22 3381 if (PL_lex_casemods) {
f54cb97a 3382 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3383 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3384
3792a11b
NC
3385 if (PL_bufptr != PL_bufend
3386 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3387 PL_bufptr += 2;
3388 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3389#ifdef PERL_MAD
3390 if (PL_madskills)
6b29d1f5 3391 PL_thistoken = newSVpvs("\\E");
5db06880 3392#endif
a0d0e21e 3393 }
bbf60fe6 3394 return REPORT(')');
79072805 3395 }
5db06880
NC
3396#ifdef PERL_MAD
3397 while (PL_bufptr != PL_bufend &&
3398 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3399 if (!PL_thiswhite)
6b29d1f5 3400 PL_thiswhite = newSVpvs("");
cd81e915 3401 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3402 PL_bufptr += 2;
3403 }
3404#else
3280af22
NIS
3405 if (PL_bufptr != PL_bufend)
3406 PL_bufptr += 2;
5db06880 3407#endif
3280af22 3408 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3409 return yylex();
79072805
LW
3410 }
3411 else {
607df283 3412 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3413 "### Saw case modifier\n"); });
3280af22 3414 s = PL_bufptr + 1;
6e909404 3415 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3416#ifdef PERL_MAD
cd81e915 3417 if (!PL_thiswhite)
6b29d1f5 3418 PL_thiswhite = newSVpvs("");
cd81e915 3419 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3420#endif
89122651 3421 PL_bufptr = s + 3;
6e909404
JH
3422 PL_lex_state = LEX_INTERPCONCAT;
3423 return yylex();
a0d0e21e 3424 }
6e909404 3425 else {
90771dc0 3426 I32 tmp;
5db06880
NC
3427 if (!PL_madskills) /* when just compiling don't need correct */
3428 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3429 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3430 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3431 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3432 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3433 return REPORT(')');
6e909404
JH
3434 }
3435 if (PL_lex_casemods > 10)
3436 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3437 PL_lex_casestack[PL_lex_casemods++] = *s;
3438 PL_lex_casestack[PL_lex_casemods] = '\0';
3439 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3440 start_force(PL_curforce);
9ded7720 3441 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3442 force_next('(');
cd81e915 3443 start_force(PL_curforce);
6e909404 3444 if (*s == 'l')
9ded7720 3445 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3446 else if (*s == 'u')
9ded7720 3447 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3448 else if (*s == 'L')
9ded7720 3449 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3450 else if (*s == 'U')
9ded7720 3451 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3452 else if (*s == 'Q')
9ded7720 3453 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3454 else
3455 Perl_croak(aTHX_ "panic: yylex");
5db06880 3456 if (PL_madskills) {
a5849ce5
NC
3457 SV* const tmpsv = newSVpvs("\\ ");
3458 /* replace the space with the character we want to escape
3459 */
3460 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3461 curmad('_', tmpsv);
3462 }
6e909404 3463 PL_bufptr = s + 1;
a0d0e21e 3464 }
79072805 3465 force_next(FUNC);
3280af22
NIS
3466 if (PL_lex_starts) {
3467 s = PL_bufptr;
3468 PL_lex_starts = 0;
5db06880
NC
3469#ifdef PERL_MAD
3470 if (PL_madskills) {
cd81e915
NC
3471 if (PL_thistoken)
3472 sv_free(PL_thistoken);
6b29d1f5 3473 PL_thistoken = newSVpvs("");
5db06880
NC
3474 }
3475#endif
131b3ad0
DM
3476 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3477 if (PL_lex_casemods == 1 && PL_lex_inpat)
3478 OPERATOR(',');
3479 else
3480 Aop(OP_CONCAT);
79072805
LW
3481 }
3482 else
cea2e8a9 3483 return yylex();
79072805
LW
3484 }
3485
55497cff 3486 case LEX_INTERPPUSH:
bbf60fe6 3487 return REPORT(sublex_push());
55497cff 3488
79072805 3489 case LEX_INTERPSTART:
3280af22 3490 if (PL_bufptr == PL_bufend)
bbf60fe6 3491 return REPORT(sublex_done());
607df283 3492 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3493 "### Interpolated variable\n"); });
3280af22
NIS
3494 PL_expect = XTERM;
3495 PL_lex_dojoin = (*PL_bufptr == '@');
3496 PL_lex_state = LEX_INTERPNORMAL;
3497 if (PL_lex_dojoin) {
cd81e915 3498 start_force(PL_curforce);
9ded7720 3499 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3500 force_next(',');
cd81e915 3501 start_force(PL_curforce);
a0d0e21e 3502 force_ident("\"", '$');
cd81e915 3503 start_force(PL_curforce);
9ded7720 3504 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3505 force_next('$');
cd81e915 3506 start_force(PL_curforce);
9ded7720 3507 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3508 force_next('(');
cd81e915 3509 start_force(PL_curforce);
9ded7720 3510 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3511 force_next(FUNC);
3512 }
3280af22
NIS
3513 if (PL_lex_starts++) {
3514 s = PL_bufptr;
5db06880
NC
3515#ifdef PERL_MAD
3516 if (PL_madskills) {
cd81e915
NC
3517 if (PL_thistoken)
3518 sv_free(PL_thistoken);
6b29d1f5 3519 PL_thistoken = newSVpvs("");
5db06880
NC
3520 }
3521#endif
131b3ad0
DM
3522 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3523 if (!PL_lex_casemods && PL_lex_inpat)
3524 OPERATOR(',');
3525 else
3526 Aop(OP_CONCAT);
79072805 3527 }
cea