This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: perl 5.7.3 + XS lvalue subs
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
acfe0abc 30static void restore_rsfp(pTHX_ void *f);
6e3aabd6 31#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
32static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
39e02b42
JH
39#ifdef USE_UTF8_SCRIPTS
40# define UTF (!IN_BYTES)
2b9d42f0 41#else
39e02b42
JH
42# ifdef EBCDIC /* For now 'use utf8' does not affect tokenizer on EBCDIC */
43# define UTF (PL_linestr && DO_UTF8(PL_linestr))
44# else
45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
46# endif
2b9d42f0 47#endif
a0ed51b3 48
61f0cdd9 49/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
bf4acbe4
GS
53/* On MacOS, respect nonbreaking spaces */
54#ifdef MACOS_TRADITIONAL
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
56#else
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58#endif
59
ffb4593c
NT
60/* LEX_* are values for PL_lex_state, the state of the lexer.
61 * They are arranged oddly so that the guard on the switch statement
79072805
LW
62 * can get by with a single comparison (if the compiler is smart enough).
63 */
64
fb73857a
PP
65/* #define LEX_NOTPARSING 11 is done in perl.h. */
66
55497cff
PP
67#define LEX_NORMAL 10
68#define LEX_INTERPNORMAL 9
69#define LEX_INTERPCASEMOD 8
70#define LEX_INTERPPUSH 7
71#define LEX_INTERPSTART 6
72#define LEX_INTERPEND 5
73#define LEX_INTERPENDMAYBE 4
74#define LEX_INTERPCONCAT 3
75#define LEX_INTERPCONST 2
76#define LEX_FORMLINE 1
77#define LEX_KNOWNEXT 0
79072805 78
79072805
LW
79#ifdef ff_next
80#undef ff_next
d48672a2
LW
81#endif
82
a1a0e61e 83#ifdef USE_PURE_BISON
dba4d153
JH
84# ifndef YYMAXLEVEL
85# define YYMAXLEVEL 100
86# endif
20141f0e
RI
87YYSTYPE* yylval_pointer[YYMAXLEVEL];
88int* yychar_pointer[YYMAXLEVEL];
6f202aea 89int yyactlevel = -1;
22c35a8c
GS
90# undef yylval
91# undef yychar
20141f0e
RI
92# define yylval (*yylval_pointer[yyactlevel])
93# define yychar (*yychar_pointer[yyactlevel])
94# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 95# undef yylex
dba4d153 96# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
97#endif
98
79072805 99#include "keywords.h"
fe14fcc3 100
ffb4593c
NT
101/* CLINE is a macro that ensures PL_copline has a sane value */
102
ae986130
LW
103#ifdef CLINE
104#undef CLINE
105#endif
57843af0 106#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 107
ffb4593c
NT
108/*
109 * Convenience functions to return different tokens and prime the
9cbb5ea2 110 * lexer for the next token. They all take an argument.
ffb4593c
NT
111 *
112 * TOKEN : generic token (used for '(', DOLSHARP, etc)
113 * OPERATOR : generic operator
114 * AOPERATOR : assignment operator
115 * PREBLOCK : beginning the block after an if, while, foreach, ...
116 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
117 * PREREF : *EXPR where EXPR is not a simple identifier
118 * TERM : expression term
119 * LOOPX : loop exiting command (goto, last, dump, etc)
120 * FTST : file test operator
121 * FUN0 : zero-argument function
2d2e263d 122 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
123 * BOop : bitwise or or xor
124 * BAop : bitwise and
125 * SHop : shift operator
126 * PWop : power operator
9cbb5ea2 127 * PMop : pattern-matching operator
ffb4593c
NT
128 * Aop : addition-level operator
129 * Mop : multiplication-level operator
130 * Eop : equality-testing operator
e5edeb50 131 * Rop : relational operator <= != gt
ffb4593c
NT
132 *
133 * Also see LOP and lop() below.
134 */
135
075953c3
JH
136/* Note that REPORT() and REPORT2() will be expressions that supply
137 * their own trailing comma, not suitable for statements as such. */
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
139# define REPORT(x,retval) tokereport(x,s,(int)retval),
140# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 141#else
075953c3
JH
142# define REPORT(x,retval)
143# define REPORT2(x,retval)
998054bd
SC
144#endif
145
075953c3
JH
146#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
147#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
148#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
149#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
150#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
151#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
152#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
153#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
154#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
155#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
156#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
157#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
158#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
159#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
160#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
161#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
162#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
163#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
164#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
165#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 166
a687059c
LW
167/* This bit of chicanery makes a unary function followed by
168 * a parenthesis into a function with one argument, highest precedence.
169 */
2f3197b3 170#define UNI(f) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
3280af22
NIS
172 PL_expect = XTERM, \
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c
LW
176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
177
79072805 178#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 179 REPORT("uni",f) \
3280af22
NIS
180 PL_bufptr = s, \
181 PL_last_uni = PL_oldbufptr, \
79072805
LW
182 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
183
9f68db38 184/* grandfather return to old style */
3280af22 185#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 186
8fa7f367
JH
187#ifdef DEBUGGING
188
2d00ba3b 189STATIC void
61b2116b 190S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 191{
998054bd 192 DEBUG_T({
9c5ffd7c 193 SV* report = newSVpv(thing, 0);
29b291f7
RB
194 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
195 (IV)rv);
998054bd
SC
196
197 if (s - PL_bufptr > 0)
198 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
199 else {
200 if (PL_oldbufptr && *PL_oldbufptr)
201 sv_catpv(report, PL_tokenbuf);
202 }
203 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 204 });
998054bd
SC
205}
206
8fa7f367
JH
207#endif
208
ffb4593c
NT
209/*
210 * S_ao
211 *
212 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
213 * into an OP_ANDASSIGN or OP_ORASSIGN
214 */
215
76e3520e 216STATIC int
cea2e8a9 217S_ao(pTHX_ int toketype)
a0d0e21e 218{
3280af22
NIS
219 if (*PL_bufptr == '=') {
220 PL_bufptr++;
a0d0e21e
LW
221 if (toketype == ANDAND)
222 yylval.ival = OP_ANDASSIGN;
223 else if (toketype == OROR)
224 yylval.ival = OP_ORASSIGN;
225 toketype = ASSIGNOP;
226 }
227 return toketype;
228}
229
ffb4593c
NT
230/*
231 * S_no_op
232 * When Perl expects an operator and finds something else, no_op
233 * prints the warning. It always prints "<something> found where
234 * operator expected. It prints "Missing semicolon on previous line?"
235 * if the surprise occurs at the start of the line. "do you need to
236 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
237 * where the compiler doesn't know if foo is a method call or a function.
238 * It prints "Missing operator before end of line" if there's nothing
239 * after the missing operator, or "... before <...>" if there is something
240 * after the missing operator.
241 */
242
76e3520e 243STATIC void
cea2e8a9 244S_no_op(pTHX_ char *what, char *s)
463ee0b2 245{
3280af22
NIS
246 char *oldbp = PL_bufptr;
247 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 248
1189a94a
GS
249 if (!s)
250 s = oldbp;
07c798fb 251 else
1189a94a 252 PL_bufptr = s;
cea2e8a9 253 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 254 if (is_first)
cea2e8a9 255 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 256 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 257 char *t;
7e2040f0 258 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 259 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 260 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 261 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 262 }
07c798fb
HS
263 else {
264 assert(s >= oldbp);
cea2e8a9 265 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 266 }
3280af22 267 PL_bufptr = oldbp;
8990e307
LW
268}
269
ffb4593c
NT
270/*
271 * S_missingterm
272 * Complain about missing quote/regexp/heredoc terminator.
273 * If it's called with (char *)NULL then it cauterizes the line buffer.
274 * If we're in a delimited string and the delimiter is a control
275 * character, it's reformatted into a two-char sequence like ^C.
276 * This is fatal.
277 */
278
76e3520e 279STATIC void
cea2e8a9 280S_missingterm(pTHX_ char *s)
8990e307
LW
281{
282 char tmpbuf[3];
283 char q;
284 if (s) {
285 char *nl = strrchr(s,'\n');
d2719217 286 if (nl)
8990e307
LW
287 *nl = '\0';
288 }
9d116dd7
JH
289 else if (
290#ifdef EBCDIC
291 iscntrl(PL_multi_close)
292#else
293 PL_multi_close < 32 || PL_multi_close == 127
294#endif
295 ) {
8990e307 296 *tmpbuf = '^';
3280af22 297 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
298 s = "\\n";
299 tmpbuf[2] = '\0';
300 s = tmpbuf;
301 }
302 else {
3280af22 303 *tmpbuf = PL_multi_close;
8990e307
LW
304 tmpbuf[1] = '\0';
305 s = tmpbuf;
306 }
307 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 308 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 309}
79072805 310
ffb4593c
NT
311/*
312 * Perl_deprecate
ffb4593c
NT
313 */
314
79072805 315void
864dbfa3 316Perl_deprecate(pTHX_ char *s)
a0d0e21e 317{
599cee73 318 if (ckWARN(WARN_DEPRECATED))
9014280d 319 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
320}
321
12bcd1a6
PM
322void
323Perl_deprecate_old(pTHX_ char *s)
324{
325 /* This function should NOT be called for any new deprecated warnings */
326 /* Use Perl_deprecate instead */
327 /* */
328 /* It is here to maintain backward compatibility with the pre-5.8 */
329 /* warnings category hierarchy. The "deprecated" category used to */
330 /* live under the "syntax" category. It is now a top-level category */
331 /* in its own right. */
332
333 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
334 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
335 "Use of %s is deprecated", s);
336}
337
ffb4593c
NT
338/*
339 * depcom
9cbb5ea2 340 * Deprecate a comma-less variable list.
ffb4593c
NT
341 */
342
76e3520e 343STATIC void
cea2e8a9 344S_depcom(pTHX)
a0d0e21e 345{
12bcd1a6 346 deprecate_old("comma-less variable list");
a0d0e21e
LW
347}
348
ffb4593c 349/*
9cbb5ea2
GS
350 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
351 * utf16-to-utf8-reversed.
ffb4593c
NT
352 */
353
c39cd008
GS
354#ifdef PERL_CR_FILTER
355static void
356strip_return(SV *sv)
357{
358 register char *s = SvPVX(sv);
359 register char *e = s + SvCUR(sv);
360 /* outer loop optimized to do nothing if there are no CR-LFs */
361 while (s < e) {
362 if (*s++ == '\r' && *s == '\n') {
363 /* hit a CR-LF, need to copy the rest */
364 register char *d = s - 1;
365 *d++ = *s++;
366 while (s < e) {
367 if (*s == '\r' && s[1] == '\n')
368 s++;
369 *d++ = *s++;
370 }
371 SvCUR(sv) -= s - d;
372 return;
373 }
374 }
375}
a868473f 376
76e3520e 377STATIC I32
c39cd008 378S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 379{
c39cd008
GS
380 I32 count = FILTER_READ(idx+1, sv, maxlen);
381 if (count > 0 && !maxlen)
382 strip_return(sv);
383 return count;
a868473f
NIS
384}
385#endif
386
ffb4593c
NT
387/*
388 * Perl_lex_start
9cbb5ea2
GS
389 * Initialize variables. Uses the Perl save_stack to save its state (for
390 * recursive calls to the parser).
ffb4593c
NT
391 */
392
a0d0e21e 393void
864dbfa3 394Perl_lex_start(pTHX_ SV *line)
79072805 395{
8990e307
LW
396 char *s;
397 STRLEN len;
398
3280af22
NIS
399 SAVEI32(PL_lex_dojoin);
400 SAVEI32(PL_lex_brackets);
3280af22
NIS
401 SAVEI32(PL_lex_casemods);
402 SAVEI32(PL_lex_starts);
403 SAVEI32(PL_lex_state);
7766f137 404 SAVEVPTR(PL_lex_inpat);
3280af22 405 SAVEI32(PL_lex_inwhat);
18b09519
GS
406 if (PL_lex_state == LEX_KNOWNEXT) {
407 I32 toke = PL_nexttoke;
408 while (--toke >= 0) {
409 SAVEI32(PL_nexttype[toke]);
410 SAVEVPTR(PL_nextval[toke]);
411 }
412 SAVEI32(PL_nexttoke);
18b09519 413 }
57843af0 414 SAVECOPLINE(PL_curcop);
3280af22
NIS
415 SAVEPPTR(PL_bufptr);
416 SAVEPPTR(PL_bufend);
417 SAVEPPTR(PL_oldbufptr);
418 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
419 SAVEPPTR(PL_last_lop);
420 SAVEPPTR(PL_last_uni);
3280af22
NIS
421 SAVEPPTR(PL_linestart);
422 SAVESPTR(PL_linestr);
423 SAVEPPTR(PL_lex_brackstack);
424 SAVEPPTR(PL_lex_casestack);
c76ac1ee 425 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
426 SAVESPTR(PL_lex_stuff);
427 SAVEI32(PL_lex_defer);
09bef843 428 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 429 SAVESPTR(PL_lex_repl);
bebdddfc
GS
430 SAVEINT(PL_expect);
431 SAVEINT(PL_lex_expect);
3280af22
NIS
432
433 PL_lex_state = LEX_NORMAL;
434 PL_lex_defer = 0;
435 PL_expect = XSTATE;
436 PL_lex_brackets = 0;
3280af22
NIS
437 New(899, PL_lex_brackstack, 120, char);
438 New(899, PL_lex_casestack, 12, char);
439 SAVEFREEPV(PL_lex_brackstack);
440 SAVEFREEPV(PL_lex_casestack);
441 PL_lex_casemods = 0;
442 *PL_lex_casestack = '\0';
443 PL_lex_dojoin = 0;
444 PL_lex_starts = 0;
445 PL_lex_stuff = Nullsv;
446 PL_lex_repl = Nullsv;
447 PL_lex_inpat = 0;
76be56bc 448 PL_nexttoke = 0;
3280af22 449 PL_lex_inwhat = 0;
09bef843 450 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
451 PL_linestr = line;
452 if (SvREADONLY(PL_linestr))
453 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
454 s = SvPV(PL_linestr, len);
8990e307 455 if (len && s[len-1] != ';') {
3280af22
NIS
456 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
457 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
458 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 459 }
3280af22
NIS
460 SvTEMP_off(PL_linestr);
461 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
462 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 463 PL_last_lop = PL_last_uni = Nullch;
3280af22 464 PL_rsfp = 0;
79072805 465}
a687059c 466
ffb4593c
NT
467/*
468 * Perl_lex_end
9cbb5ea2
GS
469 * Finalizer for lexing operations. Must be called when the parser is
470 * done with the lexer.
ffb4593c
NT
471 */
472
463ee0b2 473void
864dbfa3 474Perl_lex_end(pTHX)
463ee0b2 475{
3280af22 476 PL_doextract = FALSE;
463ee0b2
LW
477}
478
ffb4593c
NT
479/*
480 * S_incline
481 * This subroutine has nothing to do with tilting, whether at windmills
482 * or pinball tables. Its name is short for "increment line". It
57843af0 483 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 484 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
485 * # line 500 "foo.pm"
486 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
487 */
488
76e3520e 489STATIC void
cea2e8a9 490S_incline(pTHX_ char *s)
463ee0b2
LW
491{
492 char *t;
493 char *n;
73659bf1 494 char *e;
463ee0b2 495 char ch;
463ee0b2 496
57843af0 497 CopLINE_inc(PL_curcop);
463ee0b2
LW
498 if (*s++ != '#')
499 return;
bf4acbe4 500 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
501 if (strnEQ(s, "line", 4))
502 s += 4;
503 else
504 return;
084592ab 505 if (SPACE_OR_TAB(*s))
73659bf1 506 s++;
4e553d73 507 else
73659bf1 508 return;
bf4acbe4 509 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
510 if (!isDIGIT(*s))
511 return;
512 n = s;
513 while (isDIGIT(*s))
514 s++;
bf4acbe4 515 while (SPACE_OR_TAB(*s))
463ee0b2 516 s++;
73659bf1 517 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 518 s++;
73659bf1
GS
519 e = t + 1;
520 }
463ee0b2 521 else {
463ee0b2 522 for (t = s; !isSPACE(*t); t++) ;
73659bf1 523 e = t;
463ee0b2 524 }
bf4acbe4 525 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
526 e++;
527 if (*e != '\n' && *e != '\0')
528 return; /* false alarm */
529
463ee0b2
LW
530 ch = *t;
531 *t = '\0';
f4dd75d9 532 if (t - s > 0) {
05ec9bb3 533 CopFILE_free(PL_curcop);
57843af0 534 CopFILE_set(PL_curcop, s);
f4dd75d9 535 }
463ee0b2 536 *t = ch;
57843af0 537 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
538}
539
ffb4593c
NT
540/*
541 * S_skipspace
542 * Called to gobble the appropriate amount and type of whitespace.
543 * Skips comments as well.
544 */
545
76e3520e 546STATIC char *
cea2e8a9 547S_skipspace(pTHX_ register char *s)
a687059c 548{
3280af22 549 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 550 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
551 s++;
552 return s;
553 }
554 for (;;) {
fd049845 555 STRLEN prevlen;
09bef843 556 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 557 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
558 while (s < PL_bufend && isSPACE(*s)) {
559 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
560 incline(s);
561 }
ffb4593c
NT
562
563 /* comment */
3280af22
NIS
564 if (s < PL_bufend && *s == '#') {
565 while (s < PL_bufend && *s != '\n')
463ee0b2 566 s++;
60e6418e 567 if (s < PL_bufend) {
463ee0b2 568 s++;
60e6418e
GS
569 if (PL_in_eval && !PL_rsfp) {
570 incline(s);
571 continue;
572 }
573 }
463ee0b2 574 }
ffb4593c
NT
575
576 /* only continue to recharge the buffer if we're at the end
577 * of the buffer, we're not reading from a source filter, and
578 * we're in normal lexing mode
579 */
09bef843
SB
580 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
581 PL_lex_state == LEX_FORMLINE)
463ee0b2 582 return s;
ffb4593c
NT
583
584 /* try to recharge the buffer */
9cbb5ea2
GS
585 if ((s = filter_gets(PL_linestr, PL_rsfp,
586 (prevlen = SvCUR(PL_linestr)))) == Nullch)
587 {
588 /* end of file. Add on the -p or -n magic */
3280af22
NIS
589 if (PL_minus_n || PL_minus_p) {
590 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
591 ";}continue{print or die qq(-p destination: $!\\n)" :
592 "");
3280af22
NIS
593 sv_catpv(PL_linestr,";}");
594 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
595 }
596 else
3280af22 597 sv_setpv(PL_linestr,";");
ffb4593c
NT
598
599 /* reset variables for next time we lex */
9cbb5ea2
GS
600 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
601 = SvPVX(PL_linestr);
3280af22 602 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 603 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
604
605 /* Close the filehandle. Could be from -P preprocessor,
606 * STDIN, or a regular file. If we were reading code from
607 * STDIN (because the commandline held no -e or filename)
608 * then we don't close it, we reset it so the code can
609 * read from STDIN too.
610 */
611
3280af22
NIS
612 if (PL_preprocess && !PL_in_eval)
613 (void)PerlProc_pclose(PL_rsfp);
614 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
615 PerlIO_clearerr(PL_rsfp);
8990e307 616 else
3280af22
NIS
617 (void)PerlIO_close(PL_rsfp);
618 PL_rsfp = Nullfp;
463ee0b2
LW
619 return s;
620 }
ffb4593c
NT
621
622 /* not at end of file, so we only read another line */
09bef843
SB
623 /* make corresponding updates to old pointers, for yyerror() */
624 oldprevlen = PL_oldbufptr - PL_bufend;
625 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
626 if (PL_last_uni)
627 oldunilen = PL_last_uni - PL_bufend;
628 if (PL_last_lop)
629 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
630 PL_linestart = PL_bufptr = s + prevlen;
631 PL_bufend = s + SvCUR(PL_linestr);
632 s = PL_bufptr;
09bef843
SB
633 PL_oldbufptr = s + oldprevlen;
634 PL_oldoldbufptr = s + oldoldprevlen;
635 if (PL_last_uni)
636 PL_last_uni = s + oldunilen;
637 if (PL_last_lop)
638 PL_last_lop = s + oldloplen;
a0d0e21e 639 incline(s);
ffb4593c
NT
640
641 /* debugger active and we're not compiling the debugger code,
642 * so store the line into the debugger's array of lines
643 */
3280af22 644 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
645 SV *sv = NEWSV(85,0);
646
647 sv_upgrade(sv, SVt_PVMG);
3280af22 648 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
649 (void)SvIOK_on(sv);
650 SvIVX(sv) = 0;
57843af0 651 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 652 }
463ee0b2 653 }
a687059c 654}
378cc40b 655
ffb4593c
NT
656/*
657 * S_check_uni
658 * Check the unary operators to ensure there's no ambiguity in how they're
659 * used. An ambiguous piece of code would be:
660 * rand + 5
661 * This doesn't mean rand() + 5. Because rand() is a unary operator,
662 * the +5 is its argument.
663 */
664
76e3520e 665STATIC void
cea2e8a9 666S_check_uni(pTHX)
ba106d47 667{
2f3197b3 668 char *s;
a0d0e21e 669 char *t;
2f3197b3 670
3280af22 671 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 672 return;
3280af22
NIS
673 while (isSPACE(*PL_last_uni))
674 PL_last_uni++;
7e2040f0 675 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 676 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 677 return;
0453d815 678 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 679 char ch = *s;
0453d815 680 *s = '\0';
9014280d 681 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 682 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
683 PL_last_uni);
684 *s = ch;
685 }
2f3197b3
LW
686}
687
ffb4593c
NT
688/*
689 * LOP : macro to build a list operator. Its behaviour has been replaced
690 * with a subroutine, S_lop() for which LOP is just another name.
691 */
692
a0d0e21e
LW
693#define LOP(f,x) return lop(f,x,s)
694
ffb4593c
NT
695/*
696 * S_lop
697 * Build a list operator (or something that might be one). The rules:
698 * - if we have a next token, then it's a list operator [why?]
699 * - if the next thing is an opening paren, then it's a function
700 * - else it's a list operator
701 */
702
76e3520e 703STATIC I32
a0be28da 704S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 705{
79072805 706 yylval.ival = f;
35c8bce7 707 CLINE;
075953c3 708 REPORT("lop", f)
3280af22
NIS
709 PL_expect = x;
710 PL_bufptr = s;
711 PL_last_lop = PL_oldbufptr;
712 PL_last_lop_op = f;
713 if (PL_nexttoke)
a0d0e21e 714 return LSTOP;
79072805
LW
715 if (*s == '(')
716 return FUNC;
717 s = skipspace(s);
718 if (*s == '(')
719 return FUNC;
720 else
721 return LSTOP;
722}
723
ffb4593c
NT
724/*
725 * S_force_next
9cbb5ea2 726 * When the lexer realizes it knows the next token (for instance,
ffb4593c 727 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
728 * to know what token to return the next time the lexer is called. Caller
729 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
730 * handles the token correctly.
ffb4593c
NT
731 */
732
4e553d73 733STATIC void
cea2e8a9 734S_force_next(pTHX_ I32 type)
79072805 735{
3280af22
NIS
736 PL_nexttype[PL_nexttoke] = type;
737 PL_nexttoke++;
738 if (PL_lex_state != LEX_KNOWNEXT) {
739 PL_lex_defer = PL_lex_state;
740 PL_lex_expect = PL_expect;
741 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
742 }
743}
744
ffb4593c
NT
745/*
746 * S_force_word
747 * When the lexer knows the next thing is a word (for instance, it has
748 * just seen -> and it knows that the next char is a word char, then
749 * it calls S_force_word to stick the next word into the PL_next lookahead.
750 *
751 * Arguments:
b1b65b59 752 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
753 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
754 * int check_keyword : if true, Perl checks to make sure the word isn't
755 * a keyword (do this if the word is a label, e.g. goto FOO)
756 * int allow_pack : if true, : characters will also be allowed (require,
757 * use, etc. do this)
9cbb5ea2 758 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
759 */
760
76e3520e 761STATIC char *
cea2e8a9 762S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 763{
463ee0b2
LW
764 register char *s;
765 STRLEN len;
4e553d73 766
463ee0b2
LW
767 start = skipspace(start);
768 s = start;
7e2040f0 769 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 770 (allow_pack && *s == ':') ||
15f0808c 771 (allow_initial_tick && *s == '\'') )
a0d0e21e 772 {
3280af22
NIS
773 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
774 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
775 return start;
776 if (token == METHOD) {
777 s = skipspace(s);
778 if (*s == '(')
3280af22 779 PL_expect = XTERM;
463ee0b2 780 else {
3280af22 781 PL_expect = XOPERATOR;
463ee0b2 782 }
79072805 783 }
3280af22
NIS
784 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
785 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
786 force_next(token);
787 }
788 return s;
789}
790
ffb4593c
NT
791/*
792 * S_force_ident
9cbb5ea2 793 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
794 * text only contains the "foo" portion. The first argument is a pointer
795 * to the "foo", and the second argument is the type symbol to prefix.
796 * Forces the next token to be a "WORD".
9cbb5ea2 797 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
798 */
799
76e3520e 800STATIC void
cea2e8a9 801S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
802{
803 if (s && *s) {
11343788 804 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 805 PL_nextval[PL_nexttoke].opval = o;
79072805 806 force_next(WORD);
748a9306 807 if (kind) {
11343788 808 o->op_private = OPpCONST_ENTERED;
55497cff
PP
809 /* XXX see note in pp_entereval() for why we forgo typo
810 warnings if the symbol must be introduced in an eval.
811 GSAR 96-10-12 */
3280af22 812 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
813 kind == '$' ? SVt_PV :
814 kind == '@' ? SVt_PVAV :
815 kind == '%' ? SVt_PVHV :
816 SVt_PVGV
817 );
748a9306 818 }
79072805
LW
819 }
820}
821
1571675a
GS
822NV
823Perl_str_to_version(pTHX_ SV *sv)
824{
825 NV retval = 0.0;
826 NV nshift = 1.0;
827 STRLEN len;
828 char *start = SvPVx(sv,len);
3aa33fe5 829 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
830 char *end = start + len;
831 while (start < end) {
ba210ebe 832 STRLEN skip;
1571675a
GS
833 UV n;
834 if (utf)
9041c2e3 835 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
836 else {
837 n = *(U8*)start;
838 skip = 1;
839 }
840 retval += ((NV)n)/nshift;
841 start += skip;
842 nshift *= 1000;
843 }
844 return retval;
845}
846
4e553d73 847/*
ffb4593c
NT
848 * S_force_version
849 * Forces the next token to be a version number.
e759cc13
RGS
850 * If the next token appears to be an invalid version number, (e.g. "v2b"),
851 * and if "guessing" is TRUE, then no new token is created (and the caller
852 * must use an alternative parsing method).
ffb4593c
NT
853 */
854
76e3520e 855STATIC char *
e759cc13 856S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
857{
858 OP *version = Nullop;
44dcb63b 859 char *d;
89bfa8cd
PP
860
861 s = skipspace(s);
862
44dcb63b 863 d = s;
dd629d5b 864 if (*d == 'v')
44dcb63b 865 d++;
44dcb63b 866 if (isDIGIT(*d)) {
e759cc13
RGS
867 while (isDIGIT(*d) || *d == '_' || *d == '.')
868 d++;
9f3d182e 869 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 870 SV *ver;
b73d6f50 871 s = scan_num(s, &yylval);
89bfa8cd 872 version = yylval.opval;
dd629d5b
GS
873 ver = cSVOPx(version)->op_sv;
874 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 875 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
876 SvNVX(ver) = str_to_version(ver);
877 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 878 }
89bfa8cd 879 }
e759cc13
RGS
880 else if (guessing)
881 return s;
89bfa8cd
PP
882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
4e553d73 886 force_next(WORD);
89bfa8cd 887
e759cc13 888 return s;
89bfa8cd
PP
889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
be4731d2 920 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
922 if (SvUTF8(sv))
923 SvUTF8_on(pv);
924 }
79072805
LW
925 while (s < send) {
926 if (*s == '\\') {
a0d0e21e 927 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
928 s++; /* all that, just for this */
929 }
930 *d++ = *s++;
931 }
932 *d = '\0';
463ee0b2 933 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 934 finish:
3280af22 935 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 936 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
937 return sv;
938}
939
ffb4593c
NT
940/*
941 * Now come three functions related to double-quote context,
942 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
943 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
944 * interact with PL_lex_state, and create fake ( ... ) argument lists
945 * to handle functions and concatenation.
946 * They assume that whoever calls them will be setting up a fake
947 * join call, because each subthing puts a ',' after it. This lets
948 * "lower \luPpEr"
949 * become
950 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
951 *
952 * (I'm not sure whether the spurious commas at the end of lcfirst's
953 * arguments and join's arguments are created or not).
954 */
955
956/*
957 * S_sublex_start
958 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
959 *
960 * Pattern matching will set PL_lex_op to the pattern-matching op to
961 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
962 *
963 * OP_CONST and OP_READLINE are easy--just make the new op and return.
964 *
965 * Everything else becomes a FUNC.
966 *
967 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
968 * had an OP_CONST or OP_READLINE). This just sets us up for a
969 * call to S_sublex_push().
970 */
971
76e3520e 972STATIC I32
cea2e8a9 973S_sublex_start(pTHX)
79072805
LW
974{
975 register I32 op_type = yylval.ival;
79072805
LW
976
977 if (op_type == OP_NULL) {
3280af22
NIS
978 yylval.opval = PL_lex_op;
979 PL_lex_op = Nullop;
79072805
LW
980 return THING;
981 }
982 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 983 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
984
985 if (SvTYPE(sv) == SVt_PVIV) {
986 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
987 STRLEN len;
988 char *p;
989 SV *nsv;
990
991 p = SvPV(sv, len);
79cb57f6 992 nsv = newSVpvn(p, len);
01ec43d0
GS
993 if (SvUTF8(sv))
994 SvUTF8_on(nsv);
b3ac6de7
IZ
995 SvREFCNT_dec(sv);
996 sv = nsv;
4e553d73 997 }
b3ac6de7 998 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 999 PL_lex_stuff = Nullsv;
79072805
LW
1000 return THING;
1001 }
1002
3280af22
NIS
1003 PL_sublex_info.super_state = PL_lex_state;
1004 PL_sublex_info.sub_inwhat = op_type;
1005 PL_sublex_info.sub_op = PL_lex_op;
1006 PL_lex_state = LEX_INTERPPUSH;
55497cff 1007
3280af22
NIS
1008 PL_expect = XTERM;
1009 if (PL_lex_op) {
1010 yylval.opval = PL_lex_op;
1011 PL_lex_op = Nullop;
55497cff
PP
1012 return PMFUNC;
1013 }
1014 else
1015 return FUNC;
1016}
1017
ffb4593c
NT
1018/*
1019 * S_sublex_push
1020 * Create a new scope to save the lexing state. The scope will be
1021 * ended in S_sublex_done. Returns a '(', starting the function arguments
1022 * to the uc, lc, etc. found before.
1023 * Sets PL_lex_state to LEX_INTERPCONCAT.
1024 */
1025
76e3520e 1026STATIC I32
cea2e8a9 1027S_sublex_push(pTHX)
55497cff 1028{
f46d017c 1029 ENTER;
55497cff 1030
3280af22
NIS
1031 PL_lex_state = PL_sublex_info.super_state;
1032 SAVEI32(PL_lex_dojoin);
1033 SAVEI32(PL_lex_brackets);
3280af22
NIS
1034 SAVEI32(PL_lex_casemods);
1035 SAVEI32(PL_lex_starts);
1036 SAVEI32(PL_lex_state);
7766f137 1037 SAVEVPTR(PL_lex_inpat);
3280af22 1038 SAVEI32(PL_lex_inwhat);
57843af0 1039 SAVECOPLINE(PL_curcop);
3280af22 1040 SAVEPPTR(PL_bufptr);
8452ff4b 1041 SAVEPPTR(PL_bufend);
3280af22
NIS
1042 SAVEPPTR(PL_oldbufptr);
1043 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1044 SAVEPPTR(PL_last_lop);
1045 SAVEPPTR(PL_last_uni);
3280af22
NIS
1046 SAVEPPTR(PL_linestart);
1047 SAVESPTR(PL_linestr);
1048 SAVEPPTR(PL_lex_brackstack);
1049 SAVEPPTR(PL_lex_casestack);
1050
1051 PL_linestr = PL_lex_stuff;
1052 PL_lex_stuff = Nullsv;
1053
9cbb5ea2
GS
1054 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1055 = SvPVX(PL_linestr);
3280af22 1056 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1057 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1058 SAVEFREESV(PL_linestr);
1059
1060 PL_lex_dojoin = FALSE;
1061 PL_lex_brackets = 0;
3280af22
NIS
1062 New(899, PL_lex_brackstack, 120, char);
1063 New(899, PL_lex_casestack, 12, char);
1064 SAVEFREEPV(PL_lex_brackstack);
1065 SAVEFREEPV(PL_lex_casestack);
1066 PL_lex_casemods = 0;
1067 *PL_lex_casestack = '\0';
1068 PL_lex_starts = 0;
1069 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1070 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1071
1072 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1073 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1074 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1075 else
3280af22 1076 PL_lex_inpat = Nullop;
79072805 1077
55497cff 1078 return '(';
79072805
LW
1079}
1080
ffb4593c
NT
1081/*
1082 * S_sublex_done
1083 * Restores lexer state after a S_sublex_push.
1084 */
1085
76e3520e 1086STATIC I32
cea2e8a9 1087S_sublex_done(pTHX)
79072805 1088{
3280af22 1089 if (!PL_lex_starts++) {
9aa983d2
JH
1090 SV *sv = newSVpvn("",0);
1091 if (SvUTF8(PL_linestr))
1092 SvUTF8_on(sv);
3280af22 1093 PL_expect = XOPERATOR;
9aa983d2 1094 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1095 return THING;
1096 }
1097
3280af22
NIS
1098 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1099 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1100 return yylex();
79072805
LW
1101 }
1102
ffb4593c 1103 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1104 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1105 PL_linestr = PL_lex_repl;
1106 PL_lex_inpat = 0;
1107 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1108 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1109 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1110 SAVEFREESV(PL_linestr);
1111 PL_lex_dojoin = FALSE;
1112 PL_lex_brackets = 0;
3280af22
NIS
1113 PL_lex_casemods = 0;
1114 *PL_lex_casestack = '\0';
1115 PL_lex_starts = 0;
25da4f38 1116 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1117 PL_lex_state = LEX_INTERPNORMAL;
1118 PL_lex_starts++;
e9fa98b2
HS
1119 /* we don't clear PL_lex_repl here, so that we can check later
1120 whether this is an evalled subst; that means we rely on the
1121 logic to ensure sublex_done() is called again only via the
1122 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1123 }
e9fa98b2 1124 else {
3280af22 1125 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1126 PL_lex_repl = Nullsv;
1127 }
79072805 1128 return ',';
ffed7fef
LW
1129 }
1130 else {
f46d017c 1131 LEAVE;
3280af22
NIS
1132 PL_bufend = SvPVX(PL_linestr);
1133 PL_bufend += SvCUR(PL_linestr);
1134 PL_expect = XOPERATOR;
09bef843 1135 PL_sublex_info.sub_inwhat = 0;
79072805 1136 return ')';
ffed7fef
LW
1137 }
1138}
1139
02aa26ce
NT
1140/*
1141 scan_const
1142
1143 Extracts a pattern, double-quoted string, or transliteration. This
1144 is terrifying code.
1145
3280af22
NIS
1146 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1147 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1148 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1149
9b599b2a
GS
1150 Returns a pointer to the character scanned up to. Iff this is
1151 advanced from the start pointer supplied (ie if anything was
1152 successfully parsed), will leave an OP for the substring scanned
1153 in yylval. Caller must intuit reason for not parsing further
1154 by looking at the next characters herself.
1155
02aa26ce
NT
1156 In patterns:
1157 backslashes:
1158 double-quoted style: \r and \n
1159 regexp special ones: \D \s
1160 constants: \x3
1161 backrefs: \1 (deprecated in substitution replacements)
1162 case and quoting: \U \Q \E
1163 stops on @ and $, but not for $ as tail anchor
1164
1165 In transliterations:
1166 characters are VERY literal, except for - not at the start or end
1167 of the string, which indicates a range. scan_const expands the
1168 range to the full set of intermediate characters.
1169
1170 In double-quoted strings:
1171 backslashes:
1172 double-quoted style: \r and \n
1173 constants: \x3
1174 backrefs: \1 (deprecated)
1175 case and quoting: \U \Q \E
1176 stops on @ and $
1177
1178 scan_const does *not* construct ops to handle interpolated strings.
1179 It stops processing as soon as it finds an embedded $ or @ variable
1180 and leaves it to the caller to work out what's going on.
1181
1182 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1183
1184 $ in pattern could be $foo or could be tail anchor. Assumption:
1185 it's a tail anchor if $ is the last thing in the string, or if it's
1186 followed by one of ")| \n\t"
1187
1188 \1 (backreferences) are turned into $1
1189
1190 The structure of the code is
1191 while (there's a character to process) {
1192 handle transliteration ranges
1193 skip regexp comments
1194 skip # initiated comments in //x patterns
1195 check for embedded @foo
1196 check for embedded scalars
1197 if (backslash) {
1198 leave intact backslashes from leave (below)
1199 deprecate \1 in strings and sub replacements
1200 handle string-changing backslashes \l \U \Q \E, etc.
1201 switch (what was escaped) {
1202 handle - in a transliteration (becomes a literal -)
1203 handle \132 octal characters
1204 handle 0x15 hex characters
1205 handle \cV (control V)
1206 handle printf backslashes (\f, \r, \n, etc)
1207 } (end switch)
1208 } (end if backslash)
1209 } (end while character to read)
4e553d73 1210
02aa26ce
NT
1211*/
1212
76e3520e 1213STATIC char *
cea2e8a9 1214S_scan_const(pTHX_ char *start)
79072805 1215{
3280af22 1216 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1217 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1218 register char *s = start; /* start of the constant */
1219 register char *d = SvPVX(sv); /* destination for copies */
1220 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1221 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1222 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1223 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1224 UV uv;
1225
dff6d3cd 1226 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1227 PL_lex_inpat
4a2d328f 1228 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1229 : "";
79072805 1230
2b9d42f0
NIS
1231 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1232 /* If we are doing a trans and we know we want UTF8 set expectation */
1233 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1234 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1235 }
1236
1237
79072805 1238 while (s < send || dorange) {
02aa26ce 1239 /* get transliterations out of the way (they're most literal) */
3280af22 1240 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1241 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1242 if (dorange) {
1ba5c669
JH
1243 I32 i; /* current expanded character */
1244 I32 min; /* first character in range */
1245 I32 max; /* last character in range */
02aa26ce 1246
2b9d42f0 1247 if (has_utf8) {
8973db79
JH
1248 char *c = (char*)utf8_hop((U8*)d, -1);
1249 char *e = d++;
1250 while (e-- > c)
1251 *(e + 1) = *e;
25716404 1252 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1253 /* mark the range as done, and continue */
1254 dorange = FALSE;
1255 didrange = TRUE;
1256 continue;
1257 }
2b9d42f0 1258
02aa26ce 1259 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1260 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1261 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1262 d -= 2; /* eat the first char and the - */
1263
8ada0baa
JH
1264 min = (U8)*d; /* first char in range */
1265 max = (U8)d[1]; /* last char in range */
1266
c2e66d9e 1267 if (min > max) {
01ec43d0 1268 Perl_croak(aTHX_
1ba5c669
JH
1269 "Invalid [] range \"%c-%c\" in transliteration operator",
1270 (char)min, (char)max);
c2e66d9e
GS
1271 }
1272
c7f1f016 1273#ifdef EBCDIC
8ada0baa
JH
1274 if ((isLOWER(min) && isLOWER(max)) ||
1275 (isUPPER(min) && isUPPER(max))) {
1276 if (isLOWER(min)) {
1277 for (i = min; i <= max; i++)
1278 if (isLOWER(i))
db42d148 1279 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1280 } else {
1281 for (i = min; i <= max; i++)
1282 if (isUPPER(i))
db42d148 1283 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1284 }
1285 }
1286 else
1287#endif
1288 for (i = min; i <= max; i++)
1289 *d++ = i;
02aa26ce
NT
1290
1291 /* mark the range as done, and continue */
79072805 1292 dorange = FALSE;
01ec43d0 1293 didrange = TRUE;
79072805 1294 continue;
4e553d73 1295 }
02aa26ce
NT
1296
1297 /* range begins (ignore - as first or last char) */
79072805 1298 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1299 if (didrange) {
1fafa243 1300 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1301 }
2b9d42f0 1302 if (has_utf8) {
25716404 1303 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1304 s++;
1305 continue;
1306 }
79072805
LW
1307 dorange = TRUE;
1308 s++;
01ec43d0
GS
1309 }
1310 else {
1311 didrange = FALSE;
1312 }
79072805 1313 }
02aa26ce
NT
1314
1315 /* if we get here, we're not doing a transliteration */
1316
0f5d15d6
IZ
1317 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1318 except for the last char, which will be done separately. */
3280af22 1319 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1320 if (s[2] == '#') {
1321 while (s < send && *s != ')')
db42d148 1322 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1323 }
1324 else if (s[2] == '{' /* This should match regcomp.c */
1325 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1326 {
cc6b7395 1327 I32 count = 1;
0f5d15d6 1328 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1329 char c;
1330
d9f97599
GS
1331 while (count && (c = *regparse)) {
1332 if (c == '\\' && regparse[1])
1333 regparse++;
4e553d73 1334 else if (c == '{')
cc6b7395 1335 count++;
4e553d73 1336 else if (c == '}')
cc6b7395 1337 count--;
d9f97599 1338 regparse++;
cc6b7395 1339 }
5bdf89e7
IZ
1340 if (*regparse != ')') {
1341 regparse--; /* Leave one char for continuation. */
cc6b7395 1342 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1343 }
0f5d15d6 1344 while (s < regparse)
db42d148 1345 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1346 }
748a9306 1347 }
02aa26ce
NT
1348
1349 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1350 else if (*s == '#' && PL_lex_inpat &&
1351 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1352 while (s+1 < send && *s != '\n')
db42d148 1353 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1354 }
02aa26ce 1355
5d1d4326
JH
1356 /* check for embedded arrays
1357 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1358 */
7e2040f0 1359 else if (*s == '@' && s[1]
5d1d4326 1360 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1361 break;
02aa26ce
NT
1362
1363 /* check for embedded scalars. only stop if we're sure it's a
1364 variable.
1365 */
79072805 1366 else if (*s == '$') {
3280af22 1367 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1368 break;
6002328a 1369 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1370 break; /* in regexp, $ might be tail anchor */
1371 }
02aa26ce 1372
2b9d42f0
NIS
1373 /* End of else if chain - OP_TRANS rejoin rest */
1374
02aa26ce 1375 /* backslashes */
79072805
LW
1376 if (*s == '\\' && s+1 < send) {
1377 s++;
02aa26ce
NT
1378
1379 /* some backslashes we leave behind */
c9f97d15 1380 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1381 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1382 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1383 continue;
1384 }
02aa26ce
NT
1385
1386 /* deprecate \1 in strings and substitution replacements */
3280af22 1387 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1388 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1389 {
599cee73 1390 if (ckWARN(WARN_SYNTAX))
9014280d 1391 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1392 *--s = '$';
1393 break;
1394 }
02aa26ce
NT
1395
1396 /* string-change backslash escapes */
3280af22 1397 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1398 --s;
1399 break;
1400 }
02aa26ce
NT
1401
1402 /* if we get here, it's either a quoted -, or a digit */
79072805 1403 switch (*s) {
02aa26ce
NT
1404
1405 /* quoted - in transliterations */
79072805 1406 case '-':
3280af22 1407 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1408 *d++ = *s++;
1409 continue;
1410 }
1411 /* FALL THROUGH */
1412 default:
11b8faa4 1413 {
707afd92
MS
1414 if (ckWARN(WARN_MISC) &&
1415 isALNUM(*s) &&
1416 *s != '_')
9014280d 1417 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1418 "Unrecognized escape \\%c passed through",
1419 *s);
1420 /* default action is to copy the quoted character */
f9a63242 1421 goto default_action;
11b8faa4 1422 }
02aa26ce
NT
1423
1424 /* \132 indicates an octal constant */
79072805
LW
1425 case '0': case '1': case '2': case '3':
1426 case '4': case '5': case '6': case '7':
ba210ebe 1427 {
53305cf1
NC
1428 I32 flags = 0;
1429 STRLEN len = 3;
1430 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1431 s += len;
1432 }
012bcf8d 1433 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1434
1435 /* \x24 indicates a hex constant */
79072805 1436 case 'x':
a0ed51b3
LW
1437 ++s;
1438 if (*s == '{') {
1439 char* e = strchr(s, '}');
a4c04bdc
NC
1440 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1441 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1442 STRLEN len;
355860ce 1443
53305cf1 1444 ++s;
adaeee49 1445 if (!e) {
a0ed51b3 1446 yyerror("Missing right brace on \\x{}");
355860ce 1447 continue;
ba210ebe 1448 }
53305cf1
NC
1449 len = e - s;
1450 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1451 s = e + 1;
a0ed51b3
LW
1452 }
1453 else {
ba210ebe 1454 {
53305cf1 1455 STRLEN len = 2;
a4c04bdc 1456 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1457 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1458 s += len;
1459 }
012bcf8d
GS
1460 }
1461
1462 NUM_ESCAPE_INSERT:
1463 /* Insert oct or hex escaped character.
301d3d20 1464 * There will always enough room in sv since such
db42d148 1465 * escapes will be longer than any UTF-8 sequence
301d3d20 1466 * they can end up as. */
ba7cea30 1467
c7f1f016
NIS
1468 /* We need to map to chars to ASCII before doing the tests
1469 to cover EBCDIC
1470 */
c4d5f83a 1471 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1472 if (!has_utf8 && uv > 255) {
301d3d20
JH
1473 /* Might need to recode whatever we have
1474 * accumulated so far if it contains any
1475 * hibit chars.
1476 *
1477 * (Can't we keep track of that and avoid
1478 * this rescan? --jhi)
012bcf8d 1479 */
c7f1f016 1480 int hicount = 0;
63cd0674
NIS
1481 U8 *c;
1482 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1483 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1484 hicount++;
db42d148 1485 }
012bcf8d 1486 }
63cd0674 1487 if (hicount) {
db42d148
NIS
1488 STRLEN offset = d - SvPVX(sv);
1489 U8 *src, *dst;
1490 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1491 src = (U8 *)d - 1;
1492 dst = src+hicount;
1493 d += hicount;
1494 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1495 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1496 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1497 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1498 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1499 }
1500 else {
63cd0674 1501 *dst-- = *src;
012bcf8d 1502 }
c7f1f016 1503 src--;
012bcf8d
GS
1504 }
1505 }
1506 }
1507
9aa983d2 1508 if (has_utf8 || uv > 255) {
9041c2e3 1509 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1510 has_utf8 = TRUE;
f9a63242
JH
1511 if (PL_lex_inwhat == OP_TRANS &&
1512 PL_sublex_info.sub_op) {
1513 PL_sublex_info.sub_op->op_private |=
1514 (PL_lex_repl ? OPpTRANS_FROM_UTF
1515 : OPpTRANS_TO_UTF);
f9a63242 1516 }
012bcf8d 1517 }
a0ed51b3 1518 else {
012bcf8d 1519 *d++ = (char)uv;
a0ed51b3 1520 }
012bcf8d
GS
1521 }
1522 else {
c4d5f83a 1523 *d++ = (char) uv;
a0ed51b3 1524 }
79072805 1525 continue;
02aa26ce 1526
b239daa5 1527 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1528 case 'N':
55eda711 1529 ++s;
423cee85
JH
1530 if (*s == '{') {
1531 char* e = strchr(s, '}');
155aba94 1532 SV *res;
423cee85
JH
1533 STRLEN len;
1534 char *str;
4e553d73 1535
423cee85 1536 if (!e) {
5777a3f7 1537 yyerror("Missing right brace on \\N{}");
423cee85
JH
1538 e = s - 1;
1539 goto cont_scan;
1540 }
55eda711
JH
1541 res = newSVpvn(s + 1, e - s - 1);
1542 res = new_constant( Nullch, 0, "charnames",
1543 res, Nullsv, "\\N{...}" );
f9a63242
JH
1544 if (has_utf8)
1545 sv_utf8_upgrade(res);
423cee85 1546 str = SvPV(res,len);
1c47067b
JH
1547#ifdef EBCDIC_NEVER_MIND
1548 /* charnames uses pack U and that has been
1549 * recently changed to do the below uni->native
1550 * mapping, so this would be redundant (and wrong,
1551 * the code point would be doubly converted).
1552 * But leave this in just in case the pack U change
1553 * gets revoked, but the semantics is still
1554 * desireable for charnames. --jhi */
cddc7ef4
JH
1555 {
1556 UV uv = utf8_to_uvchr((U8*)str, 0);
1557
1558 if (uv < 0x100) {
1559 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1560
1561 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1562 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1563 str = SvPV(res, len);
1564 }
1565 }
1566#endif
89491803 1567 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1568 char *ostart = SvPVX(sv);
1569 SvCUR_set(sv, d - ostart);
1570 SvPOK_on(sv);
e4f3eed8 1571 *d = '\0';
f08d6ad9 1572 sv_utf8_upgrade(sv);
d2f449dd
SB
1573 /* this just broke our allocation above... */
1574 SvGROW(sv, send - start);
f08d6ad9 1575 d = SvPVX(sv) + SvCUR(sv);
89491803 1576 has_utf8 = TRUE;
f08d6ad9 1577 }
b239daa5 1578 if (len > e - s + 4) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1579 char *odest = SvPVX(sv);
1580
8973db79 1581 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1582 d = SvPVX(sv) + (d - odest);
1583 }
1584 Copy(str, d, len, char);
1585 d += len;
1586 SvREFCNT_dec(res);
1587 cont_scan:
1588 s = e + 1;
1589 }
1590 else
5777a3f7 1591 yyerror("Missing braces on \\N{}");
423cee85
JH
1592 continue;
1593
02aa26ce 1594 /* \c is a control character */
79072805
LW
1595 case 'c':
1596 s++;
ba210ebe
JH
1597 {
1598 U8 c = *s++;
c7f1f016
NIS
1599#ifdef EBCDIC
1600 if (isLOWER(c))
1601 c = toUPPER(c);
1602#endif
db42d148 1603 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1604 }
79072805 1605 continue;
02aa26ce
NT
1606
1607 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1608 case 'b':
db42d148 1609 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1610 break;
1611 case 'n':
db42d148 1612 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1613 break;
1614 case 'r':
db42d148 1615 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1616 break;
1617 case 'f':
db42d148 1618 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1619 break;
1620 case 't':
db42d148 1621 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1622 break;
34a3fe2a 1623 case 'e':
db42d148 1624 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1625 break;
1626 case 'a':
db42d148 1627 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1628 break;
02aa26ce
NT
1629 } /* end switch */
1630
79072805
LW
1631 s++;
1632 continue;
02aa26ce
NT
1633 } /* end if (backslash) */
1634
f9a63242 1635 default_action:
2b9d42f0
NIS
1636 /* If we started with encoded form, or already know we want it
1637 and then encode the next character */
1638 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1639 STRLEN len = 1;
1640 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1641 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1642 s += len;
1643 if (need > len) {
1644 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1645 STRLEN off = d - SvPVX(sv);
1646 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1647 }
1648 d = (char*)uvchr_to_utf8((U8*)d, uv);
1649 has_utf8 = TRUE;
1650 }
1651 else {
1652 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1653 }
02aa26ce
NT
1654 } /* while loop to process each character */
1655
1656 /* terminate the string and set up the sv */
79072805 1657 *d = '\0';
463ee0b2 1658 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1659 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1660 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1661
79072805 1662 SvPOK_on(sv);
9f4817db 1663 if (PL_encoding && !has_utf8) {
799ef3cb 1664 sv_recode_to_utf8(sv, PL_encoding);
9f4817db
JH
1665 has_utf8 = TRUE;
1666 }
2b9d42f0 1667 if (has_utf8) {
7e2040f0 1668 SvUTF8_on(sv);
2b9d42f0
NIS
1669 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1670 PL_sublex_info.sub_op->op_private |=
1671 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1672 }
1673 }
79072805 1674
02aa26ce 1675 /* shrink the sv if we allocated more than we used */
79072805
LW
1676 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1677 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1678 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1679 }
02aa26ce 1680
9b599b2a 1681 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1682 if (s > PL_bufptr) {
1683 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1684 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1685 sv, Nullsv,
4e553d73 1686 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1687 ? "tr"
3280af22 1688 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1689 ? "s"
1690 : "qq")));
79072805 1691 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1692 } else
8990e307 1693 SvREFCNT_dec(sv);
79072805
LW
1694 return s;
1695}
1696
ffb4593c
NT
1697/* S_intuit_more
1698 * Returns TRUE if there's more to the expression (e.g., a subscript),
1699 * FALSE otherwise.
ffb4593c
NT
1700 *
1701 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1702 *
1703 * ->[ and ->{ return TRUE
1704 * { and [ outside a pattern are always subscripts, so return TRUE
1705 * if we're outside a pattern and it's not { or [, then return FALSE
1706 * if we're in a pattern and the first char is a {
1707 * {4,5} (any digits around the comma) returns FALSE
1708 * if we're in a pattern and the first char is a [
1709 * [] returns FALSE
1710 * [SOMETHING] has a funky algorithm to decide whether it's a
1711 * character class or not. It has to deal with things like
1712 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1713 * anything else returns TRUE
1714 */
1715
9cbb5ea2
GS
1716/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1717
76e3520e 1718STATIC int
cea2e8a9 1719S_intuit_more(pTHX_ register char *s)
79072805 1720{
3280af22 1721 if (PL_lex_brackets)
79072805
LW
1722 return TRUE;
1723 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1724 return TRUE;
1725 if (*s != '{' && *s != '[')
1726 return FALSE;
3280af22 1727 if (!PL_lex_inpat)
79072805
LW
1728 return TRUE;
1729
1730 /* In a pattern, so maybe we have {n,m}. */
1731 if (*s == '{') {
1732 s++;
1733 if (!isDIGIT(*s))
1734 return TRUE;
1735 while (isDIGIT(*s))
1736 s++;
1737 if (*s == ',')
1738 s++;
1739 while (isDIGIT(*s))
1740 s++;
1741 if (*s == '}')
1742 return FALSE;
1743 return TRUE;
1744
1745 }
1746
1747 /* On the other hand, maybe we have a character class */
1748
1749 s++;
1750 if (*s == ']' || *s == '^')
1751 return FALSE;
1752 else {
ffb4593c 1753 /* this is terrifying, and it works */
79072805
LW
1754 int weight = 2; /* let's weigh the evidence */
1755 char seen[256];
f27ffc4a 1756 unsigned char un_char = 255, last_un_char;
93a17b20 1757 char *send = strchr(s,']');
3280af22 1758 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1759
1760 if (!send) /* has to be an expression */
1761 return TRUE;
1762
1763 Zero(seen,256,char);
1764 if (*s == '$')
1765 weight -= 3;
1766 else if (isDIGIT(*s)) {
1767 if (s[1] != ']') {
1768 if (isDIGIT(s[1]) && s[2] == ']')
1769 weight -= 10;
1770 }
1771 else
1772 weight -= 100;
1773 }
1774 for (; s < send; s++) {
1775 last_un_char = un_char;
1776 un_char = (unsigned char)*s;
1777 switch (*s) {
1778 case '@':
1779 case '&':
1780 case '$':
1781 weight -= seen[un_char] * 10;
7e2040f0 1782 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1783 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1784 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1785 weight -= 100;
1786 else
1787 weight -= 10;
1788 }
1789 else if (*s == '$' && s[1] &&
93a17b20
LW
1790 strchr("[#!%*<>()-=",s[1])) {
1791 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1792 weight -= 10;
1793 else
1794 weight -= 1;
1795 }
1796 break;
1797 case '\\':
1798 un_char = 254;
1799 if (s[1]) {
93a17b20 1800 if (strchr("wds]",s[1]))
79072805
LW
1801 weight += 100;
1802 else if (seen['\''] || seen['"'])
1803 weight += 1;
93a17b20 1804 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1805 weight += 40;
1806 else if (isDIGIT(s[1])) {
1807 weight += 40;
1808 while (s[1] && isDIGIT(s[1]))
1809 s++;
1810 }
1811 }
1812 else
1813 weight += 100;
1814 break;
1815 case '-':
1816 if (s[1] == '\\')
1817 weight += 50;
93a17b20 1818 if (strchr("aA01! ",last_un_char))
79072805 1819 weight += 30;
93a17b20 1820 if (strchr("zZ79~",s[1]))
79072805 1821 weight += 30;
f27ffc4a
GS
1822 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1823 weight -= 5; /* cope with negative subscript */
79072805
LW
1824 break;
1825 default:
93a17b20 1826 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1827 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1828 char *d = tmpbuf;
1829 while (isALPHA(*s))
1830 *d++ = *s++;
1831 *d = '\0';
1832 if (keyword(tmpbuf, d - tmpbuf))
1833 weight -= 150;
1834 }
1835 if (un_char == last_un_char + 1)
1836 weight += 5;
1837 weight -= seen[un_char];
1838 break;
1839 }
1840 seen[un_char]++;
1841 }
1842 if (weight >= 0) /* probably a character class */
1843 return FALSE;
1844 }
1845
1846 return TRUE;
1847}
ffed7fef 1848
ffb4593c
NT
1849/*
1850 * S_intuit_method
1851 *
1852 * Does all the checking to disambiguate
1853 * foo bar
1854 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1855 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1856 *
1857 * First argument is the stuff after the first token, e.g. "bar".
1858 *
1859 * Not a method if bar is a filehandle.
1860 * Not a method if foo is a subroutine prototyped to take a filehandle.
1861 * Not a method if it's really "Foo $bar"
1862 * Method if it's "foo $bar"
1863 * Not a method if it's really "print foo $bar"
1864 * Method if it's really "foo package::" (interpreted as package->foo)
1865 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1866 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1867 * =>
1868 */
1869
76e3520e 1870STATIC int
cea2e8a9 1871S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1872{
1873 char *s = start + (*start == '$');
3280af22 1874 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1875 STRLEN len;
1876 GV* indirgv;
1877
1878 if (gv) {
b6c543e3 1879 CV *cv;
a0d0e21e
LW
1880 if (GvIO(gv))
1881 return 0;
b6c543e3
IZ
1882 if ((cv = GvCVu(gv))) {
1883 char *proto = SvPVX(cv);
1884 if (proto) {
1885 if (*proto == ';')
1886 proto++;
1887 if (*proto == '*')
1888 return 0;
1889 }
1890 } else
a0d0e21e
LW
1891 gv = 0;
1892 }
8903cb82 1893 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1894 /* start is the beginning of the possible filehandle/object,
1895 * and s is the end of it
1896 * tmpbuf is a copy of it
1897 */
1898
a0d0e21e 1899 if (*start == '$') {
3280af22 1900 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1901 return 0;
1902 s = skipspace(s);
3280af22
NIS
1903 PL_bufptr = start;
1904 PL_expect = XREF;
a0d0e21e
LW
1905 return *s == '(' ? FUNCMETH : METHOD;
1906 }
1907 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1908 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1909 len -= 2;
1910 tmpbuf[len] = '\0';
1911 goto bare_package;
1912 }
1913 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1914 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1915 return 0;
1916 /* filehandle or package name makes it a method */
89bfa8cd 1917 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1918 s = skipspace(s);
3280af22 1919 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1920 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1921 bare_package:
3280af22 1922 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1923 newSVpvn(tmpbuf,len));
3280af22
NIS
1924 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1925 PL_expect = XTERM;
a0d0e21e 1926 force_next(WORD);
3280af22 1927 PL_bufptr = s;
a0d0e21e
LW
1928 return *s == '(' ? FUNCMETH : METHOD;
1929 }
1930 }
1931 return 0;
1932}
1933
ffb4593c
NT
1934/*
1935 * S_incl_perldb
1936 * Return a string of Perl code to load the debugger. If PERL5DB
1937 * is set, it will return the contents of that, otherwise a
1938 * compile-time require of perl5db.pl.
1939 */
1940
76e3520e 1941STATIC char*
cea2e8a9 1942S_incl_perldb(pTHX)
a0d0e21e 1943{
3280af22 1944 if (PL_perldb) {
76e3520e 1945 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1946
1947 if (pdb)
1948 return pdb;
91487cfc 1949 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1950 return "BEGIN { require 'perl5db.pl' }";
1951 }
1952 return "";
1953}
1954
1955
16d20bd9 1956/* Encoded script support. filter_add() effectively inserts a
4e553d73 1957 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1958 * Note that the filter function only applies to the current source file
1959 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1960 *
1961 * The datasv parameter (which may be NULL) can be used to pass
1962 * private data to this instance of the filter. The filter function
1963 * can recover the SV using the FILTER_DATA macro and use it to
1964 * store private buffers and state information.
1965 *
1966 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1967 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1968 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1969 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1970 * private use must be set using malloc'd pointers.
1971 */
16d20bd9
AD
1972
1973SV *
864dbfa3 1974Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1975{
f4c556ac
GS
1976 if (!funcp)
1977 return Nullsv;
1978
3280af22
NIS
1979 if (!PL_rsfp_filters)
1980 PL_rsfp_filters = newAV();
16d20bd9 1981 if (!datasv)
8c52afec 1982 datasv = NEWSV(255,0);
16d20bd9 1983 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1984 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1985 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1986 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 1987 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 1988 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
1989 av_unshift(PL_rsfp_filters, 1);
1990 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1991 return(datasv);
1992}
4e553d73 1993
16d20bd9
AD
1994
1995/* Delete most recently added instance of this filter function. */
a0d0e21e 1996void
864dbfa3 1997Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1998{
e0c19803 1999 SV *datasv;
fe5a182c 2000 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2001 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2002 return;
2003 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2004 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2005 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2006 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2007 IoANY(datasv) = (void *)NULL;
3280af22 2008 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2009
16d20bd9
AD
2010 return;
2011 }
2012 /* we need to search for the correct entry and clear it */
cea2e8a9 2013 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2014}
2015
2016
2017/* Invoke the n'th filter function for the current rsfp. */
2018I32
864dbfa3 2019Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2020
2021
8ac85365 2022 /* 0 = read one text line */
a0d0e21e 2023{
16d20bd9
AD
2024 filter_t funcp;
2025 SV *datasv = NULL;
e50aee73 2026
3280af22 2027 if (!PL_rsfp_filters)
16d20bd9 2028 return -1;
3280af22 2029 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2030 /* Provide a default input filter to make life easy. */
2031 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2032 DEBUG_P(PerlIO_printf(Perl_debug_log,
2033 "filter_read %d: from rsfp\n", idx));
4e553d73 2034 if (maxlen) {
16d20bd9
AD
2035 /* Want a block */
2036 int len ;
2037 int old_len = SvCUR(buf_sv) ;
2038
2039 /* ensure buf_sv is large enough */
2040 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2041 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2042 if (PerlIO_error(PL_rsfp))
37120919
AD
2043 return -1; /* error */
2044 else
2045 return 0 ; /* end of file */
2046 }
16d20bd9
AD
2047 SvCUR_set(buf_sv, old_len + len) ;
2048 } else {
2049 /* Want a line */
3280af22
NIS
2050 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2051 if (PerlIO_error(PL_rsfp))
37120919
AD
2052 return -1; /* error */
2053 else
2054 return 0 ; /* end of file */
2055 }
16d20bd9
AD
2056 }
2057 return SvCUR(buf_sv);
2058 }
2059 /* Skip this filter slot if filter has been deleted */
3280af22 2060 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2061 DEBUG_P(PerlIO_printf(Perl_debug_log,
2062 "filter_read %d: skipped (filter deleted)\n",
2063 idx));
16d20bd9
AD
2064 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2065 }
2066 /* Get function pointer hidden within datasv */
4755096e 2067 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2068 DEBUG_P(PerlIO_printf(Perl_debug_log,
2069 "filter_read %d: via function %p (%s)\n",
fe5a182c 2070 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2071 /* Call function. The function is expected to */
2072 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2073 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2074 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2075}
2076
76e3520e 2077STATIC char *
cea2e8a9 2078S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2079{
c39cd008 2080#ifdef PERL_CR_FILTER
3280af22 2081 if (!PL_rsfp_filters) {
c39cd008 2082 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2083 }
2084#endif
3280af22 2085 if (PL_rsfp_filters) {
16d20bd9 2086
55497cff
PP
2087 if (!append)
2088 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2089 if (FILTER_READ(0, sv, 0) > 0)
2090 return ( SvPVX(sv) ) ;
2091 else
2092 return Nullch ;
2093 }
9d116dd7 2094 else
fd049845 2095 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2096}
2097
01ec43d0
GS
2098STATIC HV *
2099S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2100{
2101 GV *gv;
2102
01ec43d0 2103 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2104 return PL_curstash;
2105
2106 if (len > 2 &&
2107 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2108 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2109 {
2110 return GvHV(gv); /* Foo:: */
def3634b
GS
2111 }
2112
2113 /* use constant CLASS => 'MyClass' */
2114 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2115 SV *sv;
2116 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2117 pkgname = SvPV_nolen(sv);
2118 }
2119 }
2120
2121 return gv_stashpv(pkgname, FALSE);
2122}
a0d0e21e 2123
748a9306
LW
2124#ifdef DEBUGGING
2125 static char* exp_name[] =
09bef843
SB
2126 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2127 "ATTRTERM", "TERMBLOCK"
2128 };
748a9306 2129#endif
463ee0b2 2130
02aa26ce
NT
2131/*
2132 yylex
2133
2134 Works out what to call the token just pulled out of the input
2135 stream. The yacc parser takes care of taking the ops we return and
2136 stitching them into a tree.
2137
2138 Returns:
2139 PRIVATEREF
2140
2141 Structure:
2142 if read an identifier
2143 if we're in a my declaration
2144 croak if they tried to say my($foo::bar)
2145 build the ops for a my() declaration
2146 if it's an access to a my() variable
2147 are we in a sort block?
2148 croak if my($a); $a <=> $b
2149 build ops for access to a my() variable
2150 if in a dq string, and they've said @foo and we can't find @foo
2151 croak
2152 build ops for a bareword
2153 if we already built the token before, use it.
2154*/
2155
dba4d153 2156#ifdef USE_PURE_BISON
864dbfa3 2157int
dba4d153 2158Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2159{
20141f0e
RI
2160 int r;
2161
6f202aea 2162 yyactlevel++;
20141f0e
RI
2163 yylval_pointer[yyactlevel] = lvalp;
2164 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2165 if (yyactlevel >= YYMAXLEVEL)
2166 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2167
dba4d153 2168 r = Perl_yylex(aTHX);
20141f0e 2169
d8ae6756
RI
2170 if (yyactlevel > 0)
2171 yyactlevel--;
20141f0e
RI
2172
2173 return r;
2174}
dba4d153 2175#endif
20141f0e 2176
dba4d153
JH
2177#ifdef __SC__
2178#pragma segment Perl_yylex
2179#endif
dba4d153 2180int
dba4d153 2181Perl_yylex(pTHX)
20141f0e 2182{
79072805 2183 register char *s;
378cc40b 2184 register char *d;
79072805 2185 register I32 tmp;
463ee0b2 2186 STRLEN len;
161b471a
NIS
2187 GV *gv = Nullgv;
2188 GV **gvp = 0;
aa7440fb 2189 bool bof = FALSE;
a687059c 2190
02aa26ce 2191 /* check if there's an identifier for us to look at */
ba979b31 2192 if (PL_pending_ident)
e930465f 2193 return S_pending_ident(aTHX);
bbce6d69 2194
02aa26ce
NT
2195 /* no identifier pending identification */
2196
3280af22 2197 switch (PL_lex_state) {
79072805
LW
2198#ifdef COMMENTARY
2199 case LEX_NORMAL: /* Some compilers will produce faster */
2200 case LEX_INTERPNORMAL: /* code if we comment these out. */
2201 break;
2202#endif
2203
09bef843 2204 /* when we've already built the next token, just pull it out of the queue */
79072805 2205 case LEX_KNOWNEXT:
3280af22
NIS
2206 PL_nexttoke--;
2207 yylval = PL_nextval[PL_nexttoke];
2208 if (!PL_nexttoke) {
2209 PL_lex_state = PL_lex_defer;
2210 PL_expect = PL_lex_expect;
2211 PL_lex_defer = LEX_NORMAL;
463ee0b2 2212 }
607df283 2213 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2214 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2215 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2216
3280af22 2217 return(PL_nexttype[PL_nexttoke]);
79072805 2218
02aa26ce 2219 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2220 when we get here, PL_bufptr is at the \
02aa26ce 2221 */
79072805
LW
2222 case LEX_INTERPCASEMOD:
2223#ifdef DEBUGGING
3280af22 2224 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2225 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2226#endif
02aa26ce 2227 /* handle \E or end of string */
3280af22 2228 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2229 char oldmod;
02aa26ce
NT
2230
2231 /* if at a \E */
3280af22
NIS
2232 if (PL_lex_casemods) {
2233 oldmod = PL_lex_casestack[--PL_lex_casemods];
2234 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2235
3280af22
NIS
2236 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2237 PL_bufptr += 2;
2238 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2239 }
79072805
LW
2240 return ')';
2241 }
3280af22
NIS
2242 if (PL_bufptr != PL_bufend)
2243 PL_bufptr += 2;
2244 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2245 return yylex();
79072805
LW
2246 }
2247 else {
607df283 2248 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2249 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2250 s = PL_bufptr + 1;
79072805
LW
2251 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2252 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2253 if (strchr("LU", *s) &&
3280af22 2254 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2255 {
3280af22 2256 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2257 return ')';
2258 }
3280af22
NIS
2259 if (PL_lex_casemods > 10) {
2260 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2261 if (newlb != PL_lex_casestack) {
a0d0e21e 2262 SAVEFREEPV(newlb);
3280af22 2263 PL_lex_casestack = newlb;
a0d0e21e
LW
2264 }
2265 }
3280af22
NIS
2266 PL_lex_casestack[PL_lex_casemods++] = *s;
2267 PL_lex_casestack[PL_lex_casemods] = '\0';
2268 PL_lex_state = LEX_INTERPCONCAT;
2269 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2270 force_next('(');
2271 if (*s == 'l')
3280af22 2272 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2273 else if (*s == 'u')
3280af22 2274 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2275 else if (*s == 'L')
3280af22 2276 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2277 else if (*s == 'U')
3280af22 2278 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2279 else if (*s == 'Q')
3280af22 2280 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2281 else
cea2e8a9 2282 Perl_croak(aTHX_ "panic: yylex");
3280af22 2283 PL_bufptr = s + 1;
79072805 2284 force_next(FUNC);
3280af22
NIS
2285 if (PL_lex_starts) {
2286 s = PL_bufptr;
2287 PL_lex_starts = 0;
79072805
LW
2288 Aop(OP_CONCAT);
2289 }
2290 else
cea2e8a9 2291 return yylex();
79072805
LW
2292 }
2293
55497cff
PP
2294 case LEX_INTERPPUSH:
2295 return sublex_push();
2296
79072805 2297 case LEX_INTERPSTART:
3280af22 2298 if (PL_bufptr == PL_bufend)
79072805 2299 return sublex_done();
607df283 2300 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2301 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2302 PL_expect = XTERM;
2303 PL_lex_dojoin = (*PL_bufptr == '@');
2304 PL_lex_state = LEX_INTERPNORMAL;
2305 if (PL_lex_dojoin) {
2306 PL_nextval[PL_nexttoke].ival = 0;
79072805 2307 force_next(',');
4d1ff10f 2308#ifdef USE_5005THREADS
533c011a
NIS
2309 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2310 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2311 force_next(PRIVATEREF);
2312#else
a0d0e21e 2313 force_ident("\"", '$');
4d1ff10f 2314#endif /* USE_5005THREADS */
3280af22 2315 PL_nextval[PL_nexttoke].ival = 0;
79072805 2316 force_next('$');
3280af22 2317 PL_nextval[PL_nexttoke].ival = 0;
79072805 2318 force_next('(');
3280af22 2319 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2320 force_next(FUNC);
2321 }
3280af22
NIS
2322 if (PL_lex_starts++) {
2323 s = PL_bufptr;
79072805
LW
2324 Aop(OP_CONCAT);
2325 }
cea2e8a9 2326 return yylex();
79072805
LW
2327
2328 case LEX_INTERPENDMAYBE:
3280af22
NIS
2329 if (intuit_more(PL_bufptr)) {
2330 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2331 break;
2332 }
2333 /* FALL THROUGH */
2334
2335 case LEX_INTERPEND:
3280af22
NIS
2336 if (PL_lex_dojoin) {
2337 PL_lex_dojoin = FALSE;
2338 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2339 return ')';
2340 }
43a16006 2341 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2342 && SvEVALED(PL_lex_repl))
43a16006 2343 {
e9fa98b2 2344 if (PL_bufptr != PL_bufend)
cea2e8a9 2345 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2346 PL_lex_repl = Nullsv;
2347 }
79072805
LW
2348 /* FALLTHROUGH */
2349 case LEX_INTERPCONCAT:
2350#ifdef DEBUGGING
3280af22 2351 if (PL_lex_brackets)
cea2e8a9 2352 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2353#endif
3280af22 2354 if (PL_bufptr == PL_bufend)
79072805
LW
2355 return sublex_done();
2356
3280af22
NIS
2357 if (SvIVX(PL_linestr) == '\'') {
2358 SV *sv = newSVsv(PL_linestr);
2359 if (!PL_lex_inpat)
76e3520e 2360 sv = tokeq(sv);
3280af22 2361 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2362 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2363 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2364 s = PL_bufend;
79072805
LW
2365 }
2366 else {
3280af22 2367 s = scan_const(PL_bufptr);
79072805 2368 if (*s == '\\')
3280af22 2369 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2370 else
3280af22 2371 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2372 }
2373
3280af22
NIS
2374 if (s != PL_bufptr) {
2375 PL_nextval[PL_nexttoke] = yylval;
2376 PL_expect = XTERM;
79072805 2377 force_next(THING);
3280af22 2378 if (PL_lex_starts++)
79072805
LW
2379 Aop(OP_CONCAT);
2380 else {
3280af22 2381 PL_bufptr = s;
cea2e8a9 2382 return yylex();
79072805
LW
2383 }
2384 }
2385
cea2e8a9 2386 return yylex();
a0d0e21e 2387 case LEX_FORMLINE:
3280af22
NIS
2388 PL_lex_state = LEX_NORMAL;
2389 s = scan_formline(PL_bufptr);
2390 if (!PL_lex_formbrack)
a0d0e21e
LW
2391 goto rightbracket;
2392 OPERATOR(';');
79072805
LW
2393 }
2394
3280af22
NIS
2395 s = PL_bufptr;
2396 PL_oldoldbufptr = PL_oldbufptr;
2397 PL_oldbufptr = s;
607df283 2398 DEBUG_T( {
bf49b057
GS
2399 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2400 exp_name[PL_expect], s);
5f80b19c 2401 } );
463ee0b2
LW
2402
2403 retry:
378cc40b
LW
2404 switch (*s) {
2405 default:
7e2040f0 2406 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2407 goto keylookup;
cea2e8a9 2408 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2409 case 4:
2410 case 26:
2411 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2412 case 0:
3280af22
NIS
2413 if (!PL_rsfp) {
2414 PL_last_uni = 0;
2415 PL_last_lop = 0;
2416 if (PL_lex_brackets)
d98d5fff 2417 yyerror("Missing right curly or square bracket");
4e553d73 2418 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2419 "### Tokener got EOF\n");
5f80b19c 2420 } );
79072805 2421 TOKEN(0);
463ee0b2 2422 }
3280af22 2423 if (s++ < PL_bufend)
a687059c 2424 goto retry; /* ignore stray nulls */
3280af22
NIS
2425 PL_last_uni = 0;
2426 PL_last_lop = 0;
2427 if (!PL_in_eval && !PL_preambled) {
2428 PL_preambled = TRUE;
2429 sv_setpv(PL_linestr,incl_perldb());
2430 if (SvCUR(PL_linestr))
2431 sv_catpv(PL_linestr,";");
2432 if (PL_preambleav){
2433 while(AvFILLp(PL_preambleav) >= 0) {
2434 SV *tmpsv = av_shift(PL_preambleav);
2435 sv_catsv(PL_linestr, tmpsv);
2436 sv_catpv(PL_linestr, ";");
91b7def8
PP
2437 sv_free(tmpsv);
2438 }
3280af22
NIS
2439 sv_free((SV*)PL_preambleav);
2440 PL_preambleav = NULL;
91b7def8 2441 }
3280af22
NIS
2442 if (PL_minus_n || PL_minus_p) {
2443 sv_catpv(PL_linestr, "LINE: while (<>) {");
2444 if (PL_minus_l)
2445 sv_catpv(PL_linestr,"chomp;");
2446 if (PL_minus_a) {
3280af22
NIS
2447 if (PL_minus_F) {
2448 if (strchr("/'\"", *PL_splitstr)
2449 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2450 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121
PP
2451 else {
2452 char delim;
2453 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2454 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2455 delim = *s;
75c72d73 2456 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2457 "q" + (delim == '\''), delim);
3280af22 2458 for (s = PL_splitstr; *s; s++) {
54310121 2459 if (*s == '\\')
3280af22
NIS
2460 sv_catpvn(PL_linestr, "\\", 1);
2461 sv_catpvn(PL_linestr, s, 1);
54310121 2462 }
cea2e8a9 2463 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2464 }
2304df62
AD
2465 }
2466 else
75c72d73 2467 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2468 }
79072805 2469 }
3280af22
NIS
2470 sv_catpv(PL_linestr, "\n");
2471 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2472 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2473 PL_last_lop = PL_last_uni = Nullch;
3280af22 2474 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2475 SV *sv = NEWSV(85,0);
2476
2477 sv_upgrade(sv, SVt_PVMG);
3280af22 2478 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2479 (void)SvIOK_on(sv);
2480 SvIVX(sv) = 0;
57843af0 2481 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2482 }
79072805 2483 goto retry;
a687059c 2484 }
e929a76b 2485 do {
aa7440fb 2486 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2487 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2488 fake_eof:
2489 if (PL_rsfp) {
2490 if (PL_preprocess && !PL_in_eval)
2491 (void)PerlProc_pclose(PL_rsfp);
2492 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2493 PerlIO_clearerr(PL_rsfp);
2494 else
2495 (void)PerlIO_close(PL_rsfp);
2496 PL_rsfp = Nullfp;
2497 PL_doextract = FALSE;
2498 }
2499 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2500 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2501 sv_catpv(PL_linestr,";}");
2502 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2503 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2504 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2505 PL_minus_n = PL_minus_p = 0;
2506 goto retry;
2507 }
2508 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2509 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2510 sv_setpv(PL_linestr,"");
2511 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2512 }
2513 /* if it looks like the start of a BOM, check if it in fact is */
2514 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2515#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2516# ifdef __GNU_LIBRARY__
2517# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2518# define FTELL_FOR_PIPE_IS_BROKEN
2519# endif
e3f494f1
JH
2520# else
2521# ifdef __GLIBC__
2522# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2523# define FTELL_FOR_PIPE_IS_BROKEN
2524# endif
2525# endif
226017aa
DD
2526# endif
2527#endif
2528#ifdef FTELL_FOR_PIPE_IS_BROKEN
2529 /* This loses the possibility to detect the bof
2530 * situation on perl -P when the libc5 is being used.
2531 * Workaround? Maybe attach some extra state to PL_rsfp?
2532 */
2533 if (!PL_preprocess)
7e28d3af 2534 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2535#else
7e28d3af 2536 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2537#endif
7e28d3af 2538 if (bof) {
3280af22 2539 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2540 s = swallow_bom((U8*)s);
e929a76b 2541 }
378cc40b 2542 }
3280af22 2543 if (PL_doextract) {
a0d0e21e
LW
2544 /* Incest with pod. */
2545 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2546 sv_setpv(PL_linestr, "");
2547 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2549 PL_last_lop = PL_last_uni = Nullch;
3280af22 2550 PL_doextract = FALSE;
a0d0e21e 2551 }
4e553d73 2552 }
463ee0b2 2553 incline(s);
3280af22
NIS
2554 } while (PL_doextract);
2555 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2556 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2557 SV *sv = NEWSV(85,0);
a687059c 2558
93a17b20 2559 sv_upgrade(sv, SVt_PVMG);
3280af22 2560 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2561 (void)SvIOK_on(sv);
2562 SvIVX(sv) = 0;
57843af0 2563 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2564 }
3280af22 2565 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2566 PL_last_lop = PL_last_uni = Nullch;
57843af0 2567 if (CopLINE(PL_curcop) == 1) {
3280af22 2568 while (s < PL_bufend && isSPACE(*s))
79072805 2569 s++;
a0d0e21e 2570 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2571 s++;
44a8e56a 2572 d = Nullch;
3280af22 2573 if (!PL_in_eval) {
44a8e56a
PP
2574 if (*s == '#' && *(s+1) == '!')
2575 d = s + 2;
2576#ifdef ALTERNATE_SHEBANG
2577 else {
2578 static char as[] = ALTERNATE_SHEBANG;
2579 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2580 d = s + (sizeof(as) - 1);
2581 }
2582#endif /* ALTERNATE_SHEBANG */
2583 }
2584 if (d) {
b8378b72 2585 char *ipath;
774d564b 2586 char *ipathend;
b8378b72 2587
774d564b 2588 while (isSPACE(*d))
b8378b72
CS
2589 d++;
2590 ipath = d;
774d564b
PP
2591 while (*d && !isSPACE(*d))
2592 d++;
2593 ipathend = d;
2594
2595#ifdef ARG_ZERO_IS_SCRIPT
2596 if (ipathend > ipath) {
2597 /*
2598 * HP-UX (at least) sets argv[0] to the script name,
2599 * which makes $^X incorrect. And Digital UNIX and Linux,
2600 * at least, set argv[0] to the basename of the Perl
2601 * interpreter. So, having found "#!", we'll set it right.
2602 */
ee2f7564 2603 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2604 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2605 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2606 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2607 SvSETMAGIC(x);
2608 }
774d564b 2609 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2610 }
774d564b 2611#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2612
2613 /*
2614 * Look for options.
2615 */
748a9306 2616 d = instr(s,"perl -");
84e30d1a 2617 if (!d) {
748a9306 2618 d = instr(s,"perl");
84e30d1a
GS
2619#if defined(DOSISH)
2620 /* avoid getting into infinite loops when shebang
2621 * line contains "Perl" rather than "perl" */
2622 if (!d) {
2623 for (d = ipathend-4; d >= ipath; --d) {
2624 if ((*d == 'p' || *d == 'P')
2625 && !ibcmp(d, "perl", 4))
2626 {
2627 break;
2628 }
2629 }
2630 if (d < ipath)
2631 d = Nullch;
2632 }
2633#endif
2634 }
44a8e56a
PP
2635#ifdef ALTERNATE_SHEBANG
2636 /*
2637 * If the ALTERNATE_SHEBANG on this system starts with a
2638 * character that can be part of a Perl expression, then if
2639 * we see it but not "perl", we're probably looking at the
2640 * start of Perl code, not a request to hand off to some
2641 * other interpreter. Similarly, if "perl" is there, but
2642 * not in the first 'word' of the line, we assume the line
2643 * contains the start of the Perl program.
44a8e56a
PP
2644 */
2645 if (d && *s != '#') {
774d564b 2646 char *c = ipath;
44a8e56a
PP
2647 while (*c && !strchr("; \t\r\n\f\v#", *c))
2648 c++;
2649 if (c < d)
2650 d = Nullch; /* "perl" not in first word; ignore */
2651 else
2652 *s = '#'; /* Don't try to parse shebang line */
2653 }
774d564b 2654#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2655#ifndef MACOS_TRADITIONAL
748a9306 2656 if (!d &&
44a8e56a 2657 *s == '#' &&
774d564b 2658 ipathend > ipath &&
3280af22 2659 !PL_minus_c &&
748a9306 2660 !instr(s,"indir") &&
3280af22 2661 instr(PL_origargv[0],"perl"))
748a9306 2662 {
9f68db38 2663 char **newargv;
9f68db38 2664
774d564b
PP
2665 *ipathend = '\0';
2666 s = ipathend + 1;
3280af22 2667 while (s < PL_bufend && isSPACE(*s))
9f68db38 2668 s++;
3280af22
NIS
2669 if (s < PL_bufend) {
2670 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2671 newargv[1] = s;
3280af22 2672 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2673 s++;
2674 *s = '\0';
3280af22 2675 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2676 }
2677 else
3280af22 2678 newargv = PL_origargv;
774d564b 2679 newargv[0] = ipath;
b4748376 2680 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2681 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2682 }
bf4acbe4 2683#endif
748a9306 2684 if (d) {
3280af22
NIS
2685 U32 oldpdb = PL_perldb;
2686 bool oldn = PL_minus_n;
2687 bool oldp = PL_minus_p;
748a9306
LW
2688
2689 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2690 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2691
2692 if (*d++ == '-') {
a11ec5a9 2693 bool switches_done = PL_doswitches;
8cc95fdb
PP
2694 do {
2695 if (*d == 'M' || *d == 'm') {
2696 char *m = d;
2697 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2698 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2699 (int)(d - m), m);
2700 }
2701 d = moreswitches(d);
2702 } while (d);
155aba94
GS
2703 if ((PERLDB_LINE && !oldpdb) ||
2704 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2705 /* if we have already added "LINE: while (<>) {",
2706 we must not do it again */
748a9306 2707 {
3280af22
NIS
2708 sv_setpv(PL_linestr, "");
2709 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2710 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2711 PL_last_lop = PL_last_uni = Nullch;
3280af22 2712 PL_preambled = FALSE;
84902520 2713 if (PERLDB_LINE)
3280af22 2714 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2715 goto retry;
2716 }
a11ec5a9
RGS
2717 if (PL_doswitches && !switches_done) {
2718 int argc = PL_origargc;
2719 char **argv = PL_origargv;
2720 do {
2721 argc--,argv++;
2722 } while (argc && argv[0][0] == '-' && argv[0][1]);
2723 init_argv_symbols(argc,argv);
2724 }
a0d0e21e 2725 }
79072805 2726 }
9f68db38 2727 }
79072805 2728 }
3280af22
NIS
2729 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2730 PL_bufptr = s;
2731 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2732 return yylex();
ae986130 2733 }
378cc40b 2734 goto retry;
4fdae800 2735 case '\r':
6a27c188 2736#ifdef PERL_STRICT_CR
cea2e8a9 2737 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2738 Perl_croak(aTHX_
cc507455 2739 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2740#endif
4fdae800 2741 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2742#ifdef MACOS_TRADITIONAL
2743 case '\312':
2744#endif
378cc40b
LW
2745 s++;
2746 goto retry;
378cc40b 2747 case '#':
e929a76b 2748 case '\n':
3280af22 2749 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2750 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2751 /* handle eval qq[#line 1 "foo"\n ...] */
2752 CopLINE_dec(PL_curcop);
2753 incline(s);
2754 }
3280af22 2755 d = PL_bufend;
a687059c 2756 while (s < d && *s != '\n')
378cc40b 2757 s++;
0f85fab0 2758 if (s < d)
378cc40b 2759 s++;
78c267c1 2760 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2761 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2762 incline(s);
3280af22
NIS
2763 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2764 PL_bufptr = s;
2765 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2766 return yylex();
a687059c 2767 }
378cc40b 2768 }
a687059c 2769 else {
378cc40b 2770 *s = '\0';
3280af22 2771 PL_bufend = s;
a687059c 2772 }
378cc40b
LW
2773 goto retry;
2774 case '-':
79072805 2775 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2776 I32 ftst = 0;
2777
378cc40b 2778 s++;
3280af22 2779 PL_bufptr = s;
748a9306
LW
2780 tmp = *s++;
2781
bf4acbe4 2782 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2783 s++;
2784
2785 if (strnEQ(s,"=>",2)) {
3280af22 2786 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2787 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2788 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2789 } );
748a9306
LW
2790 OPERATOR('-'); /* unary minus */
2791 }
3280af22 2792 PL_last_uni = PL_oldbufptr;
748a9306 2793 switch (tmp) {
e5edeb50
JH
2794 case 'r': ftst = OP_FTEREAD; break;
2795 case 'w': ftst = OP_FTEWRITE; break;
2796 case 'x': ftst = OP_FTEEXEC; break;
2797 case 'o': ftst = OP_FTEOWNED; break;
2798 case 'R': ftst = OP_FTRREAD; break;
2799 case 'W': ftst = OP_FTRWRITE; break;
2800 case 'X': ftst = OP_FTREXEC; break;
2801 case 'O': ftst = OP_FTROWNED; break;
2802 case 'e': ftst = OP_FTIS; break;
2803 case 'z': ftst = OP_FTZERO; break;
2804 case 's': ftst = OP_FTSIZE; break;
2805 case 'f': ftst = OP_FTFILE; break;
2806 case 'd': ftst = OP_FTDIR; break;
2807 case 'l': ftst = OP_FTLINK; break;
2808 case 'p': ftst = OP_FTPIPE; break;
2809 case 'S': ftst = OP_FTSOCK; break;
2810 case 'u': ftst = OP_FTSUID; break;
2811 case 'g': ftst = OP_FTSGID; break;
2812 case 'k': ftst = OP_FTSVTX; break;
2813 case 'b': ftst = OP_FTBLK; break;
2814 case 'c': ftst = OP_FTCHR; break;
2815 case 't': ftst = OP_FTTTY; break;
2816 case 'T': ftst = OP_FTTEXT; break;
2817 case 'B': ftst = OP_FTBINARY; break;
2818 case 'M': case 'A': case 'C':
2819 gv_fetchpv("\024",TRUE, SVt_PV);
2820 switch (tmp) {
2821 case 'M': ftst = OP_FTMTIME; break;
2822 case 'A': ftst = OP_FTATIME; break;
2823 case 'C': ftst = OP_FTCTIME; break;
2824 default: break;
2825 }
2826 break;
378cc40b 2827 default:
378cc40b
LW
2828 break;
2829 }
e5edeb50
JH
2830 if (ftst) {
2831 PL_last_lop_op = ftst;
4e553d73 2832 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2833 "### Saw file test %c\n", (int)ftst);
5f80b19c 2834 } );
e5edeb50
JH
2835 FTST(ftst);
2836 }
2837 else {
2838 /* Assume it was a minus followed by a one-letter named
2839 * subroutine call (or a -bareword), then. */
95c31fe3 2840 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2841 "### %c looked like a file test but was not\n",
2842 (int)ftst);
5f80b19c 2843 } );
e5edeb50
JH
2844 s -= 2;
2845 }
378cc40b 2846 }
a687059c
LW
2847 tmp = *s++;
2848 if (*s == tmp) {
2849 s++;
3280af22 2850 if (PL_expect == XOPERATOR)
79072805
LW
2851 TERM(POSTDEC);
2852 else
2853 OPERATOR(PREDEC);
2854 }
2855 else if (*s == '>') {
2856 s++;
2857 s = skipspace(s);
7e2040f0 2858 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2859 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2860 TOKEN(ARROW);
79072805 2861 }
748a9306
LW
2862 else if (*s == '$')
2863 OPERATOR(ARROW);
463ee0b2 2864 else
748a9306 2865 TERM(ARROW);
a687059c 2866 }
3280af22 2867 if (PL_expect == XOPERATOR)
79072805
LW
2868 Aop(OP_SUBTRACT);
2869 else {
3280af22 2870 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2871 check_uni();
79072805 2872 OPERATOR('-'); /* unary minus */
2f3197b3 2873 }
79072805 2874
378cc40b 2875 case '+':
a687059c
LW
2876 tmp = *s++;
2877 if (*s == tmp) {
378cc40b 2878 s++;
3280af22 2879 if (PL_expect == XOPERATOR)
79072805
LW
2880 TERM(POSTINC);
2881 else
2882 OPERATOR(PREINC);
378cc40b 2883 }
3280af22 2884 if (PL_expect == XOPERATOR)
79072805
LW
2885 Aop(OP_ADD);
2886 else {
3280af22 2887 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2888 check_uni();
a687059c 2889 OPERATOR('+');
2f3197b3 2890 }
a687059c 2891
378cc40b 2892 case '*':
3280af22
NIS
2893 if (PL_expect != XOPERATOR) {
2894 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2895 PL_expect = XOPERATOR;
2896 force_ident(PL_tokenbuf, '*');
2897 if (!*PL_tokenbuf)
a0d0e21e 2898 PREREF('*');
79072805 2899 TERM('*');
a687059c 2900 }
79072805
LW
2901 s++;
2902 if (*s == '*') {
a687059c 2903 s++;
79072805 2904 PWop(OP_POW);
a687059c 2905 }
79072805
LW
2906 Mop(OP_MULTIPLY);
2907
378cc40b 2908 case '%':
3280af22 2909 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2910 ++s;
2911 Mop(OP_MODULO);
a687059c 2912 }
3280af22
NIS
2913 PL_tokenbuf[0] = '%';
2914 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2915 if (!PL_tokenbuf[1]) {
2916 if (s == PL_bufend)
bbce6d69
PP
2917 yyerror("Final % should be \\% or %name");
2918 PREREF('%');
a687059c 2919 }
3280af22 2920 PL_pending_ident = '%';
bbce6d69 2921 TERM('%');
a687059c 2922
378cc40b 2923 case '^':
79072805 2924 s++;
a0d0e21e 2925 BOop(OP_BIT_XOR);
79072805 2926 case '[':
3280af22 2927 PL_lex_brackets++;
79072805 2928 /* FALL THROUGH */
378cc40b 2929 case '~':
378cc40b 2930 case ',':
378cc40b
LW
2931 tmp = *s++;
2932 OPERATOR(tmp);
a0d0e21e
LW
2933 case ':':
2934 if (s[1] == ':') {
2935 len = 0;
2936 goto just_a_word;
2937 }
2938 s++;
09bef843
SB
2939 switch (PL_expect) {
2940 OP *attrs;
2941 case XOPERATOR:
2942 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2943 break;
2944 PL_bufptr = s; /* update in case we back off */
2945 goto grabattrs;
2946 case XATTRBLOCK:
2947 PL_expect = XBLOCK;
2948 goto grabattrs;
2949 case XATTRTERM:
2950 PL_expect = XTERMBLOCK;
2951 grabattrs:
2952 s = skipspace(s);
2953 attrs = Nullop;
7e2040f0 2954 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2955 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2956 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2957 if (tmp < 0) tmp = -tmp;
2958 switch (tmp) {
2959 case KEY_or:
2960 case KEY_and:
2961 case KEY_for:
2962 case KEY_unless:
2963 case KEY_if:
2964 case KEY_while:
2965 case KEY_until:
2966 goto got_attrs;
2967 default:
2968 break;
2969 }
2970 }
09bef843
SB
2971 if (*d == '(') {
2972 d = scan_str(d,TRUE,TRUE);
2973 if (!d) {
09bef843
SB
2974 /* MUST advance bufptr here to avoid bogus
2975 "at end of line" context messages from yyerror().
2976 */
2977 PL_bufptr = s + len;
2978 yyerror("Unterminated attribute parameter in attribute list");
2979 if (attrs)
2980 op_free(attrs);
2981 return 0; /* EOF indicator */
2982 }
2983 }
2984 if (PL_lex_stuff) {
2985 SV *sv = newSVpvn(s, len);
2986 sv_catsv(sv, PL_lex_stuff);
2987 attrs = append_elem(OP_LIST, attrs,
2988 newSVOP(OP_CONST, 0, sv));
2989 SvREFCNT_dec(PL_lex_stuff);
2990 PL_lex_stuff = Nullsv;
2991 }
2992 else {
d3cea301
SB
2993 /* NOTE: any CV attrs applied here need to be part of
2994 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
2995 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
2996 CvLVALUE_on(PL_compcv);
2997 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
2998 CvLOCKED_on(PL_compcv);
2999 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3000 CvMETHOD_on(PL_compcv);
87ecf892 3001#ifdef USE_ITHREADS
d3cea301
SB
3002 else if (PL_in_my == KEY_our && len == 6 &&
3003 strnEQ(s, "unique", len))
7fb37951 3004 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3005#endif
78f9721b
SM
3006 /* After we've set the flags, it could be argued that
3007 we don't need to do the attributes.pm-based setting
3008 process, and shouldn't bother appending recognized
d3cea301
SB
3009 flags. To experiment with that, uncomment the
3010 following "else". (Note that's already been
3011 uncommented. That keeps the above-applied built-in
3012 attributes from being intercepted (and possibly
3013 rejected) by a package's attribute routines, but is
3014 justified by the performance win for the common case
3015 of applying only built-in attributes.) */
0256094b 3016 else
78f9721b
SM
3017 attrs = append_elem(OP_LIST, attrs,
3018 newSVOP(OP_CONST, 0,
3019 newSVpvn(s, len)));
09bef843
SB
3020 }
3021 s = skipspace(d);
0120eecf 3022 if (*s == ':' && s[1] != ':')
09bef843 3023 s = skipspace(s+1);
0120eecf
GS
3024 else if (s == d)
3025 break; /* require real whitespace or :'s */
09bef843 3026 }
f9829d6b
GS
3027 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3028 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3029 char q = ((*s == '\'') ? '"' : '\'');
3030 /* If here for an expression, and parsed no attrs, back off. */
3031 if (tmp == '=' && !attrs) {
3032 s = PL_bufptr;
3033 break;
3034 }
3035 /* MUST advance bufptr here to avoid bogus "at end of line"
3036 context messages from yyerror().
3037 */
3038 PL_bufptr = s;
3039 if (!*s)
3040 yyerror("Unterminated attribute list");
3041 else
3042 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3043 q, *s, q));
3044 if (attrs)
3045 op_free(attrs);
3046 OPERATOR(':');
3047 }
f9829d6b 3048 got_attrs:
09bef843
SB
3049 if (attrs) {
3050 PL_nextval[PL_nexttoke].opval = attrs;
3051 force_next(THING);
3052 }
3053 TOKEN(COLONATTR);
3054 }
a0d0e21e 3055 OPERATOR(':');
8990e307
LW
3056 case '(':
3057 s++;
3280af22
NIS
3058 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3059 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3060 else
3280af22 3061 PL_expect = XTERM;
a0d0e21e 3062 TOKEN('(');
378cc40b 3063 case ';':
f4dd75d9 3064 CLINE;
378cc40b
LW
3065 tmp = *s++;
3066 OPERATOR(tmp);
3067 case ')':
378cc40b 3068 tmp = *s++;
16d20bd9
AD
3069 s = skipspace(s);
3070 if (*s == '{')
3071 PREBLOCK(tmp);
378cc40b 3072 TERM(tmp);
79072805
LW
3073 case ']':
3074 s++;
3280af22 3075 if (PL_lex_brackets <= 0)
d98d5fff 3076 yyerror("Unmatched right square bracket");
463ee0b2 3077 else
3280af22
NIS
3078 --PL_lex_brackets;
3079 if (PL_lex_state == LEX_INTERPNORMAL) {
3080 if (PL_lex_brackets == 0) {
a0d0e21e 3081 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3082 PL_lex_state = LEX_INTERPEND;
79072805
LW
3083 }
3084 }
4633a7c4 3085 TERM(']');
79072805
LW
3086 case '{':
3087 leftbracket:
79072805 3088 s++;
3280af22
NIS
3089 if (PL_lex_brackets > 100) {
3090 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3091 if (newlb != PL_lex_brackstack) {
8990e307 3092 SAVEFREEPV(newlb);
3280af22 3093 PL_lex_brackstack = newlb;
8990e307
LW
3094 }
3095 }
3280af22 3096 switch (PL_expect) {
a0d0e21e 3097 case XTERM:
3280af22 3098 if (PL_lex_formbrack) {
a0d0e21e
LW
3099 s--;
3100 PRETERMBLOCK(DO);
3101 }
3280af22
NIS
3102 if (PL_oldoldbufptr == PL_last_lop)
3103 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3104 else
3280af22 3105 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3106 OPERATOR(HASHBRACK);
a0d0e21e 3107 case XOPERATOR:
bf4acbe4 3108 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3109 s++;
44a8e56a 3110 d = s;
3280af22
NIS
3111 PL_tokenbuf[0] = '\0';
3112 if (d < PL_bufend && *d == '-') {
3113 PL_tokenbuf[0] = '-';
44a8e56a 3114 d++;
bf4acbe4 3115 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3116 d++;
3117 }
7e2040f0 3118 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3119 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3120 FALSE, &len);
bf4acbe4 3121 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3122 d++;
3123 if (*d == '}') {
3280af22 3124 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3125 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3126 if (minus)
3127 force_next('-');
748a9306
LW
3128 }
3129 }
3130 /* FALL THROUGH */
09bef843 3131 case XATTRBLOCK:
748a9306 3132 case XBLOCK:
3280af22
NIS
3133 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3134 PL_expect = XSTATE;
a0d0e21e 3135 break;
09bef843 3136 case XATTRTERM:
a0d0e21e 3137 case XTERMBLOCK:
3280af22
NIS
3138 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3139 PL_expect = XSTATE;
a0d0e21e
LW
3140 break;
3141 default: {
3142 char *t;
3280af22
NIS
3143 if (PL_oldoldbufptr == PL_last_lop)
3144 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3145 else
3280af22 3146 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3147 s = skipspace(s);
8452ff4b
SB
3148 if (*s == '}') {
3149 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3150 PL_expect = XTERM;
3151 /* This hack is to get the ${} in the message. */
3152 PL_bufptr = s+1;
3153 yyerror("syntax error");
3154 break;
3155 }
a0d0e21e 3156 OPERATOR(HASHBRACK);
8452ff4b 3157 }
b8a4b1be
GS
3158 /* This hack serves to disambiguate a pair of curlies
3159 * as being a block or an anon hash. Normally, expectation
3160 * determines that, but in cases where we're not in a
3161 * position to expect anything in particular (like inside
3162 * eval"") we have to resolve the ambiguity. This code
3163 * covers the case where the first term in the curlies is a
3164 * quoted string. Most other cases need to be explicitly
3165 * disambiguated by prepending a `+' before the opening
3166 * curly in order to force resolution as an anon hash.
3167 *
3168 * XXX should probably propagate the outer expectation
3169 * into eval"" to rely less on this hack, but that could
3170 * potentially break current behavior of eval"".
3171 * GSAR 97-07-21
3172 */
3173 t = s;
3174 if (*s == '\'' || *s == '"' || *s == '`') {
3175 /* common case: get past first string, handling escapes */
3280af22 3176 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3177 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3178 t++;
3179 t++;
a0d0e21e 3180 }
b8a4b1be 3181 else if (*s == 'q') {
3280af22 3182 if (++t < PL_bufend
b8a4b1be 3183 && (!isALNUM(*t)
3280af22 3184 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3185 && !isALNUM(*t))))
3186 {
b8a4b1be
GS
3187 char *tmps;
3188 char open, close, term;
3189 I32 brackets = 1;
3190
3280af22 3191 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3192 t++;
3193 term = *t;
3194 open = term;
3195 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3196 term = tmps[5];
3197 close = term;
3198 if (open == close)
3280af22
NIS
3199 for (t++; t < PL_bufend; t++) {
3200 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3201 t++;
6d07e5e9 3202 else if (*t == open)
b8a4b1be
GS
3203 break;
3204 }
3205 else
3280af22
NIS
3206 for (t++; t < PL_bufend; t++) {
3207 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3208 t++;
6d07e5e9 3209 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3210 break;
3211 else if (*t == open)
3212 brackets++;
3213 }
3214 }
3215 t++;
a0d0e21e 3216 }
7e2040f0 3217 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3218 t += UTF8SKIP(t);
7e2040f0 3219 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3220 t += UTF8SKIP(t);
a0d0e21e 3221 }
3280af22 3222 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3223 t++;
b8a4b1be
GS
3224 /* if comma follows first term, call it an anon hash */
3225 /* XXX it could be a comma expression with loop modifiers */
3280af22 3226 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3227 || (*t == '=' && t[1] == '>')))
a0d0e21e 3228 OPERATOR(HASHBRACK);
3280af22 3229 if (PL_expect == XREF)
4e4e412b 3230 PL_expect = XTERM;
a0d0e21e 3231 else {
3280af22
NIS
3232 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3233 PL_expect = XSTATE;
a0d0e21e 3234 }
8990e307 3235 }
a0d0e21e 3236 break;
463ee0b2 3237 }
57843af0 3238 yylval.ival = CopLINE(PL_curcop);
79072805 3239 if (isSPACE(*s) || *s == '#')
3280af22 3240 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3241 TOKEN('{');
378cc40b 3242 case '}':
79072805
LW
3243 rightbracket:
3244 s++;
3280af22 3245 if (PL_lex_brackets <= 0)
d98d5fff 3246 yyerror("Unmatched right curly bracket");
463ee0b2 3247 else
3280af22 3248 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3249 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3250 PL_lex_formbrack = 0;
3251 if (PL_lex_state == LEX_INTERPNORMAL) {
3252 if (PL_lex_brackets == 0) {
9059aa12
LW
3253 if (PL_expect & XFAKEBRACK) {
3254 PL_expect &= XENUMMASK;
3280af22
NIS
3255 PL_lex_state = LEX_INTERPEND;
3256 PL_bufptr = s;
cea2e8a9 3257 return yylex(); /* ignore fake brackets */
79072805 3258 }
fa83b5b6 3259 if (*s == '-' && s[1] == '>')
3280af22 3260 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3261 else if (*s != '[' && *s != '{')
3280af22 3262 PL_lex_state = LEX_INTERPEND;
79072805
LW
3263 }
3264 }
9059aa12
LW
3265 if (PL_expect & XFAKEBRACK) {
3266 PL_expect &= XENUMMASK;
3280af22 3267 PL_bufptr = s;
cea2e8a9 3268 return yylex(); /* ignore fake brackets */
748a9306 3269 }
79072805
LW
3270 force_next('}');
3271 TOKEN(';');
378cc40b
LW
3272 case '&':
3273 s++;
3274 tmp = *s++;
3275 if (tmp == '&')
a0d0e21e 3276 AOPERATOR(ANDAND);
378cc40b 3277 s--;
3280af22 3278 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3279 if (ckWARN(WARN_SEMICOLON)
3280 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3281 {
57843af0 3282 CopLINE_dec(PL_curcop);
9014280d 3283 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3284 CopLINE_inc(PL_curcop);
463ee0b2 3285 }
79072805 3286 BAop(OP_BIT_AND);
463ee0b2 3287 }
79072805 3288
3280af22
NIS
3289 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3290 if (*PL_tokenbuf) {
3291 PL_expect = XOPERATOR;
3292 force_ident(PL_tokenbuf, '&');
463ee0b2 3293 }
79072805
LW
3294 else
3295 PREREF('&');
c07a80fd 3296 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3297 TERM('&');
3298
378cc40b
LW
3299 case '|':
3300 s++;
3301 tmp = *s++;
3302 if (tmp == '|')
a0d0e21e 3303 AOPERATOR(OROR);
378cc40b 3304 s--;
79072805 3305 BOop(OP_BIT_OR);
378cc40b
LW
3306 case '=':
3307 s++;
3308 tmp = *s++;
3309 if (tmp == '=')
79072805
LW
3310 Eop(OP_EQ);
3311 if (tmp == '>')
3312 OPERATOR(',');
378cc40b 3313 if (tmp == '~')
79072805 3314 PMop(OP_MATCH);
599cee73 3315 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3316 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3317 s--;
3280af22
NIS
3318 if (PL_expect == XSTATE && isALPHA(tmp) &&
3319 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3320 {
3280af22
NIS
3321 if (PL_in_eval && !PL_rsfp) {
3322 d = PL_bufend;
a5f75d66
AD
3323 while (s < d) {
3324 if (*s++ == '\n') {
3325 incline(s);
3326 if (strnEQ(s,"=cut",4)) {
3327 s = strchr(s,'\n');
3328 if (s)
3329 s++;
3330 else
3331 s = d;
3332 incline(s);
3333 goto retry;
3334 }
3335 }
3336 }
3337 goto retry;
3338 }
3280af22
NIS
3339 s = PL_bufend;
3340 PL_doextract = TRUE;
a0d0e21e
LW
3341 goto retry;
3342 }
3280af22 3343 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3344 char *t;
51882d45 3345#ifdef PERL_STRICT_CR
bf4acbe4 3346 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3347#else
bf4acbe4 3348 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3349#endif
a0d0e21e
LW
3350 if (*t == '\n' || *t == '#') {
3351 s--;
3280af22 3352 PL_expect = XBLOCK;
a0d0e21e
LW
3353 goto leftbracket;
3354 }
79072805 3355 }
a0d0e21e
LW
3356 yylval.ival = 0;
3357 OPERATOR(ASSIGNOP);
378cc40b
LW
3358 case '!':
3359 s++;
3360 tmp = *s++;
3361 if (tmp == '=')
79072805 3362 Eop(OP_NE);
378cc40b 3363 if (tmp == '~')
79072805 3364 PMop(OP_NOT);
378cc40b
LW
3365 s--;
3366 OPERATOR('!');
3367 case '<':
3280af22 3368 if (PL_expect != XOPERATOR) {
93a17b20 3369 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3370 check_uni();
79072805
LW
3371 if (s[1] == '<')
3372 s = scan_heredoc(s);
3373 else
3374 s = scan_inputsymbol(s);
3375 TERM(sublex_start());
378cc40b
LW
3376 }
3377 s++;
3378 tmp = *s++;
3379 if (tmp == '<')
79072805 3380 SHop(OP_LEFT_SHIFT);
395c3793
LW
3381 if (tmp == '=') {
3382 tmp = *s++;
3383 if (tmp == '>')
79072805 3384 Eop(OP_NCMP);
395c3793 3385 s--;
79072805 3386 Rop(OP_LE);
395c3793 3387 }
378cc40b 3388 s--;
79072805 3389 Rop(OP_LT);
378cc40b
LW
3390 case '>':
3391 s++;
3392 tmp = *s++;
3393 if (tmp == '>')
79072805 3394 SHop(OP_RIGHT_SHIFT);
378cc40b 3395 if (tmp == '=')
79072805 3396 Rop(OP_GE);
378cc40b 3397 s--;
79072805 3398 Rop(OP_GT);
378cc40b
LW
3399
3400 case '$':
bbce6d69
PP
3401 CLINE;
3402
3280af22
NIS
3403 if (PL_expect == XOPERATOR) {
3404 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3405 PL_expect = XTERM;
a0d0e21e 3406 depcom();
bbce6d69 3407 return ','; /* grandfather non-comma-format format */
a0d0e21e 3408 }
8990e307 3409 }
a0d0e21e 3410
7e2040f0 3411 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3412 PL_tokenbuf[0] = '@';
376b8730
SM
3413 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3414 sizeof PL_tokenbuf - 1, FALSE);
3415 if (PL_expect == XOPERATOR)
3416 no_op("Array length", s);
3280af22 3417 if (!PL_tokenbuf[1])
a0d0e21e 3418 PREREF(DOLSHARP);
3280af22
NIS
3419 PL_expect = XOPERATOR;
3420 PL_pending_ident = '#';
463ee0b2 3421 TOKEN(DOLSHARP);
79072805 3422 }
bbce6d69 3423
3280af22 3424 PL_tokenbuf[0] = '$';
376b8730
SM
3425 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3426 sizeof PL_tokenbuf - 1, FALSE);
3427 if (PL_expect == XOPERATOR)
3428 no_op("Scalar", s);
3280af22
NIS
3429 if (!PL_tokenbuf[1]) {
3430 if (s == PL_bufend)
bbce6d69
PP
3431 yyerror("Final $ should be \\$ or $name");
3432 PREREF('$');
8990e307 3433 }
a0d0e21e 3434
bbce6d69 3435 /* This kludge not intended to be bulletproof. */
3280af22 3436 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3437 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3438 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3439 yylval.opval->op_private = OPpCONST_ARYBASE;
3440 TERM(THING);
3441 }
3442
ff68c719 3443 d = s;
69d2bceb 3444 tmp = (I32)*s;
3280af22 3445 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3446 s = skipspace(s);
3447
3280af22 3448 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3449 char *t;
3450 if (*s == '[') {
3280af22 3451 PL_tokenbuf[0] = '@';
599cee73 3452 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3453 for(t = s + 1;
7e2040f0 3454 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3455 t++) ;
a0d0e21e 3456 if (*t++ == ',') {
3280af22
NIS
3457 PL_bufptr = skipspace(PL_bufptr);
3458 while (t < PL_bufend && *t != ']')
bbce6d69 3459 t++;
9014280d 3460 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3461 "Multidimensional syntax %.*s not supported",
3462 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3463 }
3464 }
bbce6d69
PP
3465 }
3466 else if (*s == '{') {
3280af22 3467 PL_tokenbuf[0] = '%';
599cee73 3468 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3469 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3470 {
3280af22 3471 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3472 STRLEN len;
3473 for (t++; isSPACE(*t); t++) ;
7e2040f0 3474 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3475 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3476 for (; isSPACE(*t); t++) ;
864dbfa3 3477 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3478 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3479 "You need to quote \"%s\"", tmpbuf);
748a9306 3480 }
93a17b20
LW
3481 }
3482 }
2f3197b3 3483 }
bbce6d69 3484
3280af22 3485 PL_expect = XOPERATOR;
69d2bceb 3486 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3487 bool islop = (PL_last_lop == PL_oldoldbufptr);
3488 if (!islop || PL_last_lop_op == OP_GREPSTART)
3489 PL_expect = XOPERATOR;
bbce6d69 3490 else if (strchr("$@\"'`q", *s))
3280af22 3491 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3492 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3493 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3494 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3495 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3496 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3497 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3498 /* binary operators exclude handle interpretations */
3499 switch (tmp) {
3500 case -KEY_x:
3501 case -KEY_eq:
3502 case -KEY_ne:
3503 case -KEY_gt:
3504 case -KEY_lt:
3505 case -KEY_ge:
3506 case -KEY_le:
3507 case -KEY_cmp:
3508 break;
3509 default:
3280af22 3510 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3511 break;
3512 }
3513 }
68dc0745
PP
3514