This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
allow "\x{12ab}" even without C<use utf8>
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a
PP
47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff
PP
49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
395c3793
LW
61#ifdef I_FCNTL
62#include <fcntl.h>
63#endif
fe14fcc3
LW
64#ifdef I_SYS_FILE
65#include <sys/file.h>
66#endif
395c3793 67
a790bc05
PP
68/* XXX If this causes problems, set i_unistd=undef in the hint file. */
69#ifdef I_UNISTD
70# include <unistd.h> /* Needed for execv() */
71#endif
72
73
79072805
LW
74#ifdef ff_next
75#undef ff_next
d48672a2
LW
76#endif
77
a1a0e61e
TD
78#ifdef USE_PURE_BISON
79YYSTYPE* yylval_pointer = NULL;
80int* yychar_pointer = NULL;
22c35a8c
GS
81# undef yylval
82# undef yychar
e4bfbdd4
JH
83# define yylval (*yylval_pointer)
84# define yychar (*yychar_pointer)
85# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
86# undef yylex
87# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
88#endif
89
79072805 90#include "keywords.h"
fe14fcc3 91
ffb4593c
NT
92/* CLINE is a macro that ensures PL_copline has a sane value */
93
ae986130
LW
94#ifdef CLINE
95#undef CLINE
96#endif
57843af0 97#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 98
ffb4593c
NT
99/*
100 * Convenience functions to return different tokens and prime the
9cbb5ea2 101 * lexer for the next token. They all take an argument.
ffb4593c
NT
102 *
103 * TOKEN : generic token (used for '(', DOLSHARP, etc)
104 * OPERATOR : generic operator
105 * AOPERATOR : assignment operator
106 * PREBLOCK : beginning the block after an if, while, foreach, ...
107 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
108 * PREREF : *EXPR where EXPR is not a simple identifier
109 * TERM : expression term
110 * LOOPX : loop exiting command (goto, last, dump, etc)
111 * FTST : file test operator
112 * FUN0 : zero-argument function
2d2e263d 113 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
114 * BOop : bitwise or or xor
115 * BAop : bitwise and
116 * SHop : shift operator
117 * PWop : power operator
9cbb5ea2 118 * PMop : pattern-matching operator
ffb4593c
NT
119 * Aop : addition-level operator
120 * Mop : multiplication-level operator
121 * Eop : equality-testing operator
122 * Rop : relational operator <= != gt
123 *
124 * Also see LOP and lop() below.
125 */
126
3280af22
NIS
127#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
128#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
129#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
130#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
131#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
132#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
133#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
134#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
135#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
136#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
137#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
138#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
139#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
140#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
141#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
142#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
143#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
144#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
145#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
146#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 147
a687059c
LW
148/* This bit of chicanery makes a unary function followed by
149 * a parenthesis into a function with one argument, highest precedence.
150 */
2f3197b3 151#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_expect = XTERM, \
153 PL_bufptr = s, \
154 PL_last_uni = PL_oldbufptr, \
155 PL_last_lop_op = f, \
a687059c
LW
156 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
157
79072805 158#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
159 PL_bufptr = s, \
160 PL_last_uni = PL_oldbufptr, \
79072805
LW
161 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
162
9f68db38 163/* grandfather return to old style */
3280af22 164#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 165
ffb4593c
NT
166/*
167 * S_ao
168 *
169 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
170 * into an OP_ANDASSIGN or OP_ORASSIGN
171 */
172
76e3520e 173STATIC int
cea2e8a9 174S_ao(pTHX_ int toketype)
a0d0e21e 175{
3280af22
NIS
176 if (*PL_bufptr == '=') {
177 PL_bufptr++;
a0d0e21e
LW
178 if (toketype == ANDAND)
179 yylval.ival = OP_ANDASSIGN;
180 else if (toketype == OROR)
181 yylval.ival = OP_ORASSIGN;
182 toketype = ASSIGNOP;
183 }
184 return toketype;
185}
186
ffb4593c
NT
187/*
188 * S_no_op
189 * When Perl expects an operator and finds something else, no_op
190 * prints the warning. It always prints "<something> found where
191 * operator expected. It prints "Missing semicolon on previous line?"
192 * if the surprise occurs at the start of the line. "do you need to
193 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
194 * where the compiler doesn't know if foo is a method call or a function.
195 * It prints "Missing operator before end of line" if there's nothing
196 * after the missing operator, or "... before <...>" if there is something
197 * after the missing operator.
198 */
199
76e3520e 200STATIC void
cea2e8a9 201S_no_op(pTHX_ char *what, char *s)
463ee0b2 202{
3280af22
NIS
203 char *oldbp = PL_bufptr;
204 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 205
1189a94a
GS
206 if (!s)
207 s = oldbp;
208 else {
209 assert(s >= oldbp);
210 PL_bufptr = s;
211 }
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
221 }
222 else
cea2e8a9 223 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 224 PL_bufptr = oldbp;
8990e307
LW
225}
226
ffb4593c
NT
227/*
228 * S_missingterm
229 * Complain about missing quote/regexp/heredoc terminator.
230 * If it's called with (char *)NULL then it cauterizes the line buffer.
231 * If we're in a delimited string and the delimiter is a control
232 * character, it's reformatted into a two-char sequence like ^C.
233 * This is fatal.
234 */
235
76e3520e 236STATIC void
cea2e8a9 237S_missingterm(pTHX_ char *s)
8990e307
LW
238{
239 char tmpbuf[3];
240 char q;
241 if (s) {
242 char *nl = strrchr(s,'\n');
d2719217 243 if (nl)
8990e307
LW
244 *nl = '\0';
245 }
9d116dd7
JH
246 else if (
247#ifdef EBCDIC
248 iscntrl(PL_multi_close)
249#else
250 PL_multi_close < 32 || PL_multi_close == 127
251#endif
252 ) {
8990e307 253 *tmpbuf = '^';
3280af22 254 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
255 s = "\\n";
256 tmpbuf[2] = '\0';
257 s = tmpbuf;
258 }
259 else {
3280af22 260 *tmpbuf = PL_multi_close;
8990e307
LW
261 tmpbuf[1] = '\0';
262 s = tmpbuf;
263 }
264 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 265 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 266}
79072805 267
ffb4593c
NT
268/*
269 * Perl_deprecate
ffb4593c
NT
270 */
271
79072805 272void
864dbfa3 273Perl_deprecate(pTHX_ char *s)
a0d0e21e 274{
d008e5eb 275 dTHR;
599cee73 276 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 277 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
278}
279
ffb4593c
NT
280/*
281 * depcom
9cbb5ea2 282 * Deprecate a comma-less variable list.
ffb4593c
NT
283 */
284
76e3520e 285STATIC void
cea2e8a9 286S_depcom(pTHX)
a0d0e21e
LW
287{
288 deprecate("comma-less variable list");
289}
290
ffb4593c 291/*
9cbb5ea2
GS
292 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
293 * utf16-to-utf8-reversed.
ffb4593c
NT
294 */
295
c39cd008
GS
296#ifdef PERL_CR_FILTER
297static void
298strip_return(SV *sv)
299{
300 register char *s = SvPVX(sv);
301 register char *e = s + SvCUR(sv);
302 /* outer loop optimized to do nothing if there are no CR-LFs */
303 while (s < e) {
304 if (*s++ == '\r' && *s == '\n') {
305 /* hit a CR-LF, need to copy the rest */
306 register char *d = s - 1;
307 *d++ = *s++;
308 while (s < e) {
309 if (*s == '\r' && s[1] == '\n')
310 s++;
311 *d++ = *s++;
312 }
313 SvCUR(sv) -= s - d;
314 return;
315 }
316 }
317}
a868473f 318
76e3520e 319STATIC I32
c39cd008 320S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 321{
c39cd008
GS
322 I32 count = FILTER_READ(idx+1, sv, maxlen);
323 if (count > 0 && !maxlen)
324 strip_return(sv);
325 return count;
a868473f
NIS
326}
327#endif
328
a0ed51b3 329STATIC I32
cea2e8a9 330S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
331{
332 I32 count = FILTER_READ(idx+1, sv, maxlen);
333 if (count) {
dfe13c55
GS
334 U8* tmps;
335 U8* tend;
336 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 337 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 338 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
339
340 }
341 return count;
342}
343
344STATIC I32
cea2e8a9 345S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
346{
347 I32 count = FILTER_READ(idx+1, sv, maxlen);
348 if (count) {
dfe13c55
GS
349 U8* tmps;
350 U8* tend;
351 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 352 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 353 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
354
355 }
356 return count;
357}
a868473f 358
ffb4593c
NT
359/*
360 * Perl_lex_start
9cbb5ea2
GS
361 * Initialize variables. Uses the Perl save_stack to save its state (for
362 * recursive calls to the parser).
ffb4593c
NT
363 */
364
a0d0e21e 365void
864dbfa3 366Perl_lex_start(pTHX_ SV *line)
79072805 367{
0f15f207 368 dTHR;
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
57843af0 379 SAVECOPLINE(PL_curcop);
3280af22
NIS
380 SAVEPPTR(PL_bufptr);
381 SAVEPPTR(PL_bufend);
382 SAVEPPTR(PL_oldbufptr);
383 SAVEPPTR(PL_oldoldbufptr);
384 SAVEPPTR(PL_linestart);
385 SAVESPTR(PL_linestr);
386 SAVEPPTR(PL_lex_brackstack);
387 SAVEPPTR(PL_lex_casestack);
c76ac1ee 388 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
389 SAVESPTR(PL_lex_stuff);
390 SAVEI32(PL_lex_defer);
09bef843 391 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 392 SAVESPTR(PL_lex_repl);
bebdddfc
GS
393 SAVEINT(PL_expect);
394 SAVEINT(PL_lex_expect);
3280af22
NIS
395
396 PL_lex_state = LEX_NORMAL;
397 PL_lex_defer = 0;
398 PL_expect = XSTATE;
399 PL_lex_brackets = 0;
3280af22
NIS
400 New(899, PL_lex_brackstack, 120, char);
401 New(899, PL_lex_casestack, 12, char);
402 SAVEFREEPV(PL_lex_brackstack);
403 SAVEFREEPV(PL_lex_casestack);
404 PL_lex_casemods = 0;
405 *PL_lex_casestack = '\0';
406 PL_lex_dojoin = 0;
407 PL_lex_starts = 0;
408 PL_lex_stuff = Nullsv;
409 PL_lex_repl = Nullsv;
410 PL_lex_inpat = 0;
411 PL_lex_inwhat = 0;
09bef843 412 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
413 PL_linestr = line;
414 if (SvREADONLY(PL_linestr))
415 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
416 s = SvPV(PL_linestr, len);
8990e307 417 if (len && s[len-1] != ';') {
3280af22
NIS
418 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
419 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
420 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 421 }
3280af22
NIS
422 SvTEMP_off(PL_linestr);
423 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
424 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
425 SvREFCNT_dec(PL_rs);
79cb57f6 426 PL_rs = newSVpvn("\n", 1);
3280af22 427 PL_rsfp = 0;
79072805 428}
a687059c 429
ffb4593c
NT
430/*
431 * Perl_lex_end
9cbb5ea2
GS
432 * Finalizer for lexing operations. Must be called when the parser is
433 * done with the lexer.
ffb4593c
NT
434 */
435
463ee0b2 436void
864dbfa3 437Perl_lex_end(pTHX)
463ee0b2 438{
3280af22 439 PL_doextract = FALSE;
463ee0b2
LW
440}
441
ffb4593c
NT
442/*
443 * S_incline
444 * This subroutine has nothing to do with tilting, whether at windmills
445 * or pinball tables. Its name is short for "increment line". It
57843af0 446 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 447 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
448 * # line 500 "foo.pm"
449 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
450 */
451
76e3520e 452STATIC void
cea2e8a9 453S_incline(pTHX_ char *s)
463ee0b2 454{
0f15f207 455 dTHR;
463ee0b2
LW
456 char *t;
457 char *n;
458 char ch;
459 int sawline = 0;
460
57843af0 461 CopLINE_inc(PL_curcop);
463ee0b2
LW
462 if (*s++ != '#')
463 return;
464 while (*s == ' ' || *s == '\t') s++;
465 if (strnEQ(s, "line ", 5)) {
466 s += 5;
467 sawline = 1;
468 }
469 if (!isDIGIT(*s))
470 return;
471 n = s;
472 while (isDIGIT(*s))
473 s++;
474 while (*s == ' ' || *s == '\t')
475 s++;
476 if (*s == '"' && (t = strchr(s+1, '"')))
477 s++;
478 else {
479 if (!sawline)
480 return; /* false alarm */
481 for (t = s; !isSPACE(*t); t++) ;
482 }
483 ch = *t;
484 *t = '\0';
485 if (t - s > 0)
57843af0 486 CopFILE_set(PL_curcop, s);
463ee0b2 487 *t = ch;
57843af0 488 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
489}
490
ffb4593c
NT
491/*
492 * S_skipspace
493 * Called to gobble the appropriate amount and type of whitespace.
494 * Skips comments as well.
495 */
496
76e3520e 497STATIC char *
cea2e8a9 498S_skipspace(pTHX_ register char *s)
a687059c 499{
11343788 500 dTHR;
3280af22
NIS
501 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
502 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
503 s++;
504 return s;
505 }
506 for (;;) {
fd049845 507 STRLEN prevlen;
09bef843
SB
508 SSize_t oldprevlen, oldoldprevlen;
509 SSize_t oldloplen, oldunilen;
60e6418e
GS
510 while (s < PL_bufend && isSPACE(*s)) {
511 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
512 incline(s);
513 }
ffb4593c
NT
514
515 /* comment */
3280af22
NIS
516 if (s < PL_bufend && *s == '#') {
517 while (s < PL_bufend && *s != '\n')
463ee0b2 518 s++;
60e6418e 519 if (s < PL_bufend) {
463ee0b2 520 s++;
60e6418e
GS
521 if (PL_in_eval && !PL_rsfp) {
522 incline(s);
523 continue;
524 }
525 }
463ee0b2 526 }
ffb4593c
NT
527
528 /* only continue to recharge the buffer if we're at the end
529 * of the buffer, we're not reading from a source filter, and
530 * we're in normal lexing mode
531 */
09bef843
SB
532 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
533 PL_lex_state == LEX_FORMLINE)
463ee0b2 534 return s;
ffb4593c
NT
535
536 /* try to recharge the buffer */
9cbb5ea2
GS
537 if ((s = filter_gets(PL_linestr, PL_rsfp,
538 (prevlen = SvCUR(PL_linestr)))) == Nullch)
539 {
540 /* end of file. Add on the -p or -n magic */
3280af22
NIS
541 if (PL_minus_n || PL_minus_p) {
542 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
543 ";}continue{print or die qq(-p destination: $!\\n)" :
544 "");
3280af22
NIS
545 sv_catpv(PL_linestr,";}");
546 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
547 }
548 else
3280af22 549 sv_setpv(PL_linestr,";");
ffb4593c
NT
550
551 /* reset variables for next time we lex */
9cbb5ea2
GS
552 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
553 = SvPVX(PL_linestr);
3280af22 554 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
555
556 /* Close the filehandle. Could be from -P preprocessor,
557 * STDIN, or a regular file. If we were reading code from
558 * STDIN (because the commandline held no -e or filename)
559 * then we don't close it, we reset it so the code can
560 * read from STDIN too.
561 */
562
3280af22
NIS
563 if (PL_preprocess && !PL_in_eval)
564 (void)PerlProc_pclose(PL_rsfp);
565 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
566 PerlIO_clearerr(PL_rsfp);
8990e307 567 else
3280af22
NIS
568 (void)PerlIO_close(PL_rsfp);
569 PL_rsfp = Nullfp;
463ee0b2
LW
570 return s;
571 }
ffb4593c
NT
572
573 /* not at end of file, so we only read another line */
09bef843
SB
574 /* make corresponding updates to old pointers, for yyerror() */
575 oldprevlen = PL_oldbufptr - PL_bufend;
576 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
577 if (PL_last_uni)
578 oldunilen = PL_last_uni - PL_bufend;
579 if (PL_last_lop)
580 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
581 PL_linestart = PL_bufptr = s + prevlen;
582 PL_bufend = s + SvCUR(PL_linestr);
583 s = PL_bufptr;
09bef843
SB
584 PL_oldbufptr = s + oldprevlen;
585 PL_oldoldbufptr = s + oldoldprevlen;
586 if (PL_last_uni)
587 PL_last_uni = s + oldunilen;
588 if (PL_last_lop)
589 PL_last_lop = s + oldloplen;
a0d0e21e 590 incline(s);
ffb4593c
NT
591
592 /* debugger active and we're not compiling the debugger code,
593 * so store the line into the debugger's array of lines
594 */
3280af22 595 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
596 SV *sv = NEWSV(85,0);
597
598 sv_upgrade(sv, SVt_PVMG);
3280af22 599 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 600 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 601 }
463ee0b2 602 }
a687059c 603}
378cc40b 604
ffb4593c
NT
605/*
606 * S_check_uni
607 * Check the unary operators to ensure there's no ambiguity in how they're
608 * used. An ambiguous piece of code would be:
609 * rand + 5
610 * This doesn't mean rand() + 5. Because rand() is a unary operator,
611 * the +5 is its argument.
612 */
613
76e3520e 614STATIC void
cea2e8a9 615S_check_uni(pTHX)
ba106d47 616{
2f3197b3 617 char *s;
a0d0e21e 618 char *t;
0453d815 619 dTHR;
2f3197b3 620
3280af22 621 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 622 return;
3280af22
NIS
623 while (isSPACE(*PL_last_uni))
624 PL_last_uni++;
7e2040f0 625 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 626 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 627 return;
0453d815 628 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 629 char ch = *s;
0453d815
PM
630 *s = '\0';
631 Perl_warner(aTHX_ WARN_AMBIGUOUS,
632 "Warning: Use of \"%s\" without parens is ambiguous",
633 PL_last_uni);
634 *s = ch;
635 }
2f3197b3
LW
636}
637
ffb4593c
NT
638/* workaround to replace the UNI() macro with a function. Only the
639 * hints/uts.sh file mentions this. Other comments elsewhere in the
640 * source indicate Microport Unix might need it too.
641 */
642
ffed7fef
LW
643#ifdef CRIPPLED_CC
644
645#undef UNI
ffed7fef 646#define UNI(f) return uni(f,s)
ffed7fef 647
76e3520e 648STATIC int
cea2e8a9 649S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
650{
651 yylval.ival = f;
3280af22
NIS
652 PL_expect = XTERM;
653 PL_bufptr = s;
8f872242
NIS
654 PL_last_uni = PL_oldbufptr;
655 PL_last_lop_op = f;
ffed7fef
LW
656 if (*s == '(')
657 return FUNC1;
658 s = skipspace(s);
659 if (*s == '(')
660 return FUNC1;
661 else
662 return UNIOP;
663}
664
a0d0e21e
LW
665#endif /* CRIPPLED_CC */
666
ffb4593c
NT
667/*
668 * LOP : macro to build a list operator. Its behaviour has been replaced
669 * with a subroutine, S_lop() for which LOP is just another name.
670 */
671
a0d0e21e
LW
672#define LOP(f,x) return lop(f,x,s)
673
ffb4593c
NT
674/*
675 * S_lop
676 * Build a list operator (or something that might be one). The rules:
677 * - if we have a next token, then it's a list operator [why?]
678 * - if the next thing is an opening paren, then it's a function
679 * - else it's a list operator
680 */
681
76e3520e 682STATIC I32
a0be28da 683S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 684{
0f15f207 685 dTHR;
79072805 686 yylval.ival = f;
35c8bce7 687 CLINE;
3280af22
NIS
688 PL_expect = x;
689 PL_bufptr = s;
690 PL_last_lop = PL_oldbufptr;
691 PL_last_lop_op = f;
692 if (PL_nexttoke)
a0d0e21e 693 return LSTOP;
79072805
LW
694 if (*s == '(')
695 return FUNC;
696 s = skipspace(s);
697 if (*s == '(')
698 return FUNC;
699 else
700 return LSTOP;
701}
702
ffb4593c
NT
703/*
704 * S_force_next
9cbb5ea2 705 * When the lexer realizes it knows the next token (for instance,
ffb4593c 706 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
707 * to know what token to return the next time the lexer is called. Caller
708 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
709 * handles the token correctly.
ffb4593c
NT
710 */
711
76e3520e 712STATIC void
cea2e8a9 713S_force_next(pTHX_ I32 type)
79072805 714{
3280af22
NIS
715 PL_nexttype[PL_nexttoke] = type;
716 PL_nexttoke++;
717 if (PL_lex_state != LEX_KNOWNEXT) {
718 PL_lex_defer = PL_lex_state;
719 PL_lex_expect = PL_expect;
720 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
721 }
722}
723
ffb4593c
NT
724/*
725 * S_force_word
726 * When the lexer knows the next thing is a word (for instance, it has
727 * just seen -> and it knows that the next char is a word char, then
728 * it calls S_force_word to stick the next word into the PL_next lookahead.
729 *
730 * Arguments:
b1b65b59 731 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
732 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
733 * int check_keyword : if true, Perl checks to make sure the word isn't
734 * a keyword (do this if the word is a label, e.g. goto FOO)
735 * int allow_pack : if true, : characters will also be allowed (require,
736 * use, etc. do this)
9cbb5ea2 737 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
738 */
739
76e3520e 740STATIC char *
cea2e8a9 741S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 742{
463ee0b2
LW
743 register char *s;
744 STRLEN len;
745
746 start = skipspace(start);
747 s = start;
7e2040f0 748 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 749 (allow_pack && *s == ':') ||
15f0808c 750 (allow_initial_tick && *s == '\'') )
a0d0e21e 751 {
3280af22
NIS
752 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
753 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
754 return start;
755 if (token == METHOD) {
756 s = skipspace(s);
757 if (*s == '(')
3280af22 758 PL_expect = XTERM;
463ee0b2 759 else {
3280af22 760 PL_expect = XOPERATOR;
463ee0b2 761 }
79072805 762 }
3280af22
NIS
763 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
764 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
765 force_next(token);
766 }
767 return s;
768}
769
ffb4593c
NT
770/*
771 * S_force_ident
9cbb5ea2 772 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
773 * text only contains the "foo" portion. The first argument is a pointer
774 * to the "foo", and the second argument is the type symbol to prefix.
775 * Forces the next token to be a "WORD".
9cbb5ea2 776 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
777 */
778
76e3520e 779STATIC void
cea2e8a9 780S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
781{
782 if (s && *s) {
11343788 783 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 784 PL_nextval[PL_nexttoke].opval = o;
79072805 785 force_next(WORD);
748a9306 786 if (kind) {
e858de61 787 dTHR; /* just for in_eval */
11343788 788 o->op_private = OPpCONST_ENTERED;
55497cff
PP
789 /* XXX see note in pp_entereval() for why we forgo typo
790 warnings if the symbol must be introduced in an eval.
791 GSAR 96-10-12 */
3280af22 792 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
793 kind == '$' ? SVt_PV :
794 kind == '@' ? SVt_PVAV :
795 kind == '%' ? SVt_PVHV :
796 SVt_PVGV
797 );
748a9306 798 }
79072805
LW
799 }
800}
801
ffb4593c
NT
802/*
803 * S_force_version
804 * Forces the next token to be a version number.
805 */
806
76e3520e 807STATIC char *
cea2e8a9 808S_force_version(pTHX_ char *s)
89bfa8cd
PP
809{
810 OP *version = Nullop;
811
812 s = skipspace(s);
813
a7cb1f99
GS
814 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
815 char *d = s;
816 if (*d == 'v')
817 d++;
818 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
819 if ((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
89bfa8cd
PP
820 s = scan_num(s);
821 /* real VERSION number -- GBARR */
822 version = yylval.opval;
823 }
824 }
825
826 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 827 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
828 force_next(WORD);
829
830 return (s);
831}
832
ffb4593c
NT
833/*
834 * S_tokeq
835 * Tokenize a quoted string passed in as an SV. It finds the next
836 * chunk, up to end of string or a backslash. It may make a new
837 * SV containing that chunk (if HINT_NEW_STRING is on). It also
838 * turns \\ into \.
839 */
840
76e3520e 841STATIC SV *
cea2e8a9 842S_tokeq(pTHX_ SV *sv)
79072805
LW
843{
844 register char *s;
845 register char *send;
846 register char *d;
b3ac6de7
IZ
847 STRLEN len = 0;
848 SV *pv = sv;
79072805
LW
849
850 if (!SvLEN(sv))
b3ac6de7 851 goto finish;
79072805 852
a0d0e21e 853 s = SvPV_force(sv, len);
748a9306 854 if (SvIVX(sv) == -1)
b3ac6de7 855 goto finish;
463ee0b2 856 send = s + len;
79072805
LW
857 while (s < send && *s != '\\')
858 s++;
859 if (s == send)
b3ac6de7 860 goto finish;
79072805 861 d = s;
3280af22 862 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 863 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
864 while (s < send) {
865 if (*s == '\\') {
a0d0e21e 866 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
867 s++; /* all that, just for this */
868 }
869 *d++ = *s++;
870 }
871 *d = '\0';
463ee0b2 872 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 873 finish:
3280af22 874 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 875 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
876 return sv;
877}
878
ffb4593c
NT
879/*
880 * Now come three functions related to double-quote context,
881 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
882 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
883 * interact with PL_lex_state, and create fake ( ... ) argument lists
884 * to handle functions and concatenation.
885 * They assume that whoever calls them will be setting up a fake
886 * join call, because each subthing puts a ',' after it. This lets
887 * "lower \luPpEr"
888 * become
889 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
890 *
891 * (I'm not sure whether the spurious commas at the end of lcfirst's
892 * arguments and join's arguments are created or not).
893 */
894
895/*
896 * S_sublex_start
897 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
898 *
899 * Pattern matching will set PL_lex_op to the pattern-matching op to
900 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
901 *
902 * OP_CONST and OP_READLINE are easy--just make the new op and return.
903 *
904 * Everything else becomes a FUNC.
905 *
906 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
907 * had an OP_CONST or OP_READLINE). This just sets us up for a
908 * call to S_sublex_push().
909 */
910
76e3520e 911STATIC I32
cea2e8a9 912S_sublex_start(pTHX)
79072805
LW
913{
914 register I32 op_type = yylval.ival;
79072805
LW
915
916 if (op_type == OP_NULL) {
3280af22
NIS
917 yylval.opval = PL_lex_op;
918 PL_lex_op = Nullop;
79072805
LW
919 return THING;
920 }
921 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 922 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
923
924 if (SvTYPE(sv) == SVt_PVIV) {
925 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
926 STRLEN len;
927 char *p;
928 SV *nsv;
929
930 p = SvPV(sv, len);
79cb57f6 931 nsv = newSVpvn(p, len);
b3ac6de7
IZ
932 SvREFCNT_dec(sv);
933 sv = nsv;
934 }
935 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 936 PL_lex_stuff = Nullsv;
79072805
LW
937 return THING;
938 }
939
3280af22
NIS
940 PL_sublex_info.super_state = PL_lex_state;
941 PL_sublex_info.sub_inwhat = op_type;
942 PL_sublex_info.sub_op = PL_lex_op;
943 PL_lex_state = LEX_INTERPPUSH;
55497cff 944
3280af22
NIS
945 PL_expect = XTERM;
946 if (PL_lex_op) {
947 yylval.opval = PL_lex_op;
948 PL_lex_op = Nullop;
55497cff
PP
949 return PMFUNC;
950 }
951 else
952 return FUNC;
953}
954
ffb4593c
NT
955/*
956 * S_sublex_push
957 * Create a new scope to save the lexing state. The scope will be
958 * ended in S_sublex_done. Returns a '(', starting the function arguments
959 * to the uc, lc, etc. found before.
960 * Sets PL_lex_state to LEX_INTERPCONCAT.
961 */
962
76e3520e 963STATIC I32
cea2e8a9 964S_sublex_push(pTHX)
55497cff 965{
0f15f207 966 dTHR;
f46d017c 967 ENTER;
55497cff 968
3280af22
NIS
969 PL_lex_state = PL_sublex_info.super_state;
970 SAVEI32(PL_lex_dojoin);
971 SAVEI32(PL_lex_brackets);
3280af22
NIS
972 SAVEI32(PL_lex_casemods);
973 SAVEI32(PL_lex_starts);
974 SAVEI32(PL_lex_state);
7766f137 975 SAVEVPTR(PL_lex_inpat);
3280af22 976 SAVEI32(PL_lex_inwhat);
57843af0 977 SAVECOPLINE(PL_curcop);
3280af22
NIS
978 SAVEPPTR(PL_bufptr);
979 SAVEPPTR(PL_oldbufptr);
980 SAVEPPTR(PL_oldoldbufptr);
981 SAVEPPTR(PL_linestart);
982 SAVESPTR(PL_linestr);
983 SAVEPPTR(PL_lex_brackstack);
984 SAVEPPTR(PL_lex_casestack);
985
986 PL_linestr = PL_lex_stuff;
987 PL_lex_stuff = Nullsv;
988
9cbb5ea2
GS
989 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
990 = SvPVX(PL_linestr);
3280af22
NIS
991 PL_bufend += SvCUR(PL_linestr);
992 SAVEFREESV(PL_linestr);
993
994 PL_lex_dojoin = FALSE;
995 PL_lex_brackets = 0;
3280af22
NIS
996 New(899, PL_lex_brackstack, 120, char);
997 New(899, PL_lex_casestack, 12, char);
998 SAVEFREEPV(PL_lex_brackstack);
999 SAVEFREEPV(PL_lex_casestack);
1000 PL_lex_casemods = 0;
1001 *PL_lex_casestack = '\0';
1002 PL_lex_starts = 0;
1003 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1004 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1005
1006 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1007 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1008 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1009 else
3280af22 1010 PL_lex_inpat = Nullop;
79072805 1011
55497cff 1012 return '(';
79072805
LW
1013}
1014
ffb4593c
NT
1015/*
1016 * S_sublex_done
1017 * Restores lexer state after a S_sublex_push.
1018 */
1019
76e3520e 1020STATIC I32
cea2e8a9 1021S_sublex_done(pTHX)
79072805 1022{
3280af22
NIS
1023 if (!PL_lex_starts++) {
1024 PL_expect = XOPERATOR;
79cb57f6 1025 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1026 return THING;
1027 }
1028
3280af22
NIS
1029 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1030 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1031 return yylex();
79072805
LW
1032 }
1033
ffb4593c 1034 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1035 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1036 PL_linestr = PL_lex_repl;
1037 PL_lex_inpat = 0;
1038 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1039 PL_bufend += SvCUR(PL_linestr);
1040 SAVEFREESV(PL_linestr);
1041 PL_lex_dojoin = FALSE;
1042 PL_lex_brackets = 0;
3280af22
NIS
1043 PL_lex_casemods = 0;
1044 *PL_lex_casestack = '\0';
1045 PL_lex_starts = 0;
25da4f38 1046 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1047 PL_lex_state = LEX_INTERPNORMAL;
1048 PL_lex_starts++;
e9fa98b2
HS
1049 /* we don't clear PL_lex_repl here, so that we can check later
1050 whether this is an evalled subst; that means we rely on the
1051 logic to ensure sublex_done() is called again only via the
1052 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1053 }
e9fa98b2 1054 else {
3280af22 1055 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1056 PL_lex_repl = Nullsv;
1057 }
79072805 1058 return ',';
ffed7fef
LW
1059 }
1060 else {
f46d017c 1061 LEAVE;
3280af22
NIS
1062 PL_bufend = SvPVX(PL_linestr);
1063 PL_bufend += SvCUR(PL_linestr);
1064 PL_expect = XOPERATOR;
09bef843 1065 PL_sublex_info.sub_inwhat = 0;
79072805 1066 return ')';
ffed7fef
LW
1067 }
1068}
1069
02aa26ce
NT
1070/*
1071 scan_const
1072
1073 Extracts a pattern, double-quoted string, or transliteration. This
1074 is terrifying code.
1075
3280af22
NIS
1076 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1077 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1078 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1079
9b599b2a
GS
1080 Returns a pointer to the character scanned up to. Iff this is
1081 advanced from the start pointer supplied (ie if anything was
1082 successfully parsed), will leave an OP for the substring scanned
1083 in yylval. Caller must intuit reason for not parsing further
1084 by looking at the next characters herself.
1085
02aa26ce
NT
1086 In patterns:
1087 backslashes:
1088 double-quoted style: \r and \n
1089 regexp special ones: \D \s
1090 constants: \x3
1091 backrefs: \1 (deprecated in substitution replacements)
1092 case and quoting: \U \Q \E
1093 stops on @ and $, but not for $ as tail anchor
1094
1095 In transliterations:
1096 characters are VERY literal, except for - not at the start or end
1097 of the string, which indicates a range. scan_const expands the
1098 range to the full set of intermediate characters.
1099
1100 In double-quoted strings:
1101 backslashes:
1102 double-quoted style: \r and \n
1103 constants: \x3
1104 backrefs: \1 (deprecated)
1105 case and quoting: \U \Q \E
1106 stops on @ and $
1107
1108 scan_const does *not* construct ops to handle interpolated strings.
1109 It stops processing as soon as it finds an embedded $ or @ variable
1110 and leaves it to the caller to work out what's going on.
1111
1112 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1113
1114 $ in pattern could be $foo or could be tail anchor. Assumption:
1115 it's a tail anchor if $ is the last thing in the string, or if it's
1116 followed by one of ")| \n\t"
1117
1118 \1 (backreferences) are turned into $1
1119
1120 The structure of the code is
1121 while (there's a character to process) {
1122 handle transliteration ranges
1123 skip regexp comments
1124 skip # initiated comments in //x patterns
1125 check for embedded @foo
1126 check for embedded scalars
1127 if (backslash) {
1128 leave intact backslashes from leave (below)
1129 deprecate \1 in strings and sub replacements
1130 handle string-changing backslashes \l \U \Q \E, etc.
1131 switch (what was escaped) {
1132 handle - in a transliteration (becomes a literal -)
1133 handle \132 octal characters
1134 handle 0x15 hex characters
1135 handle \cV (control V)
1136 handle printf backslashes (\f, \r, \n, etc)
1137 } (end switch)
1138 } (end if backslash)
1139 } (end while character to read)
1140
1141*/
1142
76e3520e 1143STATIC char *
cea2e8a9 1144S_scan_const(pTHX_ char *start)
79072805 1145{
3280af22 1146 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1147 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1148 register char *s = start; /* start of the constant */
1149 register char *d = SvPVX(sv); /* destination for copies */
1150 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1151 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1152 I32 len; /* ? */
ac2262e3 1153 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1154 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1155 : UTF;
ac2262e3 1156 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1157 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1158 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1159 : UTF;
dff6d3cd 1160 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1161 PL_lex_inpat
4a2d328f 1162 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1163 : "";
79072805
LW
1164
1165 while (s < send || dorange) {
02aa26ce 1166 /* get transliterations out of the way (they're most literal) */
3280af22 1167 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1168 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1169 if (dorange) {
02aa26ce 1170 I32 i; /* current expanded character */
8ada0baa 1171 I32 min; /* first character in range */
02aa26ce
NT
1172 I32 max; /* last character in range */
1173
1174 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1175 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1176 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1177 d -= 2; /* eat the first char and the - */
1178
8ada0baa
JH
1179 min = (U8)*d; /* first char in range */
1180 max = (U8)d[1]; /* last char in range */
1181
1182#ifndef ASCIIish
1183 if ((isLOWER(min) && isLOWER(max)) ||
1184 (isUPPER(min) && isUPPER(max))) {
1185 if (isLOWER(min)) {
1186 for (i = min; i <= max; i++)
1187 if (isLOWER(i))
1188 *d++ = i;
1189 } else {
1190 for (i = min; i <= max; i++)
1191 if (isUPPER(i))
1192 *d++ = i;
1193 }
1194 }
1195 else
1196#endif
1197 for (i = min; i <= max; i++)
1198 *d++ = i;
02aa26ce
NT
1199
1200 /* mark the range as done, and continue */
79072805
LW
1201 dorange = FALSE;
1202 continue;
1203 }
02aa26ce
NT
1204
1205 /* range begins (ignore - as first or last char) */
79072805 1206 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1207 if (utf) {
a176fa2a 1208 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1209 s++;
1210 continue;
1211 }
79072805
LW
1212 dorange = TRUE;
1213 s++;
1214 }
1215 }
02aa26ce
NT
1216
1217 /* if we get here, we're not doing a transliteration */
1218
0f5d15d6
IZ
1219 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1220 except for the last char, which will be done separately. */
3280af22 1221 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1222 if (s[2] == '#') {
1223 while (s < send && *s != ')')
1224 *d++ = *s++;
0f5d15d6
IZ
1225 } else if (s[2] == '{'
1226 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1227 I32 count = 1;
0f5d15d6 1228 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1229 char c;
1230
d9f97599
GS
1231 while (count && (c = *regparse)) {
1232 if (c == '\\' && regparse[1])
1233 regparse++;
cc6b7395
IZ
1234 else if (c == '{')
1235 count++;
1236 else if (c == '}')
1237 count--;
d9f97599 1238 regparse++;
cc6b7395 1239 }
5bdf89e7
IZ
1240 if (*regparse != ')') {
1241 regparse--; /* Leave one char for continuation. */
cc6b7395 1242 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1243 }
0f5d15d6 1244 while (s < regparse)
cc6b7395
IZ
1245 *d++ = *s++;
1246 }
748a9306 1247 }
02aa26ce
NT
1248
1249 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1250 else if (*s == '#' && PL_lex_inpat &&
1251 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1252 while (s+1 < send && *s != '\n')
1253 *d++ = *s++;
1254 }
02aa26ce
NT
1255
1256 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1257 else if (*s == '@' && s[1]
1258 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1259 break;
02aa26ce
NT
1260
1261 /* check for embedded scalars. only stop if we're sure it's a
1262 variable.
1263 */
79072805 1264 else if (*s == '$') {
3280af22 1265 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1266 break;
c277df42 1267 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1268 break; /* in regexp, $ might be tail anchor */
1269 }
02aa26ce 1270
a0ed51b3
LW
1271 /* (now in tr/// code again) */
1272
d008e5eb
GS
1273 if (*s & 0x80 && thisutf) {
1274 dTHR; /* only for ckWARN */
1275 if (ckWARN(WARN_UTF8)) {
dfe13c55 1276 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb 1277 if (len) {
b1c7b182 1278 has_utf = TRUE;
d008e5eb
GS
1279 while (len--)
1280 *d++ = *s++;
1281 continue;
1282 }
a0ed51b3 1283 }
b1c7b182
GS
1284 else
1285 has_utf = TRUE; /* assume valid utf8 */
a0ed51b3
LW
1286 }
1287
02aa26ce 1288 /* backslashes */
79072805
LW
1289 if (*s == '\\' && s+1 < send) {
1290 s++;
02aa26ce
NT
1291
1292 /* some backslashes we leave behind */
c9f97d15 1293 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1294 *d++ = '\\';
1295 *d++ = *s++;
1296 continue;
1297 }
02aa26ce
NT
1298
1299 /* deprecate \1 in strings and substitution replacements */
3280af22 1300 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1301 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1302 {
d008e5eb 1303 dTHR; /* only for ckWARN */
599cee73 1304 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1305 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1306 *--s = '$';
1307 break;
1308 }
02aa26ce
NT
1309
1310 /* string-change backslash escapes */
3280af22 1311 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1312 --s;
1313 break;
1314 }
02aa26ce
NT
1315
1316 /* if we get here, it's either a quoted -, or a digit */
79072805 1317 switch (*s) {
02aa26ce
NT
1318
1319 /* quoted - in transliterations */
79072805 1320 case '-':
3280af22 1321 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1322 *d++ = *s++;
1323 continue;
1324 }
1325 /* FALL THROUGH */
1326 default:
11b8faa4
JH
1327 {
1328 dTHR;
1329 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1330 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1331 "Unrecognized escape \\%c passed through",
1332 *s);
1333 /* default action is to copy the quoted character */
1334 *d++ = *s++;
1335 continue;
1336 }
02aa26ce
NT
1337
1338 /* \132 indicates an octal constant */
79072805
LW
1339 case '0': case '1': case '2': case '3':
1340 case '4': case '5': case '6': case '7':
dff6d3cd 1341 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1342 s += len;
1343 continue;
02aa26ce
NT
1344
1345 /* \x24 indicates a hex constant */
79072805 1346 case 'x':
a0ed51b3
LW
1347 ++s;
1348 if (*s == '{') {
1349 char* e = strchr(s, '}');
1350
adaeee49 1351 if (!e) {
a0ed51b3 1352 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1353 e = s;
1354 }
a0ed51b3 1355 /* note: utf always shorter than hex */
dfe13c55 1356 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1357 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1358 s = e + 1;
7e2040f0 1359 has_utf = TRUE;
a0ed51b3
LW
1360 }
1361 else {
1362 UV uv = (UV)scan_hex(s, 2, &len);
1363 if (utf && PL_lex_inwhat == OP_TRANS &&
1364 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1365 {
dfe13c55 1366 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
7e2040f0 1367 has_utf = TRUE;
a0ed51b3
LW
1368 }
1369 else {
d008e5eb
GS
1370 if (uv >= 127 && UTF) {
1371 dTHR;
1372 if (ckWARN(WARN_UTF8))
cea2e8a9 1373 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb 1374 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
894356b3 1375 (int)len,s,(int)len,s);
d008e5eb 1376 }
a0ed51b3
LW
1377 *d++ = (char)uv;
1378 }
1379 s += len;
1380 }
79072805 1381 continue;
02aa26ce 1382
4a2d328f
IZ
1383 /* \N{latin small letter a} is a named character */
1384 case 'N':
423cee85
JH
1385 ++s;
1386 if (*s == '{') {
1387 char* e = strchr(s, '}');
1388 HV *hv;
1389 SV **svp;
1390 SV *res, *cv;
1391 STRLEN len;
1392 char *str;
1393 char *why = Nullch;
1394
1395 if (!e) {
5777a3f7 1396 yyerror("Missing right brace on \\N{}");
423cee85
JH
1397 e = s - 1;
1398 goto cont_scan;
1399 }
1400 res = newSVpvn(s + 1, e - s - 1);
1401 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1402 res, Nullsv, "\\N{...}" );
423cee85
JH
1403 str = SvPV(res,len);
1404 if (len > e - s + 4) {
1405 char *odest = SvPVX(sv);
1406
1407 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1408 d = SvPVX(sv) + (d - odest);
1409 }
1410 Copy(str, d, len, char);
1411 d += len;
1412 SvREFCNT_dec(res);
1413 cont_scan:
1414 s = e + 1;
1415 }
1416 else
5777a3f7 1417 yyerror("Missing braces on \\N{}");
423cee85
JH
1418 continue;
1419
02aa26ce 1420 /* \c is a control character */
79072805
LW
1421 case 'c':
1422 s++;
9d116dd7
JH
1423#ifdef EBCDIC
1424 *d = *s++;
1425 if (isLOWER(*d))
1426 *d = toUPPER(*d);
1427 *d++ = toCTRL(*d);
1428#else
bbce6d69
PP
1429 len = *s++;
1430 *d++ = toCTRL(len);
9d116dd7 1431#endif
79072805 1432 continue;
02aa26ce
NT
1433
1434 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1435 case 'b':
1436 *d++ = '\b';
1437 break;
1438 case 'n':
1439 *d++ = '\n';
1440 break;
1441 case 'r':
1442 *d++ = '\r';
1443 break;
1444 case 'f':
1445 *d++ = '\f';
1446 break;
1447 case 't':
1448 *d++ = '\t';
1449 break;
34a3fe2a
PP
1450#ifdef EBCDIC
1451 case 'e':
1452 *d++ = '\047'; /* CP 1047 */
1453 break;
1454 case 'a':
1455 *d++ = '\057'; /* CP 1047 */
1456 break;
1457#else
79072805
LW
1458 case 'e':
1459 *d++ = '\033';
1460 break;
1461 case 'a':
1462 *d++ = '\007';
1463 break;
34a3fe2a 1464#endif
02aa26ce
NT
1465 } /* end switch */
1466
79072805
LW
1467 s++;
1468 continue;
02aa26ce
NT
1469 } /* end if (backslash) */
1470
79072805 1471 *d++ = *s++;
02aa26ce
NT
1472 } /* while loop to process each character */
1473
1474 /* terminate the string and set up the sv */
79072805 1475 *d = '\0';
463ee0b2 1476 SvCUR_set(sv, d - SvPVX(sv));
79072805 1477 SvPOK_on(sv);
7e2040f0
GS
1478 if (has_utf)
1479 SvUTF8_on(sv);
79072805 1480
02aa26ce 1481 /* shrink the sv if we allocated more than we used */
79072805
LW
1482 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1483 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1484 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1485 }
02aa26ce 1486
9b599b2a 1487 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1488 if (s > PL_bufptr) {
1489 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1490 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1491 sv, Nullsv,
3280af22 1492 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1493 ? "tr"
3280af22 1494 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1495 ? "s"
1496 : "qq")));
79072805 1497 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1498 } else
8990e307 1499 SvREFCNT_dec(sv);
79072805
LW
1500 return s;
1501}
1502
ffb4593c
NT
1503/* S_intuit_more
1504 * Returns TRUE if there's more to the expression (e.g., a subscript),
1505 * FALSE otherwise.
ffb4593c
NT
1506 *
1507 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1508 *
1509 * ->[ and ->{ return TRUE
1510 * { and [ outside a pattern are always subscripts, so return TRUE
1511 * if we're outside a pattern and it's not { or [, then return FALSE
1512 * if we're in a pattern and the first char is a {
1513 * {4,5} (any digits around the comma) returns FALSE
1514 * if we're in a pattern and the first char is a [
1515 * [] returns FALSE
1516 * [SOMETHING] has a funky algorithm to decide whether it's a
1517 * character class or not. It has to deal with things like
1518 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1519 * anything else returns TRUE
1520 */
1521
9cbb5ea2
GS
1522/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1523
76e3520e 1524STATIC int
cea2e8a9 1525S_intuit_more(pTHX_ register char *s)
79072805 1526{
3280af22 1527 if (PL_lex_brackets)
79072805
LW
1528 return TRUE;
1529 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1530 return TRUE;
1531 if (*s != '{' && *s != '[')
1532 return FALSE;
3280af22 1533 if (!PL_lex_inpat)
79072805
LW
1534 return TRUE;
1535
1536 /* In a pattern, so maybe we have {n,m}. */
1537 if (*s == '{') {
1538 s++;
1539 if (!isDIGIT(*s))
1540 return TRUE;
1541 while (isDIGIT(*s))
1542 s++;
1543 if (*s == ',')
1544 s++;
1545 while (isDIGIT(*s))
1546 s++;
1547 if (*s == '}')
1548 return FALSE;
1549 return TRUE;
1550
1551 }
1552
1553 /* On the other hand, maybe we have a character class */
1554
1555 s++;
1556 if (*s == ']' || *s == '^')
1557 return FALSE;
1558 else {
ffb4593c 1559 /* this is terrifying, and it works */
79072805
LW
1560 int weight = 2; /* let's weigh the evidence */
1561 char seen[256];
f27ffc4a 1562 unsigned char un_char = 255, last_un_char;
93a17b20 1563 char *send = strchr(s,']');
3280af22 1564 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1565
1566 if (!send) /* has to be an expression */
1567 return TRUE;
1568
1569 Zero(seen,256,char);
1570 if (*s == '$')
1571 weight -= 3;
1572 else if (isDIGIT(*s)) {
1573 if (s[1] != ']') {
1574 if (isDIGIT(s[1]) && s[2] == ']')
1575 weight -= 10;
1576 }
1577 else
1578 weight -= 100;
1579 }
1580 for (; s < send; s++) {
1581 last_un_char = un_char;
1582 un_char = (unsigned char)*s;
1583 switch (*s) {
1584 case '@':
1585 case '&':
1586 case '$':
1587 weight -= seen[un_char] * 10;
7e2040f0 1588 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1589 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1590 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1591 weight -= 100;
1592 else
1593 weight -= 10;
1594 }
1595 else if (*s == '$' && s[1] &&
93a17b20
LW
1596 strchr("[#!%*<>()-=",s[1])) {
1597 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1598 weight -= 10;
1599 else
1600 weight -= 1;
1601 }
1602 break;
1603 case '\\':
1604 un_char = 254;
1605 if (s[1]) {
93a17b20 1606 if (strchr("wds]",s[1]))
79072805
LW
1607 weight += 100;
1608 else if (seen['\''] || seen['"'])
1609 weight += 1;
93a17b20 1610 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1611 weight += 40;
1612 else if (isDIGIT(s[1])) {
1613 weight += 40;
1614 while (s[1] && isDIGIT(s[1]))
1615 s++;
1616 }
1617 }
1618 else
1619 weight += 100;
1620 break;
1621 case '-':
1622 if (s[1] == '\\')
1623 weight += 50;
93a17b20 1624 if (strchr("aA01! ",last_un_char))
79072805 1625 weight += 30;
93a17b20 1626 if (strchr("zZ79~",s[1]))
79072805 1627 weight += 30;
f27ffc4a
GS
1628 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1629 weight -= 5; /* cope with negative subscript */
79072805
LW
1630 break;
1631 default:
93a17b20 1632 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1633 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1634 char *d = tmpbuf;
1635 while (isALPHA(*s))
1636 *d++ = *s++;
1637 *d = '\0';
1638 if (keyword(tmpbuf, d - tmpbuf))
1639 weight -= 150;
1640 }
1641 if (un_char == last_un_char + 1)
1642 weight += 5;
1643 weight -= seen[un_char];
1644 break;
1645 }
1646 seen[un_char]++;
1647 }
1648 if (weight >= 0) /* probably a character class */
1649 return FALSE;
1650 }
1651
1652 return TRUE;
1653}
ffed7fef 1654
ffb4593c
NT
1655/*
1656 * S_intuit_method
1657 *
1658 * Does all the checking to disambiguate
1659 * foo bar
1660 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1661 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1662 *
1663 * First argument is the stuff after the first token, e.g. "bar".
1664 *
1665 * Not a method if bar is a filehandle.
1666 * Not a method if foo is a subroutine prototyped to take a filehandle.
1667 * Not a method if it's really "Foo $bar"
1668 * Method if it's "foo $bar"
1669 * Not a method if it's really "print foo $bar"
1670 * Method if it's really "foo package::" (interpreted as package->foo)
1671 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1672 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1673 * =>
1674 */
1675
76e3520e 1676STATIC int
cea2e8a9 1677S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1678{
1679 char *s = start + (*start == '$');
3280af22 1680 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1681 STRLEN len;
1682 GV* indirgv;
1683
1684 if (gv) {
b6c543e3 1685 CV *cv;
a0d0e21e
LW
1686 if (GvIO(gv))
1687 return 0;
b6c543e3
IZ
1688 if ((cv = GvCVu(gv))) {
1689 char *proto = SvPVX(cv);
1690 if (proto) {
1691 if (*proto == ';')
1692 proto++;
1693 if (*proto == '*')
1694 return 0;
1695 }
1696 } else
a0d0e21e
LW
1697 gv = 0;
1698 }
8903cb82 1699 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1700 /* start is the beginning of the possible filehandle/object,
1701 * and s is the end of it
1702 * tmpbuf is a copy of it
1703 */
1704
a0d0e21e 1705 if (*start == '$') {
3280af22 1706 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1707 return 0;
1708 s = skipspace(s);
3280af22
NIS
1709 PL_bufptr = start;
1710 PL_expect = XREF;
a0d0e21e
LW
1711 return *s == '(' ? FUNCMETH : METHOD;
1712 }
1713 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1714 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1715 len -= 2;
1716 tmpbuf[len] = '\0';
1717 goto bare_package;
1718 }
1719 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1720 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1721 return 0;
1722 /* filehandle or package name makes it a method */
89bfa8cd 1723 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1724 s = skipspace(s);
3280af22 1725 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1726 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1727 bare_package:
3280af22 1728 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1729 newSVpvn(tmpbuf,len));
3280af22
NIS
1730 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1731 PL_expect = XTERM;
a0d0e21e 1732 force_next(WORD);
3280af22 1733 PL_bufptr = s;
a0d0e21e
LW
1734 return *s == '(' ? FUNCMETH : METHOD;
1735 }
1736 }
1737 return 0;
1738}
1739
ffb4593c
NT
1740/*
1741 * S_incl_perldb
1742 * Return a string of Perl code to load the debugger. If PERL5DB
1743 * is set, it will return the contents of that, otherwise a
1744 * compile-time require of perl5db.pl.
1745 */
1746
76e3520e 1747STATIC char*
cea2e8a9 1748S_incl_perldb(pTHX)
a0d0e21e 1749{
3280af22 1750 if (PL_perldb) {
76e3520e 1751 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1752
1753 if (pdb)
1754 return pdb;
61bb5906 1755 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1756 return "BEGIN { require 'perl5db.pl' }";
1757 }
1758 return "";
1759}
1760
1761
16d20bd9
AD
1762/* Encoded script support. filter_add() effectively inserts a
1763 * 'pre-processing' function into the current source input stream.
1764 * Note that the filter function only applies to the current source file
1765 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1766 *
1767 * The datasv parameter (which may be NULL) can be used to pass
1768 * private data to this instance of the filter. The filter function
1769 * can recover the SV using the FILTER_DATA macro and use it to
1770 * store private buffers and state information.
1771 *
1772 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1773 * and the IoDIRP field is used to store the function pointer,
1774 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1775 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1776 * private use must be set using malloc'd pointers.
1777 */
16d20bd9
AD
1778
1779SV *
864dbfa3 1780Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1781{
f4c556ac
GS
1782 if (!funcp)
1783 return Nullsv;
1784
3280af22
NIS
1785 if (!PL_rsfp_filters)
1786 PL_rsfp_filters = newAV();
16d20bd9 1787 if (!datasv)
8c52afec 1788 datasv = NEWSV(255,0);
16d20bd9 1789 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1790 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1791 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1792 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1793 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1794 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1795 av_unshift(PL_rsfp_filters, 1);
1796 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1797 return(datasv);
1798}
1799
1800
1801/* Delete most recently added instance of this filter function. */
a0d0e21e 1802void
864dbfa3 1803Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1804{
e0c19803 1805 SV *datasv;
f4c556ac 1806 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1807 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1808 return;
1809 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1810 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1811 if (IoDIRP(datasv) == (DIR*)funcp) {
1812 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1813 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1814 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1815
16d20bd9
AD
1816 return;
1817 }
1818 /* we need to search for the correct entry and clear it */
cea2e8a9 1819 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1820}
1821
1822
1823/* Invoke the n'th filter function for the current rsfp. */
1824I32
864dbfa3 1825Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1826
1827
1828 /* 0 = read one text line */
a0d0e21e 1829{
16d20bd9
AD
1830 filter_t funcp;
1831 SV *datasv = NULL;
e50aee73 1832
3280af22 1833 if (!PL_rsfp_filters)
16d20bd9 1834 return -1;
3280af22 1835 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1836 /* Provide a default input filter to make life easy. */
1837 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1838 DEBUG_P(PerlIO_printf(Perl_debug_log,
1839 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1840 if (maxlen) {
1841 /* Want a block */
1842 int len ;
1843 int old_len = SvCUR(buf_sv) ;
1844
1845 /* ensure buf_sv is large enough */
1846 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1847 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1848 if (PerlIO_error(PL_rsfp))
37120919
AD
1849 return -1; /* error */
1850 else
1851 return 0 ; /* end of file */
1852 }
16d20bd9
AD
1853 SvCUR_set(buf_sv, old_len + len) ;
1854 } else {
1855 /* Want a line */
3280af22
NIS
1856 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1857 if (PerlIO_error(PL_rsfp))
37120919
AD
1858 return -1; /* error */
1859 else
1860 return 0 ; /* end of file */
1861 }
16d20bd9
AD
1862 }
1863 return SvCUR(buf_sv);
1864 }
1865 /* Skip this filter slot if filter has been deleted */
3280af22 1866 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1867 DEBUG_P(PerlIO_printf(Perl_debug_log,
1868 "filter_read %d: skipped (filter deleted)\n",
1869 idx));
16d20bd9
AD
1870 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1871 }
1872 /* Get function pointer hidden within datasv */
1873 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1874 DEBUG_P(PerlIO_printf(Perl_debug_log,
1875 "filter_read %d: via function %p (%s)\n",
1876 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1877 /* Call function. The function is expected to */
1878 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1879 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1880 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1881}
1882
76e3520e 1883STATIC char *
cea2e8a9 1884S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1885{
c39cd008 1886#ifdef PERL_CR_FILTER
3280af22 1887 if (!PL_rsfp_filters) {
c39cd008 1888 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1889 }
1890#endif
3280af22 1891 if (PL_rsfp_filters) {
16d20bd9 1892
55497cff
PP
1893 if (!append)
1894 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1895 if (FILTER_READ(0, sv, 0) > 0)
1896 return ( SvPVX(sv) ) ;
1897 else
1898 return Nullch ;
1899 }
9d116dd7 1900 else
fd049845 1901 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1902}
1903
1904
748a9306
LW
1905#ifdef DEBUGGING
1906 static char* exp_name[] =
09bef843
SB
1907 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1908 "ATTRTERM", "TERMBLOCK"
1909 };
748a9306 1910#endif
463ee0b2 1911
02aa26ce
NT
1912/*
1913 yylex
1914
1915 Works out what to call the token just pulled out of the input
1916 stream. The yacc parser takes care of taking the ops we return and
1917 stitching them into a tree.
1918
1919 Returns:
1920 PRIVATEREF
1921
1922 Structure:
1923 if read an identifier
1924 if we're in a my declaration
1925 croak if they tried to say my($foo::bar)
1926 build the ops for a my() declaration
1927 if it's an access to a my() variable
1928 are we in a sort block?
1929 croak if my($a); $a <=> $b
1930 build ops for access to a my() variable
1931 if in a dq string, and they've said @foo and we can't find @foo
1932 croak
1933 build ops for a bareword
1934 if we already built the token before, use it.
1935*/
1936
864dbfa3
GS
1937int
1938#ifdef USE_PURE_BISON
cea2e8a9 1939Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1940#else
cea2e8a9 1941Perl_yylex(pTHX)
864dbfa3 1942#endif
378cc40b 1943{
11343788 1944 dTHR;
79072805 1945 register char *s;
378cc40b 1946 register char *d;
79072805 1947 register I32 tmp;
463ee0b2 1948 STRLEN len;
161b471a
NIS
1949 GV *gv = Nullgv;
1950 GV **gvp = 0;
a687059c 1951
a1a0e61e
TD
1952#ifdef USE_PURE_BISON
1953 yylval_pointer = lvalp;
1954 yychar_pointer = lcharp;
1955#endif
1956
02aa26ce 1957 /* check if there's an identifier for us to look at */
3280af22 1958 if (PL_pending_ident) {
02aa26ce 1959 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1960 char pit = PL_pending_ident;
1961 PL_pending_ident = 0;
bbce6d69 1962
02aa26ce
NT
1963 /* if we're in a my(), we can't allow dynamics here.
1964 $foo'bar has already been turned into $foo::bar, so
1965 just check for colons.
1966
1967 if it's a legal name, the OP is a PADANY.
1968 */
3280af22 1969 if (PL_in_my) {
77ca0c92 1970 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
1971 if (strchr(PL_tokenbuf,':'))
1972 yyerror(Perl_form(aTHX_ "No package name allowed for "
1973 "variable %s in \"our\"",
1974 PL_tokenbuf));
77ca0c92
LW
1975 tmp = pad_allocmy(PL_tokenbuf);
1976 }
1977 else {
1978 if (strchr(PL_tokenbuf,':'))
1979 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1980
77ca0c92
LW
1981 yylval.opval = newOP(OP_PADANY, 0);
1982 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1983 return PRIVATEREF;
1984 }
bbce6d69
PP
1985 }
1986
02aa26ce
NT
1987 /*
1988 build the ops for accesses to a my() variable.
1989
1990 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1991 then used in a comparison. This catches most, but not
1992 all cases. For instance, it catches
1993 sort { my($a); $a <=> $b }
1994 but not
1995 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1996 (although why you'd do that is anyone's guess).
1997 */
1998
3280af22 1999 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2000#ifdef USE_THREADS
54b9620d 2001 /* Check for single character per-thread SVs */
3280af22
NIS
2002 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2003 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2004 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2005 {
2faa37cc 2006 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2007 yylval.opval->op_targ = tmp;
2008 return PRIVATEREF;
2009 }
2010#endif /* USE_THREADS */
3280af22 2011 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2012 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2013 /* might be an "our" variable" */
f472eb5c 2014 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2015 /* build ops for a bareword */
f472eb5c
GS
2016 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2017 sv_catpvn(sym, "::", 2);
2018 sv_catpv(sym, PL_tokenbuf+1);
2019 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2020 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2021 gv_fetchpv(SvPVX(sym),
77ca0c92 2022 (PL_in_eval
f472eb5c
GS
2023 ? (GV_ADDMULTI | GV_ADDINEVAL)
2024 : TRUE
77ca0c92
LW
2025 ),
2026 ((PL_tokenbuf[0] == '$') ? SVt_PV
2027 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2028 : SVt_PVHV));
2029 return WORD;
2030 }
2031
02aa26ce 2032 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2033 if (PL_last_lop_op == OP_SORT &&
2034 PL_tokenbuf[0] == '$' &&
2035 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2036 && !PL_tokenbuf[2])
bbce6d69 2037 {
3280af22
NIS
2038 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2039 d < PL_bufend && *d != '\n';
a863c7d1
MB
2040 d++)
2041 {
2042 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2043 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2044 PL_tokenbuf);
a863c7d1 2045 }
bbce6d69
PP
2046 }
2047 }
bbce6d69 2048
a863c7d1
MB
2049 yylval.opval = newOP(OP_PADANY, 0);
2050 yylval.opval->op_targ = tmp;
2051 return PRIVATEREF;
2052 }
bbce6d69
PP
2053 }
2054
02aa26ce
NT
2055 /*
2056 Whine if they've said @foo in a doublequoted string,
2057 and @foo isn't a variable we can find in the symbol
2058 table.
2059 */
3280af22
NIS
2060 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2061 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2062 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2063 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2064 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2065 }
2066
02aa26ce 2067 /* build ops for a bareword */
3280af22 2068 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2069 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2070 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2071 ((PL_tokenbuf[0] == '$') ? SVt_PV
2072 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2073 : SVt_PVHV));
2074 return WORD;
2075 }
2076
02aa26ce
NT
2077 /* no identifier pending identification */
2078
3280af22 2079 switch (PL_lex_state) {
79072805
LW
2080#ifdef COMMENTARY
2081 case LEX_NORMAL: /* Some compilers will produce faster */
2082 case LEX_INTERPNORMAL: /* code if we comment these out. */
2083 break;
2084#endif
2085
09bef843 2086 /* when we've already built the next token, just pull it out of the queue */
79072805 2087 case LEX_KNOWNEXT:
3280af22
NIS
2088 PL_nexttoke--;
2089 yylval = PL_nextval[PL_nexttoke];
2090 if (!PL_nexttoke) {
2091 PL_lex_state = PL_lex_defer;
2092 PL_expect = PL_lex_expect;
2093 PL_lex_defer = LEX_NORMAL;
463ee0b2 2094 }
3280af22 2095 return(PL_nexttype[PL_nexttoke]);
79072805 2096
02aa26ce 2097 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2098 when we get here, PL_bufptr is at the \
02aa26ce 2099 */
79072805
LW
2100 case LEX_INTERPCASEMOD:
2101#ifdef DEBUGGING
3280af22 2102 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2103 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2104#endif
02aa26ce 2105 /* handle \E or end of string */
3280af22 2106 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2107 char oldmod;
02aa26ce
NT
2108
2109 /* if at a \E */
3280af22
NIS
2110 if (PL_lex_casemods) {
2111 oldmod = PL_lex_casestack[--PL_lex_casemods];
2112 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2113
3280af22
NIS
2114 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2115 PL_bufptr += 2;
2116 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2117 }
79072805
LW
2118 return ')';
2119 }
3280af22
NIS
2120 if (PL_bufptr != PL_bufend)
2121 PL_bufptr += 2;
2122 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2123 return yylex();
79072805
LW
2124 }
2125 else {
3280af22 2126 s = PL_bufptr + 1;
79072805
LW
2127 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2128 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2129 if (strchr("LU", *s) &&
3280af22 2130 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2131 {
3280af22 2132 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2133 return ')';
2134 }
3280af22
NIS
2135 if (PL_lex_casemods > 10) {
2136 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2137 if (newlb != PL_lex_casestack) {
a0d0e21e 2138 SAVEFREEPV(newlb);
3280af22 2139 PL_lex_casestack = newlb;
a0d0e21e
LW
2140 }
2141 }
3280af22
NIS
2142 PL_lex_casestack[PL_lex_casemods++] = *s;
2143 PL_lex_casestack[PL_lex_casemods] = '\0';
2144 PL_lex_state = LEX_INTERPCONCAT;
2145 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2146 force_next('(');
2147 if (*s == 'l')
3280af22 2148 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2149 else if (*s == 'u')
3280af22 2150 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2151 else if (*s == 'L')
3280af22 2152 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2153 else if (*s == 'U')
3280af22 2154 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2155 else if (*s == 'Q')
3280af22 2156 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2157 else
cea2e8a9 2158 Perl_croak(aTHX_ "panic: yylex");
3280af22 2159 PL_bufptr = s + 1;
79072805 2160 force_next(FUNC);
3280af22
NIS
2161 if (PL_lex_starts) {
2162 s = PL_bufptr;
2163 PL_lex_starts = 0;
79072805
LW
2164 Aop(OP_CONCAT);
2165 }
2166 else
cea2e8a9 2167 return yylex();
79072805
LW
2168 }
2169
55497cff
PP
2170 case LEX_INTERPPUSH:
2171 return sublex_push();
2172
79072805 2173 case LEX_INTERPSTART:
3280af22 2174 if (PL_bufptr == PL_bufend)
79072805 2175 return sublex_done();
3280af22
NIS
2176 PL_expect = XTERM;
2177 PL_lex_dojoin = (*PL_bufptr == '@');
2178 PL_lex_state = LEX_INTERPNORMAL;
2179 if (PL_lex_dojoin) {
2180 PL_nextval[PL_nexttoke].ival = 0;
79072805 2181 force_next(',');
554b3eca 2182#ifdef USE_THREADS
533c011a
NIS
2183 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2184 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2185 force_next(PRIVATEREF);
2186#else
a0d0e21e 2187 force_ident("\"", '$');
554b3eca 2188#endif /* USE_THREADS */
3280af22 2189 PL_nextval[PL_nexttoke].ival = 0;
79072805 2190 force_next('$');
3280af22 2191 PL_nextval[PL_nexttoke].ival = 0;
79072805 2192 force_next('(');
3280af22 2193 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2194 force_next(FUNC);
2195 }
3280af22
NIS
2196 if (PL_lex_starts++) {
2197 s = PL_bufptr;
79072805
LW
2198 Aop(OP_CONCAT);
2199 }
cea2e8a9 2200 return yylex();
79072805
LW
2201
2202 case LEX_INTERPENDMAYBE:
3280af22
NIS
2203 if (intuit_more(PL_bufptr)) {
2204 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2205 break;
2206 }
2207 /* FALL THROUGH */
2208
2209 case LEX_INTERPEND:
3280af22
NIS
2210 if (PL_lex_dojoin) {
2211 PL_lex_dojoin = FALSE;
2212 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2213 return ')';
2214 }
43a16006 2215 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2216 && SvEVALED(PL_lex_repl))
43a16006 2217 {
e9fa98b2 2218 if (PL_bufptr != PL_bufend)
cea2e8a9 2219 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2220 PL_lex_repl = Nullsv;
2221 }
79072805
LW
2222 /* FALLTHROUGH */
2223 case LEX_INTERPCONCAT:
2224#ifdef DEBUGGING
3280af22 2225 if (PL_lex_brackets)
cea2e8a9 2226 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2227#endif
3280af22 2228 if (PL_bufptr == PL_bufend)
79072805
LW
2229 return sublex_done();
2230
3280af22
NIS
2231 if (SvIVX(PL_linestr) == '\'') {
2232 SV *sv = newSVsv(PL_linestr);
2233 if (!PL_lex_inpat)
76e3520e 2234 sv = tokeq(sv);
3280af22 2235 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2236 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2237 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2238 s = PL_bufend;
79072805
LW
2239 }
2240 else {
3280af22 2241 s = scan_const(PL_bufptr);
79072805 2242 if (*s == '\\')
3280af22 2243 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2244 else
3280af22 2245 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2246 }
2247
3280af22
NIS
2248 if (s != PL_bufptr) {
2249 PL_nextval[PL_nexttoke] = yylval;
2250 PL_expect = XTERM;
79072805 2251 force_next(THING);
3280af22 2252 if (PL_lex_starts++)
79072805
LW
2253 Aop(OP_CONCAT);
2254 else {
3280af22 2255 PL_bufptr = s;
cea2e8a9 2256 return yylex();
79072805
LW
2257 }
2258 }
2259
cea2e8a9 2260 return yylex();
a0d0e21e 2261 case LEX_FORMLINE:
3280af22
NIS
2262 PL_lex_state = LEX_NORMAL;
2263 s = scan_formline(PL_bufptr);
2264 if (!PL_lex_formbrack)
a0d0e21e
LW
2265 goto rightbracket;
2266 OPERATOR(';');
79072805
LW
2267 }
2268
3280af22
NIS
2269 s = PL_bufptr;
2270 PL_oldoldbufptr = PL_oldbufptr;
2271 PL_oldbufptr = s;
79072805 2272 DEBUG_p( {
bf49b057
GS
2273 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2274 exp_name[PL_expect], s);
79072805 2275 } )
463ee0b2
LW
2276
2277 retry:
378cc40b
LW
2278 switch (*s) {
2279 default:
7e2040f0 2280 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2281 goto keylookup;
cea2e8a9 2282 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2283 case 4:
2284 case 26:
2285 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2286 case 0:
3280af22
NIS
2287 if (!PL_rsfp) {
2288 PL_last_uni = 0;
2289 PL_last_lop = 0;
2290 if (PL_lex_brackets)
d98d5fff 2291 yyerror("Missing right curly or square bracket");
79072805 2292 TOKEN(0);
463ee0b2 2293 }
3280af22 2294 if (s++ < PL_bufend)
a687059c 2295 goto retry; /* ignore stray nulls */
3280af22
NIS
2296 PL_last_uni = 0;
2297 PL_last_lop = 0;
2298 if (!PL_in_eval && !PL_preambled) {
2299 PL_preambled = TRUE;
2300 sv_setpv(PL_linestr,incl_perldb());
2301 if (SvCUR(PL_linestr))
2302 sv_catpv(PL_linestr,";");
2303 if (PL_preambleav){
2304 while(AvFILLp(PL_preambleav) >= 0) {
2305 SV *tmpsv = av_shift(PL_preambleav);
2306 sv_catsv(PL_linestr, tmpsv);
2307 sv_catpv(PL_linestr, ";");
91b7def8
PP
2308 sv_free(tmpsv);
2309 }
3280af22
NIS
2310 sv_free((SV*)PL_preambleav);
2311 PL_preambleav = NULL;
91b7def8 2312 }
3280af22
NIS
2313 if (PL_minus_n || PL_minus_p) {
2314 sv_catpv(PL_linestr, "LINE: while (<>) {");
2315 if (PL_minus_l)
2316 sv_catpv(PL_linestr,"chomp;");
2317 if (PL_minus_a) {
8fd239a7
CS
2318 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2319 if (gv)
2320 GvIMPORTED_AV_on(gv);
3280af22
NIS
2321 if (PL_minus_F) {
2322 if (strchr("/'\"", *PL_splitstr)
2323 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2324 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2325 else {
2326 char delim;
2327 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2328 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2329 delim = *s;
cea2e8a9 2330 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2331 "q" + (delim == '\''), delim);
3280af22 2332 for (s = PL_splitstr; *s; s++) {
54310121 2333 if (*s == '\\')
3280af22
NIS
2334 sv_catpvn(PL_linestr, "\\", 1);
2335 sv_catpvn(PL_linestr, s, 1);
54310121 2336 }
cea2e8a9 2337 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2338 }
2304df62
AD
2339 }
2340 else
3280af22 2341 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2342 }
79072805 2343 }
3280af22
NIS
2344 sv_catpv(PL_linestr, "\n");
2345 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2346 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2347 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2348 SV *sv = NEWSV(85,0);
2349
2350 sv_upgrade(sv, SVt_PVMG);
3280af22 2351 sv_setsv(sv,PL_linestr);
57843af0 2352 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2353 }
79072805 2354 goto retry;
a687059c 2355 }
e929a76b 2356 do {
3280af22 2357 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2358 fake_eof:
3280af22
NIS
2359 if (PL_rsfp) {
2360 if (PL_preprocess && !PL_in_eval)
2361 (void)PerlProc_pclose(PL_rsfp);
2362 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2363 PerlIO_clearerr(PL_rsfp);
395c3793 2364 else
3280af22
NIS
2365 (void)PerlIO_close(PL_rsfp);
2366 PL_rsfp = Nullfp;
4a9ae47a 2367 PL_doextract = FALSE;
395c3793 2368 }
3280af22
NIS
2369 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2370 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2371 sv_catpv(PL_linestr,";}");
2372 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2373 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2374 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2375 goto retry;
2376 }
3280af22
NIS
2377 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2378 sv_setpv(PL_linestr,"");
79072805 2379 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2380 }
3280af22 2381 if (PL_doextract) {
a0d0e21e 2382 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2383 PL_doextract = FALSE;
a0d0e21e
LW
2384
2385 /* Incest with pod. */
2386 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2387 sv_setpv(PL_linestr, "");
2388 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2389 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2390 PL_doextract = FALSE;
a0d0e21e
LW
2391 }
2392 }
463ee0b2 2393 incline(s);
3280af22
NIS
2394 } while (PL_doextract);
2395 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2396 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2397 SV *sv = NEWSV(85,0);
a687059c 2398
93a17b20 2399 sv_upgrade(sv, SVt_PVMG);
3280af22 2400 sv_setsv(sv,PL_linestr);
57843af0 2401 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2402 }
3280af22 2403 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2404 if (CopLINE(PL_curcop) == 1) {
3280af22 2405 while (s < PL_bufend && isSPACE(*s))
79072805 2406 s++;
a0d0e21e 2407 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2408 s++;
44a8e56a 2409 d = Nullch;
3280af22 2410 if (!PL_in_eval) {
44a8e56a
PP
2411 if (*s == '#' && *(s+1) == '!')
2412 d = s + 2;
2413#ifdef ALTERNATE_SHEBANG
2414 else {
2415 static char as[] = ALTERNATE_SHEBANG;
2416 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2417 d = s + (sizeof(as) - 1);
2418 }
2419#endif /* ALTERNATE_SHEBANG */
2420 }
2421 if (d) {
b8378b72 2422 char *ipath;
774d564b 2423 char *ipathend;
b8378b72 2424
774d564b 2425 while (isSPACE(*d))
b8378b72
CS
2426 d++;
2427 ipath = d;
774d564b
PP
2428 while (*d && !isSPACE(*d))
2429 d++;
2430 ipathend = d;
2431
2432#ifdef ARG_ZERO_IS_SCRIPT
2433 if (ipathend > ipath) {
2434 /*
2435 * HP-UX (at least) sets argv[0] to the script name,
2436 * which makes $^X incorrect. And Digital UNIX and Linux,
2437 * at least, set argv[0] to the basename of the Perl
2438 * interpreter. So, having found "#!", we'll set it right.
2439 */
2440 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2441 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2442 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2443 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2444 SvSETMAGIC(x);
2445 }
774d564b 2446 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2447 }
774d564b 2448#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2449
2450 /*
2451 * Look for options.
2452 */
748a9306 2453 d = instr(s,"perl -");
84e30d1a 2454 if (!d) {
748a9306 2455 d = instr(s,"perl");
84e30d1a
GS
2456#if defined(DOSISH)
2457 /* avoid getting into infinite loops when shebang
2458 * line contains "Perl" rather than "perl" */
2459 if (!d) {
2460 for (d = ipathend-4; d >= ipath; --d) {
2461 if ((*d == 'p' || *d == 'P')
2462 && !ibcmp(d, "perl", 4))
2463 {
2464 break;
2465 }
2466 }
2467 if (d < ipath)
2468 d = Nullch;
2469 }
2470#endif
2471 }
44a8e56a
PP
2472#ifdef ALTERNATE_SHEBANG
2473 /*
2474 * If the ALTERNATE_SHEBANG on this system starts with a
2475 * character that can be part of a Perl expression, then if
2476 * we see it but not "perl", we're probably looking at the
2477 * start of Perl code, not a request to hand off to some
2478 * other interpreter. Similarly, if "perl" is there, but
2479 * not in the first 'word' of the line, we assume the line
2480 * contains the start of the Perl program.
44a8e56a
PP
2481 */
2482 if (d && *s != '#') {
774d564b 2483 char *c = ipath;
44a8e56a
PP
2484 while (*c && !strchr("; \t\r\n\f\v#", *c))
2485 c++;
2486 if (c < d)
2487 d = Nullch; /* "perl" not in first word; ignore */
2488 else
2489 *s = '#'; /* Don't try to parse shebang line */
2490 }
774d564b 2491#endif /* ALTERNATE_SHEBANG */
748a9306 2492 if (!d &&
44a8e56a 2493 *s == '#' &&
774d564b 2494 ipathend > ipath &&
3280af22 2495 !PL_minus_c &&
748a9306 2496 !instr(s,"indir") &&
3280af22 2497 instr(PL_origargv[0],"perl"))
748a9306 2498 {
9f68db38 2499 char **newargv;
9f68db38 2500
774d564b
PP
2501 *ipathend = '\0';
2502 s = ipathend + 1;
3280af22 2503 while (s < PL_bufend && isSPACE(*s))
9f68db38 2504 s++;
3280af22
NIS
2505 if (s < PL_bufend) {
2506 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2507 newargv[1] = s;
3280af22 2508 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2509 s++;
2510 *s = '\0';
3280af22 2511 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2512 }
2513 else
3280af22 2514 newargv = PL_origargv;
774d564b 2515 newargv[0] = ipath;
80252599 2516 PerlProc_execv(ipath, newargv);
cea2e8a9 2517 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2518 }
748a9306 2519 if (d) {
3280af22
NIS
2520 U32 oldpdb = PL_perldb;
2521 bool oldn = PL_minus_n;
2522 bool oldp = PL_minus_p;
748a9306
LW
2523
2524 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2525 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2526
2527 if (*d++ == '-') {
8cc95fdb
PP
2528 do {
2529 if (*d == 'M' || *d == 'm') {
2530 char *m = d;
2531 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2532 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2533 (int)(d - m), m);
2534 }
2535 d = moreswitches(d);
2536 } while (d);
84902520 2537 if (PERLDB_LINE && !oldpdb ||
3280af22 2538 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2539 /* if we have already added "LINE: while (<>) {",
2540 we must not do it again */
748a9306 2541 {
3280af22
NIS
2542 sv_setpv(PL_linestr, "");
2543 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2544 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2545 PL_preambled = FALSE;
84902520 2546 if (PERLDB_LINE)
3280af22 2547 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2548 goto retry;
2549 }
a0d0e21e 2550 }
79072805 2551 }
9f68db38 2552 }
79072805 2553 }
3280af22
NIS
2554 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2555 PL_bufptr = s;
2556 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2557 return yylex();
ae986130 2558 }
378cc40b 2559 goto retry;
4fdae800 2560 case '\r':
6a27c188 2561#ifdef PERL_STRICT_CR
cea2e8a9
GS
2562 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2563 Perl_croak(aTHX_
54310121 2564 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2565#endif
4fdae800 2566 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2567 s++;
2568 goto retry;
378cc40b 2569 case '#':
e929a76b 2570 case '\n':
3280af22
NIS
2571 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2572 d = PL_bufend;
a687059c 2573 while (s < d && *s != '\n')
378cc40b 2574 s++;
0f85fab0 2575 if (s < d)
378cc40b 2576 s++;
463ee0b2 2577 incline(s);
3280af22
NIS
2578 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2579 PL_bufptr = s;
2580 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2581 return yylex();
a687059c 2582 }
378cc40b 2583 }
a687059c 2584 else {
378cc40b 2585 *s = '\0';
3280af22 2586 PL_bufend = s;
a687059c 2587 }
378cc40b
LW
2588 goto retry;
2589 case '-':
79072805 2590 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2591 s++;
3280af22 2592 PL_bufptr = s;
748a9306
LW
2593 tmp = *s++;
2594
3280af22 2595 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2596 s++;
2597
2598 if (strnEQ(s,"=>",2)) {
3280af22 2599 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2600 OPERATOR('-'); /* unary minus */
2601 }
3280af22
NIS
2602 PL_last_uni = PL_oldbufptr;
2603 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2604 switch (tmp) {
79072805
LW
2605 case 'r': FTST(OP_FTEREAD);
2606 case 'w': FTST(OP_FTEWRITE);
2607 case 'x': FTST(OP_FTEEXEC);
2608 case 'o': FTST(OP_FTEOWNED);
2609 case 'R': FTST(OP_FTRREAD);
2610 case 'W': FTST(OP_FTRWRITE);
2611 case 'X': FTST(OP_FTREXEC);
2612 case 'O': FTST(OP_FTROWNED);
2613 case 'e': FTST(OP_FTIS);
2614 case 'z': FTST(OP_FTZERO);
2615 case 's': FTST(OP_FTSIZE);
2616 case 'f': FTST(OP_FTFILE);
2617 case 'd': FTST(OP_FTDIR);
2618 case 'l': FTST(OP_FTLINK);
2619 case 'p': FTST(OP_FTPIPE);
2620 case 'S': FTST(OP_FTSOCK);
2621 case 'u': FTST(OP_FTSUID);
2622 case 'g': FTST(OP_FTSGID);
2623 case 'k': FTST(OP_FTSVTX);
2624 case 'b': FTST(OP_FTBLK);
2625 case 'c': FTST(OP_FTCHR);
2626 case 't': FTST(OP_FTTTY);
2627 case 'T': FTST(OP_FTTEXT);
2628 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2629 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2630 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2631 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2632 default:
cea2e8a9 2633 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2634 break;
2635 }
2636 }
a687059c
LW
2637 tmp = *s++;
2638 if (*s == tmp) {
2639 s++;
3280af22 2640 if (PL_expect == XOPERATOR)
79072805
LW
2641 TERM(POSTDEC);
2642 else
2643 OPERATOR(PREDEC);
2644 }
2645 else if (*s == '>') {
2646 s++;
2647 s = skipspace(s);
7e2040f0 2648 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2649 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2650 TOKEN(ARROW);
79072805 2651 }
748a9306
LW
2652 else if (*s == '$')
2653 OPERATOR(ARROW);
463ee0b2 2654 else
748a9306 2655 TERM(ARROW);
a687059c 2656 }
3280af22 2657 if (PL_expect == XOPERATOR)
79072805
LW
2658 Aop(OP_SUBTRACT);
2659 else {
3280af22 2660 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2661 check_uni();
79072805 2662 OPERATOR('-'); /* unary minus */
2f3197b3 2663 }
79072805 2664
378cc40b 2665 case '+':
a687059c
LW
2666 tmp = *s++;
2667 if (*s == tmp) {
378cc40b 2668 s++;
3280af22 2669 if (PL_expect == XOPERATOR)
79072805
LW
2670 TERM(POSTINC);
2671 else
2672 OPERATOR(PREINC);
378cc40b 2673 }
3280af22 2674 if (PL_expect == XOPERATOR)
79072805
LW
2675 Aop(OP_ADD);
2676 else {
3280af22 2677 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2678 check_uni();
a687059c 2679 OPERATOR('+');
2f3197b3 2680 }
a687059c 2681
378cc40b 2682 case '*':
3280af22
NIS
2683 if (PL_expect != XOPERATOR) {
2684 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2685 PL_expect = XOPERATOR;
2686 force_ident(PL_tokenbuf, '*');
2687 if (!*PL_tokenbuf)
a0d0e21e 2688 PREREF('*');
79072805 2689 TERM('*');
a687059c 2690 }
79072805
LW
2691 s++;
2692 if (*s == '*') {
a687059c 2693 s++;
79072805 2694 PWop(OP_POW);
a687059c 2695 }
79072805
LW
2696 Mop(OP_MULTIPLY);
2697
378cc40b 2698 case '%':
3280af22 2699 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2700 ++s;
2701 Mop(OP_MODULO);
a687059c 2702 }
3280af22
NIS
2703 PL_tokenbuf[0] = '%';
2704 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2705 if (!PL_tokenbuf[1]) {
2706 if (s == PL_bufend)
bbce6d69
PP
2707 yyerror("Final % should be \\% or %name");
2708 PREREF('%');
a687059c 2709 }
3280af22 2710 PL_pending_ident = '%';
bbce6d69 2711 TERM('%');
a687059c 2712
378cc40b 2713 case '^':
79072805 2714 s++;
a0d0e21e 2715 BOop(OP_BIT_XOR);
79072805 2716 case '[':
3280af22 2717 PL_lex_brackets++;
79072805 2718 /* FALL THROUGH */
378cc40b 2719 case '~':
378cc40b 2720 case ',':
378cc40b
LW
2721 tmp = *s++;
2722 OPERATOR(tmp);
a0d0e21e
LW
2723 case ':':
2724 if (s[1] == ':') {
2725 len = 0;
2726 goto just_a_word;
2727 }
2728 s++;
09bef843
SB
2729 switch (PL_expect) {
2730 OP *attrs;
2731 case XOPERATOR:
2732 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2733 break;
2734 PL_bufptr = s; /* update in case we back off */
2735 goto grabattrs;
2736 case XATTRBLOCK:
2737 PL_expect = XBLOCK;
2738 goto grabattrs;
2739 case XATTRTERM:
2740 PL_expect = XTERMBLOCK;
2741 grabattrs:
2742 s = skipspace(s);
2743 attrs = Nullop;
7e2040f0 2744 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2745 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2746 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2747 if (tmp < 0) tmp = -tmp;
2748 switch (tmp) {
2749 case KEY_or:
2750 case KEY_and:
2751 case KEY_for:
2752 case KEY_unless:
2753 case KEY_if:
2754 case KEY_while:
2755 case KEY_until:
2756 goto got_attrs;
2757 default:
2758 break;
2759 }
2760 }
09bef843
SB
2761 if (*d == '(') {
2762 d = scan_str(d,TRUE,TRUE);
2763 if (!d) {
2764 if (PL_lex_stuff) {
2765 SvREFCNT_dec(PL_lex_stuff);
2766 PL_lex_stuff = Nullsv;
2767 }
2768 /* MUST advance bufptr here to avoid bogus
2769 "at end of line" context messages from yyerror().
2770 */
2771 PL_bufptr = s + len;
2772 yyerror("Unterminated attribute parameter in attribute list");
2773 if (attrs)
2774 op_free(attrs);
2775 return 0; /* EOF indicator */
2776 }
2777 }
2778 if (PL_lex_stuff) {
2779 SV *sv = newSVpvn(s, len);
2780 sv_catsv(sv, PL_lex_stuff);
2781 attrs = append_elem(OP_LIST, attrs,
2782 newSVOP(OP_CONST, 0, sv));
2783 SvREFCNT_dec(PL_lex_stuff);
2784 PL_lex_stuff = Nullsv;
2785 }
2786 else {
2787 attrs = append_elem(OP_LIST, attrs,
2788 newSVOP(OP_CONST, 0,
2789 newSVpvn(s, len)));
2790 }
2791 s = skipspace(d);
0120eecf 2792 if (*s == ':' && s[1] != ':')
09bef843 2793 s = skipspace(s+1);
0120eecf
GS
2794 else if (s == d)
2795 break; /* require real whitespace or :'s */
09bef843 2796 }
f9829d6b
GS
2797 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2798 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2799 char q = ((*s == '\'') ? '"' : '\'');
2800 /* If here for an expression, and parsed no attrs, back off. */
2801 if (tmp == '=' && !attrs) {
2802 s = PL_bufptr;
2803 break;
2804 }
2805 /* MUST advance bufptr here to avoid bogus "at end of line"
2806 context messages from yyerror().
2807 */
2808 PL_bufptr = s;
2809 if (!*s)
2810 yyerror("Unterminated attribute list");
2811 else
2812 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2813 q, *s, q));
2814 if (attrs)
2815 op_free(attrs);
2816 OPERATOR(':');
2817 }
f9829d6b 2818 got_attrs:
09bef843
SB
2819 if (attrs) {
2820 PL_nextval[PL_nexttoke].opval = attrs;
2821 force_next(THING);
2822 }
2823 TOKEN(COLONATTR);
2824 }
a0d0e21e 2825 OPERATOR(':');
8990e307
LW
2826 case '(':
2827 s++;
3280af22
NIS
2828 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2829 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2830 else
3280af22 2831 PL_expect = XTERM;
a0d0e21e 2832 TOKEN('(');
378cc40b 2833 case ';':
57843af0
GS
2834 if (CopLINE(PL_curcop) < PL_copline)
2835 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2836 tmp = *s++;
2837 OPERATOR(tmp);
2838 case ')':
378cc40b 2839 tmp = *s++;
16d20bd9
AD
2840 s = skipspace(s);
2841 if (*s == '{')
2842 PREBLOCK(tmp);
378cc40b 2843 TERM(tmp);
79072805
LW
2844 case ']':
2845 s++;
3280af22 2846 if (PL_lex_brackets <= 0)
d98d5fff 2847 yyerror("Unmatched right square bracket");
463ee0b2 2848 else
3280af22
NIS
2849 --PL_lex_brackets;
2850 if (PL_lex_state == LEX_INTERPNORMAL) {
2851 if (PL_lex_brackets == 0) {
a0d0e21e 2852 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2853 PL_lex_state = LEX_INTERPEND;
79072805
LW
2854 }
2855 }
4633a7c4 2856 TERM(']');
79072805
LW
2857 case '{':
2858 leftbracket:
79072805 2859 s++;
3280af22
NIS
2860 if (PL_lex_brackets > 100) {
2861 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2862 if (newlb != PL_lex_brackstack) {
8990e307 2863 SAVEFREEPV(newlb);
3280af22 2864 PL_lex_brackstack = newlb;
8990e307
LW
2865 }
2866 }
3280af22 2867 switch (PL_expect) {
a0d0e21e 2868 case XTERM:
3280af22 2869 if (PL_lex_formbrack) {
a0d0e21e
LW
2870 s--;
2871 PRETERMBLOCK(DO);
2872 }
3280af22
NIS
2873 if (PL_oldoldbufptr == PL_last_lop)
2874 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2875 else
3280af22 2876 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2877 OPERATOR(HASHBRACK);
a0d0e21e 2878 case XOPERATOR:
3280af22 2879 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2880 s++;
44a8e56a 2881 d = s;
3280af22
NIS
2882 PL_tokenbuf[0] = '\0';
2883 if (d < PL_bufend && *d == '-') {
2884 PL_tokenbuf[0] = '-';
44a8e56a 2885 d++;
3280af22 2886 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2887 d++;
2888 }
7e2040f0 2889 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2890 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2891 FALSE, &len);
3280af22 2892 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2893 d++;
2894 if (*d == '}') {
3280af22 2895 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2896 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2897 if (minus)
2898 force_next('-');
748a9306
LW
2899 }
2900 }
2901 /* FALL THROUGH */
09bef843 2902 case XATTRBLOCK:
748a9306 2903 case XBLOCK:
3280af22
NIS
2904 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2905 PL_expect = XSTATE;
a0d0e21e 2906 break;
09bef843 2907 case XATTRTERM:
a0d0e21e 2908 case XTERMBLOCK:
3280af22
NIS
2909 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2910 PL_expect = XSTATE;
a0d0e21e
LW
2911 break;
2912 default: {
2913 char *t;
3280af22
NIS
2914 if (PL_oldoldbufptr == PL_last_lop)
2915 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2916 else
3280af22 2917 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2918 s = skipspace(s);
09ecc4b6 2919 if (*s == '}')
a0d0e21e 2920 OPERATOR(HASHBRACK);
b8a4b1be
GS
2921 /* This hack serves to disambiguate a pair of curlies
2922 * as being a block or an anon hash. Normally, expectation
2923 * determines that, but in cases where we're not in a
2924 * position to expect anything in particular (like inside
2925 * eval"") we have to resolve the ambiguity. This code
2926 * covers the case where the first term in the curlies is a
2927 * quoted string. Most other cases need to be explicitly
2928 * disambiguated by prepending a `+' before the opening
2929 * curly in order to force resolution as an anon hash.
2930 *
2931 * XXX should probably propagate the outer expectation
2932 * into eval"" to rely less on this hack, but that could
2933 * potentially break current behavior of eval"".
2934 * GSAR 97-07-21
2935 */
2936 t = s;
2937 if (*s == '\'' || *s == '"' || *s == '`') {
2938 /* common case: get past first string, handling escapes */
3280af22 2939 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2940 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2941 t++;
2942 t++;
a0d0e21e 2943 }
b8a4b1be 2944 else if (*s == 'q') {
3280af22 2945 if (++t < PL_bufend
b8a4b1be 2946 && (!isALNUM(*t)
3280af22 2947 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2948 && !isALNUM(*t))))
2949 {
b8a4b1be
GS
2950 char *tmps;
2951 char open, close, term;
2952 I32 brackets = 1;
2953
3280af22 2954 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2955 t++;
2956 term = *t;
2957 open = term;
2958 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2959 term = tmps[5];
2960 close = term;
2961 if (open == close)
3280af22
NIS
2962 for (t++; t < PL_bufend; t++) {
2963 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2964 t++;
6d07e5e9 2965 else if (*t == open)
b8a4b1be
GS
2966 break;
2967 }
2968 else
3280af22
NIS
2969 for (t++; t < PL_bufend; t++) {
2970 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2971 t++;
6d07e5e9 2972 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2973 break;
2974 else if (*t == open)
2975 brackets++;
2976 }
2977 }
2978 t++;
a0d0e21e 2979 }
7e2040f0 2980 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 2981 t += UTF8SKIP(t);
7e2040f0 2982 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 2983 t += UTF8SKIP(t);
a0d0e21e 2984 }
3280af22 2985 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2986 t++;
b8a4b1be
GS
2987 /* if comma follows first term, call it an anon hash */
2988 /* XXX it could be a comma expression with loop modifiers */
3280af22 2989 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2990 || (*t == '=' && t[1] == '>')))
a0d0e21e 2991 OPERATOR(HASHBRACK);
3280af22 2992 if (PL_expect == XREF)
4e4e412b 2993 PL_expect = XTERM;
a0d0e21e 2994 else {
3280af22
NIS
2995 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2996 PL_expect = XSTATE;
a0d0e21e 2997 }
8990e307 2998 }
a0d0e21e 2999 break;
463ee0b2 3000 }
57843af0 3001 yylval.ival = CopLINE(PL_curcop);
79072805 3002 if (isSPACE(*s) || *s == '#')
3280af22 3003 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3004 TOKEN('{');
378cc40b 3005 case '}':
79072805
LW
3006 rightbracket:
3007 s++;
3280af22 3008 if (PL_lex_brackets <= 0)
d98d5fff 3009 yyerror("Unmatched right curly bracket");
463ee0b2 3010 else
3280af22
NIS
3011 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3012 if (PL_lex_brackets < PL_lex_formbrack)
3013 PL_lex_formbrack = 0;
3014 if (PL_lex_state == LEX_INTERPNORMAL) {
3015 if (PL_lex_brackets == 0) {
9059aa12
LW
3016 if (PL_expect & XFAKEBRACK) {
3017 PL_expect &= XENUMMASK;
3280af22
NIS
3018 PL_lex_state = LEX_INTERPEND;
3019 PL_bufptr = s;
cea2e8a9 3020 return yylex(); /* ignore fake brackets */
79072805 3021 }
fa83b5b6 3022 if (*s == '-' && s[1] == '>')
3280af22 3023 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3024 else if (*s != '[' && *s != '{')
3280af22 3025 PL_lex_state = LEX_INTERPEND;
79072805
LW
3026 }
3027 }
9059aa12
LW
3028 if (PL_expect & XFAKEBRACK) {
3029 PL_expect &= XENUMMASK;
3280af22 3030 PL_bufptr = s;
cea2e8a9 3031 return yylex(); /* ignore fake brackets */
748a9306 3032 }
79072805
LW
3033 force_next('}');
3034 TOKEN(';');
378cc40b
LW
3035 case '&':
3036 s++;
3037 tmp = *s++;
3038 if (tmp == '&')
a0d0e21e 3039 AOPERATOR(ANDAND);
378cc40b 3040 s--;
3280af22 3041 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3042 if (ckWARN(WARN_SEMICOLON)
3043 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3044 {
57843af0 3045 CopLINE_dec(PL_curcop);
cea2e8a9 3046 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3047 CopLINE_inc(PL_curcop);
463ee0b2 3048 }
79072805 3049 BAop(OP_BIT_AND);
463ee0b2 3050 }
79072805 3051
3280af22
NIS
3052 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3053 if (*PL_tokenbuf) {
3054 PL_expect = XOPERATOR;
3055 force_ident(PL_tokenbuf, '&');
463ee0b2 3056 }
79072805
LW
3057 else
3058 PREREF('&');
c07a80fd 3059 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3060 TERM('&');
3061
378cc40b
LW
3062 case '|':
3063 s++;
3064 tmp = *s++;
3065 if (tmp == '|')
a0d0e21e 3066 AOPERATOR(OROR);
378cc40b 3067 s--;
79072805 3068 BOop(OP_BIT_OR);
378cc40b
LW
3069 case '=':
3070 s++;
3071 tmp = *s++;
3072 if (tmp == '=')
79072805
LW
3073 Eop(OP_EQ);
3074 if (tmp == '>')
3075 OPERATOR(',');
378cc40b 3076 if (tmp == '~')
79072805 3077 PMop(OP_MATCH);
599cee73 3078 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3079 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3080 s--;
3280af22
NIS
3081 if (PL_expect == XSTATE && isALPHA(tmp) &&
3082 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3083 {
3280af22
NIS
3084 if (PL_in_eval && !PL_rsfp) {
3085 d = PL_bufend;
a5f75d66
AD
3086 while (s < d) {
3087 if (*s++ == '\n') {
3088 incline(s);
3089 if (strnEQ(s,"=cut",4)) {
3090 s = strchr(s,'\n');
3091 if (s)
3092 s++;
3093 else
3094 s = d;
3095 incline(s);
3096 goto retry;
3097 }
3098 }
3099 }
3100 goto retry;
3101 }
3280af22
NIS
3102 s = PL_bufend;
3103 PL_doextract = TRUE;
a0d0e21e
LW
3104 goto retry;
3105 }
3280af22 3106 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3107 char *t;
51882d45 3108#ifdef PERL_STRICT_CR
a0d0e21e 3109 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3110#else
3111 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3112#endif
a0d0e21e
LW
3113 if (*t == '\n' || *t == '#') {
3114 s--;
3280af22 3115 PL_expect = XBLOCK;
a0d0e21e
LW
3116 goto leftbracket;
3117 }
79072805 3118 }
a0d0e21e
LW
3119 yylval.ival = 0;
3120 OPERATOR(ASSIGNOP);
378cc40b
LW
3121 case '!':
3122 s++;
3123 tmp = *s++;
3124 if (tmp == '=')
79072805 3125 Eop(OP_NE);
378cc40b 3126 if (tmp == '~')
79072805 3127 PMop(OP_NOT);
378cc40b
LW
3128 s--;
3129 OPERATOR('!');
3130 case '<':
3280af22 3131 if (PL_expect != XOPERATOR) {
93a17b20 3132 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3133 check_uni();
79072805
LW
3134 if (s[1] == '<')
3135 s = scan_heredoc(s);
3136 else
3137 s = scan_inputsymbol(s);
3138 TERM(sublex_start());
378cc40b
LW
3139 }
3140 s++;
3141 tmp = *s++;
3142 if (tmp == '<')
79072805 3143 SHop(OP_LEFT_SHIFT);
395c3793
LW
3144 if (tmp == '=') {
3145 tmp = *s++;
3146 if (tmp == '>')
79072805 3147 Eop(OP_NCMP);
395c3793 3148 s--;
79072805 3149 Rop(OP_LE);
395c3793 3150 }
378cc40b 3151 s--;
79072805 3152 Rop(OP_LT);
378cc40b
LW
3153 case '>':
3154 s++;
3155 tmp = *s++;
3156 if (tmp == '>')
79072805 3157 SHop(OP_RIGHT_SHIFT);
378cc40b 3158 if (tmp == '=')
79072805 3159 Rop(OP_GE);
378cc40b 3160 s--;
79072805 3161 Rop(OP_GT);
378cc40b
LW
3162
3163 case '$':
bbce6d69
PP
3164 CLINE;
3165
3280af22
NIS
3166 if (PL_expect == XOPERATOR) {
3167 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3168 PL_expect = XTERM;
a0d0e21e 3169 depcom();
bbce6d69 3170 return ','; /* grandfather non-comma-format format */
a0d0e21e 3171 }
8990e307 3172 }
a0d0e21e 3173
7e2040f0 3174 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3175 PL_tokenbuf[0] = '@';
376b8730
SM
3176 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3177 sizeof PL_tokenbuf - 1, FALSE);
3178 if (PL_expect == XOPERATOR)
3179 no_op("Array length", s);
3280af22 3180 if (!PL_tokenbuf[1])
a0d0e21e 3181 PREREF(DOLSHARP);
3280af22
NIS
3182 PL_expect = XOPERATOR;
3183 PL_pending_ident = '#';
463ee0b2 3184 TOKEN(DOLSHARP);
79072805 3185 }
bbce6d69 3186
3280af22 3187 PL_tokenbuf[0] = '$';
376b8730
SM
3188 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3189 sizeof PL_tokenbuf - 1, FALSE);
3190 if (PL_expect == XOPERATOR)
3191 no_op("Scalar", s);
3280af22
NIS
3192 if (!PL_tokenbuf[1]) {
3193 if (s == PL_bufend)
bbce6d69
PP
3194 yyerror("Final $ should be \\$ or $name");
3195 PREREF('$');
8990e307 3196 }
a0d0e21e 3197
bbce6d69 3198 /* This kludge not intended to be bulletproof. */
3280af22 3199 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3200 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3201 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3202 yylval.opval->op_private = OPpCONST_ARYBASE;
3203 TERM(THING);
3204 }
3205
ff68c719 3206 d = s;
69d2bceb 3207 tmp = (I32)*s;
3280af22 3208 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3209 s = skipspace(s);
3210
3280af22 3211 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3212 char *t;
3213 if (*s == '[') {
3280af22 3214 PL_tokenbuf[0] = '@';
599cee73 3215 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3216 for(t = s + 1;
7e2040f0 3217 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3218 t++) ;
a0d0e21e 3219 if (*t++ == ',') {
3280af22
NIS
3220 PL_bufptr = skipspace(PL_bufptr);
3221 while (t < PL_bufend && *t != ']')
bbce6d69 3222 t++;
cea2e8a9 3223 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3224 "Multidimensional syntax %.*s not supported",
3225 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3226 }
3227 }
bbce6d69
PP
3228 }
3229 else if (*s == '{') {
3280af22 3230 PL_tokenbuf[0] = '%';
599cee73 3231 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3232 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3233 {
3280af22 3234 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3235 STRLEN len;
3236 for (t++; isSPACE(*t); t++) ;
7e2040f0 3237 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3238 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3239 for (; isSPACE(*t); t++) ;
864dbfa3 3240 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3241 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3242 "You need to quote \"%s\"", tmpbuf);
748a9306 3243 }
93a17b20
LW
3244 }
3245 }
2f3197b3 3246 }
bbce6d69 3247
3280af22 3248 PL_expect = XOPERATOR;
69d2bceb 3249 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3250 bool islop = (PL_last_lop == PL_oldoldbufptr);
3251 if (!islop || PL_last_lop_op == OP_GREPSTART)
3252 PL_expect = XOPERATOR;
bbce6d69 3253 else if (strchr("$@\"'`q", *s))
3280af22 3254 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3255 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3256 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3257 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3258 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3259 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3260 if (tmp = keyword(tmpbuf, len)) {
3261 /* binary operators exclude handle interpretations */
3262 switch (tmp) {
3263 case -KEY_x:
3264 case -KEY_eq:
3265 case -KEY_ne:
3266 case -KEY_gt:
3267 case -KEY_lt:
3268 case -KEY_ge:
3269 case -KEY_le:
3270 case -KEY_cmp:
3271 break;
3272 default:
3280af22 3273 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3274 break;
3275 }
3276 }
68dc0745
PP
3277 else {
3278 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3279 if (gv && GvCVu(gv))
3280af22 3280 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3281 }
93a17b20 3282 }
bbce6d69 3283 else if (isDIGIT(*s))
3280af22 3284 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3285 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3286 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3287 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3288 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3289 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3290 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3291 }
3280af22 3292 PL_pending_ident = '$';
79072805 3293 TOKEN('$');
378cc40b
LW
3294
3295 case '@':
3280af22 3296 if (PL_expect == XOPERATOR)
bbce6d69 3297 no_op("Array", s);
3280af22
NIS
3298 PL_tokenbuf[0] = '@';
3299 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3300 if (!PL_tokenbuf[1]) {
3301 if (s == PL_bufend)
bbce6d69
PP
3302 yyerror("Final @ should be \\@ or @name");
3303 PREREF('@');
3304 }
3280af22 3305 if (PL_lex_state == LEX_NORMAL)
ff68c719 3306 s = skipspace(s);
3280af22 3307 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3308 if (*s == '{')
3280af22 3309 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3310
3311 /* Warn about @ where they meant $. */
599cee73 3312 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3313 if (*s == '[' || *s == '{') {
3314 char *t = s + 1;
7e2040f0 3315 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3316 t++;
3317 if (*t == '}' || *t == ']') {
3318 t++;
3280af22 3319 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3320 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3321 "Scalar value %.*s better written as $%.*s",
3280af22 3322 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3323 }
93a17b20
LW
3324 }
3325 }
463ee0b2 3326 }
3280af22 3327 PL_pending_ident = '@';
79072805 3328 TERM('@');
378cc40b
LW
3329
3330 case '/': /* may either be division or pattern */
3331 case '?': /* may either be conditional or pattern */
3280af22 3332 if (PL_expect != XOPERATOR) {
c277df42 3333 /* Disable warning on "study /blah/" */
3280af22
NIS
3334 if (PL_oldoldbufptr == PL_last_uni
3335 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3336 || memNE(PL_last_uni, "study", 5)
3337 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3338 check_uni();
8782bef2 3339 s = scan_pat(s,OP_MATCH);
79072805 3340 TERM(sublex_start());
378cc40b
LW
3341 }
3342 tmp = *s++;
a687059c 3343 if (tmp == '/')
79072805 3344 Mop(OP_DIVIDE);
378cc40b
LW
3345 OPERATOR(tmp);
3346
3347 case '.':
51882d45
GS
3348 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3349#ifdef PERL_STRICT_CR
3350 && s[1] == '\n'
3351#else
3352 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3353#endif
3354 && (s == PL_linestart || s[-1] == '\n') )
3355 {
3280af22
NIS
3356 PL_lex_formbrack = 0;
3357 PL_expect = XSTATE;
79072805
LW
3358 goto rightbracket;
3359 }
3280af22 3360 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3361 tmp = *s++;
a687059c
LW
3362 if (*s == tmp) {
3363 s++;
2f3197b3
LW
3364 if (*s == tmp) {
3365 s++;
79072805 3366 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3367 }
3368 else
79072805 3369 yylval.ival = 0;
378cc40b 3370 OPERATOR(DOTDOT);
a687059c 3371 }
3280af22 3372 if (PL_expect != XOPERATOR)
2f3197b3 3373 check_uni();
79072805 3374 Aop(OP_CONCAT);
378cc40b
LW
3375 }
3376 /* FALL THROUGH */
3377 case '0': case '1': case '2': case '3': case '4':
3378 case '5': case '6': case '7': case '8': case '9':
79072805 3379 s = scan_num(s);
3280af22 3380 if (PL_expect == XOPERATOR)
8990e307 3381 no_op("Number",s);
79072805
LW
3382 TERM(THING);
3383
3384 case '\'':
09bef843 3385 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3386 if (PL_expect == XOPERATOR) {
3387 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3388 PL_expect = XTERM;
a0d0e21e
LW
3389 depcom();
3390 return ','; /* grandfather non-comma-format format */
3391 }
463ee0b2 3392 else
8990e307 3393 no_op("String",s);
463ee0b2 3394 }
79072805 3395 if (!s)
85e6fe83 3396 missingterm((char*)0);
79072805
LW
3397 yylval.ival = OP_CONST;
3398 TERM(sublex_start());
3399
3400 case '"':
09bef843 3401 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3402 if (PL_expect == XOPERATOR) {
3403 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3404 PL_expect = XTERM;
a0d0e21e
LW
3405 depcom();
3406 return ','; /* grandfather non-comma-format format */
3407 }
463ee0b2 3408 else
8990e307 3409 no_op("String",s);
463ee0b2 3410 }
79072805 3411 if (!s)
85e6fe83 3412 missingterm((char*)0);
4633a7c4 3413 yylval.ival = OP_CONST;
3280af22 3414 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3415 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3416 yylval.ival = OP_STRINGIFY;
3417 break;
3418 }
3419 }
79072805
LW
3420 TERM(sublex_start());
3421
3422 case '`':
09bef843 3423 s = scan_str(s,FALSE,FALSE);
3280af22 3424 if (PL_expect == XOPERATOR)
8990e307 3425 no_op("Backticks",s);
79072805 3426 if (!s)
85e6fe83 3427 missingterm((char*)0);
79072805
LW
3428 yylval.ival = OP_BACKTICK;
3429 set_csh();
3430 TERM(sublex_start());
3431
3432 case '\\':
3433 s++;
599cee73 3434 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3435 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3436 *s, *s);
3280af22 3437 if (PL_expect == XOPERATOR)
8990e307 3438 no_op("Backslash",s);
79072805
LW
3439 OPERATOR(REFGEN);
3440
a7cb1f99
GS
3441 case 'v':
3442 if (isDIGIT(s[1]) && PL_expect == XTERM) {
3443 char *start = s;
3444 start++;
3445 start++;
3446 while (isDIGIT(*start))
3447 start++;
3448 if (*start == '.' && isDIGIT(start[1])) {
3449 s = scan_num(s);
3450 TERM(THING);
3451 }
3452 }
3453 goto keylookup;
79072805 3454 case 'x':
3280af22 3455 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3456 s++;
3457 Mop(OP_REPEAT);
2f3197b3 3458 }
79072805
LW
3459 goto keylookup;
3460
378cc40b 3461 case '_':
79072805
LW
3462 case 'a': case 'A':
3463 case 'b': case 'B':
3464 case 'c': case 'C':
3465 case 'd': case 'D':
3466 case 'e': case 'E':
3467 case 'f': case 'F':
3468 case 'g': case 'G':
3469 case 'h': case 'H':
3470 case 'i': case 'I':
3471 case 'j': case 'J':
3472 case 'k': case 'K':
3473 case 'l': case 'L':
3474 case 'm': case 'M':
3475 case 'n': case 'N':
3476 case 'o': case 'O':
3477 case 'p': case 'P':
3478 case 'q': case 'Q':
3479 case 'r': case 'R':
3480 case 's': case 'S':
3481 case 't': case 'T':
3482 case 'u': case 'U':
a7cb1f99 3483 case 'V':
79072805
LW
3484 case 'w': case 'W':
3485 case 'X':
3486 case 'y': case 'Y':
3487 case 'z': case 'Z':
3488
49dc05e3 3489 keylookup: {
2d8e6c8d 3490 STRLEN n_a;
161b471a
NIS
3491 gv = Nullgv;
3492 gvp = 0;
49dc05e3 3493
3280af22
NIS
3494 PL_bufptr = s;
3495 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3496
3497 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3498 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3499 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3500 (PL_tokenbuf[0] == 'q' &&
3501 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3502
3503 /* x::* is just a word, unless x is "CORE" */
3280af22 3504 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3505 goto just_a_word;
3506
3643fb5f 3507 d = s;
3280af22 3508 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3509 d++; /* no comments skipped here, or s### is misparsed */
3510
3511 /* Is this a label? */
3280af22
NIS
3512 if (!tmp && PL_expect == XSTATE
3513 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3514 s = d + 1;
3280af22 3515 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3516 CLINE;
3517 TOKEN(LABEL);
3643fb5f
CS
3518 }
3519
3520 /* Check for keywords */
3280af22 3521 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3522
3523 /* Is this a word before a => operator? */
748a9306
LW
3524 if (strnEQ(d,"=>",2)) {
3525 CLINE;
3280af22 3526 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3527 yylval.opval->op_private = OPpCONST_BARE;
3528 TERM(WORD);
3529 }
3530
a0d0e21e 3531 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3532 GV *ogv = Nullgv; /* override (winner) */
3533 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3534 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3535 CV *cv;
3280af22 3536 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3537 (cv = GvCVu(gv)))
3538 {
3539 if (GvIMPORTED_CV(gv))
3540 ogv = gv;
3541 else if (! CvMETHOD(cv))
3542 hgv = gv;
3543 }
3544 if (!ogv &&
3280af22
NIS
3545 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3546 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3547 GvCVu(gv) && GvIMPORTED_CV(gv))
3548 {
3549 ogv = gv;
3550 }
3551 }
3552 if (ogv) {
3553 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3554 }
3555 else if (gv && !gvp
3556 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3557 && GvCVu(gv)
3280af22 3558 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3559 {
3560 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3561 }
56f7f34b
CS
3562 else { /* no override */
3563 tmp = -tmp;
3564 gv = Nullgv;
3565 gvp = 0;
4944e2f7
GS
3566 if (ckWARN(WARN_AMBIGUOUS) && hgv
3567 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3568 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3569 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3570 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3571 }
a0d0e21e
LW
3572 }
3573
3574 reserved_word:
3575 switch (tmp) {
79072805
LW
3576
3577 default: /* not a keyword */
93a17b20 3578 just_a_word: {
96e4d5b1 3579 SV *sv;
3280af22 3580 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3581
3582 /* Get the rest if it looks like a package qualifier */
3583
a0d0e21e 3584 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3585 STRLEN morelen;
3280af22 3586 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3587 TRUE, &morelen);
3588 if (!morelen)