This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
rename totally bletcherous SvLOCK() thingy (doesn't do what the
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
bf4acbe4
GS
42/* On MacOS, respect nonbreaking spaces */
43#ifdef MACOS_TRADITIONAL
44#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
45#else
46#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
47#endif
48
ffb4593c
NT
49/* LEX_* are values for PL_lex_state, the state of the lexer.
50 * They are arranged oddly so that the guard on the switch statement
79072805
LW
51 * can get by with a single comparison (if the compiler is smart enough).
52 */
53
fb73857a 54/* #define LEX_NOTPARSING 11 is done in perl.h. */
55
55497cff 56#define LEX_NORMAL 10
57#define LEX_INTERPNORMAL 9
58#define LEX_INTERPCASEMOD 8
59#define LEX_INTERPPUSH 7
60#define LEX_INTERPSTART 6
61#define LEX_INTERPEND 5
62#define LEX_INTERPENDMAYBE 4
63#define LEX_INTERPCONCAT 3
64#define LEX_INTERPCONST 2
65#define LEX_FORMLINE 1
66#define LEX_KNOWNEXT 0
79072805 67
a790bc05 68/* XXX If this causes problems, set i_unistd=undef in the hint file. */
69#ifdef I_UNISTD
70# include <unistd.h> /* Needed for execv() */
71#endif
72
73
79072805
LW
74#ifdef ff_next
75#undef ff_next
d48672a2
LW
76#endif
77
a1a0e61e
TD
78#ifdef USE_PURE_BISON
79YYSTYPE* yylval_pointer = NULL;
80int* yychar_pointer = NULL;
22c35a8c
GS
81# undef yylval
82# undef yychar
e4bfbdd4
JH
83# define yylval (*yylval_pointer)
84# define yychar (*yychar_pointer)
85# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
86# undef yylex
87# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
88#endif
89
79072805 90#include "keywords.h"
fe14fcc3 91
ffb4593c
NT
92/* CLINE is a macro that ensures PL_copline has a sane value */
93
ae986130
LW
94#ifdef CLINE
95#undef CLINE
96#endif
57843af0 97#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 98
ffb4593c
NT
99/*
100 * Convenience functions to return different tokens and prime the
9cbb5ea2 101 * lexer for the next token. They all take an argument.
ffb4593c
NT
102 *
103 * TOKEN : generic token (used for '(', DOLSHARP, etc)
104 * OPERATOR : generic operator
105 * AOPERATOR : assignment operator
106 * PREBLOCK : beginning the block after an if, while, foreach, ...
107 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
108 * PREREF : *EXPR where EXPR is not a simple identifier
109 * TERM : expression term
110 * LOOPX : loop exiting command (goto, last, dump, etc)
111 * FTST : file test operator
112 * FUN0 : zero-argument function
2d2e263d 113 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
114 * BOop : bitwise or or xor
115 * BAop : bitwise and
116 * SHop : shift operator
117 * PWop : power operator
9cbb5ea2 118 * PMop : pattern-matching operator
ffb4593c
NT
119 * Aop : addition-level operator
120 * Mop : multiplication-level operator
121 * Eop : equality-testing operator
122 * Rop : relational operator <= != gt
123 *
124 * Also see LOP and lop() below.
125 */
126
3280af22
NIS
127#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
128#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
129#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
130#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
131#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
132#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
133#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
134#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
135#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
136#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
137#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
138#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
139#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
140#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
141#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
142#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
143#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
144#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
145#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
146#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 147
a687059c
LW
148/* This bit of chicanery makes a unary function followed by
149 * a parenthesis into a function with one argument, highest precedence.
150 */
2f3197b3 151#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_expect = XTERM, \
153 PL_bufptr = s, \
154 PL_last_uni = PL_oldbufptr, \
155 PL_last_lop_op = f, \
a687059c
LW
156 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
157
79072805 158#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
159 PL_bufptr = s, \
160 PL_last_uni = PL_oldbufptr, \
79072805
LW
161 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
162
9f68db38 163/* grandfather return to old style */
3280af22 164#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 165
ffb4593c
NT
166/*
167 * S_ao
168 *
169 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
170 * into an OP_ANDASSIGN or OP_ORASSIGN
171 */
172
76e3520e 173STATIC int
cea2e8a9 174S_ao(pTHX_ int toketype)
a0d0e21e 175{
3280af22
NIS
176 if (*PL_bufptr == '=') {
177 PL_bufptr++;
a0d0e21e
LW
178 if (toketype == ANDAND)
179 yylval.ival = OP_ANDASSIGN;
180 else if (toketype == OROR)
181 yylval.ival = OP_ORASSIGN;
182 toketype = ASSIGNOP;
183 }
184 return toketype;
185}
186
ffb4593c
NT
187/*
188 * S_no_op
189 * When Perl expects an operator and finds something else, no_op
190 * prints the warning. It always prints "<something> found where
191 * operator expected. It prints "Missing semicolon on previous line?"
192 * if the surprise occurs at the start of the line. "do you need to
193 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
194 * where the compiler doesn't know if foo is a method call or a function.
195 * It prints "Missing operator before end of line" if there's nothing
196 * after the missing operator, or "... before <...>" if there is something
197 * after the missing operator.
198 */
199
76e3520e 200STATIC void
cea2e8a9 201S_no_op(pTHX_ char *what, char *s)
463ee0b2 202{
3280af22
NIS
203 char *oldbp = PL_bufptr;
204 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 205
1189a94a
GS
206 if (!s)
207 s = oldbp;
208 else {
209 assert(s >= oldbp);
210 PL_bufptr = s;
211 }
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
221 }
222 else
cea2e8a9 223 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 224 PL_bufptr = oldbp;
8990e307
LW
225}
226
ffb4593c
NT
227/*
228 * S_missingterm
229 * Complain about missing quote/regexp/heredoc terminator.
230 * If it's called with (char *)NULL then it cauterizes the line buffer.
231 * If we're in a delimited string and the delimiter is a control
232 * character, it's reformatted into a two-char sequence like ^C.
233 * This is fatal.
234 */
235
76e3520e 236STATIC void
cea2e8a9 237S_missingterm(pTHX_ char *s)
8990e307
LW
238{
239 char tmpbuf[3];
240 char q;
241 if (s) {
242 char *nl = strrchr(s,'\n');
d2719217 243 if (nl)
8990e307
LW
244 *nl = '\0';
245 }
9d116dd7
JH
246 else if (
247#ifdef EBCDIC
248 iscntrl(PL_multi_close)
249#else
250 PL_multi_close < 32 || PL_multi_close == 127
251#endif
252 ) {
8990e307 253 *tmpbuf = '^';
3280af22 254 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
255 s = "\\n";
256 tmpbuf[2] = '\0';
257 s = tmpbuf;
258 }
259 else {
3280af22 260 *tmpbuf = PL_multi_close;
8990e307
LW
261 tmpbuf[1] = '\0';
262 s = tmpbuf;
263 }
264 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 265 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 266}
79072805 267
ffb4593c
NT
268/*
269 * Perl_deprecate
ffb4593c
NT
270 */
271
79072805 272void
864dbfa3 273Perl_deprecate(pTHX_ char *s)
a0d0e21e 274{
d008e5eb 275 dTHR;
599cee73 276 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 277 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
278}
279
ffb4593c
NT
280/*
281 * depcom
9cbb5ea2 282 * Deprecate a comma-less variable list.
ffb4593c
NT
283 */
284
76e3520e 285STATIC void
cea2e8a9 286S_depcom(pTHX)
a0d0e21e
LW
287{
288 deprecate("comma-less variable list");
289}
290
ffb4593c 291/*
9cbb5ea2
GS
292 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
293 * utf16-to-utf8-reversed.
ffb4593c
NT
294 */
295
c39cd008
GS
296#ifdef PERL_CR_FILTER
297static void
298strip_return(SV *sv)
299{
300 register char *s = SvPVX(sv);
301 register char *e = s + SvCUR(sv);
302 /* outer loop optimized to do nothing if there are no CR-LFs */
303 while (s < e) {
304 if (*s++ == '\r' && *s == '\n') {
305 /* hit a CR-LF, need to copy the rest */
306 register char *d = s - 1;
307 *d++ = *s++;
308 while (s < e) {
309 if (*s == '\r' && s[1] == '\n')
310 s++;
311 *d++ = *s++;
312 }
313 SvCUR(sv) -= s - d;
314 return;
315 }
316 }
317}
a868473f 318
76e3520e 319STATIC I32
c39cd008 320S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 321{
c39cd008
GS
322 I32 count = FILTER_READ(idx+1, sv, maxlen);
323 if (count > 0 && !maxlen)
324 strip_return(sv);
325 return count;
a868473f
NIS
326}
327#endif
328
b250498f 329#ifdef PERL_UTF16_FILTER
a0ed51b3 330STATIC I32
cea2e8a9 331S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
332{
333 I32 count = FILTER_READ(idx+1, sv, maxlen);
334 if (count) {
dfe13c55
GS
335 U8* tmps;
336 U8* tend;
337 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 338 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 339 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
340 }
341 return count;
342}
343
344STATIC I32
cea2e8a9 345S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
346{
347 I32 count = FILTER_READ(idx+1, sv, maxlen);
348 if (count) {
dfe13c55
GS
349 U8* tmps;
350 U8* tend;
351 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 352 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 353 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
354 }
355 return count;
356}
155aba94 357#endif
a868473f 358
ffb4593c
NT
359/*
360 * Perl_lex_start
9cbb5ea2
GS
361 * Initialize variables. Uses the Perl save_stack to save its state (for
362 * recursive calls to the parser).
ffb4593c
NT
363 */
364
a0d0e21e 365void
864dbfa3 366Perl_lex_start(pTHX_ SV *line)
79072805 367{
0f15f207 368 dTHR;
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
386 PL_nexttoke = 0;
387 }
57843af0 388 SAVECOPLINE(PL_curcop);
3280af22
NIS
389 SAVEPPTR(PL_bufptr);
390 SAVEPPTR(PL_bufend);
391 SAVEPPTR(PL_oldbufptr);
392 SAVEPPTR(PL_oldoldbufptr);
393 SAVEPPTR(PL_linestart);
394 SAVESPTR(PL_linestr);
395 SAVEPPTR(PL_lex_brackstack);
396 SAVEPPTR(PL_lex_casestack);
c76ac1ee 397 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
398 SAVESPTR(PL_lex_stuff);
399 SAVEI32(PL_lex_defer);
09bef843 400 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 401 SAVESPTR(PL_lex_repl);
bebdddfc
GS
402 SAVEINT(PL_expect);
403 SAVEINT(PL_lex_expect);
3280af22
NIS
404
405 PL_lex_state = LEX_NORMAL;
406 PL_lex_defer = 0;
407 PL_expect = XSTATE;
408 PL_lex_brackets = 0;
3280af22
NIS
409 New(899, PL_lex_brackstack, 120, char);
410 New(899, PL_lex_casestack, 12, char);
411 SAVEFREEPV(PL_lex_brackstack);
412 SAVEFREEPV(PL_lex_casestack);
413 PL_lex_casemods = 0;
414 *PL_lex_casestack = '\0';
415 PL_lex_dojoin = 0;
416 PL_lex_starts = 0;
417 PL_lex_stuff = Nullsv;
418 PL_lex_repl = Nullsv;
419 PL_lex_inpat = 0;
420 PL_lex_inwhat = 0;
09bef843 421 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
422 PL_linestr = line;
423 if (SvREADONLY(PL_linestr))
424 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
425 s = SvPV(PL_linestr, len);
8990e307 426 if (len && s[len-1] != ';') {
3280af22
NIS
427 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
428 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
429 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 430 }
3280af22
NIS
431 SvTEMP_off(PL_linestr);
432 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
433 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
434 SvREFCNT_dec(PL_rs);
79cb57f6 435 PL_rs = newSVpvn("\n", 1);
3280af22 436 PL_rsfp = 0;
79072805 437}
a687059c 438
ffb4593c
NT
439/*
440 * Perl_lex_end
9cbb5ea2
GS
441 * Finalizer for lexing operations. Must be called when the parser is
442 * done with the lexer.
ffb4593c
NT
443 */
444
463ee0b2 445void
864dbfa3 446Perl_lex_end(pTHX)
463ee0b2 447{
3280af22 448 PL_doextract = FALSE;
463ee0b2
LW
449}
450
ffb4593c
NT
451/*
452 * S_incline
453 * This subroutine has nothing to do with tilting, whether at windmills
454 * or pinball tables. Its name is short for "increment line". It
57843af0 455 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 456 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
457 * # line 500 "foo.pm"
458 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
459 */
460
76e3520e 461STATIC void
cea2e8a9 462S_incline(pTHX_ char *s)
463ee0b2 463{
0f15f207 464 dTHR;
463ee0b2
LW
465 char *t;
466 char *n;
73659bf1 467 char *e;
463ee0b2 468 char ch;
463ee0b2 469
57843af0 470 CopLINE_inc(PL_curcop);
463ee0b2
LW
471 if (*s++ != '#')
472 return;
bf4acbe4 473 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
474 if (strnEQ(s, "line", 4))
475 s += 4;
476 else
477 return;
478 if (*s == ' ' || *s == '\t')
479 s++;
480 else
481 return;
bf4acbe4 482 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
483 if (!isDIGIT(*s))
484 return;
485 n = s;
486 while (isDIGIT(*s))
487 s++;
bf4acbe4 488 while (SPACE_OR_TAB(*s))
463ee0b2 489 s++;
73659bf1 490 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 491 s++;
73659bf1
GS
492 e = t + 1;
493 }
463ee0b2 494 else {
463ee0b2 495 for (t = s; !isSPACE(*t); t++) ;
73659bf1 496 e = t;
463ee0b2 497 }
bf4acbe4 498 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
499 e++;
500 if (*e != '\n' && *e != '\0')
501 return; /* false alarm */
502
463ee0b2
LW
503 ch = *t;
504 *t = '\0';
f4dd75d9
GS
505 if (t - s > 0) {
506#ifdef USE_ITHREADS
507 Safefree(CopFILE(PL_curcop));
508#else
509 SvREFCNT_dec(CopFILEGV(PL_curcop));
510#endif
57843af0 511 CopFILE_set(PL_curcop, s);
f4dd75d9 512 }
463ee0b2 513 *t = ch;
57843af0 514 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
515}
516
ffb4593c
NT
517/*
518 * S_skipspace
519 * Called to gobble the appropriate amount and type of whitespace.
520 * Skips comments as well.
521 */
522
76e3520e 523STATIC char *
cea2e8a9 524S_skipspace(pTHX_ register char *s)
a687059c 525{
11343788 526 dTHR;
3280af22 527 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 528 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
529 s++;
530 return s;
531 }
532 for (;;) {
fd049845 533 STRLEN prevlen;
09bef843
SB
534 SSize_t oldprevlen, oldoldprevlen;
535 SSize_t oldloplen, oldunilen;
60e6418e
GS
536 while (s < PL_bufend && isSPACE(*s)) {
537 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
538 incline(s);
539 }
ffb4593c
NT
540
541 /* comment */
3280af22
NIS
542 if (s < PL_bufend && *s == '#') {
543 while (s < PL_bufend && *s != '\n')
463ee0b2 544 s++;
60e6418e 545 if (s < PL_bufend) {
463ee0b2 546 s++;
60e6418e
GS
547 if (PL_in_eval && !PL_rsfp) {
548 incline(s);
549 continue;
550 }
551 }
463ee0b2 552 }
ffb4593c
NT
553
554 /* only continue to recharge the buffer if we're at the end
555 * of the buffer, we're not reading from a source filter, and
556 * we're in normal lexing mode
557 */
09bef843
SB
558 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
559 PL_lex_state == LEX_FORMLINE)
463ee0b2 560 return s;
ffb4593c
NT
561
562 /* try to recharge the buffer */
9cbb5ea2
GS
563 if ((s = filter_gets(PL_linestr, PL_rsfp,
564 (prevlen = SvCUR(PL_linestr)))) == Nullch)
565 {
566 /* end of file. Add on the -p or -n magic */
3280af22
NIS
567 if (PL_minus_n || PL_minus_p) {
568 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
569 ";}continue{print or die qq(-p destination: $!\\n)" :
570 "");
3280af22
NIS
571 sv_catpv(PL_linestr,";}");
572 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
573 }
574 else
3280af22 575 sv_setpv(PL_linestr,";");
ffb4593c
NT
576
577 /* reset variables for next time we lex */
9cbb5ea2
GS
578 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
579 = SvPVX(PL_linestr);
3280af22 580 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
581
582 /* Close the filehandle. Could be from -P preprocessor,
583 * STDIN, or a regular file. If we were reading code from
584 * STDIN (because the commandline held no -e or filename)
585 * then we don't close it, we reset it so the code can
586 * read from STDIN too.
587 */
588
3280af22
NIS
589 if (PL_preprocess && !PL_in_eval)
590 (void)PerlProc_pclose(PL_rsfp);
591 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
592 PerlIO_clearerr(PL_rsfp);
8990e307 593 else
3280af22
NIS
594 (void)PerlIO_close(PL_rsfp);
595 PL_rsfp = Nullfp;
463ee0b2
LW
596 return s;
597 }
ffb4593c
NT
598
599 /* not at end of file, so we only read another line */
09bef843
SB
600 /* make corresponding updates to old pointers, for yyerror() */
601 oldprevlen = PL_oldbufptr - PL_bufend;
602 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
603 if (PL_last_uni)
604 oldunilen = PL_last_uni - PL_bufend;
605 if (PL_last_lop)
606 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
607 PL_linestart = PL_bufptr = s + prevlen;
608 PL_bufend = s + SvCUR(PL_linestr);
609 s = PL_bufptr;
09bef843
SB
610 PL_oldbufptr = s + oldprevlen;
611 PL_oldoldbufptr = s + oldoldprevlen;
612 if (PL_last_uni)
613 PL_last_uni = s + oldunilen;
614 if (PL_last_lop)
615 PL_last_lop = s + oldloplen;
a0d0e21e 616 incline(s);
ffb4593c
NT
617
618 /* debugger active and we're not compiling the debugger code,
619 * so store the line into the debugger's array of lines
620 */
3280af22 621 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
622 SV *sv = NEWSV(85,0);
623
624 sv_upgrade(sv, SVt_PVMG);
3280af22 625 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 626 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 627 }
463ee0b2 628 }
a687059c 629}
378cc40b 630
ffb4593c
NT
631/*
632 * S_check_uni
633 * Check the unary operators to ensure there's no ambiguity in how they're
634 * used. An ambiguous piece of code would be:
635 * rand + 5
636 * This doesn't mean rand() + 5. Because rand() is a unary operator,
637 * the +5 is its argument.
638 */
639
76e3520e 640STATIC void
cea2e8a9 641S_check_uni(pTHX)
ba106d47 642{
2f3197b3 643 char *s;
a0d0e21e 644 char *t;
0453d815 645 dTHR;
2f3197b3 646
3280af22 647 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 648 return;
3280af22
NIS
649 while (isSPACE(*PL_last_uni))
650 PL_last_uni++;
7e2040f0 651 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 652 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 653 return;
0453d815 654 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 655 char ch = *s;
0453d815
PM
656 *s = '\0';
657 Perl_warner(aTHX_ WARN_AMBIGUOUS,
658 "Warning: Use of \"%s\" without parens is ambiguous",
659 PL_last_uni);
660 *s = ch;
661 }
2f3197b3
LW
662}
663
ffb4593c
NT
664/* workaround to replace the UNI() macro with a function. Only the
665 * hints/uts.sh file mentions this. Other comments elsewhere in the
666 * source indicate Microport Unix might need it too.
667 */
668
ffed7fef
LW
669#ifdef CRIPPLED_CC
670
671#undef UNI
ffed7fef 672#define UNI(f) return uni(f,s)
ffed7fef 673
76e3520e 674STATIC int
cea2e8a9 675S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
676{
677 yylval.ival = f;
3280af22
NIS
678 PL_expect = XTERM;
679 PL_bufptr = s;
8f872242
NIS
680 PL_last_uni = PL_oldbufptr;
681 PL_last_lop_op = f;
ffed7fef
LW
682 if (*s == '(')
683 return FUNC1;
684 s = skipspace(s);
685 if (*s == '(')
686 return FUNC1;
687 else
688 return UNIOP;
689}
690
a0d0e21e
LW
691#endif /* CRIPPLED_CC */
692
ffb4593c
NT
693/*
694 * LOP : macro to build a list operator. Its behaviour has been replaced
695 * with a subroutine, S_lop() for which LOP is just another name.
696 */
697
a0d0e21e
LW
698#define LOP(f,x) return lop(f,x,s)
699
ffb4593c
NT
700/*
701 * S_lop
702 * Build a list operator (or something that might be one). The rules:
703 * - if we have a next token, then it's a list operator [why?]
704 * - if the next thing is an opening paren, then it's a function
705 * - else it's a list operator
706 */
707
76e3520e 708STATIC I32
a0be28da 709S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 710{
0f15f207 711 dTHR;
79072805 712 yylval.ival = f;
35c8bce7 713 CLINE;
3280af22
NIS
714 PL_expect = x;
715 PL_bufptr = s;
716 PL_last_lop = PL_oldbufptr;
717 PL_last_lop_op = f;
718 if (PL_nexttoke)
a0d0e21e 719 return LSTOP;
79072805
LW
720 if (*s == '(')
721 return FUNC;
722 s = skipspace(s);
723 if (*s == '(')
724 return FUNC;
725 else
726 return LSTOP;
727}
728
ffb4593c
NT
729/*
730 * S_force_next
9cbb5ea2 731 * When the lexer realizes it knows the next token (for instance,
ffb4593c 732 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
733 * to know what token to return the next time the lexer is called. Caller
734 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
735 * handles the token correctly.
ffb4593c
NT
736 */
737
76e3520e 738STATIC void
cea2e8a9 739S_force_next(pTHX_ I32 type)
79072805 740{
3280af22
NIS
741 PL_nexttype[PL_nexttoke] = type;
742 PL_nexttoke++;
743 if (PL_lex_state != LEX_KNOWNEXT) {
744 PL_lex_defer = PL_lex_state;
745 PL_lex_expect = PL_expect;
746 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
747 }
748}
749
ffb4593c
NT
750/*
751 * S_force_word
752 * When the lexer knows the next thing is a word (for instance, it has
753 * just seen -> and it knows that the next char is a word char, then
754 * it calls S_force_word to stick the next word into the PL_next lookahead.
755 *
756 * Arguments:
b1b65b59 757 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
758 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
759 * int check_keyword : if true, Perl checks to make sure the word isn't
760 * a keyword (do this if the word is a label, e.g. goto FOO)
761 * int allow_pack : if true, : characters will also be allowed (require,
762 * use, etc. do this)
9cbb5ea2 763 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
764 */
765
76e3520e 766STATIC char *
cea2e8a9 767S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 768{
463ee0b2
LW
769 register char *s;
770 STRLEN len;
771
772 start = skipspace(start);
773 s = start;
7e2040f0 774 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 775 (allow_pack && *s == ':') ||
15f0808c 776 (allow_initial_tick && *s == '\'') )
a0d0e21e 777 {
3280af22
NIS
778 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
779 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
780 return start;
781 if (token == METHOD) {
782 s = skipspace(s);
783 if (*s == '(')
3280af22 784 PL_expect = XTERM;
463ee0b2 785 else {
3280af22 786 PL_expect = XOPERATOR;
463ee0b2 787 }
79072805 788 }
3280af22
NIS
789 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
790 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
791 force_next(token);
792 }
793 return s;
794}
795
ffb4593c
NT
796/*
797 * S_force_ident
9cbb5ea2 798 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
799 * text only contains the "foo" portion. The first argument is a pointer
800 * to the "foo", and the second argument is the type symbol to prefix.
801 * Forces the next token to be a "WORD".
9cbb5ea2 802 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
803 */
804
76e3520e 805STATIC void
cea2e8a9 806S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
807{
808 if (s && *s) {
11343788 809 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 810 PL_nextval[PL_nexttoke].opval = o;
79072805 811 force_next(WORD);
748a9306 812 if (kind) {
e858de61 813 dTHR; /* just for in_eval */
11343788 814 o->op_private = OPpCONST_ENTERED;
55497cff 815 /* XXX see note in pp_entereval() for why we forgo typo
816 warnings if the symbol must be introduced in an eval.
817 GSAR 96-10-12 */
3280af22 818 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
819 kind == '$' ? SVt_PV :
820 kind == '@' ? SVt_PVAV :
821 kind == '%' ? SVt_PVHV :
822 SVt_PVGV
823 );
748a9306 824 }
79072805
LW
825 }
826}
827
1571675a
GS
828NV
829Perl_str_to_version(pTHX_ SV *sv)
830{
831 NV retval = 0.0;
832 NV nshift = 1.0;
833 STRLEN len;
834 char *start = SvPVx(sv,len);
3aa33fe5 835 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
836 char *end = start + len;
837 while (start < end) {
838 I32 skip;
839 UV n;
840 if (utf)
841 n = utf8_to_uv((U8*)start, &skip);
842 else {
843 n = *(U8*)start;
844 skip = 1;
845 }
846 retval += ((NV)n)/nshift;
847 start += skip;
848 nshift *= 1000;
849 }
850 return retval;
851}
852
ffb4593c
NT
853/*
854 * S_force_version
855 * Forces the next token to be a version number.
856 */
857
76e3520e 858STATIC char *
cea2e8a9 859S_force_version(pTHX_ char *s)
89bfa8cd 860{
861 OP *version = Nullop;
44dcb63b 862 char *d;
89bfa8cd 863
864 s = skipspace(s);
865
44dcb63b 866 d = s;
dd629d5b 867 if (*d == 'v')
44dcb63b 868 d++;
44dcb63b 869 if (isDIGIT(*d)) {
a7cb1f99 870 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
89bfa8cd 873 s = scan_num(s);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd 881 }
882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 886 force_next(WORD);
887
888 return (s);
889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
3280af22 920 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
922 while (s < send) {
923 if (*s == '\\') {
a0d0e21e 924 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
925 s++; /* all that, just for this */
926 }
927 *d++ = *s++;
928 }
929 *d = '\0';
463ee0b2 930 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 931 finish:
3280af22 932 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 933 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
934 return sv;
935}
936
ffb4593c
NT
937/*
938 * Now come three functions related to double-quote context,
939 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
940 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
941 * interact with PL_lex_state, and create fake ( ... ) argument lists
942 * to handle functions and concatenation.
943 * They assume that whoever calls them will be setting up a fake
944 * join call, because each subthing puts a ',' after it. This lets
945 * "lower \luPpEr"
946 * become
947 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
948 *
949 * (I'm not sure whether the spurious commas at the end of lcfirst's
950 * arguments and join's arguments are created or not).
951 */
952
953/*
954 * S_sublex_start
955 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
956 *
957 * Pattern matching will set PL_lex_op to the pattern-matching op to
958 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
959 *
960 * OP_CONST and OP_READLINE are easy--just make the new op and return.
961 *
962 * Everything else becomes a FUNC.
963 *
964 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
965 * had an OP_CONST or OP_READLINE). This just sets us up for a
966 * call to S_sublex_push().
967 */
968
76e3520e 969STATIC I32
cea2e8a9 970S_sublex_start(pTHX)
79072805
LW
971{
972 register I32 op_type = yylval.ival;
79072805
LW
973
974 if (op_type == OP_NULL) {
3280af22
NIS
975 yylval.opval = PL_lex_op;
976 PL_lex_op = Nullop;
79072805
LW
977 return THING;
978 }
979 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 980 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
981
982 if (SvTYPE(sv) == SVt_PVIV) {
983 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
984 STRLEN len;
985 char *p;
986 SV *nsv;
987
988 p = SvPV(sv, len);
79cb57f6 989 nsv = newSVpvn(p, len);
feee6ba4
JH
990 if (SvUTF8(sv))
991 SvUTF8_on(nsv);
b3ac6de7
IZ
992 SvREFCNT_dec(sv);
993 sv = nsv;
994 }
995 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 996 PL_lex_stuff = Nullsv;
79072805
LW
997 return THING;
998 }
999
3280af22
NIS
1000 PL_sublex_info.super_state = PL_lex_state;
1001 PL_sublex_info.sub_inwhat = op_type;
1002 PL_sublex_info.sub_op = PL_lex_op;
1003 PL_lex_state = LEX_INTERPPUSH;
55497cff 1004
3280af22
NIS
1005 PL_expect = XTERM;
1006 if (PL_lex_op) {
1007 yylval.opval = PL_lex_op;
1008 PL_lex_op = Nullop;
55497cff 1009 return PMFUNC;
1010 }
1011 else
1012 return FUNC;
1013}
1014
ffb4593c
NT
1015/*
1016 * S_sublex_push
1017 * Create a new scope to save the lexing state. The scope will be
1018 * ended in S_sublex_done. Returns a '(', starting the function arguments
1019 * to the uc, lc, etc. found before.
1020 * Sets PL_lex_state to LEX_INTERPCONCAT.
1021 */
1022
76e3520e 1023STATIC I32
cea2e8a9 1024S_sublex_push(pTHX)
55497cff 1025{
0f15f207 1026 dTHR;
f46d017c 1027 ENTER;
55497cff 1028
3280af22
NIS
1029 PL_lex_state = PL_sublex_info.super_state;
1030 SAVEI32(PL_lex_dojoin);
1031 SAVEI32(PL_lex_brackets);
3280af22
NIS
1032 SAVEI32(PL_lex_casemods);
1033 SAVEI32(PL_lex_starts);
1034 SAVEI32(PL_lex_state);
7766f137 1035 SAVEVPTR(PL_lex_inpat);
3280af22 1036 SAVEI32(PL_lex_inwhat);
57843af0 1037 SAVECOPLINE(PL_curcop);
3280af22
NIS
1038 SAVEPPTR(PL_bufptr);
1039 SAVEPPTR(PL_oldbufptr);
1040 SAVEPPTR(PL_oldoldbufptr);
1041 SAVEPPTR(PL_linestart);
1042 SAVESPTR(PL_linestr);
1043 SAVEPPTR(PL_lex_brackstack);
1044 SAVEPPTR(PL_lex_casestack);
1045
1046 PL_linestr = PL_lex_stuff;
1047 PL_lex_stuff = Nullsv;
1048
9cbb5ea2
GS
1049 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1050 = SvPVX(PL_linestr);
3280af22
NIS
1051 PL_bufend += SvCUR(PL_linestr);
1052 SAVEFREESV(PL_linestr);
1053
1054 PL_lex_dojoin = FALSE;
1055 PL_lex_brackets = 0;
3280af22
NIS
1056 New(899, PL_lex_brackstack, 120, char);
1057 New(899, PL_lex_casestack, 12, char);
1058 SAVEFREEPV(PL_lex_brackstack);
1059 SAVEFREEPV(PL_lex_casestack);
1060 PL_lex_casemods = 0;
1061 *PL_lex_casestack = '\0';
1062 PL_lex_starts = 0;
1063 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1064 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1065
1066 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1067 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1068 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1069 else
3280af22 1070 PL_lex_inpat = Nullop;
79072805 1071
55497cff 1072 return '(';
79072805
LW
1073}
1074
ffb4593c
NT
1075/*
1076 * S_sublex_done
1077 * Restores lexer state after a S_sublex_push.
1078 */
1079
76e3520e 1080STATIC I32
cea2e8a9 1081S_sublex_done(pTHX)
79072805 1082{
3280af22
NIS
1083 if (!PL_lex_starts++) {
1084 PL_expect = XOPERATOR;
79cb57f6 1085 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1086 return THING;
1087 }
1088
3280af22
NIS
1089 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1090 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1091 return yylex();
79072805
LW
1092 }
1093
ffb4593c 1094 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1095 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1096 PL_linestr = PL_lex_repl;
1097 PL_lex_inpat = 0;
1098 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1099 PL_bufend += SvCUR(PL_linestr);
1100 SAVEFREESV(PL_linestr);
1101 PL_lex_dojoin = FALSE;
1102 PL_lex_brackets = 0;
3280af22
NIS
1103 PL_lex_casemods = 0;
1104 *PL_lex_casestack = '\0';
1105 PL_lex_starts = 0;
25da4f38 1106 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1107 PL_lex_state = LEX_INTERPNORMAL;
1108 PL_lex_starts++;
e9fa98b2
HS
1109 /* we don't clear PL_lex_repl here, so that we can check later
1110 whether this is an evalled subst; that means we rely on the
1111 logic to ensure sublex_done() is called again only via the
1112 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1113 }
e9fa98b2 1114 else {
3280af22 1115 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1116 PL_lex_repl = Nullsv;
1117 }
79072805 1118 return ',';
ffed7fef
LW
1119 }
1120 else {
f46d017c 1121 LEAVE;
3280af22
NIS
1122 PL_bufend = SvPVX(PL_linestr);
1123 PL_bufend += SvCUR(PL_linestr);
1124 PL_expect = XOPERATOR;
09bef843 1125 PL_sublex_info.sub_inwhat = 0;
79072805 1126 return ')';
ffed7fef
LW
1127 }
1128}
1129
02aa26ce
NT
1130/*
1131 scan_const
1132
1133 Extracts a pattern, double-quoted string, or transliteration. This
1134 is terrifying code.
1135
3280af22
NIS
1136 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1137 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1138 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1139
9b599b2a
GS
1140 Returns a pointer to the character scanned up to. Iff this is
1141 advanced from the start pointer supplied (ie if anything was
1142 successfully parsed), will leave an OP for the substring scanned
1143 in yylval. Caller must intuit reason for not parsing further
1144 by looking at the next characters herself.
1145
02aa26ce
NT
1146 In patterns:
1147 backslashes:
1148 double-quoted style: \r and \n
1149 regexp special ones: \D \s
1150 constants: \x3
1151 backrefs: \1 (deprecated in substitution replacements)
1152 case and quoting: \U \Q \E
1153 stops on @ and $, but not for $ as tail anchor
1154
1155 In transliterations:
1156 characters are VERY literal, except for - not at the start or end
1157 of the string, which indicates a range. scan_const expands the
1158 range to the full set of intermediate characters.
1159
1160 In double-quoted strings:
1161 backslashes:
1162 double-quoted style: \r and \n
1163 constants: \x3
1164 backrefs: \1 (deprecated)
1165 case and quoting: \U \Q \E
1166 stops on @ and $
1167
1168 scan_const does *not* construct ops to handle interpolated strings.
1169 It stops processing as soon as it finds an embedded $ or @ variable
1170 and leaves it to the caller to work out what's going on.
1171
1172 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1173
1174 $ in pattern could be $foo or could be tail anchor. Assumption:
1175 it's a tail anchor if $ is the last thing in the string, or if it's
1176 followed by one of ")| \n\t"
1177
1178 \1 (backreferences) are turned into $1
1179
1180 The structure of the code is
1181 while (there's a character to process) {
1182 handle transliteration ranges
1183 skip regexp comments
1184 skip # initiated comments in //x patterns
1185 check for embedded @foo
1186 check for embedded scalars
1187 if (backslash) {
1188 leave intact backslashes from leave (below)
1189 deprecate \1 in strings and sub replacements
1190 handle string-changing backslashes \l \U \Q \E, etc.
1191 switch (what was escaped) {
1192 handle - in a transliteration (becomes a literal -)
1193 handle \132 octal characters
1194 handle 0x15 hex characters
1195 handle \cV (control V)
1196 handle printf backslashes (\f, \r, \n, etc)
1197 } (end switch)
1198 } (end if backslash)
1199 } (end while character to read)
1200
1201*/
1202
76e3520e 1203STATIC char *
cea2e8a9 1204S_scan_const(pTHX_ char *start)
79072805 1205{
3280af22 1206 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1207 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1208 register char *s = start; /* start of the constant */
1209 register char *d = SvPVX(sv); /* destination for copies */
1210 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1211 bool didrange = FALSE; /* did we just finish a range? */
7e2040f0 1212 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1213 I32 len; /* ? */
012bcf8d
GS
1214 UV uv;
1215
ac2262e3 1216 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1217 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1218 : UTF;
ac2262e3 1219 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1220 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1221 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1222 : UTF;
dff6d3cd 1223 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1224 PL_lex_inpat
4a2d328f 1225 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1226 : "";
79072805
LW
1227
1228 while (s < send || dorange) {
02aa26ce 1229 /* get transliterations out of the way (they're most literal) */
3280af22 1230 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1231 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1232 if (dorange) {
02aa26ce 1233 I32 i; /* current expanded character */
8ada0baa 1234 I32 min; /* first character in range */
02aa26ce
NT
1235 I32 max; /* last character in range */
1236
1237 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1238 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1239 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1240 d -= 2; /* eat the first char and the - */
1241
8ada0baa
JH
1242 min = (U8)*d; /* first char in range */
1243 max = (U8)d[1]; /* last char in range */
1244
c2e66d9e
GS
1245
1246 if (min > max) {
1247 Perl_croak(aTHX_
1248 "Invalid [] range \"%c-%c\" in transliteration operator",
1249 min, max);
1250 }
1251
8ada0baa
JH
1252#ifndef ASCIIish
1253 if ((isLOWER(min) && isLOWER(max)) ||
1254 (isUPPER(min) && isUPPER(max))) {
1255 if (isLOWER(min)) {
1256 for (i = min; i <= max; i++)
1257 if (isLOWER(i))
1258 *d++ = i;
1259 } else {
1260 for (i = min; i <= max; i++)
1261 if (isUPPER(i))
1262 *d++ = i;
1263 }
1264 }
1265 else
1266#endif
1267 for (i = min; i <= max; i++)
1268 *d++ = i;
02aa26ce
NT
1269
1270 /* mark the range as done, and continue */
79072805 1271 dorange = FALSE;
c2e66d9e 1272 didrange = TRUE;
79072805 1273 continue;
c2e66d9e 1274 }
02aa26ce
NT
1275
1276 /* range begins (ignore - as first or last char) */
79072805 1277 else if (*s == '-' && s+1 < send && s != start) {
c2e66d9e 1278 if (didrange) {
1fafa243 1279 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
c2e66d9e 1280 }
a0ed51b3 1281 if (utf) {
a176fa2a 1282 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1283 s++;
1284 continue;
1285 }
79072805
LW
1286 dorange = TRUE;
1287 s++;
c2e66d9e
GS
1288 } else {
1289 didrange = FALSE;
1290 }
79072805 1291 }
02aa26ce
NT
1292
1293 /* if we get here, we're not doing a transliteration */
1294
0f5d15d6
IZ
1295 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1296 except for the last char, which will be done separately. */
3280af22 1297 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1298 if (s[2] == '#') {
1299 while (s < send && *s != ')')
1300 *d++ = *s++;
155aba94
GS
1301 }
1302 else if (s[2] == '{' /* This should match regcomp.c */
1303 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1304 {
cc6b7395 1305 I32 count = 1;
0f5d15d6 1306 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1307 char c;
1308
d9f97599
GS
1309 while (count && (c = *regparse)) {
1310 if (c == '\\' && regparse[1])
1311 regparse++;
cc6b7395
IZ
1312 else if (c == '{')
1313 count++;
1314 else if (c == '}')
1315 count--;
d9f97599 1316 regparse++;
cc6b7395 1317 }
5bdf89e7
IZ
1318 if (*regparse != ')') {
1319 regparse--; /* Leave one char for continuation. */
cc6b7395 1320 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1321 }
0f5d15d6 1322 while (s < regparse)
cc6b7395
IZ
1323 *d++ = *s++;
1324 }
748a9306 1325 }
02aa26ce
NT
1326
1327 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1328 else if (*s == '#' && PL_lex_inpat &&
1329 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1330 while (s+1 < send && *s != '\n')
1331 *d++ = *s++;
1332 }
02aa26ce
NT
1333
1334 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1335 else if (*s == '@' && s[1]
1336 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1337 break;
02aa26ce
NT
1338
1339 /* check for embedded scalars. only stop if we're sure it's a
1340 variable.
1341 */
79072805 1342 else if (*s == '$') {
3280af22 1343 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1344 break;
c277df42 1345 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1346 break; /* in regexp, $ might be tail anchor */
1347 }
02aa26ce 1348
a0ed51b3
LW
1349 /* (now in tr/// code again) */
1350
d008e5eb 1351 if (*s & 0x80 && thisutf) {
012bcf8d
GS
1352 (void)utf8_to_uv((U8*)s, &len);
1353 if (len == 1) {
1354 /* illegal UTF8, make it valid */
012bcf8d 1355 char *old_pvx = SvPVX(sv);
bee7c419
GS
1356 /* need space for one extra char (NOTE: SvCUR() not set here) */
1357 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1358 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1359 }
1360 else {
1361 while (len--)
1362 *d++ = *s++;
1363 }
1364 has_utf = TRUE;
1365 continue;
a0ed51b3
LW
1366 }
1367
02aa26ce 1368 /* backslashes */
79072805
LW
1369 if (*s == '\\' && s+1 < send) {
1370 s++;
02aa26ce
NT
1371
1372 /* some backslashes we leave behind */
c9f97d15 1373 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1374 *d++ = '\\';
1375 *d++ = *s++;
1376 continue;
1377 }
02aa26ce
NT
1378
1379 /* deprecate \1 in strings and substitution replacements */
3280af22 1380 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1381 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1382 {
d008e5eb 1383 dTHR; /* only for ckWARN */
599cee73 1384 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1385 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1386 *--s = '$';
1387 break;
1388 }
02aa26ce
NT
1389
1390 /* string-change backslash escapes */
3280af22 1391 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1392 --s;
1393 break;
1394 }
02aa26ce
NT
1395
1396 /* if we get here, it's either a quoted -, or a digit */
79072805 1397 switch (*s) {
02aa26ce
NT
1398
1399 /* quoted - in transliterations */
79072805 1400 case '-':
3280af22 1401 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1402 *d++ = *s++;
1403 continue;
1404 }
1405 /* FALL THROUGH */
1406 default:
11b8faa4
JH
1407 {
1408 dTHR;
7e84c16c 1409 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1410 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1411 "Unrecognized escape \\%c passed through",
1412 *s);
1413 /* default action is to copy the quoted character */
1414 *d++ = *s++;
1415 continue;
1416 }
02aa26ce
NT
1417
1418 /* \132 indicates an octal constant */
79072805
LW
1419 case '0': case '1': case '2': case '3':
1420 case '4': case '5': case '6': case '7':
b21ed0a9 1421 len = 0; /* disallow underscores */
012bcf8d 1422 uv = (UV)scan_oct(s, 3, &len);
79072805 1423 s += len;
012bcf8d 1424 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1425
1426 /* \x24 indicates a hex constant */
79072805 1427 case 'x':
a0ed51b3
LW
1428 ++s;
1429 if (*s == '{') {
1430 char* e = strchr(s, '}');
adaeee49 1431 if (!e) {
a0ed51b3 1432 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1433 e = s;
1434 }
b21ed0a9 1435 len = 1; /* allow underscores */
012bcf8d
GS
1436 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1437 s = e + 1;
a0ed51b3
LW
1438 }
1439 else {
b21ed0a9 1440 len = 0; /* disallow underscores */
012bcf8d
GS
1441 uv = (UV)scan_hex(s, 2, &len);
1442 s += len;
1443 }
1444
1445 NUM_ESCAPE_INSERT:
1446 /* Insert oct or hex escaped character.
1447 * There will always enough room in sv since such escapes will
1448 * be longer than any utf8 sequence they can end up as
1449 */
1450 if (uv > 127) {
1451 if (!thisutf && !has_utf && uv > 255) {
1452 /* might need to recode whatever we have accumulated so far
1453 * if it contains any hibit chars
1454 */
1455 int hicount = 0;
1456 char *c;
1457 for (c = SvPVX(sv); c < d; c++) {
1458 if (*c & 0x80)
1459 hicount++;
1460 }
1461 if (hicount) {
1462 char *old_pvx = SvPVX(sv);
1463 char *src, *dst;
1464 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1465
1466 src = d - 1;
1467 d += hicount;
1468 dst = d - 1;
1469
1470 while (src < dst) {
1471 if (*src & 0x80) {
1472 dst--;
1473 uv_to_utf8((U8*)dst, (U8)*src--);
1474 dst--;
1475 }
1476 else {
1477 *dst-- = *src--;
1478 }
1479 }
1480 }
1481 }
1482
1483 if (thisutf || uv > 255) {
1484 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1485 has_utf = TRUE;
012bcf8d 1486 }
a0ed51b3 1487 else {
012bcf8d 1488 *d++ = (char)uv;
a0ed51b3 1489 }
012bcf8d
GS
1490 }
1491 else {
1492 *d++ = (char)uv;
a0ed51b3 1493 }
79072805 1494 continue;
02aa26ce 1495
4a2d328f
IZ
1496 /* \N{latin small letter a} is a named character */
1497 case 'N':
423cee85
JH
1498 ++s;
1499 if (*s == '{') {
1500 char* e = strchr(s, '}');
155aba94 1501 SV *res;
423cee85
JH
1502 STRLEN len;
1503 char *str;
423cee85
JH
1504
1505 if (!e) {
5777a3f7 1506 yyerror("Missing right brace on \\N{}");
423cee85
JH
1507 e = s - 1;
1508 goto cont_scan;
1509 }
1510 res = newSVpvn(s + 1, e - s - 1);
1511 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1512 res, Nullsv, "\\N{...}" );
423cee85 1513 str = SvPV(res,len);
f08d6ad9
GS
1514 if (!has_utf && SvUTF8(res)) {
1515 char *ostart = SvPVX(sv);
1516 SvCUR_set(sv, d - ostart);
1517 SvPOK_on(sv);
1518 sv_utf8_upgrade(sv);
1519 d = SvPVX(sv) + SvCUR(sv);
e1992b6d 1520 has_utf = TRUE;
f08d6ad9 1521 }
423cee85
JH
1522 if (len > e - s + 4) {
1523 char *odest = SvPVX(sv);
1524
1525 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1526 d = SvPVX(sv) + (d - odest);
1527 }
1528 Copy(str, d, len, char);
1529 d += len;
1530 SvREFCNT_dec(res);
1531 cont_scan:
1532 s = e + 1;
1533 }
1534 else
5777a3f7 1535 yyerror("Missing braces on \\N{}");
423cee85
JH
1536 continue;
1537
02aa26ce 1538 /* \c is a control character */
79072805
LW
1539 case 'c':
1540 s++;
9d116dd7
JH
1541#ifdef EBCDIC
1542 *d = *s++;
1543 if (isLOWER(*d))
1544 *d = toUPPER(*d);
774a9426
GS
1545 *d = toCTRL(*d);
1546 d++;
9d116dd7 1547#else
bbce6d69 1548 len = *s++;
1549 *d++ = toCTRL(len);
9d116dd7 1550#endif
79072805 1551 continue;
02aa26ce
NT
1552
1553 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1554 case 'b':
1555 *d++ = '\b';
1556 break;
1557 case 'n':
1558 *d++ = '\n';
1559 break;
1560 case 'r':
1561 *d++ = '\r';
1562 break;
1563 case 'f':
1564 *d++ = '\f';
1565 break;
1566 case 't':
1567 *d++ = '\t';
1568 break;
34a3fe2a
PP
1569#ifdef EBCDIC
1570 case 'e':
1571 *d++ = '\047'; /* CP 1047 */
1572 break;
1573 case 'a':
1574 *d++ = '\057'; /* CP 1047 */
1575 break;
1576#else
79072805
LW
1577 case 'e':
1578 *d++ = '\033';
1579 break;
1580 case 'a':
1581 *d++ = '\007';
1582 break;
34a3fe2a 1583#endif
02aa26ce
NT
1584 } /* end switch */
1585
79072805
LW
1586 s++;
1587 continue;
02aa26ce
NT
1588 } /* end if (backslash) */
1589
79072805 1590 *d++ = *s++;
02aa26ce
NT
1591 } /* while loop to process each character */
1592
1593 /* terminate the string and set up the sv */
79072805 1594 *d = '\0';
463ee0b2 1595 SvCUR_set(sv, d - SvPVX(sv));
79072805 1596 SvPOK_on(sv);
7e2040f0
GS
1597 if (has_utf)
1598 SvUTF8_on(sv);
79072805 1599
02aa26ce 1600 /* shrink the sv if we allocated more than we used */
79072805
LW
1601 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1602 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1603 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1604 }
02aa26ce 1605
9b599b2a 1606 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1607 if (s > PL_bufptr) {
1608 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1609 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1610 sv, Nullsv,
3280af22 1611 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1612 ? "tr"
3280af22 1613 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1614 ? "s"
1615 : "qq")));
79072805 1616 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1617 } else
8990e307 1618 SvREFCNT_dec(sv);
79072805
LW
1619 return s;
1620}
1621
ffb4593c
NT
1622/* S_intuit_more
1623 * Returns TRUE if there's more to the expression (e.g., a subscript),
1624 * FALSE otherwise.
ffb4593c
NT
1625 *
1626 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1627 *
1628 * ->[ and ->{ return TRUE
1629 * { and [ outside a pattern are always subscripts, so return TRUE
1630 * if we're outside a pattern and it's not { or [, then return FALSE
1631 * if we're in a pattern and the first char is a {
1632 * {4,5} (any digits around the comma) returns FALSE
1633 * if we're in a pattern and the first char is a [
1634 * [] returns FALSE
1635 * [SOMETHING] has a funky algorithm to decide whether it's a
1636 * character class or not. It has to deal with things like
1637 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1638 * anything else returns TRUE
1639 */
1640
9cbb5ea2
GS
1641/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1642
76e3520e 1643STATIC int
cea2e8a9 1644S_intuit_more(pTHX_ register char *s)
79072805 1645{
3280af22 1646 if (PL_lex_brackets)
79072805
LW
1647 return TRUE;
1648 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1649 return TRUE;
1650 if (*s != '{' && *s != '[')
1651 return FALSE;
3280af22 1652 if (!PL_lex_inpat)
79072805
LW
1653 return TRUE;
1654
1655 /* In a pattern, so maybe we have {n,m}. */
1656 if (*s == '{') {
1657 s++;
1658 if (!isDIGIT(*s))
1659 return TRUE;
1660 while (isDIGIT(*s))
1661 s++;
1662 if (*s == ',')
1663 s++;
1664 while (isDIGIT(*s))
1665 s++;
1666 if (*s == '}')
1667 return FALSE;
1668 return TRUE;
1669
1670 }
1671
1672 /* On the other hand, maybe we have a character class */
1673
1674 s++;
1675 if (*s == ']' || *s == '^')
1676 return FALSE;
1677 else {
ffb4593c 1678 /* this is terrifying, and it works */
79072805
LW
1679 int weight = 2; /* let's weigh the evidence */
1680 char seen[256];
f27ffc4a 1681 unsigned char un_char = 255, last_un_char;
93a17b20 1682 char *send = strchr(s,']');
3280af22 1683 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1684
1685 if (!send) /* has to be an expression */
1686 return TRUE;
1687
1688 Zero(seen,256,char);
1689 if (*s == '$')
1690 weight -= 3;
1691 else if (isDIGIT(*s)) {
1692 if (s[1] != ']') {
1693 if (isDIGIT(s[1]) && s[2] == ']')
1694 weight -= 10;
1695 }
1696 else
1697 weight -= 100;
1698 }
1699 for (; s < send; s++) {
1700 last_un_char = un_char;
1701 un_char = (unsigned char)*s;
1702 switch (*s) {
1703 case '@':
1704 case '&':
1705 case '$':
1706 weight -= seen[un_char] * 10;
7e2040f0 1707 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1708 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1709 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1710 weight -= 100;
1711 else
1712 weight -= 10;
1713 }
1714 else if (*s == '$' && s[1] &&
93a17b20
LW
1715 strchr("[#!%*<>()-=",s[1])) {
1716 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1717 weight -= 10;
1718 else
1719 weight -= 1;
1720 }
1721 break;
1722 case '\\':
1723 un_char = 254;
1724 if (s[1]) {
93a17b20 1725 if (strchr("wds]",s[1]))
79072805
LW
1726 weight += 100;
1727 else if (seen['\''] || seen['"'])
1728 weight += 1;
93a17b20 1729 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1730 weight += 40;
1731 else if (isDIGIT(s[1])) {
1732 weight += 40;
1733 while (s[1] && isDIGIT(s[1]))
1734 s++;
1735 }
1736 }
1737 else
1738 weight += 100;
1739 break;
1740 case '-':
1741 if (s[1] == '\\')
1742 weight += 50;
93a17b20 1743 if (strchr("aA01! ",last_un_char))
79072805 1744 weight += 30;
93a17b20 1745 if (strchr("zZ79~",s[1]))
79072805 1746 weight += 30;
f27ffc4a
GS
1747 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1748 weight -= 5; /* cope with negative subscript */
79072805
LW
1749 break;
1750 default:
93a17b20 1751 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1752 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1753 char *d = tmpbuf;
1754 while (isALPHA(*s))
1755 *d++ = *s++;
1756 *d = '\0';
1757 if (keyword(tmpbuf, d - tmpbuf))
1758 weight -= 150;
1759 }
1760 if (un_char == last_un_char + 1)
1761 weight += 5;
1762 weight -= seen[un_char];
1763 break;
1764 }
1765 seen[un_char]++;
1766 }
1767 if (weight >= 0) /* probably a character class */
1768 return FALSE;
1769 }
1770
1771 return TRUE;
1772}
ffed7fef 1773
ffb4593c
NT
1774/*
1775 * S_intuit_method
1776 *
1777 * Does all the checking to disambiguate
1778 * foo bar
1779 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1780 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1781 *
1782 * First argument is the stuff after the first token, e.g. "bar".
1783 *
1784 * Not a method if bar is a filehandle.
1785 * Not a method if foo is a subroutine prototyped to take a filehandle.
1786 * Not a method if it's really "Foo $bar"
1787 * Method if it's "foo $bar"
1788 * Not a method if it's really "print foo $bar"
1789 * Method if it's really "foo package::" (interpreted as package->foo)
1790 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1791 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1792 * =>
1793 */
1794
76e3520e 1795STATIC int
cea2e8a9 1796S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1797{
1798 char *s = start + (*start == '$');
3280af22 1799 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1800 STRLEN len;
1801 GV* indirgv;
1802
1803 if (gv) {
b6c543e3 1804 CV *cv;
a0d0e21e
LW
1805 if (GvIO(gv))
1806 return 0;
b6c543e3
IZ
1807 if ((cv = GvCVu(gv))) {
1808 char *proto = SvPVX(cv);
1809 if (proto) {
1810 if (*proto == ';')
1811 proto++;
1812 if (*proto == '*')
1813 return 0;
1814 }
1815 } else
a0d0e21e
LW
1816 gv = 0;
1817 }
8903cb82 1818 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1819 /* start is the beginning of the possible filehandle/object,
1820 * and s is the end of it
1821 * tmpbuf is a copy of it
1822 */
1823
a0d0e21e 1824 if (*start == '$') {
3280af22 1825 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1826 return 0;
1827 s = skipspace(s);
3280af22
NIS
1828 PL_bufptr = start;
1829 PL_expect = XREF;
a0d0e21e
LW
1830 return *s == '(' ? FUNCMETH : METHOD;
1831 }
1832 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1833 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1834 len -= 2;
1835 tmpbuf[len] = '\0';
1836 goto bare_package;
1837 }
1838 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1839 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1840 return 0;
1841 /* filehandle or package name makes it a method */
89bfa8cd 1842 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1843 s = skipspace(s);
3280af22 1844 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1845 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1846 bare_package:
3280af22 1847 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1848 newSVpvn(tmpbuf,len));
3280af22
NIS
1849 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1850 PL_expect = XTERM;
a0d0e21e 1851 force_next(WORD);
3280af22 1852 PL_bufptr = s;
a0d0e21e
LW
1853 return *s == '(' ? FUNCMETH : METHOD;
1854 }
1855 }
1856 return 0;
1857}
1858
ffb4593c
NT
1859/*
1860 * S_incl_perldb
1861 * Return a string of Perl code to load the debugger. If PERL5DB
1862 * is set, it will return the contents of that, otherwise a
1863 * compile-time require of perl5db.pl.
1864 */
1865
76e3520e 1866STATIC char*
cea2e8a9 1867S_incl_perldb(pTHX)
a0d0e21e 1868{
3280af22 1869 if (PL_perldb) {
76e3520e 1870 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1871
1872 if (pdb)
1873 return pdb;
61bb5906 1874 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1875 return "BEGIN { require 'perl5db.pl' }";
1876 }
1877 return "";
1878}
1879
1880
16d20bd9
AD
1881/* Encoded script support. filter_add() effectively inserts a
1882 * 'pre-processing' function into the current source input stream.
1883 * Note that the filter function only applies to the current source file
1884 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1885 *
1886 * The datasv parameter (which may be NULL) can be used to pass
1887 * private data to this instance of the filter. The filter function
1888 * can recover the SV using the FILTER_DATA macro and use it to
1889 * store private buffers and state information.
1890 *
1891 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1892 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1893 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1894 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1895 * private use must be set using malloc'd pointers.
1896 */
16d20bd9
AD
1897
1898SV *
864dbfa3 1899Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1900{
f4c556ac
GS
1901 if (!funcp)
1902 return Nullsv;
1903
3280af22
NIS
1904 if (!PL_rsfp_filters)
1905 PL_rsfp_filters = newAV();
16d20bd9 1906 if (!datasv)
8c52afec 1907 datasv = NEWSV(255,0);
16d20bd9 1908 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1909 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1910 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1911 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1912 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1913 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1914 av_unshift(PL_rsfp_filters, 1);
1915 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1916 return(datasv);
1917}
1918
1919
1920/* Delete most recently added instance of this filter function. */
a0d0e21e 1921void
864dbfa3 1922Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1923{
e0c19803 1924 SV *datasv;
f4c556ac 1925 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1926 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1927 return;
1928 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1929 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1930 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1931 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1932 IoANY(datasv) = (void *)NULL;
3280af22 1933 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1934
16d20bd9
AD
1935 return;
1936 }
1937 /* we need to search for the correct entry and clear it */
cea2e8a9 1938 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1939}
1940
1941
1942/* Invoke the n'th filter function for the current rsfp. */
1943I32
864dbfa3 1944Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1945
1946
1947 /* 0 = read one text line */
a0d0e21e 1948{
16d20bd9
AD
1949 filter_t funcp;
1950 SV *datasv = NULL;
e50aee73 1951
3280af22 1952 if (!PL_rsfp_filters)
16d20bd9 1953 return -1;
3280af22 1954 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1955 /* Provide a default input filter to make life easy. */
1956 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1957 DEBUG_P(PerlIO_printf(Perl_debug_log,
1958 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1959 if (maxlen) {
1960 /* Want a block */
1961 int len ;
1962 int old_len = SvCUR(buf_sv) ;
1963
1964 /* ensure buf_sv is large enough */
1965 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1966 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1967 if (PerlIO_error(PL_rsfp))
37120919
AD
1968 return -1; /* error */
1969 else
1970 return 0 ; /* end of file */
1971 }
16d20bd9
AD
1972 SvCUR_set(buf_sv, old_len + len) ;
1973 } else {
1974 /* Want a line */
3280af22
NIS
1975 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1976 if (PerlIO_error(PL_rsfp))
37120919
AD
1977 return -1; /* error */
1978 else
1979 return 0 ; /* end of file */
1980 }
16d20bd9
AD
1981 }
1982 return SvCUR(buf_sv);
1983 }
1984 /* Skip this filter slot if filter has been deleted */
3280af22 1985 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1986 DEBUG_P(PerlIO_printf(Perl_debug_log,
1987 "filter_read %d: skipped (filter deleted)\n",
1988 idx));
16d20bd9
AD
1989 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1990 }
1991 /* Get function pointer hidden within datasv */
4755096e 1992 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1993 DEBUG_P(PerlIO_printf(Perl_debug_log,
1994 "filter_read %d: via function %p (%s)\n",
1995 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1996 /* Call function. The function is expected to */
1997 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1998 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1999 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2000}
2001
76e3520e 2002STATIC char *
cea2e8a9 2003S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2004{
c39cd008 2005#ifdef PERL_CR_FILTER
3280af22 2006 if (!PL_rsfp_filters) {
c39cd008 2007 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2008 }
2009#endif
3280af22 2010 if (PL_rsfp_filters) {
16d20bd9 2011
55497cff 2012 if (!append)
2013 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2014 if (FILTER_READ(0, sv, 0) > 0)
2015 return ( SvPVX(sv) ) ;
2016 else
2017 return Nullch ;
2018 }
9d116dd7 2019 else
fd049845 2020 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2021}
2022
def3634b
GS
2023STATIC HV *S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
2024{
2025 GV *gv;
2026
2027 if (*pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2028 return PL_curstash;
2029
2030 if (len > 2 &&
2031 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2032 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV))) {
2033 return GvHV(gv); /* Foo:: */
2034 }
2035
2036 /* use constant CLASS => 'MyClass' */
2037 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2038 SV *sv;
2039 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2040 pkgname = SvPV_nolen(sv);
2041 }
2042 }
2043
2044 return gv_stashpv(pkgname, FALSE);
2045}
a0d0e21e 2046
748a9306
LW
2047#ifdef DEBUGGING
2048 static char* exp_name[] =
09bef843
SB
2049 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2050 "ATTRTERM", "TERMBLOCK"
2051 };
748a9306 2052#endif
463ee0b2 2053
02aa26ce
NT
2054/*
2055 yylex
2056
2057 Works out what to call the token just pulled out of the input
2058 stream. The yacc parser takes care of taking the ops we return and
2059 stitching them into a tree.
2060
2061 Returns:
2062 PRIVATEREF
2063
2064 Structure:
2065 if read an identifier
2066 if we're in a my declaration
2067 croak if they tried to say my($foo::bar)
2068 build the ops for a my() declaration
2069 if it's an access to a my() variable
2070 are we in a sort block?
2071 croak if my($a); $a <=> $b
2072 build ops for access to a my() variable
2073 if in a dq string, and they've said @foo and we can't find @foo
2074 croak
2075 build ops for a bareword
2076 if we already built the token before, use it.
2077*/
2078
bf4acbe4
GS
2079#ifdef __SC__
2080#pragma segment Perl_yylex
2081#endif
864dbfa3
GS
2082int
2083#ifdef USE_PURE_BISON
cea2e8a9 2084Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 2085#else
cea2e8a9 2086Perl_yylex(pTHX)
864dbfa3 2087#endif
378cc40b 2088{
11343788 2089 dTHR;
79072805 2090 register char *s;
378cc40b 2091 register char *d;
79072805 2092 register I32 tmp;
463ee0b2 2093 STRLEN len;
161b471a
NIS
2094 GV *gv = Nullgv;
2095 GV **gvp = 0;
a687059c 2096
a1a0e61e
TD
2097#ifdef USE_PURE_BISON
2098 yylval_pointer = lvalp;
2099 yychar_pointer = lcharp;
2100#endif
2101
02aa26ce 2102 /* check if there's an identifier for us to look at */
3280af22 2103 if (PL_pending_ident) {
02aa26ce 2104 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2105 char pit = PL_pending_ident;
2106 PL_pending_ident = 0;
bbce6d69 2107
02aa26ce
NT
2108 /* if we're in a my(), we can't allow dynamics here.
2109 $foo'bar has already been turned into $foo::bar, so
2110 just check for colons.
2111
2112 if it's a legal name, the OP is a PADANY.
2113 */
3280af22 2114 if (PL_in_my) {
77ca0c92 2115 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2116 if (strchr(PL_tokenbuf,':'))
2117 yyerror(Perl_form(aTHX_ "No package name allowed for "
2118 "variable %s in \"our\"",
2119 PL_tokenbuf));
77ca0c92
LW
2120 tmp = pad_allocmy(PL_tokenbuf);
2121 }
2122 else {
2123 if (strchr(PL_tokenbuf,':'))
2124 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2125
77ca0c92
LW
2126 yylval.opval = newOP(OP_PADANY, 0);
2127 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2128 return PRIVATEREF;
2129 }
bbce6d69 2130 }
2131
02aa26ce
NT
2132 /*
2133 build the ops for accesses to a my() variable.
2134
2135 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2136 then used in a comparison. This catches most, but not
2137 all cases. For instance, it catches
2138 sort { my($a); $a <=> $b }
2139 but not
2140 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2141 (although why you'd do that is anyone's guess).
2142 */
2143
3280af22 2144 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2145#ifdef USE_THREADS
54b9620d 2146 /* Check for single character per-thread SVs */
3280af22
NIS
2147 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2148 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2149 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2150 {
2faa37cc 2151 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2152 yylval.opval->op_targ = tmp;
2153 return PRIVATEREF;
2154 }
2155#endif /* USE_THREADS */
3280af22 2156 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2157 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2158 /* might be an "our" variable" */
f472eb5c 2159 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2160 /* build ops for a bareword */
f472eb5c
GS
2161 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2162 sv_catpvn(sym, "::", 2);
2163 sv_catpv(sym, PL_tokenbuf+1);
2164 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2165 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2166 gv_fetchpv(SvPVX(sym),
77ca0c92 2167 (PL_in_eval
f472eb5c
GS
2168 ? (GV_ADDMULTI | GV_ADDINEVAL)
2169 : TRUE
77ca0c92
LW
2170 ),
2171 ((PL_tokenbuf[0] == '$') ? SVt_PV
2172 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2173 : SVt_PVHV));
2174 return WORD;
2175 }
2176
02aa26ce 2177 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2178 if (PL_last_lop_op == OP_SORT &&
2179 PL_tokenbuf[0] == '$' &&
2180 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2181 && !PL_tokenbuf[2])
bbce6d69 2182 {
3280af22
NIS
2183 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2184 d < PL_bufend && *d != '\n';
a863c7d1
MB
2185 d++)
2186 {
2187 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2188 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2189 PL_tokenbuf);
a863c7d1 2190 }
bbce6d69 2191 }
2192 }
bbce6d69 2193
a863c7d1
MB
2194 yylval.opval = newOP(OP_PADANY, 0);
2195 yylval.opval->op_targ = tmp;
2196 return PRIVATEREF;
2197 }
bbce6d69 2198 }
2199
02aa26ce
NT
2200 /*
2201 Whine if they've said @foo in a doublequoted string,
2202 and @foo isn't a variable we can find in the symbol
2203 table.
2204 */
3280af22
NIS
2205 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2206 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2207 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2208 && ckWARN(WARN_AMBIGUOUS))
2209 {
2210 /* Downgraded from fatal to warning 20000522 mjd */
2211 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2212 "Possible unintended interpolation of %s in string",
2213 PL_tokenbuf);
2214 }
bbce6d69 2215 }
2216
02aa26ce 2217 /* build ops for a bareword */
3280af22 2218 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2219 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2220 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2221 ((PL_tokenbuf[0] == '$') ? SVt_PV
2222 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2223 : SVt_PVHV));
2224 return WORD;
2225 }
2226
02aa26ce
NT
2227 /* no identifier pending identification */
2228
3280af22 2229 switch (PL_lex_state) {
79072805
LW
2230#ifdef COMMENTARY
2231 case LEX_NORMAL: /* Some compilers will produce faster */
2232 case LEX_INTERPNORMAL: /* code if we comment these out. */
2233 break;
2234#endif
2235
09bef843 2236 /* when we've already built the next token, just pull it out of the queue */
79072805 2237 case LEX_KNOWNEXT:
3280af22
NIS
2238 PL_nexttoke--;
2239 yylval = PL_nextval[PL_nexttoke];
2240 if (!PL_nexttoke) {
2241 PL_lex_state = PL_lex_defer;
2242 PL_expect = PL_lex_expect;
2243 PL_lex_defer = LEX_NORMAL;
463ee0b2 2244 }
3280af22 2245 return(PL_nexttype[PL_nexttoke]);
79072805 2246
02aa26ce 2247 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2248 when we get here, PL_bufptr is at the \
02aa26ce 2249 */
79072805
LW
2250 case LEX_INTERPCASEMOD:
2251#ifdef DEBUGGING
3280af22 2252 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2253 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2254#endif
02aa26ce 2255 /* handle \E or end of string */
3280af22 2256 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2257 char oldmod;
02aa26ce
NT
2258
2259 /* if at a \E */
3280af22
NIS
2260 if (PL_lex_casemods) {
2261 oldmod = PL_lex_casestack[--PL_lex_casemods];
2262 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2263
3280af22
NIS
2264 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2265 PL_bufptr += 2;
2266 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2267 }
79072805
LW
2268 return ')';
2269 }
3280af22
NIS
2270 if (PL_bufptr != PL_bufend)
2271 PL_bufptr += 2;
2272 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2273 return yylex();
79072805
LW
2274 }
2275 else {
3280af22 2276 s = PL_bufptr + 1;
79072805
LW
2277 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2278 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2279 if (strchr("LU", *s) &&
3280af22 2280 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2281 {
3280af22 2282 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2283 return ')';
2284 }
3280af22
NIS
2285 if (PL_lex_casemods > 10) {
2286 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2287 if (newlb != PL_lex_casestack) {
a0d0e21e 2288 SAVEFREEPV(newlb);
3280af22 2289 PL_lex_casestack = newlb;
a0d0e21e
LW
2290 }
2291 }
3280af22
NIS
2292 PL_lex_casestack[PL_lex_casemods++] = *s;
2293 PL_lex_casestack[PL_lex_casemods] = '\0';
2294 PL_lex_state = LEX_INTERPCONCAT;
2295 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2296 force_next('(');
2297 if (*s == 'l')
3280af22 2298 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2299 else if (*s == 'u')
3280af22 2300 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2301 else if (*s == 'L')
3280af22 2302 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2303 else if (*s == 'U')
3280af22 2304 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2305 else if (*s == 'Q')
3280af22 2306 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2307 else
cea2e8a9 2308 Perl_croak(aTHX_ "panic: yylex");
3280af22 2309 PL_bufptr = s + 1;
79072805 2310 force_next(FUNC);
3280af22
NIS
2311 if (PL_lex_starts) {
2312 s = PL_bufptr;
2313 PL_lex_starts = 0;
79072805
LW
2314 Aop(OP_CONCAT);
2315 }
2316 else
cea2e8a9 2317 return yylex();
79072805
LW
2318 }
2319
55497cff 2320 case LEX_INTERPPUSH:
2321 return sublex_push();
2322
79072805 2323 case LEX_INTERPSTART:
3280af22 2324 if (PL_bufptr == PL_bufend)
79072805 2325 return sublex_done();
3280af22
NIS
2326 PL_expect = XTERM;
2327 PL_lex_dojoin = (*PL_bufptr == '@');
2328 PL_lex_state = LEX_INTERPNORMAL;
2329 if (PL_lex_dojoin) {
2330 PL_nextval[PL_nexttoke].ival = 0;
79072805 2331 force_next(',');
554b3eca 2332#ifdef USE_THREADS
533c011a
NIS
2333 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2334 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2335 force_next(PRIVATEREF);
2336#else
a0d0e21e 2337 force_ident("\"", '$');
554b3eca 2338#endif /* USE_THREADS */
3280af22 2339 PL_nextval[PL_nexttoke].ival = 0;
79072805 2340 force_next('$');
3280af22 2341 PL_nextval[PL_nexttoke].ival = 0;
79072805 2342 force_next('(');
3280af22 2343 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2344 force_next(FUNC);
2345 }
3280af22
NIS
2346 if (PL_lex_starts++) {
2347 s = PL_bufptr;
79072805
LW
2348 Aop(OP_CONCAT);
2349 }
cea2e8a9 2350 return yylex();
79072805
LW
2351
2352 case LEX_INTERPENDMAYBE:
3280af22
NIS
2353 if (intuit_more(PL_bufptr)) {
2354 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2355 break;
2356 }
2357 /* FALL THROUGH */
2358
2359 case LEX_INTERPEND:
3280af22
NIS
2360 if (PL_lex_dojoin) {
2361 PL_lex_dojoin = FALSE;
2362 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2363 return ')';
2364 }
43a16006 2365 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2366 && SvEVALED(PL_lex_repl))
43a16006 2367 {
e9fa98b2 2368 if (PL_bufptr != PL_bufend)
cea2e8a9 2369 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2370 PL_lex_repl = Nullsv;
2371 }
79072805
LW
2372 /* FALLTHROUGH */
2373 case LEX_INTERPCONCAT:
2374#ifdef DEBUGGING
3280af22 2375 if (PL_lex_brackets)
cea2e8a9 2376 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2377#endif
3280af22 2378 if (PL_bufptr == PL_bufend)
79072805
LW
2379 return sublex_done();
2380
3280af22
NIS
2381 if (SvIVX(PL_linestr) == '\'') {
2382 SV *sv = newSVsv(PL_linestr);
2383 if (!PL_lex_inpat)
76e3520e 2384 sv = tokeq(sv);
3280af22 2385 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2386 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2387 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2388 s = PL_bufend;
79072805
LW
2389 }
2390 else {
3280af22 2391 s = scan_const(PL_bufptr);
79072805 2392 if (*s == '\\')
3280af22 2393 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2394 else
3280af22 2395 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2396 }
2397
3280af22
NIS
2398 if (s != PL_bufptr) {
2399 PL_nextval[PL_nexttoke] = yylval;
2400 PL_expect = XTERM;
79072805 2401 force_next(THING);
3280af22 2402 if (PL_lex_starts++)
79072805
LW
2403 Aop(OP_CONCAT);
2404 else {
3280af22 2405 PL_bufptr = s;
cea2e8a9 2406 return yylex();
79072805
LW
2407 }
2408 }
2409
cea2e8a9 2410 return yylex();
a0d0e21e 2411 case LEX_FORMLINE:
3280af22
NIS
2412 PL_lex_state = LEX_NORMAL;
2413 s = scan_formline(PL_bufptr);
2414 if (!PL_lex_formbrack)
a0d0e21e
LW
2415 goto rightbracket;
2416 OPERATOR(';');
79072805
LW
2417 }
2418
3280af22
NIS
2419 s = PL_bufptr;
2420 PL_oldoldbufptr = PL_oldbufptr;
2421 PL_oldbufptr = s;
79072805 2422 DEBUG_p( {
bf49b057
GS
2423 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2424 exp_name[PL_expect], s);
79072805 2425 } )
463ee0b2
LW
2426
2427 retry:
378cc40b
LW
2428 switch (*s) {
2429 default:
7e2040f0 2430 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2431 goto keylookup;
cea2e8a9 2432 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2433 case 4:
2434 case 26:
2435 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2436 case 0:
3280af22
NIS
2437 if (!PL_rsfp) {
2438 PL_last_uni = 0;
2439 PL_last_lop = 0;
2440 if (PL_lex_brackets)
d98d5fff 2441 yyerror("Missing right curly or square bracket");
79072805 2442 TOKEN(0);
463ee0b2 2443 }
3280af22 2444 if (s++ < PL_bufend)
a687059c 2445 goto retry; /* ignore stray nulls */
3280af22
NIS
2446 PL_last_uni = 0;
2447 PL_last_lop = 0;
2448 if (!PL_in_eval && !PL_preambled) {
2449 PL_preambled = TRUE;
2450 sv_setpv(PL_linestr,incl_perldb());
2451 if (SvCUR(PL_linestr))
2452 sv_catpv(PL_linestr,";");
2453 if (PL_preambleav){
2454 while(AvFILLp(PL_preambleav) >= 0) {
2455 SV *tmpsv = av_shift(PL_preambleav);
2456 sv_catsv(PL_linestr, tmpsv);
2457 sv_catpv(PL_linestr, ";");
91b7def8 2458 sv_free(tmpsv);
2459 }
3280af22
NIS
2460 sv_free((SV*)PL_preambleav);
2461 PL_preambleav = NULL;
91b7def8 2462 }
3280af22
NIS
2463 if (PL_minus_n || PL_minus_p) {
2464 sv_catpv(PL_linestr, "LINE: while (<>) {");
2465 if (PL_minus_l)
2466 sv_catpv(PL_linestr,"chomp;");
2467 if (PL_minus_a) {
8fd239a7
CS
2468 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2469 if (gv)
2470 GvIMPORTED_AV_on(gv);
3280af22
NIS
2471 if (PL_minus_F) {
2472 if (strchr("/'\"", *PL_splitstr)
2473 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2474 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2475 else {
2476 char delim;
2477 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2478 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2479 delim = *s;
cea2e8a9 2480 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2481 "q" + (delim == '\''), delim);
3280af22 2482 for (s = PL_splitstr; *s; s++) {
54310121 2483 if (*s == '\\')
3280af22
NIS
2484 sv_catpvn(PL_linestr, "\\", 1);
2485 sv_catpvn(PL_linestr, s, 1);
54310121 2486 }
cea2e8a9 2487 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2488 }
2304df62
AD
2489 }
2490 else
3280af22 2491 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2492 }
79072805 2493 }
3280af22
NIS
2494 sv_catpv(PL_linestr, "\n");
2495 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2496 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2497 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2498 SV *sv = NEWSV(85,0);
2499
2500 sv_upgrade(sv, SVt_PVMG);
3280af22 2501 sv_setsv(sv,PL_linestr);
57843af0 2502 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2503 }
79072805 2504 goto retry;
a687059c 2505 }
e929a76b 2506 do {
b250498f
GS
2507 bool bof;
2508 bof = PL_rsfp && (PerlIO_tell(PL_rsfp)==0); /* *Before* read! */
3280af22 2509 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2510 fake_eof:
3280af22
NIS
2511 if (PL_rsfp) {
2512 if (PL_preprocess && !PL_in_eval)
2513 (void)PerlProc_pclose(PL_rsfp);
2514 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2515 PerlIO_clearerr(PL_rsfp);
395c3793 2516 else
3280af22
NIS
2517 (void)PerlIO_close(PL_rsfp);
2518 PL_rsfp = Nullfp;
4a9ae47a 2519 PL_doextract = FALSE;
395c3793 2520 }
3280af22
NIS
2521 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2522 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2523 sv_catpv(PL_linestr,";}");
2524 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2525 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2526 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2527 goto retry;
2528 }
3280af22
NIS
2529 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2530 sv_setpv(PL_linestr,"");
79072805 2531 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2532 }
3280af22 2533 if (PL_doextract) {
a0d0e21e 2534 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2535 PL_doextract = FALSE;
a0d0e21e
LW
2536
2537 /* Incest with pod. */
2538 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2539 sv_setpv(PL_linestr, "");
2540 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2541 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2542 PL_doextract = FALSE;
a0d0e21e 2543 }
b250498f
GS
2544 }
2545 if (bof)
2546 s = swallow_bom(s);
463ee0b2 2547 incline(s);
3280af22
NIS
2548 } while (PL_doextract);
2549 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2550 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2551 SV *sv = NEWSV(85,0);
a687059c 2552
93a17b20 2553 sv_upgrade(sv, SVt_PVMG);
3280af22 2554 sv_setsv(sv,PL_linestr);
57843af0 2555 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2556 }
3280af22 2557 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2558 if (CopLINE(PL_curcop) == 1) {
3280af22 2559 while (s < PL_bufend && isSPACE(*s))
79072805 2560 s++;
a0d0e21e 2561 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2562 s++;
44a8e56a 2563 d = Nullch;
3280af22 2564 if (!PL_in_eval) {
44a8e56a 2565 if (*s == '#' && *(s+1) == '!')
2566 d = s + 2;
2567#ifdef ALTERNATE_SHEBANG
2568 else {
2569 static char as[] = ALTERNATE_SHEBANG;
2570 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2571 d = s + (sizeof(as) - 1);
2572 }
2573#endif /* ALTERNATE_SHEBANG */
2574 }
2575 if (d) {
b8378b72 2576 char *ipath;
774d564b 2577 char *ipathend;
b8378b72 2578
774d564b 2579 while (isSPACE(*d))
b8378b72
CS
2580 d++;
2581 ipath = d;
774d564b 2582 while (*d && !isSPACE(*d))
2583 d++;
2584 ipathend = d;
2585
2586#ifdef ARG_ZERO_IS_SCRIPT
2587 if (ipathend > ipath) {
2588 /*
2589 * HP-UX (at least) sets argv[0] to the script name,
2590 * which makes $^X incorrect. And Digital UNIX and Linux,
2591 * at least, set argv[0] to the basename of the Perl
2592 * interpreter. So, having found "#!", we'll set it right.
2593 */
2594 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2595 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2596 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2597 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2598 SvSETMAGIC(x);
2599 }
774d564b 2600 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2601 }
774d564b 2602#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2603
2604 /*
2605 * Look for options.
2606 */
748a9306 2607 d = instr(s,"perl -");
84e30d1a 2608 if (!d) {
748a9306 2609 d = instr(s,"perl");
84e30d1a
GS
2610#if defined(DOSISH)
2611 /* avoid getting into infinite loops when shebang
2612 * line contains "Perl" rather than "perl" */
2613 if (!d) {
2614 for (d = ipathend-4; d >= ipath; --d) {
2615 if ((*d == 'p' || *d == 'P')
2616 && !ibcmp(d, "perl", 4))
2617 {
2618 break;
2619 }
2620 }
2621 if (d < ipath)
2622 d = Nullch;
2623 }
2624#endif
2625 }
44a8e56a 2626#ifdef ALTERNATE_SHEBANG
2627 /*
2628 * If the ALTERNATE_SHEBANG on this system starts with a
2629 * character that can be part of a Perl expression, then if
2630 * we see it but not "perl", we're probably looking at the
2631 * start of Perl code, not a request to hand off to some
2632 * other interpreter. Similarly, if "perl" is there, but
2633 * not in the first 'word' of the line, we assume the line
2634 * contains the start of the Perl program.
44a8e56a 2635 */
2636 if (d && *s != '#') {
774d564b 2637 char *c = ipath;
44a8e56a 2638 while (*c && !strchr("; \t\r\n\f\v#", *c))
2639 c++;
2640 if (c < d)
2641 d = Nullch; /* "perl" not in first word; ignore */
2642 else
2643 *s = '#'; /* Don't try to parse shebang line */
2644 }
774d564b 2645#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2646#ifndef MACOS_TRADITIONAL
748a9306 2647 if (!d &&
44a8e56a 2648 *s == '#' &&
774d564b 2649 ipathend > ipath &&
3280af22 2650 !PL_minus_c &&
748a9306 2651 !instr(s,"indir") &&
3280af22 2652 instr(PL_origargv[0],"perl"))
748a9306 2653 {
9f68db38 2654 char **newargv;
9f68db38 2655
774d564b 2656 *ipathend = '\0';
2657 s = ipathend + 1;
3280af22 2658 while (s < PL_bufend && isSPACE(*s))
9f68db38 2659 s++;
3280af22
NIS
2660 if (s < PL_bufend) {
2661 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2662 newargv[1] = s;
3280af22 2663 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2664 s++;
2665 *s = '\0';
3280af22 2666 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2667 }
2668 else
3280af22 2669 newargv = PL_origargv;
774d564b 2670 newargv[0] = ipath;
80252599 2671 PerlProc_execv(ipath, newargv);
cea2e8a9 2672 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2673 }
bf4acbe4 2674#endif
748a9306 2675 if (d) {
3280af22
NIS
2676 U32 oldpdb = PL_perldb;
2677 bool oldn = PL_minus_n;
2678 bool oldp = PL_minus_p;
748a9306
LW
2679
2680 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2681 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2682
2683 if (*d++ == '-') {
8cc95fdb 2684 do {
2685 if (*d == 'M' || *d == 'm') {
2686 char *m = d;
2687 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2688 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2689 (int)(d - m), m);
2690 }
2691 d = moreswitches(d);
2692 } while (d);
155aba94
GS
2693 if ((PERLDB_LINE && !oldpdb) ||
2694 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2695 /* if we have already added "LINE: while (<>) {",
2696 we must not do it again */
748a9306 2697 {
3280af22
NIS
2698 sv_setpv(PL_linestr, "");
2699 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2700 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2701 PL_preambled = FALSE;
84902520 2702 if (PERLDB_LINE)
3280af22 2703 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2704 goto retry;
2705 }
a0d0e21e 2706 }
79072805 2707 }
9f68db38 2708 }
79072805 2709 }
3280af22
NIS
2710 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2711 PL_bufptr = s;
2712 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2713 return yylex();
ae986130 2714 }
378cc40b 2715 goto retry;
4fdae800 2716 case '\r':
6a27c188 2717#ifdef PERL_STRICT_CR
cea2e8a9
GS
2718 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2719 Perl_croak(aTHX_
cc507455 2720 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2721#endif
4fdae800 2722 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2723#ifdef MACOS_TRADITIONAL
2724 case '\312':
2725#endif
378cc40b
LW
2726 s++;
2727 goto retry;
378cc40b 2728 case '#':
e929a76b 2729 case '\n':
3280af22 2730 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2731 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2732 /* handle eval qq[#line 1 "foo"\n ...] */
2733 CopLINE_dec(PL_curcop);
2734 incline(s);
2735 }
3280af22 2736 d = PL_bufend;
a687059c 2737 while (s < d && *s != '\n')
378cc40b 2738 s++;
0f85fab0 2739 if (s < d)
378cc40b 2740 s++;
463ee0b2 2741 incline(s);
3280af22
NIS
2742 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2743 PL_bufptr = s;
2744 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2745 return yylex();
a687059c 2746 }
378cc40b 2747 }
a687059c 2748 else {
378cc40b 2749 *s = '\0';
3280af22 2750 PL_bufend = s;
a687059c 2751 }
378cc40b
LW
2752 goto retry;
2753 case '-':
79072805 2754 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2755 s++;
3280af22 2756 PL_bufptr = s;
748a9306
LW
2757 tmp = *s++;
2758
bf4acbe4 2759 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2760 s++;
2761
2762 if (strnEQ(s,"=>",2)) {
3280af22 2763 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2764 OPERATOR('-'); /* unary minus */
2765 }
3280af22
NIS
2766 PL_last_uni = PL_oldbufptr;
2767 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2768 switch (tmp) {
79072805
LW
2769 case 'r': FTST(OP_FTEREAD);
2770 case 'w': FTST(OP_FTEWRITE);
2771 case 'x': FTST(OP_FTEEXEC);
2772 case 'o': FTST(OP_FTEOWNED);
2773 case 'R': FTST(OP_FTRREAD);
2774 case 'W': FTST(OP_FTRWRITE);
2775 case 'X': FTST(OP_FTREXEC);
2776 case 'O': FTST(OP_FTROWNED);
2777 case 'e': FTST(OP_FTIS);
2778 case 'z': FTST(OP_FTZERO);
2779 case 's': FTST(OP_FTSIZE);
2780 case 'f': FTST(OP_FTFILE);
2781 case 'd': FTST(OP_FTDIR);
2782 case 'l': FTST(OP_FTLINK);
2783 case 'p': FTST(OP_FTPIPE);
2784 case 'S': FTST(OP_FTSOCK);
2785 case 'u': FTST(OP_FTSUID);
2786 case 'g': FTST(OP_FTSGID);
2787 case 'k': FTST(OP_FTSVTX);
2788 case 'b': FTST(OP_FTBLK);
2789 case 'c': FTST(OP_FTCHR);
2790 case 't': FTST(OP_FTTTY);
2791 case 'T': FTST(OP_FTTEXT);
2792 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2793 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2794 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2795 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2796 default:
cea2e8a9 2797 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2798 break;
2799 }
2800 }
a687059c
LW
2801 tmp = *s++;
2802 if (*s == tmp) {
2803 s++;
3280af22 2804 if (PL_expect == XOPERATOR)
79072805
LW
2805 TERM(POSTDEC);
2806 else
2807 OPERATOR(PREDEC);
2808 }
2809 else if (*s == '>') {
2810 s++;
2811 s = skipspace(s);
7e2040f0 2812 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2813 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2814 TOKEN(ARROW);
79072805 2815 }
748a9306
LW
2816 else if (*s == '$')
2817 OPERATOR(ARROW);
463ee0b2 2818 else
748a9306 2819 TERM(ARROW);
a687059c 2820 }
3280af22 2821 if (PL_expect == XOPERATOR)
79072805
LW
2822 Aop(OP_SUBTRACT);
2823 else {
3280af22 2824 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2825 check_uni();
79072805 2826 OPERATOR('-'); /* unary minus */
2f3197b3 2827 }
79072805 2828
378cc40b 2829 case '+':
a687059c
LW
2830 tmp = *s++;
2831 if (*s == tmp) {
378cc40b 2832 s++;
3280af22 2833 if (PL_expect == XOPERATOR)
79072805
LW
2834 TERM(POSTINC);
2835 else
2836 OPERATOR(PREINC);
378cc40b 2837 }
3280af22 2838 if (PL_expect == XOPERATOR)
79072805
LW
2839 Aop(OP_ADD);
2840 else {
3280af22 2841 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2842 check_uni();
a687059c 2843 OPERATOR('+');
2f3197b3 2844 }
a687059c 2845
378cc40b 2846 case '*':
3280af22
NIS
2847 if (PL_expect != XOPERATOR) {
2848 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2849 PL_expect = XOPERATOR;
2850 force_ident(PL_tokenbuf, '*');
2851 if (!*PL_tokenbuf)
a0d0e21e 2852 PREREF('*');
79072805 2853 TERM('*');
a687059c 2854 }
79072805
LW
2855 s++;
2856 if (*s == '*') {
a687059c 2857 s++;
79072805 2858 PWop(OP_POW);
a687059c 2859 }
79072805
LW
2860 Mop(OP_MULTIPLY);
2861
378cc40b 2862 case '%':
3280af22 2863 if (PL_expect == XOPERATOR) {
bbce6d69 2864 ++s;
2865 Mop(OP_MODULO);
a687059c 2866 }
3280af22
NIS
2867 PL_tokenbuf[0] = '%';
2868 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2869 if (!PL_tokenbuf[1]) {
2870 if (s == PL_bufend)
bbce6d69 2871 yyerror("Final % should be \\% or %name");
2872 PREREF('%');
a687059c 2873 }
3280af22 2874 PL_pending_ident = '%';
bbce6d69 2875 TERM('%');
a687059c 2876
378cc40b 2877 case '^':
79072805 2878 s++;
a0d0e21e 2879 BOop(OP_BIT_XOR);
79072805 2880 case '[':
3280af22 2881 PL_lex_brackets++;
79072805 2882 /* FALL THROUGH */
378cc40b 2883 case '~':
378cc40b 2884 case ',':
378cc40b
LW
2885 tmp = *s++;
2886 OPERATOR(tmp);
a0d0e21e
LW
2887 case ':':
2888 if (s[1] == ':') {
2889 len = 0;
2890 goto just_a_word;
2891 }
2892 s++;
09bef843
SB
2893 switch (PL_expect) {
2894 OP *attrs;
2895 case XOPERATOR:
2896 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2897 break;
2898 PL_bufptr = s; /* update in case we back off */
2899 goto grabattrs;
2900 case XATTRBLOCK:
2901 PL_expect = XBLOCK;
2902 goto grabattrs;
2903 case XATTRTERM:
2904 PL_expect = XTERMBLOCK;
2905 grabattrs:
2906 s = skipspace(s);
2907 attrs = Nullop;
7e2040f0 2908 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2909 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2910 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2911 if (tmp < 0) tmp = -tmp;
2912 switch (tmp) {
2913 case KEY_or:
2914 case KEY_and:
2915 case KEY_for:
2916 case KEY_unless:
2917 case KEY_if:
2918 case KEY_while:
2919 case KEY_until:
2920 goto got_attrs;
2921 default:
2922 break;
2923 }
2924 }
09bef843
SB
2925 if (*d == '(') {
2926 d = scan_str(d,TRUE,TRUE);
2927 if (!d) {
2928 if (PL_lex_stuff) {
2929 SvREFCNT_dec(PL_lex_stuff);
2930 PL_lex_stuff = Nullsv;
2931 }
2932 /* MUST advance bufptr here to avoid bogus
2933 "at end of line" context messages from yyerror().
2934 */
2935 PL_bufptr = s + len;
2936 yyerror("Unterminated attribute parameter in attribute list");
2937 if (attrs)
2938 op_free(attrs);
2939 return 0; /* EOF indicator */
2940 }
2941 }
2942 if (PL_lex_stuff) {
2943 SV *sv = newSVpvn(s, len);
2944 sv_catsv(sv, PL_lex_stuff);
2945 attrs = append_elem(OP_LIST, attrs,
2946 newSVOP(OP_CONST, 0, sv));
2947 SvREFCNT_dec(PL_lex_stuff);
2948 PL_lex_stuff = Nullsv;
2949 }
2950 else {
2951 attrs = append_elem(OP_LIST, attrs,
2952 newSVOP(OP_CONST, 0,
2953 newSVpvn(s, len)));
2954 }
2955 s = skipspace(d);
0120eecf 2956 if (*s == ':' && s[1] != ':')
09bef843 2957 s = skipspace(s+1);
0120eecf
GS
2958 else if (s == d)
2959 break; /* require real whitespace or :'s */
09bef843 2960 }
f9829d6b
GS
2961 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2962 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2963 char q = ((*s == '\'') ? '"' : '\'');
2964 /* If here for an expression, and parsed no attrs, back off. */
2965 if (tmp == '=' && !attrs) {
2966 s = PL_bufptr;
2967 break;
2968 }
2969 /* MUST advance bufptr here to avoid bogus "at end of line"
2970 context messages from yyerror().
2971 */
2972 PL_bufptr = s;
2973 if (!*s)
2974 yyerror("Unterminated attribute list");
2975 else
2976 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2977 q, *s, q));
2978 if (attrs)
2979 op_free(attrs);
2980 OPERATOR(':');
2981 }
f9829d6b 2982 got_attrs:
09bef843
SB
2983 if (attrs) {
2984 PL_nextval[PL_nexttoke].opval = attrs;
2985 force_next(THING);
2986 }
2987 TOKEN(COLONATTR);
2988 }
a0d0e21e 2989 OPERATOR(':');
8990e307
LW
2990 case '(':
2991 s++;
3280af22
NIS
2992 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2993 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2994 else
3280af22 2995 PL_expect = XTERM;
a0d0e21e 2996 TOKEN('(');
378cc40b 2997 case ';':
f4dd75d9 2998 CLINE;
378cc40b
LW
2999 tmp = *s++;
3000 OPERATOR(tmp);
3001 case ')':
378cc40b 3002 tmp = *s++;
16d20bd9
AD
3003 s = skipspace(s);
3004 if (*s == '{')
3005 PREBLOCK(tmp);
378cc40b 3006 TERM(tmp);
79072805
LW
3007 case ']':
3008 s++;
3280af22 3009 if (PL_lex_brackets <= 0)
d98d5fff 3010 yyerror("Unmatched right square bracket");
463ee0b2 3011 else
3280af22
NIS
3012 --PL_lex_brackets;
3013 if (PL_lex_state == LEX_INTERPNORMAL) {
3014 if (PL_lex_brackets == 0) {
a0d0e21e 3015 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3016 PL_lex_state = LEX_INTERPEND;
79072805
LW
3017 }
3018 }
4633a7c4 3019 TERM(']');
79072805
LW
3020 case '{':
3021 leftbracket:
79072805 3022 s++;
3280af22
NIS
3023 if (PL_lex_brackets > 100) {
3024 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3025 if (newlb != PL_lex_brackstack) {
8990e307 3026 SAVEFREEPV(newlb);
3280af22 3027 PL_lex_brackstack = newlb;
8990e307
LW
3028 }
3029 }
3280af22 3030 switch (PL_expect) {
a0d0e21e 3031 case XTERM:
3280af22 3032 if (PL_lex_formbrack) {
a0d0e21e
LW
3033 s--;
3034 PRETERMBLOCK(DO);
3035 }
3280af22
NIS
3036 if (PL_oldoldbufptr == PL_last_lop)
3037 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3038 else
3280af22 3039 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3040 OPERATOR(HASHBRACK);
a0d0e21e 3041 case XOPERATOR:
bf4acbe4 3042 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3043 s++;
44a8e56a 3044 d = s;
3280af22
NIS
3045 PL_tokenbuf[0] = '\0';
3046 if (d < PL_bufend && *d == '-') {
3047 PL_tokenbuf[0] = '-';
44a8e56a 3048 d++;
bf4acbe4 3049 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3050 d++;
3051 }
7e2040f0 3052 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3053 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3054 FALSE, &len);
bf4acbe4 3055 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3056 d++;
3057 if (*d == '}') {
3280af22 3058 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3059 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3060 if (minus)
3061 force_next('-');
748a9306
LW
3062 }
3063 }
3064 /* FALL THROUGH */
09bef843 3065 case XATTRBLOCK:
748a9306 3066 case XBLOCK:
3280af22
NIS
3067 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3068 PL_expect = XSTATE;
a0d0e21e 3069 break;
09bef843 3070 case XATTRTERM:
a0d0e21e 3071 case XTERMBLOCK:
3280af22
NIS
3072 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3073 PL_expect = XSTATE;
a0d0e21e
LW
3074 break;
3075 default: {
3076 char *t;
3280af22
NIS
3077 if (PL_oldoldbufptr == PL_last_lop)
3078 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3079 else
3280af22 3080 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3081 s = skipspace(s);
09ecc4b6 3082 if (*s == '}')
a0d0e21e 3083 OPERATOR(HASHBRACK);
b8a4b1be
GS
3084 /* This hack serves to disambiguate a pair of curlies
3085 * as being a block or an anon hash. Normally, expectation
3086 * determines that, but in cases where we're not in a
3087 * position to expect anything in particular (like inside
3088 * eval"") we have to resolve the ambiguity. This code
3089 * covers the case where the first term in the curlies is a
3090 * quoted string. Most other cases need to be explicitly
3091 * disambiguated by prepending a `+' before the opening
3092 * curly in order to force resolution as an anon hash.
3093 *
3094 * XXX should probably propagate the outer expectation
3095 * into eval"" to rely less on this hack, but that could
3096 * potentially break current behavior of eval"".
3097 * GSAR 97-07-21
3098 */
3099 t = s;
3100 if (*s == '\'' || *s == '"' || *s == '`') {
3101 /* common case: get past first string, handling escapes */
3280af22 3102 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3103 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3104 t++;
3105 t++;
a0d0e21e 3106 }
b8a4b1be 3107 else if (*s == 'q') {
3280af22 3108 if (++t < PL_bufend
b8a4b1be 3109 && (!isALNUM(*t)
3280af22 3110 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3111 && !isALNUM(*t))))
3112 {
b8a4b1be
GS
3113 char *tmps;
3114 char open, close, term;
3115 I32 brackets = 1;
3116
3280af22 3117 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3118 t++;
3119 term = *t;
3120 open = term;
3121 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3122 term = tmps[5];
3123 close = term;
3124 if (open == close)
3280af22
NIS
3125 for (t++; t < PL_bufend; t++) {
3126 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3127 t++;
6d07e5e9 3128 else if (*t == open)
b8a4b1be
GS
3129 break;
3130 }
3131 else
3280af22
NIS
3132 for (t++; t < PL_bufend; t++) {
3133 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3134 t++;
6d07e5e9 3135 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3136 break;
3137 else if (*t == open)
3138 brackets++;
3139 }
3140 }
3141 t++;
a0d0e21e 3142 }
7e2040f0 3143 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3144 t += UTF8SKIP(t);
7e2040f0 3145 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3146 t += UTF8SKIP(t);
a0d0e21e 3147 }
3280af22 3148 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3149 t++;
b8a4b1be
GS
3150 /* if comma follows first term, call it an anon hash */
3151 /* XXX it could be a comma expression with loop modifiers */
3280af22 3152 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3153 || (*t == '=' && t[1] == '>')))
a0d0e21e 3154 OPERATOR(HASHBRACK);
3280af22 3155 if (PL_expect == XREF)
4e4e412b 3156 PL_expect = XTERM;
a0d0e21e 3157 else {
3280af22
NIS
3158 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3159 PL_expect = XSTATE;
a0d0e21e 3160 }
8990e307 3161 }
a0d0e21e 3162 break;
463ee0b2 3163 }
57843af0 3164 yylval.ival = CopLINE(PL_curcop);
79072805 3165 if (isSPACE(*s) || *s == '#')
3280af22 3166 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3167 TOKEN('{');
378cc40b 3168 case '}':
79072805
LW
3169 rightbracket:
3170 s++;
3280af22 3171 if (PL_lex_brackets <= 0)
d98d5fff 3172 yyerror("Unmatched right curly bracket");
463ee0b2 3173 else
3280af22 3174 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3175 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3176 PL_lex_formbrack = 0;
3177 if (PL_lex_state == LEX_INTERPNORMAL) {
3178 if (PL_lex_brackets == 0) {
9059aa12
LW
3179 if (PL_expect & XFAKEBRACK) {
3180 PL_expect &= XENUMMASK;
3280af22
NIS
3181 PL_lex_state = LEX_INTERPEND;
3182 PL_bufptr = s;
cea2e8a9 3183 return yylex(); /* ignore fake brackets */
79072805 3184 }
fa83b5b6 3185 if (*s == '-' && s[1] == '>')
3280af22 3186 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3187 else if (*s != '[' && *s != '{')
3280af22 3188 PL_lex_state = LEX_INTERPEND;
79072805
LW
3189 }
3190 }
9059aa12
LW
3191 if (PL_expect & XFAKEBRACK) {
3192 PL_expect &= XENUMMASK;
3280af22 3193 PL_bufptr = s;
cea2e8a9 3194 return yylex(); /* ignore fake brackets */
748a9306 3195 }
79072805
LW
3196 force_next('}');
3197 TOKEN(';');
378cc40b
LW
3198 case '&':
3199 s++;
3200 tmp = *s++;
3201 if (tmp == '&')
a0d0e21e 3202 AOPERATOR(ANDAND);
378cc40b 3203 s--;
3280af22 3204 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3205 if (ckWARN(WARN_SEMICOLON)
3206 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3207 {
57843af0 3208 CopLINE_dec(PL_curcop);
cea2e8a9 3209 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3210 CopLINE_inc(PL_curcop);
463ee0b2 3211 }
79072805 3212 BAop(OP_BIT_AND);
463ee0b2 3213 }
79072805 3214
3280af22
NIS
3215 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3216 if (*PL_tokenbuf) {
3217 PL_expect = XOPERATOR;
3218 force_ident(PL_tokenbuf, '&');
463ee0b2 3219 }
79072805
LW
3220 else
3221 PREREF('&');
c07a80fd 3222 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3223 TERM('&');
3224
378cc40b
LW
3225 case '|':
3226 s++;
3227 tmp = *s++;
3228 if (tmp == '|')
a0d0e21e 3229 AOPERATOR(OROR);
378cc40b 3230 s--;
79072805 3231 BOop(OP_BIT_OR);
378cc40b
LW
3232 case '=':
3233 s++;
3234 tmp = *s++;
3235 if (tmp == '=')
79072805
LW
3236 Eop(OP_EQ);
3237 if (tmp == '>')
3238 OPERATOR(',');
378cc40b 3239 if (tmp == '~')
79072805 3240 PMop(OP_MATCH);
599cee73 3241 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3242 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3243 s--;
3280af22
NIS
3244 if (PL_expect == XSTATE && isALPHA(tmp) &&
3245 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3246 {
3280af22
NIS
3247 if (PL_in_eval && !PL_rsfp) {
3248 d = PL_bufend;
a5f75d66
AD
3249 while (s < d) {
3250 if (*s++ == '\n') {
3251 incline(s);
3252 if (strnEQ(s,"=cut",4)) {
3253 s = strchr(s,'\n');
3254 if (s)
3255 s++;
3256 else
3257 s = d;
3258 incline(s);
3259 goto retry;
3260 }
3261 }
3262 }
3263 goto retry;
3264 }
3280af22
NIS
3265 s = PL_bufend;
3266 PL_doextract = TRUE;
a0d0e21e
LW
3267 goto retry;
3268 }
3280af22 3269 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3270 char *t;
51882d45 3271#ifdef PERL_STRICT_CR
bf4acbe4 3272 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3273#else
bf4acbe4 3274 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3275#endif
a0d0e21e
LW
3276 if (*t == '\n' || *t == '#') {
3277 s--;
3280af22 3278 PL_expect = XBLOCK;
a0d0e21e
LW
3279 goto leftbracket;
3280 }
79072805 3281 }
a0d0e21e
LW
3282 yylval.ival = 0;
3283 OPERATOR(ASSIGNOP);
378cc40b
LW
3284 case '!':
3285 s++;
3286 tmp = *s++;
3287 if (tmp == '=')
79072805 3288 Eop(OP_NE);
378cc40b 3289 if (tmp == '~')
79072805 3290 PMop(OP_NOT);
378cc40b
LW
3291 s--;
3292 OPERATOR('!');
3293 case '<':
3280af22 3294 if (PL_expect != XOPERATOR) {
93a17b20 3295 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3296 check_uni();
79072805
LW
3297 if (s[1] == '<')
3298 s = scan_heredoc(s);
3299 else
3300 s = scan_inputsymbol(s);
3301 TERM(sublex_start());
378cc40b
LW
3302 }
3303 s++;
3304 tmp = *s++;
3305 if (tmp == '<')
79072805 3306 SHop(OP_LEFT_SHIFT);
395c3793
LW
3307 if (tmp == '=') {
3308 tmp = *s++;
3309 if (tmp == '>')
79072805 3310 Eop(OP_NCMP);
395c3793 3311 s--;
79072805 3312 Rop(OP_LE);
395c3793 3313 }
378cc40b 3314 s--;
79072805 3315 Rop(OP_LT);
378cc40b
LW
3316 case '>':
3317 s++;
3318 tmp = *s++;
3319 if (tmp == '>')
79072805 3320 SHop(OP_RIGHT_SHIFT);
378cc40b 3321 if (tmp == '=')
79072805 3322 Rop(OP_GE);
378cc40b 3323 s--;
79072805 3324 Rop(OP_GT);
378cc40b
LW
3325
3326 case '$':
bbce6d69 3327 CLINE;
3328
3280af22
NIS
3329 if (PL_expect == XOPERATOR) {
3330 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3331 PL_expect = XTERM;
a0d0e21e 3332 depcom();
bbce6d69 3333 return ','; /* grandfather non-comma-format format */
a0d0e21e 3334 }
8990e307 3335 }
a0d0e21e 3336
7e2040f0 3337 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3338 PL_tokenbuf[0] = '@';
376b8730
SM
3339 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3340 sizeof PL_tokenbuf - 1, FALSE);
3341 if (PL_expect == XOPERATOR)
3342 no_op("Array length", s);
3280af22 3343 if (!PL_tokenbuf[1])
a0d0e21e 3344 PREREF(DOLSHARP);
3280af22
NIS
3345 PL_expect = XOPERATOR;
3346 PL_pending_ident = '#';
463ee0b2 3347 TOKEN(DOLSHARP);
79072805 3348 }
bbce6d69 3349
3280af22 3350 PL_tokenbuf[0] = '$';
376b8730
SM
3351 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3352 sizeof PL_tokenbuf - 1, FALSE);
3353 if (PL_expect == XOPERATOR)
3354 no_op("Scalar", s);
3280af22
NIS
3355 if (!PL_tokenbuf[1]) {
3356 if (s == PL_bufend)
bbce6d69 3357 yyerror("Final $ should be \\$ or $name");
3358 PREREF('$');
8990e307 3359 }
a0d0e21e 3360
bbce6d69 3361 /* This kludge not intended to be bulletproof. */
3280af22 3362 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3363 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3364 newSViv(PL_compiling.cop_arybase));
bbce6d69 3365 yylval.opval->op_private = OPpCONST_ARYBASE;
3366 TERM(THING);
3367 }
3368
ff68c719 3369 d = s;
69d2bceb 3370 tmp = (I32)*s;
3280af22 3371 if (PL_lex_state == LEX_NORMAL)
ff68c719 3372 s = skipspace(s);
3373
3280af22 3374 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3375 char *t;
3376 if (*s == '[') {
3280af22 3377 PL_tokenbuf[0] = '@';
599cee73 3378 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3379 for(t = s + 1;
7e2040f0 3380 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3381 t++) ;
a0d0e21e 3382 if (*t++ == ',') {
3280af22
NIS
3383 PL_bufptr = skipspace(PL_bufptr);
3384 while (t < PL_bufend && *t != ']')
bbce6d69 3385 t++;
cea2e8a9 3386 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3387 "Multidimensional syntax %.*s not supported",
3388 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3389 }
3390 }
bbce6d69 3391 }
3392 else if (*s == '{') {
3280af22 3393 PL_tokenbuf[0] = '%';
599cee73 3394 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3395 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3396 {
3280af22 3397 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3398 STRLEN len;
3399 for (t++; isSPACE(*t); t++) ;
7e2040f0 3400 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3401 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3402 for (; isSPACE(*t); t++) ;
864dbfa3 3403 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3404 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3405 "You need to quote \"%s\"", tmpbuf);
748a9306 3406 }
93a17b20
LW
3407 }
3408 }
2f3197b3 3409 }
bbce6d69 3410
3280af22 3411 PL_expect = XOPERATOR;
69d2bceb 3412 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3413 bool islop = (PL_last_lop == PL_oldoldbufptr);
3414 if (!islop || PL_last_lop_op == OP_GREPSTART)
3415 PL_expect = XOPERATOR;
bbce6d69 3416 else if (strchr("$@\"'`q", *s))
3280af22 3417 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3418 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3419 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3420 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3421 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3422 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3423 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3424 /* binary operators exclude handle interpretations */
3425 switch (tmp) {
3426 case -KEY_x:
3427 case -KEY_eq:
3428 case -KEY_ne:
3429 case -KEY_gt:
3430 case -KEY_lt:
3431 case -KEY_ge:
3432 case -KEY_le:
3433 case -KEY_cmp:
3434 break;
3435 default:
3280af22 3436 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3437 break;
3438 }
3439 }
68dc0745 3440 else {
3441 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3442 if (gv && GvCVu(gv))
3280af22 3443 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3444 }
93a17b20 3445 }
bbce6d69 3446 else if (isDIGIT(*s))
3280af22 3447 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3448 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3449 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3450 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3451 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3452 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3453 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3454 }
3280af22 3455 PL_pending_ident = '$';
79072805 3456 TOKEN('$');
378cc40b
LW
3457
3458 case '@':
3280af22 3459 if (PL_expect == XOPERATOR)
bbce6d69 3460 no_op("Array", s);
3280af22
NIS
3461 PL_tokenbuf[0] = '@';
3462 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3463 if (!PL_tokenbuf[1]) {
3464 if (s == PL_bufend)
bbce6d69 3465 yyerror("Final @ should be \\@ or @name");
3466 PREREF('@');
3467 }
3280af22 3468 if (PL_lex_state == LEX_NORMAL)
ff68c719 3469 s = skipspace(s);
3280af22 3470 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3471 if (*s == '{')
3280af22 3472 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3473
3474 /* Warn about @ where they meant $. */
599cee73 3475 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3476 if (*s == '[' || *s == '{') {
3477 char *t = s + 1;
7e2040f0 3478 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3479 t++;
3480 if (*t == '}' || *t == ']') {
3481 t++;
3280af22 3482 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3483 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3484 "Scalar value %.*s better written as $%.*s",
3280af22 3485 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3486 }
93a17b20
LW
3487 }
3488 }
463ee0b2 3489 }
3280af22 3490 PL_pending_ident = '@';
79072805 3491 TERM('@');
378cc40b
LW
3492
3493 case '/': /* may either be division or pattern */
3494 case '?': /* may either be conditional or pattern */
3280af22 3495 if (PL_expect != XOPERATOR) {
c277df42 3496 /* Disable warning on "study /blah/" */
3280af22
NIS
3497 if (PL_oldoldbufptr == PL_last_uni
3498 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3499 || memNE(PL_last_uni, "study", 5)
3500 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3501 check_uni();
8782bef2 3502 s = scan_pat(s,OP_MATCH);
79072805 3503 TERM(sublex_start());
378cc40b
LW
3504 }
3505 tmp = *s++;
a687059c 3506 if (tmp == '/')
79072805 3507 Mop(OP_DIVIDE);
378cc40b
LW
3508 OPERATOR(tmp);
3509
3510 case '.':
51882d45
GS
3511 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3512#ifdef PERL_STRICT_CR
3513 && s[1] == '\n'
3514#else
3515 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3516#endif
3517 && (s == PL_linestart || s[-1] == '\n') )
3518 {
3280af22
NIS
3519 PL_lex_formbrack = 0;
3520 PL_expect = XSTATE;
79072805
LW
3521 goto rightbracket;
3522 }
3280af22 3523 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3524 tmp = *s++;
a687059c
LW
3525 if (*s == tmp) {
3526 s++;
2f3197b3
LW
3527 if (*s == tmp) {
3528 s++;
79072805 3529 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3530 }
3531 else
79072805 3532 yylval.ival = 0;
378cc40b 3533 OPERATOR(DOTDOT);
a687059c 3534 }
3280af22 3535 if (PL_expect != XOPERATOR)
2f3197b3 3536 check_uni();
79072805 3537 Aop(OP_CONCAT);
378cc40b
LW
3538 }
3539 /* FALL THROUGH */
3540 case '0': case '1': case '2': case '3': case '4':
3541 case '5': case '6': case '7': case '8': case '9':
79072805 3542 s = scan_num(s);
3280af22 3543 if (PL_expect == XOPERATOR)
8990e307 3544 no_op("Number",s);
79072805
LW
3545 TERM(THING);
3546
3547 case '\'':
09bef843 3548 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3549 if (PL_expect == XOPERATOR) {
3550 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3551 PL_expect = XTERM;
a0d0e21e
LW
3552 depcom();
3553 return ','; /* grandfather non-comma-format format */
3554 }
463ee0b2 3555 else
8990e307 3556 no_op("String",s);
463ee0b2 3557 }
79072805 3558 if (!s)
85e6fe83 3559 missingterm((char*)0);
79072805
LW
3560 yylval.ival = OP_CONST;
3561 TERM(sublex_start());
3562
3563 case '"':
09bef843 3564 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3565 if (PL_expect == XOPERATOR) {
3566 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3567 PL_expect = XTERM;
a0d0e21e
LW
3568 depcom();
3569 return ','; /* grandfather non-comma-format format */
3570 }
463ee0b2 3571 else
8990e307 3572 no_op("String",s);
463ee0b2 3573 }
79072805 3574 if (!s)
85e6fe83 3575 missingterm((char*)0);
4633a7c4 3576 yylval.ival = OP_CONST;
3280af22 3577 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3578 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3579 yylval.ival = OP_STRINGIFY;
3580 break;
3581 }
3582 }
79072805
LW
3583 TERM(sublex_start());
3584
3585 case '`':
09bef843 3586 s = scan_str(s,FALSE,FALSE);
3280af22 3587 if (PL_expect == XOPERATOR)
8990e307 3588 no_op("Backticks",s);
79072805 3589 if (!s)
85e6fe83