This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add simple exception handling macros for XS writers.
[perl5.git] / toke.c
... / ...
CommitLineData
1/* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
13 */
14
15/*
16 * This file is the lexer for Perl. It's closely linked to the
17 * parser, perly.y.
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
22#include "EXTERN.h"
23#define PERL_IN_TOKE_C
24#include "perl.h"
25
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
28
29static char ident_too_long[] = "Identifier too long";
30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
32
33static void restore_rsfp(pTHX_ void *f);
34#ifndef PERL_NO_UTF16_FILTER
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
37#endif
38
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
44#else
45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
46#endif
47
48/* In variables named $^X, these are the legal values for X.
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
77
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
94#ifdef ff_next
95#undef ff_next
96#endif
97
98#include "keywords.h"
99
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
102#ifdef CLINE
103#undef CLINE
104#endif
105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
106
107/*
108 * Convenience functions to return different tokens and prime the
109 * lexer for the next token. They all take an argument.
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
121 * FUN1 : not used, except for not, which isn't a UNIOP
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
126 * PMop : pattern-matching operator
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
135#ifdef DEBUGGING /* Serve -DT. */
136# define REPORT(retval) tokereport(s,(int)retval)
137#else
138# define REPORT(retval) (retval)
139#endif
140
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
161
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
166 */
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
169 PL_expect = x, \
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
178
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
186
187/* grandfather return to old style */
188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
189
190#ifdef DEBUGGING
191
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
203{
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
270
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = newSVpvn("<== ", 4);
279
280 for (p = debug_tokens; p->token; p++) {
281 if (p->token == (int)rv) {
282 name = p->name;
283 type = p->type;
284 break;
285 }
286 }
287 if (name)
288 Perl_sv_catpv(aTHX_ report, name);
289 else if ((char)rv > ' ' && (char)rv < '~')
290 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
291 else if (!rv)
292 Perl_sv_catpv(aTHX_ report, "EOF");
293 else
294 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
295 switch (type) {
296 case TOKENTYPE_NONE:
297 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
298 break;
299 case TOKENTYPE_IVAL:
300 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
301 break;
302 case TOKENTYPE_OPNUM:
303 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
304 PL_op_name[yylval.ival]);
305 break;
306 case TOKENTYPE_PVAL:
307 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
308 break;
309 case TOKENTYPE_OPVAL:
310 if (yylval.opval)
311 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
312 PL_op_name[yylval.opval->op_type]);
313 else
314 Perl_sv_catpv(aTHX_ report, "(opval=null)");
315 break;
316 }
317 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
318 if (s - PL_bufptr > 0)
319 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
320 else {
321 if (PL_oldbufptr && *PL_oldbufptr)
322 sv_catpv(report, PL_tokenbuf);
323 }
324 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
325 };
326 return (int)rv;
327}
328
329#endif
330
331/*
332 * S_ao
333 *
334 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
335 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
336 */
337
338STATIC int
339S_ao(pTHX_ int toketype)
340{
341 if (*PL_bufptr == '=') {
342 PL_bufptr++;
343 if (toketype == ANDAND)
344 yylval.ival = OP_ANDASSIGN;
345 else if (toketype == OROR)
346 yylval.ival = OP_ORASSIGN;
347 else if (toketype == DORDOR)
348 yylval.ival = OP_DORASSIGN;
349 toketype = ASSIGNOP;
350 }
351 return toketype;
352}
353
354/*
355 * S_no_op
356 * When Perl expects an operator and finds something else, no_op
357 * prints the warning. It always prints "<something> found where
358 * operator expected. It prints "Missing semicolon on previous line?"
359 * if the surprise occurs at the start of the line. "do you need to
360 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
361 * where the compiler doesn't know if foo is a method call or a function.
362 * It prints "Missing operator before end of line" if there's nothing
363 * after the missing operator, or "... before <...>" if there is something
364 * after the missing operator.
365 */
366
367STATIC void
368S_no_op(pTHX_ char *what, char *s)
369{
370 char *oldbp = PL_bufptr;
371 bool is_first = (PL_oldbufptr == PL_linestart);
372
373 if (!s)
374 s = oldbp;
375 else
376 PL_bufptr = s;
377 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
378 if (ckWARN_d(WARN_SYNTAX)) {
379 if (is_first)
380 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
381 "\t(Missing semicolon on previous line?)\n");
382 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
383 char *t;
384 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
385 if (t < PL_bufptr && isSPACE(*t))
386 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
387 "\t(Do you need to predeclare %.*s?)\n",
388 t - PL_oldoldbufptr, PL_oldoldbufptr);
389 }
390 else {
391 assert(s >= oldbp);
392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
393 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
394 }
395 }
396 PL_bufptr = oldbp;
397}
398
399/*
400 * S_missingterm
401 * Complain about missing quote/regexp/heredoc terminator.
402 * If it's called with (char *)NULL then it cauterizes the line buffer.
403 * If we're in a delimited string and the delimiter is a control
404 * character, it's reformatted into a two-char sequence like ^C.
405 * This is fatal.
406 */
407
408STATIC void
409S_missingterm(pTHX_ char *s)
410{
411 char tmpbuf[3];
412 char q;
413 if (s) {
414 char *nl = strrchr(s,'\n');
415 if (nl)
416 *nl = '\0';
417 }
418 else if (
419#ifdef EBCDIC
420 iscntrl(PL_multi_close)
421#else
422 PL_multi_close < 32 || PL_multi_close == 127
423#endif
424 ) {
425 *tmpbuf = '^';
426 tmpbuf[1] = toCTRL(PL_multi_close);
427 s = "\\n";
428 tmpbuf[2] = '\0';
429 s = tmpbuf;
430 }
431 else {
432 *tmpbuf = (char)PL_multi_close;
433 tmpbuf[1] = '\0';
434 s = tmpbuf;
435 }
436 q = strchr(s,'"') ? '\'' : '"';
437 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
438}
439
440/*
441 * Perl_deprecate
442 */
443
444void
445Perl_deprecate(pTHX_ char *s)
446{
447 if (ckWARN(WARN_DEPRECATED))
448 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
449}
450
451void
452Perl_deprecate_old(pTHX_ char *s)
453{
454 /* This function should NOT be called for any new deprecated warnings */
455 /* Use Perl_deprecate instead */
456 /* */
457 /* It is here to maintain backward compatibility with the pre-5.8 */
458 /* warnings category hierarchy. The "deprecated" category used to */
459 /* live under the "syntax" category. It is now a top-level category */
460 /* in its own right. */
461
462 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
463 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
464 "Use of %s is deprecated", s);
465}
466
467/*
468 * depcom
469 * Deprecate a comma-less variable list.
470 */
471
472STATIC void
473S_depcom(pTHX)
474{
475 deprecate_old("comma-less variable list");
476}
477
478/*
479 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
480 * utf16-to-utf8-reversed.
481 */
482
483#ifdef PERL_CR_FILTER
484static void
485strip_return(SV *sv)
486{
487 register char *s = SvPVX(sv);
488 register char *e = s + SvCUR(sv);
489 /* outer loop optimized to do nothing if there are no CR-LFs */
490 while (s < e) {
491 if (*s++ == '\r' && *s == '\n') {
492 /* hit a CR-LF, need to copy the rest */
493 register char *d = s - 1;
494 *d++ = *s++;
495 while (s < e) {
496 if (*s == '\r' && s[1] == '\n')
497 s++;
498 *d++ = *s++;
499 }
500 SvCUR(sv) -= s - d;
501 return;
502 }
503 }
504}
505
506STATIC I32
507S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
508{
509 I32 count = FILTER_READ(idx+1, sv, maxlen);
510 if (count > 0 && !maxlen)
511 strip_return(sv);
512 return count;
513}
514#endif
515
516/*
517 * Perl_lex_start
518 * Initialize variables. Uses the Perl save_stack to save its state (for
519 * recursive calls to the parser).
520 */
521
522void
523Perl_lex_start(pTHX_ SV *line)
524{
525 char *s;
526 STRLEN len;
527
528 SAVEI32(PL_lex_dojoin);
529 SAVEI32(PL_lex_brackets);
530 SAVEI32(PL_lex_casemods);
531 SAVEI32(PL_lex_starts);
532 SAVEI32(PL_lex_state);
533 SAVEVPTR(PL_lex_inpat);
534 SAVEI32(PL_lex_inwhat);
535 if (PL_lex_state == LEX_KNOWNEXT) {
536 I32 toke = PL_nexttoke;
537 while (--toke >= 0) {
538 SAVEI32(PL_nexttype[toke]);
539 SAVEVPTR(PL_nextval[toke]);
540 }
541 SAVEI32(PL_nexttoke);
542 }
543 SAVECOPLINE(PL_curcop);
544 SAVEPPTR(PL_bufptr);
545 SAVEPPTR(PL_bufend);
546 SAVEPPTR(PL_oldbufptr);
547 SAVEPPTR(PL_oldoldbufptr);
548 SAVEPPTR(PL_last_lop);
549 SAVEPPTR(PL_last_uni);
550 SAVEPPTR(PL_linestart);
551 SAVESPTR(PL_linestr);
552 SAVEGENERICPV(PL_lex_brackstack);
553 SAVEGENERICPV(PL_lex_casestack);
554 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
555 SAVESPTR(PL_lex_stuff);
556 SAVEI32(PL_lex_defer);
557 SAVEI32(PL_sublex_info.sub_inwhat);
558 SAVESPTR(PL_lex_repl);
559 SAVEINT(PL_expect);
560 SAVEINT(PL_lex_expect);
561
562 PL_lex_state = LEX_NORMAL;
563 PL_lex_defer = 0;
564 PL_expect = XSTATE;
565 PL_lex_brackets = 0;
566 New(899, PL_lex_brackstack, 120, char);
567 New(899, PL_lex_casestack, 12, char);
568 PL_lex_casemods = 0;
569 *PL_lex_casestack = '\0';
570 PL_lex_dojoin = 0;
571 PL_lex_starts = 0;
572 PL_lex_stuff = Nullsv;
573 PL_lex_repl = Nullsv;
574 PL_lex_inpat = 0;
575 PL_nexttoke = 0;
576 PL_lex_inwhat = 0;
577 PL_sublex_info.sub_inwhat = 0;
578 PL_linestr = line;
579 if (SvREADONLY(PL_linestr))
580 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
581 s = SvPV(PL_linestr, len);
582 if (!len || s[len-1] != ';') {
583 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
584 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
585 sv_catpvn(PL_linestr, "\n;", 2);
586 }
587 SvTEMP_off(PL_linestr);
588 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
589 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
590 PL_last_lop = PL_last_uni = Nullch;
591 PL_rsfp = 0;
592}
593
594/*
595 * Perl_lex_end
596 * Finalizer for lexing operations. Must be called when the parser is
597 * done with the lexer.
598 */
599
600void
601Perl_lex_end(pTHX)
602{
603 PL_doextract = FALSE;
604}
605
606/*
607 * S_incline
608 * This subroutine has nothing to do with tilting, whether at windmills
609 * or pinball tables. Its name is short for "increment line". It
610 * increments the current line number in CopLINE(PL_curcop) and checks
611 * to see whether the line starts with a comment of the form
612 * # line 500 "foo.pm"
613 * If so, it sets the current line number and file to the values in the comment.
614 */
615
616STATIC void
617S_incline(pTHX_ char *s)
618{
619 char *t;
620 char *n;
621 char *e;
622 char ch;
623
624 CopLINE_inc(PL_curcop);
625 if (*s++ != '#')
626 return;
627 while (SPACE_OR_TAB(*s)) s++;
628 if (strnEQ(s, "line", 4))
629 s += 4;
630 else
631 return;
632 if (SPACE_OR_TAB(*s))
633 s++;
634 else
635 return;
636 while (SPACE_OR_TAB(*s)) s++;
637 if (!isDIGIT(*s))
638 return;
639 n = s;
640 while (isDIGIT(*s))
641 s++;
642 while (SPACE_OR_TAB(*s))
643 s++;
644 if (*s == '"' && (t = strchr(s+1, '"'))) {
645 s++;
646 e = t + 1;
647 }
648 else {
649 for (t = s; !isSPACE(*t); t++) ;
650 e = t;
651 }
652 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
653 e++;
654 if (*e != '\n' && *e != '\0')
655 return; /* false alarm */
656
657 ch = *t;
658 *t = '\0';
659 if (t - s > 0) {
660 CopFILE_free(PL_curcop);
661 CopFILE_set(PL_curcop, s);
662 }
663 *t = ch;
664 CopLINE_set(PL_curcop, atoi(n)-1);
665}
666
667/*
668 * S_skipspace
669 * Called to gobble the appropriate amount and type of whitespace.
670 * Skips comments as well.
671 */
672
673STATIC char *
674S_skipspace(pTHX_ register char *s)
675{
676 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
677 while (s < PL_bufend && SPACE_OR_TAB(*s))
678 s++;
679 return s;
680 }
681 for (;;) {
682 STRLEN prevlen;
683 SSize_t oldprevlen, oldoldprevlen;
684 SSize_t oldloplen = 0, oldunilen = 0;
685 while (s < PL_bufend && isSPACE(*s)) {
686 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
687 incline(s);
688 }
689
690 /* comment */
691 if (s < PL_bufend && *s == '#') {
692 while (s < PL_bufend && *s != '\n')
693 s++;
694 if (s < PL_bufend) {
695 s++;
696 if (PL_in_eval && !PL_rsfp) {
697 incline(s);
698 continue;
699 }
700 }
701 }
702
703 /* only continue to recharge the buffer if we're at the end
704 * of the buffer, we're not reading from a source filter, and
705 * we're in normal lexing mode
706 */
707 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
708 PL_lex_state == LEX_FORMLINE)
709 return s;
710
711 /* try to recharge the buffer */
712 if ((s = filter_gets(PL_linestr, PL_rsfp,
713 (prevlen = SvCUR(PL_linestr)))) == Nullch)
714 {
715 /* end of file. Add on the -p or -n magic */
716 if (PL_minus_p) {
717 sv_setpv(PL_linestr,
718 ";}continue{print or die qq(-p destination: $!\\n);}");
719 PL_minus_n = PL_minus_p = 0;
720 }
721 else if (PL_minus_n) {
722 sv_setpvn(PL_linestr, ";}", 2);
723 PL_minus_n = 0;
724 }
725 else
726 sv_setpvn(PL_linestr,";", 1);
727
728 /* reset variables for next time we lex */
729 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
730 = SvPVX(PL_linestr);
731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
732 PL_last_lop = PL_last_uni = Nullch;
733
734 /* Close the filehandle. Could be from -P preprocessor,
735 * STDIN, or a regular file. If we were reading code from
736 * STDIN (because the commandline held no -e or filename)
737 * then we don't close it, we reset it so the code can
738 * read from STDIN too.
739 */
740
741 if (PL_preprocess && !PL_in_eval)
742 (void)PerlProc_pclose(PL_rsfp);
743 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
744 PerlIO_clearerr(PL_rsfp);
745 else
746 (void)PerlIO_close(PL_rsfp);
747 PL_rsfp = Nullfp;
748 return s;
749 }
750
751 /* not at end of file, so we only read another line */
752 /* make corresponding updates to old pointers, for yyerror() */
753 oldprevlen = PL_oldbufptr - PL_bufend;
754 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
755 if (PL_last_uni)
756 oldunilen = PL_last_uni - PL_bufend;
757 if (PL_last_lop)
758 oldloplen = PL_last_lop - PL_bufend;
759 PL_linestart = PL_bufptr = s + prevlen;
760 PL_bufend = s + SvCUR(PL_linestr);
761 s = PL_bufptr;
762 PL_oldbufptr = s + oldprevlen;
763 PL_oldoldbufptr = s + oldoldprevlen;
764 if (PL_last_uni)
765 PL_last_uni = s + oldunilen;
766 if (PL_last_lop)
767 PL_last_lop = s + oldloplen;
768 incline(s);
769
770 /* debugger active and we're not compiling the debugger code,
771 * so store the line into the debugger's array of lines
772 */
773 if (PERLDB_LINE && PL_curstash != PL_debstash) {
774 SV *sv = NEWSV(85,0);
775
776 sv_upgrade(sv, SVt_PVMG);
777 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
778 (void)SvIOK_on(sv);
779 SvIVX(sv) = 0;
780 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
781 }
782 }
783}
784
785/*
786 * S_check_uni
787 * Check the unary operators to ensure there's no ambiguity in how they're
788 * used. An ambiguous piece of code would be:
789 * rand + 5
790 * This doesn't mean rand() + 5. Because rand() is a unary operator,
791 * the +5 is its argument.
792 */
793
794STATIC void
795S_check_uni(pTHX)
796{
797 char *s;
798 char *t;
799
800 if (PL_oldoldbufptr != PL_last_uni)
801 return;
802 while (isSPACE(*PL_last_uni))
803 PL_last_uni++;
804 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
805 if ((t = strchr(s, '(')) && t < PL_bufptr)
806 return;
807 if (ckWARN_d(WARN_AMBIGUOUS)){
808 char ch = *s;
809 *s = '\0';
810 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
811 "Warning: Use of \"%s\" without parentheses is ambiguous",
812 PL_last_uni);
813 *s = ch;
814 }
815}
816
817/*
818 * LOP : macro to build a list operator. Its behaviour has been replaced
819 * with a subroutine, S_lop() for which LOP is just another name.
820 */
821
822#define LOP(f,x) return lop(f,x,s)
823
824/*
825 * S_lop
826 * Build a list operator (or something that might be one). The rules:
827 * - if we have a next token, then it's a list operator [why?]
828 * - if the next thing is an opening paren, then it's a function
829 * - else it's a list operator
830 */
831
832STATIC I32
833S_lop(pTHX_ I32 f, int x, char *s)
834{
835 yylval.ival = f;
836 CLINE;
837 PL_expect = x;
838 PL_bufptr = s;
839 PL_last_lop = PL_oldbufptr;
840 PL_last_lop_op = (OPCODE)f;
841 if (PL_nexttoke)
842 return REPORT(LSTOP);
843 if (*s == '(')
844 return REPORT(FUNC);
845 s = skipspace(s);
846 if (*s == '(')
847 return REPORT(FUNC);
848 else
849 return REPORT(LSTOP);
850}
851
852/*
853 * S_force_next
854 * When the lexer realizes it knows the next token (for instance,
855 * it is reordering tokens for the parser) then it can call S_force_next
856 * to know what token to return the next time the lexer is called. Caller
857 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
858 * handles the token correctly.
859 */
860
861STATIC void
862S_force_next(pTHX_ I32 type)
863{
864 PL_nexttype[PL_nexttoke] = type;
865 PL_nexttoke++;
866 if (PL_lex_state != LEX_KNOWNEXT) {
867 PL_lex_defer = PL_lex_state;
868 PL_lex_expect = PL_expect;
869 PL_lex_state = LEX_KNOWNEXT;
870 }
871}
872
873STATIC SV *
874S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
875{
876 SV *sv = newSVpvn(start,len);
877 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
878 SvUTF8_on(sv);
879 return sv;
880}
881
882/*
883 * S_force_word
884 * When the lexer knows the next thing is a word (for instance, it has
885 * just seen -> and it knows that the next char is a word char, then
886 * it calls S_force_word to stick the next word into the PL_next lookahead.
887 *
888 * Arguments:
889 * char *start : buffer position (must be within PL_linestr)
890 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
891 * int check_keyword : if true, Perl checks to make sure the word isn't
892 * a keyword (do this if the word is a label, e.g. goto FOO)
893 * int allow_pack : if true, : characters will also be allowed (require,
894 * use, etc. do this)
895 * int allow_initial_tick : used by the "sub" lexer only.
896 */
897
898STATIC char *
899S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
900{
901 register char *s;
902 STRLEN len;
903
904 start = skipspace(start);
905 s = start;
906 if (isIDFIRST_lazy_if(s,UTF) ||
907 (allow_pack && *s == ':') ||
908 (allow_initial_tick && *s == '\'') )
909 {
910 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
911 if (check_keyword && keyword(PL_tokenbuf, len))
912 return start;
913 if (token == METHOD) {
914 s = skipspace(s);
915 if (*s == '(')
916 PL_expect = XTERM;
917 else {
918 PL_expect = XOPERATOR;
919 }
920 }
921 PL_nextval[PL_nexttoke].opval
922 = (OP*)newSVOP(OP_CONST,0,
923 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
924 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
925 force_next(token);
926 }
927 return s;
928}
929
930/*
931 * S_force_ident
932 * Called when the lexer wants $foo *foo &foo etc, but the program
933 * text only contains the "foo" portion. The first argument is a pointer
934 * to the "foo", and the second argument is the type symbol to prefix.
935 * Forces the next token to be a "WORD".
936 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
937 */
938
939STATIC void
940S_force_ident(pTHX_ register char *s, int kind)
941{
942 if (s && *s) {
943 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
944 PL_nextval[PL_nexttoke].opval = o;
945 force_next(WORD);
946 if (kind) {
947 o->op_private = OPpCONST_ENTERED;
948 /* XXX see note in pp_entereval() for why we forgo typo
949 warnings if the symbol must be introduced in an eval.
950 GSAR 96-10-12 */
951 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
952 kind == '$' ? SVt_PV :
953 kind == '@' ? SVt_PVAV :
954 kind == '%' ? SVt_PVHV :
955 SVt_PVGV
956 );
957 }
958 }
959}
960
961NV
962Perl_str_to_version(pTHX_ SV *sv)
963{
964 NV retval = 0.0;
965 NV nshift = 1.0;
966 STRLEN len;
967 char *start = SvPVx(sv,len);
968 bool utf = SvUTF8(sv) ? TRUE : FALSE;
969 char *end = start + len;
970 while (start < end) {
971 STRLEN skip;
972 UV n;
973 if (utf)
974 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
975 else {
976 n = *(U8*)start;
977 skip = 1;
978 }
979 retval += ((NV)n)/nshift;
980 start += skip;
981 nshift *= 1000;
982 }
983 return retval;
984}
985
986/*
987 * S_force_version
988 * Forces the next token to be a version number.
989 * If the next token appears to be an invalid version number, (e.g. "v2b"),
990 * and if "guessing" is TRUE, then no new token is created (and the caller
991 * must use an alternative parsing method).
992 */
993
994STATIC char *
995S_force_version(pTHX_ char *s, int guessing)
996{
997 OP *version = Nullop;
998 char *d;
999
1000 s = skipspace(s);
1001
1002 d = s;
1003 if (*d == 'v')
1004 d++;
1005 if (isDIGIT(*d)) {
1006 while (isDIGIT(*d) || *d == '_' || *d == '.')
1007 d++;
1008 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1009 SV *ver;
1010 s = scan_num(s, &yylval);
1011 version = yylval.opval;
1012 ver = cSVOPx(version)->op_sv;
1013 if (SvPOK(ver) && !SvNIOK(ver)) {
1014 (void)SvUPGRADE(ver, SVt_PVNV);
1015 SvNVX(ver) = str_to_version(ver);
1016 SvNOK_on(ver); /* hint that it is a version */
1017 }
1018 }
1019 else if (guessing)
1020 return s;
1021 }
1022
1023 /* NOTE: The parser sees the package name and the VERSION swapped */
1024 PL_nextval[PL_nexttoke].opval = version;
1025 force_next(WORD);
1026
1027 return s;
1028}
1029
1030/*
1031 * S_tokeq
1032 * Tokenize a quoted string passed in as an SV. It finds the next
1033 * chunk, up to end of string or a backslash. It may make a new
1034 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1035 * turns \\ into \.
1036 */
1037
1038STATIC SV *
1039S_tokeq(pTHX_ SV *sv)
1040{
1041 register char *s;
1042 register char *send;
1043 register char *d;
1044 STRLEN len = 0;
1045 SV *pv = sv;
1046
1047 if (!SvLEN(sv))
1048 goto finish;
1049
1050 s = SvPV_force(sv, len);
1051 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1052 goto finish;
1053 send = s + len;
1054 while (s < send && *s != '\\')
1055 s++;
1056 if (s == send)
1057 goto finish;
1058 d = s;
1059 if ( PL_hints & HINT_NEW_STRING ) {
1060 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
1061 if (SvUTF8(sv))
1062 SvUTF8_on(pv);
1063 }
1064 while (s < send) {
1065 if (*s == '\\') {
1066 if (s + 1 < send && (s[1] == '\\'))
1067 s++; /* all that, just for this */
1068 }
1069 *d++ = *s++;
1070 }
1071 *d = '\0';
1072 SvCUR_set(sv, d - SvPVX(sv));
1073 finish:
1074 if ( PL_hints & HINT_NEW_STRING )
1075 return new_constant(NULL, 0, "q", sv, pv, "q");
1076 return sv;
1077}
1078
1079/*
1080 * Now come three functions related to double-quote context,
1081 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1082 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1083 * interact with PL_lex_state, and create fake ( ... ) argument lists
1084 * to handle functions and concatenation.
1085 * They assume that whoever calls them will be setting up a fake
1086 * join call, because each subthing puts a ',' after it. This lets
1087 * "lower \luPpEr"
1088 * become
1089 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1090 *
1091 * (I'm not sure whether the spurious commas at the end of lcfirst's
1092 * arguments and join's arguments are created or not).
1093 */
1094
1095/*
1096 * S_sublex_start
1097 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1098 *
1099 * Pattern matching will set PL_lex_op to the pattern-matching op to
1100 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1101 *
1102 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1103 *
1104 * Everything else becomes a FUNC.
1105 *
1106 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1107 * had an OP_CONST or OP_READLINE). This just sets us up for a
1108 * call to S_sublex_push().
1109 */
1110
1111STATIC I32
1112S_sublex_start(pTHX)
1113{
1114 register I32 op_type = yylval.ival;
1115
1116 if (op_type == OP_NULL) {
1117 yylval.opval = PL_lex_op;
1118 PL_lex_op = Nullop;
1119 return THING;
1120 }
1121 if (op_type == OP_CONST || op_type == OP_READLINE) {
1122 SV *sv = tokeq(PL_lex_stuff);
1123
1124 if (SvTYPE(sv) == SVt_PVIV) {
1125 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1126 STRLEN len;
1127 char *p;
1128 SV *nsv;
1129
1130 p = SvPV(sv, len);
1131 nsv = newSVpvn(p, len);
1132 if (SvUTF8(sv))
1133 SvUTF8_on(nsv);
1134 SvREFCNT_dec(sv);
1135 sv = nsv;
1136 }
1137 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1138 PL_lex_stuff = Nullsv;
1139 /* Allow <FH> // "foo" */
1140 if (op_type == OP_READLINE)
1141 PL_expect = XTERMORDORDOR;
1142 return THING;
1143 }
1144
1145 PL_sublex_info.super_state = PL_lex_state;
1146 PL_sublex_info.sub_inwhat = op_type;
1147 PL_sublex_info.sub_op = PL_lex_op;
1148 PL_lex_state = LEX_INTERPPUSH;
1149
1150 PL_expect = XTERM;
1151 if (PL_lex_op) {
1152 yylval.opval = PL_lex_op;
1153 PL_lex_op = Nullop;
1154 return PMFUNC;
1155 }
1156 else
1157 return FUNC;
1158}
1159
1160/*
1161 * S_sublex_push
1162 * Create a new scope to save the lexing state. The scope will be
1163 * ended in S_sublex_done. Returns a '(', starting the function arguments
1164 * to the uc, lc, etc. found before.
1165 * Sets PL_lex_state to LEX_INTERPCONCAT.
1166 */
1167
1168STATIC I32
1169S_sublex_push(pTHX)
1170{
1171 ENTER;
1172
1173 PL_lex_state = PL_sublex_info.super_state;
1174 SAVEI32(PL_lex_dojoin);
1175 SAVEI32(PL_lex_brackets);
1176 SAVEI32(PL_lex_casemods);
1177 SAVEI32(PL_lex_starts);
1178 SAVEI32(PL_lex_state);
1179 SAVEVPTR(PL_lex_inpat);
1180 SAVEI32(PL_lex_inwhat);
1181 SAVECOPLINE(PL_curcop);
1182 SAVEPPTR(PL_bufptr);
1183 SAVEPPTR(PL_bufend);
1184 SAVEPPTR(PL_oldbufptr);
1185 SAVEPPTR(PL_oldoldbufptr);
1186 SAVEPPTR(PL_last_lop);
1187 SAVEPPTR(PL_last_uni);
1188 SAVEPPTR(PL_linestart);
1189 SAVESPTR(PL_linestr);
1190 SAVEGENERICPV(PL_lex_brackstack);
1191 SAVEGENERICPV(PL_lex_casestack);
1192
1193 PL_linestr = PL_lex_stuff;
1194 PL_lex_stuff = Nullsv;
1195
1196 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1197 = SvPVX(PL_linestr);
1198 PL_bufend += SvCUR(PL_linestr);
1199 PL_last_lop = PL_last_uni = Nullch;
1200 SAVEFREESV(PL_linestr);
1201
1202 PL_lex_dojoin = FALSE;
1203 PL_lex_brackets = 0;
1204 New(899, PL_lex_brackstack, 120, char);
1205 New(899, PL_lex_casestack, 12, char);
1206 PL_lex_casemods = 0;
1207 *PL_lex_casestack = '\0';
1208 PL_lex_starts = 0;
1209 PL_lex_state = LEX_INTERPCONCAT;
1210 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1211
1212 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1213 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1214 PL_lex_inpat = PL_sublex_info.sub_op;
1215 else
1216 PL_lex_inpat = Nullop;
1217
1218 return '(';
1219}
1220
1221/*
1222 * S_sublex_done
1223 * Restores lexer state after a S_sublex_push.
1224 */
1225
1226STATIC I32
1227S_sublex_done(pTHX)
1228{
1229 if (!PL_lex_starts++) {
1230 SV *sv = newSVpvn("",0);
1231 if (SvUTF8(PL_linestr))
1232 SvUTF8_on(sv);
1233 PL_expect = XOPERATOR;
1234 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1235 return THING;
1236 }
1237
1238 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1239 PL_lex_state = LEX_INTERPCASEMOD;
1240 return yylex();
1241 }
1242
1243 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1244 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1245 PL_linestr = PL_lex_repl;
1246 PL_lex_inpat = 0;
1247 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1248 PL_bufend += SvCUR(PL_linestr);
1249 PL_last_lop = PL_last_uni = Nullch;
1250 SAVEFREESV(PL_linestr);
1251 PL_lex_dojoin = FALSE;
1252 PL_lex_brackets = 0;
1253 PL_lex_casemods = 0;
1254 *PL_lex_casestack = '\0';
1255 PL_lex_starts = 0;
1256 if (SvEVALED(PL_lex_repl)) {
1257 PL_lex_state = LEX_INTERPNORMAL;
1258 PL_lex_starts++;
1259 /* we don't clear PL_lex_repl here, so that we can check later
1260 whether this is an evalled subst; that means we rely on the
1261 logic to ensure sublex_done() is called again only via the
1262 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1263 }
1264 else {
1265 PL_lex_state = LEX_INTERPCONCAT;
1266 PL_lex_repl = Nullsv;
1267 }
1268 return ',';
1269 }
1270 else {
1271 LEAVE;
1272 PL_bufend = SvPVX(PL_linestr);
1273 PL_bufend += SvCUR(PL_linestr);
1274 PL_expect = XOPERATOR;
1275 PL_sublex_info.sub_inwhat = 0;
1276 return ')';
1277 }
1278}
1279
1280/*
1281 scan_const
1282
1283 Extracts a pattern, double-quoted string, or transliteration. This
1284 is terrifying code.
1285
1286 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1287 processing a pattern (PL_lex_inpat is true), a transliteration
1288 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1289
1290 Returns a pointer to the character scanned up to. Iff this is
1291 advanced from the start pointer supplied (ie if anything was
1292 successfully parsed), will leave an OP for the substring scanned
1293 in yylval. Caller must intuit reason for not parsing further
1294 by looking at the next characters herself.
1295
1296 In patterns:
1297 backslashes:
1298 double-quoted style: \r and \n
1299 regexp special ones: \D \s
1300 constants: \x3
1301 backrefs: \1 (deprecated in substitution replacements)
1302 case and quoting: \U \Q \E
1303 stops on @ and $, but not for $ as tail anchor
1304
1305 In transliterations:
1306 characters are VERY literal, except for - not at the start or end
1307 of the string, which indicates a range. scan_const expands the
1308 range to the full set of intermediate characters.
1309
1310 In double-quoted strings:
1311 backslashes:
1312 double-quoted style: \r and \n
1313 constants: \x3
1314 backrefs: \1 (deprecated)
1315 case and quoting: \U \Q \E
1316 stops on @ and $
1317
1318 scan_const does *not* construct ops to handle interpolated strings.
1319 It stops processing as soon as it finds an embedded $ or @ variable
1320 and leaves it to the caller to work out what's going on.
1321
1322 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1323
1324 $ in pattern could be $foo or could be tail anchor. Assumption:
1325 it's a tail anchor if $ is the last thing in the string, or if it's
1326 followed by one of ")| \n\t"
1327
1328 \1 (backreferences) are turned into $1
1329
1330 The structure of the code is
1331 while (there's a character to process) {
1332 handle transliteration ranges
1333 skip regexp comments
1334 skip # initiated comments in //x patterns
1335 check for embedded @foo
1336 check for embedded scalars
1337 if (backslash) {
1338 leave intact backslashes from leave (below)
1339 deprecate \1 in strings and sub replacements
1340 handle string-changing backslashes \l \U \Q \E, etc.
1341 switch (what was escaped) {
1342 handle - in a transliteration (becomes a literal -)
1343 handle \132 octal characters
1344 handle 0x15 hex characters
1345 handle \cV (control V)
1346 handle printf backslashes (\f, \r, \n, etc)
1347 } (end switch)
1348 } (end if backslash)
1349 } (end while character to read)
1350
1351*/
1352
1353STATIC char *
1354S_scan_const(pTHX_ char *start)
1355{
1356 register char *send = PL_bufend; /* end of the constant */
1357 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1358 register char *s = start; /* start of the constant */
1359 register char *d = SvPVX(sv); /* destination for copies */
1360 bool dorange = FALSE; /* are we in a translit range? */
1361 bool didrange = FALSE; /* did we just finish a range? */
1362 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1363 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1364 UV uv;
1365
1366 const char *leaveit = /* set of acceptably-backslashed characters */
1367 PL_lex_inpat
1368 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1369 : "";
1370
1371 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1372 /* If we are doing a trans and we know we want UTF8 set expectation */
1373 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1374 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1375 }
1376
1377
1378 while (s < send || dorange) {
1379 /* get transliterations out of the way (they're most literal) */
1380 if (PL_lex_inwhat == OP_TRANS) {
1381 /* expand a range A-Z to the full set of characters. AIE! */
1382 if (dorange) {
1383 I32 i; /* current expanded character */
1384 I32 min; /* first character in range */
1385 I32 max; /* last character in range */
1386
1387 if (has_utf8) {
1388 char *c = (char*)utf8_hop((U8*)d, -1);
1389 char *e = d++;
1390 while (e-- > c)
1391 *(e + 1) = *e;
1392 *c = (char)UTF_TO_NATIVE(0xff);
1393 /* mark the range as done, and continue */
1394 dorange = FALSE;
1395 didrange = TRUE;
1396 continue;
1397 }
1398
1399 i = d - SvPVX(sv); /* remember current offset */
1400 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1401 d = SvPVX(sv) + i; /* refresh d after realloc */
1402 d -= 2; /* eat the first char and the - */
1403
1404 min = (U8)*d; /* first char in range */
1405 max = (U8)d[1]; /* last char in range */
1406
1407 if (min > max) {
1408 Perl_croak(aTHX_
1409 "Invalid range \"%c-%c\" in transliteration operator",
1410 (char)min, (char)max);
1411 }
1412
1413#ifdef EBCDIC
1414 if ((isLOWER(min) && isLOWER(max)) ||
1415 (isUPPER(min) && isUPPER(max))) {
1416 if (isLOWER(min)) {
1417 for (i = min; i <= max; i++)
1418 if (isLOWER(i))
1419 *d++ = NATIVE_TO_NEED(has_utf8,i);
1420 } else {
1421 for (i = min; i <= max; i++)
1422 if (isUPPER(i))
1423 *d++ = NATIVE_TO_NEED(has_utf8,i);
1424 }
1425 }
1426 else
1427#endif
1428 for (i = min; i <= max; i++)
1429 *d++ = (char)i;
1430
1431 /* mark the range as done, and continue */
1432 dorange = FALSE;
1433 didrange = TRUE;
1434 continue;
1435 }
1436
1437 /* range begins (ignore - as first or last char) */
1438 else if (*s == '-' && s+1 < send && s != start) {
1439 if (didrange) {
1440 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1441 }
1442 if (has_utf8) {
1443 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1444 s++;
1445 continue;
1446 }
1447 dorange = TRUE;
1448 s++;
1449 }
1450 else {
1451 didrange = FALSE;
1452 }
1453 }
1454
1455 /* if we get here, we're not doing a transliteration */
1456
1457 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1458 except for the last char, which will be done separately. */
1459 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1460 if (s[2] == '#') {
1461 while (s+1 < send && *s != ')')
1462 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1463 }
1464 else if (s[2] == '{' /* This should match regcomp.c */
1465 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1466 {
1467 I32 count = 1;
1468 char *regparse = s + (s[2] == '{' ? 3 : 4);
1469 char c;
1470
1471 while (count && (c = *regparse)) {
1472 if (c == '\\' && regparse[1])
1473 regparse++;
1474 else if (c == '{')
1475 count++;
1476 else if (c == '}')
1477 count--;
1478 regparse++;
1479 }
1480 if (*regparse != ')')
1481 regparse--; /* Leave one char for continuation. */
1482 while (s < regparse)
1483 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1484 }
1485 }
1486
1487 /* likewise skip #-initiated comments in //x patterns */
1488 else if (*s == '#' && PL_lex_inpat &&
1489 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1490 while (s+1 < send && *s != '\n')
1491 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1492 }
1493
1494 /* check for embedded arrays
1495 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1496 */
1497 else if (*s == '@' && s[1]
1498 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1499 break;
1500
1501 /* check for embedded scalars. only stop if we're sure it's a
1502 variable.
1503 */
1504 else if (*s == '$') {
1505 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1506 break;
1507 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1508 break; /* in regexp, $ might be tail anchor */
1509 }
1510
1511 /* End of else if chain - OP_TRANS rejoin rest */
1512
1513 /* backslashes */
1514 if (*s == '\\' && s+1 < send) {
1515 s++;
1516
1517 /* some backslashes we leave behind */
1518 if (*leaveit && *s && strchr(leaveit, *s)) {
1519 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1520 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1521 continue;
1522 }
1523
1524 /* deprecate \1 in strings and substitution replacements */
1525 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1526 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1527 {
1528 if (ckWARN(WARN_SYNTAX))
1529 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1530 *--s = '$';
1531 break;
1532 }
1533
1534 /* string-change backslash escapes */
1535 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1536 --s;
1537 break;
1538 }
1539
1540 /* if we get here, it's either a quoted -, or a digit */
1541 switch (*s) {
1542
1543 /* quoted - in transliterations */
1544 case '-':
1545 if (PL_lex_inwhat == OP_TRANS) {
1546 *d++ = *s++;
1547 continue;
1548 }
1549 /* FALL THROUGH */
1550 default:
1551 {
1552 if (ckWARN(WARN_MISC) &&
1553 isALNUM(*s) &&
1554 *s != '_')
1555 Perl_warner(aTHX_ packWARN(WARN_MISC),
1556 "Unrecognized escape \\%c passed through",
1557 *s);
1558 /* default action is to copy the quoted character */
1559 goto default_action;
1560 }
1561
1562 /* \132 indicates an octal constant */
1563 case '0': case '1': case '2': case '3':
1564 case '4': case '5': case '6': case '7':
1565 {
1566 I32 flags = 0;
1567 STRLEN len = 3;
1568 uv = grok_oct(s, &len, &flags, NULL);
1569 s += len;
1570 }
1571 goto NUM_ESCAPE_INSERT;
1572
1573 /* \x24 indicates a hex constant */
1574 case 'x':
1575 ++s;
1576 if (*s == '{') {
1577 char* e = strchr(s, '}');
1578 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1579 PERL_SCAN_DISALLOW_PREFIX;
1580 STRLEN len;
1581
1582 ++s;
1583 if (!e) {
1584 yyerror("Missing right brace on \\x{}");
1585 continue;
1586 }
1587 len = e - s;
1588 uv = grok_hex(s, &len, &flags, NULL);
1589 s = e + 1;
1590 }
1591 else {
1592 {
1593 STRLEN len = 2;
1594 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1595 uv = grok_hex(s, &len, &flags, NULL);
1596 s += len;
1597 }
1598 }
1599
1600 NUM_ESCAPE_INSERT:
1601 /* Insert oct or hex escaped character.
1602 * There will always enough room in sv since such
1603 * escapes will be longer than any UTF-8 sequence
1604 * they can end up as. */
1605
1606 /* We need to map to chars to ASCII before doing the tests
1607 to cover EBCDIC
1608 */
1609 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1610 if (!has_utf8 && uv > 255) {
1611 /* Might need to recode whatever we have
1612 * accumulated so far if it contains any
1613 * hibit chars.
1614 *
1615 * (Can't we keep track of that and avoid
1616 * this rescan? --jhi)
1617 */
1618 int hicount = 0;
1619 U8 *c;
1620 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1621 if (!NATIVE_IS_INVARIANT(*c)) {
1622 hicount++;
1623 }
1624 }
1625 if (hicount) {
1626 STRLEN offset = d - SvPVX(sv);
1627 U8 *src, *dst;
1628 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1629 src = (U8 *)d - 1;
1630 dst = src+hicount;
1631 d += hicount;
1632 while (src >= (U8 *)SvPVX(sv)) {
1633 if (!NATIVE_IS_INVARIANT(*src)) {
1634 U8 ch = NATIVE_TO_ASCII(*src);
1635 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1636 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1637 }
1638 else {
1639 *dst-- = *src;
1640 }
1641 src--;
1642 }
1643 }
1644 }
1645
1646 if (has_utf8 || uv > 255) {
1647 d = (char*)uvchr_to_utf8((U8*)d, uv);
1648 has_utf8 = TRUE;
1649 if (PL_lex_inwhat == OP_TRANS &&
1650 PL_sublex_info.sub_op) {
1651 PL_sublex_info.sub_op->op_private |=
1652 (PL_lex_repl ? OPpTRANS_FROM_UTF
1653 : OPpTRANS_TO_UTF);
1654 }
1655 }
1656 else {
1657 *d++ = (char)uv;
1658 }
1659 }
1660 else {
1661 *d++ = (char) uv;
1662 }
1663 continue;
1664
1665 /* \N{LATIN SMALL LETTER A} is a named character */
1666 case 'N':
1667 ++s;
1668 if (*s == '{') {
1669 char* e = strchr(s, '}');
1670 SV *res;
1671 STRLEN len;
1672 char *str;
1673
1674 if (!e) {
1675 yyerror("Missing right brace on \\N{}");
1676 e = s - 1;
1677 goto cont_scan;
1678 }
1679 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1680 /* \N{U+...} */
1681 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1682 PERL_SCAN_DISALLOW_PREFIX;
1683 s += 3;
1684 len = e - s;
1685 uv = grok_hex(s, &len, &flags, NULL);
1686 s = e + 1;
1687 goto NUM_ESCAPE_INSERT;
1688 }
1689 res = newSVpvn(s + 1, e - s - 1);
1690 res = new_constant( Nullch, 0, "charnames",
1691 res, Nullsv, "\\N{...}" );
1692 if (has_utf8)
1693 sv_utf8_upgrade(res);
1694 str = SvPV(res,len);
1695#ifdef EBCDIC_NEVER_MIND
1696 /* charnames uses pack U and that has been
1697 * recently changed to do the below uni->native
1698 * mapping, so this would be redundant (and wrong,
1699 * the code point would be doubly converted).
1700 * But leave this in just in case the pack U change
1701 * gets revoked, but the semantics is still
1702 * desireable for charnames. --jhi */
1703 {
1704 UV uv = utf8_to_uvchr((U8*)str, 0);
1705
1706 if (uv < 0x100) {
1707 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1708
1709 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1710 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1711 str = SvPV(res, len);
1712 }
1713 }
1714#endif
1715 if (!has_utf8 && SvUTF8(res)) {
1716 char *ostart = SvPVX(sv);
1717 SvCUR_set(sv, d - ostart);
1718 SvPOK_on(sv);
1719 *d = '\0';
1720 sv_utf8_upgrade(sv);
1721 /* this just broke our allocation above... */
1722 SvGROW(sv, (STRLEN)(send - start));
1723 d = SvPVX(sv) + SvCUR(sv);
1724 has_utf8 = TRUE;
1725 }
1726 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1727 char *odest = SvPVX(sv);
1728
1729 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1730 d = SvPVX(sv) + (d - odest);
1731 }
1732 Copy(str, d, len, char);
1733 d += len;
1734 SvREFCNT_dec(res);
1735 cont_scan:
1736 s = e + 1;
1737 }
1738 else
1739 yyerror("Missing braces on \\N{}");
1740 continue;
1741
1742 /* \c is a control character */
1743 case 'c':
1744 s++;
1745 if (s < send) {
1746 U8 c = *s++;
1747#ifdef EBCDIC
1748 if (isLOWER(c))
1749 c = toUPPER(c);
1750#endif
1751 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1752 }
1753 else {
1754 yyerror("Missing control char name in \\c");
1755 }
1756 continue;
1757
1758 /* printf-style backslashes, formfeeds, newlines, etc */
1759 case 'b':
1760 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1761 break;
1762 case 'n':
1763 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1764 break;
1765 case 'r':
1766 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1767 break;
1768 case 'f':
1769 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1770 break;
1771 case 't':
1772 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1773 break;
1774 case 'e':
1775 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1776 break;
1777 case 'a':
1778 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1779 break;
1780 } /* end switch */
1781
1782 s++;
1783 continue;
1784 } /* end if (backslash) */
1785
1786 default_action:
1787 /* If we started with encoded form, or already know we want it
1788 and then encode the next character */
1789 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1790 STRLEN len = 1;
1791 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1792 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1793 s += len;
1794 if (need > len) {
1795 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1796 STRLEN off = d - SvPVX(sv);
1797 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1798 }
1799 d = (char*)uvchr_to_utf8((U8*)d, uv);
1800 has_utf8 = TRUE;
1801 }
1802 else {
1803 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1804 }
1805 } /* while loop to process each character */
1806
1807 /* terminate the string and set up the sv */
1808 *d = '\0';
1809 SvCUR_set(sv, d - SvPVX(sv));
1810 if (SvCUR(sv) >= SvLEN(sv))
1811 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1812
1813 SvPOK_on(sv);
1814 if (PL_encoding && !has_utf8) {
1815 sv_recode_to_utf8(sv, PL_encoding);
1816 if (SvUTF8(sv))
1817 has_utf8 = TRUE;
1818 }
1819 if (has_utf8) {
1820 SvUTF8_on(sv);
1821 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1822 PL_sublex_info.sub_op->op_private |=
1823 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1824 }
1825 }
1826
1827 /* shrink the sv if we allocated more than we used */
1828 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1829 SvLEN_set(sv, SvCUR(sv) + 1);
1830 Renew(SvPVX(sv), SvLEN(sv), char);
1831 }
1832
1833 /* return the substring (via yylval) only if we parsed anything */
1834 if (s > PL_bufptr) {
1835 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1836 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1837 sv, Nullsv,
1838 ( PL_lex_inwhat == OP_TRANS
1839 ? "tr"
1840 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1841 ? "s"
1842 : "qq")));
1843 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1844 } else
1845 SvREFCNT_dec(sv);
1846 return s;
1847}
1848
1849/* S_intuit_more
1850 * Returns TRUE if there's more to the expression (e.g., a subscript),
1851 * FALSE otherwise.
1852 *
1853 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1854 *
1855 * ->[ and ->{ return TRUE
1856 * { and [ outside a pattern are always subscripts, so return TRUE
1857 * if we're outside a pattern and it's not { or [, then return FALSE
1858 * if we're in a pattern and the first char is a {
1859 * {4,5} (any digits around the comma) returns FALSE
1860 * if we're in a pattern and the first char is a [
1861 * [] returns FALSE
1862 * [SOMETHING] has a funky algorithm to decide whether it's a
1863 * character class or not. It has to deal with things like
1864 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1865 * anything else returns TRUE
1866 */
1867
1868/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1869
1870STATIC int
1871S_intuit_more(pTHX_ register char *s)
1872{
1873 if (PL_lex_brackets)
1874 return TRUE;
1875 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1876 return TRUE;
1877 if (*s != '{' && *s != '[')
1878 return FALSE;
1879 if (!PL_lex_inpat)
1880 return TRUE;
1881
1882 /* In a pattern, so maybe we have {n,m}. */
1883 if (*s == '{') {
1884 s++;
1885 if (!isDIGIT(*s))
1886 return TRUE;
1887 while (isDIGIT(*s))
1888 s++;
1889 if (*s == ',')
1890 s++;
1891 while (isDIGIT(*s))
1892 s++;
1893 if (*s == '}')
1894 return FALSE;
1895 return TRUE;
1896
1897 }
1898
1899 /* On the other hand, maybe we have a character class */
1900
1901 s++;
1902 if (*s == ']' || *s == '^')
1903 return FALSE;
1904 else {
1905 /* this is terrifying, and it works */
1906 int weight = 2; /* let's weigh the evidence */
1907 char seen[256];
1908 unsigned char un_char = 255, last_un_char;
1909 char *send = strchr(s,']');
1910 char tmpbuf[sizeof PL_tokenbuf * 4];
1911
1912 if (!send) /* has to be an expression */
1913 return TRUE;
1914
1915 Zero(seen,256,char);
1916 if (*s == '$')
1917 weight -= 3;
1918 else if (isDIGIT(*s)) {
1919 if (s[1] != ']') {
1920 if (isDIGIT(s[1]) && s[2] == ']')
1921 weight -= 10;
1922 }
1923 else
1924 weight -= 100;
1925 }
1926 for (; s < send; s++) {
1927 last_un_char = un_char;
1928 un_char = (unsigned char)*s;
1929 switch (*s) {
1930 case '@':
1931 case '&':
1932 case '$':
1933 weight -= seen[un_char] * 10;
1934 if (isALNUM_lazy_if(s+1,UTF)) {
1935 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1936 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1937 weight -= 100;
1938 else
1939 weight -= 10;
1940 }
1941 else if (*s == '$' && s[1] &&
1942 strchr("[#!%*<>()-=",s[1])) {
1943 if (/*{*/ strchr("])} =",s[2]))
1944 weight -= 10;
1945 else
1946 weight -= 1;
1947 }
1948 break;
1949 case '\\':
1950 un_char = 254;
1951 if (s[1]) {
1952 if (strchr("wds]",s[1]))
1953 weight += 100;
1954 else if (seen['\''] || seen['"'])
1955 weight += 1;
1956 else if (strchr("rnftbxcav",s[1]))
1957 weight += 40;
1958 else if (isDIGIT(s[1])) {
1959 weight += 40;
1960 while (s[1] && isDIGIT(s[1]))
1961 s++;
1962 }
1963 }
1964 else
1965 weight += 100;
1966 break;
1967 case '-':
1968 if (s[1] == '\\')
1969 weight += 50;
1970 if (strchr("aA01! ",last_un_char))
1971 weight += 30;
1972 if (strchr("zZ79~",s[1]))
1973 weight += 30;
1974 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1975 weight -= 5; /* cope with negative subscript */
1976 break;
1977 default:
1978 if (!isALNUM(last_un_char)
1979 && !(last_un_char == '$' || last_un_char == '@'
1980 || last_un_char == '&')
1981 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
1982 char *d = tmpbuf;
1983 while (isALPHA(*s))
1984 *d++ = *s++;
1985 *d = '\0';
1986 if (keyword(tmpbuf, d - tmpbuf))
1987 weight -= 150;
1988 }
1989 if (un_char == last_un_char + 1)
1990 weight += 5;
1991 weight -= seen[un_char];
1992 break;
1993 }
1994 seen[un_char]++;
1995 }
1996 if (weight >= 0) /* probably a character class */
1997 return FALSE;
1998 }
1999
2000 return TRUE;
2001}
2002
2003/*
2004 * S_intuit_method
2005 *
2006 * Does all the checking to disambiguate
2007 * foo bar
2008 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2009 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2010 *
2011 * First argument is the stuff after the first token, e.g. "bar".
2012 *
2013 * Not a method if bar is a filehandle.
2014 * Not a method if foo is a subroutine prototyped to take a filehandle.
2015 * Not a method if it's really "Foo $bar"
2016 * Method if it's "foo $bar"
2017 * Not a method if it's really "print foo $bar"
2018 * Method if it's really "foo package::" (interpreted as package->foo)
2019 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2020 * Not a method if bar is a filehandle or package, but is quoted with
2021 * =>
2022 */
2023
2024STATIC int
2025S_intuit_method(pTHX_ char *start, GV *gv)
2026{
2027 char *s = start + (*start == '$');
2028 char tmpbuf[sizeof PL_tokenbuf];
2029 STRLEN len;
2030 GV* indirgv;
2031
2032 if (gv) {
2033 CV *cv;
2034 if (GvIO(gv))
2035 return 0;
2036 if ((cv = GvCVu(gv))) {
2037 char *proto = SvPVX(cv);
2038 if (proto) {
2039 if (*proto == ';')
2040 proto++;
2041 if (*proto == '*')
2042 return 0;
2043 }
2044 } else
2045 gv = 0;
2046 }
2047 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2048 /* start is the beginning of the possible filehandle/object,
2049 * and s is the end of it
2050 * tmpbuf is a copy of it
2051 */
2052
2053 if (*start == '$') {
2054 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2055 return 0;
2056 s = skipspace(s);
2057 PL_bufptr = start;
2058 PL_expect = XREF;
2059 return *s == '(' ? FUNCMETH : METHOD;
2060 }
2061 if (!keyword(tmpbuf, len)) {
2062 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2063 len -= 2;
2064 tmpbuf[len] = '\0';
2065 goto bare_package;
2066 }
2067 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2068 if (indirgv && GvCVu(indirgv))
2069 return 0;
2070 /* filehandle or package name makes it a method */
2071 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2072 s = skipspace(s);
2073 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2074 return 0; /* no assumptions -- "=>" quotes bearword */
2075 bare_package:
2076 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2077 newSVpvn(tmpbuf,len));
2078 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2079 PL_expect = XTERM;
2080 force_next(WORD);
2081 PL_bufptr = s;
2082 return *s == '(' ? FUNCMETH : METHOD;
2083 }
2084 }
2085 return 0;
2086}
2087
2088/*
2089 * S_incl_perldb
2090 * Return a string of Perl code to load the debugger. If PERL5DB
2091 * is set, it will return the contents of that, otherwise a
2092 * compile-time require of perl5db.pl.
2093 */
2094
2095STATIC char*
2096S_incl_perldb(pTHX)
2097{
2098 if (PL_perldb) {
2099 char *pdb = PerlEnv_getenv("PERL5DB");
2100
2101 if (pdb)
2102 return pdb;
2103 SETERRNO(0,SS_NORMAL);
2104 return "BEGIN { require 'perl5db.pl' }";
2105 }
2106 return "";
2107}
2108
2109
2110/* Encoded script support. filter_add() effectively inserts a
2111 * 'pre-processing' function into the current source input stream.
2112 * Note that the filter function only applies to the current source file
2113 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2114 *
2115 * The datasv parameter (which may be NULL) can be used to pass
2116 * private data to this instance of the filter. The filter function
2117 * can recover the SV using the FILTER_DATA macro and use it to
2118 * store private buffers and state information.
2119 *
2120 * The supplied datasv parameter is upgraded to a PVIO type
2121 * and the IoDIRP/IoANY field is used to store the function pointer,
2122 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2123 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2124 * private use must be set using malloc'd pointers.
2125 */
2126
2127SV *
2128Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2129{
2130 if (!funcp)
2131 return Nullsv;
2132
2133 if (!PL_rsfp_filters)
2134 PL_rsfp_filters = newAV();
2135 if (!datasv)
2136 datasv = NEWSV(255,0);
2137 if (!SvUPGRADE(datasv, SVt_PVIO))
2138 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
2139 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
2140 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2141 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2142 (void*)funcp, SvPV_nolen(datasv)));
2143 av_unshift(PL_rsfp_filters, 1);
2144 av_store(PL_rsfp_filters, 0, datasv) ;
2145 return(datasv);
2146}
2147
2148
2149/* Delete most recently added instance of this filter function. */
2150void
2151Perl_filter_del(pTHX_ filter_t funcp)
2152{
2153 SV *datasv;
2154 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
2155 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2156 return;
2157 /* if filter is on top of stack (usual case) just pop it off */
2158 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2159 if (IoANY(datasv) == (void *)funcp) {
2160 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2161 IoANY(datasv) = (void *)NULL;
2162 sv_free(av_pop(PL_rsfp_filters));
2163
2164 return;
2165 }
2166 /* we need to search for the correct entry and clear it */
2167 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2168}
2169
2170
2171/* Invoke the idxth filter function for the current rsfp. */
2172/* maxlen 0 = read one text line */
2173I32
2174Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2175{
2176 filter_t funcp;
2177 SV *datasv = NULL;
2178
2179 if (!PL_rsfp_filters)
2180 return -1;
2181 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2182 /* Provide a default input filter to make life easy. */
2183 /* Note that we append to the line. This is handy. */
2184 DEBUG_P(PerlIO_printf(Perl_debug_log,
2185 "filter_read %d: from rsfp\n", idx));
2186 if (maxlen) {
2187 /* Want a block */
2188 int len ;
2189 int old_len = SvCUR(buf_sv) ;
2190
2191 /* ensure buf_sv is large enough */
2192 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2193 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2194 if (PerlIO_error(PL_rsfp))
2195 return -1; /* error */
2196 else
2197 return 0 ; /* end of file */
2198 }
2199 SvCUR_set(buf_sv, old_len + len) ;
2200 } else {
2201 /* Want a line */
2202 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2203 if (PerlIO_error(PL_rsfp))
2204 return -1; /* error */
2205 else
2206 return 0 ; /* end of file */
2207 }
2208 }
2209 return SvCUR(buf_sv);
2210 }
2211 /* Skip this filter slot if filter has been deleted */
2212 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2213 DEBUG_P(PerlIO_printf(Perl_debug_log,
2214 "filter_read %d: skipped (filter deleted)\n",
2215 idx));
2216 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2217 }
2218 /* Get function pointer hidden within datasv */
2219 funcp = (filter_t)IoANY(datasv);
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: via function %p (%s)\n",
2222 idx, (void*)funcp, SvPV_nolen(datasv)));
2223 /* Call function. The function is expected to */
2224 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2225 /* Return: <0:error, =0:eof, >0:not eof */
2226 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2227}
2228
2229STATIC char *
2230S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2231{
2232#ifdef PERL_CR_FILTER
2233 if (!PL_rsfp_filters) {
2234 filter_add(S_cr_textfilter,NULL);
2235 }
2236#endif
2237 if (PL_rsfp_filters) {
2238 if (!append)
2239 SvCUR_set(sv, 0); /* start with empty line */
2240 if (FILTER_READ(0, sv, 0) > 0)
2241 return ( SvPVX(sv) ) ;
2242 else
2243 return Nullch ;
2244 }
2245 else
2246 return (sv_gets(sv, fp, append));
2247}
2248
2249STATIC HV *
2250S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
2251{
2252 GV *gv;
2253
2254 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2255 return PL_curstash;
2256
2257 if (len > 2 &&
2258 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2259 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2260 {
2261 return GvHV(gv); /* Foo:: */
2262 }
2263
2264 /* use constant CLASS => 'MyClass' */
2265 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2266 SV *sv;
2267 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2268 pkgname = SvPV_nolen(sv);
2269 }
2270 }
2271
2272 return gv_stashpv(pkgname, FALSE);
2273}
2274
2275#ifdef DEBUGGING
2276 static char* exp_name[] =
2277 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2278 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2279 };
2280#endif
2281
2282/*
2283 yylex
2284
2285 Works out what to call the token just pulled out of the input
2286 stream. The yacc parser takes care of taking the ops we return and
2287 stitching them into a tree.
2288
2289 Returns:
2290 PRIVATEREF
2291
2292 Structure:
2293 if read an identifier
2294 if we're in a my declaration
2295 croak if they tried to say my($foo::bar)
2296 build the ops for a my() declaration
2297 if it's an access to a my() variable
2298 are we in a sort block?
2299 croak if my($a); $a <=> $b
2300 build ops for access to a my() variable
2301 if in a dq string, and they've said @foo and we can't find @foo
2302 croak
2303 build ops for a bareword
2304 if we already built the token before, use it.
2305*/
2306
2307
2308#ifdef __SC__
2309#pragma segment Perl_yylex
2310#endif
2311int
2312Perl_yylex(pTHX)
2313{
2314 register char *s = PL_bufptr;
2315 register char *d;
2316 register I32 tmp;
2317 STRLEN len;
2318 GV *gv = Nullgv;
2319 GV **gvp = 0;
2320 bool bof = FALSE;
2321 I32 orig_keyword = 0;
2322
2323 DEBUG_T( {
2324 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2325 lex_state_names[PL_lex_state]);
2326 } );
2327 /* check if there's an identifier for us to look at */
2328 if (PL_pending_ident)
2329 return REPORT(S_pending_ident(aTHX));
2330
2331 /* no identifier pending identification */
2332
2333 switch (PL_lex_state) {
2334#ifdef COMMENTARY
2335 case LEX_NORMAL: /* Some compilers will produce faster */
2336 case LEX_INTERPNORMAL: /* code if we comment these out. */
2337 break;
2338#endif
2339
2340 /* when we've already built the next token, just pull it out of the queue */
2341 case LEX_KNOWNEXT:
2342 PL_nexttoke--;
2343 yylval = PL_nextval[PL_nexttoke];
2344 if (!PL_nexttoke) {
2345 PL_lex_state = PL_lex_defer;
2346 PL_expect = PL_lex_expect;
2347 PL_lex_defer = LEX_NORMAL;
2348 }
2349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2350 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2351 (IV)PL_nexttype[PL_nexttoke]); });
2352
2353 return REPORT(PL_nexttype[PL_nexttoke]);
2354
2355 /* interpolated case modifiers like \L \U, including \Q and \E.
2356 when we get here, PL_bufptr is at the \
2357 */
2358 case LEX_INTERPCASEMOD:
2359#ifdef DEBUGGING
2360 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2361 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2362#endif
2363 /* handle \E or end of string */
2364 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2365 char oldmod;
2366
2367 /* if at a \E */
2368 if (PL_lex_casemods) {
2369 oldmod = PL_lex_casestack[--PL_lex_casemods];
2370 PL_lex_casestack[PL_lex_casemods] = '\0';
2371
2372 if (PL_bufptr != PL_bufend
2373 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2374 PL_bufptr += 2;
2375 PL_lex_state = LEX_INTERPCONCAT;
2376 }
2377 return REPORT(')');
2378 }
2379 if (PL_bufptr != PL_bufend)
2380 PL_bufptr += 2;
2381 PL_lex_state = LEX_INTERPCONCAT;
2382 return yylex();
2383 }
2384 else {
2385 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2386 "### Saw case modifier at '%s'\n", PL_bufptr); });
2387 s = PL_bufptr + 1;
2388 if (s[1] == '\\' && s[2] == 'E') {
2389 PL_bufptr = s + 3;
2390 PL_lex_state = LEX_INTERPCONCAT;
2391 return yylex();
2392 }
2393 else {
2394 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2395 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2396 if ((*s == 'L' || *s == 'U') &&
2397 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2398 PL_lex_casestack[--PL_lex_casemods] = '\0';
2399 return REPORT(')');
2400 }
2401 if (PL_lex_casemods > 10)
2402 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2403 PL_lex_casestack[PL_lex_casemods++] = *s;
2404 PL_lex_casestack[PL_lex_casemods] = '\0';
2405 PL_lex_state = LEX_INTERPCONCAT;
2406 PL_nextval[PL_nexttoke].ival = 0;
2407 force_next('(');
2408 if (*s == 'l')
2409 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2410 else if (*s == 'u')
2411 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2412 else if (*s == 'L')
2413 PL_nextval[PL_nexttoke].ival = OP_LC;
2414 else if (*s == 'U')
2415 PL_nextval[PL_nexttoke].ival = OP_UC;
2416 else if (*s == 'Q')
2417 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2418 else
2419 Perl_croak(aTHX_ "panic: yylex");
2420 PL_bufptr = s + 1;
2421 }
2422 force_next(FUNC);
2423 if (PL_lex_starts) {
2424 s = PL_bufptr;
2425 PL_lex_starts = 0;
2426 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2427 if (PL_lex_casemods == 1 && PL_lex_inpat)
2428 OPERATOR(',');
2429 else
2430 Aop(OP_CONCAT);
2431 }
2432 else
2433 return yylex();
2434 }
2435
2436 case LEX_INTERPPUSH:
2437 return REPORT(sublex_push());
2438
2439 case LEX_INTERPSTART:
2440 if (PL_bufptr == PL_bufend)
2441 return REPORT(sublex_done());
2442 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2443 "### Interpolated variable at '%s'\n", PL_bufptr); });
2444 PL_expect = XTERM;
2445 PL_lex_dojoin = (*PL_bufptr == '@');
2446 PL_lex_state = LEX_INTERPNORMAL;
2447 if (PL_lex_dojoin) {
2448 PL_nextval[PL_nexttoke].ival = 0;
2449 force_next(',');
2450 force_ident("\"", '$');
2451 PL_nextval[PL_nexttoke].ival = 0;
2452 force_next('$');
2453 PL_nextval[PL_nexttoke].ival = 0;
2454 force_next('(');
2455 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2456 force_next(FUNC);
2457 }
2458 if (PL_lex_starts++) {
2459 s = PL_bufptr;
2460 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2461 if (!PL_lex_casemods && PL_lex_inpat)
2462 OPERATOR(',');
2463 else
2464 Aop(OP_CONCAT);
2465 }
2466 return yylex();
2467
2468 case LEX_INTERPENDMAYBE:
2469 if (intuit_more(PL_bufptr)) {
2470 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2471 break;
2472 }
2473 /* FALL THROUGH */
2474
2475 case LEX_INTERPEND:
2476 if (PL_lex_dojoin) {
2477 PL_lex_dojoin = FALSE;
2478 PL_lex_state = LEX_INTERPCONCAT;
2479 return REPORT(')');
2480 }
2481 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2482 && SvEVALED(PL_lex_repl))
2483 {
2484 if (PL_bufptr != PL_bufend)
2485 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2486 PL_lex_repl = Nullsv;
2487 }
2488 /* FALLTHROUGH */
2489 case LEX_INTERPCONCAT:
2490#ifdef DEBUGGING
2491 if (PL_lex_brackets)
2492 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2493#endif
2494 if (PL_bufptr == PL_bufend)
2495 return REPORT(sublex_done());
2496
2497 if (SvIVX(PL_linestr) == '\'') {
2498 SV *sv = newSVsv(PL_linestr);
2499 if (!PL_lex_inpat)
2500 sv = tokeq(sv);
2501 else if ( PL_hints & HINT_NEW_RE )
2502 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2503 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2504 s = PL_bufend;
2505 }
2506 else {
2507 s = scan_const(PL_bufptr);
2508 if (*s == '\\')
2509 PL_lex_state = LEX_INTERPCASEMOD;
2510 else
2511 PL_lex_state = LEX_INTERPSTART;
2512 }
2513
2514 if (s != PL_bufptr) {
2515 PL_nextval[PL_nexttoke] = yylval;
2516 PL_expect = XTERM;
2517 force_next(THING);
2518 if (PL_lex_starts++) {
2519 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2520 if (!PL_lex_casemods && PL_lex_inpat)
2521 OPERATOR(',');
2522 else
2523 Aop(OP_CONCAT);
2524 }
2525 else {
2526 PL_bufptr = s;
2527 return yylex();
2528 }
2529 }
2530
2531 return yylex();
2532 case LEX_FORMLINE:
2533 PL_lex_state = LEX_NORMAL;
2534 s = scan_formline(PL_bufptr);
2535 if (!PL_lex_formbrack)
2536 goto rightbracket;
2537 OPERATOR(';');
2538 }
2539
2540 s = PL_bufptr;
2541 PL_oldoldbufptr = PL_oldbufptr;
2542 PL_oldbufptr = s;
2543 DEBUG_T( {
2544 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2545 exp_name[PL_expect], s);
2546 } );
2547
2548 retry:
2549 switch (*s) {
2550 default:
2551 if (isIDFIRST_lazy_if(s,UTF))
2552 goto keylookup;
2553 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2554 case 4:
2555 case 26:
2556 goto fake_eof; /* emulate EOF on ^D or ^Z */
2557 case 0:
2558 if (!PL_rsfp) {
2559 PL_last_uni = 0;
2560 PL_last_lop = 0;
2561 if (PL_lex_brackets) {
2562 if (PL_lex_formbrack)
2563 yyerror("Format not terminated");
2564 else
2565 yyerror("Missing right curly or square bracket");
2566 }
2567 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2568 "### Tokener got EOF\n");
2569 } );
2570 TOKEN(0);
2571 }
2572 if (s++ < PL_bufend)
2573 goto retry; /* ignore stray nulls */
2574 PL_last_uni = 0;
2575 PL_last_lop = 0;
2576 if (!PL_in_eval && !PL_preambled) {
2577 PL_preambled = TRUE;
2578 sv_setpv(PL_linestr,incl_perldb());
2579 if (SvCUR(PL_linestr))
2580 sv_catpvn(PL_linestr,";", 1);
2581 if (PL_preambleav){
2582 while(AvFILLp(PL_preambleav) >= 0) {
2583 SV *tmpsv = av_shift(PL_preambleav);
2584 sv_catsv(PL_linestr, tmpsv);
2585 sv_catpvn(PL_linestr, ";", 1);
2586 sv_free(tmpsv);
2587 }
2588 sv_free((SV*)PL_preambleav);
2589 PL_preambleav = NULL;
2590 }
2591 if (PL_minus_n || PL_minus_p) {
2592 sv_catpv(PL_linestr, "LINE: while (<>) {");
2593 if (PL_minus_l)
2594 sv_catpv(PL_linestr,"chomp;");
2595 if (PL_minus_a) {
2596 if (PL_minus_F) {
2597 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2598 || *PL_splitstr == '"')
2599 && strchr(PL_splitstr + 1, *PL_splitstr))
2600 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2601 else {
2602 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2603 bytes can be used as quoting characters. :-) */
2604 /* The count here deliberately includes the NUL
2605 that terminates the C string constant. This
2606 embeds the opening NUL into the string. */
2607 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2608 s = PL_splitstr;
2609 do {
2610 /* Need to \ \s */
2611 if (*s == '\\')
2612 sv_catpvn(PL_linestr, s, 1);
2613 sv_catpvn(PL_linestr, s, 1);
2614 } while (*s++);
2615 /* This loop will embed the trailing NUL of
2616 PL_linestr as the last thing it does before
2617 terminating. */
2618 sv_catpvn(PL_linestr, ");", 2);
2619 }
2620 }
2621 else
2622 sv_catpv(PL_linestr,"our @F=split(' ');");
2623 }
2624 }
2625 sv_catpvn(PL_linestr, "\n", 1);
2626 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2627 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2628 PL_last_lop = PL_last_uni = Nullch;
2629 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2630 SV *sv = NEWSV(85,0);
2631
2632 sv_upgrade(sv, SVt_PVMG);
2633 sv_setsv(sv,PL_linestr);
2634 (void)SvIOK_on(sv);
2635 SvIVX(sv) = 0;
2636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2637 }
2638 goto retry;
2639 }
2640 do {
2641 bof = PL_rsfp ? TRUE : FALSE;
2642 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2643 fake_eof:
2644 if (PL_rsfp) {
2645 if (PL_preprocess && !PL_in_eval)
2646 (void)PerlProc_pclose(PL_rsfp);
2647 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2648 PerlIO_clearerr(PL_rsfp);
2649 else
2650 (void)PerlIO_close(PL_rsfp);
2651 PL_rsfp = Nullfp;
2652 PL_doextract = FALSE;
2653 }
2654 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2655 sv_setpv(PL_linestr,PL_minus_p
2656 ? ";}continue{print;}" : ";}");
2657 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2658 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2659 PL_last_lop = PL_last_uni = Nullch;
2660 PL_minus_n = PL_minus_p = 0;
2661 goto retry;
2662 }
2663 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2664 PL_last_lop = PL_last_uni = Nullch;
2665 sv_setpv(PL_linestr,"");
2666 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2667 }
2668 /* If it looks like the start of a BOM or raw UTF-16,
2669 * check if it in fact is. */
2670 else if (bof &&
2671 (*s == 0 ||
2672 *(U8*)s == 0xEF ||
2673 *(U8*)s >= 0xFE ||
2674 s[1] == 0)) {
2675#ifdef PERLIO_IS_STDIO
2676# ifdef __GNU_LIBRARY__
2677# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2678# define FTELL_FOR_PIPE_IS_BROKEN
2679# endif
2680# else
2681# ifdef __GLIBC__
2682# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2683# define FTELL_FOR_PIPE_IS_BROKEN
2684# endif
2685# endif
2686# endif
2687#endif
2688#ifdef FTELL_FOR_PIPE_IS_BROKEN
2689 /* This loses the possibility to detect the bof
2690 * situation on perl -P when the libc5 is being used.
2691 * Workaround? Maybe attach some extra state to PL_rsfp?
2692 */
2693 if (!PL_preprocess)
2694 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2695#else
2696 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2697#endif
2698 if (bof) {
2699 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2700 s = swallow_bom((U8*)s);
2701 }
2702 }
2703 if (PL_doextract) {
2704 /* Incest with pod. */
2705 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2706 sv_setpv(PL_linestr, "");
2707 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2708 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2709 PL_last_lop = PL_last_uni = Nullch;
2710 PL_doextract = FALSE;
2711 }
2712 }
2713 incline(s);
2714 } while (PL_doextract);
2715 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2716 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2717 SV *sv = NEWSV(85,0);
2718
2719 sv_upgrade(sv, SVt_PVMG);
2720 sv_setsv(sv,PL_linestr);
2721 (void)SvIOK_on(sv);
2722 SvIVX(sv) = 0;
2723 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2724 }
2725 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2726 PL_last_lop = PL_last_uni = Nullch;
2727 if (CopLINE(PL_curcop) == 1) {
2728 while (s < PL_bufend && isSPACE(*s))
2729 s++;
2730 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2731 s++;
2732 d = Nullch;
2733 if (!PL_in_eval) {
2734 if (*s == '#' && *(s+1) == '!')
2735 d = s + 2;
2736#ifdef ALTERNATE_SHEBANG
2737 else {
2738 static char as[] = ALTERNATE_SHEBANG;
2739 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2740 d = s + (sizeof(as) - 1);
2741 }
2742#endif /* ALTERNATE_SHEBANG */
2743 }
2744 if (d) {
2745 char *ipath;
2746 char *ipathend;
2747
2748 while (isSPACE(*d))
2749 d++;
2750 ipath = d;
2751 while (*d && !isSPACE(*d))
2752 d++;
2753 ipathend = d;
2754
2755#ifdef ARG_ZERO_IS_SCRIPT
2756 if (ipathend > ipath) {
2757 /*
2758 * HP-UX (at least) sets argv[0] to the script name,
2759 * which makes $^X incorrect. And Digital UNIX and Linux,
2760 * at least, set argv[0] to the basename of the Perl
2761 * interpreter. So, having found "#!", we'll set it right.
2762 */
2763 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2764 assert(SvPOK(x) || SvGMAGICAL(x));
2765 if (sv_eq(x, CopFILESV(PL_curcop))) {
2766 sv_setpvn(x, ipath, ipathend - ipath);
2767 SvSETMAGIC(x);
2768 }
2769 else {
2770 STRLEN blen;
2771 STRLEN llen;
2772 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2773 char *lstart = SvPV(x,llen);
2774 if (llen < blen) {
2775 bstart += blen - llen;
2776 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2777 sv_setpvn(x, ipath, ipathend - ipath);
2778 SvSETMAGIC(x);
2779 }
2780 }
2781 }
2782 TAINT_NOT; /* $^X is always tainted, but that's OK */
2783 }
2784#endif /* ARG_ZERO_IS_SCRIPT */
2785
2786 /*
2787 * Look for options.
2788 */
2789 d = instr(s,"perl -");
2790 if (!d) {
2791 d = instr(s,"perl");
2792#if defined(DOSISH)
2793 /* avoid getting into infinite loops when shebang
2794 * line contains "Perl" rather than "perl" */
2795 if (!d) {
2796 for (d = ipathend-4; d >= ipath; --d) {
2797 if ((*d == 'p' || *d == 'P')
2798 && !ibcmp(d, "perl", 4))
2799 {
2800 break;
2801 }
2802 }
2803 if (d < ipath)
2804 d = Nullch;
2805 }
2806#endif
2807 }
2808#ifdef ALTERNATE_SHEBANG
2809 /*
2810 * If the ALTERNATE_SHEBANG on this system starts with a
2811 * character that can be part of a Perl expression, then if
2812 * we see it but not "perl", we're probably looking at the
2813 * start of Perl code, not a request to hand off to some
2814 * other interpreter. Similarly, if "perl" is there, but
2815 * not in the first 'word' of the line, we assume the line
2816 * contains the start of the Perl program.
2817 */
2818 if (d && *s != '#') {
2819 char *c = ipath;
2820 while (*c && !strchr("; \t\r\n\f\v#", *c))
2821 c++;
2822 if (c < d)
2823 d = Nullch; /* "perl" not in first word; ignore */
2824 else
2825 *s = '#'; /* Don't try to parse shebang line */
2826 }
2827#endif /* ALTERNATE_SHEBANG */
2828#ifndef MACOS_TRADITIONAL
2829 if (!d &&
2830 *s == '#' &&
2831 ipathend > ipath &&
2832 !PL_minus_c &&
2833 !instr(s,"indir") &&
2834 instr(PL_origargv[0],"perl"))
2835 {
2836 char **newargv;
2837
2838 *ipathend = '\0';
2839 s = ipathend + 1;
2840 while (s < PL_bufend && isSPACE(*s))
2841 s++;
2842 if (s < PL_bufend) {
2843 Newz(899,newargv,PL_origargc+3,char*);
2844 newargv[1] = s;
2845 while (s < PL_bufend && !isSPACE(*s))
2846 s++;
2847 *s = '\0';
2848 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2849 }
2850 else
2851 newargv = PL_origargv;
2852 newargv[0] = ipath;
2853 PERL_FPU_PRE_EXEC
2854 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2855 PERL_FPU_POST_EXEC
2856 Perl_croak(aTHX_ "Can't exec %s", ipath);
2857 }
2858#endif
2859 if (d) {
2860 U32 oldpdb = PL_perldb;
2861 bool oldn = PL_minus_n;
2862 bool oldp = PL_minus_p;
2863
2864 while (*d && !isSPACE(*d)) d++;
2865 while (SPACE_OR_TAB(*d)) d++;
2866
2867 if (*d++ == '-') {
2868 bool switches_done = PL_doswitches;
2869 do {
2870 if (*d == 'M' || *d == 'm') {
2871 char *m = d;
2872 while (*d && !isSPACE(*d)) d++;
2873 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2874 (int)(d - m), m);
2875 }
2876 d = moreswitches(d);
2877 } while (d);
2878 if (PL_doswitches && !switches_done) {
2879 int argc = PL_origargc;
2880 char **argv = PL_origargv;
2881 do {
2882 argc--,argv++;
2883 } while (argc && argv[0][0] == '-' && argv[0][1]);
2884 init_argv_symbols(argc,argv);
2885 }
2886 if ((PERLDB_LINE && !oldpdb) ||
2887 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2888 /* if we have already added "LINE: while (<>) {",
2889 we must not do it again */
2890 {
2891 sv_setpv(PL_linestr, "");
2892 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2893 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2894 PL_last_lop = PL_last_uni = Nullch;
2895 PL_preambled = FALSE;
2896 if (PERLDB_LINE)
2897 (void)gv_fetchfile(PL_origfilename);
2898 goto retry;
2899 }
2900 if (PL_doswitches && !switches_done) {
2901 int argc = PL_origargc;
2902 char **argv = PL_origargv;
2903 do {
2904 argc--,argv++;
2905 } while (argc && argv[0][0] == '-' && argv[0][1]);
2906 init_argv_symbols(argc,argv);
2907 }
2908 }
2909 }
2910 }
2911 }
2912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2913 PL_bufptr = s;
2914 PL_lex_state = LEX_FORMLINE;
2915 return yylex();
2916 }
2917 goto retry;
2918 case '\r':
2919#ifdef PERL_STRICT_CR
2920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2921 Perl_croak(aTHX_
2922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2923#endif
2924 case ' ': case '\t': case '\f': case 013:
2925#ifdef MACOS_TRADITIONAL
2926 case '\312':
2927#endif
2928 s++;
2929 goto retry;
2930 case '#':
2931 case '\n':
2932 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2933 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2934 /* handle eval qq[#line 1 "foo"\n ...] */
2935 CopLINE_dec(PL_curcop);
2936 incline(s);
2937 }
2938 d = PL_bufend;
2939 while (s < d && *s != '\n')
2940 s++;
2941 if (s < d)
2942 s++;
2943 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2944 Perl_croak(aTHX_ "panic: input overflow");
2945 incline(s);
2946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2947 PL_bufptr = s;
2948 PL_lex_state = LEX_FORMLINE;
2949 return yylex();
2950 }
2951 }
2952 else {
2953 *s = '\0';
2954 PL_bufend = s;
2955 }
2956 goto retry;
2957 case '-':
2958 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2959 I32 ftst = 0;
2960
2961 s++;
2962 PL_bufptr = s;
2963 tmp = *s++;
2964
2965 while (s < PL_bufend && SPACE_OR_TAB(*s))
2966 s++;
2967
2968 if (strnEQ(s,"=>",2)) {
2969 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2970 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2971 "### Saw unary minus before =>, forcing word '%s'\n", s);
2972 } );
2973 OPERATOR('-'); /* unary minus */
2974 }
2975 PL_last_uni = PL_oldbufptr;
2976 switch (tmp) {
2977 case 'r': ftst = OP_FTEREAD; break;
2978 case 'w': ftst = OP_FTEWRITE; break;
2979 case 'x': ftst = OP_FTEEXEC; break;
2980 case 'o': ftst = OP_FTEOWNED; break;
2981 case 'R': ftst = OP_FTRREAD; break;
2982 case 'W': ftst = OP_FTRWRITE; break;
2983 case 'X': ftst = OP_FTREXEC; break;
2984 case 'O': ftst = OP_FTROWNED; break;
2985 case 'e': ftst = OP_FTIS; break;
2986 case 'z': ftst = OP_FTZERO; break;
2987 case 's': ftst = OP_FTSIZE; break;
2988 case 'f': ftst = OP_FTFILE; break;
2989 case 'd': ftst = OP_FTDIR; break;
2990 case 'l': ftst = OP_FTLINK; break;
2991 case 'p': ftst = OP_FTPIPE; break;
2992 case 'S': ftst = OP_FTSOCK; break;
2993 case 'u': ftst = OP_FTSUID; break;
2994 case 'g': ftst = OP_FTSGID; break;
2995 case 'k': ftst = OP_FTSVTX; break;
2996 case 'b': ftst = OP_FTBLK; break;
2997 case 'c': ftst = OP_FTCHR; break;
2998 case 't': ftst = OP_FTTTY; break;
2999 case 'T': ftst = OP_FTTEXT; break;
3000 case 'B': ftst = OP_FTBINARY; break;
3001 case 'M': case 'A': case 'C':
3002 gv_fetchpv("\024",TRUE, SVt_PV);
3003 switch (tmp) {
3004 case 'M': ftst = OP_FTMTIME; break;
3005 case 'A': ftst = OP_FTATIME; break;
3006 case 'C': ftst = OP_FTCTIME; break;
3007 default: break;
3008 }
3009 break;
3010 default:
3011 break;
3012 }
3013 if (ftst) {
3014 PL_last_lop_op = (OPCODE)ftst;
3015 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3016 "### Saw file test %c\n", (int)ftst);
3017 } );
3018 FTST(ftst);
3019 }
3020 else {
3021 /* Assume it was a minus followed by a one-letter named
3022 * subroutine call (or a -bareword), then. */
3023 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3024 "### '-%c' looked like a file test but was not\n",
3025 (int) tmp);
3026 } );
3027 s = --PL_bufptr;
3028 }
3029 }
3030 tmp = *s++;
3031 if (*s == tmp) {
3032 s++;
3033 if (PL_expect == XOPERATOR)
3034 TERM(POSTDEC);
3035 else
3036 OPERATOR(PREDEC);
3037 }
3038 else if (*s == '>') {
3039 s++;
3040 s = skipspace(s);
3041 if (isIDFIRST_lazy_if(s,UTF)) {
3042 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3043 TOKEN(ARROW);
3044 }
3045 else if (*s == '$')
3046 OPERATOR(ARROW);
3047 else
3048 TERM(ARROW);
3049 }
3050 if (PL_expect == XOPERATOR)
3051 Aop(OP_SUBTRACT);
3052 else {
3053 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3054 check_uni();
3055 OPERATOR('-'); /* unary minus */
3056 }
3057
3058 case '+':
3059 tmp = *s++;
3060 if (*s == tmp) {
3061 s++;
3062 if (PL_expect == XOPERATOR)
3063 TERM(POSTINC);
3064 else
3065 OPERATOR(PREINC);
3066 }
3067 if (PL_expect == XOPERATOR)
3068 Aop(OP_ADD);
3069 else {
3070 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3071 check_uni();
3072 OPERATOR('+');
3073 }
3074
3075 case '*':
3076 if (PL_expect != XOPERATOR) {
3077 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3078 PL_expect = XOPERATOR;
3079 force_ident(PL_tokenbuf, '*');
3080 if (!*PL_tokenbuf)
3081 PREREF('*');
3082 TERM('*');
3083 }
3084 s++;
3085 if (*s == '*') {
3086 s++;
3087 PWop(OP_POW);
3088 }
3089 Mop(OP_MULTIPLY);
3090
3091 case '%':
3092 if (PL_expect == XOPERATOR) {
3093 ++s;
3094 Mop(OP_MODULO);
3095 }
3096 PL_tokenbuf[0] = '%';
3097 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3098 if (!PL_tokenbuf[1]) {
3099 PREREF('%');
3100 }
3101 PL_pending_ident = '%';
3102 TERM('%');
3103
3104 case '^':
3105 s++;
3106 BOop(OP_BIT_XOR);
3107 case '[':
3108 PL_lex_brackets++;
3109 /* FALL THROUGH */
3110 case '~':
3111 case ',':
3112 tmp = *s++;
3113 OPERATOR(tmp);
3114 case ':':
3115 if (s[1] == ':') {
3116 len = 0;
3117 goto just_a_word;
3118 }
3119 s++;
3120 switch (PL_expect) {
3121 OP *attrs;
3122 case XOPERATOR:
3123 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3124 break;
3125 PL_bufptr = s; /* update in case we back off */
3126 goto grabattrs;
3127 case XATTRBLOCK:
3128 PL_expect = XBLOCK;
3129 goto grabattrs;
3130 case XATTRTERM:
3131 PL_expect = XTERMBLOCK;
3132 grabattrs:
3133 s = skipspace(s);
3134 attrs = Nullop;
3135 while (isIDFIRST_lazy_if(s,UTF)) {
3136 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3137 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3138 if (tmp < 0) tmp = -tmp;
3139 switch (tmp) {
3140 case KEY_or:
3141 case KEY_and:
3142 case KEY_err:
3143 case KEY_for:
3144 case KEY_unless:
3145 case KEY_if:
3146 case KEY_while:
3147 case KEY_until:
3148 goto got_attrs;
3149 default:
3150 break;
3151 }
3152 }
3153 if (*d == '(') {
3154 d = scan_str(d,TRUE,TRUE);
3155 if (!d) {
3156 /* MUST advance bufptr here to avoid bogus
3157 "at end of line" context messages from yyerror().
3158 */
3159 PL_bufptr = s + len;
3160 yyerror("Unterminated attribute parameter in attribute list");
3161 if (attrs)
3162 op_free(attrs);
3163 return REPORT(0); /* EOF indicator */
3164 }
3165 }
3166 if (PL_lex_stuff) {
3167 SV *sv = newSVpvn(s, len);
3168 sv_catsv(sv, PL_lex_stuff);
3169 attrs = append_elem(OP_LIST, attrs,
3170 newSVOP(OP_CONST, 0, sv));
3171 SvREFCNT_dec(PL_lex_stuff);
3172 PL_lex_stuff = Nullsv;
3173 }
3174 else {
3175 if (len == 6 && strnEQ(s, "unique", len)) {
3176 if (PL_in_my == KEY_our)
3177#ifdef USE_ITHREADS
3178 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3179#else
3180 ; /* skip to avoid loading attributes.pm */
3181#endif
3182 else
3183 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3184 }
3185
3186 /* NOTE: any CV attrs applied here need to be part of
3187 the CVf_BUILTIN_ATTRS define in cv.h! */
3188 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3189 CvLVALUE_on(PL_compcv);
3190 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3191 CvLOCKED_on(PL_compcv);
3192 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3193 CvMETHOD_on(PL_compcv);
3194 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3195 CvASSERTION_on(PL_compcv);
3196 /* After we've set the flags, it could be argued that
3197 we don't need to do the attributes.pm-based setting
3198 process, and shouldn't bother appending recognized
3199 flags. To experiment with that, uncomment the
3200 following "else". (Note that's already been
3201 uncommented. That keeps the above-applied built-in
3202 attributes from being intercepted (and possibly
3203 rejected) by a package's attribute routines, but is
3204 justified by the performance win for the common case
3205 of applying only built-in attributes.) */
3206 else
3207 attrs = append_elem(OP_LIST, attrs,
3208 newSVOP(OP_CONST, 0,
3209 newSVpvn(s, len)));
3210 }
3211 s = skipspace(d);
3212 if (*s == ':' && s[1] != ':')
3213 s = skipspace(s+1);
3214 else if (s == d)
3215 break; /* require real whitespace or :'s */
3216 }
3217 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3218 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3219 char q = ((*s == '\'') ? '"' : '\'');
3220 /* If here for an expression, and parsed no attrs, back off. */
3221 if (tmp == '=' && !attrs) {
3222 s = PL_bufptr;
3223 break;
3224 }
3225 /* MUST advance bufptr here to avoid bogus "at end of line"
3226 context messages from yyerror().
3227 */
3228 PL_bufptr = s;
3229 if (!*s)
3230 yyerror("Unterminated attribute list");
3231 else
3232 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3233 q, *s, q));
3234 if (attrs)
3235 op_free(attrs);
3236 OPERATOR(':');
3237 }
3238 got_attrs:
3239 if (attrs) {
3240 PL_nextval[PL_nexttoke].opval = attrs;
3241 force_next(THING);
3242 }
3243 TOKEN(COLONATTR);
3244 }
3245 OPERATOR(':');
3246 case '(':
3247 s++;
3248 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3249 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3250 else
3251 PL_expect = XTERM;
3252 s = skipspace(s);
3253 TOKEN('(');
3254 case ';':
3255 CLINE;
3256 tmp = *s++;
3257 OPERATOR(tmp);
3258 case ')':
3259 tmp = *s++;
3260 s = skipspace(s);
3261 if (*s == '{')
3262 PREBLOCK(tmp);
3263 TERM(tmp);
3264 case ']':
3265 s++;
3266 if (PL_lex_brackets <= 0)
3267 yyerror("Unmatched right square bracket");
3268 else
3269 --PL_lex_brackets;
3270 if (PL_lex_state == LEX_INTERPNORMAL) {
3271 if (PL_lex_brackets == 0) {
3272 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3273 PL_lex_state = LEX_INTERPEND;
3274 }
3275 }
3276 TERM(']');
3277 case '{':
3278 leftbracket:
3279 s++;
3280 if (PL_lex_brackets > 100) {
3281 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3282 }
3283 switch (PL_expect) {
3284 case XTERM:
3285 if (PL_lex_formbrack) {
3286 s--;
3287 PRETERMBLOCK(DO);
3288 }
3289 if (PL_oldoldbufptr == PL_last_lop)
3290 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3291 else
3292 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3293 OPERATOR(HASHBRACK);
3294 case XOPERATOR:
3295 while (s < PL_bufend && SPACE_OR_TAB(*s))
3296 s++;
3297 d = s;
3298 PL_tokenbuf[0] = '\0';
3299 if (d < PL_bufend && *d == '-') {
3300 PL_tokenbuf[0] = '-';
3301 d++;
3302 while (d < PL_bufend && SPACE_OR_TAB(*d))
3303 d++;
3304 }
3305 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3306 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3307 FALSE, &len);
3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
3309 d++;
3310 if (*d == '}') {
3311 char minus = (PL_tokenbuf[0] == '-');
3312 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3313 if (minus)
3314 force_next('-');
3315 }
3316 }
3317 /* FALL THROUGH */
3318 case XATTRBLOCK:
3319 case XBLOCK:
3320 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3321 PL_expect = XSTATE;
3322 break;
3323 case XATTRTERM:
3324 case XTERMBLOCK:
3325 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3326 PL_expect = XSTATE;
3327 break;
3328 default: {
3329 char *t;
3330 if (PL_oldoldbufptr == PL_last_lop)
3331 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3332 else
3333 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3334 s = skipspace(s);
3335 if (*s == '}') {
3336 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3337 PL_expect = XTERM;
3338 /* This hack is to get the ${} in the message. */
3339 PL_bufptr = s+1;
3340 yyerror("syntax error");
3341 break;
3342 }
3343 OPERATOR(HASHBRACK);
3344 }
3345 /* This hack serves to disambiguate a pair of curlies
3346 * as being a block or an anon hash. Normally, expectation
3347 * determines that, but in cases where we're not in a
3348 * position to expect anything in particular (like inside
3349 * eval"") we have to resolve the ambiguity. This code
3350 * covers the case where the first term in the curlies is a
3351 * quoted string. Most other cases need to be explicitly
3352 * disambiguated by prepending a `+' before the opening
3353 * curly in order to force resolution as an anon hash.
3354 *
3355 * XXX should probably propagate the outer expectation
3356 * into eval"" to rely less on this hack, but that could
3357 * potentially break current behavior of eval"".
3358 * GSAR 97-07-21
3359 */
3360 t = s;
3361 if (*s == '\'' || *s == '"' || *s == '`') {
3362 /* common case: get past first string, handling escapes */
3363 for (t++; t < PL_bufend && *t != *s;)
3364 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3365 t++;
3366 t++;
3367 }
3368 else if (*s == 'q') {
3369 if (++t < PL_bufend
3370 && (!isALNUM(*t)
3371 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3372 && !isALNUM(*t))))
3373 {
3374 /* skip q//-like construct */
3375 char *tmps;
3376 char open, close, term;
3377 I32 brackets = 1;
3378
3379 while (t < PL_bufend && isSPACE(*t))
3380 t++;
3381 /* check for q => */
3382 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3383 OPERATOR(HASHBRACK);
3384 }
3385 term = *t;
3386 open = term;
3387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3388 term = tmps[5];
3389 close = term;
3390 if (open == close)
3391 for (t++; t < PL_bufend; t++) {
3392 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3393 t++;
3394 else if (*t == open)
3395 break;
3396 }
3397 else {
3398 for (t++; t < PL_bufend; t++) {
3399 if (*t == '\\' && t+1 < PL_bufend)
3400 t++;
3401 else if (*t == close && --brackets <= 0)
3402 break;
3403 else if (*t == open)
3404 brackets++;
3405 }
3406 }
3407 t++;
3408 }
3409 else
3410 /* skip plain q word */
3411 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3412 t += UTF8SKIP(t);
3413 }
3414 else if (isALNUM_lazy_if(t,UTF)) {
3415 t += UTF8SKIP(t);
3416 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3417 t += UTF8SKIP(t);
3418 }
3419 while (t < PL_bufend && isSPACE(*t))
3420 t++;
3421 /* if comma follows first term, call it an anon hash */
3422 /* XXX it could be a comma expression with loop modifiers */
3423 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3424 || (*t == '=' && t[1] == '>')))
3425 OPERATOR(HASHBRACK);
3426 if (PL_expect == XREF)
3427 PL_expect = XTERM;
3428 else {
3429 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3430 PL_expect = XSTATE;
3431 }
3432 }
3433 break;
3434 }
3435 yylval.ival = CopLINE(PL_curcop);
3436 if (isSPACE(*s) || *s == '#')
3437 PL_copline = NOLINE; /* invalidate current command line number */
3438 TOKEN('{');
3439 case '}':
3440 rightbracket:
3441 s++;
3442 if (PL_lex_brackets <= 0)
3443 yyerror("Unmatched right curly bracket");
3444 else
3445 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3446 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3447 PL_lex_formbrack = 0;
3448 if (PL_lex_state == LEX_INTERPNORMAL) {
3449 if (PL_lex_brackets == 0) {
3450 if (PL_expect & XFAKEBRACK) {
3451 PL_expect &= XENUMMASK;
3452 PL_lex_state = LEX_INTERPEND;
3453 PL_bufptr = s;
3454 return yylex(); /* ignore fake brackets */
3455 }
3456 if (*s == '-' && s[1] == '>')
3457 PL_lex_state = LEX_INTERPENDMAYBE;
3458 else if (*s != '[' && *s != '{')
3459 PL_lex_state = LEX_INTERPEND;
3460 }
3461 }
3462 if (PL_expect & XFAKEBRACK) {
3463 PL_expect &= XENUMMASK;
3464 PL_bufptr = s;
3465 return yylex(); /* ignore fake brackets */
3466 }
3467 force_next('}');
3468 TOKEN(';');
3469 case '&':
3470 s++;
3471 tmp = *s++;
3472 if (tmp == '&')
3473 AOPERATOR(ANDAND);
3474 s--;
3475 if (PL_expect == XOPERATOR) {
3476 if (ckWARN(WARN_SEMICOLON)
3477 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3478 {
3479 CopLINE_dec(PL_curcop);
3480 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3481 CopLINE_inc(PL_curcop);
3482 }
3483 BAop(OP_BIT_AND);
3484 }
3485
3486 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3487 if (*PL_tokenbuf) {
3488 PL_expect = XOPERATOR;
3489 force_ident(PL_tokenbuf, '&');
3490 }
3491 else
3492 PREREF('&');
3493 yylval.ival = (OPpENTERSUB_AMPER<<8);
3494 TERM('&');
3495
3496 case '|':
3497 s++;
3498 tmp = *s++;
3499 if (tmp == '|')
3500 AOPERATOR(OROR);
3501 s--;
3502 BOop(OP_BIT_OR);
3503 case '=':
3504 s++;
3505 tmp = *s++;
3506 if (tmp == '=')
3507 Eop(OP_EQ);
3508 if (tmp == '>')
3509 OPERATOR(',');
3510 if (tmp == '~')
3511 PMop(OP_MATCH);
3512 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3514 s--;
3515 if (PL_expect == XSTATE && isALPHA(tmp) &&
3516 (s == PL_linestart+1 || s[-2] == '\n') )
3517 {
3518 if (PL_in_eval && !PL_rsfp) {
3519 d = PL_bufend;
3520 while (s < d) {
3521 if (*s++ == '\n') {
3522 incline(s);
3523 if (strnEQ(s,"=cut",4)) {
3524 s = strchr(s,'\n');
3525 if (s)
3526 s++;
3527 else
3528 s = d;
3529 incline(s);
3530 goto retry;
3531 }
3532 }
3533 }
3534 goto retry;
3535 }
3536 s = PL_bufend;
3537 PL_doextract = TRUE;
3538 goto retry;
3539 }
3540 if (PL_lex_brackets < PL_lex_formbrack) {
3541 char *t;
3542#ifdef PERL_STRICT_CR
3543 for (t = s; SPACE_OR_TAB(*t); t++) ;
3544#else
3545 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3546#endif
3547 if (*t == '\n' || *t == '#') {
3548 s--;
3549 PL_expect = XBLOCK;
3550 goto leftbracket;
3551 }
3552 }
3553 yylval.ival = 0;
3554 OPERATOR(ASSIGNOP);
3555 case '!':
3556 s++;
3557 tmp = *s++;
3558 if (tmp == '=') {
3559 /* was this !=~ where !~ was meant?
3560 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3561
3562 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3563 char *t = s+1;
3564
3565 while (t < PL_bufend && isSPACE(*t))
3566 ++t;
3567
3568 if (*t == '/' || *t == '?' ||
3569 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3570 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3571 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3572 "!=~ should be !~");
3573 }
3574 Eop(OP_NE);
3575 }
3576 if (tmp == '~')
3577 PMop(OP_NOT);
3578 s--;
3579 OPERATOR('!');
3580 case '<':
3581 if (PL_expect != XOPERATOR) {
3582 if (s[1] != '<' && !strchr(s,'>'))
3583 check_uni();
3584 if (s[1] == '<')
3585 s = scan_heredoc(s);
3586 else
3587 s = scan_inputsymbol(s);
3588 TERM(sublex_start());
3589 }
3590 s++;
3591 tmp = *s++;
3592 if (tmp == '<')
3593 SHop(OP_LEFT_SHIFT);
3594 if (tmp == '=') {
3595 tmp = *s++;
3596 if (tmp == '>')
3597 Eop(OP_NCMP);
3598 s--;
3599 Rop(OP_LE);
3600 }
3601 s--;
3602 Rop(OP_LT);
3603 case '>':
3604 s++;
3605 tmp = *s++;
3606 if (tmp == '>')
3607 SHop(OP_RIGHT_SHIFT);
3608 if (tmp == '=')
3609 Rop(OP_GE);
3610 s--;
3611 Rop(OP_GT);
3612
3613 case '$':
3614 CLINE;
3615
3616 if (PL_expect == XOPERATOR) {
3617 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3618 PL_expect = XTERM;
3619 depcom();
3620 return REPORT(','); /* grandfather non-comma-format format */
3621 }
3622 }
3623
3624 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3625 PL_tokenbuf[0] = '@';
3626 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3627 sizeof PL_tokenbuf - 1, FALSE);
3628 if (PL_expect == XOPERATOR)
3629 no_op("Array length", s);
3630 if (!PL_tokenbuf[1])
3631 PREREF(DOLSHARP);
3632 PL_expect = XOPERATOR;
3633 PL_pending_ident = '#';
3634 TOKEN(DOLSHARP);
3635 }
3636
3637 PL_tokenbuf[0] = '$';
3638 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3639 sizeof PL_tokenbuf - 1, FALSE);
3640 if (PL_expect == XOPERATOR)
3641 no_op("Scalar", s);
3642 if (!PL_tokenbuf[1]) {
3643 if (s == PL_bufend)
3644 yyerror("Final $ should be \\$ or $name");
3645 PREREF('$');
3646 }
3647
3648 /* This kludge not intended to be bulletproof. */
3649 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3650 yylval.opval = newSVOP(OP_CONST, 0,
3651 newSViv(PL_compiling.cop_arybase));
3652 yylval.opval->op_private = OPpCONST_ARYBASE;
3653 TERM(THING);
3654 }
3655
3656 d = s;
3657 tmp = (I32)*s;
3658 if (PL_lex_state == LEX_NORMAL)
3659 s = skipspace(s);
3660
3661 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3662 char *t;
3663 if (*s == '[') {
3664 PL_tokenbuf[0] = '@';
3665 if (ckWARN(WARN_SYNTAX)) {
3666 for(t = s + 1;
3667 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3668 t++) ;
3669 if (*t++ == ',') {
3670 PL_bufptr = skipspace(PL_bufptr);
3671 while (t < PL_bufend && *t != ']')
3672 t++;
3673 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3674 "Multidimensional syntax %.*s not supported",
3675 (t - PL_bufptr) + 1, PL_bufptr);
3676 }
3677 }
3678 }
3679 else if (*s == '{') {
3680 PL_tokenbuf[0] = '%';
3681 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3682 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3683 {
3684 char tmpbuf[sizeof PL_tokenbuf];
3685 STRLEN len;
3686 for (t++; isSPACE(*t); t++) ;
3687 if (isIDFIRST_lazy_if(t,UTF)) {
3688 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3689 for (; isSPACE(*t); t++) ;
3690 if (*t == ';' && get_cv(tmpbuf, FALSE))
3691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3692 "You need to quote \"%s\"", tmpbuf);
3693 }
3694 }
3695 }
3696 }
3697
3698 PL_expect = XOPERATOR;
3699 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3700 bool islop = (PL_last_lop == PL_oldoldbufptr);
3701 if (!islop || PL_last_lop_op == OP_GREPSTART)
3702 PL_expect = XOPERATOR;
3703 else if (strchr("$@\"'`q", *s))
3704 PL_expect = XTERM; /* e.g. print $fh "foo" */
3705 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3706 PL_expect = XTERM; /* e.g. print $fh &sub */
3707 else if (isIDFIRST_lazy_if(s,UTF)) {
3708 char tmpbuf[sizeof PL_tokenbuf];
3709 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3710 if ((tmp = keyword(tmpbuf, len))) {
3711 /* binary operators exclude handle interpretations */
3712 switch (tmp) {
3713 case -KEY_x:
3714 case -KEY_eq:
3715 case -KEY_ne:
3716 case -KEY_gt:
3717 case -KEY_lt:
3718 case -KEY_ge:
3719 case -KEY_le:
3720 case -KEY_cmp:
3721 break;
3722 default:
3723 PL_expect = XTERM; /* e.g. print $fh length() */
3724 break;
3725 }
3726 }
3727 else {
3728 PL_expect = XTERM; /* e.g. print $fh subr() */
3729 }
3730 }
3731 else if (isDIGIT(*s))
3732 PL_expect = XTERM; /* e.g. print $fh 3 */
3733 else if (*s == '.' && isDIGIT(s[1]))
3734 PL_expect = XTERM; /* e.g. print $fh .3 */
3735 else if ((*s == '?' || *s == '-' || *s == '+')
3736 && !isSPACE(s[1]) && s[1] != '=')
3737 PL_expect = XTERM; /* e.g. print $fh -1 */
3738 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3739 PL_expect = XTERM; /* e.g. print $fh /.../
3740 XXX except DORDOR operator */
3741 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3742 PL_expect = XTERM; /* print $fh <<"EOF" */
3743 }
3744 PL_pending_ident = '$';
3745 TOKEN('$');
3746
3747 case '@':
3748 if (PL_expect == XOPERATOR)
3749 no_op("Array", s);
3750 PL_tokenbuf[0] = '@';
3751 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3752 if (!PL_tokenbuf[1]) {
3753 PREREF('@');
3754 }
3755 if (PL_lex_state == LEX_NORMAL)
3756 s = skipspace(s);
3757 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3758 if (*s == '{')
3759 PL_tokenbuf[0] = '%';
3760
3761 /* Warn about @ where they meant $. */
3762 if (ckWARN(WARN_SYNTAX)) {
3763 if (*s == '[' || *s == '{') {
3764 char *t = s + 1;
3765 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3766 t++;
3767 if (*t == '}' || *t == ']') {
3768 t++;
3769 PL_bufptr = skipspace(PL_bufptr);
3770 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3771 "Scalar value %.*s better written as $%.*s",
3772 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3773 }
3774 }
3775 }
3776 }
3777 PL_pending_ident = '@';
3778 TERM('@');
3779
3780 case '/': /* may be division, defined-or, or pattern */
3781 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3782 s += 2;
3783 AOPERATOR(DORDOR);
3784 }
3785 case '?': /* may either be conditional or pattern */
3786 if(PL_expect == XOPERATOR) {
3787 tmp = *s++;
3788 if(tmp == '?') {
3789 OPERATOR('?');
3790 }
3791 else {
3792 tmp = *s++;
3793 if(tmp == '/') {
3794 /* A // operator. */
3795 AOPERATOR(DORDOR);
3796 }
3797 else {
3798 s--;
3799 Mop(OP_DIVIDE);
3800 }
3801 }
3802 }
3803 else {
3804 /* Disable warning on "study /blah/" */
3805 if (PL_oldoldbufptr == PL_last_uni
3806 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3807 || memNE(PL_last_uni, "study", 5)
3808 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3809 ))
3810 check_uni();
3811 s = scan_pat(s,OP_MATCH);
3812 TERM(sublex_start());
3813 }
3814
3815 case '.':
3816 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3817#ifdef PERL_STRICT_CR
3818 && s[1] == '\n'
3819#else
3820 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3821#endif
3822 && (s == PL_linestart || s[-1] == '\n') )
3823 {
3824 PL_lex_formbrack = 0;
3825 PL_expect = XSTATE;
3826 goto rightbracket;
3827 }
3828 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3829 tmp = *s++;
3830 if (*s == tmp) {
3831 s++;
3832 if (*s == tmp) {
3833 s++;
3834 yylval.ival = OPf_SPECIAL;
3835 }
3836 else
3837 yylval.ival = 0;
3838 OPERATOR(DOTDOT);
3839 }
3840 if (PL_expect != XOPERATOR)
3841 check_uni();
3842 Aop(OP_CONCAT);
3843 }
3844 /* FALL THROUGH */
3845 case '0': case '1': case '2': case '3': case '4':
3846 case '5': case '6': case '7': case '8': case '9':
3847 s = scan_num(s, &yylval);
3848 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3849 "### Saw number in '%s'\n", s);
3850 } );
3851 if (PL_expect == XOPERATOR)
3852 no_op("Number",s);
3853 TERM(THING);
3854
3855 case '\'':
3856 s = scan_str(s,FALSE,FALSE);
3857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3858 "### Saw string before '%s'\n", s);
3859 } );
3860 if (PL_expect == XOPERATOR) {
3861 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3862 PL_expect = XTERM;
3863 depcom();
3864 return REPORT(','); /* grandfather non-comma-format format */
3865 }
3866 else
3867 no_op("String",s);
3868 }
3869 if (!s)
3870 missingterm((char*)0);
3871 yylval.ival = OP_CONST;
3872 TERM(sublex_start());
3873
3874 case '"':
3875 s = scan_str(s,FALSE,FALSE);
3876 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3877 "### Saw string before '%s'\n", s);
3878 } );
3879 if (PL_expect == XOPERATOR) {
3880 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3881 PL_expect = XTERM;
3882 depcom();
3883 return REPORT(','); /* grandfather non-comma-format format */
3884 }
3885 else
3886 no_op("String",s);
3887 }
3888 if (!s)
3889 missingterm((char*)0);
3890 yylval.ival = OP_CONST;
3891 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3892 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3893 yylval.ival = OP_STRINGIFY;
3894 break;
3895 }
3896 }
3897 TERM(sublex_start());
3898
3899 case '`':
3900 s = scan_str(s,FALSE,FALSE);
3901 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3902 "### Saw backtick string before '%s'\n", s);
3903 } );
3904 if (PL_expect == XOPERATOR)
3905 no_op("Backticks",s);
3906 if (!s)
3907 missingterm((char*)0);
3908 yylval.ival = OP_BACKTICK;
3909 set_csh();
3910 TERM(sublex_start());
3911
3912 case '\\':
3913 s++;
3914 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3915 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3916 *s, *s);
3917 if (PL_expect == XOPERATOR)
3918 no_op("Backslash",s);
3919 OPERATOR(REFGEN);
3920
3921 case 'v':
3922 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3923 char *start = s;
3924 start++;
3925 start++;
3926 while (isDIGIT(*start) || *start == '_')
3927 start++;
3928 if (*start == '.' && isDIGIT(start[1])) {
3929 s = scan_num(s, &yylval);
3930 TERM(THING);
3931 }
3932 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3933 else if (!isALPHA(*start) && (PL_expect == XTERM
3934 || PL_expect == XREF || PL_expect == XSTATE
3935 || PL_expect == XTERMORDORDOR)) {
3936 char c = *start;
3937 GV *gv;
3938 *start = '\0';
3939 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3940 *start = c;
3941 if (!gv) {
3942 s = scan_num(s, &yylval);
3943 TERM(THING);
3944 }
3945 }
3946 }
3947 goto keylookup;
3948 case 'x':
3949 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3950 s++;
3951 Mop(OP_REPEAT);
3952 }
3953 goto keylookup;
3954
3955 case '_':
3956 case 'a': case 'A':
3957 case 'b': case 'B':
3958 case 'c': case 'C':
3959 case 'd': case 'D':
3960 case 'e': case 'E':
3961 case 'f': case 'F':
3962 case 'g': case 'G':
3963 case 'h': case 'H':
3964 case 'i': case 'I':
3965 case 'j': case 'J':
3966 case 'k': case 'K':
3967 case 'l': case 'L':
3968 case 'm': case 'M':
3969 case 'n': case 'N':
3970 case 'o': case 'O':
3971 case 'p': case 'P':
3972 case 'q': case 'Q':
3973 case 'r': case 'R':
3974 case 's': case 'S':
3975 case 't': case 'T':
3976 case 'u': case 'U':
3977 case 'V':
3978 case 'w': case 'W':
3979 case 'X':
3980 case 'y': case 'Y':
3981 case 'z': case 'Z':
3982
3983 keylookup: {
3984 orig_keyword = 0;
3985 gv = Nullgv;
3986 gvp = 0;
3987
3988 PL_bufptr = s;
3989 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3990
3991 /* Some keywords can be followed by any delimiter, including ':' */
3992 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3993 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3994 (PL_tokenbuf[0] == 'q' &&
3995 strchr("qwxr", PL_tokenbuf[1])))));
3996
3997 /* x::* is just a word, unless x is "CORE" */
3998 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
3999 goto just_a_word;
4000
4001 d = s;
4002 while (d < PL_bufend && isSPACE(*d))
4003 d++; /* no comments skipped here, or s### is misparsed */
4004
4005 /* Is this a label? */
4006 if (!tmp && PL_expect == XSTATE
4007 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4008 s = d + 1;
4009 yylval.pval = savepv(PL_tokenbuf);
4010 CLINE;
4011 TOKEN(LABEL);
4012 }
4013
4014 /* Check for keywords */
4015 tmp = keyword(PL_tokenbuf, len);
4016
4017 /* Is this a word before a => operator? */
4018 if (*d == '=' && d[1] == '>') {
4019 CLINE;
4020 yylval.opval
4021 = (OP*)newSVOP(OP_CONST, 0,
4022 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4023 yylval.opval->op_private = OPpCONST_BARE;
4024 TERM(WORD);
4025 }
4026
4027 if (tmp < 0) { /* second-class keyword? */
4028 GV *ogv = Nullgv; /* override (winner) */
4029 GV *hgv = Nullgv; /* hidden (loser) */
4030 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4031 CV *cv;
4032 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4033 (cv = GvCVu(gv)))
4034 {
4035 if (GvIMPORTED_CV(gv))
4036 ogv = gv;
4037 else if (! CvMETHOD(cv))
4038 hgv = gv;
4039 }
4040 if (!ogv &&
4041 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4042 (gv = *gvp) != (GV*)&PL_sv_undef &&
4043 GvCVu(gv) && GvIMPORTED_CV(gv))
4044 {
4045 ogv = gv;
4046 }
4047 }
4048 if (ogv) {
4049 orig_keyword = tmp;
4050 tmp = 0; /* overridden by import or by GLOBAL */
4051 }
4052 else if (gv && !gvp
4053 && -tmp==KEY_lock /* XXX generalizable kludge */
4054 && GvCVu(gv)
4055 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4056 {
4057 tmp = 0; /* any sub overrides "weak" keyword */
4058 }
4059 else if (gv && !gvp
4060 && tmp == -KEY_err
4061 && GvCVu(gv)
4062 && PL_expect != XOPERATOR
4063 && PL_expect != XTERMORDORDOR)
4064 {
4065 /* any sub overrides the "err" keyword, except when really an
4066 * operator is expected */
4067 tmp = 0;
4068 }
4069 else { /* no override */
4070 tmp = -tmp;
4071 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4072 Perl_warner(aTHX_ packWARN(WARN_MISC),
4073 "dump() better written as CORE::dump()");
4074 }
4075 gv = Nullgv;
4076 gvp = 0;
4077 if (ckWARN(WARN_AMBIGUOUS) && hgv
4078 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4079 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4080 "Ambiguous call resolved as CORE::%s(), %s",
4081 GvENAME(hgv), "qualify as such or use &");
4082 }
4083 }
4084
4085 reserved_word:
4086 switch (tmp) {
4087
4088 default: /* not a keyword */
4089 just_a_word: {
4090 SV *sv;
4091 int pkgname = 0;
4092 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4093
4094 /* Get the rest if it looks like a package qualifier */
4095
4096 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4097 STRLEN morelen;
4098 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4099 TRUE, &morelen);
4100 if (!morelen)
4101 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4102 *s == '\'' ? "'" : "::");
4103 len += morelen;
4104 pkgname = 1;
4105 }
4106
4107 if (PL_expect == XOPERATOR) {
4108 if (PL_bufptr == PL_linestart) {
4109 CopLINE_dec(PL_curcop);
4110 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4111 CopLINE_inc(PL_curcop);
4112 }
4113 else
4114 no_op("Bareword",s);
4115 }
4116
4117 /* Look for a subroutine with this name in current package,
4118 unless name is "Foo::", in which case Foo is a bearword
4119 (and a package name). */
4120
4121 if (len > 2 &&
4122 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4123 {
4124 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4125 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4126 "Bareword \"%s\" refers to nonexistent package",
4127 PL_tokenbuf);
4128 len -= 2;
4129 PL_tokenbuf[len] = '\0';
4130 gv = Nullgv;
4131 gvp = 0;
4132 }
4133 else {
4134 len = 0;
4135 if (!gv)
4136 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4137 }
4138
4139 /* if we saw a global override before, get the right name */
4140
4141 if (gvp) {
4142 sv = newSVpvn("CORE::GLOBAL::",14);
4143 sv_catpv(sv,PL_tokenbuf);
4144 }
4145 else {
4146 /* If len is 0, newSVpv does strlen(), which is correct.
4147 If len is non-zero, then it will be the true length,
4148 and so the scalar will be created correctly. */
4149 sv = newSVpv(PL_tokenbuf,len);
4150 }
4151
4152 /* Presume this is going to be a bareword of some sort. */
4153
4154 CLINE;
4155 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4156 yylval.opval->op_private = OPpCONST_BARE;
4157 /* UTF-8 package name? */
4158 if (UTF && !IN_BYTES &&
4159 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
4160 SvUTF8_on(sv);
4161
4162 /* And if "Foo::", then that's what it certainly is. */
4163
4164 if (len)
4165 goto safe_bareword;
4166
4167 /* See if it's the indirect object for a list operator. */
4168
4169 if (PL_oldoldbufptr &&
4170 PL_oldoldbufptr < PL_bufptr &&
4171 (PL_oldoldbufptr == PL_last_lop
4172 || PL_oldoldbufptr == PL_last_uni) &&
4173 /* NO SKIPSPACE BEFORE HERE! */
4174 (PL_expect == XREF ||
4175 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4176 {
4177 bool immediate_paren = *s == '(';
4178
4179 /* (Now we can afford to cross potential line boundary.) */
4180 s = skipspace(s);
4181
4182 /* Two barewords in a row may indicate method call. */
4183
4184 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4185 return REPORT(tmp);
4186
4187 /* If not a declared subroutine, it's an indirect object. */
4188 /* (But it's an indir obj regardless for sort.) */
4189
4190 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4191 ((!gv || !GvCVu(gv)) &&
4192 (PL_last_lop_op != OP_MAPSTART &&
4193 PL_last_lop_op != OP_GREPSTART))))
4194 {
4195 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4196 goto bareword;
4197 }
4198 }
4199
4200 PL_expect = XOPERATOR;
4201 s = skipspace(s);
4202
4203 /* Is this a word before a => operator? */
4204 if (*s == '=' && s[1] == '>' && !pkgname) {
4205 CLINE;
4206 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4207 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4208 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4209 TERM(WORD);
4210 }
4211
4212 /* If followed by a paren, it's certainly a subroutine. */
4213 if (*s == '(') {
4214 CLINE;
4215 if (gv && GvCVu(gv)) {
4216 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4217 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4218 s = d + 1;
4219 goto its_constant;
4220 }
4221 }
4222 PL_nextval[PL_nexttoke].opval = yylval.opval;
4223 PL_expect = XOPERATOR;
4224 force_next(WORD);
4225 yylval.ival = 0;
4226 TOKEN('&');
4227 }
4228
4229 /* If followed by var or block, call it a method (unless sub) */
4230
4231 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4232 PL_last_lop = PL_oldbufptr;
4233 PL_last_lop_op = OP_METHOD;
4234 PREBLOCK(METHOD);
4235 }
4236
4237 /* If followed by a bareword, see if it looks like indir obj. */
4238
4239 if (!orig_keyword
4240 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4241 && (tmp = intuit_method(s,gv)))
4242 return REPORT(tmp);
4243
4244 /* Not a method, so call it a subroutine (if defined) */
4245
4246 if (gv && GvCVu(gv)) {
4247 CV* cv;
4248 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4249 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4250 "Ambiguous use of -%s resolved as -&%s()",
4251 PL_tokenbuf, PL_tokenbuf);
4252 /* Check for a constant sub */
4253 cv = GvCV(gv);
4254 if ((sv = cv_const_sv(cv))) {
4255 its_constant:
4256 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4257 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4258 yylval.opval->op_private = 0;
4259 TOKEN(WORD);
4260 }
4261
4262 /* Resolve to GV now. */
4263 op_free(yylval.opval);
4264 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4265 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4266 PL_last_lop = PL_oldbufptr;
4267 PL_last_lop_op = OP_ENTERSUB;
4268 /* Is there a prototype? */
4269 if (SvPOK(cv)) {
4270 STRLEN len;
4271 char *proto = SvPV((SV*)cv, len);
4272 if (!len)
4273 TERM(FUNC0SUB);
4274 if (*proto == '$' && proto[1] == '\0')
4275 OPERATOR(UNIOPSUB);
4276 while (*proto == ';')
4277 proto++;
4278 if (*proto == '&' && *s == '{') {
4279 sv_setpv(PL_subname, PL_curstash ?
4280 "__ANON__" : "__ANON__::__ANON__");
4281 PREBLOCK(LSTOPSUB);
4282 }
4283 }
4284 PL_nextval[PL_nexttoke].opval = yylval.opval;
4285 PL_expect = XTERM;
4286 force_next(WORD);
4287 TOKEN(NOAMP);
4288 }
4289
4290 /* Call it a bare word */
4291
4292 if (PL_hints & HINT_STRICT_SUBS)
4293 yylval.opval->op_private |= OPpCONST_STRICT;
4294 else {
4295 bareword:
4296 if (ckWARN(WARN_RESERVED)) {
4297 if (lastchar != '-') {
4298 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4299 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4300 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4301 PL_tokenbuf);
4302 }
4303 }
4304 }
4305
4306 safe_bareword:
4307 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4308 && ckWARN_d(WARN_AMBIGUOUS)) {
4309 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4310 "Operator or semicolon missing before %c%s",
4311 lastchar, PL_tokenbuf);
4312 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4313 "Ambiguous use of %c resolved as operator %c",
4314 lastchar, lastchar);
4315 }
4316 TOKEN(WORD);
4317 }
4318
4319 case KEY___FILE__:
4320 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4321 newSVpv(CopFILE(PL_curcop),0));
4322 TERM(THING);
4323
4324 case KEY___LINE__:
4325 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4326 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4327 TERM(THING);
4328
4329 case KEY___PACKAGE__:
4330 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4331 (PL_curstash
4332 ? newSVpv(HvNAME(PL_curstash), 0)
4333 : &PL_sv_undef));
4334 TERM(THING);
4335
4336 case KEY___DATA__:
4337 case KEY___END__: {
4338 GV *gv;
4339
4340 /*SUPPRESS 560*/
4341 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4342 char *pname = "main";
4343 if (PL_tokenbuf[2] == 'D')
4344 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
4345 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4346 GvMULTI_on(gv);
4347 if (!GvIO(gv))
4348 GvIOp(gv) = newIO();
4349 IoIFP(GvIOp(gv)) = PL_rsfp;
4350#if defined(HAS_FCNTL) && defined(F_SETFD)
4351 {
4352 int fd = PerlIO_fileno(PL_rsfp);
4353 fcntl(fd,F_SETFD,fd >= 3);
4354 }
4355#endif
4356 /* Mark this internal pseudo-handle as clean */
4357 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4358 if (PL_preprocess)
4359 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4360 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4361 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4362 else
4363 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4364#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4365 /* if the script was opened in binmode, we need to revert
4366 * it to text mode for compatibility; but only iff it has CRs
4367 * XXX this is a questionable hack at best. */
4368 if (PL_bufend-PL_bufptr > 2
4369 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4370 {
4371 Off_t loc = 0;
4372 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4373 loc = PerlIO_tell(PL_rsfp);
4374 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4375 }
4376#ifdef NETWARE
4377 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4378#else
4379 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4380#endif /* NETWARE */
4381#ifdef PERLIO_IS_STDIO /* really? */
4382# if defined(__BORLANDC__)
4383 /* XXX see note in do_binmode() */
4384 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4385# endif
4386#endif
4387 if (loc > 0)
4388 PerlIO_seek(PL_rsfp, loc, 0);
4389 }
4390 }
4391#endif
4392#ifdef PERLIO_LAYERS
4393 if (!IN_BYTES) {
4394 if (UTF)
4395 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4396 else if (PL_encoding) {
4397 SV *name;
4398 dSP;
4399 ENTER;
4400 SAVETMPS;
4401 PUSHMARK(sp);
4402 EXTEND(SP, 1);
4403 XPUSHs(PL_encoding);
4404 PUTBACK;
4405 call_method("name", G_SCALAR);
4406 SPAGAIN;
4407 name = POPs;
4408 PUTBACK;
4409 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4410 Perl_form(aTHX_ ":encoding(%"SVf")",
4411 name));
4412 FREETMPS;
4413 LEAVE;
4414 }
4415 }
4416#endif
4417 PL_rsfp = Nullfp;
4418 }
4419 goto fake_eof;
4420 }
4421
4422 case KEY_AUTOLOAD:
4423 case KEY_DESTROY:
4424 case KEY_BEGIN:
4425 case KEY_CHECK:
4426 case KEY_INIT:
4427 case KEY_END:
4428 if (PL_expect == XSTATE) {
4429 s = PL_bufptr;
4430 goto really_sub;
4431 }
4432 goto just_a_word;
4433
4434 case KEY_CORE:
4435 if (*s == ':' && s[1] == ':') {
4436 s += 2;
4437 d = s;
4438 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4439 if (!(tmp = keyword(PL_tokenbuf, len)))
4440 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4441 if (tmp < 0)
4442 tmp = -tmp;
4443 goto reserved_word;
4444 }
4445 goto just_a_word;
4446
4447 case KEY_abs:
4448 UNI(OP_ABS);
4449
4450 case KEY_alarm:
4451 UNI(OP_ALARM);
4452
4453 case KEY_accept:
4454 LOP(OP_ACCEPT,XTERM);
4455
4456 case KEY_and:
4457 OPERATOR(ANDOP);
4458
4459 case KEY_atan2:
4460 LOP(OP_ATAN2,XTERM);
4461
4462 case KEY_bind:
4463 LOP(OP_BIND,XTERM);
4464
4465 case KEY_binmode:
4466 LOP(OP_BINMODE,XTERM);
4467
4468 case KEY_bless:
4469 LOP(OP_BLESS,XTERM);
4470
4471 case KEY_chop:
4472 UNI(OP_CHOP);
4473
4474 case KEY_continue:
4475 PREBLOCK(CONTINUE);
4476
4477 case KEY_chdir:
4478 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4479 UNI(OP_CHDIR);
4480
4481 case KEY_close:
4482 UNI(OP_CLOSE);
4483
4484 case KEY_closedir:
4485 UNI(OP_CLOSEDIR);
4486
4487 case KEY_cmp:
4488 Eop(OP_SCMP);
4489
4490 case KEY_caller:
4491 UNI(OP_CALLER);
4492
4493 case KEY_crypt:
4494#ifdef FCRYPT
4495 if (!PL_cryptseen) {
4496 PL_cryptseen = TRUE;
4497 init_des();
4498 }
4499#endif
4500 LOP(OP_CRYPT,XTERM);
4501
4502 case KEY_chmod:
4503 LOP(OP_CHMOD,XTERM);
4504
4505 case KEY_chown:
4506 LOP(OP_CHOWN,XTERM);
4507
4508 case KEY_connect:
4509 LOP(OP_CONNECT,XTERM);
4510
4511 case KEY_chr:
4512 UNI(OP_CHR);
4513
4514 case KEY_cos:
4515 UNI(OP_COS);
4516
4517 case KEY_chroot:
4518 UNI(OP_CHROOT);
4519
4520 case KEY_do:
4521 s = skipspace(s);
4522 if (*s == '{')
4523 PRETERMBLOCK(DO);
4524 if (*s != '\'')
4525 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4526 OPERATOR(DO);
4527
4528 case KEY_die:
4529 PL_hints |= HINT_BLOCK_SCOPE;
4530 LOP(OP_DIE,XTERM);
4531
4532 case KEY_defined:
4533 UNI(OP_DEFINED);
4534
4535 case KEY_delete:
4536 UNI(OP_DELETE);
4537
4538 case KEY_dbmopen:
4539 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4540 LOP(OP_DBMOPEN,XTERM);
4541
4542 case KEY_dbmclose:
4543 UNI(OP_DBMCLOSE);
4544
4545 case KEY_dump:
4546 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4547 LOOPX(OP_DUMP);
4548
4549 case KEY_else:
4550 PREBLOCK(ELSE);
4551
4552 case KEY_elsif:
4553 yylval.ival = CopLINE(PL_curcop);
4554 OPERATOR(ELSIF);
4555
4556 case KEY_eq:
4557 Eop(OP_SEQ);
4558
4559 case KEY_exists:
4560 UNI(OP_EXISTS);
4561
4562 case KEY_exit:
4563 UNI(OP_EXIT);
4564
4565 case KEY_eval:
4566 s = skipspace(s);
4567 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4568 UNIBRACK(OP_ENTEREVAL);
4569
4570 case KEY_eof:
4571 UNI(OP_EOF);
4572
4573 case KEY_err:
4574 OPERATOR(DOROP);
4575
4576 case KEY_exp:
4577 UNI(OP_EXP);
4578
4579 case KEY_each:
4580 UNI(OP_EACH);
4581
4582 case KEY_exec:
4583 set_csh();
4584 LOP(OP_EXEC,XREF);
4585
4586 case KEY_endhostent:
4587 FUN0(OP_EHOSTENT);
4588
4589 case KEY_endnetent:
4590 FUN0(OP_ENETENT);
4591
4592 case KEY_endservent:
4593 FUN0(OP_ESERVENT);
4594
4595 case KEY_endprotoent:
4596 FUN0(OP_EPROTOENT);
4597
4598 case KEY_endpwent:
4599 FUN0(OP_EPWENT);
4600
4601 case KEY_endgrent:
4602 FUN0(OP_EGRENT);
4603
4604 case KEY_for:
4605 case KEY_foreach:
4606 yylval.ival = CopLINE(PL_curcop);
4607 s = skipspace(s);
4608 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4609 char *p = s;
4610 if ((PL_bufend - p) >= 3 &&
4611 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4612 p += 2;
4613 else if ((PL_bufend - p) >= 4 &&
4614 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4615 p += 3;
4616 p = skipspace(p);
4617 if (isIDFIRST_lazy_if(p,UTF)) {
4618 p = scan_ident(p, PL_bufend,
4619 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4620 p = skipspace(p);
4621 }
4622 if (*p != '$')
4623 Perl_croak(aTHX_ "Missing $ on loop variable");
4624 }
4625 OPERATOR(FOR);
4626
4627 case KEY_formline:
4628 LOP(OP_FORMLINE,XTERM);
4629
4630 case KEY_fork:
4631 FUN0(OP_FORK);
4632
4633 case KEY_fcntl:
4634 LOP(OP_FCNTL,XTERM);
4635
4636 case KEY_fileno:
4637 UNI(OP_FILENO);
4638
4639 case KEY_flock:
4640 LOP(OP_FLOCK,XTERM);
4641
4642 case KEY_gt:
4643 Rop(OP_SGT);
4644
4645 case KEY_ge:
4646 Rop(OP_SGE);
4647
4648 case KEY_grep:
4649 LOP(OP_GREPSTART, XREF);
4650
4651 case KEY_goto:
4652 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4653 LOOPX(OP_GOTO);
4654
4655 case KEY_gmtime:
4656 UNI(OP_GMTIME);
4657
4658 case KEY_getc:
4659 UNIDOR(OP_GETC);
4660
4661 case KEY_getppid:
4662 FUN0(OP_GETPPID);
4663
4664 case KEY_getpgrp:
4665 UNI(OP_GETPGRP);
4666
4667 case KEY_getpriority:
4668 LOP(OP_GETPRIORITY,XTERM);
4669
4670 case KEY_getprotobyname:
4671 UNI(OP_GPBYNAME);
4672
4673 case KEY_getprotobynumber:
4674 LOP(OP_GPBYNUMBER,XTERM);
4675
4676 case KEY_getprotoent:
4677 FUN0(OP_GPROTOENT);
4678
4679 case KEY_getpwent:
4680 FUN0(OP_GPWENT);
4681
4682 case KEY_getpwnam:
4683 UNI(OP_GPWNAM);
4684
4685 case KEY_getpwuid:
4686 UNI(OP_GPWUID);
4687
4688 case KEY_getpeername:
4689 UNI(OP_GETPEERNAME);
4690
4691 case KEY_gethostbyname:
4692 UNI(OP_GHBYNAME);
4693
4694 case KEY_gethostbyaddr:
4695 LOP(OP_GHBYADDR,XTERM);
4696
4697 case KEY_gethostent:
4698 FUN0(OP_GHOSTENT);
4699
4700 case KEY_getnetbyname:
4701 UNI(OP_GNBYNAME);
4702
4703 case KEY_getnetbyaddr:
4704 LOP(OP_GNBYADDR,XTERM);
4705
4706 case KEY_getnetent:
4707 FUN0(OP_GNETENT);
4708
4709 case KEY_getservbyname:
4710 LOP(OP_GSBYNAME,XTERM);
4711
4712 case KEY_getservbyport:
4713 LOP(OP_GSBYPORT,XTERM);
4714
4715 case KEY_getservent:
4716 FUN0(OP_GSERVENT);
4717
4718 case KEY_getsockname:
4719 UNI(OP_GETSOCKNAME);
4720
4721 case KEY_getsockopt:
4722 LOP(OP_GSOCKOPT,XTERM);
4723
4724 case KEY_getgrent:
4725 FUN0(OP_GGRENT);
4726
4727 case KEY_getgrnam:
4728 UNI(OP_GGRNAM);
4729
4730 case KEY_getgrgid:
4731 UNI(OP_GGRGID);
4732
4733 case KEY_getlogin:
4734 FUN0(OP_GETLOGIN);
4735
4736 case KEY_glob:
4737 set_csh();
4738 LOP(OP_GLOB,XTERM);
4739
4740 case KEY_hex:
4741 UNI(OP_HEX);
4742
4743 case KEY_if:
4744 yylval.ival = CopLINE(PL_curcop);
4745 OPERATOR(IF);
4746
4747 case KEY_index:
4748 LOP(OP_INDEX,XTERM);
4749
4750 case KEY_int:
4751 UNI(OP_INT);
4752
4753 case KEY_ioctl:
4754 LOP(OP_IOCTL,XTERM);
4755
4756 case KEY_join:
4757 LOP(OP_JOIN,XTERM);
4758
4759 case KEY_keys:
4760 UNI(OP_KEYS);
4761
4762 case KEY_kill:
4763 LOP(OP_KILL,XTERM);
4764
4765 case KEY_last:
4766 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4767 LOOPX(OP_LAST);
4768
4769 case KEY_lc:
4770 UNI(OP_LC);
4771
4772 case KEY_lcfirst:
4773 UNI(OP_LCFIRST);
4774
4775 case KEY_local:
4776 yylval.ival = 0;
4777 OPERATOR(LOCAL);
4778
4779 case KEY_length:
4780 UNI(OP_LENGTH);
4781
4782 case KEY_lt:
4783 Rop(OP_SLT);
4784
4785 case KEY_le:
4786 Rop(OP_SLE);
4787
4788 case KEY_localtime:
4789 UNI(OP_LOCALTIME);
4790
4791 case KEY_log:
4792 UNI(OP_LOG);
4793
4794 case KEY_link:
4795 LOP(OP_LINK,XTERM);
4796
4797 case KEY_listen:
4798 LOP(OP_LISTEN,XTERM);
4799
4800 case KEY_lock:
4801 UNI(OP_LOCK);
4802
4803 case KEY_lstat:
4804 UNI(OP_LSTAT);
4805
4806 case KEY_m:
4807 s = scan_pat(s,OP_MATCH);
4808 TERM(sublex_start());
4809
4810 case KEY_map:
4811 LOP(OP_MAPSTART, XREF);
4812
4813 case KEY_mkdir:
4814 LOP(OP_MKDIR,XTERM);
4815
4816 case KEY_msgctl:
4817 LOP(OP_MSGCTL,XTERM);
4818
4819 case KEY_msgget:
4820 LOP(OP_MSGGET,XTERM);
4821
4822 case KEY_msgrcv:
4823 LOP(OP_MSGRCV,XTERM);
4824
4825 case KEY_msgsnd:
4826 LOP(OP_MSGSND,XTERM);
4827
4828 case KEY_our:
4829 case KEY_my:
4830 PL_in_my = tmp;
4831 s = skipspace(s);
4832 if (isIDFIRST_lazy_if(s,UTF)) {
4833 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4834 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4835 goto really_sub;
4836 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4837 if (!PL_in_my_stash) {
4838 char tmpbuf[1024];
4839 PL_bufptr = s;
4840 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4841 yyerror(tmpbuf);
4842 }
4843 }
4844 yylval.ival = 1;
4845 OPERATOR(MY);
4846
4847 case KEY_next:
4848 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4849 LOOPX(OP_NEXT);
4850
4851 case KEY_ne:
4852 Eop(OP_SNE);
4853
4854 case KEY_no:
4855 if (PL_expect != XSTATE)
4856 yyerror("\"no\" not allowed in expression");
4857 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4858 s = force_version(s, FALSE);
4859 yylval.ival = 0;
4860 OPERATOR(USE);
4861
4862 case KEY_not:
4863 if (*s == '(' || (s = skipspace(s), *s == '('))
4864 FUN1(OP_NOT);
4865 else
4866 OPERATOR(NOTOP);
4867
4868 case KEY_open:
4869 s = skipspace(s);
4870 if (isIDFIRST_lazy_if(s,UTF)) {
4871 char *t;
4872 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4873 for (t=d; *t && isSPACE(*t); t++) ;
4874 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4875 /* [perl #16184] */
4876 && !(t[0] == '=' && t[1] == '>')
4877 ) {
4878 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4879 "Precedence problem: open %.*s should be open(%.*s)",
4880 d - s, s, d - s, s);
4881 }
4882 }
4883 LOP(OP_OPEN,XTERM);
4884
4885 case KEY_or:
4886 yylval.ival = OP_OR;
4887 OPERATOR(OROP);
4888
4889 case KEY_ord:
4890 UNI(OP_ORD);
4891
4892 case KEY_oct:
4893 UNI(OP_OCT);
4894
4895 case KEY_opendir:
4896 LOP(OP_OPEN_DIR,XTERM);
4897
4898 case KEY_print:
4899 checkcomma(s,PL_tokenbuf,"filehandle");
4900 LOP(OP_PRINT,XREF);
4901
4902 case KEY_printf:
4903 checkcomma(s,PL_tokenbuf,"filehandle");
4904 LOP(OP_PRTF,XREF);
4905
4906 case KEY_prototype:
4907 UNI(OP_PROTOTYPE);
4908
4909 case KEY_push:
4910 LOP(OP_PUSH,XTERM);
4911
4912 case KEY_pop:
4913 UNIDOR(OP_POP);
4914
4915 case KEY_pos:
4916 UNIDOR(OP_POS);
4917
4918 case KEY_pack:
4919 LOP(OP_PACK,XTERM);
4920
4921 case KEY_package:
4922 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4923 OPERATOR(PACKAGE);
4924
4925 case KEY_pipe:
4926 LOP(OP_PIPE_OP,XTERM);
4927
4928 case KEY_q:
4929 s = scan_str(s,FALSE,FALSE);
4930 if (!s)
4931 missingterm((char*)0);
4932 yylval.ival = OP_CONST;
4933 TERM(sublex_start());
4934
4935 case KEY_quotemeta:
4936 UNI(OP_QUOTEMETA);
4937
4938 case KEY_qw:
4939 s = scan_str(s,FALSE,FALSE);
4940 if (!s)
4941 missingterm((char*)0);
4942 force_next(')');
4943 if (SvCUR(PL_lex_stuff)) {
4944 OP *words = Nullop;
4945 int warned = 0;
4946 d = SvPV_force(PL_lex_stuff, len);
4947 while (len) {
4948 SV *sv;
4949 for (; isSPACE(*d) && len; --len, ++d) ;
4950 if (len) {
4951 char *b = d;
4952 if (!warned && ckWARN(WARN_QW)) {
4953 for (; !isSPACE(*d) && len; --len, ++d) {
4954 if (*d == ',') {
4955 Perl_warner(aTHX_ packWARN(WARN_QW),
4956 "Possible attempt to separate words with commas");
4957 ++warned;
4958 }
4959 else if (*d == '#') {
4960 Perl_warner(aTHX_ packWARN(WARN_QW),
4961 "Possible attempt to put comments in qw() list");
4962 ++warned;
4963 }
4964 }
4965 }
4966 else {
4967 for (; !isSPACE(*d) && len; --len, ++d) ;
4968 }
4969 sv = newSVpvn(b, d-b);
4970 if (DO_UTF8(PL_lex_stuff))
4971 SvUTF8_on(sv);
4972 words = append_elem(OP_LIST, words,
4973 newSVOP(OP_CONST, 0, tokeq(sv)));
4974 }
4975 }
4976 if (words) {
4977 PL_nextval[PL_nexttoke].opval = words;
4978 force_next(THING);
4979 }
4980 }
4981 if (PL_lex_stuff) {
4982 SvREFCNT_dec(PL_lex_stuff);
4983 PL_lex_stuff = Nullsv;
4984 }
4985 PL_expect = XTERM;
4986 TOKEN('(');
4987
4988 case KEY_qq:
4989 s = scan_str(s,FALSE,FALSE);
4990 if (!s)
4991 missingterm((char*)0);
4992 yylval.ival = OP_STRINGIFY;
4993 if (SvIVX(PL_lex_stuff) == '\'')
4994 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
4995 TERM(sublex_start());
4996
4997 case KEY_qr:
4998 s = scan_pat(s,OP_QR);
4999 TERM(sublex_start());
5000
5001 case KEY_qx:
5002 s = scan_str(s,FALSE,FALSE);
5003 if (!s)
5004 missingterm((char*)0);
5005 yylval.ival = OP_BACKTICK;
5006 set_csh();
5007 TERM(sublex_start());
5008
5009 case KEY_return:
5010 OLDLOP(OP_RETURN);
5011
5012 case KEY_require:
5013 s = skipspace(s);
5014 if (isDIGIT(*s)) {
5015 s = force_version(s, FALSE);
5016 }
5017 else if (*s != 'v' || !isDIGIT(s[1])
5018 || (s = force_version(s, TRUE), *s == 'v'))
5019 {
5020 *PL_tokenbuf = '\0';
5021 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5022 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5023 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5024 else if (*s == '<')
5025 yyerror("<> should be quotes");
5026 }
5027 UNI(OP_REQUIRE);
5028
5029 case KEY_reset:
5030 UNI(OP_RESET);
5031
5032 case KEY_redo:
5033 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5034 LOOPX(OP_REDO);
5035
5036 case KEY_rename:
5037 LOP(OP_RENAME,XTERM);
5038
5039 case KEY_rand:
5040 UNI(OP_RAND);
5041
5042 case KEY_rmdir:
5043 UNI(OP_RMDIR);
5044
5045 case KEY_rindex:
5046 LOP(OP_RINDEX,XTERM);
5047
5048 case KEY_read:
5049 LOP(OP_READ,XTERM);
5050
5051 case KEY_readdir:
5052 UNI(OP_READDIR);
5053
5054 case KEY_readline:
5055 set_csh();
5056 UNIDOR(OP_READLINE);
5057
5058 case KEY_readpipe:
5059 set_csh();
5060 UNI(OP_BACKTICK);
5061
5062 case KEY_rewinddir:
5063 UNI(OP_REWINDDIR);
5064
5065 case KEY_recv:
5066 LOP(OP_RECV,XTERM);
5067
5068 case KEY_reverse:
5069 LOP(OP_REVERSE,XTERM);
5070
5071 case KEY_readlink:
5072 UNIDOR(OP_READLINK);
5073
5074 case KEY_ref:
5075 UNI(OP_REF);
5076
5077 case KEY_s:
5078 s = scan_subst(s);
5079 if (yylval.opval)
5080 TERM(sublex_start());
5081 else
5082 TOKEN(1); /* force error */
5083
5084 case KEY_chomp:
5085 UNI(OP_CHOMP);
5086
5087 case KEY_scalar:
5088 UNI(OP_SCALAR);
5089
5090 case KEY_select:
5091 LOP(OP_SELECT,XTERM);
5092
5093 case KEY_seek:
5094 LOP(OP_SEEK,XTERM);
5095
5096 case KEY_semctl:
5097 LOP(OP_SEMCTL,XTERM);
5098
5099 case KEY_semget:
5100 LOP(OP_SEMGET,XTERM);
5101
5102 case KEY_semop:
5103 LOP(OP_SEMOP,XTERM);
5104
5105 case KEY_send:
5106 LOP(OP_SEND,XTERM);
5107
5108 case KEY_setpgrp:
5109 LOP(OP_SETPGRP,XTERM);
5110
5111 case KEY_setpriority:
5112 LOP(OP_SETPRIORITY,XTERM);
5113
5114 case KEY_sethostent:
5115 UNI(OP_SHOSTENT);
5116
5117 case KEY_setnetent:
5118 UNI(OP_SNETENT);
5119
5120 case KEY_setservent:
5121 UNI(OP_SSERVENT);
5122
5123 case KEY_setprotoent:
5124 UNI(OP_SPROTOENT);
5125
5126 case KEY_setpwent:
5127 FUN0(OP_SPWENT);
5128
5129 case KEY_setgrent:
5130 FUN0(OP_SGRENT);
5131
5132 case KEY_seekdir:
5133 LOP(OP_SEEKDIR,XTERM);
5134
5135 case KEY_setsockopt:
5136 LOP(OP_SSOCKOPT,XTERM);
5137
5138 case KEY_shift:
5139 UNIDOR(OP_SHIFT);
5140
5141 case KEY_shmctl:
5142 LOP(OP_SHMCTL,XTERM);
5143
5144 case KEY_shmget:
5145 LOP(OP_SHMGET,XTERM);
5146
5147 case KEY_shmread:
5148 LOP(OP_SHMREAD,XTERM);
5149
5150 case KEY_shmwrite:
5151 LOP(OP_SHMWRITE,XTERM);
5152
5153 case KEY_shutdown:
5154 LOP(OP_SHUTDOWN,XTERM);
5155
5156 case KEY_sin:
5157 UNI(OP_SIN);
5158
5159 case KEY_sleep:
5160 UNI(OP_SLEEP);
5161
5162 case KEY_socket:
5163 LOP(OP_SOCKET,XTERM);
5164
5165 case KEY_socketpair:
5166 LOP(OP_SOCKPAIR,XTERM);
5167
5168 case KEY_sort:
5169 checkcomma(s,PL_tokenbuf,"subroutine name");
5170 s = skipspace(s);
5171 if (*s == ';' || *s == ')') /* probably a close */
5172 Perl_croak(aTHX_ "sort is now a reserved word");
5173 PL_expect = XTERM;
5174 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5175 LOP(OP_SORT,XREF);
5176
5177 case KEY_split:
5178 LOP(OP_SPLIT,XTERM);
5179
5180 case KEY_sprintf:
5181 LOP(OP_SPRINTF,XTERM);
5182
5183 case KEY_splice:
5184 LOP(OP_SPLICE,XTERM);
5185
5186 case KEY_sqrt:
5187 UNI(OP_SQRT);
5188
5189 case KEY_srand:
5190 UNI(OP_SRAND);
5191
5192 case KEY_stat:
5193 UNI(OP_STAT);
5194
5195 case KEY_study:
5196 UNI(OP_STUDY);
5197
5198 case KEY_substr:
5199 LOP(OP_SUBSTR,XTERM);
5200
5201 case KEY_format:
5202 case KEY_sub:
5203 really_sub:
5204 {
5205 char tmpbuf[sizeof PL_tokenbuf];
5206 SSize_t tboffset = 0;
5207 expectation attrful;
5208 bool have_name, have_proto, bad_proto;
5209 int key = tmp;
5210
5211 s = skipspace(s);
5212
5213 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5214 (*s == ':' && s[1] == ':'))
5215 {
5216 PL_expect = XBLOCK;
5217 attrful = XATTRBLOCK;
5218 /* remember buffer pos'n for later force_word */
5219 tboffset = s - PL_oldbufptr;
5220 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5221 if (strchr(tmpbuf, ':'))
5222 sv_setpv(PL_subname, tmpbuf);
5223 else {
5224 sv_setsv(PL_subname,PL_curstname);
5225 sv_catpvn(PL_subname,"::",2);
5226 sv_catpvn(PL_subname,tmpbuf,len);
5227 }
5228 s = skipspace(d);
5229 have_name = TRUE;
5230 }
5231 else {
5232 if (key == KEY_my)
5233 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5234 PL_expect = XTERMBLOCK;
5235 attrful = XATTRTERM;
5236 sv_setpv(PL_subname,"?");
5237 have_name = FALSE;
5238 }
5239
5240 if (key == KEY_format) {
5241 if (*s == '=')
5242 PL_lex_formbrack = PL_lex_brackets + 1;
5243 if (have_name)
5244 (void) force_word(PL_oldbufptr + tboffset, WORD,
5245 FALSE, TRUE, TRUE);
5246 OPERATOR(FORMAT);
5247 }
5248
5249 /* Look for a prototype */
5250 if (*s == '(') {
5251 char *p;
5252
5253 s = scan_str(s,FALSE,FALSE);
5254 if (!s)
5255 Perl_croak(aTHX_ "Prototype not terminated");
5256 /* strip spaces and check for bad characters */
5257 d = SvPVX(PL_lex_stuff);
5258 tmp = 0;
5259 bad_proto = FALSE;
5260 for (p = d; *p; ++p) {
5261 if (!isSPACE(*p)) {
5262 d[tmp++] = *p;
5263 if (!strchr("$@%*;[]&\\", *p))
5264 bad_proto = TRUE;
5265 }
5266 }
5267 d[tmp] = '\0';
5268 if (bad_proto && ckWARN(WARN_SYNTAX))
5269 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5270 "Illegal character in prototype for %"SVf" : %s",
5271 PL_subname, d);
5272 SvCUR(PL_lex_stuff) = tmp;
5273 have_proto = TRUE;
5274
5275 s = skipspace(s);
5276 }
5277 else
5278 have_proto = FALSE;
5279
5280 if (*s == ':' && s[1] != ':')
5281 PL_expect = attrful;
5282 else if (*s != '{' && key == KEY_sub) {
5283 if (!have_name)
5284 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5285 else if (*s != ';')
5286 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5287 }
5288
5289 if (have_proto) {
5290 PL_nextval[PL_nexttoke].opval =
5291 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5292 PL_lex_stuff = Nullsv;
5293 force_next(THING);
5294 }
5295 if (!have_name) {
5296 sv_setpv(PL_subname,
5297 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5298 TOKEN(ANONSUB);
5299 }
5300 (void) force_word(PL_oldbufptr + tboffset, WORD,
5301 FALSE, TRUE, TRUE);
5302 if (key == KEY_my)
5303 TOKEN(MYSUB);
5304 TOKEN(SUB);
5305 }
5306
5307 case KEY_system:
5308 set_csh();
5309 LOP(OP_SYSTEM,XREF);
5310
5311 case KEY_symlink:
5312 LOP(OP_SYMLINK,XTERM);
5313
5314 case KEY_syscall:
5315 LOP(OP_SYSCALL,XTERM);
5316
5317 case KEY_sysopen:
5318 LOP(OP_SYSOPEN,XTERM);
5319
5320 case KEY_sysseek:
5321 LOP(OP_SYSSEEK,XTERM);
5322
5323 case KEY_sysread:
5324 LOP(OP_SYSREAD,XTERM);
5325
5326 case KEY_syswrite:
5327 LOP(OP_SYSWRITE,XTERM);
5328
5329 case KEY_tr:
5330 s = scan_trans(s);
5331 TERM(sublex_start());
5332
5333 case KEY_tell:
5334 UNI(OP_TELL);
5335
5336 case KEY_telldir:
5337 UNI(OP_TELLDIR);
5338
5339 case KEY_tie:
5340 LOP(OP_TIE,XTERM);
5341
5342 case KEY_tied:
5343 UNI(OP_TIED);
5344
5345 case KEY_time:
5346 FUN0(OP_TIME);
5347
5348 case KEY_times:
5349 FUN0(OP_TMS);
5350
5351 case KEY_truncate:
5352 LOP(OP_TRUNCATE,XTERM);
5353
5354 case KEY_uc:
5355 UNI(OP_UC);
5356
5357 case KEY_ucfirst:
5358 UNI(OP_UCFIRST);
5359
5360 case KEY_untie:
5361 UNI(OP_UNTIE);
5362
5363 case KEY_until:
5364 yylval.ival = CopLINE(PL_curcop);
5365 OPERATOR(UNTIL);
5366
5367 case KEY_unless:
5368 yylval.ival = CopLINE(PL_curcop);
5369 OPERATOR(UNLESS);
5370
5371 case KEY_unlink:
5372 LOP(OP_UNLINK,XTERM);
5373
5374 case KEY_undef:
5375 UNIDOR(OP_UNDEF);
5376
5377 case KEY_unpack:
5378 LOP(OP_UNPACK,XTERM);
5379
5380 case KEY_utime:
5381 LOP(OP_UTIME,XTERM);
5382
5383 case KEY_umask:
5384 UNIDOR(OP_UMASK);
5385
5386 case KEY_unshift:
5387 LOP(OP_UNSHIFT,XTERM);
5388
5389 case KEY_use:
5390 if (PL_expect != XSTATE)
5391 yyerror("\"use\" not allowed in expression");
5392 s = skipspace(s);
5393 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5394 s = force_version(s, TRUE);
5395 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5396 PL_nextval[PL_nexttoke].opval = Nullop;
5397 force_next(WORD);
5398 }
5399 else if (*s == 'v') {
5400 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5401 s = force_version(s, FALSE);
5402 }
5403 }
5404 else {
5405 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5406 s = force_version(s, FALSE);
5407 }
5408 yylval.ival = 1;
5409 OPERATOR(USE);
5410
5411 case KEY_values:
5412 UNI(OP_VALUES);
5413
5414 case KEY_vec:
5415 LOP(OP_VEC,XTERM);
5416
5417 case KEY_while:
5418 yylval.ival = CopLINE(PL_curcop);
5419 OPERATOR(WHILE);
5420
5421 case KEY_warn:
5422 PL_hints |= HINT_BLOCK_SCOPE;
5423 LOP(OP_WARN,XTERM);
5424
5425 case KEY_wait:
5426 FUN0(OP_WAIT);
5427
5428 case KEY_waitpid:
5429 LOP(OP_WAITPID,XTERM);
5430
5431 case KEY_wantarray:
5432 FUN0(OP_WANTARRAY);
5433
5434 case KEY_write:
5435#ifdef EBCDIC
5436 {
5437 char ctl_l[2];
5438 ctl_l[0] = toCTRL('L');
5439 ctl_l[1] = '\0';
5440 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5441 }
5442#else
5443 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5444#endif
5445 UNI(OP_ENTERWRITE);
5446
5447 case KEY_x:
5448 if (PL_expect == XOPERATOR)
5449 Mop(OP_REPEAT);
5450 check_uni();
5451 goto just_a_word;
5452
5453 case KEY_xor:
5454 yylval.ival = OP_XOR;
5455 OPERATOR(OROP);
5456
5457 case KEY_y:
5458 s = scan_trans(s);
5459 TERM(sublex_start());
5460 }
5461 }}
5462}
5463#ifdef __SC__
5464#pragma segment Main
5465#endif
5466
5467static int
5468S_pending_ident(pTHX)
5469{
5470 register char *d;
5471 register I32 tmp = 0;
5472 /* pit holds the identifier we read and pending_ident is reset */
5473 char pit = PL_pending_ident;
5474 PL_pending_ident = 0;
5475
5476 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5477 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5478
5479 /* if we're in a my(), we can't allow dynamics here.
5480 $foo'bar has already been turned into $foo::bar, so
5481 just check for colons.
5482
5483 if it's a legal name, the OP is a PADANY.
5484 */
5485 if (PL_in_my) {
5486 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5487 if (strchr(PL_tokenbuf,':'))
5488 yyerror(Perl_form(aTHX_ "No package name allowed for "
5489 "variable %s in \"our\"",
5490 PL_tokenbuf));
5491 tmp = allocmy(PL_tokenbuf);
5492 }
5493 else {
5494 if (strchr(PL_tokenbuf,':'))
5495 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5496
5497 yylval.opval = newOP(OP_PADANY, 0);
5498 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5499 return PRIVATEREF;
5500 }
5501 }
5502
5503 /*
5504 build the ops for accesses to a my() variable.
5505
5506 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5507 then used in a comparison. This catches most, but not
5508 all cases. For instance, it catches
5509 sort { my($a); $a <=> $b }
5510 but not
5511 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5512 (although why you'd do that is anyone's guess).
5513 */
5514
5515 if (!strchr(PL_tokenbuf,':')) {
5516 if (!PL_in_my)
5517 tmp = pad_findmy(PL_tokenbuf);
5518 if (tmp != NOT_IN_PAD) {
5519 /* might be an "our" variable" */
5520 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5521 /* build ops for a bareword */
5522 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
5523 sv_catpvn(sym, "::", 2);
5524 sv_catpv(sym, PL_tokenbuf+1);
5525 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5526 yylval.opval->op_private = OPpCONST_ENTERED;
5527 gv_fetchsv(sym,
5528 (PL_in_eval
5529 ? (GV_ADDMULTI | GV_ADDINEVAL)
5530 : GV_ADDMULTI
5531 ),
5532 ((PL_tokenbuf[0] == '$') ? SVt_PV
5533 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5534 : SVt_PVHV));
5535 return WORD;
5536 }
5537
5538 /* if it's a sort block and they're naming $a or $b */
5539 if (PL_last_lop_op == OP_SORT &&
5540 PL_tokenbuf[0] == '$' &&
5541 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5542 && !PL_tokenbuf[2])
5543 {
5544 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5545 d < PL_bufend && *d != '\n';
5546 d++)
5547 {
5548 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5549 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5550 PL_tokenbuf);
5551 }
5552 }
5553 }
5554
5555 yylval.opval = newOP(OP_PADANY, 0);
5556 yylval.opval->op_targ = tmp;
5557 return PRIVATEREF;
5558 }
5559 }
5560
5561 /*
5562 Whine if they've said @foo in a doublequoted string,
5563 and @foo isn't a variable we can find in the symbol
5564 table.
5565 */
5566 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5567 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5568 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5569 && ckWARN(WARN_AMBIGUOUS))
5570 {
5571 /* Downgraded from fatal to warning 20000522 mjd */
5572 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5573 "Possible unintended interpolation of %s in string",
5574 PL_tokenbuf);
5575 }
5576 }
5577
5578 /* build ops for a bareword */
5579 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5580 yylval.opval->op_private = OPpCONST_ENTERED;
5581 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5582 ((PL_tokenbuf[0] == '$') ? SVt_PV
5583 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5584 : SVt_PVHV));
5585 return WORD;
5586}
5587
5588/* Weights are the number of occurrences of that keyword in about 135M of
5589 input to Perl_keyword from a lot of real perl. This routine is about 20%
5590 faster than the routine it replaces. */
5591
5592I32
5593Perl_keyword (pTHX_ char *name, I32 len) {
5594 /* Initially switch on the length of the name. */
5595 switch (len) {
5596 case 1:
5597 /* Names all of length 1. */
5598 /* m q s x y */
5599 /* Offset 0 gives the best switch position. */
5600 switch (name[0]) {
5601 case 'm':
5602 {
5603 return KEY_m; /* Weight 148776 */
5604 }
5605 break;
5606 case 'q':
5607 {
5608 return KEY_q; /* Weight 69076 */
5609 }
5610 break;
5611 case 's':
5612 {
5613 return KEY_s; /* Weight 403691 */
5614 }
5615 break;
5616 case 'x':
5617 {
5618 return -KEY_x; /* Weight 38549 */
5619 }
5620 break;
5621 case 'y':
5622 {
5623 return KEY_y; /* Weight 567 */
5624 }
5625 break;
5626 }
5627 break;
5628 case 2:
5629 /* Names all of length 2. */
5630 /* do eq ge gt if lc le lt my ne no or qq qr qw qx tr uc */
5631 /* Offset 0 gives the best switch position. */
5632 switch (name[0]) {
5633 case 'd':
5634 if (name[1] == 'o') {
5635 return KEY_do; /* Weight 96004 */
5636 }
5637 break;
5638 case 'e':
5639 if (name[1] == 'q') {
5640 return -KEY_eq; /* Weight 797065 */
5641 }
5642 break;
5643 case 'g':
5644 if (name[1] == 'e') {
5645 return -KEY_ge; /* Weight 5666 */
5646 }
5647 if (name[1] == 't') {
5648 return -KEY_gt; /* Weight 897 */
5649 }
5650 break;
5651 case 'i':
5652 if (name[1] == 'f') {
5653 return KEY_if; /* Weight 2482605 */
5654 }
5655 break;
5656 case 'l':
5657 if (name[1] == 'c') {
5658 return -KEY_lc; /* Weight 38487 */
5659 }
5660 if (name[1] == 'e') {
5661 return -KEY_le; /* Weight 4052 */
5662 }
5663 if (name[1] == 't') {
5664 return -KEY_lt; /* Weight 335 */
5665 }
5666 break;
5667 case 'm':
5668 if (name[1] == 'y') {
5669 return KEY_my; /* Weight 3785925 */
5670 }
5671 break;
5672 case 'n':
5673 if (name[1] == 'e') {
5674 return -KEY_ne; /* Weight 112906 */
5675 }
5676 if (name[1] == 'o') {
5677 return KEY_no; /* Weight 61989 */
5678 }
5679 break;
5680 case 'o':
5681 if (name[1] == 'r') {
5682 return -KEY_or; /* Weight 405163 */
5683 }
5684 break;
5685 case 'q':
5686 if (name[1] == 'w') {
5687 return KEY_qw; /* Weight 415641 */
5688 }
5689 if (name[1] == 'q') {
5690 return KEY_qq; /* Weight 55149 */
5691 }
5692 if (name[1] == 'r') {
5693 return KEY_qr; /* Weight 28519 */
5694 }
5695 if (name[1] == 'x') {
5696 return KEY_qx; /* Weight 177 */
5697 }
5698 break;
5699 case 't':
5700 if (name[1] == 'r') {
5701 return KEY_tr; /* Weight 22665 */
5702 }
5703 break;
5704 case 'u':
5705 if (name[1] == 'c') {
5706 return -KEY_uc; /* Weight 16961 */
5707 }
5708 break;
5709 }
5710 break;
5711 case 3:
5712 /* Names all of length 3. */
5713 /* END abs and chr cmp cos die eof err exp for hex int log map not oct ord
5714 our pop pos ref sin sub tie use vec xor */
5715 /* Offset 0 gives the best switch position. */
5716 switch (*name++) {
5717 case 'E':
5718 if (name[0] == 'N' && name[1] == 'D') {
5719 return KEY_END; /* Weight 3565 */
5720 }
5721 break;
5722 case 'a':
5723 if (name[0] == 'n' && name[1] == 'd') {
5724 return -KEY_and; /* Weight 284867 */
5725 }
5726 if (name[0] == 'b' && name[1] == 's') {
5727 return -KEY_abs; /* Weight 7767 */
5728 }
5729 break;
5730 case 'c':
5731 if (name[0] == 'h' && name[1] == 'r') {
5732 return -KEY_chr; /* Weight 35654 */
5733 }
5734 if (name[0] == 'm' && name[1] == 'p') {
5735 return -KEY_cmp; /* Weight 6808 */
5736 }
5737 if (name[0] == 'o' && name[1] == 's') {
5738 return -KEY_cos; /* Weight 447 */
5739 }
5740 break;
5741 case 'd':
5742 if (name[0] == 'i' && name[1] == 'e') {
5743 return -KEY_die; /* Weight 192203 */
5744 }
5745 break;
5746 case 'e':
5747 if (name[0] == 'o' && name[1] == 'f') {
5748 return -KEY_eof; /* Weight 1618 */
5749 }
5750 if (name[0] == 'r' && name[1] == 'r') {
5751 return -KEY_err; /* Weight 522 */
5752 }
5753 if (name[0] == 'x' && name[1] == 'p') {
5754 return -KEY_exp; /* Weight 423 */
5755 }
5756 break;
5757 case 'f':
5758 if (name[0] == 'o' && name[1] == 'r') {
5759 return KEY_for; /* Weight 118158 */
5760 }
5761 break;
5762 case 'h':
5763 if (name[0] == 'e' && name[1] == 'x') {
5764 return -KEY_hex; /* Weight 3629 */
5765 }
5766 break;
5767 case 'i':
5768 if (name[0] == 'n' && name[1] == 't') {
5769 return -KEY_int; /* Weight 18549 */
5770 }
5771 break;
5772 case 'l':
5773 if (name[0] == 'o' && name[1] == 'g') {
5774 return -KEY_log;
5775 }
5776 break;
5777 case 'm':
5778 if (name[0] == 'a' && name[1] == 'p') {
5779 return KEY_map; /* Weight 115207 */
5780 }
5781 break;
5782 case 'n':
5783 if (name[0] == 'o' && name[1] == 't') {
5784 return -KEY_not; /* Weight 55868 */
5785 }
5786 break;
5787 case 'o':
5788 if (name[0] == 'u' && name[1] == 'r') {
5789 return KEY_our; /* Weight 194417 */
5790 }
5791 if (name[0] == 'r' && name[1] == 'd') {
5792 return -KEY_ord; /* Weight 22221 */
5793 }
5794 if (name[0] == 'c' && name[1] == 't') {
5795 return -KEY_oct; /* Weight 4195 */
5796 }
5797 break;
5798 case 'p':
5799 if (name[0] == 'o' && name[1] == 'p') {
5800 return -KEY_pop; /* Weight 46933 */
5801 }
5802 if (name[0] == 'o' && name[1] == 's') {
5803 return KEY_pos; /* Weight 5503 */
5804 }
5805 break;
5806 case 'r':
5807 if (name[0] == 'e' && name[1] == 'f') {
5808 return -KEY_ref; /* Weight 347102 */
5809 }
5810 break;
5811 case 's':
5812 if (name[0] == 'u' && name[1] == 'b') {
5813 return KEY_sub; /* Weight 2053554 */
5814 }
5815 if (name[0] == 'i' && name[1] == 'n') {
5816 return -KEY_sin; /* Weight 499 */
5817 }
5818 break;
5819 case 't':
5820 if (name[0] == 'i' && name[1] == 'e') {
5821 return KEY_tie; /* Weight 10131 */
5822 }
5823 break;
5824 case 'u':
5825 if (name[0] == 's' && name[1] == 'e') {
5826 return KEY_use; /* Weight 686081 */
5827 }
5828 break;
5829 case 'v':
5830 if (name[0] == 'e' && name[1] == 'c') {
5831 return -KEY_vec; /* Weight 110566 */
5832 }
5833 break;
5834 case 'x':
5835 if (name[0] == 'o' && name[1] == 'r') {
5836 return -KEY_xor; /* Weight 619 */
5837 }
5838 break;
5839 }
5840 break;
5841 case 4:
5842 /* Names all of length 4. */
5843 /* CORE INIT bind chop dump each else eval exec exit fork getc glob goto
5844 grep join keys kill last link lock next open pack pipe push rand read
5845 recv redo seek send sort sqrt stat tell tied time wait warn */
5846 /* Offset 0 gives the best switch position. */
5847 switch (*name++) {
5848 case 'C':
5849 if (!memcmp(name, "ORE", 3)) {
5850 /* C */
5851 return -KEY_CORE; /* Weight 47391 */
5852 }
5853 break;
5854 case 'I':
5855 if (!memcmp(name, "NIT", 3)) {
5856 /* I */
5857 return KEY_INIT; /* Weight 418 */
5858 }
5859 break;
5860 case 'b':
5861 if (!memcmp(name, "ind", 3)) {
5862 /* b */
5863 return -KEY_bind; /* Weight 290 */
5864 }
5865 break;
5866 case 'c':
5867 if (!memcmp(name, "hop", 3)) {
5868 /* c */
5869 return -KEY_chop; /* Weight 10172 */
5870 }
5871 break;
5872 case 'd':
5873 if (!memcmp(name, "ump", 3)) {
5874 /* d */
5875 return -KEY_dump; /* Weight 274 */
5876 }
5877 break;
5878 case 'e':
5879 if (!memcmp(name, "lse", 3)) {
5880 /* e */
5881 return KEY_else; /* Weight 527806 */
5882 }
5883 if (!memcmp(name, "val", 3)) {
5884 /* e */
5885 return KEY_eval; /* Weight 136977 */
5886 }
5887 if (!memcmp(name, "ach", 3)) {
5888 /* e */
5889 return -KEY_each; /* Weight 18414 */
5890 }
5891 if (!memcmp(name, "xit", 3)) {
5892 /* e */
5893 return -KEY_exit; /* Weight 8262 */
5894 }
5895 if (!memcmp(name, "xec", 3)) {
5896 /* e */
5897 return -KEY_exec; /* Weight 429 */
5898 }
5899 break;
5900 case 'f':
5901 if (!memcmp(name, "ork", 3)) {
5902 /* f */
5903 return -KEY_fork; /* Weight 327 */
5904 }
5905 break;
5906 case 'g':
5907 if (!memcmp(name, "oto", 3)) {
5908 /* g */
5909 return KEY_goto; /* Weight 109258 */
5910 }
5911 if (!memcmp(name, "rep", 3)) {
5912 /* g */
5913 return KEY_grep; /* Weight 75912 */
5914 }
5915 if (!memcmp(name, "lob", 3)) {
5916 /* g */
5917 return KEY_glob; /* Weight 2172 */
5918 }
5919 if (!memcmp(name, "etc", 3)) {
5920 /* g */
5921 return -KEY_getc; /* Weight 981 */
5922 }
5923 break;
5924 case 'j':
5925 if (!memcmp(name, "oin", 3)) {
5926 /* j */
5927 return -KEY_join; /* Weight 130820 */
5928 }
5929 break;
5930 case 'k':
5931 if (!memcmp(name, "eys", 3)) {
5932 /* k */
5933 return -KEY_keys; /* Weight 131427 */
5934 }
5935 if (!memcmp(name, "ill", 3)) {
5936 /* k */
5937 return -KEY_kill; /* Weight 382 */
5938 }
5939 break;
5940 case 'l':
5941 if (!memcmp(name, "ast", 3)) {
5942 /* l */
5943 return KEY_last; /* Weight 95078 */
5944 }
5945 if (!memcmp(name, "ock", 3)) {
5946 /* l */
5947 return -KEY_lock; /* Weight 4210 */
5948 }
5949 if (!memcmp(name, "ink", 3)) {
5950 /* l */
5951 return -KEY_link; /* Weight 425 */
5952 }
5953 break;
5954 case 'n':
5955 if (!memcmp(name, "ext", 3)) {
5956 /* n */
5957 return KEY_next; /* Weight 153355 */
5958 }
5959 break;
5960 case 'o':
5961 if (!memcmp(name, "pen", 3)) {
5962 /* o */
5963 return -KEY_open; /* Weight 39060 */
5964 }
5965 break;
5966 case 'p':
5967 if (!memcmp(name, "ush", 3)) {
5968 /* p */
5969 return -KEY_push; /* Weight 256975 */
5970 }
5971 if (!memcmp(name, "ack", 3)) {
5972 /* p */
5973 return -KEY_pack; /* Weight 14491 */
5974 }
5975 if (!memcmp(name, "ipe", 3)) {
5976 /* p */
5977 return -KEY_pipe; /* Weight 344 */
5978 }
5979 break;
5980 case 'r':
5981 if (!memcmp(name, "ead", 3)) {
5982 /* r */
5983 return -KEY_read; /* Weight 9434 */
5984 }
5985 if (!memcmp(name, "edo", 3)) {
5986 /* r */
5987 return KEY_redo; /* Weight 5219 */
5988 }
5989 if (!memcmp(name, "and", 3)) {
5990 /* r */
5991 return -KEY_rand; /* Weight 1824 */
5992 }
5993 if (!memcmp(name, "ecv", 3)) {
5994 /* r */
5995 return -KEY_recv; /* Weight 250 */
5996 }
5997 break;
5998 case 's':
5999 if (!memcmp(name, "tat", 3)) {
6000 /* s */
6001 return -KEY_stat; /* Weight 36702 */
6002 }
6003 if (!memcmp(name, "ort", 3)) {
6004 /* s */
6005 return KEY_sort; /* Weight 36394 */
6006 }
6007 if (!memcmp(name, "eek", 3)) {
6008 /* s */
6009 return -KEY_seek; /* Weight 2174 */
6010 }
6011 if (!memcmp(name, "qrt", 3)) {
6012 /* s */
6013 return -KEY_sqrt; /* Weight 766 */
6014 }
6015 if (!memcmp(name, "end", 3)) {
6016 /* s */
6017 return -KEY_send; /* Weight 496 */
6018 }
6019 break;
6020 case 't':
6021 if (!memcmp(name, "ime", 3)) {
6022 /* t */
6023 return -KEY_time; /* Weight 32168 */
6024 }
6025 if (!memcmp(name, "ied", 3)) {
6026 /* t */
6027 return KEY_tied; /* Weight 9749 */
6028 }
6029 if (!memcmp(name, "ell", 3)) {
6030 /* t */
6031 return -KEY_tell; /* Weight 2578 */
6032 }
6033 break;
6034 case 'w':
6035 if (!memcmp(name, "arn", 3)) {
6036 /* w */
6037 return -KEY_warn; /* Weight 91372 */
6038 }
6039 if (!memcmp(name, "ait", 3)) {
6040 /* w */
6041 return -KEY_wait;
6042 }
6043 break;
6044 }
6045 break;
6046 case 5:
6047 /* Names all of length 5. */
6048 /* BEGIN CHECK alarm atan2 bless chdir chmod chomp chown close crypt elsif
6049 fcntl flock index ioctl local lstat mkdir print reset rmdir semop shift
6050 sleep split srand study times umask undef untie until utime while write
6051 */
6052 /* Offset 3 gives the best switch position. */
6053 switch (name[3]) {
6054 case 'C':
6055 if (!memcmp(name, "CHECK", 5)) {
6056 /* ^ */
6057 return KEY_CHECK; /* Weight 538 */
6058 }
6059 break;
6060 case 'I':
6061 if (!memcmp(name, "BEGIN", 5)) {
6062 /* ^ */
6063 return KEY_BEGIN; /* Weight 24125 */
6064 }
6065 break;
6066 case 'a':
6067 if (!memcmp(name, "local", 5)) {
6068 /* ^ */
6069 return KEY_local; /* Weight 262973 */
6070 }
6071 if (!memcmp(name, "lstat", 5)) {
6072 /* ^ */
6073 return -KEY_lstat; /* Weight 13859 */
6074 }
6075 break;
6076 case 'c':
6077 if (!memcmp(name, "flock", 5)) {
6078 /* ^ */
6079 return -KEY_flock; /* Weight 260 */
6080 }
6081 break;
6082 case 'd':
6083 if (!memcmp(name, "study", 5)) {
6084 /* ^ */
6085 return KEY_study; /* Weight 1933 */
6086 }
6087 break;
6088 case 'e':
6089 if (!memcmp(name, "undef", 5)) {
6090 /* ^ */
6091 return KEY_undef; /* Weight 311156 */
6092 }
6093 if (!memcmp(name, "index", 5)) {
6094 /* ^ */
6095 return -KEY_index; /* Weight 51465 */
6096 }
6097 if (!memcmp(name, "sleep", 5)) {
6098 /* ^ */
6099 return -KEY_sleep; /* Weight 519 */
6100 }
6101 if (!memcmp(name, "times", 5)) {
6102 /* ^ */
6103 return -KEY_times; /* Weight 310 */
6104 }
6105 if (!memcmp(name, "reset", 5)) {
6106 /* ^ */
6107 return -KEY_reset; /* Weight 127 */
6108 }
6109 break;
6110 case 'f':
6111 if (!memcmp(name, "shift", 5)) {
6112 /* ^ */
6113 return -KEY_shift; /* Weight 904125 */
6114 }
6115 break;
6116 case 'i':
6117 if (!memcmp(name, "elsif", 5)) {
6118 /* ^ */
6119 return KEY_elsif; /* Weight 322365 */
6120 }
6121 if (!memcmp(name, "split", 5)) {
6122 /* ^ */
6123 return KEY_split; /* Weight 93678 */
6124 }
6125 if (!memcmp(name, "chdir", 5)) {
6126 /* ^ */
6127 return -KEY_chdir; /* Weight 20317 */
6128 }
6129 if (!memcmp(name, "mkdir", 5)) {
6130 /* ^ */
6131 return -KEY_mkdir; /* Weight 2951 */
6132 }
6133 if (!memcmp(name, "rmdir", 5)) {
6134 /* ^ */
6135 return -KEY_rmdir; /* Weight 2493 */
6136 }
6137 if (!memcmp(name, "until", 5)) {
6138 /* ^ */
6139 return KEY_until; /* Weight 818 */
6140 }
6141 if (!memcmp(name, "untie", 5)) {
6142 /* ^ */
6143 return KEY_untie; /* Weight 420 */
6144 }
6145 break;
6146 case 'l':
6147 if (!memcmp(name, "while", 5)) {
6148 /* ^ */
6149 return KEY_while; /* Weight 120305 */
6150 }
6151 break;
6152 case 'm':
6153 if (!memcmp(name, "chomp", 5)) {
6154 /* ^ */
6155 return -KEY_chomp; /* Weight 22337 */
6156 }
6157 if (!memcmp(name, "utime", 5)) {
6158 /* ^ */
6159 return -KEY_utime; /* Weight 3849 */
6160 }
6161 break;
6162 case 'n':
6163 if (!memcmp(name, "print", 5)) {
6164 /* ^ */
6165 return KEY_print; /* Weight 220904 */
6166 }
6167 if (!memcmp(name, "atan2", 5)) {
6168 /* ^ */
6169 return -KEY_atan2; /* Weight 350 */
6170 }
6171 if (!memcmp(name, "srand", 5)) {
6172 /* ^ */
6173 return -KEY_srand; /* Weight 41 */
6174 }
6175 break;
6176 case 'o':
6177 if (!memcmp(name, "chmod", 5)) {
6178 /* ^ */
6179 return -KEY_chmod; /* Weight 18455 */
6180 }
6181 if (!memcmp(name, "semop", 5)) {
6182 /* ^ */
6183 return -KEY_semop;
6184 }
6185 break;
6186 case 'p':
6187 if (!memcmp(name, "crypt", 5)) {
6188 /* ^ */
6189 return -KEY_crypt; /* Weight 8 */
6190 }
6191 break;
6192 case 'r':
6193 if (!memcmp(name, "alarm", 5)) {
6194 /* ^ */
6195 return -KEY_alarm;
6196 }
6197 break;
6198 case 's':
6199 if (!memcmp(name, "bless", 5)) {
6200 /* ^ */
6201 return -KEY_bless; /* Weight 62111 */
6202 }
6203 if (!memcmp(name, "close", 5)) {
6204 /* ^ */
6205 return -KEY_close; /* Weight 44077 */
6206 }
6207 if (!memcmp(name, "umask", 5)) {
6208 /* ^ */
6209 return -KEY_umask; /* Weight 1658 */
6210 }
6211 break;
6212 case 't':
6213 if (!memcmp(name, "write", 5)) {
6214 /* ^ */
6215 return -KEY_write; /* Weight 2525 */
6216 }
6217 if (!memcmp(name, "fcntl", 5)) {
6218 /* ^ */
6219 return -KEY_fcntl; /* Weight 1257 */
6220 }
6221 if (!memcmp(name, "ioctl", 5)) {
6222 /* ^ */
6223 return -KEY_ioctl; /* Weight 967 */
6224 }
6225 break;
6226 case 'w':
6227 if (!memcmp(name, "chown", 5)) {
6228 /* ^ */
6229 return -KEY_chown; /* Weight 34 */
6230 }
6231 break;
6232 }
6233 break;
6234 case 6:
6235 /* Names all of length 6. */
6236 /* accept caller chroot delete elseif exists fileno format gmtime length
6237 listen msgctl msgget msgrcv msgsnd printf rename return rindex scalar
6238 select semctl semget shmctl shmget socket splice substr system unless
6239 unlink unpack values */
6240 /* Offset 3 gives the best switch position. */
6241 switch (name[3]) {
6242 case 'a':
6243 if (!memcmp(name, "unpack", 6)) {
6244 /* ^ */
6245 return -KEY_unpack; /* Weight 7849 */
6246 }
6247 if (!memcmp(name, "rename", 6)) {
6248 /* ^ */
6249 return -KEY_rename; /* Weight 4918 */
6250 }
6251 break;
6252 case 'c':
6253 if (!memcmp(name, "semctl", 6)) {
6254 /* ^ */
6255 return -KEY_semctl; /* Weight 17 */
6256 }
6257 if (!memcmp(name, "msgctl", 6)) {
6258 /* ^ */
6259 return -KEY_msgctl;
6260 }
6261 if (!memcmp(name, "shmctl", 6)) {
6262 /* ^ */
6263 return -KEY_shmctl;
6264 }
6265 break;
6266 case 'd':
6267 if (!memcmp(name, "rindex", 6)) {
6268 /* ^ */
6269 return -KEY_rindex; /* Weight 5005 */
6270 }
6271 break;
6272 case 'e':
6273 if (!memcmp(name, "unless", 6)) {
6274 /* ^ */
6275 return KEY_unless; /* Weight 913955 */
6276 }
6277 if (!memcmp(name, "delete", 6)) {
6278 /* ^ */
6279 return KEY_delete; /* Weight 74966 */
6280 }
6281 if (!memcmp(name, "select", 6)) {
6282 /* ^ */
6283 return -KEY_select; /* Weight 12209 */
6284 }
6285 if (!memcmp(name, "fileno", 6)) {
6286 /* ^ */
6287 return -KEY_fileno; /* Weight 8591 */
6288 }
6289 if (!memcmp(name, "accept", 6)) {
6290 /* ^ */
6291 return -KEY_accept; /* Weight 233 */
6292 }
6293 if (!memcmp(name, "elseif", 6)) {
6294 /* ^ */
6295 /* This is somewhat hacky. */
6296 if(ckWARN_d(WARN_SYNTAX))
6297 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
6298 break;
6299 }
6300 break;
6301 case 'g':
6302 if (!memcmp(name, "length", 6)) {
6303 /* ^ */
6304 return -KEY_length; /* Weight 163975 */
6305 }
6306 if (!memcmp(name, "msgget", 6)) {
6307 /* ^ */
6308 return -KEY_msgget;
6309 }
6310 if (!memcmp(name, "semget", 6)) {
6311 /* ^ */
6312 return -KEY_semget;
6313 }
6314 if (!memcmp(name, "shmget", 6)) {
6315 /* ^ */
6316 return -KEY_shmget;
6317 }
6318 break;
6319 case 'i':
6320 if (!memcmp(name, "splice", 6)) {
6321 /* ^ */
6322 return -KEY_splice; /* Weight 25143 */
6323 }
6324 if (!memcmp(name, "unlink", 6)) {
6325 /* ^ */
6326 return -KEY_unlink; /* Weight 18616 */
6327 }
6328 if (!memcmp(name, "gmtime", 6)) {
6329 /* ^ */
6330 return -KEY_gmtime; /* Weight 4040 */
6331 }
6332 break;
6333 case 'k':
6334 if (!memcmp(name, "socket", 6)) {
6335 /* ^ */
6336 return -KEY_socket;
6337 }
6338 break;
6339 case 'l':
6340 if (!memcmp(name, "caller", 6)) {
6341 /* ^ */
6342 return -KEY_caller; /* Weight 148457 */
6343 }
6344 if (!memcmp(name, "scalar", 6)) {
6345 /* ^ */
6346 return KEY_scalar; /* Weight 43953 */
6347 }
6348 break;
6349 case 'm':
6350 if (!memcmp(name, "format", 6)) {
6351 /* ^ */
6352 return KEY_format; /* Weight 1735 */
6353 }
6354 break;
6355 case 'n':
6356 if (!memcmp(name, "printf", 6)) {
6357 /* ^ */
6358 return KEY_printf; /* Weight 6874 */
6359 }
6360 break;
6361 case 'o':
6362 if (!memcmp(name, "chroot", 6)) {
6363 /* ^ */
6364 return -KEY_chroot;
6365 }
6366 break;
6367 case 'r':
6368 if (!memcmp(name, "msgrcv", 6)) {
6369 /* ^ */
6370 return -KEY_msgrcv;
6371 }
6372 break;
6373 case 's':
6374 if (!memcmp(name, "exists", 6)) {
6375 /* ^ */
6376 return KEY_exists; /* Weight 145939 */
6377 }
6378 if (!memcmp(name, "substr", 6)) {
6379 /* ^ */
6380 return -KEY_substr; /* Weight 121344 */
6381 }
6382 if (!memcmp(name, "msgsnd", 6)) {
6383 /* ^ */
6384 return -KEY_msgsnd;
6385 }
6386 break;
6387 case 't':
6388 if (!memcmp(name, "system", 6)) {
6389 /* ^ */
6390 return -KEY_system; /* Weight 4326 */
6391 }
6392 if (!memcmp(name, "listen", 6)) {
6393 /* ^ */
6394 return -KEY_listen;
6395 }
6396 break;
6397 case 'u':
6398 if (!memcmp(name, "return", 6)) {
6399 /* ^ */
6400 return KEY_return; /* Weight 1401629 */
6401 }
6402 if (!memcmp(name, "values", 6)) {
6403 /* ^ */
6404 return -KEY_values; /* Weight 10110 */
6405 }
6406 break;
6407 }
6408 break;
6409 case 7:
6410 /* Names all of length 7. */
6411 /* DESTROY __END__ binmode connect dbmopen defined foreach getpgrp getppid
6412 lcfirst opendir package readdir require reverse seekdir setpgrp shmread
6413 sprintf symlink syscall sysopen sysread sysseek telldir ucfirst unshift
6414 waitpid */
6415 /* Offset 3 gives the best switch position. */
6416 switch (name[3]) {
6417 case 'N':
6418 if (!memcmp(name, "__END__", 7)) {
6419 /* ^ */
6420 return KEY___END__; /* Weight 112636 */
6421 }
6422 break;
6423 case 'T':
6424 if (!memcmp(name, "DESTROY", 7)) {
6425 /* ^ */
6426 return KEY_DESTROY; /* Weight 7 */
6427 }
6428 break;
6429 case 'c':
6430 if (!memcmp(name, "syscall", 7)) {
6431 /* ^ */
6432 return -KEY_syscall; /* Weight 560 */
6433 }
6434 break;
6435 case 'd':
6436 if (!memcmp(name, "readdir", 7)) {
6437 /* ^ */
6438 return -KEY_readdir; /* Weight 11716 */
6439 }
6440 break;
6441 case 'e':
6442 if (!memcmp(name, "foreach", 7)) {
6443 /* ^ */
6444 return KEY_foreach; /* Weight 281720 */
6445 }
6446 if (!memcmp(name, "reverse", 7)) {
6447 /* ^ */
6448 return -KEY_reverse; /* Weight 10571 */
6449 }
6450 break;
6451 case 'h':
6452 if (!memcmp(name, "unshift", 7)) {
6453 /* ^ */
6454 return -KEY_unshift; /* Weight 36504 */
6455 }
6456 break;
6457 case 'i':
6458 if (!memcmp(name, "defined", 7)) {
6459 /* ^ */
6460 return KEY_defined; /* Weight 694277 */
6461 }
6462 if (!memcmp(name, "sprintf", 7)) {
6463 /* ^ */
6464 return -KEY_sprintf; /* Weight 72704 */
6465 }
6466 if (!memcmp(name, "ucfirst", 7)) {
6467 /* ^ */
6468 return -KEY_ucfirst; /* Weight 1012 */
6469 }
6470 if (!memcmp(name, "lcfirst", 7)) {
6471 /* ^ */
6472 return -KEY_lcfirst; /* Weight 165 */
6473 }
6474 break;
6475 case 'k':
6476 if (!memcmp(name, "package", 7)) {
6477 /* ^ */
6478 return KEY_package; /* Weight 245661 */
6479 }
6480 if (!memcmp(name, "seekdir", 7)) {
6481 /* ^ */
6482 return -KEY_seekdir; /* Weight 20 */
6483 }
6484 break;
6485 case 'l':
6486 if (!memcmp(name, "symlink", 7)) {
6487 /* ^ */
6488 return -KEY_symlink; /* Weight 386 */
6489 }
6490 if (!memcmp(name, "telldir", 7)) {
6491 /* ^ */
6492 return -KEY_telldir; /* Weight 294 */
6493 }
6494 break;
6495 case 'm':
6496 if (!memcmp(name, "binmode", 7)) {
6497 /* ^ */
6498 return -KEY_binmode; /* Weight 12301 */
6499 }
6500 break;
6501 case 'n':
6502 if (!memcmp(name, "opendir", 7)) {
6503 /* ^ */
6504 return -KEY_opendir; /* Weight 9007 */
6505 }
6506 if (!memcmp(name, "connect", 7)) {
6507 /* ^ */
6508 return -KEY_connect; /* Weight 526 */
6509 }
6510 break;
6511 case 'o':
6512 if (!memcmp(name, "sysopen", 7)) {
6513 /* ^ */
6514 return -KEY_sysopen; /* Weight 1230 */
6515 }
6516 if (!memcmp(name, "dbmopen", 7)) {
6517 /* ^ */
6518 return -KEY_dbmopen;
6519 }
6520 break;
6521 case 'p':
6522 if (!memcmp(name, "getppid", 7)) {
6523 /* ^ */
6524 return -KEY_getppid; /* Weight 10 */
6525 }
6526 if (!memcmp(name, "getpgrp", 7)) {
6527 /* ^ */
6528 return -KEY_getpgrp;
6529 }
6530 if (!memcmp(name, "setpgrp", 7)) {
6531 /* ^ */
6532 return -KEY_setpgrp;
6533 }
6534 break;
6535 case 'r':
6536 if (!memcmp(name, "sysread", 7)) {
6537 /* ^ */
6538 return -KEY_sysread; /* Weight 3729 */
6539 }
6540 if (!memcmp(name, "shmread", 7)) {
6541 /* ^ */
6542 return -KEY_shmread;
6543 }
6544 break;
6545 case 's':
6546 if (!memcmp(name, "sysseek", 7)) {
6547 /* ^ */
6548 return -KEY_sysseek; /* Weight 721 */
6549 }
6550 break;
6551 case 't':
6552 if (!memcmp(name, "waitpid", 7)) {
6553 /* ^ */
6554 return -KEY_waitpid; /* Weight 414 */
6555 }
6556 break;
6557 case 'u':
6558 if (!memcmp(name, "require", 7)) {
6559 /* ^ */
6560 return KEY_require; /* Weight 375220 */
6561 }
6562 break;
6563 }
6564 break;
6565 case 8:
6566 /* Names all of length 8. */
6567 /* AUTOLOAD __DATA__ __FILE__ __LINE__ closedir continue dbmclose endgrent
6568 endpwent formline getgrent getgrgid getgrnam getlogin getpwent getpwnam
6569 getpwuid readline readlink readpipe setgrent setpwent shmwrite shutdown
6570 syswrite truncate */
6571 /* Offset 3 gives the best switch position. */
6572 switch (name[3]) {
6573 case 'A':
6574 if (!memcmp(name, "__DATA__", 8)) {
6575 /* ^ */
6576 return KEY___DATA__; /* Weight 395 */
6577 }
6578 break;
6579 case 'I':
6580 if (!memcmp(name, "__FILE__", 8)) {
6581 /* ^ */
6582 return -KEY___FILE__; /* Weight 888 */
6583 }
6584 if (!memcmp(name, "__LINE__", 8)) {
6585 /* ^ */
6586 return -KEY___LINE__; /* Weight 209 */
6587 }
6588 break;
6589 case 'O':
6590 if (!memcmp(name, "AUTOLOAD", 8)) {
6591 /* ^ */
6592 return KEY_AUTOLOAD; /* Weight 2713 */
6593 }
6594 break;
6595 case 'c':
6596 if (!memcmp(name, "dbmclose", 8)) {
6597 /* ^ */
6598 return -KEY_dbmclose;
6599 }
6600 break;
6601 case 'd':
6602 if (!memcmp(name, "readlink", 8)) {
6603 /* ^ */
6604 return -KEY_readlink; /* Weight 1537 */
6605 }
6606 if (!memcmp(name, "readline", 8)) {
6607 /* ^ */
6608 return -KEY_readline; /* Weight 19 */
6609 }
6610 if (!memcmp(name, "readpipe", 8)) {
6611 /* ^ */
6612 return -KEY_readpipe;
6613 }
6614 break;
6615 case 'g':
6616 if (!memcmp(name, "getgrgid", 8)) {
6617 /* ^ */
6618 return -KEY_getgrgid; /* Weight 67 */
6619 }
6620 if (!memcmp(name, "getgrnam", 8)) {
6621 /* ^ */
6622 return -KEY_getgrnam; /* Weight 11 */
6623 }
6624 if (!memcmp(name, "endgrent", 8)) {
6625 /* ^ */
6626 return -KEY_endgrent;
6627 }
6628 if (!memcmp(name, "getgrent", 8)) {
6629 /* ^ */
6630 return -KEY_getgrent;
6631 }
6632 if (!memcmp(name, "setgrent", 8)) {
6633 /* ^ */
6634 return -KEY_setgrent;
6635 }
6636 break;
6637 case 'l':
6638 if (!memcmp(name, "getlogin", 8)) {
6639 /* ^ */
6640 return -KEY_getlogin; /* Weight 158 */
6641 }
6642 break;
6643 case 'm':
6644 if (!memcmp(name, "formline", 8)) {
6645 /* ^ */
6646 return -KEY_formline; /* Weight 959 */
6647 }
6648 break;
6649 case 'n':
6650 if (!memcmp(name, "truncate", 8)) {
6651 /* ^ */
6652 return -KEY_truncate; /* Weight 1351 */
6653 }
6654 break;
6655 case 'p':
6656 if (!memcmp(name, "getpwuid", 8)) {
6657 /* ^ */
6658 return -KEY_getpwuid; /* Weight 681 */
6659 }
6660 if (!memcmp(name, "getpwnam", 8)) {
6661 /* ^ */
6662 return -KEY_getpwnam; /* Weight 483 */
6663 }
6664 if (!memcmp(name, "getpwent", 8)) {
6665 /* ^ */
6666 return -KEY_getpwent; /* Weight 12 */
6667 }
6668 if (!memcmp(name, "endpwent", 8)) {
6669 /* ^ */
6670 return -KEY_endpwent;
6671 }
6672 if (!memcmp(name, "setpwent", 8)) {
6673 /* ^ */
6674 return -KEY_setpwent;
6675 }
6676 break;
6677 case 's':
6678 if (!memcmp(name, "closedir", 8)) {
6679 /* ^ */
6680 return -KEY_closedir; /* Weight 11986 */
6681 }
6682 break;
6683 case 't':
6684 if (!memcmp(name, "continue", 8)) {
6685 /* ^ */
6686 return -KEY_continue; /* Weight 2925 */
6687 }
6688 if (!memcmp(name, "shutdown", 8)) {
6689 /* ^ */
6690 return -KEY_shutdown;
6691 }
6692 break;
6693 case 'w':
6694 if (!memcmp(name, "syswrite", 8)) {
6695 /* ^ */
6696 return -KEY_syswrite; /* Weight 4437 */
6697 }
6698 if (!memcmp(name, "shmwrite", 8)) {
6699 /* ^ */
6700 return -KEY_shmwrite;
6701 }
6702 break;
6703 }
6704 break;
6705 case 9:
6706 /* Names all of length 9. */
6707 /* endnetent getnetent localtime prototype quotemeta rewinddir setnetent
6708 wantarray */
6709 /* Offset 0 gives the best switch position. */
6710 switch (*name++) {
6711 case 'e':
6712 if (!memcmp(name, "ndnetent", 8)) {
6713 /* e */
6714 return -KEY_endnetent;
6715 }
6716 break;
6717 case 'g':
6718 if (!memcmp(name, "etnetent", 8)) {
6719 /* g */
6720 return -KEY_getnetent;
6721 }
6722 break;
6723 case 'l':
6724 if (!memcmp(name, "ocaltime", 8)) {
6725 /* l */
6726 return -KEY_localtime; /* Weight 7993 */
6727 }
6728 break;
6729 case 'p':
6730 if (!memcmp(name, "rototype", 8)) {
6731 /* p */
6732 return KEY_prototype; /* Weight 1602 */
6733 }
6734 break;
6735 case 'q':
6736 if (!memcmp(name, "uotemeta", 8)) {
6737 /* q */
6738 return -KEY_quotemeta; /* Weight 3120 */
6739 }
6740 break;
6741 case 'r':
6742 if (!memcmp(name, "ewinddir", 8)) {
6743 /* r */
6744 return -KEY_rewinddir; /* Weight 218 */
6745 }
6746 break;
6747 case 's':
6748 if (!memcmp(name, "etnetent", 8)) {
6749 /* s */
6750 return -KEY_setnetent; /* Weight 1 */
6751 }
6752 break;
6753 case 'w':
6754 if (!memcmp(name, "antarray", 8)) {
6755 /* w */
6756 return -KEY_wantarray; /* Weight 43024 */
6757 }
6758 break;
6759 }
6760 break;
6761 case 10:
6762 /* Names all of length 10. */
6763 /* endhostent endservent gethostent getservent getsockopt sethostent
6764 setservent setsockopt socketpair */
6765 /* Offset 6 gives the best switch position. */
6766 switch (name[6]) {
6767 case 'k':
6768 if (!memcmp(name, "setsockopt", 10)) {
6769 /* ^ */
6770 return -KEY_setsockopt; /* Weight 356 */
6771 }
6772 if (!memcmp(name, "getsockopt", 10)) {
6773 /* ^ */
6774 return -KEY_getsockopt; /* Weight 243 */
6775 }
6776 break;
6777 case 'p':
6778 if (!memcmp(name, "socketpair", 10)) {
6779 /* ^ */
6780 return -KEY_socketpair;
6781 }
6782 break;
6783 case 't':
6784 if (!memcmp(name, "gethostent", 10)) {
6785 /* ^ */
6786 return -KEY_gethostent; /* Weight 3 */
6787 }
6788 if (!memcmp(name, "endhostent", 10)) {
6789 /* ^ */
6790 return -KEY_endhostent;
6791 }
6792 if (!memcmp(name, "sethostent", 10)) {
6793 /* ^ */
6794 return -KEY_sethostent;
6795 }
6796 break;
6797 case 'v':
6798 if (!memcmp(name, "getservent", 10)) {
6799 /* ^ */
6800 return -KEY_getservent; /* Weight 4 */
6801 }
6802 if (!memcmp(name, "endservent", 10)) {
6803 /* ^ */
6804 return -KEY_endservent;
6805 }
6806 if (!memcmp(name, "setservent", 10)) {
6807 /* ^ */
6808 return -KEY_setservent;
6809 }
6810 break;
6811 }
6812 break;
6813 case 11:
6814 /* Names all of length 11. */
6815 /* __PACKAGE__ endprotoent getpeername getpriority getprotoent getsockname
6816 setpriority setprotoent */
6817 /* Offset 5 gives the best switch position. */
6818 switch (name[5]) {
6819 case 'K':
6820 if (!memcmp(name, "__PACKAGE__", 11)) {
6821 /* ^ */
6822 return -KEY___PACKAGE__; /* Weight 36767 */
6823 }
6824 break;
6825 case 'c':
6826 if (!memcmp(name, "getsockname", 11)) {
6827 /* ^ */
6828 return -KEY_getsockname; /* Weight 235 */
6829 }
6830 break;
6831 case 'e':
6832 if (!memcmp(name, "getpeername", 11)) {
6833 /* ^ */
6834 return -KEY_getpeername; /* Weight 713 */
6835 }
6836 break;
6837 case 'i':
6838 if (!memcmp(name, "getpriority", 11)) {
6839 /* ^ */
6840 return -KEY_getpriority; /* Weight 5 */
6841 }
6842 if (!memcmp(name, "setpriority", 11)) {
6843 /* ^ */
6844 return -KEY_setpriority; /* Weight 2 */
6845 }
6846 break;
6847 case 'o':
6848 if (!memcmp(name, "endprotoent", 11)) {
6849 /* ^ */
6850 return -KEY_endprotoent;
6851 }
6852 if (!memcmp(name, "getprotoent", 11)) {
6853 /* ^ */
6854 return -KEY_getprotoent;
6855 }
6856 if (!memcmp(name, "setprotoent", 11)) {
6857 /* ^ */
6858 return -KEY_setprotoent;
6859 }
6860 break;
6861 }
6862 break;
6863 case 12:
6864 /* Names all of length 12. */
6865 /* getnetbyaddr getnetbyname */
6866 /* Offset 9 gives the best switch position. */
6867 switch (name[9]) {
6868 case 'a':
6869 if (!memcmp(name, "getnetbyname", 12)) {
6870 /* ^ */
6871 return -KEY_getnetbyname;
6872 }
6873 break;
6874 case 'd':
6875 if (!memcmp(name, "getnetbyaddr", 12)) {
6876 /* ^ */
6877 return -KEY_getnetbyaddr;
6878 }
6879 break;
6880 }
6881 break;
6882 case 13:
6883 /* Names all of length 13. */
6884 /* gethostbyaddr gethostbyname getservbyname getservbyport */
6885 /* Offset 10 gives the best switch position. */
6886 switch (name[10]) {
6887 case 'a':
6888 if (!memcmp(name, "gethostbyname", 13)) {
6889 /* ^ */
6890 return -KEY_gethostbyname; /* Weight 970 */
6891 }
6892 if (!memcmp(name, "getservbyname", 13)) {
6893 /* ^ */
6894 return -KEY_getservbyname; /* Weight 299 */
6895 }
6896 break;
6897 case 'd':
6898 if (!memcmp(name, "gethostbyaddr", 13)) {
6899 /* ^ */
6900 return -KEY_gethostbyaddr; /* Weight 68 */
6901 }
6902 break;
6903 case 'o':
6904 if (!memcmp(name, "getservbyport", 13)) {
6905 /* ^ */
6906 return -KEY_getservbyport;
6907 }
6908 break;
6909 }
6910 break;
6911 case 14:
6912 if (!memcmp(name, "getprotobyname", 14)) {
6913 return -KEY_getprotobyname; /* Weight 755 */
6914 }
6915 break;
6916 case 16:
6917 if (!memcmp(name, "getprotobynumber", 16)) {
6918 return -KEY_getprotobynumber; /* Weight 232 */
6919 }
6920 break;
6921 }
6922 return 0;
6923}
6924
6925STATIC void
6926S_checkcomma(pTHX_ register char *s, char *name, char *what)
6927{
6928 char *w;
6929
6930 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
6931 if (ckWARN(WARN_SYNTAX)) {
6932 int level = 1;
6933 for (w = s+2; *w && level; w++) {
6934 if (*w == '(')
6935 ++level;
6936 else if (*w == ')')
6937 --level;
6938 }
6939 if (*w)
6940 for (; *w && isSPACE(*w); w++) ;
6941 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
6942 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6943 "%s (...) interpreted as function",name);
6944 }
6945 }
6946 while (s < PL_bufend && isSPACE(*s))
6947 s++;
6948 if (*s == '(')
6949 s++;
6950 while (s < PL_bufend && isSPACE(*s))
6951 s++;
6952 if (isIDFIRST_lazy_if(s,UTF)) {
6953 w = s++;
6954 while (isALNUM_lazy_if(s,UTF))
6955 s++;
6956 while (s < PL_bufend && isSPACE(*s))
6957 s++;
6958 if (*s == ',') {
6959 int kw;
6960 *s = '\0';
6961 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
6962 *s = ',';
6963 if (kw)
6964 return;
6965 Perl_croak(aTHX_ "No comma allowed after %s", what);
6966 }
6967 }
6968}
6969
6970/* Either returns sv, or mortalizes sv and returns a new SV*.
6971 Best used as sv=new_constant(..., sv, ...).
6972 If s, pv are NULL, calls subroutine with one argument,
6973 and type is used with error messages only. */
6974
6975STATIC SV *
6976S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
6977 const char *type)
6978{
6979 dSP;
6980 HV *table = GvHV(PL_hintgv); /* ^H */
6981 SV *res;
6982 SV **cvp;
6983 SV *cv, *typesv;
6984 const char *why1, *why2, *why3;
6985
6986 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
6987 SV *msg;
6988
6989 why2 = strEQ(key,"charnames")
6990 ? "(possibly a missing \"use charnames ...\")"
6991 : "";
6992 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
6993 (type ? type: "undef"), why2);
6994
6995 /* This is convoluted and evil ("goto considered harmful")
6996 * but I do not understand the intricacies of all the different
6997 * failure modes of %^H in here. The goal here is to make
6998 * the most probable error message user-friendly. --jhi */
6999
7000 goto msgdone;
7001
7002 report:
7003 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
7004 (type ? type: "undef"), why1, why2, why3);
7005 msgdone:
7006 yyerror(SvPVX(msg));
7007 SvREFCNT_dec(msg);
7008 return sv;
7009 }
7010 cvp = hv_fetch(table, key, strlen(key), FALSE);
7011 if (!cvp || !SvOK(*cvp)) {
7012 why1 = "$^H{";
7013 why2 = key;
7014 why3 = "} is not defined";
7015 goto report;
7016 }
7017 sv_2mortal(sv); /* Parent created it permanently */
7018 cv = *cvp;
7019 if (!pv && s)
7020 pv = sv_2mortal(newSVpvn(s, len));
7021 if (type && pv)
7022 typesv = sv_2mortal(newSVpv(type, 0));
7023 else
7024 typesv = &PL_sv_undef;
7025
7026 PUSHSTACKi(PERLSI_OVERLOAD);
7027 ENTER ;
7028 SAVETMPS;
7029
7030 PUSHMARK(SP) ;
7031 EXTEND(sp, 3);
7032 if (pv)
7033 PUSHs(pv);
7034 PUSHs(sv);
7035 if (pv)
7036 PUSHs(typesv);
7037 PUTBACK;
7038 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
7039
7040 SPAGAIN ;
7041
7042 /* Check the eval first */
7043 if (!PL_in_eval && SvTRUE(ERRSV)) {
7044 STRLEN n_a;
7045 sv_catpv(ERRSV, "Propagated");
7046 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
7047 (void)POPs;
7048 res = SvREFCNT_inc(sv);
7049 }
7050 else {
7051 res = POPs;
7052 (void)SvREFCNT_inc(res);
7053 }
7054
7055 PUTBACK ;
7056 FREETMPS ;
7057 LEAVE ;
7058 POPSTACK;
7059
7060 if (!SvOK(res)) {
7061 why1 = "Call to &{$^H{";
7062 why2 = key;
7063 why3 = "}} did not return a defined value";
7064 sv = res;
7065 goto report;
7066 }
7067
7068 return res;
7069}
7070
7071/* Returns a NUL terminated string, with the length of the string written to
7072 *slp
7073 */
7074STATIC char *
7075S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
7076{
7077 register char *d = dest;
7078 register char *e = d + destlen - 3; /* two-character token, ending NUL */
7079 for (;;) {
7080 if (d >= e)
7081 Perl_croak(aTHX_ ident_too_long);
7082 if (isALNUM(*s)) /* UTF handled below */
7083 *d++ = *s++;
7084 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
7085 *d++ = ':';
7086 *d++ = ':';
7087 s++;
7088 }
7089 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
7090 *d++ = *s++;
7091 *d++ = *s++;
7092 }
7093 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
7094 char *t = s + UTF8SKIP(s);
7095 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
7096 t += UTF8SKIP(t);
7097 if (d + (t - s) > e)
7098 Perl_croak(aTHX_ ident_too_long);
7099 Copy(s, d, t - s, char);
7100 d += t - s;
7101 s = t;
7102 }
7103 else {
7104 *d = '\0';
7105 *slp = d - dest;
7106 return s;
7107 }
7108 }
7109}
7110
7111STATIC char *
7112S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
7113{
7114 register char *d;
7115 register char *e;
7116 char *bracket = 0;
7117 char funny = *s++;
7118
7119 if (isSPACE(*s))
7120 s = skipspace(s);
7121 d = dest;
7122 e = d + destlen - 3; /* two-character token, ending NUL */
7123 if (isDIGIT(*s)) {
7124 while (isDIGIT(*s)) {
7125 if (d >= e)
7126 Perl_croak(aTHX_ ident_too_long);
7127 *d++ = *s++;
7128 }
7129 }
7130 else {
7131 for (;;) {
7132 if (d >= e)
7133 Perl_croak(aTHX_ ident_too_long);
7134 if (isALNUM(*s)) /* UTF handled below */
7135 *d++ = *s++;
7136 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
7137 *d++ = ':';
7138 *d++ = ':';
7139 s++;
7140 }
7141 else if (*s == ':' && s[1] == ':') {
7142 *d++ = *s++;
7143 *d++ = *s++;
7144 }
7145 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
7146 char *t = s + UTF8SKIP(s);
7147 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
7148 t += UTF8SKIP(t);
7149 if (d + (t - s) > e)
7150 Perl_croak(aTHX_ ident_too_long);
7151 Copy(s, d, t - s, char);
7152 d += t - s;
7153 s = t;
7154 }
7155 else
7156 break;
7157 }
7158 }
7159 *d = '\0';
7160 d = dest;
7161 if (*d) {
7162 if (PL_lex_state != LEX_NORMAL)
7163 PL_lex_state = LEX_INTERPENDMAYBE;
7164 return s;
7165 }
7166 if (*s == '$' && s[1] &&
7167 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
7168 {
7169 return s;
7170 }
7171 if (*s == '{') {
7172 bracket = s;
7173 s++;
7174 }
7175 else if (ck_uni)
7176 check_uni();
7177 if (s < send)
7178 *d = *s++;
7179 d[1] = '\0';
7180 if (*d == '^' && *s && isCONTROLVAR(*s)) {
7181 *d = toCTRL(*s);
7182 s++;
7183 }
7184 if (bracket) {
7185 if (isSPACE(s[-1])) {
7186 while (s < send) {
7187 char ch = *s++;
7188 if (!SPACE_OR_TAB(ch)) {
7189 *d = ch;
7190 break;
7191 }
7192 }
7193 }
7194 if (isIDFIRST_lazy_if(d,UTF)) {
7195 d++;
7196 if (UTF) {
7197 e = s;
7198 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
7199 e += UTF8SKIP(e);
7200 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
7201 e += UTF8SKIP(e);
7202 }
7203 Copy(s, d, e - s, char);
7204 d += e - s;
7205 s = e;
7206 }
7207 else {
7208 while ((isALNUM(*s) || *s == ':') && d < e)
7209 *d++ = *s++;
7210 if (d >= e)
7211 Perl_croak(aTHX_ ident_too_long);
7212 }
7213 *d = '\0';
7214 while (s < send && SPACE_OR_TAB(*s)) s++;
7215 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
7216 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
7217 const char *brack = *s == '[' ? "[...]" : "{...}";
7218 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7219 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
7220 funny, dest, brack, funny, dest, brack);
7221 }
7222 bracket++;
7223 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
7224 return s;
7225 }
7226 }
7227 /* Handle extended ${^Foo} variables
7228 * 1999-02-27 mjd-perl-patch@plover.com */
7229 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
7230 && isALNUM(*s))
7231 {
7232 d++;
7233 while (isALNUM(*s) && d < e) {
7234 *d++ = *s++;
7235 }
7236 if (d >= e)
7237 Perl_croak(aTHX_ ident_too_long);
7238 *d = '\0';
7239 }
7240 if (*s == '}') {
7241 s++;
7242 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
7243 PL_lex_state = LEX_INTERPEND;
7244 PL_expect = XREF;
7245 }
7246 if (funny == '#')
7247 funny = '@';
7248 if (PL_lex_state == LEX_NORMAL) {
7249 if (ckWARN(WARN_AMBIGUOUS) &&
7250 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
7251 {
7252 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7253 "Ambiguous use of %c{%s} resolved to %c%s",
7254 funny, dest, funny, dest);
7255 }
7256 }
7257 }
7258 else {
7259 s = bracket; /* let the parser handle it */
7260 *dest = '\0';
7261 }
7262 }
7263 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
7264 PL_lex_state = LEX_INTERPEND;
7265 return s;
7266}
7267
7268void
7269Perl_pmflag(pTHX_ U32* pmfl, int ch)
7270{
7271 if (ch == 'i')
7272 *pmfl |= PMf_FOLD;
7273 else if (ch == 'g')
7274 *pmfl |= PMf_GLOBAL;
7275 else if (ch == 'c')
7276 *pmfl |= PMf_CONTINUE;
7277 else if (ch == 'o')
7278 *pmfl |= PMf_KEEP;
7279 else if (ch == 'm')
7280 *pmfl |= PMf_MULTILINE;
7281 else if (ch == 's')
7282 *pmfl |= PMf_SINGLELINE;
7283 else if (ch == 'x')
7284 *pmfl |= PMf_EXTENDED;
7285}
7286
7287STATIC char *
7288S_scan_pat(pTHX_ char *start, I32 type)
7289{
7290 PMOP *pm;
7291 char *s;
7292
7293 s = scan_str(start,FALSE,FALSE);
7294 if (!s)
7295 Perl_croak(aTHX_ "Search pattern not terminated");
7296
7297 pm = (PMOP*)newPMOP(type, 0);
7298 if (PL_multi_open == '?')
7299 pm->op_pmflags |= PMf_ONCE;
7300 if(type == OP_QR) {
7301 while (*s && strchr("iomsx", *s))
7302 pmflag(&pm->op_pmflags,*s++);
7303 }
7304 else {
7305 while (*s && strchr("iogcmsx", *s))
7306 pmflag(&pm->op_pmflags,*s++);
7307 }
7308 /* issue a warning if /c is specified,but /g is not */
7309 if (ckWARN(WARN_REGEXP) &&
7310 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
7311 {
7312 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
7313 }
7314
7315 pm->op_pmpermflags = pm->op_pmflags;
7316
7317 PL_lex_op = (OP*)pm;
7318 yylval.ival = OP_MATCH;
7319 return s;
7320}
7321
7322STATIC char *
7323S_scan_subst(pTHX_ char *start)
7324{
7325 register char *s;
7326 register PMOP *pm;
7327 I32 first_start;
7328 I32 es = 0;
7329
7330 yylval.ival = OP_NULL;
7331
7332 s = scan_str(start,FALSE,FALSE);
7333
7334 if (!s)
7335 Perl_croak(aTHX_ "Substitution pattern not terminated");
7336
7337 if (s[-1] == PL_multi_open)
7338 s--;
7339
7340 first_start = PL_multi_start;
7341 s = scan_str(s,FALSE,FALSE);
7342 if (!s) {
7343 if (PL_lex_stuff) {
7344 SvREFCNT_dec(PL_lex_stuff);
7345 PL_lex_stuff = Nullsv;
7346 }
7347 Perl_croak(aTHX_ "Substitution replacement not terminated");
7348 }
7349 PL_multi_start = first_start; /* so whole substitution is taken together */
7350
7351 pm = (PMOP*)newPMOP(OP_SUBST, 0);
7352 while (*s) {
7353 if (*s == 'e') {
7354 s++;
7355 es++;
7356 }
7357 else if (strchr("iogcmsx", *s))
7358 pmflag(&pm->op_pmflags,*s++);
7359 else
7360 break;
7361 }
7362
7363 /* /c is not meaningful with s/// */
7364 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
7365 {
7366 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
7367 }
7368
7369 if (es) {
7370 SV *repl;
7371 PL_sublex_info.super_bufptr = s;
7372 PL_sublex_info.super_bufend = PL_bufend;
7373 PL_multi_end = 0;
7374 pm->op_pmflags |= PMf_EVAL;
7375 repl = newSVpvn("",0);
7376 while (es-- > 0)
7377 sv_catpv(repl, es ? "eval " : "do ");
7378 sv_catpvn(repl, "{ ", 2);
7379 sv_catsv(repl, PL_lex_repl);
7380 sv_catpvn(repl, " };", 2);
7381 SvEVALED_on(repl);
7382 SvREFCNT_dec(PL_lex_repl);
7383 PL_lex_repl = repl;
7384 }
7385
7386 pm->op_pmpermflags = pm->op_pmflags;
7387 PL_lex_op = (OP*)pm;
7388 yylval.ival = OP_SUBST;
7389 return s;
7390}
7391
7392STATIC char *
7393S_scan_trans(pTHX_ char *start)
7394{
7395 register char* s;
7396 OP *o;
7397 short *tbl;
7398 I32 squash;
7399 I32 del;
7400 I32 complement;
7401
7402 yylval.ival = OP_NULL;
7403
7404 s = scan_str(start,FALSE,FALSE);
7405 if (!s)
7406 Perl_croak(aTHX_ "Transliteration pattern not terminated");
7407 if (s[-1] == PL_multi_open)
7408 s--;
7409
7410 s = scan_str(s,FALSE,FALSE);
7411 if (!s) {
7412 if (PL_lex_stuff) {
7413 SvREFCNT_dec(PL_lex_stuff);
7414 PL_lex_stuff = Nullsv;
7415 }
7416 Perl_croak(aTHX_ "Transliteration replacement not terminated");
7417 }
7418
7419 complement = del = squash = 0;
7420 while (1) {
7421 switch (*s) {
7422 case 'c':
7423 complement = OPpTRANS_COMPLEMENT;
7424 break;
7425 case 'd':
7426 del = OPpTRANS_DELETE;
7427 break;
7428 case 's':
7429 squash = OPpTRANS_SQUASH;
7430 break;
7431 default:
7432 goto no_more;
7433 }
7434 s++;
7435 }
7436 no_more:
7437
7438 New(803, tbl, complement&&!del?258:256, short);
7439 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7440 o->op_private &= ~OPpTRANS_ALL;
7441 o->op_private |= del|squash|complement|
7442 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7443 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
7444
7445 PL_lex_op = o;
7446 yylval.ival = OP_TRANS;
7447 return s;
7448}
7449
7450STATIC char *
7451S_scan_heredoc(pTHX_ register char *s)
7452{
7453 SV *herewas;
7454 I32 op_type = OP_SCALAR;
7455 I32 len;
7456 SV *tmpstr;
7457 char term;
7458 register char *d;
7459 register char *e;
7460 char *peek;
7461 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
7462
7463 s += 2;
7464 d = PL_tokenbuf;
7465 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
7466 if (!outer)
7467 *d++ = '\n';
7468 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
7469 if (*peek == '`' || *peek == '\'' || *peek =='"') {
7470 s = peek;
7471 term = *s++;
7472 s = delimcpy(d, e, s, PL_bufend, term, &len);
7473 d += len;
7474 if (s < PL_bufend)
7475 s++;
7476 }
7477 else {
7478 if (*s == '\\')
7479 s++, term = '\'';
7480 else
7481 term = '"';
7482 if (!isALNUM_lazy_if(s,UTF))
7483 deprecate_old("bare << to mean <<\"\"");
7484 for (; isALNUM_lazy_if(s,UTF); s++) {
7485 if (d < e)
7486 *d++ = *s;
7487 }
7488 }
7489 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
7490 Perl_croak(aTHX_ "Delimiter for here document is too long");
7491 *d++ = '\n';
7492 *d = '\0';
7493 len = d - PL_tokenbuf;
7494#ifndef PERL_STRICT_CR
7495 d = strchr(s, '\r');
7496 if (d) {
7497 char *olds = s;
7498 s = d;
7499 while (s < PL_bufend) {
7500 if (*s == '\r') {
7501 *d++ = '\n';
7502 if (*++s == '\n')
7503 s++;
7504 }
7505 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
7506 *d++ = *s++;
7507 s++;
7508 }
7509 else
7510 *d++ = *s++;
7511 }
7512 *d = '\0';
7513 PL_bufend = d;
7514 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
7515 s = olds;
7516 }
7517#endif
7518 d = "\n";
7519 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
7520 herewas = newSVpvn(s,PL_bufend-s);
7521 else
7522 s--, herewas = newSVpvn(s,d-s);
7523 s += SvCUR(herewas);
7524
7525 tmpstr = NEWSV(87,79);
7526 sv_upgrade(tmpstr, SVt_PVIV);
7527 if (term == '\'') {
7528 op_type = OP_CONST;
7529 SvIVX(tmpstr) = -1;
7530 }
7531 else if (term == '`') {
7532 op_type = OP_BACKTICK;
7533 SvIVX(tmpstr) = '\\';
7534 }
7535
7536 CLINE;
7537 PL_multi_start = CopLINE(PL_curcop);
7538 PL_multi_open = PL_multi_close = '<';
7539 term = *PL_tokenbuf;
7540 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
7541 char *bufptr = PL_sublex_info.super_bufptr;
7542 char *bufend = PL_sublex_info.super_bufend;
7543 char *olds = s - SvCUR(herewas);
7544 s = strchr(bufptr, '\n');
7545 if (!s)
7546 s = bufend;
7547 d = s;
7548 while (s < bufend &&
7549 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
7550 if (*s++ == '\n')
7551 CopLINE_inc(PL_curcop);
7552 }
7553 if (s >= bufend) {
7554 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7555 missingterm(PL_tokenbuf);
7556 }
7557 sv_setpvn(herewas,bufptr,d-bufptr+1);
7558 sv_setpvn(tmpstr,d+1,s-d);
7559 s += len - 1;
7560 sv_catpvn(herewas,s,bufend-s);
7561 Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
7562
7563 s = olds;
7564 goto retval;
7565 }
7566 else if (!outer) {
7567 d = s;
7568 while (s < PL_bufend &&
7569 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
7570 if (*s++ == '\n')
7571 CopLINE_inc(PL_curcop);
7572 }
7573 if (s >= PL_bufend) {
7574 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7575 missingterm(PL_tokenbuf);
7576 }
7577 sv_setpvn(tmpstr,d+1,s-d);
7578 s += len - 1;
7579 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
7580
7581 sv_catpvn(herewas,s,PL_bufend-s);
7582 sv_setsv(PL_linestr,herewas);
7583 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
7584 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7585 PL_last_lop = PL_last_uni = Nullch;
7586 }
7587 else
7588 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
7589 while (s >= PL_bufend) { /* multiple line string? */
7590 if (!outer ||
7591 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
7592 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7593 missingterm(PL_tokenbuf);
7594 }
7595 CopLINE_inc(PL_curcop);
7596 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7597 PL_last_lop = PL_last_uni = Nullch;
7598#ifndef PERL_STRICT_CR
7599 if (PL_bufend - PL_linestart >= 2) {
7600 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
7601 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
7602 {
7603 PL_bufend[-2] = '\n';
7604 PL_bufend--;
7605 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
7606 }
7607 else if (PL_bufend[-1] == '\r')
7608 PL_bufend[-1] = '\n';
7609 }
7610 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
7611 PL_bufend[-1] = '\n';
7612#endif
7613 if (PERLDB_LINE && PL_curstash != PL_debstash) {
7614 SV *sv = NEWSV(88,0);
7615
7616 sv_upgrade(sv, SVt_PVMG);
7617 sv_setsv(sv,PL_linestr);
7618 (void)SvIOK_on(sv);
7619 SvIVX(sv) = 0;
7620 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
7621 }
7622 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
7623 STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
7624 *(SvPVX(PL_linestr) + off ) = ' ';
7625 sv_catsv(PL_linestr,herewas);
7626 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7627 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
7628 }
7629 else {
7630 s = PL_bufend;
7631 sv_catsv(tmpstr,PL_linestr);
7632 }
7633 }
7634 s++;
7635retval:
7636 PL_multi_end = CopLINE(PL_curcop);
7637 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
7638 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
7639 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
7640 }
7641 SvREFCNT_dec(herewas);
7642 if (!IN_BYTES) {
7643 if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7644 SvUTF8_on(tmpstr);
7645 else if (PL_encoding)
7646 sv_recode_to_utf8(tmpstr, PL_encoding);
7647 }
7648 PL_lex_stuff = tmpstr;
7649 yylval.ival = op_type;
7650 return s;
7651}
7652
7653/* scan_inputsymbol
7654 takes: current position in input buffer
7655 returns: new position in input buffer
7656 side-effects: yylval and lex_op are set.
7657
7658 This code handles:
7659
7660 <> read from ARGV
7661 <FH> read from filehandle
7662 <pkg::FH> read from package qualified filehandle
7663 <pkg'FH> read from package qualified filehandle
7664 <$fh> read from filehandle in $fh
7665 <*.h> filename glob
7666
7667*/
7668
7669STATIC char *
7670S_scan_inputsymbol(pTHX_ char *start)
7671{
7672 register char *s = start; /* current position in buffer */
7673 register char *d;
7674 register char *e;
7675 char *end;
7676 I32 len;
7677
7678 d = PL_tokenbuf; /* start of temp holding space */
7679 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
7680 end = strchr(s, '\n');
7681 if (!end)
7682 end = PL_bufend;
7683 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
7684
7685 /* die if we didn't have space for the contents of the <>,
7686 or if it didn't end, or if we see a newline
7687 */
7688
7689 if (len >= sizeof PL_tokenbuf)
7690 Perl_croak(aTHX_ "Excessively long <> operator");
7691 if (s >= end)
7692 Perl_croak(aTHX_ "Unterminated <> operator");
7693
7694 s++;
7695
7696 /* check for <$fh>
7697 Remember, only scalar variables are interpreted as filehandles by
7698 this code. Anything more complex (e.g., <$fh{$num}>) will be
7699 treated as a glob() call.
7700 This code makes use of the fact that except for the $ at the front,
7701 a scalar variable and a filehandle look the same.
7702 */
7703 if (*d == '$' && d[1]) d++;
7704
7705 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7706 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
7707 d++;
7708
7709 /* If we've tried to read what we allow filehandles to look like, and
7710 there's still text left, then it must be a glob() and not a getline.
7711 Use scan_str to pull out the stuff between the <> and treat it
7712 as nothing more than a string.
7713 */
7714
7715 if (d - PL_tokenbuf != len) {
7716 yylval.ival = OP_GLOB;
7717 set_csh();
7718 s = scan_str(start,FALSE,FALSE);
7719 if (!s)
7720 Perl_croak(aTHX_ "Glob not terminated");
7721 return s;
7722 }
7723 else {
7724 bool readline_overriden = FALSE;
7725 GV *gv_readline = Nullgv;
7726 GV **gvp;
7727 /* we're in a filehandle read situation */
7728 d = PL_tokenbuf;
7729
7730 /* turn <> into <ARGV> */
7731 if (!len)
7732 Copy("ARGV",d,5,char);
7733
7734 /* Check whether readline() is overriden */
7735 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
7736 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
7737 ||
7738 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
7739 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
7740 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
7741 readline_overriden = TRUE;
7742
7743 /* if <$fh>, create the ops to turn the variable into a
7744 filehandle
7745 */
7746 if (*d == '$') {
7747 I32 tmp;
7748
7749 /* try to find it in the pad for this block, otherwise find
7750 add symbol table ops
7751 */
7752 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
7753 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
7754 SV *sym = sv_2mortal(
7755 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
7756 sv_catpvn(sym, "::", 2);
7757 sv_catpv(sym, d+1);
7758 d = SvPVX(sym);
7759 goto intro_sym;
7760 }
7761 else {
7762 OP *o = newOP(OP_PADSV, 0);
7763 o->op_targ = tmp;
7764 PL_lex_op = readline_overriden
7765 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7766 append_elem(OP_LIST, o,
7767 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
7768 : (OP*)newUNOP(OP_READLINE, 0, o);
7769 }
7770 }
7771 else {
7772 GV *gv;
7773 ++d;
7774intro_sym:
7775 gv = gv_fetchpv(d,
7776 (PL_in_eval
7777 ? (GV_ADDMULTI | GV_ADDINEVAL)
7778 : GV_ADDMULTI),
7779 SVt_PV);
7780 PL_lex_op = readline_overriden
7781 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7782 append_elem(OP_LIST,
7783 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
7784 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
7785 : (OP*)newUNOP(OP_READLINE, 0,
7786 newUNOP(OP_RV2SV, 0,
7787 newGVOP(OP_GV, 0, gv)));
7788 }
7789 if (!readline_overriden)
7790 PL_lex_op->op_flags |= OPf_SPECIAL;
7791 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
7792 yylval.ival = OP_NULL;
7793 }
7794
7795 /* If it's none of the above, it must be a literal filehandle
7796 (<Foo::BAR> or <FOO>) so build a simple readline OP */
7797 else {
7798 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
7799 PL_lex_op = readline_overriden
7800 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7801 append_elem(OP_LIST,
7802 newGVOP(OP_GV, 0, gv),
7803 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
7804 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
7805 yylval.ival = OP_NULL;
7806 }
7807 }
7808
7809 return s;
7810}
7811
7812
7813/* scan_str
7814 takes: start position in buffer
7815 keep_quoted preserve \ on the embedded delimiter(s)
7816 keep_delims preserve the delimiters around the string
7817 returns: position to continue reading from buffer
7818 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
7819 updates the read buffer.
7820
7821 This subroutine pulls a string out of the input. It is called for:
7822 q single quotes q(literal text)
7823 ' single quotes 'literal text'
7824 qq double quotes qq(interpolate $here please)
7825 " double quotes "interpolate $here please"
7826 qx backticks qx(/bin/ls -l)
7827 ` backticks `/bin/ls -l`
7828 qw quote words @EXPORT_OK = qw( func() $spam )
7829 m// regexp match m/this/
7830 s/// regexp substitute s/this/that/
7831 tr/// string transliterate tr/this/that/
7832 y/// string transliterate y/this/that/
7833 ($*@) sub prototypes sub foo ($)
7834 (stuff) sub attr parameters sub foo : attr(stuff)
7835 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
7836
7837 In most of these cases (all but <>, patterns and transliterate)
7838 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
7839 calls scan_str(). s/// makes yylex() call scan_subst() which calls
7840 scan_str(). tr/// and y/// make yylex() call scan_trans() which
7841 calls scan_str().
7842
7843 It skips whitespace before the string starts, and treats the first
7844 character as the delimiter. If the delimiter is one of ([{< then
7845 the corresponding "close" character )]}> is used as the closing
7846 delimiter. It allows quoting of delimiters, and if the string has
7847 balanced delimiters ([{<>}]) it allows nesting.
7848
7849 On success, the SV with the resulting string is put into lex_stuff or,
7850 if that is already non-NULL, into lex_repl. The second case occurs only
7851 when parsing the RHS of the special constructs s/// and tr/// (y///).
7852 For convenience, the terminating delimiter character is stuffed into
7853 SvIVX of the SV.
7854*/
7855
7856STATIC char *
7857S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
7858{
7859 SV *sv; /* scalar value: string */
7860 char *tmps; /* temp string, used for delimiter matching */
7861 register char *s = start; /* current position in the buffer */
7862 register char term; /* terminating character */
7863 register char *to; /* current position in the sv's data */
7864 I32 brackets = 1; /* bracket nesting level */
7865 bool has_utf8 = FALSE; /* is there any utf8 content? */
7866 I32 termcode; /* terminating char. code */
7867 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
7868 STRLEN termlen; /* length of terminating string */
7869 char *last = NULL; /* last position for nesting bracket */
7870
7871 /* skip space before the delimiter */
7872 if (isSPACE(*s))
7873 s = skipspace(s);
7874
7875 /* mark where we are, in case we need to report errors */
7876 CLINE;
7877
7878 /* after skipping whitespace, the next character is the terminator */
7879 term = *s;
7880 if (!UTF) {
7881 termcode = termstr[0] = term;
7882 termlen = 1;
7883 }
7884 else {
7885 termcode = utf8_to_uvchr((U8*)s, &termlen);
7886 Copy(s, termstr, termlen, U8);
7887 if (!UTF8_IS_INVARIANT(term))
7888 has_utf8 = TRUE;
7889 }
7890
7891 /* mark where we are */
7892 PL_multi_start = CopLINE(PL_curcop);
7893 PL_multi_open = term;
7894
7895 /* find corresponding closing delimiter */
7896 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
7897 termcode = termstr[0] = term = tmps[5];
7898
7899 PL_multi_close = term;
7900
7901 /* create a new SV to hold the contents. 87 is leak category, I'm
7902 assuming. 79 is the SV's initial length. What a random number. */
7903 sv = NEWSV(87,79);
7904 sv_upgrade(sv, SVt_PVIV);
7905 SvIVX(sv) = termcode;
7906 (void)SvPOK_only(sv); /* validate pointer */
7907
7908 /* move past delimiter and try to read a complete string */
7909 if (keep_delims)
7910 sv_catpvn(sv, s, termlen);
7911 s += termlen;
7912 for (;;) {
7913 if (PL_encoding && !UTF) {
7914 bool cont = TRUE;
7915
7916 while (cont) {
7917 int offset = s - SvPVX(PL_linestr);
7918 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
7919 &offset, (char*)termstr, termlen);
7920 char *ns = SvPVX(PL_linestr) + offset;
7921 char *svlast = SvEND(sv) - 1;
7922
7923 for (; s < ns; s++) {
7924 if (*s == '\n' && !PL_rsfp)
7925 CopLINE_inc(PL_curcop);
7926 }
7927 if (!found)
7928 goto read_more_line;
7929 else {
7930 /* handle quoted delimiters */
7931 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
7932 char *t;
7933 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
7934 t--;
7935 if ((svlast-1 - t) % 2) {
7936 if (!keep_quoted) {
7937 *(svlast-1) = term;
7938 *svlast = '\0';
7939 SvCUR_set(sv, SvCUR(sv) - 1);
7940 }
7941 continue;
7942 }
7943 }
7944 if (PL_multi_open == PL_multi_close) {
7945 cont = FALSE;
7946 }
7947 else {
7948 char *t, *w;
7949 if (!last)
7950 last = SvPVX(sv);
7951 for (w = t = last; t < svlast; w++, t++) {
7952 /* At here, all closes are "was quoted" one,
7953 so we don't check PL_multi_close. */
7954 if (*t == '\\') {
7955 if (!keep_quoted && *(t+1) == PL_multi_open)
7956 t++;
7957 else
7958 *w++ = *t++;
7959 }
7960 else if (*t == PL_multi_open)
7961 brackets++;
7962
7963 *w = *t;
7964 }
7965 if (w < t) {
7966 *w++ = term;
7967 *w = '\0';
7968 SvCUR_set(sv, w - SvPVX(sv));
7969 }
7970 last = w;
7971 if (--brackets <= 0)
7972 cont = FALSE;
7973 }
7974 }
7975 }
7976 if (!keep_delims) {
7977 SvCUR_set(sv, SvCUR(sv) - 1);
7978 *SvEND(sv) = '\0';
7979 }
7980 break;
7981 }
7982
7983 /* extend sv if need be */
7984 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
7985 /* set 'to' to the next character in the sv's string */
7986 to = SvPVX(sv)+SvCUR(sv);
7987
7988 /* if open delimiter is the close delimiter read unbridle */
7989 if (PL_multi_open == PL_multi_close) {
7990 for (; s < PL_bufend; s++,to++) {
7991 /* embedded newlines increment the current line number */
7992 if (*s == '\n' && !PL_rsfp)
7993 CopLINE_inc(PL_curcop);
7994 /* handle quoted delimiters */
7995 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
7996 if (!keep_quoted && s[1] == term)
7997 s++;
7998 /* any other quotes are simply copied straight through */
7999 else
8000 *to++ = *s++;
8001 }
8002 /* terminate when run out of buffer (the for() condition), or
8003 have found the terminator */
8004 else if (*s == term) {
8005 if (termlen == 1)
8006 break;
8007 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
8008 break;
8009 }
8010 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
8011 has_utf8 = TRUE;
8012 *to = *s;
8013 }
8014 }
8015
8016 /* if the terminator isn't the same as the start character (e.g.,
8017 matched brackets), we have to allow more in the quoting, and
8018 be prepared for nested brackets.
8019 */
8020 else {
8021 /* read until we run out of string, or we find the terminator */
8022 for (; s < PL_bufend; s++,to++) {
8023 /* embedded newlines increment the line count */
8024 if (*s == '\n' && !PL_rsfp)
8025 CopLINE_inc(PL_curcop);
8026 /* backslashes can escape the open or closing characters */
8027 if (*s == '\\' && s+1 < PL_bufend) {
8028 if (!keep_quoted &&
8029 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
8030 s++;
8031 else
8032 *to++ = *s++;
8033 }
8034 /* allow nested opens and closes */
8035 else if (*s == PL_multi_close && --brackets <= 0)
8036 break;
8037 else if (*s == PL_multi_open)
8038 brackets++;
8039 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
8040 has_utf8 = TRUE;
8041 *to = *s;
8042 }
8043 }
8044 /* terminate the copied string and update the sv's end-of-string */
8045 *to = '\0';
8046 SvCUR_set(sv, to - SvPVX(sv));
8047
8048 /*
8049 * this next chunk reads more into the buffer if we're not done yet
8050 */
8051
8052 if (s < PL_bufend)
8053 break; /* handle case where we are done yet :-) */
8054
8055#ifndef PERL_STRICT_CR
8056 if (to - SvPVX(sv) >= 2) {
8057 if ((to[-2] == '\r' && to[-1] == '\n') ||
8058 (to[-2] == '\n' && to[-1] == '\r'))
8059 {
8060 to[-2] = '\n';
8061 to--;
8062 SvCUR_set(sv, to - SvPVX(sv));
8063 }
8064 else if (to[-1] == '\r')
8065 to[-1] = '\n';
8066 }
8067 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
8068 to[-1] = '\n';
8069#endif
8070
8071 read_more_line:
8072 /* if we're out of file, or a read fails, bail and reset the current
8073 line marker so we can report where the unterminated string began
8074 */
8075 if (!PL_rsfp ||
8076 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
8077 sv_free(sv);
8078 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
8079 return Nullch;
8080 }
8081 /* we read a line, so increment our line counter */
8082 CopLINE_inc(PL_curcop);
8083
8084 /* update debugger info */
8085 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8086 SV *sv = NEWSV(88,0);
8087
8088 sv_upgrade(sv, SVt_PVMG);
8089 sv_setsv(sv,PL_linestr);
8090 (void)SvIOK_on(sv);
8091 SvIVX(sv) = 0;
8092 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
8093 }
8094
8095 /* having changed the buffer, we must update PL_bufend */
8096 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8097 PL_last_lop = PL_last_uni = Nullch;
8098 }
8099
8100 /* at this point, we have successfully read the delimited string */
8101
8102 if (!PL_encoding || UTF) {
8103 if (keep_delims)
8104 sv_catpvn(sv, s, termlen);
8105 s += termlen;
8106 }
8107 if (has_utf8 || PL_encoding)
8108 SvUTF8_on(sv);
8109
8110 PL_multi_end = CopLINE(PL_curcop);
8111
8112 /* if we allocated too much space, give some back */
8113 if (SvCUR(sv) + 5 < SvLEN(sv)) {
8114 SvLEN_set(sv, SvCUR(sv) + 1);
8115 Renew(SvPVX(sv), SvLEN(sv), char);
8116 }
8117
8118 /* decide whether this is the first or second quoted string we've read
8119 for this op
8120 */
8121
8122 if (PL_lex_stuff)
8123 PL_lex_repl = sv;
8124 else
8125 PL_lex_stuff = sv;
8126 return s;
8127}
8128
8129/*
8130 scan_num
8131 takes: pointer to position in buffer
8132 returns: pointer to new position in buffer
8133 side-effects: builds ops for the constant in yylval.op
8134
8135 Read a number in any of the formats that Perl accepts:
8136
8137 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
8138 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
8139 0b[01](_?[01])*
8140 0[0-7](_?[0-7])*
8141 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
8142
8143 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
8144 thing it reads.
8145
8146 If it reads a number without a decimal point or an exponent, it will
8147 try converting the number to an integer and see if it can do so
8148 without loss of precision.
8149*/
8150
8151char *
8152Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
8153{
8154 register char *s = start; /* current position in buffer */
8155 register char *d; /* destination in temp buffer */
8156 register char *e; /* end of temp buffer */
8157 NV nv; /* number read, as a double */
8158 SV *sv = Nullsv; /* place to put the converted number */
8159 bool floatit; /* boolean: int or float? */
8160 char *lastub = 0; /* position of last underbar */
8161 static char number_too_long[] = "Number too long";
8162
8163 /* We use the first character to decide what type of number this is */
8164
8165 switch (*s) {
8166 default:
8167 Perl_croak(aTHX_ "panic: scan_num");
8168
8169 /* if it starts with a 0, it could be an octal number, a decimal in
8170 0.13 disguise, or a hexadecimal number, or a binary number. */
8171 case '0':
8172 {
8173 /* variables:
8174 u holds the "number so far"
8175 shift the power of 2 of the base
8176 (hex == 4, octal == 3, binary == 1)
8177 overflowed was the number more than we can hold?
8178
8179 Shift is used when we add a digit. It also serves as an "are
8180 we in octal/hex/binary?" indicator to disallow hex characters
8181 when in octal mode.
8182 */
8183 NV n = 0.0;
8184 UV u = 0;
8185 I32 shift;
8186 bool overflowed = FALSE;
8187 bool just_zero = TRUE; /* just plain 0 or binary number? */
8188 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
8189 static char* bases[5] = { "", "binary", "", "octal",
8190 "hexadecimal" };
8191 static char* Bases[5] = { "", "Binary", "", "Octal",
8192 "Hexadecimal" };
8193 static char *maxima[5] = { "",
8194 "0b11111111111111111111111111111111",
8195 "",
8196 "037777777777",
8197 "0xffffffff" };
8198 char *base, *Base, *max;
8199
8200 /* check for hex */
8201 if (s[1] == 'x') {
8202 shift = 4;
8203 s += 2;
8204 just_zero = FALSE;
8205 } else if (s[1] == 'b') {
8206 shift = 1;
8207 s += 2;
8208 just_zero = FALSE;
8209 }
8210 /* check for a decimal in disguise */
8211 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
8212 goto decimal;
8213 /* so it must be octal */
8214 else {
8215 shift = 3;
8216 s++;
8217 }
8218
8219 if (*s == '_') {
8220 if (ckWARN(WARN_SYNTAX))
8221 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8222 "Misplaced _ in number");
8223 lastub = s++;
8224 }
8225
8226 base = bases[shift];
8227 Base = Bases[shift];
8228 max = maxima[shift];
8229
8230 /* read the rest of the number */
8231 for (;;) {
8232 /* x is used in the overflow test,
8233 b is the digit we're adding on. */
8234 UV x, b;
8235
8236 switch (*s) {
8237
8238 /* if we don't mention it, we're done */
8239 default:
8240 goto out;
8241
8242 /* _ are ignored -- but warned about if consecutive */
8243 case '_':
8244 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
8245 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8246 "Misplaced _ in number");
8247 lastub = s++;
8248 break;
8249
8250 /* 8 and 9 are not octal */
8251 case '8': case '9':
8252 if (shift == 3)
8253 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
8254 /* FALL THROUGH */
8255
8256 /* octal digits */
8257 case '2': case '3': case '4':
8258 case '5': case '6': case '7':
8259 if (shift == 1)
8260 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
8261 /* FALL THROUGH */
8262
8263 case '0': case '1':
8264 b = *s++ & 15; /* ASCII digit -> value of digit */
8265 goto digit;
8266
8267 /* hex digits */
8268 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
8269 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
8270 /* make sure they said 0x */
8271 if (shift != 4)
8272 goto out;
8273 b = (*s++ & 7) + 9;
8274
8275 /* Prepare to put the digit we have onto the end
8276 of the number so far. We check for overflows.
8277 */
8278
8279 digit:
8280 just_zero = FALSE;
8281 if (!overflowed) {
8282 x = u << shift; /* make room for the digit */
8283
8284 if ((x >> shift) != u
8285 && !(PL_hints & HINT_NEW_BINARY)) {
8286 overflowed = TRUE;
8287 n = (NV) u;
8288 if (ckWARN_d(WARN_OVERFLOW))
8289 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
8290 "Integer overflow in %s number",
8291 base);
8292 } else
8293 u = x | b; /* add the digit to the end */
8294 }
8295 if (overflowed) {
8296 n *= nvshift[shift];
8297 /* If an NV has not enough bits in its
8298 * mantissa to represent an UV this summing of
8299 * small low-order numbers is a waste of time
8300 * (because the NV cannot preserve the
8301 * low-order bits anyway): we could just
8302 * remember when did we overflow and in the
8303 * end just multiply n by the right
8304 * amount. */
8305 n += (NV) b;
8306 }
8307 break;
8308 }
8309 }
8310
8311 /* if we get here, we had success: make a scalar value from
8312 the number.
8313 */
8314 out:
8315
8316 /* final misplaced underbar check */
8317 if (s[-1] == '_') {
8318 if (ckWARN(WARN_SYNTAX))
8319 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
8320 }
8321
8322 sv = NEWSV(92,0);
8323 if (overflowed) {
8324 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
8325 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
8326 "%s number > %s non-portable",
8327 Base, max);
8328 sv_setnv(sv, n);
8329 }
8330 else {
8331#if UVSIZE > 4
8332 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
8333 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
8334 "%s number > %s non-portable",
8335 Base, max);
8336#endif
8337 sv_setuv(sv, u);
8338 }
8339 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
8340 sv = new_constant(start, s - start, "integer",
8341 sv, Nullsv, NULL);
8342 else if (PL_hints & HINT_NEW_BINARY)
8343 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
8344 }
8345 break;
8346
8347 /*
8348 handle decimal numbers.
8349 we're also sent here when we read a 0 as the first digit
8350 */
8351 case '1': case '2': case '3': case '4': case '5':
8352 case '6': case '7': case '8': case '9': case '.':
8353 decimal:
8354 d = PL_tokenbuf;
8355 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
8356 floatit = FALSE;
8357
8358 /* read next group of digits and _ and copy into d */
8359 while (isDIGIT(*s) || *s == '_') {
8360 /* skip underscores, checking for misplaced ones
8361 if -w is on
8362 */
8363 if (*s == '_') {
8364 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
8365 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8366 "Misplaced _ in number");
8367 lastub = s++;
8368 }
8369 else {
8370 /* check for end of fixed-length buffer */
8371 if (d >= e)
8372 Perl_croak(aTHX_ number_too_long);
8373 /* if we're ok, copy the character */
8374 *d++ = *s++;
8375 }
8376 }
8377
8378 /* final misplaced underbar check */
8379 if (lastub && s == lastub + 1) {
8380 if (ckWARN(WARN_SYNTAX))
8381 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
8382 }
8383
8384 /* read a decimal portion if there is one. avoid
8385 3..5 being interpreted as the number 3. followed
8386 by .5
8387 */
8388 if (*s == '.' && s[1] != '.') {
8389 floatit = TRUE;
8390 *d++ = *s++;
8391
8392 if (*s == '_') {
8393 if (ckWARN(WARN_SYNTAX))
8394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8395 "Misplaced _ in number");
8396 lastub = s;
8397 }
8398
8399 /* copy, ignoring underbars, until we run out of digits.
8400 */
8401 for (; isDIGIT(*s) || *s == '_'; s++) {
8402 /* fixed length buffer check */
8403 if (d >= e)
8404 Perl_croak(aTHX_ number_too_long);
8405 if (*s == '_') {
8406 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
8407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8408 "Misplaced _ in number");
8409 lastub = s;
8410 }
8411 else
8412 *d++ = *s;
8413 }
8414 /* fractional part ending in underbar? */
8415 if (s[-1] == '_') {
8416 if (ckWARN(WARN_SYNTAX))
8417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8418 "Misplaced _ in number");
8419 }
8420 if (*s == '.' && isDIGIT(s[1])) {
8421 /* oops, it's really a v-string, but without the "v" */
8422 s = start;
8423 goto vstring;
8424 }
8425 }
8426
8427 /* read exponent part, if present */
8428 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
8429 floatit = TRUE;
8430 s++;
8431
8432 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
8433 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
8434
8435 /* stray preinitial _ */
8436 if (*s == '_') {
8437 if (ckWARN(WARN_SYNTAX))
8438 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8439 "Misplaced _ in number");
8440 lastub = s++;
8441 }
8442
8443 /* allow positive or negative exponent */
8444 if (*s == '+' || *s == '-')
8445 *d++ = *s++;
8446
8447 /* stray initial _ */
8448 if (*s == '_') {
8449 if (ckWARN(WARN_SYNTAX))
8450 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8451 "Misplaced _ in number");
8452 lastub = s++;
8453 }
8454
8455 /* read digits of exponent */
8456 while (isDIGIT(*s) || *s == '_') {
8457 if (isDIGIT(*s)) {
8458 if (d >= e)
8459 Perl_croak(aTHX_ number_too_long);
8460 *d++ = *s++;
8461 }
8462 else {
8463 if (ckWARN(WARN_SYNTAX) &&
8464 ((lastub && s == lastub + 1) ||
8465 (!isDIGIT(s[1]) && s[1] != '_')))
8466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8467 "Misplaced _ in number");
8468 lastub = s++;
8469 }
8470 }
8471 }
8472
8473
8474 /* make an sv from the string */
8475 sv = NEWSV(92,0);
8476
8477 /*
8478 We try to do an integer conversion first if no characters
8479 indicating "float" have been found.
8480 */
8481
8482 if (!floatit) {
8483 UV uv;
8484 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
8485
8486 if (flags == IS_NUMBER_IN_UV) {
8487 if (uv <= IV_MAX)
8488 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
8489 else
8490 sv_setuv(sv, uv);
8491 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
8492 if (uv <= (UV) IV_MIN)
8493 sv_setiv(sv, -(IV)uv);
8494 else
8495 floatit = TRUE;
8496 } else
8497 floatit = TRUE;
8498 }
8499 if (floatit) {
8500 /* terminate the string */
8501 *d = '\0';
8502 nv = Atof(PL_tokenbuf);
8503 sv_setnv(sv, nv);
8504 }
8505
8506 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
8507 (PL_hints & HINT_NEW_INTEGER) )
8508 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
8509 (floatit ? "float" : "integer"),
8510 sv, Nullsv, NULL);
8511 break;
8512
8513 /* if it starts with a v, it could be a v-string */
8514 case 'v':
8515vstring:
8516 sv = NEWSV(92,5); /* preallocate storage space */
8517 s = scan_vstring(s,sv);
8518 break;
8519 }
8520
8521 /* make the op for the constant and return */
8522
8523 if (sv)
8524 lvalp->opval = newSVOP(OP_CONST, 0, sv);
8525 else
8526 lvalp->opval = Nullop;
8527
8528 return s;
8529}
8530
8531STATIC char *
8532S_scan_formline(pTHX_ register char *s)
8533{
8534 register char *eol;
8535 register char *t;
8536 SV *stuff = newSVpvn("",0);
8537 bool needargs = FALSE;
8538 bool eofmt = FALSE;
8539
8540 while (!needargs) {
8541 if (*s == '.') {
8542 /*SUPPRESS 530*/
8543#ifdef PERL_STRICT_CR
8544 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
8545#else
8546 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
8547#endif
8548 if (*t == '\n' || t == PL_bufend) {
8549 eofmt = TRUE;
8550 break;
8551 }
8552 }
8553 if (PL_in_eval && !PL_rsfp) {
8554 eol = memchr(s,'\n',PL_bufend-s);
8555 if (!eol++)
8556 eol = PL_bufend;
8557 }
8558 else
8559 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8560 if (*s != '#') {
8561 for (t = s; t < eol; t++) {
8562 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
8563 needargs = FALSE;
8564 goto enough; /* ~~ must be first line in formline */
8565 }
8566 if (*t == '@' || *t == '^')
8567 needargs = TRUE;
8568 }
8569 if (eol > s) {
8570 sv_catpvn(stuff, s, eol-s);
8571#ifndef PERL_STRICT_CR
8572 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
8573 char *end = SvPVX(stuff) + SvCUR(stuff);
8574 end[-2] = '\n';
8575 end[-1] = '\0';
8576 SvCUR(stuff)--;
8577 }
8578#endif
8579 }
8580 else
8581 break;
8582 }
8583 s = eol;
8584 if (PL_rsfp) {
8585 s = filter_gets(PL_linestr, PL_rsfp, 0);
8586 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
8587 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
8588 PL_last_lop = PL_last_uni = Nullch;
8589 if (!s) {
8590 s = PL_bufptr;
8591 break;
8592 }
8593 }
8594 incline(s);
8595 }
8596 enough:
8597 if (SvCUR(stuff)) {
8598 PL_expect = XTERM;
8599 if (needargs) {
8600 PL_lex_state = LEX_NORMAL;
8601 PL_nextval[PL_nexttoke].ival = 0;
8602 force_next(',');
8603 }
8604 else
8605 PL_lex_state = LEX_FORMLINE;
8606 if (!IN_BYTES) {
8607 if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
8608 SvUTF8_on(stuff);
8609 else if (PL_encoding)
8610 sv_recode_to_utf8(stuff, PL_encoding);
8611 }
8612 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
8613 force_next(THING);
8614 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
8615 force_next(LSTOP);
8616 }
8617 else {
8618 SvREFCNT_dec(stuff);
8619 if (eofmt)
8620 PL_lex_formbrack = 0;
8621 PL_bufptr = s;
8622 }
8623 return s;
8624}
8625
8626STATIC void
8627S_set_csh(pTHX)
8628{
8629#ifdef CSH
8630 if (!PL_cshlen)
8631 PL_cshlen = strlen(PL_cshname);
8632#endif
8633}
8634
8635I32
8636Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8637{
8638 I32 oldsavestack_ix = PL_savestack_ix;
8639 CV* outsidecv = PL_compcv;
8640
8641 if (PL_compcv) {
8642 assert(SvTYPE(PL_compcv) == SVt_PVCV);
8643 }
8644 SAVEI32(PL_subline);
8645 save_item(PL_subname);
8646 SAVESPTR(PL_compcv);
8647
8648 PL_compcv = (CV*)NEWSV(1104,0);
8649 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
8650 CvFLAGS(PL_compcv) |= flags;
8651
8652 PL_subline = CopLINE(PL_curcop);
8653 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
8654 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
8655 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
8656
8657 return oldsavestack_ix;
8658}
8659
8660#ifdef __SC__
8661#pragma segment Perl_yylex
8662#endif
8663int
8664Perl_yywarn(pTHX_ char *s)
8665{
8666 PL_in_eval |= EVAL_WARNONLY;
8667 yyerror(s);
8668 PL_in_eval &= ~EVAL_WARNONLY;
8669 return 0;
8670}
8671
8672int
8673Perl_yyerror(pTHX_ char *s)
8674{
8675 char *where = NULL;
8676 char *context = NULL;
8677 int contlen = -1;
8678 SV *msg;
8679
8680 if (!yychar || (yychar == ';' && !PL_rsfp))
8681 where = "at EOF";
8682 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
8683 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
8684 /*
8685 Only for NetWare:
8686 The code below is removed for NetWare because it abends/crashes on NetWare
8687 when the script has error such as not having the closing quotes like:
8688 if ($var eq "value)
8689 Checking of white spaces is anyway done in NetWare code.
8690 */
8691#ifndef NETWARE
8692 while (isSPACE(*PL_oldoldbufptr))
8693 PL_oldoldbufptr++;
8694#endif
8695 context = PL_oldoldbufptr;
8696 contlen = PL_bufptr - PL_oldoldbufptr;
8697 }
8698 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
8699 PL_oldbufptr != PL_bufptr) {
8700 /*
8701 Only for NetWare:
8702 The code below is removed for NetWare because it abends/crashes on NetWare
8703 when the script has error such as not having the closing quotes like:
8704 if ($var eq "value)
8705 Checking of white spaces is anyway done in NetWare code.
8706 */
8707#ifndef NETWARE
8708 while (isSPACE(*PL_oldbufptr))
8709 PL_oldbufptr++;
8710#endif
8711 context = PL_oldbufptr;
8712 contlen = PL_bufptr - PL_oldbufptr;
8713 }
8714 else if (yychar > 255)
8715 where = "next token ???";
8716 else if (yychar == -2) { /* YYEMPTY */
8717 if (PL_lex_state == LEX_NORMAL ||
8718 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
8719 where = "at end of line";
8720 else if (PL_lex_inpat)
8721 where = "within pattern";
8722 else
8723 where = "within string";
8724 }
8725 else {
8726 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
8727 if (yychar < 32)
8728 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
8729 else if (isPRINT_LC(yychar))
8730 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
8731 else
8732 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
8733 where = SvPVX(where_sv);
8734 }
8735 msg = sv_2mortal(newSVpv(s, 0));
8736 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
8737 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
8738 if (context)
8739 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
8740 else
8741 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
8742 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
8743 Perl_sv_catpvf(aTHX_ msg,
8744 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
8745 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
8746 PL_multi_end = 0;
8747 }
8748 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
8749 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
8750 else
8751 qerror(msg);
8752 if (PL_error_count >= 10) {
8753 if (PL_in_eval && SvCUR(ERRSV))
8754 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
8755 ERRSV, OutCopFILE(PL_curcop));
8756 else
8757 Perl_croak(aTHX_ "%s has too many errors.\n",
8758 OutCopFILE(PL_curcop));
8759 }
8760 PL_in_my = 0;
8761 PL_in_my_stash = Nullhv;
8762 return 0;
8763}
8764#ifdef __SC__
8765#pragma segment Main
8766#endif
8767
8768STATIC char*
8769S_swallow_bom(pTHX_ U8 *s)
8770{
8771 STRLEN slen;
8772 slen = SvCUR(PL_linestr);
8773 switch (s[0]) {
8774 case 0xFF:
8775 if (s[1] == 0xFE) {
8776 /* UTF-16 little-endian? (or UTF32-LE?) */
8777 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
8778 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
8779#ifndef PERL_NO_UTF16_FILTER
8780 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
8781 s += 2;
8782 utf16le:
8783 if (PL_bufend > (char*)s) {
8784 U8 *news;
8785 I32 newlen;
8786
8787 filter_add(utf16rev_textfilter, NULL);
8788 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
8789 utf16_to_utf8_reversed(s, news,
8790 PL_bufend - (char*)s - 1,
8791 &newlen);
8792 sv_setpvn(PL_linestr, (const char*)news, newlen);
8793 Safefree(news);
8794 SvUTF8_on(PL_linestr);
8795 s = (U8*)SvPVX(PL_linestr);
8796 PL_bufend = SvPVX(PL_linestr) + newlen;
8797 }
8798#else
8799 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
8800#endif
8801 }
8802 break;
8803 case 0xFE:
8804 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
8805#ifndef PERL_NO_UTF16_FILTER
8806 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
8807 s += 2;
8808 utf16be:
8809 if (PL_bufend > (char *)s) {
8810 U8 *news;
8811 I32 newlen;
8812
8813 filter_add(utf16_textfilter, NULL);
8814 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
8815 utf16_to_utf8(s, news,
8816 PL_bufend - (char*)s,
8817 &newlen);
8818 sv_setpvn(PL_linestr, (const char*)news, newlen);
8819 Safefree(news);
8820 SvUTF8_on(PL_linestr);
8821 s = (U8*)SvPVX(PL_linestr);
8822 PL_bufend = SvPVX(PL_linestr) + newlen;
8823 }
8824#else
8825 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
8826#endif
8827 }
8828 break;
8829 case 0xEF:
8830 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
8831 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
8832 s += 3; /* UTF-8 */
8833 }
8834 break;
8835 case 0:
8836 if (slen > 3) {
8837 if (s[1] == 0) {
8838 if (s[2] == 0xFE && s[3] == 0xFF) {
8839 /* UTF-32 big-endian */
8840 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
8841 }
8842 }
8843 else if (s[2] == 0 && s[3] != 0) {
8844 /* Leading bytes
8845 * 00 xx 00 xx
8846 * are a good indicator of UTF-16BE. */
8847 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
8848 goto utf16be;
8849 }
8850 }
8851 default:
8852 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
8853 /* Leading bytes
8854 * xx 00 xx 00
8855 * are a good indicator of UTF-16LE. */
8856 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
8857 goto utf16le;
8858 }
8859 }
8860 return (char*)s;
8861}
8862
8863/*
8864 * restore_rsfp
8865 * Restore a source filter.
8866 */
8867
8868static void
8869restore_rsfp(pTHX_ void *f)
8870{
8871 PerlIO *fp = (PerlIO*)f;
8872
8873 if (PL_rsfp == PerlIO_stdin())
8874 PerlIO_clearerr(PL_rsfp);
8875 else if (PL_rsfp && (PL_rsfp != fp))
8876 PerlIO_close(PL_rsfp);
8877 PL_rsfp = fp;
8878}
8879
8880#ifndef PERL_NO_UTF16_FILTER
8881static I32
8882utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
8883{
8884 STRLEN old = SvCUR(sv);
8885 I32 count = FILTER_READ(idx+1, sv, maxlen);
8886 DEBUG_P(PerlIO_printf(Perl_debug_log,
8887 "utf16_textfilter(%p): %d %d (%d)\n",
8888 utf16_textfilter, idx, maxlen, (int) count));
8889 if (count) {
8890 U8* tmps;
8891 I32 newlen;
8892 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
8893 Copy(SvPVX(sv), tmps, old, char);
8894 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
8895 SvCUR(sv) - old, &newlen);
8896 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
8897 }
8898 DEBUG_P({sv_dump(sv);});
8899 return SvCUR(sv);
8900}
8901
8902static I32
8903utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
8904{
8905 STRLEN old = SvCUR(sv);
8906 I32 count = FILTER_READ(idx+1, sv, maxlen);
8907 DEBUG_P(PerlIO_printf(Perl_debug_log,
8908 "utf16rev_textfilter(%p): %d %d (%d)\n",
8909 utf16rev_textfilter, idx, maxlen, (int) count));
8910 if (count) {
8911 U8* tmps;
8912 I32 newlen;
8913 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
8914 Copy(SvPVX(sv), tmps, old, char);
8915 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
8916 SvCUR(sv) - old, &newlen);
8917 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
8918 }
8919 DEBUG_P({ sv_dump(sv); });
8920 return count;
8921}
8922#endif
8923
8924/*
8925Returns a pointer to the next character after the parsed
8926vstring, as well as updating the passed in sv.
8927
8928Function must be called like
8929
8930 sv = NEWSV(92,5);
8931 s = scan_vstring(s,sv);
8932
8933The sv should already be large enough to store the vstring
8934passed in, for performance reasons.
8935
8936*/
8937
8938char *
8939Perl_scan_vstring(pTHX_ char *s, SV *sv)
8940{
8941 char *pos = s;
8942 char *start = s;
8943 if (*pos == 'v') pos++; /* get past 'v' */
8944 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
8945 pos++;
8946 if ( *pos != '.') {
8947 /* this may not be a v-string if followed by => */
8948 char *next = pos;
8949 while (next < PL_bufend && isSPACE(*next))
8950 ++next;
8951 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
8952 /* return string not v-string */
8953 sv_setpvn(sv,(char *)s,pos-s);
8954 return pos;
8955 }
8956 }
8957
8958 if (!isALPHA(*pos)) {
8959 UV rev;
8960 U8 tmpbuf[UTF8_MAXBYTES+1];
8961 U8 *tmpend;
8962
8963 if (*s == 'v') s++; /* get past 'v' */
8964
8965 sv_setpvn(sv, "", 0);
8966
8967 for (;;) {
8968 rev = 0;
8969 {
8970 /* this is atoi() that tolerates underscores */
8971 char *end = pos;
8972 UV mult = 1;
8973 while (--end >= s) {
8974 UV orev;
8975 if (*end == '_')
8976 continue;
8977 orev = rev;
8978 rev += (*end - '0') * mult;
8979 mult *= 10;
8980 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
8981 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
8982 "Integer overflow in decimal number");
8983 }
8984 }
8985#ifdef EBCDIC
8986 if (rev > 0x7FFFFFFF)
8987 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
8988#endif
8989 /* Append native character for the rev point */
8990 tmpend = uvchr_to_utf8(tmpbuf, rev);
8991 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
8992 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
8993 SvUTF8_on(sv);
8994 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
8995 s = ++pos;
8996 else {
8997 s = pos;
8998 break;
8999 }
9000 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
9001 pos++;
9002 }
9003 SvPOK_on(sv);
9004 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
9005 SvRMAGICAL_on(sv);
9006 }
9007 return s;
9008}
9009