toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #include "keywords.h"
 186
 187 /* CLINE is a macro that ensures PL_copline has a sane value */
 188
 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 190
 191 #ifdef PERL_MAD
 192 #  define SKIPSPACE0(s) skipspace0(s)
 193 #  define SKIPSPACE1(s) skipspace1(s)
 194 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 195 #  define PEEKSPACE(s) skipspace2(s,0)
 196 #else
 197 #  define SKIPSPACE0(s) skipspace(s)
 198 #  define SKIPSPACE1(s) skipspace(s)
 199 #  define SKIPSPACE2(s,tsv) skipspace(s)
 200 #  define PEEKSPACE(s) skipspace(s)
 201 #endif
 202
 203 /*
 204  * Convenience functions to return different tokens and prime the
 205  * lexer for the next token.  They all take an argument.
 206  *
 207  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 208  * OPERATOR     : generic operator
 209  * AOPERATOR    : assignment operator
 210  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 211  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 212  * PREREF       : *EXPR where EXPR is not a simple identifier
 213  * TERM         : expression term
 214  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 215  * LOOPX        : loop exiting command (goto, last, dump, etc)
 216  * FTST         : file test operator
 217  * FUN0         : zero-argument function
 218  * FUN0OP       : zero-argument function, with its op created in this file
 219  * FUN1         : not used, except for not, which isn't a UNIOP
 220  * BOop         : bitwise or or xor
 221  * BAop         : bitwise and
 222  * SHop         : shift operator
 223  * PWop         : power operator
 224  * PMop         : pattern-matching operator
 225  * Aop          : addition-level operator
 226  * Mop          : multiplication-level operator
 227  * Eop          : equality-testing operator
 228  * Rop          : relational operator <= != gt
 229  *
 230  * Also see LOP and lop() below.
 231  */
 232
 233 #ifdef DEBUGGING /* Serve -DT. */
 234 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 235 #else
 236 #   define REPORT(retval) (retval)
 237 #endif
 238
 239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 248 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 249 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 250 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 251 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 252 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 253 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 254 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 255 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 256 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 257 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 258 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 259 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 260 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 261
 262 /* This bit of chicanery makes a unary function followed by
 263  * a parenthesis into a function with one argument, highest precedence.
 264  * The UNIDOR macro is for unary functions that can be followed by the //
 265  * operator (such as C<shift // 0>).
 266  */
 267 #define UNI3(f,x,have_x) { \
 268         pl_yylval.ival = f; \
 269         if (have_x) PL_expect = x; \
 270         PL_bufptr = s; \
 271         PL_last_uni = PL_oldbufptr; \
 272         PL_last_lop_op = f; \
 273         if (*s == '(') \
 274             return REPORT( (int)FUNC1 ); \
 275         s = PEEKSPACE(s); \
 276         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 277         }
 278 #define UNI(f)    UNI3(f,XTERM,1)
 279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 280 #define UNIPROTO(f,optional) { \
 281         if (optional) PL_last_uni = PL_oldbufptr; \
 282         OPERATOR(f); \
 283         }
 284
 285 #define UNIBRACK(f) UNI3(f,0,0)
 286
 287 /* grandfather return to old style */
 288 #define OLDLOP(f) \
 289         do { \
 290             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 291                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 292             pl_yylval.ival = (f); \
 293             PL_expect = XTERM; \
 294             PL_bufptr = s; \
 295             return (int)LSTOP; \
 296         } while(0)
 297
 298 #define COPLINE_INC_WITH_HERELINES                  \
 299     STMT_START {                                     \
 300         CopLINE_inc(PL_curcop);                       \
 301         if (PL_parser->herelines)                      \
 302             CopLINE(PL_curcop) += PL_parser->herelines, \
 303             PL_parser->herelines = 0;                    \
 304     } STMT_END
 305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 306  * is no sublex_push to follow. */
 307 #define COPLINE_SET_FROM_MULTI_END            \
 308     STMT_START {                               \
 309         CopLINE_set(PL_curcop, PL_multi_end);   \
 310         if (PL_multi_end != PL_multi_start)      \
 311             PL_parser->herelines = 0;             \
 312     } STMT_END
 313
 314
 315 #ifdef DEBUGGING
 316
 317 /* how to interpret the pl_yylval associated with the token */
 318 enum token_type {
 319     TOKENTYPE_NONE,
 320     TOKENTYPE_IVAL,
 321     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 322     TOKENTYPE_PVAL,
 323     TOKENTYPE_OPVAL
 324 };
 325
 326 static struct debug_tokens {
 327     const int token;
 328     enum token_type type;
 329     const char *name;
 330 } const debug_tokens[] =
 331 {
 332     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 333     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 334     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 335     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 336     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 337     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 338     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 339     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 340     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 341     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 342     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 343     { DO,               TOKENTYPE_NONE,         "DO" },
 344     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 345     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 346     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 347     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 348     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 349     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 350     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 351     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 352     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 353     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 354     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 355     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 356     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 357     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 358     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 359     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 360     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 361     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 362     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 363     { IF,               TOKENTYPE_IVAL,         "IF" },
 364     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 365     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 366     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 367     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 368     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 369     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 370     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 371     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 372     { MY,               TOKENTYPE_IVAL,         "MY" },
 373     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 374     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 375     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 376     { OROR,             TOKENTYPE_NONE,         "OROR" },
 377     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 378     { PEG,              TOKENTYPE_NONE,         "PEG" },
 379     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 380     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 381     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 382     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 383     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 384     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 385     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 386     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 387     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 388     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 389     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 390     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 391     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 392     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 393     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 394     { SUB,              TOKENTYPE_NONE,         "SUB" },
 395     { THING,            TOKENTYPE_OPVAL,        "THING" },
 396     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 397     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 398     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 399     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 400     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 401     { USE,              TOKENTYPE_IVAL,         "USE" },
 402     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 403     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 404     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 405     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 406     { 0,                TOKENTYPE_NONE,         NULL }
 407 };
 408
 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 410
 411 STATIC int
 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 413 {
 414     dVAR;
 415
 416     PERL_ARGS_ASSERT_TOKEREPORT;
 417
 418     if (DEBUG_T_TEST) {
 419         const char *name = NULL;
 420         enum token_type type = TOKENTYPE_NONE;
 421         const struct debug_tokens *p;
 422         SV* const report = newSVpvs("<== ");
 423
 424         for (p = debug_tokens; p->token; p++) {
 425             if (p->token == (int)rv) {
 426                 name = p->name;
 427                 type = p->type;
 428                 break;
 429             }
 430         }
 431         if (name)
 432             Perl_sv_catpv(aTHX_ report, name);
 433         else if ((char)rv > ' ' && (char)rv <= '~')
 434         {
 435             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 436             if ((char)rv == 'p')
 437                 sv_catpvs(report, " (pending identifier)");
 438         }
 439         else if (!rv)
 440             sv_catpvs(report, "EOF");
 441         else
 442             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 443         switch (type) {
 444         case TOKENTYPE_NONE:
 445             break;
 446         case TOKENTYPE_IVAL:
 447             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 448             break;
 449         case TOKENTYPE_OPNUM:
 450             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 451                                     PL_op_name[lvalp->ival]);
 452             break;
 453         case TOKENTYPE_PVAL:
 454             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 455             break;
 456         case TOKENTYPE_OPVAL:
 457             if (lvalp->opval) {
 458                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 459                                     PL_op_name[lvalp->opval->op_type]);
 460                 if (lvalp->opval->op_type == OP_CONST) {
 461                     Perl_sv_catpvf(aTHX_ report, " %s",
 462                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 463                 }
 464
 465             }
 466             else
 467                 sv_catpvs(report, "(opval=null)");
 468             break;
 469         }
 470         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 471     };
 472     return (int)rv;
 473 }
 474
 475
 476 /* print the buffer with suitable escapes */
 477
 478 STATIC void
 479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 480 {
 481     SV* const tmp = newSVpvs("");
 482
 483     PERL_ARGS_ASSERT_PRINTBUF;
 484
 485     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 486     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 487     GCC_DIAG_RESTORE;
 488     SvREFCNT_dec(tmp);
 489 }
 490
 491 #endif
 492
 493 static int
 494 S_deprecate_commaless_var_list(pTHX) {
 495     PL_expect = XTERM;
 496     deprecate("comma-less variable list");
 497     return REPORT(','); /* grandfather non-comma-format format */
 498 }
 499
 500 /*
 501  * S_ao
 502  *
 503  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 504  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 505  */
 506
 507 STATIC int
 508 S_ao(pTHX_ int toketype)
 509 {
 510     dVAR;
 511     if (*PL_bufptr == '=') {
 512         PL_bufptr++;
 513         if (toketype == ANDAND)
 514             pl_yylval.ival = OP_ANDASSIGN;
 515         else if (toketype == OROR)
 516             pl_yylval.ival = OP_ORASSIGN;
 517         else if (toketype == DORDOR)
 518             pl_yylval.ival = OP_DORASSIGN;
 519         toketype = ASSIGNOP;
 520     }
 521     return toketype;
 522 }
 523
 524 /*
 525  * S_no_op
 526  * When Perl expects an operator and finds something else, no_op
 527  * prints the warning.  It always prints "<something> found where
 528  * operator expected.  It prints "Missing semicolon on previous line?"
 529  * if the surprise occurs at the start of the line.  "do you need to
 530  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 531  * where the compiler doesn't know if foo is a method call or a function.
 532  * It prints "Missing operator before end of line" if there's nothing
 533  * after the missing operator, or "... before <...>" if there is something
 534  * after the missing operator.
 535  */
 536
 537 STATIC void
 538 S_no_op(pTHX_ const char *const what, char *s)
 539 {
 540     dVAR;
 541     char * const oldbp = PL_bufptr;
 542     const bool is_first = (PL_oldbufptr == PL_linestart);
 543
 544     PERL_ARGS_ASSERT_NO_OP;
 545
 546     if (!s)
 547         s = oldbp;
 548     else
 549         PL_bufptr = s;
 550     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 551     if (ckWARN_d(WARN_SYNTAX)) {
 552         if (is_first)
 553             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 554                     "\t(Missing semicolon on previous line?)\n");
 555         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 556             const char *t;
 557             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 558                                                             t += UTF ? UTF8SKIP(t) : 1)
 559                 NOOP;
 560             if (t < PL_bufptr && isSPACE(*t))
 561                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 562                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 563                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 564         }
 565         else {
 566             assert(s >= oldbp);
 567             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 568                     "\t(Missing operator before %"UTF8f"?)\n",
 569                      UTF8fARG(UTF, s - oldbp, oldbp));
 570         }
 571     }
 572     PL_bufptr = oldbp;
 573 }
 574
 575 /*
 576  * S_missingterm
 577  * Complain about missing quote/regexp/heredoc terminator.
 578  * If it's called with NULL then it cauterizes the line buffer.
 579  * If we're in a delimited string and the delimiter is a control
 580  * character, it's reformatted into a two-char sequence like ^C.
 581  * This is fatal.
 582  */
 583
 584 STATIC void
 585 S_missingterm(pTHX_ char *s)
 586 {
 587     dVAR;
 588     char tmpbuf[3];
 589     char q;
 590     if (s) {
 591         char * const nl = strrchr(s,'\n');
 592         if (nl)
 593             *nl = '\0';
 594     }
 595     else if ((U8) PL_multi_close < 32) {
 596         *tmpbuf = '^';
 597         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 598         tmpbuf[2] = '\0';
 599         s = tmpbuf;
 600     }
 601     else {
 602         *tmpbuf = (char)PL_multi_close;
 603         tmpbuf[1] = '\0';
 604         s = tmpbuf;
 605     }
 606     q = strchr(s,'"') ? '\'' : '"';
 607     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 608 }
 609
 610 #include "feature.h"
 611
 612 /*
 613  * Check whether the named feature is enabled.
 614  */
 615 bool
 616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 617 {
 618     dVAR;
 619     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 620
 621     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 622
 623     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 624
 625     if (namelen > MAX_FEATURE_LEN)
 626         return FALSE;
 627     memcpy(&he_name[8], name, namelen);
 628
 629     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 630                                      REFCOUNTED_HE_EXISTS));
 631 }
 632
 633 /*
 634  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 635  * utf16-to-utf8-reversed.
 636  */
 637
 638 #ifdef PERL_CR_FILTER
 639 static void
 640 strip_return(SV *sv)
 641 {
 642     const char *s = SvPVX_const(sv);
 643     const char * const e = s + SvCUR(sv);
 644
 645     PERL_ARGS_ASSERT_STRIP_RETURN;
 646
 647     /* outer loop optimized to do nothing if there are no CR-LFs */
 648     while (s < e) {
 649         if (*s++ == '\r' && *s == '\n') {
 650             /* hit a CR-LF, need to copy the rest */
 651             char *d = s - 1;
 652             *d++ = *s++;
 653             while (s < e) {
 654                 if (*s == '\r' && s[1] == '\n')
 655                     s++;
 656                 *d++ = *s++;
 657             }
 658             SvCUR(sv) -= s - d;
 659             return;
 660         }
 661     }
 662 }
 663
 664 STATIC I32
 665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 666 {
 667     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 668     if (count > 0 && !maxlen)
 669         strip_return(sv);
 670     return count;
 671 }
 672 #endif
 673
 674 /*
 675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 676
 677 Creates and initialises a new lexer/parser state object, supplying
 678 a context in which to lex and parse from a new source of Perl code.
 679 A pointer to the new state object is placed in L</PL_parser>.  An entry
 680 is made on the save stack so that upon unwinding the new state object
 681 will be destroyed and the former value of L</PL_parser> will be restored.
 682 Nothing else need be done to clean up the parsing context.
 683
 684 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 685 non-null, provides a string (in SV form) containing code to be parsed.
 686 A copy of the string is made, so subsequent modification of I<line>
 687 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 688 from which code will be read to be parsed.  If both are non-null, the
 689 code in I<line> comes first and must consist of complete lines of input,
 690 and I<rsfp> supplies the remainder of the source.
 691
 692 The I<flags> parameter is reserved for future use.  Currently it is only
 693 used by perl internally, so extensions should always pass zero.
 694
 695 =cut
 696 */
 697
 698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 699    can share filters with the current parser.
 700    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 701    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 702    destruction. This is used to handle the case of defaulting to reading the
 703    script from the standard input because no filename was given on the command
 704    line (without getting confused by situation where STDIN has been closed, so
 705    the script handle is opened on fd 0)  */
 706
 707 void
 708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 709 {
 710     dVAR;
 711     const char *s = NULL;
 712     yy_parser *parser, *oparser;
 713     if (flags && flags & ~LEX_START_FLAGS)
 714         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 715
 716     /* create and initialise a parser */
 717
 718     Newxz(parser, 1, yy_parser);
 719     parser->old_parser = oparser = PL_parser;
 720     PL_parser = parser;
 721
 722     parser->stack = NULL;
 723     parser->ps = NULL;
 724     parser->stack_size = 0;
 725
 726     /* on scope exit, free this parser and restore any outer one */
 727     SAVEPARSER(parser);
 728     parser->saved_curcop = PL_curcop;
 729
 730     /* initialise lexer state */
 731
 732 #ifdef PERL_MAD
 733     parser->curforce = -1;
 734 #else
 735     parser->nexttoke = 0;
 736 #endif
 737     parser->error_count = oparser ? oparser->error_count : 0;
 738     parser->copline = parser->preambling = NOLINE;
 739     parser->lex_state = LEX_NORMAL;
 740     parser->expect = XSTATE;
 741     parser->rsfp = rsfp;
 742     parser->rsfp_filters =
 743       !(flags & LEX_START_SAME_FILTER) || !oparser
 744         ? NULL
 745         : MUTABLE_AV(SvREFCNT_inc(
 746             oparser->rsfp_filters
 747              ? oparser->rsfp_filters
 748              : (oparser->rsfp_filters = newAV())
 749           ));
 750
 751     Newx(parser->lex_brackstack, 120, char);
 752     Newx(parser->lex_casestack, 12, char);
 753     *parser->lex_casestack = '\0';
 754     Newxz(parser->lex_shared, 1, LEXSHARED);
 755
 756     if (line) {
 757         STRLEN len;
 758         s = SvPV_const(line, len);
 759         parser->linestr = flags & LEX_START_COPIED
 760                             ? SvREFCNT_inc_simple_NN(line)
 761                             : newSVpvn_flags(s, len, SvUTF8(line));
 762         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 763     } else {
 764         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 765     }
 766     parser->oldoldbufptr =
 767         parser->oldbufptr =
 768         parser->bufptr =
 769         parser->linestart = SvPVX(parser->linestr);
 770     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 771     parser->last_lop = parser->last_uni = NULL;
 772
 773     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 774                                                         |LEX_DONT_CLOSE_RSFP));
 775     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 776                                                         |LEX_DONT_CLOSE_RSFP));
 777
 778     parser->in_pod = parser->filtered = 0;
 779 }
 780
 781
 782 /* delete a parser object */
 783
 784 void
 785 Perl_parser_free(pTHX_  const yy_parser *parser)
 786 {
 787     PERL_ARGS_ASSERT_PARSER_FREE;
 788
 789     PL_curcop = parser->saved_curcop;
 790     SvREFCNT_dec(parser->linestr);
 791
 792     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 793         PerlIO_clearerr(parser->rsfp);
 794     else if (parser->rsfp && (!parser->old_parser ||
 795                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 796         PerlIO_close(parser->rsfp);
 797     SvREFCNT_dec(parser->rsfp_filters);
 798     SvREFCNT_dec(parser->lex_stuff);
 799     SvREFCNT_dec(parser->sublex_info.repl);
 800
 801     Safefree(parser->lex_brackstack);
 802     Safefree(parser->lex_casestack);
 803     Safefree(parser->lex_shared);
 804     PL_parser = parser->old_parser;
 805     Safefree(parser);
 806 }
 807
 808 void
 809 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 810 {
 811 #ifdef PERL_MAD
 812     I32 nexttoke = parser->lasttoke;
 813 #else
 814     I32 nexttoke = parser->nexttoke;
 815 #endif
 816     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 817     while (nexttoke--) {
 818 #ifdef PERL_MAD
 819         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 820                                 & 0xffff)
 821          && parser->nexttoke[nexttoke].next_val.opval
 822          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 823          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 824                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 825                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 826         }
 827 #else
 828         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 829          && parser->nextval[nexttoke].opval
 830          && parser->nextval[nexttoke].opval->op_slabbed
 831          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 832             op_free(parser->nextval[nexttoke].opval);
 833             parser->nextval[nexttoke].opval = NULL;
 834         }
 835 #endif
 836     }
 837 }
 838
 839
 840 /*
 841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 842
 843 Buffer scalar containing the chunk currently under consideration of the
 844 text currently being lexed.  This is always a plain string scalar (for
 845 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 846 normal scalar means; instead refer to the buffer directly by the pointer
 847 variables described below.
 848
 849 The lexer maintains various C<char*> pointers to things in the
 850 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 851 reallocated, all of these pointers must be updated.  Don't attempt to
 852 do this manually, but rather use L</lex_grow_linestr> if you need to
 853 reallocate the buffer.
 854
 855 The content of the text chunk in the buffer is commonly exactly one
 856 complete line of input, up to and including a newline terminator,
 857 but there are situations where it is otherwise.  The octets of the
 858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 859 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 860 flag on this scalar, which may disagree with it.
 861
 862 For direct examination of the buffer, the variable
 863 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 865 of these pointers is usually preferable to examination of the scalar
 866 through normal scalar means.
 867
 868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 869
 870 Direct pointer to the end of the chunk of text currently being lexed, the
 871 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 872 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 873 always located at the end of the buffer, and does not count as part of
 874 the buffer's contents.
 875
 876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 877
 878 Points to the current position of lexing inside the lexer buffer.
 879 Characters around this point may be freely examined, within
 880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 881 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 883
 884 Lexing code (whether in the Perl core or not) moves this pointer past
 885 the characters that it consumes.  It is also expected to perform some
 886 bookkeeping whenever a newline character is consumed.  This movement
 887 can be more conveniently performed by the function L</lex_read_to>,
 888 which handles newlines appropriately.
 889
 890 Interpretation of the buffer's octets can be abstracted out by
 891 using the slightly higher-level functions L</lex_peek_unichar> and
 892 L</lex_read_unichar>.
 893
 894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 895
 896 Points to the start of the current line inside the lexer buffer.
 897 This is useful for indicating at which column an error occurred, and
 898 not much else.  This must be updated by any lexing code that consumes
 899 a newline; the function L</lex_read_to> handles this detail.
 900
 901 =cut
 902 */
 903
 904 /*
 905 =for apidoc Amx|bool|lex_bufutf8
 906
 907 Indicates whether the octets in the lexer buffer
 908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 909 of Unicode characters.  If not, they should be interpreted as Latin-1
 910 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 911
 912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 913 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 914 encoding.
 915
 916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 917 is significant, but not the whole story regarding the input character
 918 encoding.  Normally, when a file is being read, the scalar contains octets
 919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 920 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 923 is in effect.  This logic may change in the future; use this function
 924 instead of implementing the logic yourself.
 925
 926 =cut
 927 */
 928
 929 bool
 930 Perl_lex_bufutf8(pTHX)
 931 {
 932     return UTF;
 933 }
 934
 935 /*
 936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 937
 938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 939 at least I<len> octets (including terminating NUL).  Returns a
 940 pointer to the reallocated buffer.  This is necessary before making
 941 any direct modification of the buffer that would increase its length.
 942 L</lex_stuff_pvn> provides a more convenient way to insert text into
 943 the buffer.
 944
 945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 946 this function updates all of the lexer's variables that point directly
 947 into the buffer.
 948
 949 =cut
 950 */
 951
 952 char *
 953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 954 {
 955     SV *linestr;
 956     char *buf;
 957     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 958     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 959     linestr = PL_parser->linestr;
 960     buf = SvPVX(linestr);
 961     if (len <= SvLEN(linestr))
 962         return buf;
 963     bufend_pos = PL_parser->bufend - buf;
 964     bufptr_pos = PL_parser->bufptr - buf;
 965     oldbufptr_pos = PL_parser->oldbufptr - buf;
 966     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 967     linestart_pos = PL_parser->linestart - buf;
 968     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 969     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 970     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 971                             PL_parser->lex_shared->re_eval_start - buf : 0;
 972
 973     buf = sv_grow(linestr, len);
 974
 975     PL_parser->bufend = buf + bufend_pos;
 976     PL_parser->bufptr = buf + bufptr_pos;
 977     PL_parser->oldbufptr = buf + oldbufptr_pos;
 978     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 979     PL_parser->linestart = buf + linestart_pos;
 980     if (PL_parser->last_uni)
 981         PL_parser->last_uni = buf + last_uni_pos;
 982     if (PL_parser->last_lop)
 983         PL_parser->last_lop = buf + last_lop_pos;
 984     if (PL_parser->lex_shared->re_eval_start)
 985         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 986     return buf;
 987 }
 988
 989 /*
 990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 991
 992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 994 reallocating the buffer if necessary.  This means that lexing code that
 995 runs later will see the characters as if they had appeared in the input.
 996 It is not recommended to do this as part of normal parsing, and most
 997 uses of this facility run the risk of the inserted characters being
 998 interpreted in an unintended manner.
 999
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1007
1008 =cut
1009 */
1010
1011 void
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1013 {
1014     dVAR;
1015     char *bufptr;
1016     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017     if (flags & ~(LEX_STUFF_UTF8))
1018         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1019     if (UTF) {
1020         if (flags & LEX_STUFF_UTF8) {
1021             goto plain_copy;
1022         } else {
1023             STRLEN highhalf = 0;    /* Count of variants */
1024             const char *p, *e = pv+len;
1025             for (p = pv; p != e; p++) {
1026                 if (! UTF8_IS_INVARIANT(*p)) {
1027                     highhalf++;
1028                 }
1029             }
1030             if (!highhalf)
1031                 goto plain_copy;
1032             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033             bufptr = PL_parser->bufptr;
1034             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035             SvCUR_set(PL_parser->linestr,
1036                 SvCUR(PL_parser->linestr) + len+highhalf);
1037             PL_parser->bufend += len+highhalf;
1038             for (p = pv; p != e; p++) {
1039                 U8 c = (U8)*p;
1040                 if (! UTF8_IS_INVARIANT(c)) {
1041                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1043                 } else {
1044                     *bufptr++ = (char)c;
1045                 }
1046             }
1047         }
1048     } else {
1049         if (flags & LEX_STUFF_UTF8) {
1050             STRLEN highhalf = 0;
1051             const char *p, *e = pv+len;
1052             for (p = pv; p != e; p++) {
1053                 U8 c = (U8)*p;
1054                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056                                 "non-Latin-1 character into Latin-1 input");
1057                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1058                     p++;
1059                     highhalf++;
1060                 } else if (! UTF8_IS_INVARIANT(c)) {
1061                     /* malformed UTF-8 */
1062                     ENTER;
1063                     SAVESPTR(PL_warnhook);
1064                     PL_warnhook = PERL_WARNHOOK_FATAL;
1065                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1066                     LEAVE;
1067                 }
1068             }
1069             if (!highhalf)
1070                 goto plain_copy;
1071             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072             bufptr = PL_parser->bufptr;
1073             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074             SvCUR_set(PL_parser->linestr,
1075                 SvCUR(PL_parser->linestr) + len-highhalf);
1076             PL_parser->bufend += len-highhalf;
1077             p = pv;
1078             while (p < e) {
1079                 if (UTF8_IS_INVARIANT(*p)) {
1080                     *bufptr++ = *p;
1081                     p++;
1082                 }
1083                 else {
1084                     assert(p < e -1 );
1085                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1086                     p += 2;
1087                 }
1088             }
1089         } else {
1090           plain_copy:
1091             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092             bufptr = PL_parser->bufptr;
1093             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095             PL_parser->bufend += len;
1096             Copy(pv, bufptr, len, char);
1097         }
1098     }
1099 }
1100
1101 /*
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1103
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary.  This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1111
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul.  These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>.  The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1119
1120 =cut
1121 */
1122
1123 void
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1125 {
1126     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127     lex_stuff_pvn(pv, strlen(pv), flags);
1128 }
1129
1130 /*
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1132
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary.  This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1140
1141 The string to be inserted is the string value of I<sv>.  The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1146
1147 =cut
1148 */
1149
1150 void
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1152 {
1153     char *pv;
1154     STRLEN len;
1155     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1156     if (flags)
1157         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1158     pv = SvPV(sv, len);
1159     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1160 }
1161
1162 /*
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1164
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1169
1170 This is not the normal way to consume lexed text.  For that, use
1171 L</lex_read_to>.
1172
1173 =cut
1174 */
1175
1176 void
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1178 {
1179     char *buf, *bufend;
1180     STRLEN unstuff_len;
1181     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182     buf = PL_parser->bufptr;
1183     if (ptr < buf)
1184         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185     if (ptr == buf)
1186         return;
1187     bufend = PL_parser->bufend;
1188     if (ptr > bufend)
1189         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190     unstuff_len = ptr - buf;
1191     Move(ptr, buf, bufend+1-ptr, char);
1192     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193     PL_parser->bufend = bufend - unstuff_len;
1194 }
1195
1196 /*
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1198
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1203
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1207
1208 =cut
1209 */
1210
1211 void
1212 Perl_lex_read_to(pTHX_ char *ptr)
1213 {
1214     char *s;
1215     PERL_ARGS_ASSERT_LEX_READ_TO;
1216     s = PL_parser->bufptr;
1217     if (ptr < s || ptr > PL_parser->bufend)
1218         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219     for (; s != ptr; s++)
1220         if (*s == '\n') {
1221             COPLINE_INC_WITH_HERELINES;
1222             PL_parser->linestart = s+1;
1223         }
1224     PL_parser->bufptr = ptr;
1225 }
1226
1227 /*
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1229
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>.  The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately.  I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1235
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it.  However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1243
1244 =cut
1245 */
1246
1247 void
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1249 {
1250     char *buf;
1251     STRLEN discard_len;
1252     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253     buf = SvPVX(PL_parser->linestr);
1254     if (ptr < buf)
1255         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256     if (ptr == buf)
1257         return;
1258     if (ptr > PL_parser->bufptr)
1259         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260     discard_len = ptr - buf;
1261     if (PL_parser->oldbufptr < ptr)
1262         PL_parser->oldbufptr = ptr;
1263     if (PL_parser->oldoldbufptr < ptr)
1264         PL_parser->oldoldbufptr = ptr;
1265     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266         PL_parser->last_uni = NULL;
1267     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268         PL_parser->last_lop = NULL;
1269     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271     PL_parser->bufend -= discard_len;
1272     PL_parser->bufptr -= discard_len;
1273     PL_parser->oldbufptr -= discard_len;
1274     PL_parser->oldoldbufptr -= discard_len;
1275     if (PL_parser->last_uni)
1276         PL_parser->last_uni -= discard_len;
1277     if (PL_parser->last_lop)
1278         PL_parser->last_lop -= discard_len;
1279 }
1280
1281 /*
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1283
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more.  It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1289
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded.  If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1296
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1299
1300 =cut
1301 */
1302
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM  0x40000000
1305
1306 bool
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1308 {
1309     SV *linestr;
1310     char *buf;
1311     STRLEN old_bufend_pos, new_bufend_pos;
1312     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314     bool got_some_for_debugger = 0;
1315     bool got_some;
1316     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318     linestr = PL_parser->linestr;
1319     buf = SvPVX(linestr);
1320     if (!(flags & LEX_KEEP_PREVIOUS) &&
1321             PL_parser->bufptr == PL_parser->bufend) {
1322         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1323         linestart_pos = 0;
1324         if (PL_parser->last_uni != PL_parser->bufend)
1325             PL_parser->last_uni = NULL;
1326         if (PL_parser->last_lop != PL_parser->bufend)
1327             PL_parser->last_lop = NULL;
1328         last_uni_pos = last_lop_pos = 0;
1329         *buf = 0;
1330         SvCUR(linestr) = 0;
1331     } else {
1332         old_bufend_pos = PL_parser->bufend - buf;
1333         bufptr_pos = PL_parser->bufptr - buf;
1334         oldbufptr_pos = PL_parser->oldbufptr - buf;
1335         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336         linestart_pos = PL_parser->linestart - buf;
1337         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1339     }
1340     if (flags & LEX_FAKE_EOF) {
1341         goto eof;
1342     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1343         got_some = 0;
1344     } else if (filter_gets(linestr, old_bufend_pos)) {
1345         got_some = 1;
1346         got_some_for_debugger = 1;
1347     } else if (flags & LEX_NO_TERM) {
1348         got_some = 0;
1349     } else {
1350         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1351             sv_setpvs(linestr, "");
1352         eof:
1353         /* End of real input.  Close filehandle (unless it was STDIN),
1354          * then add implicit termination.
1355          */
1356         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357             PerlIO_clearerr(PL_parser->rsfp);
1358         else if (PL_parser->rsfp)
1359             (void)PerlIO_close(PL_parser->rsfp);
1360         PL_parser->rsfp = NULL;
1361         PL_parser->in_pod = PL_parser->filtered = 0;
1362 #ifdef PERL_MAD
1363         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1364             PL_faketokens = 1;
1365 #endif
1366         if (!PL_in_eval && PL_minus_p) {
1367             sv_catpvs(linestr,
1368                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369             PL_minus_n = PL_minus_p = 0;
1370         } else if (!PL_in_eval && PL_minus_n) {
1371             sv_catpvs(linestr, /*{*/";}");
1372             PL_minus_n = 0;
1373         } else
1374             sv_catpvs(linestr, ";");
1375         got_some = 1;
1376     }
1377     buf = SvPVX(linestr);
1378     new_bufend_pos = SvCUR(linestr);
1379     PL_parser->bufend = buf + new_bufend_pos;
1380     PL_parser->bufptr = buf + bufptr_pos;
1381     PL_parser->oldbufptr = buf + oldbufptr_pos;
1382     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383     PL_parser->linestart = buf + linestart_pos;
1384     if (PL_parser->last_uni)
1385         PL_parser->last_uni = buf + last_uni_pos;
1386     if (PL_parser->last_lop)
1387         PL_parser->last_lop = buf + last_lop_pos;
1388     if (PL_parser->preambling != NOLINE) {
1389         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390         PL_parser->preambling = NOLINE;
1391     }
1392     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393             PL_curstash != PL_debstash) {
1394         /* debugger active and we're not compiling the debugger code,
1395          * so store the line into the debugger's array of lines
1396          */
1397         update_debugger_info(NULL, buf+old_bufend_pos,
1398             new_bufend_pos-old_bufend_pos);
1399     }
1400     return got_some;
1401 }
1402
1403 /*
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1405
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text.  To consume the
1409 peeked character, use L</lex_read_unichar>.
1410
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in.  Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1415
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1418
1419 =cut
1420 */
1421
1422 I32
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1424 {
1425     dVAR;
1426     char *s, *bufend;
1427     if (flags & ~(LEX_KEEP_PREVIOUS))
1428         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429     s = PL_parser->bufptr;
1430     bufend = PL_parser->bufend;
1431     if (UTF) {
1432         U8 head;
1433         I32 unichar;
1434         STRLEN len, retlen;
1435         if (s == bufend) {
1436             if (!lex_next_chunk(flags))
1437                 return -1;
1438             s = PL_parser->bufptr;
1439             bufend = PL_parser->bufend;
1440         }
1441         head = (U8)*s;
1442         if (UTF8_IS_INVARIANT(head))
1443             return head;
1444         if (UTF8_IS_START(head)) {
1445             len = UTF8SKIP(&head);
1446             while ((STRLEN)(bufend-s) < len) {
1447                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1448                     break;
1449                 s = PL_parser->bufptr;
1450                 bufend = PL_parser->bufend;
1451             }
1452         }
1453         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454         if (retlen == (STRLEN)-1) {
1455             /* malformed UTF-8 */
1456             ENTER;
1457             SAVESPTR(PL_warnhook);
1458             PL_warnhook = PERL_WARNHOOK_FATAL;
1459             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1460             LEAVE;
1461         }
1462         return unichar;
1463     } else {
1464         if (s == bufend) {
1465             if (!lex_next_chunk(flags))
1466                 return -1;
1467             s = PL_parser->bufptr;
1468         }
1469         return (U8)*s;
1470     }
1471 }
1472
1473 /*
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1475
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text.  To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1481
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in.  Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1486
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1489
1490 =cut
1491 */
1492
1493 I32
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1495 {
1496     I32 c;
1497     if (flags & ~(LEX_KEEP_PREVIOUS))
1498         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499     c = lex_peek_unichar(flags);
1500     if (c != -1) {
1501         if (c == '\n')
1502             COPLINE_INC_WITH_HERELINES;
1503         if (UTF)
1504             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1505         else
1506             ++(PL_parser->bufptr);
1507     }
1508     return c;
1509 }
1510
1511 /*
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1513
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed.  The spaces may include ordinary whitespace characters and
1516 Perl-style comments.  C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1519
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in.  Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1524
1525 =cut
1526 */
1527
1528 #define LEX_NO_INCLINE    0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1530
1531 void
1532 Perl_lex_read_space(pTHX_ U32 flags)
1533 {
1534     char *s, *bufend;
1535     const bool can_incline = !(flags & LEX_NO_INCLINE);
1536     bool need_incline = 0;
1537     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1539 #ifdef PERL_MAD
1540     if (PL_skipwhite) {
1541         sv_free(PL_skipwhite);
1542         PL_skipwhite = NULL;
1543     }
1544     if (PL_madskills)
1545         PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547     s = PL_parser->bufptr;
1548     bufend = PL_parser->bufend;
1549     while (1) {
1550         char c = *s;
1551         if (c == '#') {
1552             do {
1553                 c = *++s;
1554             } while (!(c == '\n' || (c == 0 && s == bufend)));
1555         } else if (c == '\n') {
1556             s++;
1557             if (can_incline) {
1558                 PL_parser->linestart = s;
1559                 if (s == bufend)
1560                     need_incline = 1;
1561                 else
1562                     incline(s);
1563             }
1564         } else if (isSPACE(c)) {
1565             s++;
1566         } else if (c == 0 && s == bufend) {
1567             bool got_more;
1568             line_t l;
1569 #ifdef PERL_MAD
1570             if (PL_madskills)
1571                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573             if (flags & LEX_NO_NEXT_CHUNK)
1574                 break;
1575             PL_parser->bufptr = s;
1576             l = CopLINE(PL_curcop);
1577             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578             got_more = lex_next_chunk(flags);
1579             CopLINE_set(PL_curcop, l);
1580             s = PL_parser->bufptr;
1581             bufend = PL_parser->bufend;
1582             if (!got_more)
1583                 break;
1584             if (can_incline && need_incline && PL_parser->rsfp) {
1585                 incline(s);
1586                 need_incline = 0;
1587             }
1588         } else {
1589             break;
1590         }
1591     }
1592 #ifdef PERL_MAD
1593     if (PL_madskills)
1594         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596     PL_parser->bufptr = s;
1597 }
1598
1599 /*
1600
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1602
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1607
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1610 C<false>.
1611
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1613
1614 =cut
1615
1616  */
1617
1618 bool
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1620 {
1621     STRLEN len, origlen;
1622     char *p = proto ? SvPV(proto, len) : NULL;
1623     bool bad_proto = FALSE;
1624     bool in_brackets = FALSE;
1625     bool after_slash = FALSE;
1626     char greedy_proto = ' ';
1627     bool proto_after_greedy_proto = FALSE;
1628     bool must_be_last = FALSE;
1629     bool underscore = FALSE;
1630     bool bad_proto_after_underscore = FALSE;
1631
1632     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1633
1634     if (!proto)
1635         return TRUE;
1636
1637     origlen = len;
1638     for (; len--; p++) {
1639         if (!isSPACE(*p)) {
1640             if (must_be_last)
1641                 proto_after_greedy_proto = TRUE;
1642             if (underscore) {
1643                 if (!strchr(";@%", *p))
1644                     bad_proto_after_underscore = TRUE;
1645                 underscore = FALSE;
1646             }
1647             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1648                 bad_proto = TRUE;
1649             }
1650             else {
1651                 if (*p == '[')
1652                     in_brackets = TRUE;
1653                 else if (*p == ']')
1654                     in_brackets = FALSE;
1655                 else if ((*p == '@' || *p == '%') &&
1656                     !after_slash &&
1657                     !in_brackets ) {
1658                     must_be_last = TRUE;
1659                     greedy_proto = *p;
1660                 }
1661                 else if (*p == '_')
1662                     underscore = TRUE;
1663             }
1664             if (*p == '\\')
1665                 after_slash = TRUE;
1666             else
1667                 after_slash = FALSE;
1668         }
1669     }
1670
1671     if (warn) {
1672         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1673         p -= origlen;
1674         p = SvUTF8(proto)
1675             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676                              origlen, UNI_DISPLAY_ISPRINT)
1677             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1678
1679         if (proto_after_greedy_proto)
1680             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681                         "Prototype after '%c' for %"SVf" : %s",
1682                         greedy_proto, SVfARG(name), p);
1683         if (in_brackets)
1684             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685                         "Missing ']' in prototype for %"SVf" : %s",
1686                         SVfARG(name), p);
1687         if (bad_proto)
1688             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689                         "Illegal character in prototype for %"SVf" : %s",
1690                         SVfARG(name), p);
1691         if (bad_proto_after_underscore)
1692             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693                         "Illegal character after '_' in prototype for %"SVf" : %s",
1694                         SVfARG(name), p);
1695     }
1696
1697     return (! (proto_after_greedy_proto || bad_proto) );
1698 }
1699
1700 /*
1701  * S_incline
1702  * This subroutine has nothing to do with tilting, whether at windmills
1703  * or pinball tables.  Its name is short for "increment line".  It
1704  * increments the current line number in CopLINE(PL_curcop) and checks
1705  * to see whether the line starts with a comment of the form
1706  *    # line 500 "foo.pm"
1707  * If so, it sets the current line number and file to the values in the comment.
1708  */
1709
1710 STATIC void
1711 S_incline(pTHX_ const char *s)
1712 {
1713     dVAR;
1714     const char *t;
1715     const char *n;
1716     const char *e;
1717     line_t line_num;
1718
1719     PERL_ARGS_ASSERT_INCLINE;
1720
1721     COPLINE_INC_WITH_HERELINES;
1722     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723      && s+1 == PL_bufend && *s == ';') {
1724         /* fake newline in string eval */
1725         CopLINE_dec(PL_curcop);
1726         return;
1727     }
1728     if (*s++ != '#')
1729         return;
1730     while (SPACE_OR_TAB(*s))
1731         s++;
1732     if (strnEQ(s, "line", 4))
1733         s += 4;
1734     else
1735         return;
1736     if (SPACE_OR_TAB(*s))
1737         s++;
1738     else
1739         return;
1740     while (SPACE_OR_TAB(*s))
1741         s++;
1742     if (!isDIGIT(*s))
1743         return;
1744
1745     n = s;
1746     while (isDIGIT(*s))
1747         s++;
1748     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1749         return;
1750     while (SPACE_OR_TAB(*s))
1751         s++;
1752     if (*s == '"' && (t = strchr(s+1, '"'))) {
1753         s++;
1754         e = t + 1;
1755     }
1756     else {
1757         t = s;
1758         while (!isSPACE(*t))
1759             t++;
1760         e = t;
1761     }
1762     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1763         e++;
1764     if (*e != '\n' && *e != '\0')
1765         return;         /* false alarm */
1766
1767     line_num = atoi(n)-1;
1768
1769     if (t - s > 0) {
1770         const STRLEN len = t - s;
1771
1772         if (!PL_rsfp && !PL_parser->filtered) {
1773             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774              * to *{"::_<newfilename"} */
1775             /* However, the long form of evals is only turned on by the
1776                debugger - usually they're "(eval %lu)" */
1777             GV * const cfgv = CopFILEGV(PL_curcop);
1778             if (cfgv) {
1779                 char smallbuf[128];
1780                 STRLEN tmplen2 = len;
1781                 char *tmpbuf2;
1782                 GV *gv2;
1783
1784                 if (tmplen2 + 2 <= sizeof smallbuf)
1785                     tmpbuf2 = smallbuf;
1786                 else
1787                     Newx(tmpbuf2, tmplen2 + 2, char);
1788
1789                 tmpbuf2[0] = '_';
1790                 tmpbuf2[1] = '<';
1791
1792                 memcpy(tmpbuf2 + 2, s, tmplen2);
1793                 tmplen2 += 2;
1794
1795                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1796                 if (!isGV(gv2)) {
1797                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798                     /* adjust ${"::_<newfilename"} to store the new file name */
1799                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800                     /* The line number may differ. If that is the case,
1801                        alias the saved lines that are in the array.
1802                        Otherwise alias the whole array. */
1803                     if (CopLINE(PL_curcop) == line_num) {
1804                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1806                     }
1807                     else if (GvAV(cfgv)) {
1808                         AV * const av = GvAV(cfgv);
1809                         const I32 start = CopLINE(PL_curcop)+1;
1810                         I32 items = AvFILLp(av) - start;
1811                         if (items > 0) {
1812                             AV * const av2 = GvAVn(gv2);
1813                             SV **svp = AvARRAY(av) + start;
1814                             I32 l = (I32)line_num+1;
1815                             while (items--)
1816                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1817                         }
1818                     }
1819                 }
1820
1821                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1822             }
1823         }
1824         CopFILE_free(PL_curcop);
1825         CopFILE_setn(PL_curcop, s, len);
1826     }
1827     CopLINE_set(PL_curcop, line_num);
1828 }
1829
1830 #define skipspace(s) skipspace_flags(s, 0)
1831
1832 #ifdef PERL_MAD
1833 /* skip space before PL_thistoken */
1834
1835 STATIC char *
1836 S_skipspace0(pTHX_ char *s)
1837 {
1838     PERL_ARGS_ASSERT_SKIPSPACE0;
1839
1840     s = skipspace(s);
1841     if (!PL_madskills)
1842         return s;
1843     if (PL_skipwhite) {
1844         if (!PL_thiswhite)
1845             PL_thiswhite = newSVpvs("");
1846         sv_catsv(PL_thiswhite, PL_skipwhite);
1847         sv_free(PL_skipwhite);
1848         PL_skipwhite = 0;
1849     }
1850     PL_realtokenstart = s - SvPVX(PL_linestr);
1851     return s;
1852 }
1853
1854 /* skip space after PL_thistoken */
1855
1856 STATIC char *
1857 S_skipspace1(pTHX_ char *s)
1858 {
1859     const char *start = s;
1860     I32 startoff = start - SvPVX(PL_linestr);
1861
1862     PERL_ARGS_ASSERT_SKIPSPACE1;
1863
1864     s = skipspace(s);
1865     if (!PL_madskills)
1866         return s;
1867     start = SvPVX(PL_linestr) + startoff;
1868     if (!PL_thistoken && PL_realtokenstart >= 0) {
1869         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870         PL_thistoken = newSVpvn(tstart, start - tstart);
1871     }
1872     PL_realtokenstart = -1;
1873     if (PL_skipwhite) {
1874         if (!PL_nextwhite)
1875             PL_nextwhite = newSVpvs("");
1876         sv_catsv(PL_nextwhite, PL_skipwhite);
1877         sv_free(PL_skipwhite);
1878         PL_skipwhite = 0;
1879     }
1880     return s;
1881 }
1882
1883 STATIC char *
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1885 {
1886     char *start;
1887     const I32 startoff = s - SvPVX(PL_linestr);
1888
1889     PERL_ARGS_ASSERT_SKIPSPACE2;
1890
1891     s = skipspace(s);
1892     if (!PL_madskills || !svp)
1893         return s;
1894     start = SvPVX(PL_linestr) + startoff;
1895     if (!PL_thistoken && PL_realtokenstart >= 0) {
1896         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897         PL_thistoken = newSVpvn(tstart, start - tstart);
1898         PL_realtokenstart = -1;
1899     }
1900     if (PL_skipwhite) {
1901         if (!*svp)
1902             *svp = newSVpvs("");
1903         sv_setsv(*svp, PL_skipwhite);
1904         sv_free(PL_skipwhite);
1905         PL_skipwhite = 0;
1906     }
1907
1908     return s;
1909 }
1910 #endif
1911
1912 STATIC void
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1914 {
1915     AV *av = CopFILEAVx(PL_curcop);
1916     if (av) {
1917         SV * sv;
1918         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1919         else {
1920             sv = *av_fetch(av, 0, 1);
1921             SvUPGRADE(sv, SVt_PVMG);
1922         }
1923         if (!SvPOK(sv)) sv_setpvs(sv,"");
1924         if (orig_sv)
1925             sv_catsv(sv, orig_sv);
1926         else
1927             sv_catpvn(sv, buf, len);
1928         if (!SvIOK(sv)) {
1929             (void)SvIOK_on(sv);
1930             SvIV_set(sv, 0);
1931         }
1932         if (PL_parser->preambling == NOLINE)
1933             av_store(av, CopLINE(PL_curcop), sv);
1934     }
1935 }
1936
1937 /*
1938  * S_skipspace
1939  * Called to gobble the appropriate amount and type of whitespace.
1940  * Skips comments as well.
1941  */
1942
1943 STATIC char *
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1945 {
1946 #ifdef PERL_MAD
1947     char *start = s;
1948 #endif /* PERL_MAD */
1949     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1950 #ifdef PERL_MAD
1951     if (PL_skipwhite) {
1952         sv_free(PL_skipwhite);
1953         PL_skipwhite = NULL;
1954     }
1955 #endif /* PERL_MAD */
1956     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957         while (s < PL_bufend && SPACE_OR_TAB(*s))
1958             s++;
1959     } else {
1960         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1961         PL_bufptr = s;
1962         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964                     LEX_NO_NEXT_CHUNK : 0));
1965         s = PL_bufptr;
1966         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967         if (PL_linestart > PL_bufptr)
1968             PL_bufptr = PL_linestart;
1969         return s;
1970     }
1971 #ifdef PERL_MAD
1972     if (PL_madskills)
1973         PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1975     return s;
1976 }
1977
1978 /*
1979  * S_check_uni
1980  * Check the unary operators to ensure there's no ambiguity in how they're
1981  * used.  An ambiguous piece of code would be:
1982  *     rand + 5
1983  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1984  * the +5 is its argument.
1985  */
1986
1987 STATIC void
1988 S_check_uni(pTHX)
1989 {
1990     dVAR;
1991     const char *s;
1992     const char *t;
1993
1994     if (PL_oldoldbufptr != PL_last_uni)
1995         return;
1996     while (isSPACE(*PL_last_uni))
1997         PL_last_uni++;
1998     s = PL_last_uni;
1999     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2000         s++;
2001     if ((t = strchr(s, '(')) && t < PL_bufptr)
2002         return;
2003
2004     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006                      (int)(s - PL_last_uni), PL_last_uni);
2007 }
2008
2009 /*
2010  * LOP : macro to build a list operator.  Its behaviour has been replaced
2011  * with a subroutine, S_lop() for which LOP is just another name.
2012  */
2013
2014 #define LOP(f,x) return lop(f,x,s)
2015
2016 /*
2017  * S_lop
2018  * Build a list operator (or something that might be one).  The rules:
2019  *  - if we have a next token, then it's a list operator [why?]
2020  *  - if the next thing is an opening paren, then it's a function
2021  *  - else it's a list operator
2022  */
2023
2024 STATIC I32
2025 S_lop(pTHX_ I32 f, int x, char *s)
2026 {
2027     dVAR;
2028
2029     PERL_ARGS_ASSERT_LOP;
2030
2031     pl_yylval.ival = f;
2032     CLINE;
2033     PL_expect = x;
2034     PL_bufptr = s;
2035     PL_last_lop = PL_oldbufptr;
2036     PL_last_lop_op = (OPCODE)f;
2037 #ifdef PERL_MAD
2038     if (PL_lasttoke)
2039         goto lstop;
2040 #else
2041     if (PL_nexttoke)
2042         goto lstop;
2043 #endif
2044     if (*s == '(')
2045         return REPORT(FUNC);
2046     s = PEEKSPACE(s);
2047     if (*s == '(')
2048         return REPORT(FUNC);
2049     else {
2050         lstop:
2051         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053         return REPORT(LSTOP);
2054     }
2055 }
2056
2057 #ifdef PERL_MAD
2058  /*
2059  * S_start_force
2060  * Sets up for an eventual force_next().  start_force(0) basically does
2061  * an unshift, while start_force(-1) does a push.  yylex removes items
2062  * on the "pop" end.
2063  */
2064
2065 STATIC void
2066 S_start_force(pTHX_ int where)
2067 {
2068     int i;
2069
2070     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2071         where = PL_lasttoke;
2072     assert(PL_curforce < 0 || PL_curforce == where);
2073     if (PL_curforce != where) {
2074         for (i = PL_lasttoke; i > where; --i) {
2075             PL_nexttoke[i] = PL_nexttoke[i-1];
2076         }
2077         PL_lasttoke++;
2078     }
2079     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2080         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081     PL_curforce = where;
2082     if (PL_nextwhite) {
2083         if (PL_madskills)
2084             curmad('^', newSVpvs(""));
2085         CURMAD('_', PL_nextwhite);
2086     }
2087 }
2088
2089 STATIC void
2090 S_curmad(pTHX_ char slot, SV *sv)
2091 {
2092     MADPROP **where;
2093
2094     if (!sv)
2095         return;
2096     if (PL_curforce < 0)
2097         where = &PL_thismad;
2098     else
2099         where = &PL_nexttoke[PL_curforce].next_mad;
2100
2101     if (PL_faketokens)
2102         sv_setpvs(sv, "");
2103     else {
2104         if (!IN_BYTES) {
2105             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2106                 SvUTF8_on(sv);
2107             else if (PL_encoding) {
2108                 sv_recode_to_utf8(sv, PL_encoding);
2109             }
2110         }
2111     }
2112
2113     /* keep a slot open for the head of the list? */
2114     if (slot != '_' && *where && (*where)->mad_key == '^') {
2115         (*where)->mad_key = slot;
2116         sv_free(MUTABLE_SV(((*where)->mad_val)));
2117         (*where)->mad_val = (void*)sv;
2118     }
2119     else
2120         addmad(newMADsv(slot, sv), where, 0);
2121 }
2122 #else
2123 #  define start_force(where)    NOOP
2124 #  define curmad(slot, sv)      NOOP
2125 #endif
2126
2127 /*
2128  * S_force_next
2129  * When the lexer realizes it knows the next token (for instance,
2130  * it is reordering tokens for the parser) then it can call S_force_next
2131  * to know what token to return the next time the lexer is called.  Caller
2132  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133  * and possibly PL_expect to ensure the lexer handles the token correctly.
2134  */
2135
2136 STATIC void
2137 S_force_next(pTHX_ I32 type)
2138 {
2139     dVAR;
2140 #ifdef DEBUGGING
2141     if (DEBUG_T_TEST) {
2142         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143         tokereport(type, &NEXTVAL_NEXTTOKE);
2144     }
2145 #endif
2146 #ifdef PERL_MAD
2147     if (PL_curforce < 0)
2148         start_force(PL_lasttoke);
2149     PL_nexttoke[PL_curforce].next_type = type;
2150     if (PL_lex_state != LEX_KNOWNEXT)
2151         PL_lex_defer = PL_lex_state;
2152     PL_lex_state = LEX_KNOWNEXT;
2153     PL_lex_expect = PL_expect;
2154     PL_curforce = -1;
2155 #else
2156     PL_nexttype[PL_nexttoke] = type;
2157     PL_nexttoke++;
2158     if (PL_lex_state != LEX_KNOWNEXT) {
2159         PL_lex_defer = PL_lex_state;
2160         PL_lex_expect = PL_expect;
2161         PL_lex_state = LEX_KNOWNEXT;
2162     }
2163 #endif
2164 }
2165
2166 /*
2167  * S_postderef
2168  *
2169  * This subroutine handles postfix deref syntax after the arrow has already
2170  * been emitted.  @* $* etc. are emitted as two separate token right here.
2171  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172  * only the first, leaving yylex to find the next.
2173  */
2174
2175 static int
2176 S_postderef(pTHX_ int const funny, char const next)
2177 {
2178     dVAR;
2179     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180     assert(strchr("*[{", next));
2181     if (next == '*') {
2182         PL_expect = XOPERATOR;
2183         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185             PL_lex_state = LEX_INTERPEND;
2186             start_force(PL_curforce);
2187             force_next(POSTJOIN);
2188         }
2189         start_force(PL_curforce);
2190         force_next(next);
2191         PL_bufptr+=2;
2192     }
2193     else {
2194         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195          && !PL_lex_brackets)
2196             PL_lex_dojoin = 2;
2197         PL_expect = XOPERATOR;
2198         PL_bufptr++;
2199     }
2200     return funny;
2201 }
2202
2203 void
2204 Perl_yyunlex(pTHX)
2205 {
2206     int yyc = PL_parser->yychar;
2207     if (yyc != YYEMPTY) {
2208         if (yyc) {
2209             start_force(-1);
2210             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212                 PL_lex_allbrackets--;
2213                 PL_lex_brackets--;
2214                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215             } else if (yyc == '('/*)*/) {
2216                 PL_lex_allbrackets--;
2217                 yyc |= (2<<24);
2218             }
2219             force_next(yyc);
2220         }
2221         PL_parser->yychar = YYEMPTY;
2222     }
2223 }
2224
2225 STATIC SV *
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228     dVAR;
2229     SV * const sv = newSVpvn_utf8(start, len,
2230                                   !IN_BYTES
2231                                   && UTF
2232                                   && !is_ascii_string((const U8*)start, len)
2233                                   && is_utf8_string((const U8*)start, len));
2234     return sv;
2235 }
2236
2237 /*
2238  * S_force_word
2239  * When the lexer knows the next thing is a word (for instance, it has
2240  * just seen -> and it knows that the next char is a word char, then
2241  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2242  * lookahead.
2243  *
2244  * Arguments:
2245  *   char *start : buffer position (must be within PL_linestr)
2246  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247  *   int check_keyword : if true, Perl checks to make sure the word isn't
2248  *       a keyword (do this if the word is a label, e.g. goto FOO)
2249  *   int allow_pack : if true, : characters will also be allowed (require,
2250  *       use, etc. do this)
2251  *   int allow_initial_tick : used by the "sub" lexer only.
2252  */
2253
2254 STATIC char *
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2256 {
2257     dVAR;
2258     char *s;
2259     STRLEN len;
2260
2261     PERL_ARGS_ASSERT_FORCE_WORD;
2262
2263     start = SKIPSPACE1(start);
2264     s = start;
2265     if (isIDFIRST_lazy_if(s,UTF) ||
2266         (allow_pack && *s == ':') )
2267     {
2268         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269         if (check_keyword) {
2270           char *s2 = PL_tokenbuf;
2271           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2272             s2 += 6, len -= 6;
2273           if (keyword(s2, len, 0))
2274             return start;
2275         }
2276         start_force(PL_curforce);
2277         if (PL_madskills)
2278             curmad('X', newSVpvn(start,s-start));
2279         if (token == METHOD) {
2280             s = SKIPSPACE1(s);
2281             if (*s == '(')
2282                 PL_expect = XTERM;
2283             else {
2284                 PL_expect = XOPERATOR;
2285             }
2286         }
2287         if (PL_madskills)
2288             curmad('g', newSVpvs( "forced" ));
2289         NEXTVAL_NEXTTOKE.opval
2290             = (OP*)newSVOP(OP_CONST,0,
2291                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2293         force_next(token);
2294     }
2295     return s;
2296 }
2297
2298 /*
2299  * S_force_ident
2300  * Called when the lexer wants $foo *foo &foo etc, but the program
2301  * text only contains the "foo" portion.  The first argument is a pointer
2302  * to the "foo", and the second argument is the type symbol to prefix.
2303  * Forces the next token to be a "WORD".
2304  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2305  */
2306
2307 STATIC void
2308 S_force_ident(pTHX_ const char *s, int kind)
2309 {
2310     dVAR;
2311
2312     PERL_ARGS_ASSERT_FORCE_IDENT;
2313
2314     if (s[0]) {
2315         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317                                                                 UTF ? SVf_UTF8 : 0));
2318         start_force(PL_curforce);
2319         NEXTVAL_NEXTTOKE.opval = o;
2320         force_next(WORD);
2321         if (kind) {
2322             o->op_private = OPpCONST_ENTERED;
2323             /* XXX see note in pp_entereval() for why we forgo typo
2324                warnings if the symbol must be introduced in an eval.
2325                GSAR 96-10-12 */
2326             gv_fetchpvn_flags(s, len,
2327                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329                               kind == '$' ? SVt_PV :
2330                               kind == '@' ? SVt_PVAV :
2331                               kind == '%' ? SVt_PVHV :
2332                               SVt_PVGV
2333                               );
2334         }
2335     }
2336 }
2337
2338 static void
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2340 {
2341     start_force(PL_curforce);
2342     NEXTVAL_NEXTTOKE.ival = pit;
2343     force_next('p');
2344 }
2345
2346 NV
2347 Perl_str_to_version(pTHX_ SV *sv)
2348 {
2349     NV retval = 0.0;
2350     NV nshift = 1.0;
2351     STRLEN len;
2352     const char *start = SvPV_const(sv,len);
2353     const char * const end = start + len;
2354     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2355
2356     PERL_ARGS_ASSERT_STR_TO_VERSION;
2357
2358     while (start < end) {
2359         STRLEN skip;
2360         UV n;
2361         if (utf)
2362             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2363         else {
2364             n = *(U8*)start;
2365             skip = 1;
2366         }
2367         retval += ((NV)n)/nshift;
2368         start += skip;
2369         nshift *= 1000;
2370     }
2371     return retval;
2372 }
2373
2374 /*
2375  * S_force_version
2376  * Forces the next token to be a version number.
2377  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378  * and if "guessing" is TRUE, then no new token is created (and the caller
2379  * must use an alternative parsing method).
2380  */
2381
2382 STATIC char *
2383 S_force_version(pTHX_ char *s, int guessing)
2384 {
2385     dVAR;
2386     OP *version = NULL;
2387     char *d;
2388 #ifdef PERL_MAD
2389     I32 startoff = s - SvPVX(PL_linestr);
2390 #endif
2391
2392     PERL_ARGS_ASSERT_FORCE_VERSION;
2393
2394     s = SKIPSPACE1(s);
2395
2396     d = s;
2397     if (*d == 'v')
2398         d++;
2399     if (isDIGIT(*d)) {
2400         while (isDIGIT(*d) || *d == '_' || *d == '.')
2401             d++;
2402 #ifdef PERL_MAD
2403         if (PL_madskills) {
2404             start_force(PL_curforce);
2405             curmad('X', newSVpvn(s,d-s));
2406         }
2407 #endif
2408         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2409             SV *ver;
2410             s = scan_num(s, &pl_yylval);
2411             version = pl_yylval.opval;
2412             ver = cSVOPx(version)->op_sv;
2413             if (SvPOK(ver) && !SvNIOK(ver)) {
2414                 SvUPGRADE(ver, SVt_PVNV);
2415                 SvNV_set(ver, str_to_version(ver));
2416                 SvNOK_on(ver);          /* hint that it is a version */
2417             }
2418         }
2419         else if (guessing) {
2420 #ifdef PERL_MAD
2421             if (PL_madskills) {
2422                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2423                 PL_nextwhite = 0;
2424                 s = SvPVX(PL_linestr) + startoff;
2425             }
2426 #endif
2427             return s;
2428         }
2429     }
2430
2431 #ifdef PERL_MAD
2432     if (PL_madskills && !version) {
2433         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2434         PL_nextwhite = 0;
2435         s = SvPVX(PL_linestr) + startoff;
2436     }
2437 #endif
2438     /* NOTE: The parser sees the package name and the VERSION swapped */
2439     start_force(PL_curforce);
2440     NEXTVAL_NEXTTOKE.opval = version;
2441     force_next(WORD);
2442
2443     return s;
2444 }
2445
2446 /*
2447  * S_force_strict_version
2448  * Forces the next token to be a version number using strict syntax rules.
2449  */
2450
2451 STATIC char *
2452 S_force_strict_version(pTHX_ char *s)
2453 {
2454     dVAR;
2455     OP *version = NULL;
2456 #ifdef PERL_MAD
2457     I32 startoff = s - SvPVX(PL_linestr);
2458 #endif
2459     const char *errstr = NULL;
2460
2461     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2462
2463     while (isSPACE(*s)) /* leading whitespace */
2464         s++;
2465
2466     if (is_STRICT_VERSION(s,&errstr)) {
2467         SV *ver = newSV(0);
2468         s = (char *)scan_version(s, ver, 0);
2469         version = newSVOP(OP_CONST, 0, ver);
2470     }
2471     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2472             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2473     {
2474         PL_bufptr = s;
2475         if (errstr)
2476             yyerror(errstr); /* version required */
2477         return s;
2478     }
2479
2480 #ifdef PERL_MAD
2481     if (PL_madskills && !version) {
2482         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2483         PL_nextwhite = 0;
2484         s = SvPVX(PL_linestr) + startoff;
2485     }
2486 #endif
2487     /* NOTE: The parser sees the package name and the VERSION swapped */
2488     start_force(PL_curforce);
2489     NEXTVAL_NEXTTOKE.opval = version;
2490     force_next(WORD);
2491
2492     return s;
2493 }
2494
2495 /*
2496  * S_tokeq
2497  * Tokenize a quoted string passed in as an SV.  It finds the next
2498  * chunk, up to end of string or a backslash.  It may make a new
2499  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2500  * turns \\ into \.
2501  */
2502
2503 STATIC SV *
2504 S_tokeq(pTHX_ SV *sv)
2505 {
2506     dVAR;
2507     char *s;
2508     char *send;
2509     char *d;
2510     SV *pv = sv;
2511
2512     PERL_ARGS_ASSERT_TOKEQ;
2513
2514     assert (SvPOK(sv));
2515     assert (SvLEN(sv));
2516     assert (!SvIsCOW(sv));
2517     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2518         goto finish;
2519     s = SvPVX(sv);
2520     send = SvEND(sv);
2521     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2522     while (s < send && !(*s == '\\' && s[1] == '\\'))
2523         s++;
2524     if (s == send)
2525         goto finish;
2526     d = s;
2527     if ( PL_hints & HINT_NEW_STRING ) {
2528         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2529                             SVs_TEMP | SvUTF8(sv));
2530     }
2531     while (s < send) {
2532         if (*s == '\\') {
2533             if (s + 1 < send && (s[1] == '\\'))
2534                 s++;            /* all that, just for this */
2535         }
2536         *d++ = *s++;
2537     }
2538     *d = '\0';
2539     SvCUR_set(sv, d - SvPVX_const(sv));
2540   finish:
2541     if ( PL_hints & HINT_NEW_STRING )
2542        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2543     return sv;
2544 }
2545
2546 /*
2547  * Now come three functions related to double-quote context,
2548  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2549  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2550  * interact with PL_lex_state, and create fake ( ... ) argument lists
2551  * to handle functions and concatenation.
2552  * For example,
2553  *   "foo\lbar"
2554  * is tokenised as
2555  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2556  */
2557
2558 /*
2559  * S_sublex_start
2560  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2561  *
2562  * Pattern matching will set PL_lex_op to the pattern-matching op to
2563  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2564  *
2565  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2566  *
2567  * Everything else becomes a FUNC.
2568  *
2569  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2570  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2571  * call to S_sublex_push().
2572  */
2573
2574 STATIC I32
2575 S_sublex_start(pTHX)
2576 {
2577     dVAR;
2578     const I32 op_type = pl_yylval.ival;
2579
2580     if (op_type == OP_NULL) {
2581         pl_yylval.opval = PL_lex_op;
2582         PL_lex_op = NULL;
2583         return THING;
2584     }
2585     if (op_type == OP_CONST) {
2586         SV *sv = tokeq(PL_lex_stuff);
2587
2588         if (SvTYPE(sv) == SVt_PVIV) {
2589             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2590             STRLEN len;
2591             const char * const p = SvPV_const(sv, len);
2592             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2593             SvREFCNT_dec(sv);
2594             sv = nsv;
2595         }
2596         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2597         PL_lex_stuff = NULL;
2598         return THING;
2599     }
2600
2601     PL_sublex_info.super_state = PL_lex_state;
2602     PL_sublex_info.sub_inwhat = (U16)op_type;
2603     PL_sublex_info.sub_op = PL_lex_op;
2604     PL_lex_state = LEX_INTERPPUSH;
2605
2606     PL_expect = XTERM;
2607     if (PL_lex_op) {
2608         pl_yylval.opval = PL_lex_op;
2609         PL_lex_op = NULL;
2610         return PMFUNC;
2611     }
2612     else
2613         return FUNC;
2614 }
2615
2616 /*
2617  * S_sublex_push
2618  * Create a new scope to save the lexing state.  The scope will be
2619  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2620  * to the uc, lc, etc. found before.
2621  * Sets PL_lex_state to LEX_INTERPCONCAT.
2622  */
2623
2624 STATIC I32
2625 S_sublex_push(pTHX)
2626 {
2627     dVAR;
2628     LEXSHARED *shared;
2629     const bool is_heredoc = PL_multi_close == '<';
2630     ENTER;
2631
2632     PL_lex_state = PL_sublex_info.super_state;
2633     SAVEI8(PL_lex_dojoin);
2634     SAVEI32(PL_lex_brackets);
2635     SAVEI32(PL_lex_allbrackets);
2636     SAVEI32(PL_lex_formbrack);
2637     SAVEI8(PL_lex_fakeeof);
2638     SAVEI32(PL_lex_casemods);
2639     SAVEI32(PL_lex_starts);
2640     SAVEI8(PL_lex_state);
2641     SAVESPTR(PL_lex_repl);
2642     SAVEVPTR(PL_lex_inpat);
2643     SAVEI16(PL_lex_inwhat);
2644     if (is_heredoc)
2645     {
2646         SAVECOPLINE(PL_curcop);
2647         SAVEI32(PL_multi_end);
2648         SAVEI32(PL_parser->herelines);
2649         PL_parser->herelines = 0;
2650     }
2651     SAVEI8(PL_multi_close);
2652     SAVEPPTR(PL_bufptr);
2653     SAVEPPTR(PL_bufend);
2654     SAVEPPTR(PL_oldbufptr);
2655     SAVEPPTR(PL_oldoldbufptr);
2656     SAVEPPTR(PL_last_lop);
2657     SAVEPPTR(PL_last_uni);
2658     SAVEPPTR(PL_linestart);
2659     SAVESPTR(PL_linestr);
2660     SAVEGENERICPV(PL_lex_brackstack);
2661     SAVEGENERICPV(PL_lex_casestack);
2662     SAVEGENERICPV(PL_parser->lex_shared);
2663     SAVEBOOL(PL_parser->lex_re_reparsing);
2664     SAVEI32(PL_copline);
2665
2666     /* The here-doc parser needs to be able to peek into outer lexing
2667        scopes to find the body of the here-doc.  So we put PL_linestr and
2668        PL_bufptr into lex_shared, to ‘share’ those values.
2669      */
2670     PL_parser->lex_shared->ls_linestr = PL_linestr;
2671     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2672
2673     PL_linestr = PL_lex_stuff;
2674     PL_lex_repl = PL_sublex_info.repl;
2675     PL_lex_stuff = NULL;
2676     PL_sublex_info.repl = NULL;
2677
2678     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2679         = SvPVX(PL_linestr);
2680     PL_bufend += SvCUR(PL_linestr);
2681     PL_last_lop = PL_last_uni = NULL;
2682     SAVEFREESV(PL_linestr);
2683     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2684
2685     PL_lex_dojoin = FALSE;
2686     PL_lex_brackets = PL_lex_formbrack = 0;
2687     PL_lex_allbrackets = 0;
2688     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2689     Newx(PL_lex_brackstack, 120, char);
2690     Newx(PL_lex_casestack, 12, char);
2691     PL_lex_casemods = 0;
2692     *PL_lex_casestack = '\0';
2693     PL_lex_starts = 0;
2694     PL_lex_state = LEX_INTERPCONCAT;
2695     if (is_heredoc)
2696         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2697     PL_copline = NOLINE;
2698
2699     Newxz(shared, 1, LEXSHARED);
2700     shared->ls_prev = PL_parser->lex_shared;
2701     PL_parser->lex_shared = shared;
2702
2703     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2704     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2705     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2706         PL_lex_inpat = PL_sublex_info.sub_op;
2707     else
2708         PL_lex_inpat = NULL;
2709
2710     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2711     PL_in_eval &= ~EVAL_RE_REPARSING;
2712
2713     return '(';
2714 }
2715
2716 /*
2717  * S_sublex_done
2718  * Restores lexer state after a S_sublex_push.
2719  */
2720
2721 STATIC I32
2722 S_sublex_done(pTHX)
2723 {
2724     dVAR;
2725     if (!PL_lex_starts++) {
2726         SV * const sv = newSVpvs("");
2727         if (SvUTF8(PL_linestr))
2728             SvUTF8_on(sv);
2729         PL_expect = XOPERATOR;
2730         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2731         return THING;
2732     }
2733
2734     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2735         PL_lex_state = LEX_INTERPCASEMOD;
2736         return yylex();
2737     }
2738
2739     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2740     assert(PL_lex_inwhat != OP_TRANSR);
2741     if (PL_lex_repl) {
2742         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2743         PL_linestr = PL_lex_repl;
2744         PL_lex_inpat = 0;
2745         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2746         PL_bufend += SvCUR(PL_linestr);
2747         PL_last_lop = PL_last_uni = NULL;
2748         PL_lex_dojoin = FALSE;
2749         PL_lex_brackets = 0;
2750         PL_lex_allbrackets = 0;
2751         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2752         PL_lex_casemods = 0;
2753         *PL_lex_casestack = '\0';
2754         PL_lex_starts = 0;
2755         if (SvEVALED(PL_lex_repl)) {
2756             PL_lex_state = LEX_INTERPNORMAL;
2757             PL_lex_starts++;
2758             /*  we don't clear PL_lex_repl here, so that we can check later
2759                 whether this is an evalled subst; that means we rely on the
2760                 logic to ensure sublex_done() is called again only via the
2761                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2762         }
2763         else {
2764             PL_lex_state = LEX_INTERPCONCAT;
2765             PL_lex_repl = NULL;
2766         }
2767         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2768             CopLINE(PL_curcop) +=
2769                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2770                  + PL_parser->herelines;
2771             PL_parser->herelines = 0;
2772         }
2773         return ',';
2774     }
2775     else {
2776         const line_t l = CopLINE(PL_curcop);
2777 #ifdef PERL_MAD
2778         if (PL_madskills) {
2779             if (PL_thiswhite) {
2780                 if (!PL_endwhite)
2781                     PL_endwhite = newSVpvs("");
2782                 sv_catsv(PL_endwhite, PL_thiswhite);
2783                 PL_thiswhite = 0;
2784             }
2785             if (PL_thistoken)
2786                 sv_setpvs(PL_thistoken,"");
2787             else
2788                 PL_realtokenstart = -1;
2789         }
2790 #endif
2791         LEAVE;
2792         if (PL_multi_close == '<')
2793             PL_parser->herelines += l - PL_multi_end;
2794         PL_bufend = SvPVX(PL_linestr);
2795         PL_bufend += SvCUR(PL_linestr);
2796         PL_expect = XOPERATOR;
2797         PL_sublex_info.sub_inwhat = 0;
2798         return ')';
2799     }
2800 }
2801
2802 PERL_STATIC_INLINE SV*
2803 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2804 {
2805     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2806      * interior, hence to the "}".  Finds what the name resolves to, returning
2807      * an SV* containing it; NULL if no valid one found */
2808
2809     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2810
2811     HV * table;
2812     SV **cvp;
2813     SV *cv;
2814     SV *rv;
2815     HV *stash;
2816     const U8* first_bad_char_loc;
2817     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2818
2819     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2820
2821     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2822                                      e - backslash_ptr,
2823                                      &first_bad_char_loc))
2824     {
2825         /* If warnings are on, this will print a more detailed analysis of what
2826          * is wrong than the error message below */
2827         utf8n_to_uvchr(first_bad_char_loc,
2828                        e - ((char *) first_bad_char_loc),
2829                        NULL, 0);
2830
2831         /* We deliberately don't try to print the malformed character, which
2832          * might not print very well; it also may be just the first of many
2833          * malformations, so don't print what comes after it */
2834         yyerror(Perl_form(aTHX_
2835             "Malformed UTF-8 character immediately after '%.*s'",
2836             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2837         return NULL;
2838     }
2839
2840     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2841                         /* include the <}> */
2842                         e - backslash_ptr + 1);
2843     if (! SvPOK(res)) {
2844         SvREFCNT_dec_NN(res);
2845         return NULL;
2846     }
2847
2848     /* See if the charnames handler is the Perl core's, and if so, we can skip
2849      * the validation needed for a user-supplied one, as Perl's does its own
2850      * validation. */
2851     table = GvHV(PL_hintgv);             /* ^H */
2852     cvp = hv_fetchs(table, "charnames", FALSE);
2853     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2854         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2855     {
2856         const char * const name = HvNAME(stash);
2857         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2858          && strEQ(name, "_charnames")) {
2859            return res;
2860        }
2861     }
2862
2863     /* Here, it isn't Perl's charname handler.  We can't rely on a
2864      * user-supplied handler to validate the input name.  For non-ut8 input,
2865      * look to see that the first character is legal.  Then loop through the
2866      * rest checking that each is a continuation */
2867
2868     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2869      * the same thing */
2870
2871     if (! UTF) {
2872         if (! isALPHAU(*s)) {
2873             goto bad_charname;
2874         }
2875         s++;
2876         while (s < e) {
2877             if (! isCHARNAME_CONT(*s)) {
2878                 goto bad_charname;
2879             }
2880             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2881                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2882                            "A sequence of multiple spaces in a charnames "
2883                            "alias definition is deprecated");
2884             }
2885             s++;
2886         }
2887         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2888             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2889                         "Trailing white-space in a charnames alias "
2890                         "definition is deprecated");
2891         }
2892     }
2893     else {
2894         /* Similarly for utf8.  For invariants can check directly; for other
2895          * Latin1, can calculate their code point and check; otherwise  use a
2896          * swash */
2897         if (UTF8_IS_INVARIANT(*s)) {
2898             if (! isALPHAU(*s)) {
2899                 goto bad_charname;
2900             }
2901             s++;
2902         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2903             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2904                 goto bad_charname;
2905             }
2906             s += 2;
2907         }
2908         else {
2909             if (! PL_utf8_charname_begin) {
2910                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2911                 PL_utf8_charname_begin = _core_swash_init("utf8",
2912                                                         "_Perl_Charname_Begin",
2913                                                         &PL_sv_undef,
2914                                                         1, 0, NULL, &flags);
2915             }
2916             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2917                 goto bad_charname;
2918             }
2919             s += UTF8SKIP(s);
2920         }
2921
2922         while (s < e) {
2923             if (UTF8_IS_INVARIANT(*s)) {
2924                 if (! isCHARNAME_CONT(*s)) {
2925                     goto bad_charname;
2926                 }
2927                 if (*s == ' ' && *(s-1) == ' '
2928                  && ckWARN_d(WARN_DEPRECATED)) {
2929                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2930                                "A sequence of multiple spaces in a charnam"
2931                                "es alias definition is deprecated");
2932                 }
2933                 s++;
2934             }
2935             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2936                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2937                 {
2938                     goto bad_charname;
2939                 }
2940                 s += 2;
2941             }
2942             else {
2943                 if (! PL_utf8_charname_continue) {
2944                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2945                     PL_utf8_charname_continue = _core_swash_init("utf8",
2946                                                 "_Perl_Charname_Continue",
2947                                                 &PL_sv_undef,
2948                                                 1, 0, NULL, &flags);
2949                 }
2950                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2951                     goto bad_charname;
2952                 }
2953                 s += UTF8SKIP(s);
2954             }
2955         }
2956         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2957             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2958                        "Trailing white-space in a charnames alias "
2959                        "definition is deprecated");
2960         }
2961     }
2962
2963     if (SvUTF8(res)) { /* Don't accept malformed input */
2964         const U8* first_bad_char_loc;
2965         STRLEN len;
2966         const char* const str = SvPV_const(res, len);
2967         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2968             /* If warnings are on, this will print a more detailed analysis of
2969              * what is wrong than the error message below */
2970             utf8n_to_uvchr(first_bad_char_loc,
2971                            (char *) first_bad_char_loc - str,
2972                            NULL, 0);
2973
2974             /* We deliberately don't try to print the malformed character,
2975              * which might not print very well; it also may be just the first
2976              * of many malformations, so don't print what comes after it */
2977             yyerror_pv(
2978               Perl_form(aTHX_
2979                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2980                  (int) (e - backslash_ptr + 1), backslash_ptr,
2981                  (int) ((char *) first_bad_char_loc - str), str
2982               ),
2983               SVf_UTF8);
2984             return NULL;
2985         }
2986     }
2987
2988     return res;
2989
2990   bad_charname: {
2991         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2992
2993         /* The final %.*s makes sure that should the trailing NUL be missing
2994          * that this print won't run off the end of the string */
2995         yyerror_pv(
2996           Perl_form(aTHX_
2997             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2998             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2999             (int)(e - s + bad_char_size), s + bad_char_size
3000           ),
3001           UTF ? SVf_UTF8 : 0);
3002         return NULL;
3003     }
3004 }
3005
3006 /*
3007   scan_const
3008
3009   Extracts the next constant part of a pattern, double-quoted string,
3010   or transliteration.  This is terrifying code.
3011
3012   For example, in parsing the double-quoted string "ab\x63$d", it would
3013   stop at the '$' and return an OP_CONST containing 'abc'.
3014
3015   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3016   processing a pattern (PL_lex_inpat is true), a transliteration
3017   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3018
3019   Returns a pointer to the character scanned up to. If this is
3020   advanced from the start pointer supplied (i.e. if anything was
3021   successfully parsed), will leave an OP_CONST for the substring scanned
3022   in pl_yylval. Caller must intuit reason for not parsing further
3023   by looking at the next characters herself.
3024
3025   In patterns:
3026     expand:
3027       \N{FOO}  => \N{U+hex_for_character_FOO}
3028       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3029
3030     pass through:
3031         all other \-char, including \N and \N{ apart from \N{ABC}
3032
3033     stops on:
3034         @ and $ where it appears to be a var, but not for $ as tail anchor
3035         \l \L \u \U \Q \E
3036         (?{  or  (??{
3037
3038
3039   In transliterations:
3040     characters are VERY literal, except for - not at the start or end
3041     of the string, which indicates a range. If the range is in bytes,
3042     scan_const expands the range to the full set of intermediate
3043     characters. If the range is in utf8, the hyphen is replaced with
3044     a certain range mark which will be handled by pmtrans() in op.c.
3045
3046   In double-quoted strings:
3047     backslashes:
3048       double-quoted style: \r and \n
3049       constants: \x31, etc.
3050       deprecated backrefs: \1 (in substitution replacements)
3051       case and quoting: \U \Q \E
3052     stops on @ and $
3053
3054   scan_const does *not* construct ops to handle interpolated strings.
3055   It stops processing as soon as it finds an embedded $ or @ variable
3056   and leaves it to the caller to work out what's going on.
3057
3058   embedded arrays (whether in pattern or not) could be:
3059       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3060
3061   $ in double-quoted strings must be the symbol of an embedded scalar.
3062
3063   $ in pattern could be $foo or could be tail anchor.  Assumption:
3064   it's a tail anchor if $ is the last thing in the string, or if it's
3065   followed by one of "()| \r\n\t"
3066
3067   \1 (backreferences) are turned into $1 in substitutions
3068
3069   The structure of the code is
3070       while (there's a character to process) {
3071           handle transliteration ranges
3072           skip regexp comments /(?#comment)/ and codes /(?{code})/
3073           skip #-initiated comments in //x patterns
3074           check for embedded arrays
3075           check for embedded scalars
3076           if (backslash) {
3077               deprecate \1 in substitution replacements
3078               handle string-changing backslashes \l \U \Q \E, etc.
3079               switch (what was escaped) {
3080                   handle \- in a transliteration (becomes a literal -)
3081                   if a pattern and not \N{, go treat as regular character
3082                   handle \132 (octal characters)
3083                   handle \x15 and \x{1234} (hex characters)
3084                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3085                   handle \cV (control characters)
3086                   handle printf-style backslashes (\f, \r, \n, etc)
3087               } (end switch)
3088               continue
3089           } (end if backslash)
3090           handle regular character
3091     } (end while character to read)
3092
3093 */
3094
3095 STATIC char *
3096 S_scan_const(pTHX_ char *start)
3097 {
3098     dVAR;
3099     char *send = PL_bufend;             /* end of the constant */
3100     SV *sv = newSV(send - start);               /* sv for the constant.  See
3101                                                    note below on sizing. */
3102     char *s = start;                    /* start of the constant */
3103     char *d = SvPVX(sv);                /* destination for copies */
3104     bool dorange = FALSE;                       /* are we in a translit range? */
3105     bool didrange = FALSE;                      /* did we just finish a range? */
3106     bool in_charclass = FALSE;                  /* within /[...]/ */
3107     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3108     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3109                                                    to be UTF8?  But, this can
3110                                                    show as true when the source
3111                                                    isn't utf8, as for example
3112                                                    when it is entirely composed
3113                                                    of hex constants */
3114     SV *res;                            /* result from charnames */
3115
3116     /* Note on sizing:  The scanned constant is placed into sv, which is
3117      * initialized by newSV() assuming one byte of output for every byte of
3118      * input.  This routine expects newSV() to allocate an extra byte for a
3119      * trailing NUL, which this routine will append if it gets to the end of
3120      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3121      * CAPITAL LETTER A}), or more output than input if the constant ends up
3122      * recoded to utf8, but each time a construct is found that might increase
3123      * the needed size, SvGROW() is called.  Its size parameter each time is
3124      * based on the best guess estimate at the time, namely the length used so
3125      * far, plus the length the current construct will occupy, plus room for
3126      * the trailing NUL, plus one byte for every input byte still unscanned */
3127
3128     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3129                        before set */
3130 #ifdef EBCDIC
3131     UV literal_endpoint = 0;
3132     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3133 #endif
3134
3135     PERL_ARGS_ASSERT_SCAN_CONST;
3136
3137     assert(PL_lex_inwhat != OP_TRANSR);
3138     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3139         /* If we are doing a trans and we know we want UTF8 set expectation */
3140         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3141         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3142     }
3143
3144     /* Protect sv from errors and fatal warnings. */
3145     ENTER_with_name("scan_const");
3146     SAVEFREESV(sv);
3147
3148     while (s < send || dorange) {
3149
3150         /* get transliterations out of the way (they're most literal) */
3151         if (PL_lex_inwhat == OP_TRANS) {
3152             /* expand a range A-Z to the full set of characters.  AIE! */
3153             if (dorange) {
3154                 I32 i;                          /* current expanded character */
3155                 I32 min;                        /* first character in range */
3156                 I32 max;                        /* last character in range */
3157
3158 #ifdef EBCDIC
3159                 UV uvmax = 0;
3160 #endif
3161
3162                 if (has_utf8
3163 #ifdef EBCDIC
3164                     && !native_range
3165 #endif
3166                 ) {
3167                     char * const c = (char*)utf8_hop((U8*)d, -1);
3168                     char *e = d++;
3169                     while (e-- > c)
3170                         *(e + 1) = *e;
3171                     *c = (char) ILLEGAL_UTF8_BYTE;
3172                     /* mark the range as done, and continue */
3173                     dorange = FALSE;
3174                     didrange = TRUE;
3175                     continue;
3176                 }
3177
3178                 i = d - SvPVX_const(sv);                /* remember current offset */
3179 #ifdef EBCDIC
3180                 SvGROW(sv,
3181                        SvLEN(sv) + (has_utf8 ?
3182                                     (512 - UTF_CONTINUATION_MARK +
3183                                      UNISKIP(0x100))
3184                                     : 256));
3185                 /* How many two-byte within 0..255: 128 in UTF-8,
3186                  * 96 in UTF-8-mod. */
3187 #else
3188                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3189 #endif
3190                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3191 #ifdef EBCDIC
3192                 if (has_utf8) {
3193                     int j;
3194                     for (j = 0; j <= 1; j++) {
3195                         char * const c = (char*)utf8_hop((U8*)d, -1);
3196                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3197                         if (j)
3198                             min = (U8)uv;
3199                         else if (uv < 256)
3200                             max = (U8)uv;
3201                         else {
3202                             max = (U8)0xff; /* only to \xff */
3203                             uvmax = uv; /* \x{100} to uvmax */
3204                         }
3205                         d = c; /* eat endpoint chars */
3206                      }
3207                 }
3208                else {
3209 #endif
3210                    d -= 2;              /* eat the first char and the - */
3211                    min = (U8)*d;        /* first char in range */
3212                    max = (U8)d[1];      /* last char in range  */
3213 #ifdef EBCDIC
3214                }
3215 #endif
3216
3217                 if (min > max) {
3218                     Perl_croak(aTHX_
3219                                "Invalid range \"%c-%c\" in transliteration operator",
3220                                (char)min, (char)max);
3221                 }
3222
3223 #ifdef EBCDIC
3224                 if (literal_endpoint == 2 &&
3225                     ((isLOWER_A(min) && isLOWER_A(max)) ||
3226                      (isUPPER_A(min) && isUPPER_A(max))))
3227                 {
3228                     for (i = min; i <= max; i++) {
3229                         if (isALPHA_A(i))
3230                             *d++ = i;
3231                     }
3232                 }
3233                 else
3234 #endif
3235                     for (i = min; i <= max; i++)
3236 #ifdef EBCDIC
3237                         if (has_utf8) {
3238                             append_utf8_from_native_byte(i, &d);
3239                         }
3240                         else
3241 #endif
3242                             *d++ = (char)i;
3243
3244 #ifdef EBCDIC
3245                 if (uvmax) {
3246                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3247                     if (uvmax > 0x101)
3248                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3249                     if (uvmax > 0x100)
3250                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3251                 }
3252 #endif
3253
3254                 /* mark the range as done, and continue */
3255                 dorange = FALSE;
3256                 didrange = TRUE;
3257 #ifdef EBCDIC
3258                 literal_endpoint = 0;
3259 #endif
3260                 continue;
3261             }
3262
3263             /* range begins (ignore - as first or last char) */
3264             else if (*s == '-' && s+1 < send  && s != start) {
3265                 if (didrange) {
3266                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3267                 }
3268                 if (has_utf8
3269 #ifdef EBCDIC
3270                     && !native_range
3271 #endif
3272                     ) {
3273                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
3274                     s++;
3275                     continue;
3276                 }
3277                 dorange = TRUE;
3278                 s++;
3279             }
3280             else {
3281                 didrange = FALSE;
3282 #ifdef EBCDIC
3283                 literal_endpoint = 0;
3284                 native_range = TRUE;
3285 #endif
3286             }
3287         }
3288
3289         /* if we get here, we're not doing a transliteration */
3290
3291         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3292             char *s1 = s-1;
3293             int esc = 0;
3294             while (s1 >= start && *s1-- == '\\')
3295                 esc = !esc;
3296             if (!esc)
3297                 in_charclass = TRUE;
3298         }
3299
3300         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3301             char *s1 = s-1;
3302             int esc = 0;
3303             while (s1 >= start && *s1-- == '\\')
3304                 esc = !esc;
3305             if (!esc)
3306                 in_charclass = FALSE;
3307         }
3308
3309         /* skip for regexp comments /(?#comment)/, except for the last
3310          * char, which will be done separately.
3311          * Stop on (?{..}) and friends */
3312
3313         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3314             if (s[2] == '#') {
3315                 while (s+1 < send && *s != ')')
3316                     *d++ = *s++;
3317             }
3318             else if (!PL_lex_casemods &&
3319                      (    s[2] == '{' /* This should match regcomp.c */
3320                       || (s[2] == '?' && s[3] == '{')))
3321             {
3322                 break;
3323             }
3324         }
3325
3326         /* likewise skip #-initiated comments in //x patterns */
3327         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3328           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3329             while (s+1 < send && *s != '\n')
3330                 *d++ = *s++;
3331         }
3332
3333         /* no further processing of single-quoted regex */
3334         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3335             goto default_action;
3336
3337         /* check for embedded arrays
3338            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3339            */
3340         else if (*s == '@' && s[1]) {
3341             if (isWORDCHAR_lazy_if(s+1,UTF))
3342                 break;
3343             if (strchr(":'{$", s[1]))
3344                 break;
3345             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3346                 break; /* in regexp, neither @+ nor @- are interpolated */
3347         }
3348
3349         /* check for embedded scalars.  only stop if we're sure it's a
3350            variable.
3351         */
3352         else if (*s == '$') {
3353             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3354                 break;
3355             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3356                 if (s[1] == '\\') {
3357                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3358                                    "Possible unintended interpolation of $\\ in regex");
3359                 }
3360                 break;          /* in regexp, $ might be tail anchor */
3361             }
3362         }
3363
3364         /* End of else if chain - OP_TRANS rejoin rest */
3365
3366         /* backslashes */
3367         if (*s == '\\' && s+1 < send) {
3368             char* e;    /* Can be used for ending '}', etc. */
3369
3370             s++;
3371
3372             /* warn on \1 - \9 in substitution replacements, but note that \11
3373              * is an octal; and \19 is \1 followed by '9' */
3374             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3375                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3376             {
3377                 /* diag_listed_as: \%d better written as $%d */
3378                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3379                 *--s = '$';
3380                 break;
3381             }
3382
3383             /* string-change backslash escapes */
3384             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3385                 --s;
3386                 break;
3387             }
3388             /* In a pattern, process \N, but skip any other backslash escapes.
3389              * This is because we don't want to translate an escape sequence
3390              * into a meta symbol and have the regex compiler use the meta
3391              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3392              * in spite of this, we do have to process \N here while the proper
3393              * charnames handler is in scope.  See bugs #56444 and #62056.
3394              * There is a complication because \N in a pattern may also stand
3395              * for 'match a non-nl', and not mean a charname, in which case its
3396              * processing should be deferred to the regex compiler.  To be a
3397              * charname it must be followed immediately by a '{', and not look
3398              * like \N followed by a curly quantifier, i.e., not something like
3399              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3400              * quantifier */
3401             else if (PL_lex_inpat
3402                     && (*s != 'N'
3403                         || s[1] != '{'
3404                         || regcurly(s + 1, FALSE)))
3405             {
3406                 *d++ = '\\';
3407                 goto default_action;
3408             }
3409
3410             switch (*s) {
3411
3412             /* quoted - in transliterations */
3413             case '-':
3414                 if (PL_lex_inwhat == OP_TRANS) {
3415                     *d++ = *s++;
3416                     continue;
3417                 }
3418                 /* FALL THROUGH */
3419             default:
3420                 {
3421                     if ((isALPHANUMERIC(*s)))
3422                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3423                                        "Unrecognized escape \\%c passed through",
3424                                        *s);
3425                     /* default action is to copy the quoted character */
3426                     goto default_action;
3427                 }
3428
3429             /* eg. \132 indicates the octal constant 0132 */
3430             case '0': case '1': case '2': case '3':
3431             case '4': case '5': case '6': case '7':
3432                 {
3433                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3434                     STRLEN len = 3;
3435                     uv = grok_oct(s, &len, &flags, NULL);
3436                     s += len;
3437                     if (len < 3 && s < send && isDIGIT(*s)
3438                         && ckWARN(WARN_MISC))
3439                     {
3440                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3441                                     "%s", form_short_octal_warning(s, len));
3442                     }
3443                 }
3444                 goto NUM_ESCAPE_INSERT;
3445
3446             /* eg. \o{24} indicates the octal constant \024 */
3447             case 'o':
3448                 {
3449                     const char* error;
3450
3451                     bool valid = grok_bslash_o(&s, &uv, &error,
3452                                                TRUE, /* Output warning */
3453                                                FALSE, /* Not strict */
3454                                                TRUE, /* Output warnings for
3455                                                          non-portables */
3456                                                UTF);
3457                     if (! valid) {
3458                         yyerror(error);
3459                         continue;
3460                     }
3461                     goto NUM_ESCAPE_INSERT;
3462                 }
3463
3464             /* eg. \x24 indicates the hex constant 0x24 */
3465             case 'x':
3466                 {
3467                     const char* error;
3468
3469                     bool valid = grok_bslash_x(&s, &uv, &error,
3470                                                TRUE, /* Output warning */
3471                                                FALSE, /* Not strict */
3472                                                TRUE,  /* Output warnings for
3473                                                          non-portables */
3474                                                UTF);
3475                     if (! valid) {
3476                         yyerror(error);
3477                         continue;
3478                     }
3479                 }
3480
3481               NUM_ESCAPE_INSERT:
3482                 /* Insert oct or hex escaped character.  There will always be
3483                  * enough room in sv since such escapes will be longer than any
3484                  * UTF-8 sequence they can end up as, except if they force us
3485                  * to recode the rest of the string into utf8 */
3486
3487                 /* Here uv is the ordinal of the next character being added */
3488                 if (!UVCHR_IS_INVARIANT(uv)) {
3489                     if (!has_utf8 && uv > 255) {
3490                         /* Might need to recode whatever we have accumulated so
3491                          * far if it contains any chars variant in utf8 or
3492                          * utf-ebcdic. */
3493
3494                         SvCUR_set(sv, d - SvPVX_const(sv));
3495                         SvPOK_on(sv);
3496                         *d = '\0';
3497                         /* See Note on sizing above.  */
3498                         sv_utf8_upgrade_flags_grow(sv,
3499                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3500                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3501                         d = SvPVX(sv) + SvCUR(sv);
3502                         has_utf8 = TRUE;
3503                     }
3504
3505                     if (has_utf8) {
3506                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3507                         if (PL_lex_inwhat == OP_TRANS &&
3508                             PL_sublex_info.sub_op) {
3509                             PL_sublex_info.sub_op->op_private |=
3510                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3511                                              : OPpTRANS_TO_UTF);
3512                         }
3513 #ifdef EBCDIC
3514                         if (uv > 255 && !dorange)
3515                             native_range = FALSE;
3516 #endif
3517                     }
3518                     else {
3519                         *d++ = (char)uv;
3520                     }
3521                 }
3522                 else {
3523                     *d++ = (char) uv;
3524                 }
3525                 continue;
3526
3527             case 'N':
3528                 /* In a non-pattern \N must be a named character, like \N{LATIN
3529                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3530                  * mean to match a non-newline.  For non-patterns, named
3531                  * characters are converted to their string equivalents. In
3532                  * patterns, named characters are not converted to their
3533                  * ultimate forms for the same reasons that other escapes
3534                  * aren't.  Instead, they are converted to the \N{U+...} form
3535                  * to get the value from the charnames that is in effect right
3536                  * now, while preserving the fact that it was a named character
3537                  * so that the regex compiler knows this */
3538
3539                 /* The structure of this section of code (besides checking for
3540                  * errors and upgrading to utf8) is:
3541                  *  Further disambiguate between the two meanings of \N, and if
3542                  *      not a charname, go process it elsewhere
3543                  *  If of form \N{U+...}, pass it through if a pattern;
3544                  *      otherwise convert to utf8
3545                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3546                  *  pattern; otherwise convert to utf8 */
3547
3548                 /* Here, s points to the 'N'; the test below is guaranteed to
3549                  * succeed if we are being called on a pattern as we already
3550                  * know from a test above that the next character is a '{'.
3551                  * On a non-pattern \N must mean 'named sequence, which
3552                  * requires braces */
3553                 s++;
3554                 if (*s != '{') {
3555                     yyerror("Missing braces on \\N{}");
3556                     continue;
3557                 }
3558                 s++;
3559
3560                 /* If there is no matching '}', it is an error. */
3561                 if (! (e = strchr(s, '}'))) {
3562                     if (! PL_lex_inpat) {
3563                         yyerror("Missing right brace on \\N{}");
3564                     } else {
3565                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3566                     }
3567                     continue;
3568                 }
3569
3570                 /* Here it looks like a named character */
3571
3572                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3573                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3574                                 | PERL_SCAN_DISALLOW_PREFIX;
3575                     STRLEN len;
3576
3577                     /* For \N{U+...}, the '...' is a unicode value even on
3578                      * EBCDIC machines */
3579                     s += 2;         /* Skip to next char after the 'U+' */
3580                     len = e - s;
3581                     uv = grok_hex(s, &len, &flags, NULL);
3582                     if (len == 0 || len != (STRLEN)(e - s)) {
3583                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3584                         s = e + 1;
3585                         continue;
3586                     }
3587
3588                     if (PL_lex_inpat) {
3589
3590                         /* On non-EBCDIC platforms, pass through to the regex
3591                          * compiler unchanged.  The reason we evaluated the
3592                          * number above is to make sure there wasn't a syntax
3593                          * error.  But on EBCDIC we convert to native so
3594                          * downstream code can continue to assume it's native
3595                          */
3596                         s -= 5;     /* Include the '\N{U+' */
3597 #ifdef EBCDIC
3598                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3599                                                                and the \0 */
3600                                     "\\N{U+%X}",
3601                                     (unsigned int) UNI_TO_NATIVE(uv));
3602 #else
3603                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3604                         d += e - s + 1;
3605 #endif
3606                     }
3607                     else {  /* Not a pattern: convert the hex to string */
3608
3609                          /* If destination is not in utf8, unconditionally
3610                           * recode it to be so.  This is because \N{} implies
3611                           * Unicode semantics, and scalars have to be in utf8
3612                           * to guarantee those semantics */
3613                         if (! has_utf8) {
3614                             SvCUR_set(sv, d - SvPVX_const(sv));
3615                             SvPOK_on(sv);
3616                             *d = '\0';
3617                             /* See Note on sizing above.  */
3618                             sv_utf8_upgrade_flags_grow(
3619                                         sv,
3620                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3621                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3622                             d = SvPVX(sv) + SvCUR(sv);
3623                             has_utf8 = TRUE;
3624                         }
3625
3626                         /* Add the (Unicode) code point to the output. */
3627                         if (UNI_IS_INVARIANT(uv)) {
3628                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3629                         }
3630                         else {
3631                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3632                         }
3633                     }
3634                 }
3635                 else /* Here is \N{NAME} but not \N{U+...}. */
3636                      if ((res = get_and_check_backslash_N_name(s, e)))
3637                 {
3638                     STRLEN len;
3639                     const char *str = SvPV_const(res, len);
3640                     if (PL_lex_inpat) {
3641
3642                         if (! len) { /* The name resolved to an empty string */
3643                             Copy("\\N{}", d, 4, char);
3644                             d += 4;
3645                         }
3646                         else {
3647                             /* In order to not lose information for the regex
3648                             * compiler, pass the result in the specially made
3649                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3650                             * the code points in hex of each character
3651                             * returned by charnames */
3652
3653                             const char *str_end = str + len;
3654                             const STRLEN off = d - SvPVX_const(sv);
3655
3656                             if (! SvUTF8(res)) {
3657                                 /* For the non-UTF-8 case, we can determine the
3658                                  * exact length needed without having to parse
3659                                  * through the string.  Each character takes up
3660                                  * 2 hex digits plus either a trailing dot or
3661                                  * the "}" */
3662                                 d = off + SvGROW(sv, off
3663                                                     + 3 * len
3664                                                     + 6 /* For the "\N{U+", and
3665                                                            trailing NUL */
3666                                                     + (STRLEN)(send - e));
3667                                 Copy("\\N{U+", d, 5, char);
3668                                 d += 5;
3669                                 while (str < str_end) {
3670                                     char hex_string[4];
3671                                     my_snprintf(hex_string, sizeof(hex_string),
3672                                                 "%02X.", (U8) *str);
3673                                     Copy(hex_string, d, 3, char);
3674                                     d += 3;
3675                                     str++;
3676                                 }
3677                                 d--;    /* We will overwrite below the final
3678                                            dot with a right brace */
3679                             }
3680                             else {
3681                                 STRLEN char_length; /* cur char's byte length */
3682
3683                                 /* and the number of bytes after this is
3684                                  * translated into hex digits */
3685                                 STRLEN output_length;
3686
3687                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3688                                  * for max('U+', '.'); and 1 for NUL */
3689                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3690
3691                                 /* Get the first character of the result. */
3692                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3693                                                         len,
3694                                                         &char_length,
3695                                                         UTF8_ALLOW_ANYUV);
3696                                 /* Convert first code point to hex, including
3697                                  * the boiler plate before it. */
3698                                 output_length =
3699                                     my_snprintf(hex_string, sizeof(hex_string),
3700                                                 "\\N{U+%X",
3701                                                 (unsigned int) uv);
3702
3703                                 /* Make sure there is enough space to hold it */
3704                                 d = off + SvGROW(sv, off
3705                                                     + output_length
3706                                                     + (STRLEN)(send - e)
3707                                                     + 2);       /* '}' + NUL */
3708                                 /* And output it */
3709                                 Copy(hex_string, d, output_length, char);
3710                                 d += output_length;
3711
3712                                 /* For each subsequent character, append dot and
3713                                 * its ordinal in hex */
3714                                 while ((str += char_length) < str_end) {
3715                                     const STRLEN off = d - SvPVX_const(sv);
3716                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3717                                                             str_end - str,
3718                                                             &char_length,
3719                                                             UTF8_ALLOW_ANYUV);
3720                                     output_length =
3721                                         my_snprintf(hex_string,
3722                                                     sizeof(hex_string),
3723                                                     ".%X",
3724                                                     (unsigned int) uv);
3725
3726                                     d = off + SvGROW(sv, off
3727                                                         + output_length
3728                                                         + (STRLEN)(send - e)
3729                                                         + 2);   /* '}' +  NUL */
3730                                     Copy(hex_string, d, output_length, char);
3731                                     d += output_length;
3732                                 }
3733                             }
3734
3735                             *d++ = '}'; /* Done.  Add the trailing brace */
3736                         }
3737                     }
3738                     else { /* Here, not in a pattern.  Convert the name to a
3739                             * string. */
3740
3741                          /* If destination is not in utf8, unconditionally
3742                           * recode it to be so.  This is because \N{} implies
3743                           * Unicode semantics, and scalars have to be in utf8
3744                           * to guarantee those semantics */
3745                         if (! has_utf8) {
3746                             SvCUR_set(sv, d - SvPVX_const(sv));
3747                             SvPOK_on(sv);
3748                             *d = '\0';
3749                             /* See Note on sizing above.  */
3750                             sv_utf8_upgrade_flags_grow(sv,
3751                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3752                                                 len + (STRLEN)(send - s) + 1);
3753                             d = SvPVX(sv) + SvCUR(sv);
3754                             has_utf8 = TRUE;
3755                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3756
3757                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3758                              * set correctly here). */
3759                             const STRLEN off = d - SvPVX_const(sv);
3760                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3761                         }
3762                         Copy(str, d, len, char);
3763                         d += len;
3764                     }
3765
3766                     SvREFCNT_dec(res);
3767
3768                 } /* End \N{NAME} */
3769 #ifdef EBCDIC
3770                 if (!dorange)
3771                     native_range = FALSE; /* \N{} is defined to be Unicode */
3772 #endif
3773                 s = e + 1;  /* Point to just after the '}' */
3774                 continue;
3775
3776             /* \c is a control character */
3777             case 'c':
3778                 s++;
3779                 if (s < send) {
3780                     *d++ = grok_bslash_c(*s++, 1);
3781                 }
3782                 else {
3783                     yyerror("Missing control char name in \\c");
3784                 }
3785                 continue;
3786
3787             /* printf-style backslashes, formfeeds, newlines, etc */
3788             case 'b':
3789                 *d++ = '\b';
3790                 break;
3791             case 'n':
3792                 *d++ = '\n';
3793                 break;
3794             case 'r':
3795                 *d++ = '\r';
3796                 break;
3797             case 'f':
3798                 *d++ = '\f';
3799                 break;
3800             case 't':
3801                 *d++ = '\t';
3802                 break;
3803             case 'e':
3804                 *d++ = ASCII_TO_NATIVE('\033');
3805                 break;
3806             case 'a':
3807                 *d++ = '\a';
3808                 break;
3809             } /* end switch */
3810
3811             s++;
3812             continue;
3813         } /* end if (backslash) */
3814 #ifdef EBCDIC
3815         else
3816             literal_endpoint++;
3817 #endif
3818
3819     default_action:
3820         /* If we started with encoded form, or already know we want it,
3821            then encode the next character */
3822         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3823             STRLEN len  = 1;
3824
3825
3826             /* One might think that it is wasted effort in the case of the
3827              * source being utf8 (this_utf8 == TRUE) to take the next character
3828              * in the source, convert it to an unsigned value, and then convert
3829              * it back again.  But the source has not been validated here.  The
3830              * routine that does the conversion checks for errors like
3831              * malformed utf8 */
3832
3833             const UV nextuv   = (this_utf8)
3834                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3835                                 : (UV) ((U8) *s);
3836             const STRLEN need = UNISKIP(nextuv);
3837             if (!has_utf8) {
3838                 SvCUR_set(sv, d - SvPVX_const(sv));
3839                 SvPOK_on(sv);
3840                 *d = '\0';
3841                 /* See Note on sizing above.  */
3842                 sv_utf8_upgrade_flags_grow(sv,
3843                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3844                                         need + (STRLEN)(send - s) + 1);
3845                 d = SvPVX(sv) + SvCUR(sv);
3846                 has_utf8 = TRUE;
3847             } else if (need > len) {
3848                 /* encoded value larger than old, may need extra space (NOTE:
3849                  * SvCUR() is not set correctly here).   See Note on sizing
3850                  * above.  */
3851                 const STRLEN off = d - SvPVX_const(sv);
3852                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3853             }
3854             s += len;
3855
3856             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3857 #ifdef EBCDIC
3858             if (uv > 255 && !dorange)
3859                 native_range = FALSE;
3860 #endif
3861         }
3862         else {
3863             *d++ = *s++;
3864         }
3865     } /* while loop to process each character */
3866
3867     /* terminate the string and set up the sv */
3868     *d = '\0';
3869     SvCUR_set(sv, d - SvPVX_const(sv));
3870     if (SvCUR(sv) >= SvLEN(sv))
3871         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3872                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3873
3874     SvPOK_on(sv);
3875     if (PL_encoding && !has_utf8) {
3876         sv_recode_to_utf8(sv, PL_encoding);
3877         if (SvUTF8(sv))
3878             has_utf8 = TRUE;
3879     }
3880     if (has_utf8) {
3881         SvUTF8_on(sv);
3882         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3883             PL_sublex_info.sub_op->op_private |=
3884                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3885         }
3886     }
3887
3888     /* shrink the sv if we allocated more than we used */
3889     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3890         SvPV_shrink_to_cur(sv);
3891     }
3892
3893     /* return the substring (via pl_yylval) only if we parsed anything */
3894     if (s > start) {
3895         char *s2 = start;
3896         for (; s2 < s; s2++) {
3897             if (*s2 == '\n')
3898                 COPLINE_INC_WITH_HERELINES;
3899         }
3900         SvREFCNT_inc_simple_void_NN(sv);
3901         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3902             && ! PL_parser->lex_re_reparsing)
3903         {
3904             const char *const key = PL_lex_inpat ? "qr" : "q";
3905             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3906             const char *type;
3907             STRLEN typelen;
3908
3909             if (PL_lex_inwhat == OP_TRANS) {
3910                 type = "tr";
3911                 typelen = 2;
3912             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3913                 type = "s";
3914                 typelen = 1;
3915             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3916                 type = "q";
3917                 typelen = 1;
3918             } else  {
3919                 type = "qq";
3920                 typelen = 2;
3921             }
3922
3923             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3924                                 type, typelen);
3925         }
3926         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3927     }
3928     LEAVE_with_name("scan_const");
3929     return s;
3930 }
3931
3932 /* S_intuit_more
3933  * Returns TRUE if there's more to the expression (e.g., a subscript),
3934  * FALSE otherwise.
3935  *
3936  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3937  *
3938  * ->[ and ->{ return TRUE
3939  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3940  * { and [ outside a pattern are always subscripts, so return TRUE
3941  * if we're outside a pattern and it's not { or [, then return FALSE
3942  * if we're in a pattern and the first char is a {
3943  *   {4,5} (any digits around the comma) returns FALSE
3944  * if we're in a pattern and the first char is a [
3945  *   [] returns FALSE
3946  *   [SOMETHING] has a funky algorithm to decide whether it's a
3947  *      character class or not.  It has to deal with things like
3948  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3949  * anything else returns TRUE
3950  */
3951
3952 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3953
3954 STATIC int
3955 S_intuit_more(pTHX_ char *s)
3956 {
3957     dVAR;
3958
3959     PERL_ARGS_ASSERT_INTUIT_MORE;
3960
3961     if (PL_lex_brackets)
3962         return TRUE;
3963     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3964         return TRUE;
3965     if (*s == '-' && s[1] == '>'
3966      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3967      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3968         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3969         return TRUE;
3970     if (*s != '{' && *s != '[')
3971         return FALSE;
3972     if (!PL_lex_inpat)
3973         return TRUE;
3974
3975     /* In a pattern, so maybe we have {n,m}. */
3976     if (*s == '{') {
3977         if (regcurly(s, FALSE)) {
3978             return FALSE;
3979         }
3980         return TRUE;
3981     }
3982
3983     /* On the other hand, maybe we have a character class */
3984
3985     s++;
3986     if (*s == ']' || *s == '^')
3987         return FALSE;
3988     else {
3989         /* this is terrifying, and it works */
3990         int weight;
3991         char seen[256];
3992         const char * const send = strchr(s,']');
3993         unsigned char un_char, last_un_char;
3994         char tmpbuf[sizeof PL_tokenbuf * 4];
3995
3996         if (!send)              /* has to be an expression */
3997             return TRUE;
3998         weight = 2;             /* let's weigh the evidence */
3999
4000         if (*s == '$')
4001             weight -= 3;
4002         else if (isDIGIT(*s)) {
4003             if (s[1] != ']') {
4004                 if (isDIGIT(s[1]) && s[2] == ']')
4005                     weight -= 10;
4006             }
4007             else
4008                 weight -= 100;
4009         }
4010         Zero(seen,256,char);
4011         un_char = 255;
4012         for (; s < send; s++) {
4013             last_un_char = un_char;
4014             un_char = (unsigned char)*s;
4015             switch (*s) {
4016             case '@':
4017             case '&':
4018             case '$':
4019                 weight -= seen[un_char] * 10;
4020                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4021                     int len;
4022                     char *tmp = PL_bufend;
4023                     PL_bufend = (char*)send;
4024                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4025                     PL_bufend = tmp;
4026                     len = (int)strlen(tmpbuf);
4027                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4028                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4029                         weight -= 100;
4030                     else
4031                         weight -= 10;
4032                 }
4033                 else if (*s == '$' && s[1] &&
4034                   strchr("[#!%*<>()-=",s[1])) {
4035                     if (/*{*/ strchr("])} =",s[2]))
4036                         weight -= 10;
4037                     else
4038                         weight -= 1;
4039                 }
4040                 break;
4041             case '\\':
4042                 un_char = 254;
4043                 if (s[1]) {
4044                     if (strchr("wds]",s[1]))
4045                         weight += 100;
4046                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4047                         weight += 1;
4048                     else if (strchr("rnftbxcav",s[1]))
4049                         weight += 40;
4050                     else if (isDIGIT(s[1])) {
4051                         weight += 40;
4052                         while (s[1] && isDIGIT(s[1]))
4053                             s++;
4054                     }
4055                 }
4056                 else
4057                     weight += 100;
4058                 break;
4059             case '-':
4060                 if (s[1] == '\\')
4061                     weight += 50;
4062                 if (strchr("aA01! ",last_un_char))
4063                     weight += 30;
4064                 if (strchr("zZ79~",s[1]))
4065                     weight += 30;
4066                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4067                     weight -= 5;        /* cope with negative subscript */
4068                 break;
4069             default:
4070                 if (!isWORDCHAR(last_un_char)
4071                     && !(last_un_char == '$' || last_un_char == '@'
4072                          || last_un_char == '&')
4073                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4074                     char *d = tmpbuf;
4075                     while (isALPHA(*s))
4076                         *d++ = *s++;
4077                     *d = '\0';
4078                     if (keyword(tmpbuf, d - tmpbuf, 0))
4079                         weight -= 150;
4080                 }
4081                 if (un_char == last_un_char + 1)
4082                     weight += 5;
4083                 weight -= seen[un_char];
4084                 break;
4085             }
4086             seen[un_char]++;
4087         }
4088         if (weight >= 0)        /* probably a character class */
4089             return FALSE;
4090     }
4091
4092     return TRUE;
4093 }
4094
4095 /*
4096  * S_intuit_method
4097  *
4098  * Does all the checking to disambiguate
4099  *   foo bar
4100  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4101  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4102  *
4103  * First argument is the stuff after the first token, e.g. "bar".
4104  *
4105  * Not a method if foo is a filehandle.
4106  * Not a method if foo is a subroutine prototyped to take a filehandle.
4107  * Not a method if it's really "Foo $bar"
4108  * Method if it's "foo $bar"
4109  * Not a method if it's really "print foo $bar"
4110  * Method if it's really "foo package::" (interpreted as package->foo)
4111  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4112  * Not a method if bar is a filehandle or package, but is quoted with
4113  *   =>
4114  */
4115
4116 STATIC int
4117 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4118 {
4119     dVAR;
4120     char *s = start + (*start == '$');
4121     char tmpbuf[sizeof PL_tokenbuf];
4122     STRLEN len;
4123     GV* indirgv;
4124 #ifdef PERL_MAD
4125     int soff;
4126 #endif
4127
4128     PERL_ARGS_ASSERT_INTUIT_METHOD;
4129
4130     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4131             return 0;
4132     if (cv && SvPOK(cv)) {
4133         const char *proto = CvPROTO(cv);
4134         if (proto) {
4135             while (*proto && (isSPACE(*proto) || *proto == ';'))
4136                 proto++;
4137             if (*proto == '*')
4138                 return 0;
4139         }
4140     }
4141
4142     if (*start == '$') {
4143         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4144                 isUPPER(*PL_tokenbuf))
4145             return 0;
4146 #ifdef PERL_MAD
4147         len = start - SvPVX(PL_linestr);
4148 #endif
4149         s = PEEKSPACE(s);
4150 #ifdef PERL_MAD
4151         start = SvPVX(PL_linestr) + len;
4152 #endif
4153         PL_bufptr = start;
4154         PL_expect = XREF;
4155         return *s == '(' ? FUNCMETH : METHOD;
4156     }
4157
4158     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4159     /* start is the beginning of the possible filehandle/object,
4160      * and s is the end of it
4161      * tmpbuf is a copy of it (but with single quotes as double colons)
4162      */
4163
4164     if (!keyword(tmpbuf, len, 0)) {
4165         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4166             len -= 2;
4167             tmpbuf[len] = '\0';
4168 #ifdef PERL_MAD
4169             soff = s - SvPVX(PL_linestr);
4170 #endif
4171             goto bare_package;
4172         }
4173         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4174         if (indirgv && GvCVu(indirgv))
4175             return 0;
4176         /* filehandle or package name makes it a method */
4177         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4178 #ifdef PERL_MAD
4179             soff = s - SvPVX(PL_linestr);
4180 #endif
4181             s = PEEKSPACE(s);
4182             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4183                 return 0;       /* no assumptions -- "=>" quotes bareword */
4184       bare_package:
4185             start_force(PL_curforce);
4186             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4187                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4188             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4189             if (PL_madskills)
4190                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4191                                                             ( UTF ? SVf_UTF8 : 0 )));
4192             PL_expect = XTERM;
4193             force_next(WORD);
4194             PL_bufptr = s;
4195 #ifdef PERL_MAD
4196             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4197 #endif
4198             return *s == '(' ? FUNCMETH : METHOD;
4199         }
4200     }
4201     return 0;
4202 }
4203
4204 /* Encoded script support. filter_add() effectively inserts a
4205  * 'pre-processing' function into the current source input stream.
4206  * Note that the filter function only applies to the current source file
4207  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4208  *
4209  * The datasv parameter (which may be NULL) can be used to pass
4210  * private data to this instance of the filter. The filter function
4211  * can recover the SV using the FILTER_DATA macro and use it to
4212  * store private buffers and state information.
4213  *
4214  * The supplied datasv parameter is upgraded to a PVIO type
4215  * and the IoDIRP/IoANY field is used to store the function pointer,
4216  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4217  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4218  * private use must be set using malloc'd pointers.
4219  */
4220
4221 SV *
4222 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4223 {
4224     dVAR;
4225     if (!funcp)
4226         return NULL;
4227
4228     if (!PL_parser)
4229         return NULL;
4230
4231     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4232         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4233
4234     if (!PL_rsfp_filters)
4235         PL_rsfp_filters = newAV();
4236     if (!datasv)
4237         datasv = newSV(0);
4238     SvUPGRADE(datasv, SVt_PVIO);
4239     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4240     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4241     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4242                           FPTR2DPTR(void *, IoANY(datasv)),
4243                           SvPV_nolen(datasv)));
4244     av_unshift(PL_rsfp_filters, 1);
4245     av_store(PL_rsfp_filters, 0, datasv) ;
4246     if (
4247         !PL_parser->filtered
4248      && PL_parser->lex_flags & LEX_EVALBYTES
4249      && PL_bufptr < PL_bufend
4250     ) {
4251         const char *s = PL_bufptr;
4252         while (s < PL_bufend) {
4253             if (*s == '\n') {
4254                 SV *linestr = PL_parser->linestr;
4255                 char *buf = SvPVX(linestr);
4256                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4257                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4258                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4259                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4260                 STRLEN const last_uni_pos =
4261                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4262                 STRLEN const last_lop_pos =
4263                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4264                 av_push(PL_rsfp_filters, linestr);
4265                 PL_parser->linestr =
4266                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4267                 buf = SvPVX(PL_parser->linestr);
4268                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4269                 PL_parser->bufptr = buf + bufptr_pos;
4270                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4271                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4272                 PL_parser->linestart = buf + linestart_pos;
4273                 if (PL_parser->last_uni)
4274                     PL_parser->last_uni = buf + last_uni_pos;
4275                 if (PL_parser->last_lop)
4276                     PL_parser->last_lop = buf + last_lop_pos;
4277                 SvLEN(linestr) = SvCUR(linestr);
4278                 SvCUR(linestr) = s-SvPVX(linestr);
4279                 PL_parser->filtered = 1;
4280                 break;
4281             }
4282             s++;
4283         }
4284     }
4285     return(datasv);
4286 }
4287
4288
4289 /* Delete most recently added instance of this filter function. */
4290 void
4291 Perl_filter_del(pTHX_ filter_t funcp)
4292 {
4293     dVAR;
4294     SV *datasv;
4295
4296     PERL_ARGS_ASSERT_FILTER_DEL;
4297
4298 #ifdef DEBUGGING
4299     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4300                           FPTR2DPTR(void*, funcp)));
4301 #endif
4302     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4303         return;
4304     /* if filter is on top of stack (usual case) just pop it off */
4305     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4306     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4307         sv_free(av_pop(PL_rsfp_filters));
4308
4309         return;
4310     }
4311     /* we need to search for the correct entry and clear it     */
4312     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4313 }
4314
4315
4316 /* Invoke the idxth filter function for the current rsfp.        */
4317 /* maxlen 0 = read one text line */
4318 I32
4319 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4320 {
4321     dVAR;
4322     filter_t funcp;
4323     SV *datasv = NULL;
4324     /* This API is bad. It should have been using unsigned int for maxlen.
4325        Not sure if we want to change the API, but if not we should sanity
4326        check the value here.  */
4327     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4328
4329     PERL_ARGS_ASSERT_FILTER_READ;
4330
4331     if (!PL_parser || !PL_rsfp_filters)
4332         return -1;
4333     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4334         /* Provide a default input filter to make life easy.    */
4335         /* Note that we append to the line. This is handy.      */
4336         DEBUG_P(PerlIO_printf(Perl_debug_log,
4337                               "filter_read %d: from rsfp\n", idx));
4338         if (correct_length) {
4339             /* Want a block */
4340             int len ;
4341             const int old_len = SvCUR(buf_sv);
4342
4343             /* ensure buf_sv is large enough */
4344             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4345             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4346                                    correct_length)) <= 0) {
4347                 if (PerlIO_error(PL_rsfp))
4348                     return -1;          /* error */
4349                 else
4350                     return 0 ;          /* end of file */
4351             }
4352             SvCUR_set(buf_sv, old_len + len) ;
4353             SvPVX(buf_sv)[old_len + len] = '\0';
4354         } else {
4355             /* Want a line */
4356             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4357                 if (PerlIO_error(PL_rsfp))
4358                     return -1;          /* error */
4359                 else
4360                     return 0 ;          /* end of file */
4361             }
4362         }
4363         return SvCUR(buf_sv);
4364     }
4365     /* Skip this filter slot if filter has been deleted */
4366     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4367         DEBUG_P(PerlIO_printf(Perl_debug_log,
4368                               "filter_read %d: skipped (filter deleted)\n",
4369                               idx));
4370         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4371     }
4372     if (SvTYPE(datasv) != SVt_PVIO) {
4373         if (correct_length) {
4374             /* Want a block */
4375             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4376             if (!remainder) return 0; /* eof */
4377             if (correct_length > remainder) correct_length = remainder;
4378             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4379             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4380         } else {
4381             /* Want a line */
4382             const char *s = SvEND(datasv);
4383             const char *send = SvPVX(datasv) + SvLEN(datasv);
4384             while (s < send) {
4385                 if (*s == '\n') {
4386                     s++;
4387                     break;
4388                 }
4389                 s++;
4390             }
4391             if (s == send) return 0; /* eof */
4392             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4393             SvCUR_set(datasv, s-SvPVX(datasv));
4394         }
4395         return SvCUR(buf_sv);
4396     }
4397     /* Get function pointer hidden within datasv        */
4398     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4399     DEBUG_P(PerlIO_printf(Perl_debug_log,
4400                           "filter_read %d: via function %p (%s)\n",
4401                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4402     /* Call function. The function is expected to       */
4403     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4404     /* Return: <0:error, =0:eof, >0:not eof             */
4405     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4406 }
4407
4408 STATIC char *
4409 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4410 {
4411     dVAR;
4412
4413     PERL_ARGS_ASSERT_FILTER_GETS;
4414
4415 #ifdef PERL_CR_FILTER
4416     if (!PL_rsfp_filters) {
4417         filter_add(S_cr_textfilter,NULL);
4418     }
4419 #endif
4420     if (PL_rsfp_filters) {
4421         if (!append)
4422             SvCUR_set(sv, 0);   /* start with empty line        */
4423         if (FILTER_READ(0, sv, 0) > 0)
4424             return ( SvPVX(sv) ) ;
4425         else
4426             return NULL ;
4427     }
4428     else
4429         return (sv_gets(sv, PL_rsfp, append));
4430 }
4431
4432 STATIC HV *
4433 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4434 {
4435     dVAR;
4436     GV *gv;
4437
4438     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4439
4440     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4441         return PL_curstash;
4442
4443     if (len > 2 &&
4444         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4445         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4446     {
4447         return GvHV(gv);                        /* Foo:: */
4448     }
4449
4450     /* use constant CLASS => 'MyClass' */
4451     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4452     if (gv && GvCV(gv)) {
4453         SV * const sv = cv_const_sv(GvCV(gv));
4454         if (sv)
4455             pkgname = SvPV_const(sv, len);
4456     }
4457
4458     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4459 }
4460
4461 #ifdef PERL_MAD
4462  /*
4463  * Perl_madlex
4464  * The intent of this yylex wrapper is to minimize the changes to the
4465  * tokener when we aren't interested in collecting madprops.  It remains
4466  * to be seen how successful this strategy will be...
4467  */
4468
4469 int
4470 Perl_madlex(pTHX)
4471 {
4472     int optype;
4473     char *s = PL_bufptr;
4474
4475     /* make sure PL_thiswhite is initialized */
4476     PL_thiswhite = 0;
4477     PL_thismad = 0;
4478
4479     /* previous token ate up our whitespace? */
4480     if (!PL_lasttoke && PL_nextwhite) {
4481         PL_thiswhite = PL_nextwhite;
4482         PL_nextwhite = 0;
4483     }
4484
4485     /* isolate the token, and figure out where it is without whitespace */
4486     PL_realtokenstart = -1;
4487     PL_thistoken = 0;
4488     optype = yylex();
4489     s = PL_bufptr;
4490     assert(PL_curforce < 0);
4491
4492     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4493         if (!PL_thistoken) {
4494             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4495                 PL_thistoken = newSVpvs("");
4496             else {
4497                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4498                 PL_thistoken = newSVpvn(tstart, s - tstart);
4499             }
4500         }
4501         if (PL_thismad) /* install head */
4502             CURMAD('X', PL_thistoken);
4503     }
4504
4505     /* last whitespace of a sublex? */
4506     if (optype == ')' && PL_endwhite) {
4507         CURMAD('X', PL_endwhite);
4508     }
4509
4510     if (!PL_thismad) {
4511
4512         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4513         if (!PL_thiswhite && !PL_endwhite && !optype) {
4514             sv_free(PL_thistoken);
4515             PL_thistoken = 0;
4516             return 0;
4517         }
4518
4519         /* put off final whitespace till peg */
4520         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4521             PL_nextwhite = PL_thiswhite;
4522             PL_thiswhite = 0;
4523         }
4524         else if (PL_thisopen) {
4525             CURMAD('q', PL_thisopen);
4526             if (PL_thistoken)
4527                 sv_free(PL_thistoken);
4528             PL_thistoken = 0;
4529         }
4530         else {
4531             /* Store actual token text as madprop X */
4532             CURMAD('X', PL_thistoken);
4533         }
4534
4535         if (PL_thiswhite) {
4536             /* add preceding whitespace as madprop _ */
4537             CURMAD('_', PL_thiswhite);
4538         }
4539
4540         if (PL_thisstuff) {
4541             /* add quoted material as madprop = */
4542             CURMAD('=', PL_thisstuff);
4543         }
4544
4545         if (PL_thisclose) {
4546             /* add terminating quote as madprop Q */
4547             CURMAD('Q', PL_thisclose);
4548         }
4549     }
4550
4551     /* special processing based on optype */
4552
4553     switch (optype) {
4554
4555     /* opval doesn't need a TOKEN since it can already store mp */
4556     case WORD:
4557     case METHOD:
4558     case FUNCMETH:
4559     case THING:
4560     case PMFUNC:
4561     case PRIVATEREF:
4562     case FUNC0SUB:
4563     case UNIOPSUB:
4564     case LSTOPSUB:
4565         if (pl_yylval.opval)
4566             append_madprops(PL_thismad, pl_yylval.opval, 0);
4567         PL_thismad = 0;
4568         return optype;
4569
4570     /* fake EOF */
4571     case 0:
4572         optype = PEG;
4573         if (PL_endwhite) {
4574             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4575             PL_endwhite = 0;
4576         }
4577         break;
4578
4579     /* pval */
4580     case LABEL:
4581         break;
4582
4583     case ']':
4584     case '}':
4585         if (PL_faketokens)
4586             break;
4587         /* remember any fake bracket that lexer is about to discard */
4588         if (PL_lex_brackets == 1 &&
4589             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4590         {
4591             s = PL_bufptr;
4592             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4593                 s++;
4594             if (*s == '}') {
4595                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4596                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4597                 PL_thiswhite = 0;
4598                 PL_bufptr = s - 1;
4599                 break;  /* don't bother looking for trailing comment */
4600             }
4601             else
4602                 s = PL_bufptr;
4603         }
4604         if (optype == ']')
4605             break;
4606         /* FALLTHROUGH */
4607
4608     /* attach a trailing comment to its statement instead of next token */
4609     case ';':
4610         if (PL_faketokens)
4611             break;
4612         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4613             s = PL_bufptr;
4614             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4615                 s++;
4616             if (*s == '\n' || *s == '#') {
4617                 while (s < PL_bufend && *s != '\n')
4618                     s++;
4619                 if (s < PL_bufend)
4620                     s++;
4621                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4622                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4623                 PL_thiswhite = 0;
4624                 PL_bufptr = s;
4625             }
4626         }
4627         break;
4628
4629     /* ival */
4630     default:
4631         break;
4632
4633     }
4634
4635     /* Create new token struct.  Note: opvals return early above. */
4636     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4637     PL_thismad = 0;
4638     return optype;
4639 }
4640 #endif
4641
4642 STATIC char *
4643 S_tokenize_use(pTHX_ int is_use, char *s) {
4644     dVAR;
4645
4646     PERL_ARGS_ASSERT_TOKENIZE_USE;
4647
4648     if (PL_expect != XSTATE)
4649         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4650                     is_use ? "use" : "no"));
4651     PL_expect = XTERM;
4652     s = SKIPSPACE1(s);
4653     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4654         s = force_version(s, TRUE);
4655         if (*s == ';' || *s == '}'
4656                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4657             start_force(PL_curforce);
4658             NEXTVAL_NEXTTOKE.opval = NULL;
4659             force_next(WORD);
4660         }
4661         else if (*s == 'v') {
4662             s = force_word(s,WORD,FALSE,TRUE);
4663             s = force_version(s, FALSE);
4664         }
4665     }
4666     else {
4667         s = force_word(s,WORD,FALSE,TRUE);
4668         s = force_version(s, FALSE);
4669     }
4670     pl_yylval.ival = is_use;
4671     return s;
4672 }
4673 #ifdef DEBUGGING
4674     static const char* const exp_name[] =
4675         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4676           "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4677         };
4678 #endif
4679
4680 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4681 STATIC bool
4682 S_word_takes_any_delimeter(char *p, STRLEN len)
4683 {
4684     return (len == 1 && strchr("msyq", p[0])) ||
4685            (len == 2 && (
4686             (p[0] == 't' && p[1] == 'r') ||
4687             (p[0] == 'q' && strchr("qwxr", p[1]))));
4688 }
4689
4690 static void
4691 S_check_scalar_slice(pTHX_ char *s)
4692 {
4693     s++;
4694     while (*s == ' ' || *s == '\t') s++;
4695     if (*s == 'q' && s[1] == 'w'
4696      && !isWORDCHAR_lazy_if(s+2,UTF))
4697         return;
4698     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4699         s += UTF ? UTF8SKIP(s) : 1;
4700     if (*s == '}' || *s == ']')
4701         pl_yylval.ival = OPpSLICEWARNING;
4702 }
4703
4704 /*
4705   yylex
4706
4707   Works out what to call the token just pulled out of the input
4708   stream.  The yacc parser takes care of taking the ops we return and
4709   stitching them into a tree.
4710
4711   Returns:
4712     The type of the next token
4713
4714   Structure:
4715       Switch based on the current state:
4716           - if we already built the token before, use it
4717           - if we have a case modifier in a string, deal with that
4718           - handle other cases of interpolation inside a string
4719           - scan the next line if we are inside a format
4720       In the normal state switch on the next character:
4721           - default:
4722             if alphabetic, go to key lookup
4723             unrecoginized character - croak
4724           - 0/4/26: handle end-of-line or EOF
4725           - cases for whitespace
4726           - \n and #: handle comments and line numbers
4727           - various operators, brackets and sigils
4728           - numbers
4729           - quotes
4730           - 'v': vstrings (or go to key lookup)
4731           - 'x' repetition operator (or go to key lookup)
4732           - other ASCII alphanumerics (key lookup begins here):
4733               word before => ?
4734               keyword plugin
4735               scan built-in keyword (but do nothing with it yet)
4736               check for statement label
4737               check for lexical subs
4738                   goto just_a_word if there is one
4739               see whether built-in keyword is overridden
4740               switch on keyword number:
4741                   - default: just_a_word:
4742                       not a built-in keyword; handle bareword lookup
4743                       disambiguate between method and sub call
4744                       fall back to bareword
4745                   - cases for built-in keywords
4746 */
4747
4748
4749 int
4750 Perl_yylex(pTHX)
4751 {
4752     dVAR;
4753     char *s = PL_bufptr;
4754     char *d;
4755     STRLEN len;
4756     bool bof = FALSE;
4757     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4758     U8 formbrack = 0;
4759     U32 fake_eof = 0;
4760
4761     /* orig_keyword, gvp, and gv are initialized here because
4762      * jump to the label just_a_word_zero can bypass their
4763      * initialization later. */
4764     I32 orig_keyword = 0;
4765     GV *gv = NULL;
4766     GV **gvp = NULL;
4767
4768     DEBUG_T( {
4769         SV* tmp = newSVpvs("");
4770         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4771             (IV)CopLINE(PL_curcop),
4772             lex_state_names[PL_lex_state],
4773             exp_name[PL_expect],
4774             pv_display(tmp, s, strlen(s), 0, 60));
4775         SvREFCNT_dec(tmp);
4776     } );
4777
4778     switch (PL_lex_state) {
4779     case LEX_NORMAL:
4780     case LEX_INTERPNORMAL:
4781         break;
4782
4783     /* when we've already built the next token, just pull it out of the queue */
4784     case LEX_KNOWNEXT:
4785 #ifdef PERL_MAD
4786         PL_lasttoke--;
4787         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4788         if (PL_madskills) {
4789             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4790             PL_nexttoke[PL_lasttoke].next_mad = 0;
4791             if (PL_thismad && PL_thismad->mad_key == '_') {
4792                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4793                 PL_thismad->mad_val = 0;
4794                 mad_free(PL_thismad);
4795                 PL_thismad = 0;
4796             }
4797         }
4798         if (!PL_lasttoke) {
4799             PL_lex_state = PL_lex_defer;
4800             PL_expect = PL_lex_expect;
4801             PL_lex_defer = LEX_NORMAL;
4802             if (!PL_nexttoke[PL_lasttoke].next_type)
4803                 return yylex();
4804         }
4805 #else
4806         PL_nexttoke--;
4807         pl_yylval = PL_nextval[PL_nexttoke];
4808         if (!PL_nexttoke) {
4809             PL_lex_state = PL_lex_defer;
4810             PL_expect = PL_lex_expect;
4811             PL_lex_defer = LEX_NORMAL;
4812         }
4813 #endif
4814         {
4815             I32 next_type;
4816 #ifdef PERL_MAD
4817             next_type = PL_nexttoke[PL_lasttoke].next_type;
4818 #else
4819             next_type = PL_nexttype[PL_nexttoke];
4820 #endif
4821             if (next_type & (7<<24)) {
4822                 if (next_type & (1<<24)) {
4823                     if (PL_lex_brackets > 100)
4824                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4825                     PL_lex_brackstack[PL_lex_brackets++] =
4826                         (char) ((next_type >> 16) & 0xff);
4827                 }
4828                 if (next_type & (2<<24))
4829                     PL_lex_allbrackets++;
4830                 if (next_type & (4<<24))
4831                     PL_lex_allbrackets--;
4832                 next_type &= 0xffff;
4833             }
4834             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4835         }
4836
4837     /* interpolated case modifiers like \L \U, including \Q and \E.
4838        when we get here, PL_bufptr is at the \
4839     */
4840     case LEX_INTERPCASEMOD:
4841 #ifdef DEBUGGING
4842         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4843             Perl_croak(aTHX_
4844                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4845                        PL_bufptr, PL_bufend, *PL_bufptr);
4846 #endif
4847         /* handle \E or end of string */
4848         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4849             /* if at a \E */
4850             if (PL_lex_casemods) {
4851                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4852                 PL_lex_casestack[PL_lex_casemods] = '\0';
4853
4854                 if (PL_bufptr != PL_bufend
4855                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4856                         || oldmod == 'F')) {
4857                     PL_bufptr += 2;
4858                     PL_lex_state = LEX_INTERPCONCAT;
4859 #ifdef PERL_MAD
4860                     if (PL_madskills)
4861                         PL_thistoken = newSVpvs("\\E");
4862 #endif
4863                 }
4864                 PL_lex_allbrackets--;
4865                 return REPORT(')');
4866             }
4867             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4868                /* Got an unpaired \E */
4869                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4870                         "Useless use of \\E");
4871             }
4872 #ifdef PERL_MAD
4873             while (PL_bufptr != PL_bufend &&
4874               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4875                 if (PL_madskills) {
4876                   if (!PL_thiswhite)
4877                     PL_thiswhite = newSVpvs("");
4878                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4879                 }
4880                 PL_bufptr += 2;
4881             }
4882 #else
4883             if (PL_bufptr != PL_bufend)
4884                 PL_bufptr += 2;
4885 #endif
4886             PL_lex_state = LEX_INTERPCONCAT;
4887             return yylex();
4888         }
4889         else {
4890             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4891               "### Saw case modifier\n"); });
4892             s = PL_bufptr + 1;
4893             if (s[1] == '\\' && s[2] == 'E') {
4894 #ifdef PERL_MAD
4895                 if (PL_madskills) {
4896                   if (!PL_thiswhite)
4897                     PL_thiswhite = newSVpvs("");
4898                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4899                 }
4900 #endif
4901                 PL_bufptr = s + 3;
4902                 PL_lex_state = LEX_INTERPCONCAT;
4903                 return yylex();
4904             }
4905             else {
4906                 I32 tmp;
4907                 if (!PL_madskills) /* when just compiling don't need correct */
4908                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4909                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4910                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4911                     (strchr(PL_lex_casestack, 'L')
4912                         || strchr(PL_lex_casestack, 'U')
4913                         || strchr(PL_lex_casestack, 'F'))) {
4914                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4915                     PL_lex_allbrackets--;
4916                     return REPORT(')');
4917                 }
4918                 if (PL_lex_casemods > 10)
4919                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4920                 PL_lex_casestack[PL_lex_casemods++] = *s;
4921                 PL_lex_casestack[PL_lex_casemods] = '\0';
4922                 PL_lex_state = LEX_INTERPCONCAT;
4923                 start_force(PL_curforce);
4924                 NEXTVAL_NEXTTOKE.ival = 0;
4925                 force_next((2<<24)|'(');
4926                 start_force(PL_curforce);
4927                 if (*s == 'l')
4928                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4929                 else if (*s == 'u')
4930                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4931                 else if (*s == 'L')
4932                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4933                 else if (*s == 'U')
4934                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4935                 else if (*s == 'Q')
4936                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4937                 else if (*s == 'F')
4938                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4939                 else
4940                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4941                 if (PL_madskills) {
4942                     SV* const tmpsv = newSVpvs("\\ ");
4943                     /* replace the space with the character we want to escape
4944                      */
4945                     SvPVX(tmpsv)[1] = *s;
4946                     curmad('_', tmpsv);
4947                 }
4948                 PL_bufptr = s + 1;
4949             }
4950             force_next(FUNC);
4951             if (PL_lex_starts) {
4952                 s = PL_bufptr;
4953                 PL_lex_starts = 0;
4954 #ifdef PERL_MAD
4955                 if (PL_madskills) {
4956                     if (PL_thistoken)
4957                         sv_free(PL_thistoken);
4958                     PL_thistoken = newSVpvs("");
4959                 }
4960 #endif
4961                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4962                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4963                     OPERATOR(',');
4964                 else
4965                     Aop(OP_CONCAT);
4966             }
4967             else
4968                 return yylex();
4969         }
4970
4971     case LEX_INTERPPUSH:
4972         return REPORT(sublex_push());
4973
4974     case LEX_INTERPSTART:
4975         if (PL_bufptr == PL_bufend)
4976             return REPORT(sublex_done());
4977         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4978               "### Interpolated variable\n"); });
4979         PL_expect = XTERM;
4980         /* for /@a/, we leave the joining for the regex engine to do
4981          * (unless we're within \Q etc) */
4982         PL_lex_dojoin = (*PL_bufptr == '@'
4983                             && (!PL_lex_inpat || PL_lex_casemods));
4984         PL_lex_state = LEX_INTERPNORMAL;
4985         if (PL_lex_dojoin) {
4986             start_force(PL_curforce);
4987             NEXTVAL_NEXTTOKE.ival = 0;
4988             force_next(',');
4989             start_force(PL_curforce);
4990             force_ident("\"", '$');
4991             start_force(PL_curforce);
4992             NEXTVAL_NEXTTOKE.ival = 0;
4993             force_next('$');
4994             start_force(PL_curforce);
4995             NEXTVAL_NEXTTOKE.ival = 0;
4996             force_next((2<<24)|'(');
4997             start_force(PL_curforce);
4998             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4999             force_next(FUNC);
5000         }
5001         /* Convert (?{...}) and friends to 'do {...}' */
5002         if (PL_lex_inpat && *PL_bufptr == '(') {
5003             PL_parser->lex_shared->re_eval_start = PL_bufptr;
5004             PL_bufptr += 2;
5005             if (*PL_bufptr != '{')
5006                 PL_bufptr++;
5007             start_force(PL_curforce);
5008             /* XXX probably need a CURMAD(something) here */
5009             PL_expect = XTERMBLOCK;
5010             force_next(DO);
5011         }
5012
5013         if (PL_lex_starts++) {
5014             s = PL_bufptr;
5015 #ifdef PERL_MAD
5016             if (PL_madskills) {
5017                 if (PL_thistoken)
5018                     sv_free(PL_thistoken);
5019                 PL_thistoken = newSVpvs("");
5020             }
5021 #endif
5022             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5023             if (!PL_lex_casemods && PL_lex_inpat)
5024                 OPERATOR(',');
5025             else
5026                 Aop(OP_CONCAT);
5027         }
5028         return yylex();
5029
5030     case LEX_INTERPENDMAYBE:
5031         if (intuit_more(PL_bufptr)) {
5032             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5033             break;
5034         }
5035         /* FALL THROUGH */
5036
5037     case LEX_INTERPEND:
5038         if (PL_lex_dojoin) {
5039             const U8 dojoin_was = PL_lex_dojoin;
5040             PL_lex_dojoin = FALSE;
5041             PL_lex_state = LEX_INTERPCONCAT;
5042 #ifdef PERL_MAD
5043             if (PL_madskills) {
5044                 if (PL_thistoken)
5045                     sv_free(PL_thistoken);
5046                 PL_thistoken = newSVpvs("");
5047             }
5048 #endif
5049             PL_lex_allbrackets--;
5050             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5051         }
5052         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5053             && SvEVALED(PL_lex_repl))
5054         {
5055             if (PL_bufptr != PL_bufend)
5056                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5057             PL_lex_repl = NULL;
5058         }
5059         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5060            re_eval_str.  If the here-doc body’s length equals the previous
5061            value of re_eval_start, re_eval_start will now be null.  So
5062            check re_eval_str as well. */
5063         if (PL_parser->lex_shared->re_eval_start
5064          || PL_parser->lex_shared->re_eval_str) {
5065             SV *sv;
5066             if (*PL_bufptr != ')')
5067                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5068             PL_bufptr++;
5069             /* having compiled a (?{..}) expression, return the original
5070              * text too, as a const */
5071             if (PL_parser->lex_shared->re_eval_str) {
5072                 sv = PL_parser->lex_shared->re_eval_str;
5073                 PL_parser->lex_shared->re_eval_str = NULL;
5074                 SvCUR_set(sv,
5075                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5076                 SvPV_shrink_to_cur(sv);
5077             }
5078             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5079                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5080             start_force(PL_curforce);
5081             /* XXX probably need a CURMAD(something) here */
5082             NEXTVAL_NEXTTOKE.opval =
5083                     (OP*)newSVOP(OP_CONST, 0,
5084                                  sv);
5085             force_next(THING);
5086             PL_parser->lex_shared->re_eval_start = NULL;
5087             PL_expect = XTERM;
5088             return REPORT(',');
5089         }
5090
5091         /* FALLTHROUGH */
5092     case LEX_INTERPCONCAT:
5093 #ifdef DEBUGGING
5094         if (PL_lex_brackets)
5095             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5096                        (long) PL_lex_brackets);
5097 #endif
5098         if (PL_bufptr == PL_bufend)
5099             return REPORT(sublex_done());
5100
5101         /* m'foo' still needs to be parsed for possible (?{...}) */
5102         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5103             SV *sv = newSVsv(PL_linestr);
5104             sv = tokeq(sv);
5105             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5106             s = PL_bufend;
5107         }
5108         else {
5109             s = scan_const(PL_bufptr);
5110             if (*s == '\\')
5111                 PL_lex_state = LEX_INTERPCASEMOD;
5112             else
5113                 PL_lex_state = LEX_INTERPSTART;
5114         }
5115
5116         if (s != PL_bufptr) {
5117             start_force(PL_curforce);
5118             if (PL_madskills) {
5119                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5120             }
5121             NEXTVAL_NEXTTOKE = pl_yylval;
5122             PL_expect = XTERM;
5123             force_next(THING);
5124             if (PL_lex_starts++) {
5125 #ifdef PERL_MAD
5126                 if (PL_madskills) {
5127                     if (PL_thistoken)
5128                         sv_free(PL_thistoken);
5129                     PL_thistoken = newSVpvs("");
5130                 }
5131 #endif
5132                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5133                 if (!PL_lex_casemods && PL_lex_inpat)
5134                     OPERATOR(',');
5135                 else
5136                     Aop(OP_CONCAT);
5137             }
5138             else {
5139                 PL_bufptr = s;
5140                 return yylex();
5141             }
5142         }
5143
5144         return yylex();
5145     case LEX_FORMLINE:
5146         s = scan_formline(PL_bufptr);
5147         if (!PL_lex_formbrack)
5148         {
5149             formbrack = 1;
5150             goto rightbracket;
5151         }
5152         PL_bufptr = s;
5153         return yylex();
5154     }
5155
5156     /* We really do *not* want PL_linestr ever becoming a COW. */
5157     assert (!SvIsCOW(PL_linestr));
5158     s = PL_bufptr;
5159     PL_oldoldbufptr = PL_oldbufptr;
5160     PL_oldbufptr = s;
5161     PL_parser->saw_infix_sigil = 0;
5162
5163   retry:
5164 #ifdef PERL_MAD
5165     if (PL_thistoken) {
5166         sv_free(PL_thistoken);
5167         PL_thistoken = 0;
5168     }
5169     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5170 #endif
5171     switch (*s) {
5172     default:
5173         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5174             goto keylookup;
5175         {
5176         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5177         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5178                                                     UTF8SKIP(s),
5179                                                     SVs_TEMP | SVf_UTF8),
5180                                             10, UNI_DISPLAY_ISPRINT)
5181                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5182         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5183         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5184             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5185         } else {
5186             d = PL_linestart;
5187         }
5188         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5189                           UTF8fARG(UTF, (s - d), d),
5190                          (int) len + 1);
5191     }
5192     case 4:
5193     case 26:
5194         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5195     case 0:
5196 #ifdef PERL_MAD
5197         if (PL_madskills)
5198             PL_faketokens = 0;
5199 #endif
5200         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5201             PL_last_uni = 0;
5202             PL_last_lop = 0;
5203             if (PL_lex_brackets &&
5204                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5205                 yyerror((const char *)
5206                         (PL_lex_formbrack
5207                          ? "Format not terminated"
5208                          : "Missing right curly or square bracket"));
5209             }
5210             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5211                         "### Tokener got EOF\n");
5212             } );
5213             TOKEN(0);
5214         }
5215         if (s++ < PL_bufend)
5216             goto retry;                 /* ignore stray nulls */
5217         PL_last_uni = 0;
5218         PL_last_lop = 0;
5219         if (!PL_in_eval && !PL_preambled) {
5220             PL_preambled = TRUE;
5221 #ifdef PERL_MAD
5222             if (PL_madskills)
5223                 PL_faketokens = 1;
5224 #endif
5225             if (PL_perldb) {
5226                 /* Generate a string of Perl code to load the debugger.
5227                  * If PERL5DB is set, it will return the contents of that,
5228                  * otherwise a compile-time require of perl5db.pl.  */
5229
5230                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5231
5232                 if (pdb) {
5233                     sv_setpv(PL_linestr, pdb);
5234                     sv_catpvs(PL_linestr,";");
5235                 } else {
5236                     SETERRNO(0,SS_NORMAL);
5237                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5238                 }
5239                 PL_parser->preambling = CopLINE(PL_curcop);
5240             } else
5241                 sv_setpvs(PL_linestr,"");
5242             if (PL_preambleav) {
5243                 SV **svp = AvARRAY(PL_preambleav);
5244                 SV **const end = svp + AvFILLp(PL_preambleav);
5245                 while(svp <= end) {
5246                     sv_catsv(PL_linestr, *svp);
5247                     ++svp;
5248                     sv_catpvs(PL_linestr, ";");
5249                 }
5250                 sv_free(MUTABLE_SV(PL_preambleav));
5251                 PL_preambleav = NULL;
5252             }
5253             if (PL_minus_E)
5254                 sv_catpvs(PL_linestr,
5255                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5256             if (PL_minus_n || PL_minus_p) {
5257                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5258                 if (PL_minus_l)
5259                     sv_catpvs(PL_linestr,"chomp;");
5260                 if (PL_minus_a) {
5261                     if (PL_minus_F) {
5262                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5263                              || *PL_splitstr == '"')
5264                               && strchr(PL_splitstr + 1, *PL_splitstr))
5265                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5266                         else {
5267                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5268                                bytes can be used as quoting characters.  :-) */
5269                             const char *splits = PL_splitstr;
5270                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5271                             do {
5272                                 /* Need to \ \s  */
5273                                 if (*splits == '\\')
5274                                     sv_catpvn(PL_linestr, splits, 1);
5275                                 sv_catpvn(PL_linestr, splits, 1);
5276                             } while (*splits++);
5277                             /* This loop will embed the trailing NUL of
5278                                PL_linestr as the last thing it does before
5279                                terminating.  */
5280                             sv_catpvs(PL_linestr, ");");
5281                         }
5282                     }
5283                     else
5284                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5285                 }
5286             }
5287             sv_catpvs(PL_linestr, "\n");
5288             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5289             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5290             PL_last_lop = PL_last_uni = NULL;
5291             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5292                 update_debugger_info(PL_linestr, NULL, 0);
5293             goto retry;
5294         }
5295         do {
5296             fake_eof = 0;
5297             bof = PL_rsfp ? TRUE : FALSE;
5298             if (0) {
5299               fake_eof:
5300                 fake_eof = LEX_FAKE_EOF;
5301             }
5302             PL_bufptr = PL_bufend;
5303             COPLINE_INC_WITH_HERELINES;
5304             if (!lex_next_chunk(fake_eof)) {
5305                 CopLINE_dec(PL_curcop);
5306                 s = PL_bufptr;
5307                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5308             }
5309             CopLINE_dec(PL_curcop);
5310 #ifdef PERL_MAD
5311             if (!PL_rsfp)
5312                 PL_realtokenstart = -1;
5313 #endif
5314             s = PL_bufptr;
5315             /* If it looks like the start of a BOM or raw UTF-16,
5316              * check if it in fact is. */
5317             if (bof && PL_rsfp &&
5318                      (*s == 0 ||
5319                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5320                       *(U8*)s >= 0xFE ||
5321                       s[1] == 0)) {
5322                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5323                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5324 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5325                 /* offset may include swallowed CR */
5326                 if (!bof)
5327                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5328 #endif
5329                 if (bof) {
5330                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5331                     s = swallow_bom((U8*)s);
5332                 }
5333             }
5334             if (PL_parser->in_pod) {
5335                 /* Incest with pod. */
5336 #ifdef PERL_MAD
5337                 if (PL_madskills)
5338                     sv_catsv(PL_thiswhite, PL_linestr);
5339 #endif
5340                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5341                     sv_setpvs(PL_linestr, "");
5342                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5343                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5344                     PL_last_lop = PL_last_uni = NULL;
5345                     PL_parser->in_pod = 0;
5346                 }
5347             }
5348             if (PL_rsfp || PL_parser->filtered)
5349                 incline(s);
5350         } while (PL_parser->in_pod);
5351         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5352         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5353         PL_last_lop = PL_last_uni = NULL;
5354         if (CopLINE(PL_curcop) == 1) {
5355             while (s < PL_bufend && isSPACE(*s))
5356                 s++;
5357             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5358                 s++;
5359 #ifdef PERL_MAD
5360             if (PL_madskills)
5361                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5362 #endif
5363             d = NULL;
5364             if (!PL_in_eval) {
5365                 if (*s == '#' && *(s+1) == '!')
5366                     d = s + 2;
5367 #ifdef ALTERNATE_SHEBANG
5368                 else {
5369                     static char const as[] = ALTERNATE_SHEBANG;
5370                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5371                         d = s + (sizeof(as) - 1);
5372                 }
5373 #endif /* ALTERNATE_SHEBANG */
5374             }
5375             if (d) {
5376                 char *ipath;
5377                 char *ipathend;
5378
5379                 while (isSPACE(*d))
5380                     d++;
5381                 ipath = d;
5382                 while (*d && !isSPACE(*d))
5383                     d++;
5384                 ipathend = d;
5385
5386 #ifdef ARG_ZERO_IS_SCRIPT
5387                 if (ipathend > ipath) {
5388                     /*
5389                      * HP-UX (at least) sets argv[0] to the script name,
5390                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5391                      * at least, set argv[0] to the basename of the Perl
5392                      * interpreter. So, having found "#!", we'll set it right.
5393                      */
5394                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5395                                                     SVt_PV)); /* $^X */
5396                     assert(SvPOK(x) || SvGMAGICAL(x));
5397                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5398                         sv_setpvn(x, ipath, ipathend - ipath);
5399                         SvSETMAGIC(x);
5400                     }
5401                     else {
5402                         STRLEN blen;
5403                         STRLEN llen;
5404                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5405                         const char * const lstart = SvPV_const(x,llen);
5406                         if (llen < blen) {
5407                             bstart += blen - llen;
5408                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5409                                 sv_setpvn(x, ipath, ipathend - ipath);
5410                                 SvSETMAGIC(x);
5411                             }
5412                         }
5413                     }
5414                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5415                 }
5416 #endif /* ARG_ZERO_IS_SCRIPT */
5417
5418                 /*
5419                  * Look for options.
5420                  */
5421                 d = instr(s,"perl -");
5422                 if (!d) {
5423                     d = instr(s,"perl");
5424 #if defined(DOSISH)
5425                     /* avoid getting into infinite loops when shebang
5426                      * line contains "Perl" rather than "perl" */
5427                     if (!d) {
5428                         for (d = ipathend-4; d >= ipath; --d) {
5429                             if ((*d == 'p' || *d == 'P')
5430                                 && !ibcmp(d, "perl", 4))
5431                             {
5432                                 break;
5433                             }
5434                         }
5435                         if (d < ipath)
5436                             d = NULL;
5437                     }
5438 #endif
5439                 }
5440 #ifdef ALTERNATE_SHEBANG
5441                 /*
5442                  * If the ALTERNATE_SHEBANG on this system starts with a
5443                  * character that can be part of a Perl expression, then if
5444                  * we see it but not "perl", we're probably looking at the
5445                  * start of Perl code, not a request to hand off to some
5446                  * other interpreter.  Similarly, if "perl" is there, but
5447                  * not in the first 'word' of the line, we assume the line
5448                  * contains the start of the Perl program.
5449                  */
5450                 if (d && *s != '#') {
5451                     const char *c = ipath;
5452                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5453                         c++;
5454                     if (c < d)
5455                         d = NULL;       /* "perl" not in first word; ignore */
5456                     else
5457                         *s = '#';       /* Don't try to parse shebang line */
5458                 }
5459 #endif /* ALTERNATE_SHEBANG */
5460                 if (!d &&
5461                     *s == '#' &&
5462                     ipathend > ipath &&
5463                     !PL_minus_c &&
5464                     !instr(s,"indir") &&
5465                     instr(PL_origargv[0],"perl"))
5466                 {
5467                     dVAR;
5468                     char **newargv;
5469
5470                     *ipathend = '\0';
5471                     s = ipathend + 1;
5472                     while (s < PL_bufend && isSPACE(*s))
5473                         s++;
5474                     if (s < PL_bufend) {
5475                         Newx(newargv,PL_origargc+3,char*);
5476                         newargv[1] = s;
5477                         while (s < PL_bufend && !isSPACE(*s))
5478                             s++;
5479                         *s = '\0';
5480                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5481                     }
5482                     else
5483                         newargv = PL_origargv;
5484                     newargv[0] = ipath;
5485                     PERL_FPU_PRE_EXEC
5486                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5487                     PERL_FPU_POST_EXEC
5488                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5489                 }
5490                 if (d) {
5491                     while (*d && !isSPACE(*d))
5492                         d++;
5493                     while (SPACE_OR_TAB(*d))
5494                         d++;
5495
5496                     if (*d++ == '-') {
5497                         const bool switches_done = PL_doswitches;
5498                         const U32 oldpdb = PL_perldb;
5499                         const bool oldn = PL_minus_n;
5500                         const bool oldp = PL_minus_p;
5501                         const char *d1 = d;
5502
5503                         do {
5504                             bool baduni = FALSE;
5505                             if (*d1 == 'C') {
5506                                 const char *d2 = d1 + 1;
5507                                 if (parse_unicode_opts((const char **)&d2)
5508                                     != PL_unicode)
5509                                     baduni = TRUE;
5510                             }
5511                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5512                                 const char * const m = d1;
5513                                 while (*d1 && !isSPACE(*d1))
5514                                     d1++;
5515                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5516                                       (int)(d1 - m), m);
5517                             }
5518                             d1 = moreswitches(d1);
5519                         } while (d1);
5520                         if (PL_doswitches && !switches_done) {
5521                             int argc = PL_origargc;
5522                             char **argv = PL_origargv;
5523                             do {
5524                                 argc--,argv++;
5525                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5526                             init_argv_symbols(argc,argv);
5527                         }
5528                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5529                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5530                               /* if we have already added "LINE: while (<>) {",
5531                                  we must not do it again */
5532                         {
5533                             sv_setpvs(PL_linestr, "");
5534                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5535                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5536                             PL_last_lop = PL_last_uni = NULL;
5537                             PL_preambled = FALSE;
5538                             if (PERLDB_LINE || PERLDB_SAVESRC)
5539                                 (void)gv_fetchfile(PL_origfilename);
5540                             goto retry;
5541                         }
5542                     }
5543                 }
5544             }
5545         }
5546         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5547             PL_lex_state = LEX_FORMLINE;
5548             start_force(PL_curforce);
5549             NEXTVAL_NEXTTOKE.ival = 0;
5550             force_next(FORMRBRACK);
5551             TOKEN(';');
5552         }
5553         goto retry;
5554     case '\r':
5555 #ifdef PERL_STRICT_CR
5556         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5557         Perl_croak(aTHX_
5558       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5559 #endif
5560     case ' ': case '\t': case '\f': case 013:
5561 #ifdef PERL_MAD
5562         PL_realtokenstart = -1;
5563         if (PL_madskills) {
5564           if (!PL_thiswhite)
5565             PL_thiswhite = newSVpvs("");
5566           sv_catpvn(PL_thiswhite, s, 1);
5567         }
5568 #endif
5569         s++;
5570         goto retry;
5571     case '#':
5572     case '\n':
5573 #ifdef PERL_MAD
5574         PL_realtokenstart = -1;
5575         if (PL_madskills)
5576             PL_faketokens = 0;
5577 #endif
5578         if (PL_lex_state != LEX_NORMAL ||
5579              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5580             if (*s == '#' && s == PL_linestart && PL_in_eval
5581              && !PL_rsfp && !PL_parser->filtered) {
5582                 /* handle eval qq[#line 1 "foo"\n ...] */
5583                 CopLINE_dec(PL_curcop);
5584                 incline(s);
5585             }
5586             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5587                 s = SKIPSPACE0(s);
5588                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5589                     incline(s);
5590             }
5591             else {
5592                 const bool in_comment = *s == '#';
5593                 d = s;
5594                 while (d < PL_bufend && *d != '\n')
5595                     d++;
5596                 if (d < PL_bufend)
5597                     d++;
5598                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5599                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5600                                d, PL_bufend);
5601 #ifdef PERL_MAD
5602                 if (PL_madskills)
5603                     PL_thiswhite = newSVpvn(s, d - s);
5604 #endif
5605                 s = d;
5606                 if (in_comment && d == PL_bufend
5607                  && PL_lex_state == LEX_INTERPNORMAL
5608                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5609                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5610                 else incline(s);
5611             }
5612             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5613                 PL_lex_state = LEX_FORMLINE;
5614                 start_force(PL_curforce);
5615                 NEXTVAL_NEXTTOKE.ival = 0;
5616                 force_next(FORMRBRACK);
5617                 TOKEN(';');
5618             }
5619         }
5620         else {
5621 #ifdef PERL_MAD
5622             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5623                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5624                     PL_faketokens = 0;
5625                     s = SKIPSPACE0(s);
5626                     TOKEN(PEG); /* make sure any #! line is accessible */
5627                 }
5628                 s = SKIPSPACE0(s);
5629             }
5630             else {
5631 #endif
5632                     if (PL_madskills) d = s;
5633                     while (s < PL_bufend && *s != '\n')
5634                         s++;
5635                     if (s < PL_bufend)
5636                     {
5637                         s++;
5638                         if (s < PL_bufend)
5639                             incline(s);
5640                     }
5641                     else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5642                       Perl_croak(aTHX_ "panic: input overflow");
5643 #ifdef PERL_MAD
5644                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5645                         if (!PL_thiswhite)
5646                             PL_thiswhite = newSVpvs("");
5647                         if (CopLINE(PL_curcop) == 1) {
5648                             sv_setpvs(PL_thiswhite, "");
5649                             PL_faketokens = 0;
5650                         }
5651                         sv_catpvn(PL_thiswhite, d, s - d);
5652                     }
5653             }
5654 #endif
5655         }
5656         goto retry;
5657     case '-':
5658         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5659             I32 ftst = 0;
5660             char tmp;
5661
5662             s++;
5663             PL_bufptr = s;
5664             tmp = *s++;
5665
5666             while (s < PL_bufend && SPACE_OR_TAB(*s))
5667                 s++;
5668
5669             if (strnEQ(s,"=>",2)) {
5670                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5671                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5672                 OPERATOR('-');          /* unary minus */
5673             }
5674             switch (tmp) {
5675             case 'r': ftst = OP_FTEREAD;        break;
5676             case 'w': ftst = OP_FTEWRITE;       break;
5677             case 'x': ftst = OP_FTEEXEC;        break;
5678             case 'o': ftst = OP_FTEOWNED;       break;
5679             case 'R': ftst = OP_FTRREAD;        break;
5680             case 'W': ftst = OP_FTRWRITE;       break;
5681             case 'X': ftst = OP_FTREXEC;        break;
5682             case 'O': ftst = OP_FTROWNED;       break;
5683             case 'e': ftst = OP_FTIS;           break;
5684             case 'z': ftst = OP_FTZERO;         break;
5685             case 's': ftst = OP_FTSIZE;         break;
5686             case 'f': ftst = OP_FTFILE;         break;
5687             case 'd': ftst = OP_FTDIR;          break;
5688             case 'l': ftst = OP_FTLINK;         break;
5689             case 'p': ftst = OP_FTPIPE;         break;
5690             case 'S': ftst = OP_FTSOCK;         break;
5691             case 'u': ftst = OP_FTSUID;         break;
5692             case 'g': ftst = OP_FTSGID;         break;
5693             case 'k': ftst = OP_FTSVTX;         break;
5694             case 'b': ftst = OP_FTBLK;          break;
5695             case 'c': ftst = OP_FTCHR;          break;
5696             case 't': ftst = OP_FTTTY;          break;
5697             case 'T': ftst = OP_FTTEXT;         break;
5698             case 'B': ftst = OP_FTBINARY;       break;
5699             case 'M': case 'A': case 'C':
5700                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5701                 switch (tmp) {
5702                 case 'M': ftst = OP_FTMTIME;    break;
5703                 case 'A': ftst = OP_FTATIME;    break;
5704                 case 'C': ftst = OP_FTCTIME;    break;
5705                 default:                        break;
5706                 }
5707                 break;
5708             default:
5709                 break;
5710             }
5711             if (ftst) {
5712                 PL_last_uni = PL_oldbufptr;
5713                 PL_last_lop_op = (OPCODE)ftst;
5714                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5715                         "### Saw file test %c\n", (int)tmp);
5716                 } );
5717                 FTST(ftst);
5718             }
5719             else {
5720                 /* Assume it was a minus followed by a one-letter named
5721                  * subroutine call (or a -bareword), then. */
5722                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5723                         "### '-%c' looked like a file test but was not\n",
5724                         (int) tmp);
5725                 } );
5726                 s = --PL_bufptr;
5727             }
5728         }
5729         {
5730             const char tmp = *s++;
5731             if (*s == tmp) {
5732                 s++;
5733                 if (PL_expect == XOPERATOR)
5734                     TERM(POSTDEC);
5735                 else
5736                     OPERATOR(PREDEC);
5737             }
5738             else if (*s == '>') {
5739                 s++;
5740                 s = SKIPSPACE1(s);
5741                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5742                     ((*s == '$' || *s == '&') && s[1] == '*')
5743                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5744                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5745                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5746                  ))
5747                 {
5748                     Perl_ck_warner_d(aTHX_
5749                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5750                         "Postfix dereference is experimental"
5751                     );
5752                     PL_expect = XPOSTDEREF;
5753                     TOKEN(ARROW);
5754                 }
5755                 if (isIDFIRST_lazy_if(s,UTF)) {
5756                     s = force_word(s,METHOD,FALSE,TRUE);
5757                     TOKEN(ARROW);
5758                 }
5759                 else if (*s == '$')
5760                     OPERATOR(ARROW);
5761                 else
5762                     TERM(ARROW);
5763             }
5764             if (PL_expect == XOPERATOR) {
5765                 if (*s == '=' && !PL_lex_allbrackets &&
5766                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5767                     s--;
5768                     TOKEN(0);
5769                 }
5770                 Aop(OP_SUBTRACT);
5771             }
5772             else {
5773                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5774                     check_uni();
5775                 OPERATOR('-');          /* unary minus */
5776             }
5777         }
5778
5779     case '+':
5780         {
5781             const char tmp = *s++;
5782             if (*s == tmp) {
5783                 s++;
5784                 if (PL_expect == XOPERATOR)
5785                     TERM(POSTINC);
5786                 else
5787                     OPERATOR(PREINC);
5788             }
5789             if (PL_expect == XOPERATOR) {
5790                 if (*s == '=' && !PL_lex_allbrackets &&
5791                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5792                     s--;
5793                     TOKEN(0);
5794                 }
5795                 Aop(OP_ADD);
5796             }
5797             else {
5798                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5799                     check_uni();
5800                 OPERATOR('+');
5801             }
5802         }
5803
5804     case '*':
5805         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5806         if (PL_expect != XOPERATOR) {
5807             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5808             PL_expect = XOPERATOR;
5809             force_ident(PL_tokenbuf, '*');
5810             if (!*PL_tokenbuf)
5811                 PREREF('*');
5812             TERM('*');
5813         }
5814         s++;
5815         if (*s == '*') {
5816             s++;
5817             if (*s == '=' && !PL_lex_allbrackets &&
5818                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5819                 s -= 2;
5820                 TOKEN(0);
5821             }
5822             PWop(OP_POW);
5823         }
5824         if (*s == '=' && !PL_lex_allbrackets &&
5825                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5826             s--;
5827             TOKEN(0);
5828         }
5829         PL_parser->saw_infix_sigil = 1;
5830         Mop(OP_MULTIPLY);
5831
5832     case '%':
5833     {
5834         if (PL_expect == XOPERATOR) {
5835             if (s[1] == '=' && !PL_lex_allbrackets &&
5836                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5837                 TOKEN(0);
5838             ++s;
5839             PL_parser->saw_infix_sigil = 1;
5840             Mop(OP_MODULO);
5841         }
5842         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5843         PL_tokenbuf[0] = '%';
5844         s = scan_ident(s, PL_tokenbuf + 1,
5845                 sizeof PL_tokenbuf - 1, FALSE);
5846         pl_yylval.ival = 0;
5847         if (!PL_tokenbuf[1]) {
5848             PREREF('%');
5849         }
5850         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5851             if (*s == '[')
5852                 PL_tokenbuf[0] = '@';
5853         }
5854         PL_expect = XOPERATOR;
5855         force_ident_maybe_lex('%');
5856         TERM('%');
5857     }
5858     case '^':
5859         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5860                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5861             TOKEN(0);
5862         s++;
5863         BOop(OP_BIT_XOR);
5864     case '[':
5865         if (PL_lex_brackets > 100)
5866             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5867         PL_lex_brackstack[PL_lex_brackets++] = 0;
5868         PL_lex_allbrackets++;
5869         {
5870             const char tmp = *s++;
5871             OPERATOR(tmp);
5872         }
5873     case '~':
5874         if (s[1] == '~'
5875             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5876         {
5877             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5878                 TOKEN(0);
5879             s += 2;
5880             Perl_ck_warner_d(aTHX_
5881                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5882                 "Smartmatch is experimental");
5883             Eop(OP_SMARTMATCH);
5884         }
5885         s++;
5886         OPERATOR('~');
5887     case ',':
5888         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5889             TOKEN(0);
5890         s++;
5891         OPERATOR(',');
5892     case ':':
5893         if (s[1] == ':') {
5894             len = 0;
5895             goto just_a_word_zero_gv;
5896         }
5897         s++;
5898         switch (PL_expect) {
5899             OP *attrs;
5900 #ifdef PERL_MAD
5901             I32 stuffstart;
5902 #endif
5903         case XOPERATOR:
5904             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5905                 break;
5906             PL_bufptr = s;      /* update in case we back off */
5907             if (*s == '=') {
5908                 Perl_croak(aTHX_
5909                            "Use of := for an empty attribute list is not allowed");
5910             }
5911             goto grabattrs;
5912         case XATTRBLOCK:
5913             PL_expect = XBLOCK;
5914             goto grabattrs;
5915         case XATTRTERM:
5916             PL_expect = XTERMBLOCK;
5917          grabattrs:
5918 #ifdef PERL_MAD
5919             stuffstart = s - SvPVX(PL_linestr) - 1;
5920 #endif
5921             s = PEEKSPACE(s);
5922             attrs = NULL;
5923             while (isIDFIRST_lazy_if(s,UTF)) {
5924                 I32 tmp;
5925                 SV *sv;
5926                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5927                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5928                     if (tmp < 0) tmp = -tmp;
5929                     switch (tmp) {
5930                     case KEY_or:
5931                     case KEY_and:
5932                     case KEY_for:
5933                     case KEY_foreach:
5934                     case KEY_unless:
5935                     case KEY_if:
5936                     case KEY_while:
5937                     case KEY_until:
5938                         goto got_attrs;
5939                     default:
5940                         break;
5941                     }
5942                 }
5943                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5944                 if (*d == '(') {
5945                     d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5946                     COPLINE_SET_FROM_MULTI_END;
5947                     if (!d) {
5948                         /* MUST advance bufptr here to avoid bogus
5949                            "at end of line" context messages from yyerror().
5950                          */
5951                         PL_bufptr = s + len;
5952                         yyerror("Unterminated attribute parameter in attribute list");
5953                         if (attrs)
5954                             op_free(attrs);
5955                         sv_free(sv);
5956                         return REPORT(0);       /* EOF indicator */
5957                     }
5958                 }
5959                 if (PL_lex_stuff) {
5960                     sv_catsv(sv, PL_lex_stuff);
5961                     attrs = op_append_elem(OP_LIST, attrs,
5962                                         newSVOP(OP_CONST, 0, sv));
5963                     SvREFCNT_dec(PL_lex_stuff);
5964                     PL_lex_stuff = NULL;
5965                 }
5966                 else {
5967                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5968                         sv_free(sv);
5969                         if (PL_in_my == KEY_our) {
5970                             deprecate(":unique");
5971                         }
5972                         else
5973                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5974                     }
5975
5976                     /* NOTE: any CV attrs applied here need to be part of
5977                        the CVf_BUILTIN_ATTRS define in cv.h! */
5978                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5979                         sv_free(sv);
5980                         CvLVALUE_on(PL_compcv);
5981                     }
5982                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5983                         sv_free(sv);
5984                         deprecate(":locked");
5985                     }
5986                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5987                         sv_free(sv);
5988                         CvMETHOD_on(PL_compcv);
5989                     }
5990                     /* After we've set the flags, it could be argued that
5991                        we don't need to do the attributes.pm-based setting
5992                        process, and shouldn't bother appending recognized
5993                        flags.  To experiment with that, uncomment the
5994                        following "else".  (Note that's already been
5995                        uncommented.  That keeps the above-applied built-in
5996                        attributes from being intercepted (and possibly
5997                        rejected) by a package's attribute routines, but is
5998                        justified by the performance win for the common case
5999                        of applying only built-in attributes.) */
6000                     else
6001                         attrs = op_append_elem(OP_LIST, attrs,
6002                                             newSVOP(OP_CONST, 0,
6003                                                     sv));
6004                 }
6005                 s = PEEKSPACE(d);
6006                 if (*s == ':' && s[1] != ':')
6007                     s = PEEKSPACE(s+1);
6008                 else if (s == d)
6009                     break;      /* require real whitespace or :'s */
6010                 /* XXX losing whitespace on sequential attributes here */
6011             }
6012             {
6013                 if (*s != ';' && *s != '}' &&
6014                     !(PL_expect == XOPERATOR
6015                         ? (*s == '=' ||  *s == ')')
6016                         : (*s == '{' ||  *s == '('))) {
6017                     const char q = ((*s == '\'') ? '"' : '\'');
6018                     /* If here for an expression, and parsed no attrs, back
6019                        off. */
6020                     if (PL_expect == XOPERATOR && !attrs) {
6021                         s = PL_bufptr;
6022                         break;
6023                     }
6024                     /* MUST advance bufptr here to avoid bogus "at end of line"
6025                        context messages from yyerror().
6026                     */
6027                     PL_bufptr = s;
6028                     yyerror( (const char *)
6029                              (*s
6030                               ? Perl_form(aTHX_ "Invalid separator character "
6031                                           "%c%c%c in attribute list", q, *s, q)
6032                               : "Unterminated attribute list" ) );
6033                     if (attrs)
6034                         op_free(attrs);
6035                     OPERATOR(':');
6036                 }
6037             }
6038         got_attrs:
6039             if (attrs) {
6040                 start_force(PL_curforce);
6041                 NEXTVAL_NEXTTOKE.opval = attrs;
6042                 CURMAD('_', PL_nextwhite);
6043                 force_next(THING);
6044             }
6045 #ifdef PERL_MAD
6046             if (PL_madskills) {
6047                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6048                                      (s - SvPVX(PL_linestr)) - stuffstart);
6049             }
6050 #endif
6051             TOKEN(COLONATTR);
6052         }
6053         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6054             s--;
6055             TOKEN(0);
6056         }
6057         PL_lex_allbrackets--;
6058         OPERATOR(':');
6059     case '(':
6060         s++;
6061         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6062             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6063         else
6064             PL_expect = XTERM;
6065         s = SKIPSPACE1(s);
6066         PL_lex_allbrackets++;
6067         TOKEN('(');
6068     case ';':
6069         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6070             TOKEN(0);
6071         CLINE;
6072         s++;
6073         OPERATOR(';');
6074     case ')':
6075         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6076             TOKEN(0);
6077         s++;
6078         PL_lex_allbrackets--;
6079         s = SKIPSPACE1(s);
6080         if (*s == '{')
6081             PREBLOCK(')');
6082         TERM(')');
6083     case ']':
6084         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6085             TOKEN(0);
6086         s++;
6087         if (PL_lex_brackets <= 0)
6088             /* diag_listed_as: Unmatched right %s bracket */
6089             yyerror("Unmatched right square bracket");
6090         else
6091             --PL_lex_brackets;
6092         PL_lex_allbrackets--;
6093         if (PL_lex_state == LEX_INTERPNORMAL) {
6094             if (PL_lex_brackets == 0) {
6095                 if (*s == '-' && s[1] == '>')
6096                     PL_lex_state = LEX_INTERPENDMAYBE;
6097                 else if (*s != '[' && *s != '{')
6098                     PL_lex_state = LEX_INTERPEND;
6099             }
6100         }
6101         TERM(']');
6102     case '{':
6103         s++;
6104       leftbracket:
6105         if (PL_lex_brackets > 100) {
6106             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6107         }
6108         switch (PL_expect) {
6109         case XTERM:
6110             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6111             PL_lex_allbrackets++;
6112             OPERATOR(HASHBRACK);
6113         case XOPERATOR:
6114             while (s < PL_bufend && SPACE_OR_TAB(*s))
6115                 s++;
6116             d = s;
6117             PL_tokenbuf[0] = '\0';
6118             if (d < PL_bufend && *d == '-') {
6119                 PL_tokenbuf[0] = '-';
6120                 d++;
6121                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6122                     d++;
6123             }
6124             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6125                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6126                               FALSE, &len);
6127                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6128                     d++;
6129                 if (*d == '}') {
6130                     const char minus = (PL_tokenbuf[0] == '-');
6131                     s = force_word(s + minus, WORD, FALSE, TRUE);
6132                     if (minus)
6133                         force_next('-');
6134                 }
6135             }
6136             /* FALL THROUGH */
6137         case XATTRBLOCK:
6138         case XBLOCK:
6139             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6140             PL_lex_allbrackets++;
6141             PL_expect = XSTATE;
6142             break;
6143         case XATTRTERM:
6144         case XTERMBLOCK:
6145             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6146             PL_lex_allbrackets++;
6147             PL_expect = XSTATE;
6148             break;
6149         default: {
6150                 const char *t;
6151                 if (PL_oldoldbufptr == PL_last_lop)
6152                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6153                 else
6154                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6155                 PL_lex_allbrackets++;
6156                 s = SKIPSPACE1(s);
6157                 if (*s == '}') {
6158                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6159                         PL_expect = XTERM;
6160                         /* This hack is to get the ${} in the message. */
6161                         PL_bufptr = s+1;
6162                         yyerror("syntax error");
6163                         break;
6164                     }
6165                     OPERATOR(HASHBRACK);
6166                 }
6167                 /* This hack serves to disambiguate a pair of curlies
6168                  * as being a block or an anon hash.  Normally, expectation
6169                  * determines that, but in cases where we're not in a
6170                  * position to expect anything in particular (like inside
6171                  * eval"") we have to resolve the ambiguity.  This code
6172                  * covers the case where the first term in the curlies is a
6173                  * quoted string.  Most other cases need to be explicitly
6174                  * disambiguated by prepending a "+" before the opening
6175                  * curly in order to force resolution as an anon hash.
6176                  *
6177                  * XXX should probably propagate the outer expectation
6178                  * into eval"" to rely less on this hack, but that could
6179                  * potentially break current behavior of eval"".
6180                  * GSAR 97-07-21
6181                  */
6182                 t = s;
6183                 if (*s == '\'' || *s == '"' || *s == '`') {
6184                     /* common case: get past first string, handling escapes */
6185                     for (t++; t < PL_bufend && *t != *s;)
6186                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6187                             t++;
6188                     t++;
6189                 }
6190                 else if (*s == 'q') {
6191                     if (++t < PL_bufend
6192                         && (!isWORDCHAR(*t)
6193                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6194                                 && !isWORDCHAR(*t))))
6195                     {
6196                         /* skip q//-like construct */
6197                         const char *tmps;
6198                         char open, close, term;
6199                         I32 brackets = 1;
6200
6201                         while (t < PL_bufend && isSPACE(*t))
6202                             t++;
6203                         /* check for q => */
6204                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6205                             OPERATOR(HASHBRACK);
6206                         }
6207                         term = *t;
6208                         open = term;
6209                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6210                             term = tmps[5];
6211                         close = term;
6212                         if (open == close)
6213                             for (t++; t < PL_bufend; t++) {
6214                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6215                                     t++;
6216                                 else if (*t == open)
6217                                     break;
6218                             }
6219                         else {
6220                             for (t++; t < PL_bufend; t++) {
6221                                 if (*t == '\\' && t+1 < PL_bufend)
6222                                     t++;
6223                                 else if (*t == close && --brackets <= 0)
6224                                     break;
6225                                 else if (*t == open)
6226                                     brackets++;
6227                             }
6228                         }
6229                         t++;
6230                     }
6231                     else
6232                         /* skip plain q word */
6233                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6234                              t += UTF8SKIP(t);
6235                 }
6236                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6237                     t += UTF8SKIP(t);
6238                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6239                          t += UTF8SKIP(t);
6240                 }
6241                 while (t < PL_bufend && isSPACE(*t))
6242                     t++;
6243                 /* if comma follows first term, call it an anon hash */
6244                 /* XXX it could be a comma expression with loop modifiers */
6245                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6246                                    || (*t == '=' && t[1] == '>')))
6247                     OPERATOR(HASHBRACK);
6248                 if (PL_expect == XREF)
6249                     PL_expect = XTERM;
6250                 else {
6251                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6252                     PL_expect = XSTATE;
6253                 }
6254             }
6255             break;
6256         }
6257         pl_yylval.ival = CopLINE(PL_curcop);
6258         if (isSPACE(*s) || *s == '#')
6259             PL_copline = NOLINE;   /* invalidate current command line number */
6260         TOKEN(formbrack ? '=' : '{');
6261     case '}':
6262         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6263             TOKEN(0);
6264       rightbracket:
6265         s++;
6266         if (PL_lex_brackets <= 0)
6267             /* diag_listed_as: Unmatched right %s bracket */
6268             yyerror("Unmatched right curly bracket");
6269         else
6270             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6271         PL_lex_allbrackets--;
6272         if (PL_lex_state == LEX_INTERPNORMAL) {
6273             if (PL_lex_brackets == 0) {
6274                 if (PL_expect & XFAKEBRACK) {
6275                     PL_expect &= XENUMMASK;
6276                     PL_lex_state = LEX_INTERPEND;
6277                     PL_bufptr = s;
6278 #if 0
6279                     if (PL_madskills) {
6280                         if (!PL_thiswhite)
6281                             PL_thiswhite = newSVpvs("");
6282                         sv_catpvs(PL_thiswhite,"}");
6283                     }
6284 #endif
6285                     return yylex();     /* ignore fake brackets */
6286                 }
6287                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6288                  && SvEVALED(PL_lex_repl))
6289                     PL_lex_state = LEX_INTERPEND;
6290                 else if (*s == '-' && s[1] == '>')
6291                     PL_lex_state = LEX_INTERPENDMAYBE;
6292                 else if (*s != '[' && *s != '{')
6293                     PL_lex_state = LEX_INTERPEND;
6294             }
6295         }
6296         if (PL_expect & XFAKEBRACK) {
6297             PL_expect &= XENUMMASK;
6298             PL_bufptr = s;
6299             return yylex();             /* ignore fake brackets */
6300         }
6301         start_force(PL_curforce);
6302         if (PL_madskills) {
6303             curmad('X', newSVpvn(s-1,1));
6304             CURMAD('_', PL_thiswhite);
6305         }
6306         force_next(formbrack ? '.' : '}');
6307         if (formbrack) LEAVE;
6308 #ifdef PERL_MAD
6309         if (PL_madskills && !PL_thistoken)
6310             PL_thistoken = newSVpvs("");
6311 #endif
6312         if (formbrack == 2) { /* means . where arguments were expected */
6313             start_force(PL_curforce);
6314             force_next(';');
6315             TOKEN(FORMRBRACK);
6316         }
6317         TOKEN(';');
6318     case '&':
6319         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6320         s++;
6321         if (*s++ == '&') {
6322             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6323                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6324                 s -= 2;
6325                 TOKEN(0);
6326             }
6327             AOPERATOR(ANDAND);
6328         }
6329         s--;
6330         if (PL_expect == XOPERATOR) {
6331             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6332                 && isIDFIRST_lazy_if(s,UTF))
6333             {
6334                 CopLINE_dec(PL_curcop);
6335                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6336                 CopLINE_inc(PL_curcop);
6337             }
6338             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6339                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6340                 s--;
6341                 TOKEN(0);
6342             }
6343             PL_parser->saw_infix_sigil = 1;
6344             BAop(OP_BIT_AND);
6345         }
6346
6347         PL_tokenbuf[0] = '&';
6348         s = scan_ident(s - 1, PL_tokenbuf + 1,
6349                        sizeof PL_tokenbuf - 1, TRUE);
6350         if (PL_tokenbuf[1]) {
6351             PL_expect = XOPERATOR;
6352             force_ident_maybe_lex('&');
6353         }
6354         else
6355             PREREF('&');
6356         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6357         TERM('&');
6358
6359     case '|':
6360         s++;
6361         if (*s++ == '|') {
6362             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6363                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6364                 s -= 2;
6365                 TOKEN(0);
6366             }
6367             AOPERATOR(OROR);
6368         }
6369         s--;
6370         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6371                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6372             s--;
6373             TOKEN(0);
6374         }
6375         BOop(OP_BIT_OR);
6376     case '=':
6377         s++;
6378         {
6379             const char tmp = *s++;
6380             if (tmp == '=') {
6381                 if (!PL_lex_allbrackets &&
6382                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6383                     s -= 2;
6384                     TOKEN(0);
6385                 }
6386                 Eop(OP_EQ);
6387             }
6388             if (tmp == '>') {
6389                 if (!PL_lex_allbrackets &&
6390                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6391                     s -= 2;
6392                     TOKEN(0);
6393                 }
6394                 OPERATOR(',');
6395             }
6396             if (tmp == '~')
6397                 PMop(OP_MATCH);
6398             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6399                 && strchr("+-*/%.^&|<",tmp))
6400                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6401                             "Reversed %c= operator",(int)tmp);
6402             s--;
6403             if (PL_expect == XSTATE && isALPHA(tmp) &&
6404                 (s == PL_linestart+1 || s[-2] == '\n') )
6405                 {
6406                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6407                         || PL_lex_state != LEX_NORMAL) {
6408                         d = PL_bufend;
6409                         while (s < d) {
6410                             if (*s++ == '\n') {
6411                                 incline(s);
6412                                 if (strnEQ(s,"=cut",4)) {
6413                                     s = strchr(s,'\n');
6414                                     if (s)
6415                                         s++;
6416                                     else
6417                                         s = d;
6418                                     incline(s);
6419                                     goto retry;
6420                                 }
6421                             }
6422                         }
6423                         goto retry;
6424                     }
6425 #ifdef PERL_MAD
6426                     if (PL_madskills) {
6427                         if (!PL_thiswhite)
6428                             PL_thiswhite = newSVpvs("");
6429                         sv_catpvn(PL_thiswhite, PL_linestart,
6430                                   PL_bufend - PL_linestart);
6431                     }
6432 #endif
6433                     s = PL_bufend;
6434                     PL_parser->in_pod = 1;
6435                     goto retry;
6436                 }
6437         }
6438         if (PL_expect == XBLOCK) {
6439             const char *t = s;
6440 #ifdef PERL_STRICT_CR
6441             while (SPACE_OR_TAB(*t))
6442 #else
6443             while (SPACE_OR_TAB(*t) || *t == '\r')
6444 #endif
6445                 t++;
6446             if (*t == '\n' || *t == '#') {
6447                 formbrack = 1;
6448                 ENTER;
6449                 SAVEI8(PL_parser->form_lex_state);
6450                 SAVEI32(PL_lex_formbrack);
6451                 PL_parser->form_lex_state = PL_lex_state;
6452                 PL_lex_formbrack = PL_lex_brackets + 1;
6453                 goto leftbracket;
6454             }
6455         }
6456         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6457             s--;
6458             TOKEN(0);
6459         }
6460         pl_yylval.ival = 0;
6461         OPERATOR(ASSIGNOP);
6462     case '!':
6463         s++;
6464         {
6465             const char tmp = *s++;
6466             if (tmp == '=') {
6467                 /* was this !=~ where !~ was meant?
6468                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6469
6470                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6471                     const char *t = s+1;
6472
6473                     while (t < PL_bufend && isSPACE(*t))
6474                         ++t;
6475
6476                     if (*t == '/' || *t == '?' ||
6477                         ((*t == 'm' || *t == 's' || *t == 'y')
6478                          && !isWORDCHAR(t[1])) ||
6479                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6480                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6481                                     "!=~ should be !~");
6482                 }
6483                 if (!PL_lex_allbrackets &&
6484                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6485                     s -= 2;
6486                     TOKEN(0);
6487                 }
6488                 Eop(OP_NE);
6489             }
6490             if (tmp == '~')
6491                 PMop(OP_NOT);
6492         }
6493         s--;
6494         OPERATOR('!');
6495     case '<':
6496         if (PL_expect != XOPERATOR) {
6497             if (s[1] != '<' && !strchr(s,'>'))
6498                 check_uni();
6499             if (s[1] == '<')
6500                 s = scan_heredoc(s);
6501             else
6502                 s = scan_inputsymbol(s);
6503             PL_expect = XOPERATOR;
6504             TOKEN(sublex_start());
6505         }
6506         s++;
6507         {
6508             char tmp = *s++;
6509             if (tmp == '<') {
6510                 if (*s == '=' && !PL_lex_allbrackets &&
6511                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6512                     s -= 2;
6513                     TOKEN(0);
6514                 }
6515                 SHop(OP_LEFT_SHIFT);
6516             }
6517             if (tmp == '=') {
6518                 tmp = *s++;
6519                 if (tmp == '>') {
6520                     if (!PL_lex_allbrackets &&
6521                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6522                         s -= 3;
6523                         TOKEN(0);
6524                     }
6525                     Eop(OP_NCMP);
6526                 }
6527                 s--;
6528                 if (!PL_lex_allbrackets &&
6529                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6530                     s -= 2;
6531                     TOKEN(0);
6532                 }
6533                 Rop(OP_LE);
6534             }
6535         }
6536         s--;
6537         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6538             s--;
6539             TOKEN(0);
6540         }
6541         Rop(OP_LT);
6542     case '>':
6543         s++;
6544         {
6545             const char tmp = *s++;
6546             if (tmp == '>') {
6547                 if (*s == '=' && !PL_lex_allbrackets &&
6548                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6549                     s -= 2;
6550                     TOKEN(0);
6551                 }
6552                 SHop(OP_RIGHT_SHIFT);
6553             }
6554             else if (tmp == '=') {
6555                 if (!PL_lex_allbrackets &&
6556                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6557                     s -= 2;
6558                     TOKEN(0);
6559                 }
6560                 Rop(OP_GE);
6561             }
6562         }
6563         s--;
6564         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6565             s--;
6566             TOKEN(0);
6567         }
6568         Rop(OP_GT);
6569
6570     case '$':
6571         CLINE;
6572
6573         if (PL_expect == XOPERATOR) {
6574             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6575                 return deprecate_commaless_var_list();
6576             }
6577         }
6578         else if (PL_expect == XPOSTDEREF) {
6579             if (s[1] == '#') {
6580                 s++;
6581                 POSTDEREF(DOLSHARP);
6582             }
6583             POSTDEREF('$');
6584         }
6585
6586         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6587             PL_tokenbuf[0] = '@';
6588             s = scan_ident(s + 1, PL_tokenbuf + 1,
6589                            sizeof PL_tokenbuf - 1, FALSE);
6590             if (PL_expect == XOPERATOR)
6591                 no_op("Array length", s);
6592             if (!PL_tokenbuf[1])
6593                 PREREF(DOLSHARP);
6594             PL_expect = XOPERATOR;
6595             force_ident_maybe_lex('#');
6596             TOKEN(DOLSHARP);
6597         }
6598
6599         PL_tokenbuf[0] = '$';
6600         s = scan_ident(s, PL_tokenbuf + 1,
6601                        sizeof PL_tokenbuf - 1, FALSE);
6602         if (PL_expect == XOPERATOR)
6603             no_op("Scalar", s);
6604         if (!PL_tokenbuf[1]) {
6605             if (s == PL_bufend)
6606                 yyerror("Final $ should be \\$ or $name");
6607             PREREF('$');
6608         }
6609
6610         d = s;
6611         {
6612             const char tmp = *s;
6613             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6614                 s = SKIPSPACE1(s);
6615
6616             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6617                 && intuit_more(s)) {
6618                 if (*s == '[') {
6619                     PL_tokenbuf[0] = '@';
6620                     if (ckWARN(WARN_SYNTAX)) {
6621                         char *t = s+1;
6622
6623                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6624                             t++;
6625                         if (*t++ == ',') {
6626                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6627                             while (t < PL_bufend && *t != ']')
6628                                 t++;
6629                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6630                                         "Multidimensional syntax %.*s not supported",
6631                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6632                         }
6633                     }
6634                 }
6635                 else if (*s == '{') {
6636                     char *t;
6637                     PL_tokenbuf[0] = '%';
6638                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6639                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6640                         {
6641                             char tmpbuf[sizeof PL_tokenbuf];
6642                             do {
6643                                 t++;
6644                             } while (isSPACE(*t));
6645                             if (isIDFIRST_lazy_if(t,UTF)) {
6646                                 STRLEN len;
6647                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6648                                               &len);
6649                                 while (isSPACE(*t))
6650                                     t++;
6651                                 if (*t == ';'
6652                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6653                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6654                                         "You need to quote \"%"UTF8f"\"",
6655                                          UTF8fARG(UTF, len, tmpbuf));
6656                             }
6657                         }
6658                 }
6659             }
6660
6661             PL_expect = XOPERATOR;
6662             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6663                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6664                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6665                     PL_expect = XOPERATOR;
6666                 else if (strchr("$@\"'`q", *s))
6667                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6668                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6669                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6670                 else if (isIDFIRST_lazy_if(s,UTF)) {
6671                     char tmpbuf[sizeof PL_tokenbuf];
6672                     int t2;
6673                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6674                     if ((t2 = keyword(tmpbuf, len, 0))) {
6675                         /* binary operators exclude handle interpretations */
6676                         switch (t2) {
6677                         case -KEY_x:
6678                         case -KEY_eq:
6679                         case -KEY_ne:
6680                         case -KEY_gt:
6681                         case -KEY_lt:
6682                         case -KEY_ge:
6683                         case -KEY_le:
6684                         case -KEY_cmp:
6685                             break;
6686                         default:
6687                             PL_expect = XTERM;  /* e.g. print $fh length() */
6688                             break;
6689                         }
6690                     }
6691                     else {
6692                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6693                     }
6694                 }
6695                 else if (isDIGIT(*s))
6696                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6697                 else if (*s == '.' && isDIGIT(s[1]))
6698                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6699                 else if ((*s == '?' || *s == '-' || *s == '+')
6700                          && !isSPACE(s[1]) && s[1] != '=')
6701                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6702                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6703                          && s[1] != '/')
6704                     PL_expect = XTERM;          /* e.g. print $fh /.../
6705                                                    XXX except DORDOR operator
6706                                                 */
6707                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6708                          && s[2] != '=')
6709                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6710             }
6711         }
6712         force_ident_maybe_lex('$');
6713         TOKEN('$');
6714
6715     case '@':
6716         if (PL_expect == XOPERATOR)
6717             no_op("Array", s);
6718         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6719         PL_tokenbuf[0] = '@';
6720         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6721         pl_yylval.ival = 0;
6722         if (!PL_tokenbuf[1]) {
6723             PREREF('@');
6724         }
6725         if (PL_lex_state == LEX_NORMAL)
6726             s = SKIPSPACE1(s);
6727         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6728             if (*s == '{')
6729                 PL_tokenbuf[0] = '%';
6730
6731             /* Warn about @ where they meant $. */
6732             if (*s == '[' || *s == '{') {
6733                 if (ckWARN(WARN_SYNTAX)) {
6734                     S_check_scalar_slice(aTHX_ s);
6735                 }
6736             }
6737         }
6738         PL_expect = XOPERATOR;
6739         force_ident_maybe_lex('@');
6740         TERM('@');
6741
6742      case '/':                  /* may be division, defined-or, or pattern */
6743         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6744             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6745                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6746                 TOKEN(0);
6747             s += 2;
6748             AOPERATOR(DORDOR);
6749         }
6750      case '?':                  /* may either be conditional or pattern */
6751         if (PL_expect == XOPERATOR) {
6752              char tmp = *s++;
6753              if(tmp == '?') {
6754                 if (!PL_lex_allbrackets &&
6755                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6756                     s--;
6757                     TOKEN(0);
6758                 }
6759                 PL_lex_allbrackets++;
6760                 OPERATOR('?');
6761              }
6762              else {
6763                  tmp = *s++;
6764                  if(tmp == '/') {
6765                      /* A // operator. */
6766                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6767                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6768                                             LEX_FAKEEOF_LOGIC)) {
6769                         s -= 2;
6770                         TOKEN(0);
6771                     }
6772                     AOPERATOR(DORDOR);
6773                  }
6774                  else {
6775                      s--;
6776                      if (*s == '=' && !PL_lex_allbrackets &&
6777                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6778                          s--;
6779                          TOKEN(0);
6780                      }
6781                      Mop(OP_DIVIDE);
6782                  }
6783              }
6784          }
6785          else {
6786              /* Disable warning on "study /blah/" */
6787              if (PL_oldoldbufptr == PL_last_uni
6788               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6789                   || memNE(PL_last_uni, "study", 5)
6790                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6791               ))
6792                  check_uni();
6793              if (*s == '?')
6794                  deprecate("?PATTERN? without explicit operator");
6795              s = scan_pat(s,OP_MATCH);
6796              TERM(sublex_start());
6797          }
6798
6799     case '.':
6800         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6801 #ifdef PERL_STRICT_CR
6802             && s[1] == '\n'
6803 #else
6804             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6805 #endif
6806             && (s == PL_linestart || s[-1] == '\n') )
6807         {
6808             PL_expect = XSTATE;
6809             formbrack = 2; /* dot seen where arguments expected */
6810             goto rightbracket;
6811         }
6812         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6813             s += 3;
6814             OPERATOR(YADAYADA);
6815         }
6816         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6817             char tmp = *s++;
6818             if (*s == tmp) {
6819                 if (!PL_lex_allbrackets &&
6820                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6821                     s--;
6822                     TOKEN(0);
6823                 }
6824                 s++;
6825                 if (*s == tmp) {
6826                     s++;
6827                     pl_yylval.ival = OPf_SPECIAL;
6828                 }
6829                 else
6830                     pl_yylval.ival = 0;
6831                 OPERATOR(DOTDOT);
6832             }
6833             if (*s == '=' && !PL_lex_allbrackets &&
6834                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6835                 s--;
6836                 TOKEN(0);
6837             }
6838             Aop(OP_CONCAT);
6839         }
6840         /* FALL THROUGH */
6841     case '0': case '1': case '2': case '3': case '4':
6842     case '5': case '6': case '7': case '8': case '9':
6843         s = scan_num(s, &pl_yylval);
6844         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6845         if (PL_expect == XOPERATOR)
6846             no_op("Number",s);
6847         TERM(THING);
6848
6849     case '\'':
6850         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6851         COPLINE_SET_FROM_MULTI_END;
6852         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6853         if (PL_expect == XOPERATOR) {
6854             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6855                 return deprecate_commaless_var_list();
6856             }
6857             else
6858                 no_op("String",s);
6859         }
6860         if (!s)
6861             missingterm(NULL);
6862         pl_yylval.ival = OP_CONST;
6863         TERM(sublex_start());
6864
6865     case '"':
6866         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6867         DEBUG_T( {
6868             if (s)
6869                 printbuf("### Saw string before %s\n", s);
6870             else
6871                 PerlIO_printf(Perl_debug_log,
6872                              "### Saw unterminated string\n");
6873         } );
6874         if (PL_expect == XOPERATOR) {
6875             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6876                 return deprecate_commaless_var_list();
6877             }
6878             else
6879                 no_op("String",s);
6880         }
6881         if (!s)
6882             missingterm(NULL);
6883         pl_yylval.ival = OP_CONST;
6884         /* FIXME. I think that this can be const if char *d is replaced by
6885            more localised variables.  */
6886         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6887             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6888                 pl_yylval.ival = OP_STRINGIFY;
6889                 break;
6890             }
6891         }
6892         if (pl_yylval.ival == OP_CONST)
6893             COPLINE_SET_FROM_MULTI_END;
6894         TERM(sublex_start());
6895
6896     case '`':
6897         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6898         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6899         if (PL_expect == XOPERATOR)
6900             no_op("Backticks",s);
6901         if (!s)
6902             missingterm(NULL);
6903         pl_yylval.ival = OP_BACKTICK;
6904         TERM(sublex_start());
6905
6906     case '\\':
6907         s++;
6908         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6909          && isDIGIT(*s))
6910             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6911                            *s, *s);
6912         if (PL_expect == XOPERATOR)
6913             no_op("Backslash",s);
6914         OPERATOR(REFGEN);
6915
6916     case 'v':
6917         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6918             char *start = s + 2;
6919             while (isDIGIT(*start) || *start == '_')
6920                 start++;
6921             if (*start == '.' && isDIGIT(start[1])) {
6922                 s = scan_num(s, &pl_yylval);
6923                 TERM(THING);
6924             }
6925             else if ((*start == ':' && start[1] == ':')
6926                   || (PL_expect == XSTATE && *start == ':'))
6927                 goto keylookup;
6928             else if (PL_expect == XSTATE) {
6929                 d = start;
6930                 while (d < PL_bufend && isSPACE(*d)) d++;
6931                 if (*d == ':') goto keylookup;
6932             }
6933             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6934             if (!isALPHA(*start) && (PL_expect == XTERM
6935                         || PL_expect == XREF || PL_expect == XSTATE
6936                         || PL_expect == XTERMORDORDOR)) {
6937                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6938                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6939                 if (!gv) {
6940                     s = scan_num(s, &pl_yylval);
6941                     TERM(THING);
6942                 }
6943             }
6944         }
6945         goto keylookup;
6946     case 'x':
6947         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6948             s++;
6949             Mop(OP_REPEAT);
6950         }
6951         goto keylookup;
6952
6953     case '_':
6954     case 'a': case 'A':
6955     case 'b': case 'B':
6956     case 'c': case 'C':
6957     case 'd': case 'D':
6958     case 'e': case 'E':
6959     case 'f': case 'F':
6960     case 'g': case 'G':
6961     case 'h': case 'H':
6962     case 'i': case 'I':
6963     case 'j': case 'J':
6964     case 'k': case 'K':
6965     case 'l': case 'L':
6966     case 'm': case 'M':
6967     case 'n': case 'N':
6968     case 'o': case 'O':
6969     case 'p': case 'P':
6970     case 'q': case 'Q':
6971     case 'r': case 'R':
6972     case 's': case 'S':
6973     case 't': case 'T':
6974     case 'u': case 'U':
6975               case 'V':
6976     case 'w': case 'W':
6977               case 'X':
6978     case 'y': case 'Y':
6979     case 'z': case 'Z':
6980
6981       keylookup: {
6982         bool anydelim;
6983         bool lex;
6984         I32 tmp;
6985         SV *sv;
6986         CV *cv;
6987         PADOFFSET off;
6988         OP *rv2cv_op;
6989
6990         lex = FALSE;
6991         orig_keyword = 0;
6992         off = 0;
6993         sv = NULL;
6994         cv = NULL;
6995         gv = NULL;
6996         gvp = NULL;
6997         rv2cv_op = NULL;
6998
6999         PL_bufptr = s;
7000         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7001
7002         /* Some keywords can be followed by any delimiter, including ':' */
7003         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7004
7005         /* x::* is just a word, unless x is "CORE" */
7006         if (!anydelim && *s == ':' && s[1] == ':') {
7007             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7008             goto just_a_word;
7009         }
7010
7011         d = s;
7012         while (d < PL_bufend && isSPACE(*d))
7013                 d++;    /* no comments skipped here, or s### is misparsed */
7014
7015         /* Is this a word before a => operator? */
7016         if (*d == '=' && d[1] == '>') {
7017           fat_arrow:
7018             CLINE;
7019             pl_yylval.opval
7020                 = (OP*)newSVOP(OP_CONST, 0,
7021                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7022             pl_yylval.opval->op_private = OPpCONST_BARE;
7023             TERM(WORD);
7024         }
7025
7026         /* Check for plugged-in keyword */
7027         {
7028             OP *o;
7029             int result;
7030             char *saved_bufptr = PL_bufptr;
7031             PL_bufptr = s;
7032             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7033             s = PL_bufptr;
7034             if (result == KEYWORD_PLUGIN_DECLINE) {
7035                 /* not a plugged-in keyword */
7036                 PL_bufptr = saved_bufptr;
7037             } else if (result == KEYWORD_PLUGIN_STMT) {
7038                 pl_yylval.opval = o;
7039                 CLINE;
7040                 PL_expect = XSTATE;
7041                 return REPORT(PLUGSTMT);
7042             } else if (result == KEYWORD_PLUGIN_EXPR) {
7043                 pl_yylval.opval = o;
7044                 CLINE;
7045                 PL_expect = XOPERATOR;
7046                 return REPORT(PLUGEXPR);
7047             } else {
7048                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7049                                         PL_tokenbuf);
7050             }
7051         }
7052
7053         /* Check for built-in keyword */
7054         tmp = keyword(PL_tokenbuf, len, 0);
7055
7056         /* Is this a label? */
7057         if (!anydelim && PL_expect == XSTATE
7058               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7059             s = d + 1;
7060             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7061             pl_yylval.pval[len] = '\0';
7062             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7063             CLINE;
7064             TOKEN(LABEL);
7065         }
7066
7067         /* Check for lexical sub */
7068         if (PL_expect != XOPERATOR) {
7069             char tmpbuf[sizeof PL_tokenbuf + 1];
7070             *tmpbuf = '&';
7071             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7072             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7073             if (off != NOT_IN_PAD) {
7074                 assert(off); /* we assume this is boolean-true below */
7075                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7076                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7077                     HEK * const stashname = HvNAME_HEK(stash);
7078                     sv = newSVhek(stashname);
7079                     sv_catpvs(sv, "::");
7080                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7081                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7082                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7083                                     SVt_PVCV);
7084                     off = 0;
7085                     if (!gv) {
7086                         sv_free(sv);
7087                         sv = NULL;
7088                         goto just_a_word;
7089                     }
7090                 }
7091                 else {
7092                     rv2cv_op = newOP(OP_PADANY, 0);
7093                     rv2cv_op->op_targ = off;
7094                     cv = find_lexical_cv(off);
7095                 }
7096                 lex = TRUE;
7097                 goto just_a_word;
7098             }
7099             off = 0;
7100         }
7101
7102         if (tmp < 0) {                  /* second-class keyword? */
7103             GV *ogv = NULL;     /* override (winner) */
7104             GV *hgv = NULL;     /* hidden (loser) */
7105             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7106                 CV *cv;
7107                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7108                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7109                                             SVt_PVCV)) &&
7110                     (cv = GvCVu(gv)))
7111                 {
7112                     if (GvIMPORTED_CV(gv))
7113                         ogv = gv;
7114                     else if (! CvMETHOD(cv))
7115                         hgv = gv;
7116                 }
7117                 if (!ogv &&
7118                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7119                                           len, FALSE)) &&
7120                     (gv = *gvp) && (
7121                         isGV_with_GP(gv)
7122                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
7123                             :   SvPCS_IMPORTED(gv)
7124                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7125                                          len, 0), 1)
7126                    ))
7127                 {
7128                     ogv = gv;
7129                 }
7130             }
7131             if (ogv) {
7132                 orig_keyword = tmp;
7133                 tmp = 0;                /* overridden by import or by GLOBAL */
7134             }
7135             else if (gv && !gvp
7136                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7137                      && GvCVu(gv))
7138             {
7139                 tmp = 0;                /* any sub overrides "weak" keyword */
7140             }
7141             else {                      /* no override */
7142                 tmp = -tmp;
7143                 if (tmp == KEY_dump) {
7144                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7145                                    "dump() better written as CORE::dump()");
7146                 }
7147                 gv = NULL;
7148                 gvp = 0;
7149                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7150                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7151                                    "Ambiguous call resolved as CORE::%s(), "
7152                                    "qualify as such or use &",
7153                                    GvENAME(hgv));
7154             }
7155         }
7156
7157         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7158          && (!anydelim || *s != '#')) {
7159             /* no override, and not s### either; skipspace is safe here
7160              * check for => on following line */
7161             bool arrow;
7162             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7163             STRLEN   soff = s         - SvPVX(PL_linestr);
7164             s = skipspace_flags(s, LEX_NO_INCLINE);
7165             arrow = *s == '=' && s[1] == '>';
7166             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7167             s         = SvPVX(PL_linestr) +   soff;
7168             if (arrow)
7169                 goto fat_arrow;
7170         }
7171
7172       reserved_word:
7173         switch (tmp) {
7174
7175         default:                        /* not a keyword */
7176             /* Trade off - by using this evil construction we can pull the
7177                variable gv into the block labelled keylookup. If not, then
7178                we have to give it function scope so that the goto from the
7179                earlier ':' case doesn't bypass the initialisation.  */
7180             if (0) {
7181             just_a_word_zero_gv:
7182                 sv = NULL;
7183                 cv = NULL;
7184                 gv = NULL;
7185                 gvp = NULL;
7186                 rv2cv_op = NULL;
7187                 orig_keyword = 0;
7188                 lex = 0;
7189                 off = 0;
7190             }
7191           just_a_word: {
7192                 int pkgname = 0;
7193                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7194                 const char penultchar =
7195                     lastchar && PL_bufptr - 2 >= PL_linestart
7196                          ? PL_bufptr[-2]
7197                          : 0;
7198 #ifdef PERL_MAD
7199                 SV *nextPL_nextwhite = 0;
7200 #endif
7201
7202
7203                 /* Get the rest if it looks like a package qualifier */
7204
7205                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7206                     STRLEN morelen;
7207                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7208                                   TRUE, &morelen);
7209                     if (!morelen)
7210                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7211                                 UTF8fARG(UTF, len, PL_tokenbuf),
7212                                 *s == '\'' ? "'" : "::");
7213                     len += morelen;
7214                     pkgname = 1;
7215                 }
7216
7217                 if (PL_expect == XOPERATOR) {
7218                     if (PL_bufptr == PL_linestart) {
7219                         CopLINE_dec(PL_curcop);
7220                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7221                         CopLINE_inc(PL_curcop);
7222                     }
7223                     else
7224                         no_op("Bareword",s);
7225                 }
7226
7227                 /* Look for a subroutine with this name in current package,
7228                    unless this is a lexical sub, or name is "Foo::",
7229                    in which case Foo is a bareword
7230                    (and a package name). */
7231
7232                 if (len > 2 && !PL_madskills &&
7233                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7234                 {
7235                     if (ckWARN(WARN_BAREWORD)
7236                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7237                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7238                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7239                            UTF8fARG(UTF, len, PL_tokenbuf));
7240                     len -= 2;
7241                     PL_tokenbuf[len] = '\0';
7242                     gv = NULL;
7243                     gvp = 0;
7244                 }
7245                 else {
7246                     if (!lex && !gv) {
7247                         /* Mustn't actually add anything to a symbol table.
7248                            But also don't want to "initialise" any placeholder
7249                            constants that might already be there into full
7250                            blown PVGVs with attached PVCV.  */
7251                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7252                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7253                                                SVt_PVCV);
7254                     }
7255                     len = 0;
7256                 }
7257
7258                 /* if we saw a global override before, get the right name */
7259
7260                 if (!sv)
7261                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7262                     len ? len : strlen(PL_tokenbuf));
7263                 if (gvp) {
7264                     SV * const tmp_sv = sv;
7265                     sv = newSVpvs("CORE::GLOBAL::");
7266                     sv_catsv(sv, tmp_sv);
7267                     SvREFCNT_dec(tmp_sv);
7268                 }
7269
7270 #ifdef PERL_MAD
7271                 if (PL_madskills && !PL_thistoken) {
7272                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7273                     PL_thistoken = newSVpvn(start,s - start);
7274                     PL_realtokenstart = s - SvPVX(PL_linestr);
7275                 }
7276 #endif
7277
7278                 /* Presume this is going to be a bareword of some sort. */
7279                 CLINE;
7280                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7281                 pl_yylval.opval->op_private = OPpCONST_BARE;
7282
7283                 /* And if "Foo::", then that's what it certainly is. */
7284                 if (len)
7285                     goto safe_bareword;
7286
7287                 if (!off)
7288                 {
7289                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7290                     const_op->op_private = OPpCONST_BARE;
7291                     rv2cv_op = newCVREF(0, const_op);
7292                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7293                 }
7294
7295                 /* See if it's the indirect object for a list operator. */
7296
7297                 if (PL_oldoldbufptr &&
7298                     PL_oldoldbufptr < PL_bufptr &&
7299                     (PL_oldoldbufptr == PL_last_lop
7300                      || PL_oldoldbufptr == PL_last_uni) &&
7301                     /* NO SKIPSPACE BEFORE HERE! */
7302                     (PL_expect == XREF ||
7303                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7304                 {
7305                     bool immediate_paren = *s == '(';
7306
7307                     /* (Now we can afford to cross potential line boundary.) */
7308                     s = SKIPSPACE2(s,nextPL_nextwhite);
7309 #ifdef PERL_MAD
7310                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7311 #endif
7312
7313                     /* Two barewords in a row may indicate method call. */
7314
7315                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7316                         (tmp = intuit_method(s, gv, cv))) {
7317                         op_free(rv2cv_op);
7318                         if (tmp == METHOD && !PL_lex_allbrackets &&
7319                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7320                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7321                         return REPORT(tmp);
7322                     }
7323
7324                     /* If not a declared subroutine, it's an indirect object. */
7325                     /* (But it's an indir obj regardless for sort.) */
7326                     /* Also, if "_" follows a filetest operator, it's a bareword */
7327
7328                     if (
7329                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7330                          (!cv &&
7331                         (PL_last_lop_op != OP_MAPSTART &&
7332                          PL_last_lop_op != OP_GREPSTART))))
7333                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7334                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7335                        )
7336                     {
7337                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7338                         goto bareword;
7339                     }
7340                 }
7341
7342                 PL_expect = XOPERATOR;
7343 #ifdef PERL_MAD
7344                 if (isSPACE(*s))
7345                     s = SKIPSPACE2(s,nextPL_nextwhite);
7346                 PL_nextwhite = nextPL_nextwhite;
7347 #else
7348                 s = skipspace(s);
7349 #endif
7350
7351                 /* Is this a word before a => operator? */
7352                 if (*s == '=' && s[1] == '>' && !pkgname) {
7353                     op_free(rv2cv_op);
7354                     CLINE;
7355                     /* This is our own scalar, created a few lines above,
7356                        so this is safe. */
7357                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7358                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7359                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7360                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7361                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7362                     TERM(WORD);
7363                 }
7364
7365                 /* If followed by a paren, it's certainly a subroutine. */
7366                 if (*s == '(') {
7367                     CLINE;
7368                     if (cv) {
7369                         d = s + 1;
7370                         while (SPACE_OR_TAB(*d))
7371                             d++;
7372                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7373                             s = d + 1;
7374                             goto its_constant;
7375                         }
7376                     }
7377 #ifdef PERL_MAD
7378                     if (PL_madskills) {
7379                         PL_nextwhite = PL_thiswhite;
7380                         PL_thiswhite = 0;
7381                     }
7382                     start_force(PL_curforce);
7383 #endif
7384                     NEXTVAL_NEXTTOKE.opval =
7385                         off ? rv2cv_op : pl_yylval.opval;
7386                     PL_expect = XOPERATOR;
7387 #ifdef PERL_MAD
7388                     if (PL_madskills) {
7389                         PL_nextwhite = nextPL_nextwhite;
7390                         curmad('X', PL_thistoken);
7391                         PL_thistoken = newSVpvs("");
7392                     }
7393 #endif
7394                     if (off)
7395                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7396                     else op_free(rv2cv_op),        force_next(WORD);
7397                     pl_yylval.ival = 0;
7398                     TOKEN('&');
7399                 }
7400
7401                 /* If followed by var or block, call it a method (unless sub) */
7402
7403                 if ((*s == '$' || *s == '{') && !cv) {
7404                     op_free(rv2cv_op);
7405                     PL_last_lop = PL_oldbufptr;
7406                     PL_last_lop_op = OP_METHOD;
7407                     if (!PL_lex_allbrackets &&
7408                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7409                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7410                     PREBLOCK(METHOD);
7411                 }
7412
7413                 /* If followed by a bareword, see if it looks like indir obj. */
7414
7415                 if (!orig_keyword
7416                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7417                         && (tmp = intuit_method(s, gv, cv))) {
7418                     op_free(rv2cv_op);
7419                     if (tmp == METHOD && !PL_lex_allbrackets &&
7420                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7421                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7422                     return REPORT(tmp);
7423                 }
7424
7425                 /* Not a method, so call it a subroutine (if defined) */
7426
7427                 if (cv) {
7428                     if (lastchar == '-' && penultchar != '-') {
7429                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7430                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7431                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7432                              UTF8fARG(UTF, l, PL_tokenbuf),
7433                              UTF8fARG(UTF, l, PL_tokenbuf));
7434                     }
7435                     /* Check for a constant sub */
7436                     if ((sv = cv_const_sv_or_av(cv))) {
7437                   its_constant:
7438                         op_free(rv2cv_op);
7439                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7440                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7441                         if (SvTYPE(sv) == SVt_PVAV)
7442                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7443                                                       pl_yylval.opval);
7444                         else {
7445                             pl_yylval.opval->op_private = 0;
7446                             pl_yylval.opval->op_folded = 1;
7447                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7448                         }
7449                         TOKEN(WORD);
7450                     }
7451
7452                     op_free(pl_yylval.opval);
7453                     pl_yylval.opval =
7454                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7455                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7456                     PL_last_lop = PL_oldbufptr;
7457                     PL_last_lop_op = OP_ENTERSUB;
7458                     /* Is there a prototype? */
7459                     if (
7460 #ifdef PERL_MAD
7461                         cv &&
7462 #endif
7463                         SvPOK(cv))
7464                     {
7465                         STRLEN protolen = CvPROTOLEN(cv);
7466                         const char *proto = CvPROTO(cv);
7467                         bool optional;
7468                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7469                         if (!protolen)
7470                             TERM(FUNC0SUB);
7471                         if ((optional = *proto == ';'))
7472                           do
7473                             proto++;
7474                           while (*proto == ';');
7475                         if (
7476                             (
7477                                 (
7478                                     *proto == '$' || *proto == '_'
7479                                  || *proto == '*' || *proto == '+'
7480                                 )
7481                              && proto[1] == '\0'
7482                             )
7483                          || (
7484                              *proto == '\\' && proto[1] && proto[2] == '\0'
7485                             )
7486                         )
7487                             UNIPROTO(UNIOPSUB,optional);
7488                         if (*proto == '\\' && proto[1] == '[') {
7489                             const char *p = proto + 2;
7490                             while(*p && *p != ']')
7491                                 ++p;
7492                             if(*p == ']' && !p[1])
7493                                 UNIPROTO(UNIOPSUB,optional);
7494                         }
7495                         if (*proto == '&' && *s == '{') {
7496                             if (PL_curstash)
7497                                 sv_setpvs(PL_subname, "__ANON__");
7498                             else
7499                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7500                             if (!PL_lex_allbrackets &&
7501                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7502                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7503                             PREBLOCK(LSTOPSUB);
7504                         }
7505                     }
7506 #ifdef PERL_MAD
7507                     {
7508                         if (PL_madskills) {
7509                             PL_nextwhite = PL_thiswhite;
7510                             PL_thiswhite = 0;
7511                         }
7512                         start_force(PL_curforce);
7513                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7514                         PL_expect = XTERM;
7515                         if (PL_madskills) {
7516                             PL_nextwhite = nextPL_nextwhite;
7517                             curmad('X', PL_thistoken);
7518                             PL_thistoken = newSVpvs("");
7519                         }
7520                         force_next(off ? PRIVATEREF : WORD);
7521                         if (!PL_lex_allbrackets &&
7522                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7523                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7524                         TOKEN(NOAMP);
7525                     }
7526                 }
7527
7528                 /* Guess harder when madskills require "best effort". */
7529                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7530                     int probable_sub = 0;
7531                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7532                         probable_sub = 1;
7533                     else if (isALPHA(*s)) {
7534                         char tmpbuf[1024];
7535                         STRLEN tmplen;
7536                         d = s;
7537                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7538                         if (!keyword(tmpbuf, tmplen, 0))
7539                             probable_sub = 1;
7540                         else {
7541                             while (d < PL_bufend && isSPACE(*d))
7542                                 d++;
7543                             if (*d == '=' && d[1] == '>')
7544                                 probable_sub = 1;
7545                         }
7546                     }
7547                     if (probable_sub) {
7548                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7549                                         SVt_PVCV);
7550                         op_free(pl_yylval.opval);
7551                         pl_yylval.opval =
7552                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7553                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7554                         PL_last_lop = PL_oldbufptr;
7555                         PL_last_lop_op = OP_ENTERSUB;
7556                         PL_nextwhite = PL_thiswhite;
7557                         PL_thiswhite = 0;
7558                         start_force(PL_curforce);
7559                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7560                         PL_expect = XTERM;
7561                         PL_nextwhite = nextPL_nextwhite;
7562                         curmad('X', PL_thistoken);
7563                         PL_thistoken = newSVpvs("");
7564                         force_next(off ? PRIVATEREF : WORD);
7565                         if (!PL_lex_allbrackets &&
7566                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7567                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7568                         TOKEN(NOAMP);
7569                     }
7570 #else
7571                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7572                     PL_expect = XTERM;
7573                     force_next(off ? PRIVATEREF : WORD);
7574                     if (!PL_lex_allbrackets &&
7575                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7576                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7577                     TOKEN(NOAMP);
7578 #endif
7579                 }
7580
7581                 /* Call it a bare word */
7582
7583                 if (PL_hints & HINT_STRICT_SUBS)
7584                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7585                 else {
7586                 bareword:
7587                     /* after "print" and similar functions (corresponding to
7588                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7589                      * a filehandle should be subject to "strict subs".
7590                      * Likewise for the optional indirect-object argument to system
7591                      * or exec, which can't be a bareword */
7592                     if ((PL_last_lop_op == OP_PRINT
7593                             || PL_last_lop_op == OP_PRTF
7594                             || PL_last_lop_op == OP_SAY
7595                             || PL_last_lop_op == OP_SYSTEM
7596                             || PL_last_lop_op == OP_EXEC)
7597                             && (PL_hints & HINT_STRICT_SUBS))
7598                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7599                     if (lastchar != '-') {
7600                         if (ckWARN(WARN_RESERVED)) {
7601                             d = PL_tokenbuf;
7602                             while (isLOWER(*d))
7603                                 d++;
7604                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7605                             {
7606                                 /* PL_warn_reserved is constant */
7607                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7608                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7609                                        PL_tokenbuf);
7610                                 GCC_DIAG_RESTORE;
7611                             }
7612                         }
7613                     }
7614                 }
7615                 op_free(rv2cv_op);
7616
7617             safe_bareword:
7618                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7619                  && saw_infix_sigil) {
7620                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7621                                      "Operator or semicolon missing before %c%"UTF8f,
7622                                      lastchar,
7623                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7624                                               PL_tokenbuf));
7625                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7626                                      "Ambiguous use of %c resolved as operator %c",
7627                                      lastchar, lastchar);
7628                 }
7629                 TOKEN(WORD);
7630             }
7631
7632         case KEY___FILE__:
7633             FUN0OP(
7634                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7635             );
7636
7637         case KEY___LINE__:
7638             FUN0OP(
7639                 (OP*)newSVOP(OP_CONST, 0,
7640                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7641             );
7642
7643         case KEY___PACKAGE__:
7644             FUN0OP(
7645                 (OP*)newSVOP(OP_CONST, 0,
7646                                         (PL_curstash
7647                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7648                                          : &PL_sv_undef))
7649             );
7650
7651         case KEY___DATA__:
7652         case KEY___END__: {
7653             GV *gv;
7654             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7655                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7656                                         ? PL_curstash
7657                                         : PL_defstash;
7658                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7659                 if (!isGV(gv))
7660                     gv_init(gv,stash,"DATA",4,0);
7661                 GvMULTI_on(gv);
7662                 if (!GvIO(gv))
7663                     GvIOp(gv) = newIO();
7664                 IoIFP(GvIOp(gv)) = PL_rsfp;
7665 #if defined(HAS_FCNTL) && defined(F_SETFD)
7666                 {
7667                     const int fd = PerlIO_fileno(PL_rsfp);
7668                     fcntl(fd,F_SETFD,fd >= 3);
7669                 }
7670 #endif
7671                 /* Mark this internal pseudo-handle as clean */
7672                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7673                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7674                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7675                 else
7676                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7677 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7678                 /* if the script was opened in binmode, we need to revert
7679                  * it to text mode for compatibility; but only iff it has CRs
7680                  * XXX this is a questionable hack at best. */
7681                 if (PL_bufend-PL_bufptr > 2
7682                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7683                 {
7684                     Off_t loc = 0;
7685                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7686                         loc = PerlIO_tell(PL_rsfp);
7687                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7688                     }
7689 #ifdef NETWARE
7690                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7691 #else
7692                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7693 #endif  /* NETWARE */
7694                         if (loc > 0)
7695                             PerlIO_seek(PL_rsfp, loc, 0);
7696                     }
7697                 }
7698 #endif
7699 #ifdef PERLIO_LAYERS
7700                 if (!IN_BYTES) {
7701                     if (UTF)
7702                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7703                     else if (PL_encoding) {
7704                         SV *name;
7705                         dSP;
7706                         ENTER;
7707                         SAVETMPS;
7708                         PUSHMARK(sp);
7709                         XPUSHs(PL_encoding);
7710                         PUTBACK;
7711                         call_method("name", G_SCALAR);
7712                         SPAGAIN;
7713                         name = POPs;
7714                         PUTBACK;
7715                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7716                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7717                                                       SVfARG(name)));
7718                         FREETMPS;
7719                         LEAVE;
7720                     }
7721                 }
7722 #endif
7723 #ifdef PERL_MAD
7724                 if (PL_madskills) {
7725                     if (PL_realtokenstart >= 0) {
7726                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7727                         if (!PL_endwhite)
7728                             PL_endwhite = newSVpvs("");
7729                         sv_catsv(PL_endwhite, PL_thiswhite);
7730                         PL_thiswhite = 0;
7731                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7732                         PL_realtokenstart = -1;
7733                     }
7734                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7735                            != NULL) ;
7736                 }
7737 #endif
7738                 PL_rsfp = NULL;
7739             }
7740             goto fake_eof;
7741         }
7742
7743         case KEY___SUB__:
7744             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7745
7746         case KEY_AUTOLOAD:
7747         case KEY_DESTROY:
7748         case KEY_BEGIN:
7749         case KEY_UNITCHECK:
7750         case KEY_CHECK:
7751         case KEY_INIT:
7752         case KEY_END:
7753             if (PL_expect == XSTATE) {
7754                 s = PL_bufptr;
7755                 goto really_sub;
7756             }
7757             goto just_a_word;
7758
7759         case_KEY_CORE:
7760             {
7761                 STRLEN olen = len;
7762                 d = s;
7763                 s += 2;
7764                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7765                 if ((*s == ':' && s[1] == ':')
7766                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7767                 {
7768                     s = d;
7769                     len = olen;
7770                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7771                     goto just_a_word;
7772                 }
7773                 if (!tmp)
7774                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7775                                       UTF8fARG(UTF, len, PL_tokenbuf));
7776                 if (tmp < 0)
7777                     tmp = -tmp;
7778                 else if (tmp == KEY_require || tmp == KEY_do
7779                       || tmp == KEY_glob)
7780                     /* that's a way to remember we saw "CORE::" */
7781                     orig_keyword = tmp;
7782                 goto reserved_word;
7783             }
7784
7785         case KEY_abs:
7786             UNI(OP_ABS);
7787
7788         case KEY_alarm:
7789             UNI(OP_ALARM);
7790
7791         case KEY_accept:
7792             LOP(OP_ACCEPT,XTERM);
7793
7794         case KEY_and:
7795             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7796                 return REPORT(0);
7797             OPERATOR(ANDOP);
7798
7799         case KEY_atan2:
7800             LOP(OP_ATAN2,XTERM);
7801
7802         case KEY_bind:
7803             LOP(OP_BIND,XTERM);
7804
7805         case KEY_binmode:
7806             LOP(OP_BINMODE,XTERM);
7807
7808         case KEY_bless:
7809             LOP(OP_BLESS,XTERM);
7810
7811         case KEY_break:
7812             FUN0(OP_BREAK);
7813
7814         case KEY_chop:
7815             UNI(OP_CHOP);
7816
7817         case KEY_continue:
7818                     /* We have to disambiguate the two senses of
7819                       "continue". If the next token is a '{' then
7820                       treat it as the start of a continue block;
7821                       otherwise treat it as a control operator.
7822                      */
7823                     s = skipspace(s);
7824                     if (*s == '{')
7825             PREBLOCK(CONTINUE);
7826                     else
7827                         FUN0(OP_CONTINUE);
7828
7829         case KEY_chdir:
7830             /* may use HOME */
7831             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7832             UNI(OP_CHDIR);
7833
7834         case KEY_close:
7835             UNI(OP_CLOSE);
7836
7837         case KEY_closedir:
7838             UNI(OP_CLOSEDIR);
7839
7840         case KEY_cmp:
7841             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7842                 return REPORT(0);
7843             Eop(OP_SCMP);
7844
7845         case KEY_caller:
7846             UNI(OP_CALLER);
7847
7848         case KEY_crypt:
7849 #ifdef FCRYPT
7850             if (!PL_cryptseen) {
7851                 PL_cryptseen = TRUE;
7852                 init_des();
7853             }
7854 #endif
7855             LOP(OP_CRYPT,XTERM);
7856
7857         case KEY_chmod:
7858             LOP(OP_CHMOD,XTERM);
7859
7860         case KEY_chown:
7861             LOP(OP_CHOWN,XTERM);
7862
7863         case KEY_connect:
7864             LOP(OP_CONNECT,XTERM);
7865
7866         case KEY_chr:
7867             UNI(OP_CHR);
7868
7869         case KEY_cos:
7870             UNI(OP_COS);
7871
7872         case KEY_chroot:
7873             UNI(OP_CHROOT);
7874
7875         case KEY_default:
7876             PREBLOCK(DEFAULT);
7877
7878         case KEY_do:
7879             s = SKIPSPACE1(s);
7880             if (*s == '{')
7881                 PRETERMBLOCK(DO);
7882             if (*s != '\'') {
7883                 *PL_tokenbuf = '&';
7884                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7885                               1, &len);
7886                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7887                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7888                     d = SKIPSPACE1(d);
7889                     if (*d == '(') {
7890                         force_ident_maybe_lex('&');
7891                         s = d;
7892                     }
7893                 }
7894             }
7895             if (orig_keyword == KEY_do) {
7896                 orig_keyword = 0;
7897                 pl_yylval.ival = 1;
7898             }
7899             else
7900                 pl_yylval.ival = 0;
7901             OPERATOR(DO);
7902
7903         case KEY_die:
7904             PL_hints |= HINT_BLOCK_SCOPE;
7905             LOP(OP_DIE,XTERM);
7906
7907         case KEY_defined:
7908             UNI(OP_DEFINED);
7909
7910         case KEY_delete:
7911             UNI(OP_DELETE);
7912
7913         case KEY_dbmopen:
7914             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7915                               STR_WITH_LEN("NDBM_File::"),
7916                               STR_WITH_LEN("DB_File::"),
7917                               STR_WITH_LEN("GDBM_File::"),
7918                               STR_WITH_LEN("SDBM_File::"),
7919                               STR_WITH_LEN("ODBM_File::"),
7920                               NULL);
7921             LOP(OP_DBMOPEN,XTERM);
7922
7923         case KEY_dbmclose:
7924             UNI(OP_DBMCLOSE);
7925
7926         case KEY_dump:
7927             PL_expect = XOPERATOR;
7928             s = force_word(s,WORD,TRUE,FALSE);
7929             LOOPX(OP_DUMP);
7930
7931         case KEY_else:
7932             PREBLOCK(ELSE);
7933
7934         case KEY_elsif:
7935             pl_yylval.ival = CopLINE(PL_curcop);
7936             OPERATOR(ELSIF);
7937
7938         case KEY_eq:
7939             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7940                 return REPORT(0);
7941             Eop(OP_SEQ);
7942
7943         case KEY_exists:
7944             UNI(OP_EXISTS);
7945
7946         case KEY_exit:
7947             if (PL_madskills)
7948                 UNI(OP_INT);
7949             UNI(OP_EXIT);
7950
7951         case KEY_eval:
7952             s = SKIPSPACE1(s);
7953             if (*s == '{') { /* block eval */
7954                 PL_expect = XTERMBLOCK;
7955                 UNIBRACK(OP_ENTERTRY);
7956             }
7957             else { /* string eval */
7958                 PL_expect = XTERM;
7959                 UNIBRACK(OP_ENTEREVAL);
7960             }
7961
7962         case KEY_evalbytes:
7963             PL_expect = XTERM;
7964             UNIBRACK(-OP_ENTEREVAL);
7965
7966         case KEY_eof:
7967             UNI(OP_EOF);
7968
7969         case KEY_exp:
7970             UNI(OP_EXP);
7971
7972         case KEY_each:
7973             UNI(OP_EACH);
7974
7975         case KEY_exec:
7976             LOP(OP_EXEC,XREF);
7977
7978         case KEY_endhostent:
7979             FUN0(OP_EHOSTENT);
7980
7981         case KEY_endnetent:
7982             FUN0(OP_ENETENT);
7983
7984         case KEY_endservent:
7985             FUN0(OP_ESERVENT);
7986
7987         case KEY_endprotoent:
7988             FUN0(OP_EPROTOENT);
7989
7990         case KEY_endpwent:
7991             FUN0(OP_EPWENT);
7992
7993         case KEY_endgrent:
7994             FUN0(OP_EGRENT);
7995
7996         case KEY_for:
7997         case KEY_foreach:
7998             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7999                 return REPORT(0);
8000             pl_yylval.ival = CopLINE(PL_curcop);
8001             s = SKIPSPACE1(s);
8002             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8003                 char *p = s;
8004 #ifdef PERL_MAD
8005                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8006 #endif
8007
8008                 if ((PL_bufend - p) >= 3 &&
8009                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8010                     p += 2;
8011                 else if ((PL_bufend - p) >= 4 &&
8012                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8013                     p += 3;
8014                 p = PEEKSPACE(p);
8015                 /* skip optional package name, as in "for my abc $x (..)" */
8016                 if (isIDFIRST_lazy_if(p,UTF)) {
8017                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8018                     p = PEEKSPACE(p);
8019                 }
8020                 if (*p != '$')
8021                     Perl_croak(aTHX_ "Missing $ on loop variable");
8022 #ifdef PERL_MAD
8023                 s = SvPVX(PL_linestr) + soff;
8024 #endif
8025             }
8026             OPERATOR(FOR);
8027
8028         case KEY_formline:
8029             LOP(OP_FORMLINE,XTERM);
8030
8031         case KEY_fork:
8032             FUN0(OP_FORK);
8033
8034         case KEY_fc:
8035             UNI(OP_FC);
8036
8037         case KEY_fcntl:
8038             LOP(OP_FCNTL,XTERM);
8039
8040         case KEY_fileno:
8041             UNI(OP_FILENO);
8042
8043         case KEY_flock:
8044             LOP(OP_FLOCK,XTERM);
8045
8046         case KEY_gt:
8047             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8048                 return REPORT(0);
8049             Rop(OP_SGT);
8050
8051         case KEY_ge:
8052             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8053                 return REPORT(0);
8054             Rop(OP_SGE);
8055
8056         case KEY_grep:
8057             LOP(OP_GREPSTART, XREF);
8058
8059         case KEY_goto:
8060             PL_expect = XOPERATOR;
8061             s = force_word(s,WORD,TRUE,FALSE);
8062             LOOPX(OP_GOTO);
8063
8064         case KEY_gmtime:
8065             UNI(OP_GMTIME);
8066
8067         case KEY_getc:
8068             UNIDOR(OP_GETC);
8069
8070         case KEY_getppid:
8071             FUN0(OP_GETPPID);
8072
8073         case KEY_getpgrp:
8074             UNI(OP_GETPGRP);
8075
8076         case KEY_getpriority:
8077             LOP(OP_GETPRIORITY,XTERM);
8078
8079         case KEY_getprotobyname:
8080             UNI(OP_GPBYNAME);
8081
8082         case KEY_getprotobynumber:
8083             LOP(OP_GPBYNUMBER,XTERM);
8084
8085         case KEY_getprotoent:
8086             FUN0(OP_GPROTOENT);
8087
8088         case KEY_getpwent:
8089             FUN0(OP_GPWENT);
8090
8091         case KEY_getpwnam:
8092             UNI(OP_GPWNAM);
8093
8094         case KEY_getpwuid:
8095             UNI(OP_GPWUID);
8096
8097         case KEY_getpeername:
8098             UNI(OP_GETPEERNAME);
8099
8100         case KEY_gethostbyname:
8101             UNI(OP_GHBYNAME);
8102
8103         case KEY_gethostbyaddr:
8104             LOP(OP_GHBYADDR,XTERM);
8105
8106         case KEY_gethostent:
8107             FUN0(OP_GHOSTENT);
8108
8109         case KEY_getnetbyname:
8110             UNI(OP_GNBYNAME);
8111
8112         case KEY_getnetbyaddr:
8113             LOP(OP_GNBYADDR,XTERM);
8114
8115         case KEY_getnetent:
8116             FUN0(OP_GNETENT);
8117
8118         case KEY_getservbyname:
8119             LOP(OP_GSBYNAME,XTERM);
8120
8121         case KEY_getservbyport:
8122             LOP(OP_GSBYPORT,XTERM);
8123
8124         case KEY_getservent:
8125             FUN0(OP_GSERVENT);
8126
8127         case KEY_getsockname:
8128             UNI(OP_GETSOCKNAME);
8129
8130         case KEY_getsockopt:
8131             LOP(OP_GSOCKOPT,XTERM);
8132
8133         case KEY_getgrent:
8134             FUN0(OP_GGRENT);
8135
8136         case KEY_getgrnam:
8137             UNI(OP_GGRNAM);
8138
8139         case KEY_getgrgid:
8140             UNI(OP_GGRGID);
8141
8142         case KEY_getlogin:
8143             FUN0(OP_GETLOGIN);
8144
8145         case KEY_given:
8146             pl_yylval.ival = CopLINE(PL_curcop);
8147             Perl_ck_warner_d(aTHX_
8148                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8149                 "given is experimental");
8150             OPERATOR(GIVEN);
8151
8152         case KEY_glob:
8153             LOP(
8154              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8155              XTERM
8156             );
8157
8158         case KEY_hex:
8159             UNI(OP_HEX);
8160
8161         case KEY_if:
8162             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8163                 return REPORT(0);
8164             pl_yylval.ival = CopLINE(PL_curcop);
8165             OPERATOR(IF);
8166
8167         case KEY_index:
8168             LOP(OP_INDEX,XTERM);
8169
8170         case KEY_int:
8171             UNI(OP_INT);
8172
8173         case KEY_ioctl:
8174             LOP(OP_IOCTL,XTERM);
8175
8176         case KEY_join:
8177             LOP(OP_JOIN,XTERM);
8178
8179         case KEY_keys:
8180             UNI(OP_KEYS);
8181
8182         case KEY_kill:
8183             LOP(OP_KILL,XTERM);
8184
8185         case KEY_last:
8186             PL_expect = XOPERATOR;
8187             s = force_word(s,WORD,TRUE,FALSE);
8188             LOOPX(OP_LAST);
8189
8190         case KEY_lc:
8191             UNI(OP_LC);
8192
8193         case KEY_lcfirst:
8194             UNI(OP_LCFIRST);
8195
8196         case KEY_local:
8197             pl_yylval.ival = 0;
8198             OPERATOR(LOCAL);
8199
8200         case KEY_length:
8201             UNI(OP_LENGTH);
8202
8203         case KEY_lt:
8204             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8205                 return REPORT(0);
8206             Rop(OP_SLT);
8207
8208         case KEY_le:
8209             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8210                 return REPORT(0);
8211             Rop(OP_SLE);
8212
8213         case KEY_localtime:
8214             UNI(OP_LOCALTIME);
8215
8216         case KEY_log:
8217             UNI(OP_LOG);
8218
8219         case KEY_link:
8220             LOP(OP_LINK,XTERM);
8221
8222         case KEY_listen:
8223             LOP(OP_LISTEN,XTERM);
8224
8225         case KEY_lock:
8226             UNI(OP_LOCK);
8227
8228         case KEY_lstat:
8229             UNI(OP_LSTAT);
8230
8231         case KEY_m:
8232             s = scan_pat(s,OP_MATCH);
8233             TERM(sublex_start());
8234
8235         case KEY_map:
8236             LOP(OP_MAPSTART, XREF);
8237
8238         case KEY_mkdir:
8239             LOP(OP_MKDIR,XTERM);
8240
8241         case KEY_msgctl:
8242             LOP(OP_MSGCTL,XTERM);
8243
8244         case KEY_msgget:
8245             LOP(OP_MSGGET,XTERM);
8246
8247         case KEY_msgrcv:
8248             LOP(OP_MSGRCV,XTERM);
8249
8250         case KEY_msgsnd:
8251             LOP(OP_MSGSND,XTERM);
8252
8253         case KEY_our:
8254         case KEY_my:
8255         case KEY_state:
8256             PL_in_my = (U16)tmp;
8257             s = SKIPSPACE1(s);
8258             if (isIDFIRST_lazy_if(s,UTF)) {
8259 #ifdef PERL_MAD
8260                 char* start = s;
8261 #endif
8262                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8263                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8264                 {
8265                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8266                         Perl_croak(aTHX_
8267                                   "Experimental \"%s\" subs not enabled",
8268                                    tmp == KEY_my    ? "my"    :
8269                                    tmp == KEY_state ? "state" : "our");
8270                     Perl_ck_warner_d(aTHX_
8271                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8272                         "The lexical_subs feature is experimental");
8273                     goto really_sub;
8274                 }
8275                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8276                 if (!PL_in_my_stash) {
8277                     char tmpbuf[1024];
8278                     PL_bufptr = s;
8279                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8280                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8281                 }
8282 #ifdef PERL_MAD
8283                 if (PL_madskills) {     /* just add type to declarator token */
8284                     sv_catsv(PL_thistoken, PL_nextwhite);
8285                     PL_nextwhite = 0;
8286                     sv_catpvn(PL_thistoken, start, s - start);
8287                 }
8288 #endif
8289             }
8290             pl_yylval.ival = 1;
8291             OPERATOR(MY);
8292
8293         case KEY_next:
8294             PL_expect = XOPERATOR;
8295             s = force_word(s,WORD,TRUE,FALSE);
8296             LOOPX(OP_NEXT);
8297
8298         case KEY_ne:
8299             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8300                 return REPORT(0);
8301             Eop(OP_SNE);
8302
8303         case KEY_no:
8304             s = tokenize_use(0, s);
8305             TERM(USE);
8306
8307         case KEY_not:
8308             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8309                 FUN1(OP_NOT);
8310             else {
8311                 if (!PL_lex_allbrackets &&
8312                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8313                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8314                 OPERATOR(NOTOP);
8315             }
8316
8317         case KEY_open:
8318             s = SKIPSPACE1(s);
8319             if (isIDFIRST_lazy_if(s,UTF)) {
8320           const char *t;
8321           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8322               &len);
8323                 for (t=d; isSPACE(*t);)
8324                     t++;
8325                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8326                     /* [perl #16184] */
8327                     && !(t[0] == '=' && t[1] == '>')
8328                     && !(t[0] == ':' && t[1] == ':')
8329                     && !keyword(s, d-s, 0)
8330                 ) {
8331                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8332                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8333                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8334                 }
8335             }
8336             LOP(OP_OPEN,XTERM);
8337
8338         case KEY_or:
8339             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8340                 return REPORT(0);
8341             pl_yylval.ival = OP_OR;
8342             OPERATOR(OROP);
8343
8344         case KEY_ord:
8345             UNI(OP_ORD);
8346
8347         case KEY_oct:
8348             UNI(OP_OCT);
8349
8350         case KEY_opendir:
8351             LOP(OP_OPEN_DIR,XTERM);
8352
8353         case KEY_print:
8354             checkcomma(s,PL_tokenbuf,"filehandle");
8355             LOP(OP_PRINT,XREF);
8356
8357         case KEY_printf:
8358             checkcomma(s,PL_tokenbuf,"filehandle");
8359             LOP(OP_PRTF,XREF);
8360
8361         case KEY_prototype:
8362             UNI(OP_PROTOTYPE);
8363
8364         case KEY_push:
8365             LOP(OP_PUSH,XTERM);
8366
8367         case KEY_pop:
8368             UNIDOR(OP_POP);
8369
8370         case KEY_pos:
8371             UNIDOR(OP_POS);
8372
8373         case KEY_pack:
8374             LOP(OP_PACK,XTERM);
8375
8376         case KEY_package:
8377             s = force_word(s,WORD,FALSE,TRUE);
8378             s = SKIPSPACE1(s);
8379             s = force_strict_version(s);
8380             PL_lex_expect = XBLOCK;
8381             OPERATOR(PACKAGE);
8382
8383         case KEY_pipe:
8384             LOP(OP_PIPE_OP,XTERM);
8385
8386         case KEY_q:
8387             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8388             COPLINE_SET_FROM_MULTI_END;
8389             if (!s)
8390                 missingterm(NULL);
8391             pl_yylval.ival = OP_CONST;
8392             TERM(sublex_start());
8393
8394         case KEY_quotemeta:
8395             UNI(OP_QUOTEMETA);
8396
8397         case KEY_qw: {
8398             OP *words = NULL;
8399             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8400             COPLINE_SET_FROM_MULTI_END;
8401             if (!s)
8402                 missingterm(NULL);
8403             PL_expect = XOPERATOR;
8404             if (SvCUR(PL_lex_stuff)) {
8405                 int warned_comma = !ckWARN(WARN_QW);
8406                 int warned_comment = warned_comma;
8407                 d = SvPV_force(PL_lex_stuff, len);
8408                 while (len) {
8409                     for (; isSPACE(*d) && len; --len, ++d)
8410                         /**/;
8411                     if (len) {
8412                         SV *sv;
8413                         const char *b = d;
8414                         if (!warned_comma || !warned_comment) {
8415                             for (; !isSPACE(*d) && len; --len, ++d) {
8416                                 if (!warned_comma && *d == ',') {
8417                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8418                                         "Possible attempt to separate words with commas");
8419                                     ++warned_comma;
8420                                 }
8421                                 else if (!warned_comment && *d == '#') {
8422                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8423                                         "Possible attempt to put comments in qw() list");
8424                                     ++warned_comment;
8425                                 }
8426                             }
8427                         }
8428                         else {
8429                             for (; !isSPACE(*d) && len; --len, ++d)
8430                                 /**/;
8431                         }
8432                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8433                         words = op_append_elem(OP_LIST, words,
8434                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8435                     }
8436                 }
8437             }
8438             if (!words)
8439                 words = newNULLLIST();
8440             if (PL_lex_stuff) {
8441                 SvREFCNT_dec(PL_lex_stuff);
8442                 PL_lex_stuff = NULL;
8443             }
8444             PL_expect = XOPERATOR;
8445             pl_yylval.opval = sawparens(words);
8446             TOKEN(QWLIST);
8447         }
8448
8449         case KEY_qq:
8450             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8451             if (!s)
8452                 missingterm(NULL);
8453             pl_yylval.ival = OP_STRINGIFY;
8454             if (SvIVX(PL_lex_stuff) == '\'')
8455                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8456             TERM(sublex_start());
8457
8458         case KEY_qr:
8459             s = scan_pat(s,OP_QR);
8460             TERM(sublex_start());
8461
8462         case KEY_qx:
8463             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8464             if (!s)
8465                 missingterm(NULL);
8466             pl_yylval.ival = OP_BACKTICK;
8467             TERM(sublex_start());
8468
8469         case KEY_return:
8470             OLDLOP(OP_RETURN);
8471
8472         case KEY_require:
8473             s = SKIPSPACE1(s);
8474             PL_expect = XOPERATOR;
8475             if (isDIGIT(*s)) {
8476                 s = force_version(s, FALSE);
8477             }
8478             else if (*s != 'v' || !isDIGIT(s[1])
8479                     || (s = force_version(s, TRUE), *s == 'v'))
8480             {
8481                 *PL_tokenbuf = '\0';
8482                 s = force_word(s,WORD,TRUE,TRUE);
8483                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8484                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8485                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8486                 else if (*s == '<')
8487                     yyerror("<> should be quotes");
8488             }
8489             if (orig_keyword == KEY_require) {
8490                 orig_keyword = 0;
8491                 pl_yylval.ival = 1;
8492             }
8493             else
8494                 pl_yylval.ival = 0;
8495             PL_expect = XTERM;
8496             PL_bufptr = s;
8497             PL_last_uni = PL_oldbufptr;
8498             PL_last_lop_op = OP_REQUIRE;
8499             s = skipspace(s);
8500             return REPORT( (int)REQUIRE );
8501
8502         case KEY_reset:
8503             UNI(OP_RESET);
8504
8505         case KEY_redo:
8506             PL_expect = XOPERATOR;
8507             s = force_word(s,WORD,TRUE,FALSE);
8508             LOOPX(OP_REDO);
8509
8510         case KEY_rename:
8511             LOP(OP_RENAME,XTERM);
8512
8513         case KEY_rand:
8514             UNI(OP_RAND);
8515
8516         case KEY_rmdir:
8517             UNI(OP_RMDIR);
8518
8519         case KEY_rindex:
8520             LOP(OP_RINDEX,XTERM);
8521
8522         case KEY_read:
8523             LOP(OP_READ,XTERM);
8524
8525         case KEY_readdir:
8526             UNI(OP_READDIR);
8527
8528         case KEY_readline:
8529             UNIDOR(OP_READLINE);
8530
8531         case KEY_readpipe:
8532             UNIDOR(OP_BACKTICK);
8533
8534         case KEY_rewinddir:
8535             UNI(OP_REWINDDIR);
8536
8537         case KEY_recv:
8538             LOP(OP_RECV,XTERM);
8539
8540         case KEY_reverse:
8541             LOP(OP_REVERSE,XTERM);
8542
8543         case KEY_readlink:
8544             UNIDOR(OP_READLINK);
8545
8546         case KEY_ref:
8547             UNI(OP_REF);
8548
8549         case KEY_s:
8550             s = scan_subst(s);
8551             if (pl_yylval.opval)
8552                 TERM(sublex_start());
8553             else
8554                 TOKEN(1);       /* force error */
8555
8556         case KEY_say:
8557             checkcomma(s,PL_tokenbuf,"filehandle");
8558             LOP(OP_SAY,XREF);
8559
8560         case KEY_chomp:
8561             UNI(OP_CHOMP);
8562
8563         case KEY_scalar:
8564             UNI(OP_SCALAR);
8565
8566         case KEY_select:
8567             LOP(OP_SELECT,XTERM);
8568
8569         case KEY_seek:
8570             LOP(OP_SEEK,XTERM);
8571
8572         case KEY_semctl:
8573             LOP(OP_SEMCTL,XTERM);
8574
8575         case KEY_semget:
8576             LOP(OP_SEMGET,XTERM);
8577
8578         case KEY_semop:
8579             LOP(OP_SEMOP,XTERM);
8580
8581         case KEY_send:
8582             LOP(OP_SEND,XTERM);
8583
8584         case KEY_setpgrp:
8585             LOP(OP_SETPGRP,XTERM);
8586
8587         case KEY_setpriority:
8588             LOP(OP_SETPRIORITY,XTERM);
8589
8590         case KEY_sethostent:
8591             UNI(OP_SHOSTENT);
8592
8593         case KEY_setnetent:
8594             UNI(OP_SNETENT);
8595
8596         case KEY_setservent:
8597             UNI(OP_SSERVENT);
8598
8599         case KEY_setprotoent:
8600             UNI(OP_SPROTOENT);
8601
8602         case KEY_setpwent:
8603             FUN0(OP_SPWENT);
8604
8605         case KEY_setgrent:
8606             FUN0(OP_SGRENT);
8607
8608         case KEY_seekdir:
8609             LOP(OP_SEEKDIR,XTERM);
8610
8611         case KEY_setsockopt:
8612             LOP(OP_SSOCKOPT,XTERM);
8613
8614         case KEY_shift:
8615             UNIDOR(OP_SHIFT);
8616
8617         case KEY_shmctl:
8618             LOP(OP_SHMCTL,XTERM);
8619
8620         case KEY_shmget:
8621             LOP(OP_SHMGET,XTERM);
8622
8623         case KEY_shmread:
8624             LOP(OP_SHMREAD,XTERM);
8625
8626         case KEY_shmwrite:
8627             LOP(OP_SHMWRITE,XTERM);
8628
8629         case KEY_shutdown:
8630             LOP(OP_SHUTDOWN,XTERM);
8631
8632         case KEY_sin:
8633             UNI(OP_SIN);
8634
8635         case KEY_sleep:
8636             UNI(OP_SLEEP);
8637
8638         case KEY_socket:
8639             LOP(OP_SOCKET,XTERM);
8640
8641         case KEY_socketpair:
8642             LOP(OP_SOCKPAIR,XTERM);
8643
8644         case KEY_sort:
8645             checkcomma(s,PL_tokenbuf,"subroutine name");
8646             s = SKIPSPACE1(s);
8647             PL_expect = XTERM;
8648             s = force_word(s,WORD,TRUE,TRUE);
8649             LOP(OP_SORT,XREF);
8650
8651         case KEY_split:
8652             LOP(OP_SPLIT,XTERM);
8653
8654         case KEY_sprintf:
8655             LOP(OP_SPRINTF,XTERM);
8656
8657         case KEY_splice:
8658             LOP(OP_SPLICE,XTERM);
8659
8660         case KEY_sqrt:
8661             UNI(OP_SQRT);
8662
8663         case KEY_srand:
8664             UNI(OP_SRAND);
8665
8666         case KEY_stat:
8667             UNI(OP_STAT);
8668
8669         case KEY_study:
8670             UNI(OP_STUDY);
8671
8672         case KEY_substr:
8673             LOP(OP_SUBSTR,XTERM);
8674
8675         case KEY_format:
8676         case KEY_sub:
8677           really_sub:
8678             {
8679                 char * const tmpbuf = PL_tokenbuf + 1;
8680                 expectation attrful;
8681                 bool have_name, have_proto;
8682                 const int key = tmp;
8683 #ifndef PERL_MAD
8684                 SV *format_name = NULL;
8685 #endif
8686
8687 #ifdef PERL_MAD
8688                 SV *tmpwhite = 0;
8689
8690                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8691                 SV *subtoken = PL_madskills
8692                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8693                    : NULL;
8694                 PL_thistoken = 0;
8695
8696                 d = s;
8697                 s = SKIPSPACE2(s,tmpwhite);
8698 #else
8699                 d = s;
8700                 s = skipspace(s);
8701 #endif
8702
8703                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8704                     (*s == ':' && s[1] == ':'))
8705                 {
8706 #ifdef PERL_MAD
8707                     SV *nametoke = NULL;
8708 #endif
8709
8710                     PL_expect = XBLOCK;
8711                     attrful = XATTRBLOCK;
8712                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8713                                   &len);
8714 #ifdef PERL_MAD
8715                     if (PL_madskills)
8716                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8717 #else
8718                     if (key == KEY_format)
8719                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8720 #endif
8721                     *PL_tokenbuf = '&';
8722                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8723                      || pad_findmy_pvn(
8724                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8725                         ) != NOT_IN_PAD)
8726                         sv_setpvn(PL_subname, tmpbuf, len);
8727                     else {
8728                         sv_setsv(PL_subname,PL_curstname);
8729                         sv_catpvs(PL_subname,"::");
8730                         sv_catpvn(PL_subname,tmpbuf,len);
8731                     }
8732                     if (SvUTF8(PL_linestr))
8733                         SvUTF8_on(PL_subname);
8734                     have_name = TRUE;
8735
8736
8737 #ifdef PERL_MAD
8738                     start_force(0);
8739                     CURMAD('X', nametoke);
8740                     CURMAD('_', tmpwhite);
8741                     force_ident_maybe_lex('&');
8742
8743                     s = SKIPSPACE2(d,tmpwhite);
8744 #else
8745                     s = skipspace(d);
8746 #endif
8747                 }
8748                 else {
8749                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8750                     {
8751                         *d = '\0';
8752                         /* diag_listed_as: Missing name in "%s sub" */
8753                         Perl_croak(aTHX_
8754                                   "Missing name in \"%s\"", PL_bufptr);
8755                     }
8756                     PL_expect = XTERMBLOCK;
8757                     attrful = XATTRTERM;
8758                     sv_setpvs(PL_subname,"?");
8759                     have_name = FALSE;
8760                 }
8761
8762                 if (key == KEY_format) {
8763 #ifdef PERL_MAD
8764                     PL_thistoken = subtoken;
8765                     s = d;
8766 #else
8767                     if (format_name) {
8768                         start_force(PL_curforce);
8769                         NEXTVAL_NEXTTOKE.opval
8770                             = (OP*)newSVOP(OP_CONST,0, format_name);
8771                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8772                         force_next(WORD);
8773                     }
8774 #endif
8775                     PREBLOCK(FORMAT);
8776                 }
8777
8778                 /* Look for a prototype */
8779                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8780                     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8781                     COPLINE_SET_FROM_MULTI_END;
8782                     if (!s)
8783                         Perl_croak(aTHX_ "Prototype not terminated");
8784                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8785                     have_proto = TRUE;
8786
8787 #ifdef PERL_MAD
8788                     start_force(0);
8789                     CURMAD('q', PL_thisopen);
8790                     CURMAD('_', tmpwhite);
8791                     CURMAD('=', PL_thisstuff);
8792                     CURMAD('Q', PL_thisclose);
8793                     NEXTVAL_NEXTTOKE.opval =
8794                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8795                     PL_lex_stuff = NULL;
8796                     force_next(THING);
8797
8798                     s = SKIPSPACE2(s,tmpwhite);
8799 #else
8800                     s = skipspace(s);
8801 #endif
8802                 }
8803                 else
8804                     have_proto = FALSE;
8805
8806                 if (*s == ':' && s[1] != ':')
8807                     PL_expect = attrful;
8808                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8809                     if (!have_name)
8810                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8811                     else if (*s != ';' && *s != '}')
8812                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8813                 }
8814
8815 #ifdef PERL_MAD
8816                 start_force(0);
8817                 if (tmpwhite) {
8818                     if (PL_madskills)
8819                         curmad('^', newSVpvs(""));
8820                     CURMAD('_', tmpwhite);
8821                 }
8822                 force_next(0);
8823
8824                 PL_thistoken = subtoken;
8825                 PERL_UNUSED_VAR(have_proto);
8826 #else
8827                 if (have_proto) {
8828                     NEXTVAL_NEXTTOKE.opval =
8829                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8830                     PL_lex_stuff = NULL;
8831                     force_next(THING);
8832                 }
8833 #endif
8834                 if (!have_name) {
8835                     if (PL_curstash)
8836                         sv_setpvs(PL_subname, "__ANON__");
8837                     else
8838                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8839                     TOKEN(ANONSUB);
8840                 }
8841 #ifndef PERL_MAD
8842                 force_ident_maybe_lex('&');
8843 #endif
8844                 TOKEN(SUB);
8845             }
8846
8847         case KEY_system:
8848             LOP(OP_SYSTEM,XREF);
8849
8850         case KEY_symlink:
8851             LOP(OP_SYMLINK,XTERM);
8852
8853         case KEY_syscall:
8854             LOP(OP_SYSCALL,XTERM);
8855
8856         case KEY_sysopen:
8857             LOP(OP_SYSOPEN,XTERM);
8858
8859         case KEY_sysseek:
8860             LOP(OP_SYSSEEK,XTERM);
8861
8862         case KEY_sysread:
8863             LOP(OP_SYSREAD,XTERM);
8864
8865         case KEY_syswrite:
8866             LOP(OP_SYSWRITE,XTERM);
8867
8868         case KEY_tr:
8869         case KEY_y:
8870             s = scan_trans(s);
8871             TERM(sublex_start());
8872
8873         case KEY_tell:
8874             UNI(OP_TELL);
8875
8876         case KEY_telldir:
8877             UNI(OP_TELLDIR);
8878
8879         case KEY_tie:
8880             LOP(OP_TIE,XTERM);
8881
8882         case KEY_tied:
8883             UNI(OP_TIED);
8884
8885         case KEY_time:
8886             FUN0(OP_TIME);
8887
8888         case KEY_times:
8889             FUN0(OP_TMS);
8890
8891         case KEY_truncate:
8892             LOP(OP_TRUNCATE,XTERM);
8893
8894         case KEY_uc:
8895             UNI(OP_UC);
8896
8897         case KEY_ucfirst:
8898             UNI(OP_UCFIRST);
8899
8900         case KEY_untie:
8901             UNI(OP_UNTIE);
8902
8903         case KEY_until:
8904             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8905                 return REPORT(0);
8906             pl_yylval.ival = CopLINE(PL_curcop);
8907             OPERATOR(UNTIL);
8908
8909         case KEY_unless:
8910             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8911                 return REPORT(0);
8912             pl_yylval.ival = CopLINE(PL_curcop);
8913             OPERATOR(UNLESS);
8914
8915         case KEY_unlink:
8916             LOP(OP_UNLINK,XTERM);
8917
8918         case KEY_undef:
8919             UNIDOR(OP_UNDEF);
8920
8921         case KEY_unpack:
8922             LOP(OP_UNPACK,XTERM);
8923
8924         case KEY_utime:
8925             LOP(OP_UTIME,XTERM);
8926
8927         case KEY_umask:
8928             UNIDOR(OP_UMASK);
8929
8930         case KEY_unshift:
8931             LOP(OP_UNSHIFT,XTERM);
8932
8933         case KEY_use:
8934             s = tokenize_use(1, s);
8935             OPERATOR(USE);
8936
8937         case KEY_values:
8938             UNI(OP_VALUES);
8939
8940         case KEY_vec:
8941             LOP(OP_VEC,XTERM);
8942
8943         case KEY_when:
8944             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8945                 return REPORT(0);
8946             pl_yylval.ival = CopLINE(PL_curcop);
8947             Perl_ck_warner_d(aTHX_
8948                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8949                 "when is experimental");
8950             OPERATOR(WHEN);
8951
8952         case KEY_while:
8953             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8954                 return REPORT(0);
8955             pl_yylval.ival = CopLINE(PL_curcop);
8956             OPERATOR(WHILE);
8957
8958         case KEY_warn:
8959             PL_hints |= HINT_BLOCK_SCOPE;
8960             LOP(OP_WARN,XTERM);
8961
8962         case KEY_wait:
8963             FUN0(OP_WAIT);
8964
8965         case KEY_waitpid:
8966             LOP(OP_WAITPID,XTERM);
8967
8968         case KEY_wantarray:
8969             FUN0(OP_WANTARRAY);
8970
8971         case KEY_write:
8972             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8973              * we use the same number on EBCDIC */
8974             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8975             UNI(OP_ENTERWRITE);
8976
8977         case KEY_x:
8978             if (PL_expect == XOPERATOR) {
8979                 if (*s == '=' && !PL_lex_allbrackets &&
8980                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8981                     return REPORT(0);
8982                 Mop(OP_REPEAT);
8983             }
8984             check_uni();
8985             goto just_a_word;
8986
8987         case KEY_xor:
8988             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8989                 return REPORT(0);
8990             pl_yylval.ival = OP_XOR;
8991             OPERATOR(OROP);
8992         }
8993     }}
8994 }
8995
8996 /*
8997   S_pending_ident
8998
8999   Looks up an identifier in the pad or in a package
9000
9001   Returns:
9002     PRIVATEREF if this is a lexical name.
9003     WORD       if this belongs to a package.
9004
9005   Structure:
9006       if we're in a my declaration
9007           croak if they tried to say my($foo::bar)
9008           build the ops for a my() declaration
9009       if it's an access to a my() variable
9010           build ops for access to a my() variable
9011       if in a dq string, and they've said @foo and we can't find @foo
9012           warn
9013       build ops for a bareword
9014 */
9015
9016 static int
9017 S_pending_ident(pTHX)
9018 {
9019     dVAR;
9020     PADOFFSET tmp = 0;
9021     const char pit = (char)pl_yylval.ival;
9022     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9023     /* All routes through this function want to know if there is a colon.  */
9024     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9025
9026     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9027           "### Pending identifier '%s'\n", PL_tokenbuf); });
9028
9029     /* if we're in a my(), we can't allow dynamics here.
9030        $foo'bar has already been turned into $foo::bar, so
9031        just check for colons.
9032
9033        if it's a legal name, the OP is a PADANY.
9034     */
9035     if (PL_in_my) {
9036         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9037             if (has_colon)
9038                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9039                                   "variable %s in \"our\"",
9040                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9041             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9042         }
9043         else {
9044             if (has_colon) {
9045                 /* PL_no_myglob is constant */
9046                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9047                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9048                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9049                             UTF ? SVf_UTF8 : 0);
9050                 GCC_DIAG_RESTORE;
9051             }
9052
9053             pl_yylval.opval = newOP(OP_PADANY, 0);
9054             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9055                                                         UTF ? SVf_UTF8 : 0);
9056             return PRIVATEREF;
9057         }
9058     }
9059
9060     /*
9061        build the ops for accesses to a my() variable.
9062     */
9063
9064     if (!has_colon) {
9065         if (!PL_in_my)
9066             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9067                                     UTF ? SVf_UTF8 : 0);
9068         if (tmp != NOT_IN_PAD) {
9069             /* might be an "our" variable" */
9070             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9071                 /* build ops for a bareword */
9072                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9073                 HEK * const stashname = HvNAME_HEK(stash);
9074                 SV *  const sym = newSVhek(stashname);
9075                 sv_catpvs(sym, "::");
9076                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9077                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9078                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9079                 if (pit != '&')
9080                   gv_fetchsv(sym,
9081                     (PL_in_eval
9082                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9083                         : GV_ADDMULTI
9084                     ),
9085                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9086                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9087                      : SVt_PVHV));
9088                 return WORD;
9089             }
9090
9091             pl_yylval.opval = newOP(OP_PADANY, 0);
9092             pl_yylval.opval->op_targ = tmp;
9093             return PRIVATEREF;
9094         }
9095     }
9096
9097     /*
9098        Whine if they've said @foo in a doublequoted string,
9099        and @foo isn't a variable we can find in the symbol
9100        table.
9101     */
9102     if (ckWARN(WARN_AMBIGUOUS) &&
9103         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9104         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9105                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9106         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9107                 /* DO NOT warn for @- and @+ */
9108                 && !( PL_tokenbuf[2] == '\0' &&
9109                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9110            )
9111         {
9112             /* Downgraded from fatal to warning 20000522 mjd */
9113             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9114                         "Possible unintended interpolation of %"UTF8f
9115                         " in string",
9116                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9117         }
9118     }
9119
9120     /* build ops for a bareword */
9121     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9122                                    newSVpvn_flags(PL_tokenbuf + 1,
9123                                                       tokenbuf_len - 1,
9124                                                       UTF ? SVf_UTF8 : 0 ));
9125     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9126     if (pit != '&')
9127         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9128                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9129                      | ( UTF ? SVf_UTF8 : 0 ),
9130                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9131                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9132                       : SVt_PVHV));
9133     return WORD;
9134 }
9135
9136 STATIC void
9137 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9138 {
9139     dVAR;
9140
9141     PERL_ARGS_ASSERT_CHECKCOMMA;
9142
9143     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9144         if (ckWARN(WARN_SYNTAX)) {
9145             int level = 1;
9146             const char *w;
9147             for (w = s+2; *w && level; w++) {
9148                 if (*w == '(')
9149                     ++level;
9150                 else if (*w == ')')
9151                     --level;
9152             }
9153             while (isSPACE(*w))
9154                 ++w;
9155             /* the list of chars below is for end of statements or
9156              * block / parens, boolean operators (&&, ||, //) and branch
9157              * constructs (or, and, if, until, unless, while, err, for).
9158              * Not a very solid hack... */
9159             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9160                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9161                             "%s (...) interpreted as function",name);
9162         }
9163     }
9164     while (s < PL_bufend && isSPACE(*s))
9165         s++;
9166     if (*s == '(')
9167         s++;
9168     while (s < PL_bufend && isSPACE(*s))
9169         s++;
9170     if (isIDFIRST_lazy_if(s,UTF)) {
9171         const char * const w = s;
9172         s += UTF ? UTF8SKIP(s) : 1;
9173         while (isWORDCHAR_lazy_if(s,UTF))
9174             s += UTF ? UTF8SKIP(s) : 1;
9175         while (s < PL_bufend && isSPACE(*s))
9176             s++;
9177         if (*s == ',') {
9178             GV* gv;
9179             if (keyword(w, s - w, 0))
9180                 return;
9181
9182             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9183             if (gv && GvCVu(gv))
9184                 return;
9185             Perl_croak(aTHX_ "No comma allowed after %s", what);
9186         }
9187     }
9188 }
9189
9190 /* S_new_constant(): do any overload::constant lookup.
9191
9192    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9193    Best used as sv=new_constant(..., sv, ...).
9194    If s, pv are NULL, calls subroutine with one argument,
9195    and <type> is used with error messages only.
9196    <type> is assumed to be well formed UTF-8 */
9197
9198 STATIC SV *
9199 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9200                SV *sv, SV *pv, const char *type, STRLEN typelen)
9201 {
9202     dVAR; dSP;
9203     HV * table = GvHV(PL_hintgv);                /* ^H */
9204     SV *res;
9205     SV *errsv = NULL;
9206     SV **cvp;
9207     SV *cv, *typesv;
9208     const char *why1 = "", *why2 = "", *why3 = "";
9209
9210     PERL_ARGS_ASSERT_NEW_CONSTANT;
9211     /* We assume that this is true: */
9212     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9213     assert(type || s);
9214
9215     /* charnames doesn't work well if there have been errors found */
9216     if (PL_error_count > 0 && *key == 'c')
9217     {
9218         SvREFCNT_dec_NN(sv);
9219         return &PL_sv_undef;
9220     }
9221
9222     sv_2mortal(sv);                     /* Parent created it permanently */
9223     if (!table
9224         || ! (PL_hints & HINT_LOCALIZE_HH)
9225         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9226         || ! SvOK(*cvp))
9227     {
9228         char *msg;
9229
9230         /* Here haven't found what we're looking for.  If it is charnames,
9231          * perhaps it needs to be loaded.  Try doing that before giving up */
9232         if (*key == 'c') {
9233             Perl_load_module(aTHX_
9234                             0,
9235                             newSVpvs("_charnames"),
9236                              /* version parameter; no need to specify it, as if
9237                               * we get too early a version, will fail anyway,
9238                               * not being able to find '_charnames' */
9239                             NULL,
9240                             newSVpvs(":full"),
9241                             newSVpvs(":short"),
9242                             NULL);
9243             assert(sp == PL_stack_sp);
9244             table = GvHV(PL_hintgv);
9245             if (table
9246                 && (PL_hints & HINT_LOCALIZE_HH)
9247                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9248                 && SvOK(*cvp))
9249             {
9250                 goto now_ok;
9251             }
9252         }
9253         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9254             msg = Perl_form(aTHX_
9255                                "Constant(%.*s) unknown",
9256                                 (int)(type ? typelen : len),
9257                                 (type ? type: s));
9258         }
9259         else {
9260             why1 = "$^H{";
9261             why2 = key;
9262             why3 = "} is not defined";
9263         report:
9264             if (*key == 'c') {
9265                 msg = Perl_form(aTHX_
9266                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9267                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9268                       );
9269             }
9270             else {
9271                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9272                                     (int)(type ? typelen : len),
9273                                     (type ? type: s), why1, why2, why3);
9274             }
9275         }
9276         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9277         return SvREFCNT_inc_simple_NN(sv);
9278     }
9279 now_ok:
9280     cv = *cvp;
9281     if (!pv && s)
9282         pv = newSVpvn_flags(s, len, SVs_TEMP);
9283     if (type && pv)
9284         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9285     else
9286         typesv = &PL_sv_undef;
9287
9288     PUSHSTACKi(PERLSI_OVERLOAD);
9289     ENTER ;
9290     SAVETMPS;
9291
9292     PUSHMARK(SP) ;
9293     EXTEND(sp, 3);
9294     if (pv)
9295         PUSHs(pv);
9296     PUSHs(sv);
9297     if (pv)
9298         PUSHs(typesv);
9299     PUTBACK;
9300     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9301
9302     SPAGAIN ;
9303
9304     /* Check the eval first */
9305     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9306         STRLEN errlen;
9307         const char * errstr;
9308         sv_catpvs(errsv, "Propagated");
9309         errstr = SvPV_const(errsv, errlen);
9310         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9311         (void)POPs;
9312         res = SvREFCNT_inc_simple_NN(sv);
9313     }
9314     else {
9315         res = POPs;
9316         SvREFCNT_inc_simple_void_NN(res);
9317     }
9318
9319     PUTBACK ;
9320     FREETMPS ;
9321     LEAVE ;
9322     POPSTACK;
9323
9324     if (!SvOK(res)) {
9325         why1 = "Call to &{$^H{";
9326         why2 = key;
9327         why3 = "}} did not return a defined value";
9328         sv = res;
9329         (void)sv_2mortal(sv);
9330         goto report;
9331     }
9332
9333     return res;
9334 }
9335
9336 PERL_STATIC_INLINE void
9337 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9338     dVAR;
9339     PERL_ARGS_ASSERT_PARSE_IDENT;
9340
9341     for (;;) {
9342         if (*d >= e)
9343             Perl_croak(aTHX_ "%s", ident_too_long);
9344         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9345              /* The UTF-8 case must come first, otherwise things
9346              * like c\N{COMBINING TILDE} would start failing, as the
9347              * isWORDCHAR_A case below would gobble the 'c' up.
9348              */
9349
9350             char *t = *s + UTF8SKIP(*s);
9351             while (isIDCONT_utf8((U8*)t))
9352                 t += UTF8SKIP(t);
9353             if (*d + (t - *s) > e)
9354                 Perl_croak(aTHX_ "%s", ident_too_long);
9355             Copy(*s, *d, t - *s, char);
9356             *d += t - *s;
9357             *s = t;
9358         }
9359         else if ( isWORDCHAR_A(**s) ) {
9360             do {
9361                 *(*d)++ = *(*s)++;
9362             } while (isWORDCHAR_A(**s) && *d < e);
9363         }
9364         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9365             *(*d)++ = ':';
9366             *(*d)++ = ':';
9367             (*s)++;
9368         }
9369         else if (allow_package && **s == ':' && (*s)[1] == ':'
9370            /* Disallow things like Foo::$bar. For the curious, this is
9371             * the code path that triggers the "Bad name after" warning
9372             * when looking for barewords.
9373             */
9374            && (*s)[2] != '$') {
9375             *(*d)++ = *(*s)++;
9376             *(*d)++ = *(*s)++;
9377         }
9378         else
9379             break;
9380     }
9381     return;
9382 }
9383
9384 /* Returns a NUL terminated string, with the length of the string written to
9385    *slp
9386    */
9387 STATIC char *
9388 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9389 {
9390     dVAR;
9391     char *d = dest;
9392     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9393     bool is_utf8 = cBOOL(UTF);
9394
9395     PERL_ARGS_ASSERT_SCAN_WORD;
9396
9397     parse_ident(&s, &d, e, allow_package, is_utf8);
9398     *d = '\0';
9399     *slp = d - dest;
9400     return s;
9401 }
9402
9403 STATIC char *
9404 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9405 {
9406     dVAR;
9407     I32 herelines = PL_parser->herelines;
9408     SSize_t bracket = -1;
9409     char funny = *s++;
9410     char *d = dest;
9411     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9412     bool is_utf8 = cBOOL(UTF);
9413     I32 orig_copline = 0, tmp_copline = 0;
9414
9415     PERL_ARGS_ASSERT_SCAN_IDENT;
9416
9417     if (isSPACE(*s))
9418         s = PEEKSPACE(s);
9419     if (isDIGIT(*s)) {
9420         while (isDIGIT(*s)) {
9421             if (d >= e)
9422                 Perl_croak(aTHX_ "%s", ident_too_long);
9423             *d++ = *s++;
9424         }
9425     }
9426     else {
9427         parse_ident(&s, &d, e, 1, is_utf8);
9428     }
9429     *d = '\0';
9430     d = dest;
9431     if (*d) {
9432         /* Either a digit variable, or parse_ident() found an identifier
9433            (anything valid as a bareword), so job done and return.  */
9434         if (PL_lex_state != LEX_NORMAL)
9435             PL_lex_state = LEX_INTERPENDMAYBE;
9436         return s;
9437     }
9438     if (*s == '$' && s[1] &&
9439       (isIDFIRST_lazy_if(s+1,is_utf8)
9440          || isDIGIT_A((U8)s[1])
9441          || s[1] == '$'
9442          || s[1] == '{'
9443          || strnEQ(s+1,"::",2)) )
9444     {
9445         /* Dereferencing a value in a scalar variable.
9446            The alternatives are different syntaxes for a scalar variable.
9447            Using ' as a leading package separator isn't allowed. :: is.   */
9448         return s;
9449     }
9450     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9451     if (*s == '{') {
9452         bracket = s - SvPVX(PL_linestr);
9453         s++;
9454         orig_copline = CopLINE(PL_curcop);
9455         if (s < PL_bufend && isSPACE(*s)) {
9456             s = PEEKSPACE(s);
9457         }
9458     }
9459
9460 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9461  * iff Unicode semantics are to be used.  The legal ones are any of:
9462  *  a) ASCII digits
9463  *  b) ASCII punctuation
9464  *  c) When not under Unicode rules, any upper Latin1 character
9465  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9466  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
9467  *     the \s ones. */
9468 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
9469                                    || isDIGIT_A((U8)(d))                    \
9470                                    || (!(u) && !isASCII((U8)(d)))           \
9471                                    || ((((U8)(d)) < 32)                     \
9472                                        && (((((U8)(d)) >= 14)               \
9473                                            || (((U8)(d)) <= 8 && (d) != 0) \
9474                                            || (((U8)(d)) == 13))))          \
9475                                    || (((U8)(d)) == toCTRL('?')))
9476     if (s < PL_bufend
9477         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9478     {
9479         if ( isCNTRL_A((U8)*s) ) {
9480             deprecate("literal control characters in variable names");
9481         }
9482
9483         if (is_utf8) {
9484             const STRLEN skip = UTF8SKIP(s);
9485             STRLEN i;
9486             d[skip] = '\0';
9487             for ( i = 0; i < skip; i++ )
9488                 d[i] = *s++;
9489         }
9490         else {
9491             *d = *s++;
9492             d[1] = '\0';
9493         }
9494     }
9495     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9496     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9497         *d = toCTRL(*s);
9498         s++;
9499     }
9500     /* Warn about ambiguous code after unary operators if {...} notation isn't
9501        used.  There's no difference in ambiguity; it's merely a heuristic
9502        about when not to warn.  */
9503     else if (ck_uni && bracket == -1)
9504         check_uni();
9505     if (bracket != -1) {
9506         /* If we were processing {...} notation then...  */
9507         if (isIDFIRST_lazy_if(d,is_utf8)) {
9508             /* if it starts as a valid identifier, assume that it is one.
9509                (the later check for } being at the expected point will trap
9510                cases where this doesn't pan out.)  */
9511         d += is_utf8 ? UTF8SKIP(d) : 1;
9512         parse_ident(&s, &d, e, 1, is_utf8);
9513             *d = '\0';
9514             tmp_copline = CopLINE(PL_curcop);
9515             if (s < PL_bufend && isSPACE(*s)) {
9516                 s = PEEKSPACE(s);
9517             }
9518             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9519                 /* ${foo[0]} and ${foo{bar}} notation.  */
9520                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9521                     const char * const brack =
9522                         (const char *)
9523                         ((*s == '[') ? "[...]" : "{...}");
9524                     orig_copline = CopLINE(PL_curcop);
9525                     CopLINE_set(PL_curcop, tmp_copline);
9526    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9527                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9528                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9529                         funny, dest, brack, funny, dest, brack);
9530                     CopLINE_set(PL_curcop, orig_copline);
9531                 }
9532                 bracket++;
9533                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9534                 PL_lex_allbrackets++;
9535                 return s;
9536             }
9537         }
9538         /* Handle extended ${^Foo} variables
9539          * 1999-02-27 mjd-perl-patch@plover.com */
9540         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9541                  && isWORDCHAR(*s))
9542         {
9543             d++;
9544             while (isWORDCHAR(*s) && d < e) {
9545                 *d++ = *s++;
9546             }
9547             if (d >= e)
9548                 Perl_croak(aTHX_ "%s", ident_too_long);
9549             *d = '\0';
9550         }
9551
9552         if ( !tmp_copline )
9553             tmp_copline = CopLINE(PL_curcop);
9554         if (s < PL_bufend && isSPACE(*s)) {
9555             s = PEEKSPACE(s);
9556         }
9557
9558         /* Expect to find a closing } after consuming any trailing whitespace.
9559          */
9560         if (*s == '}') {
9561             s++;
9562             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9563                 PL_lex_state = LEX_INTERPEND;
9564                 PL_expect = XREF;
9565             }
9566             if (PL_lex_state == LEX_NORMAL) {
9567                 if (ckWARN(WARN_AMBIGUOUS) &&
9568                     (keyword(dest, d - dest, 0)
9569                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9570                 {
9571                     SV *tmp = newSVpvn_flags( dest, d - dest,
9572                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9573                     if (funny == '#')
9574                         funny = '@';
9575                     orig_copline = CopLINE(PL_curcop);
9576                     CopLINE_set(PL_curcop, tmp_copline);
9577                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9578                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9579                         funny, tmp, funny, tmp);
9580                     CopLINE_set(PL_curcop, orig_copline);
9581                 }
9582             }
9583         }
9584         else {
9585             /* Didn't find the closing } at the point we expected, so restore
9586                state such that the next thing to process is the opening { and */
9587             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9588             CopLINE_set(PL_curcop, orig_copline);
9589             PL_parser->herelines = herelines;
9590             *dest = '\0';
9591         }
9592     }
9593     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9594         PL_lex_state = LEX_INTERPEND;
9595     return s;
9596 }
9597
9598 static bool
9599 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9600
9601     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9602      * the parse starting at 's', based on the subset that are valid in this
9603      * context input to this routine in 'valid_flags'. Advances s.  Returns
9604      * TRUE if the input should be treated as a valid flag, so the next char
9605      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9606      * first call on the current regex.  This routine will set it to any
9607      * charset modifier found.  The caller shouldn't change it.  This way,
9608      * another charset modifier encountered in the parse can be detected as an
9609      * error, as we have decided to allow only one */
9610
9611     const char c = **s;
9612     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9613
9614     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9615         if (isWORDCHAR_lazy_if(*s, UTF)) {
9616             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9617                        UTF ? SVf_UTF8 : 0);
9618             (*s) += charlen;
9619             /* Pretend that it worked, so will continue processing before
9620              * dieing */
9621             return TRUE;
9622         }
9623         return FALSE;
9624     }
9625
9626     switch (c) {
9627
9628         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9629         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9630         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9631         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9632         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9633         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9634         case LOCALE_PAT_MOD:
9635             if (*charset) {
9636                 goto multiple_charsets;
9637             }
9638             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9639             *charset = c;
9640             break;
9641         case UNICODE_PAT_MOD:
9642             if (*charset) {
9643                 goto multiple_charsets;
9644             }
9645             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9646             *charset = c;
9647             break;
9648         case ASCII_RESTRICT_PAT_MOD:
9649             if (! *charset) {
9650                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9651             }
9652             else {
9653
9654                 /* Error if previous modifier wasn't an 'a', but if it was, see
9655                  * if, and accept, a second occurrence (only) */
9656                 if (*charset != 'a'
9657                     || get_regex_charset(*pmfl)
9658                         != REGEX_ASCII_RESTRICTED_CHARSET)
9659                 {
9660                         goto multiple_charsets;
9661                 }
9662                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9663             }
9664             *charset = c;
9665             break;
9666         case DEPENDS_PAT_MOD:
9667             if (*charset) {
9668                 goto multiple_charsets;
9669             }
9670             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9671             *charset = c;
9672             break;
9673     }
9674
9675     (*s)++;
9676     return TRUE;
9677
9678     multiple_charsets:
9679         if (*charset != c) {
9680             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9681         }
9682         else if (c == 'a') {
9683   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9684             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9685         }
9686         else {
9687             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9688         }
9689
9690         /* Pretend that it worked, so will continue processing before dieing */
9691         (*s)++;
9692         return TRUE;
9693 }
9694
9695 STATIC char *
9696 S_scan_pat(pTHX_ char *start, I32 type)
9697 {
9698     dVAR;
9699     PMOP *pm;
9700     char *s;
9701     const char * const valid_flags =
9702         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9703     char charset = '\0';    /* character set modifier */
9704 #ifdef PERL_MAD
9705     char *modstart;
9706 #endif
9707
9708     PERL_ARGS_ASSERT_SCAN_PAT;
9709
9710     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9711                        TRUE /* look for escaped bracketed metas */, NULL);
9712
9713     if (!s) {
9714         const char * const delimiter = skipspace(start);
9715         Perl_croak(aTHX_
9716                    (const char *)
9717                    (*delimiter == '?'
9718                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9719                     : "Search pattern not terminated" ));
9720     }
9721
9722     pm = (PMOP*)newPMOP(type, 0);
9723     if (PL_multi_open == '?') {
9724         /* This is the only point in the code that sets PMf_ONCE:  */
9725         pm->op_pmflags |= PMf_ONCE;
9726
9727         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9728            allows us to restrict the list needed by reset to just the ??
9729            matches.  */
9730         assert(type != OP_TRANS);
9731         if (PL_curstash) {
9732             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9733             U32 elements;
9734             if (!mg) {
9735                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9736                                  0);
9737             }
9738             elements = mg->mg_len / sizeof(PMOP**);
9739             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9740             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9741             mg->mg_len = elements * sizeof(PMOP**);
9742             PmopSTASH_set(pm,PL_curstash);
9743         }
9744     }
9745 #ifdef PERL_MAD
9746     modstart = s;
9747 #endif
9748
9749     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9750      * anon CV. False positives like qr/[(?{]/ are harmless */
9751
9752     if (type == OP_QR) {
9753         STRLEN len;
9754         char *e, *p = SvPV(PL_lex_stuff, len);
9755         e = p + len;
9756         for (; p < e; p++) {
9757             if (p[0] == '(' && p[1] == '?'
9758                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9759             {
9760                 pm->op_pmflags |= PMf_HAS_CV;
9761                 break;
9762             }
9763         }
9764         pm->op_pmflags |= PMf_IS_QR;
9765     }
9766
9767     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9768 #ifdef PERL_MAD
9769     if (PL_madskills && modstart != s) {
9770         SV* tmptoken = newSVpvn(modstart, s - modstart);
9771         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9772     }
9773 #endif
9774     /* issue a warning if /c is specified,but /g is not */
9775     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9776     {
9777         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9778                        "Use of /c modifier is meaningless without /g" );
9779     }
9780
9781     PL_lex_op = (OP*)pm;
9782     pl_yylval.ival = OP_MATCH;
9783     return s;
9784 }
9785
9786 STATIC char *
9787 S_scan_subst(pTHX_ char *start)
9788 {
9789     dVAR;
9790     char *s;
9791     PMOP *pm;
9792     I32 first_start;
9793     line_t first_line;
9794     I32 es = 0;
9795     char charset = '\0';    /* character set modifier */
9796 #ifdef PERL_MAD
9797     char *modstart;
9798 #endif
9799     char *t;
9800
9801     PERL_ARGS_ASSERT_SCAN_SUBST;
9802
9803     pl_yylval.ival = OP_NULL;
9804
9805     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9806                  TRUE /* look for escaped bracketed metas */, &t);
9807
9808     if (!s)
9809         Perl_croak(aTHX_ "Substitution pattern not terminated");
9810
9811     s = t;
9812 #ifdef PERL_MAD
9813     if (PL_madskills) {
9814         CURMAD('q', PL_thisopen);
9815         CURMAD('_', PL_thiswhite);
9816         CURMAD('E', PL_thisstuff);
9817         CURMAD('Q', PL_thisclose);
9818         PL_realtokenstart = s - SvPVX(PL_linestr);
9819     }
9820 #endif
9821
9822     first_start = PL_multi_start;
9823     first_line = CopLINE(PL_curcop);
9824     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9825     if (!s) {
9826         if (PL_lex_stuff) {
9827             SvREFCNT_dec(PL_lex_stuff);
9828             PL_lex_stuff = NULL;
9829         }
9830         Perl_croak(aTHX_ "Substitution replacement not terminated");
9831     }
9832     PL_multi_start = first_start;       /* so whole substitution is taken together */
9833
9834     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9835
9836 #ifdef PERL_MAD
9837     if (PL_madskills) {
9838         CURMAD('z', PL_thisopen);
9839         CURMAD('R', PL_thisstuff);
9840         CURMAD('Z', PL_thisclose);
9841     }
9842     modstart = s;
9843 #endif
9844
9845     while (*s) {
9846         if (*s == EXEC_PAT_MOD) {
9847             s++;
9848             es++;
9849         }
9850         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9851         {
9852             break;
9853         }
9854     }
9855
9856 #ifdef PERL_MAD
9857     if (PL_madskills) {
9858         if (modstart != s)
9859             curmad('m', newSVpvn(modstart, s - modstart));
9860         append_madprops(PL_thismad, (OP*)pm, 0);
9861         PL_thismad = 0;
9862     }
9863 #endif
9864     if ((pm->op_pmflags & PMf_CONTINUE)) {
9865         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9866     }
9867
9868     if (es) {
9869         SV * const repl = newSVpvs("");
9870
9871         PL_multi_end = 0;
9872         pm->op_pmflags |= PMf_EVAL;
9873         while (es-- > 0) {
9874             if (es)
9875                 sv_catpvs(repl, "eval ");
9876             else
9877                 sv_catpvs(repl, "do ");
9878         }
9879         sv_catpvs(repl, "{");
9880         sv_catsv(repl, PL_sublex_info.repl);
9881         sv_catpvs(repl, "}");
9882         SvEVALED_on(repl);
9883         SvREFCNT_dec(PL_sublex_info.repl);
9884         PL_sublex_info.repl = repl;
9885     }
9886     if (CopLINE(PL_curcop) != first_line) {
9887         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9888         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9889             CopLINE(PL_curcop) - first_line;
9890         CopLINE_set(PL_curcop, first_line);
9891     }
9892
9893     PL_lex_op = (OP*)pm;
9894     pl_yylval.ival = OP_SUBST;
9895     return s;
9896 }
9897
9898 STATIC char *
9899 S_scan_trans(pTHX_ char *start)
9900 {
9901     dVAR;
9902     char* s;
9903     OP *o;
9904     U8 squash;
9905     U8 del;
9906     U8 complement;
9907     bool nondestruct = 0;
9908 #ifdef PERL_MAD
9909     char *modstart;
9910 #endif
9911     char *t;
9912
9913     PERL_ARGS_ASSERT_SCAN_TRANS;
9914
9915     pl_yylval.ival = OP_NULL;
9916
9917     s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9918     if (!s)
9919         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9920
9921     s = t;
9922 #ifdef PERL_MAD
9923     if (PL_madskills) {
9924         CURMAD('q', PL_thisopen);
9925         CURMAD('_', PL_thiswhite);
9926         CURMAD('E', PL_thisstuff);
9927         CURMAD('Q', PL_thisclose);
9928         PL_realtokenstart = s - SvPVX(PL_linestr);
9929     }
9930 #endif
9931
9932     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9933     if (!s) {
9934         if (PL_lex_stuff) {
9935             SvREFCNT_dec(PL_lex_stuff);
9936             PL_lex_stuff = NULL;
9937         }
9938         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9939     }
9940     if (PL_madskills) {
9941         CURMAD('z', PL_thisopen);
9942         CURMAD('R', PL_thisstuff);
9943         CURMAD('Z', PL_thisclose);
9944     }
9945
9946     complement = del = squash = 0;
9947 #ifdef PERL_MAD
9948     modstart = s;
9949 #endif
9950     while (1) {
9951         switch (*s) {
9952         case 'c':
9953             complement = OPpTRANS_COMPLEMENT;
9954             break;
9955         case 'd':
9956             del = OPpTRANS_DELETE;
9957             break;
9958         case 's':
9959             squash = OPpTRANS_SQUASH;
9960             break;
9961         case 'r':
9962             nondestruct = 1;
9963             break;
9964         default:
9965             goto no_more;
9966         }
9967         s++;
9968     }
9969   no_more:
9970
9971     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9972     o->op_private &= ~OPpTRANS_ALL;
9973     o->op_private |= del|squash|complement|
9974       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9975       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9976
9977     PL_lex_op = o;
9978     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9979
9980 #ifdef PERL_MAD
9981     if (PL_madskills) {
9982         if (modstart != s)
9983             curmad('m', newSVpvn(modstart, s - modstart));
9984         append_madprops(PL_thismad, o, 0);
9985         PL_thismad = 0;
9986     }
9987 #endif
9988
9989     return s;
9990 }
9991
9992 /* scan_heredoc
9993    Takes a pointer to the first < in <<FOO.
9994    Returns a pointer to the byte following <<FOO.
9995
9996    This function scans a heredoc, which involves different methods
9997    depending on whether we are in a string eval, quoted construct, etc.
9998    This is because PL_linestr could containing a single line of input, or
9999    a whole string being evalled, or the contents of the current quote-
10000    like operator.
10001
10002    The two basic methods are:
10003     - Steal lines from the input stream
10004     - Scan the heredoc in PL_linestr and remove it therefrom
10005
10006    In a file scope or filtered eval, the first method is used; in a
10007    string eval, the second.
10008
10009    In a quote-like operator, we have to choose between the two,
10010    depending on where we can find a newline.  We peek into outer lex-
10011    ing scopes until we find one with a newline in it.  If we reach the
10012    outermost lexing scope and it is a file, we use the stream method.
10013    Otherwise it is treated as an eval.
10014 */
10015
10016 STATIC char *
10017 S_scan_heredoc(pTHX_ char *s)
10018 {
10019     dVAR;
10020     I32 op_type = OP_SCALAR;
10021     I32 len;
10022     SV *tmpstr;
10023     char term;
10024     char *d;
10025     char *e;
10026     char *peek;
10027     const bool infile = PL_rsfp || PL_parser->filtered;
10028     const line_t origline = CopLINE(PL_curcop);
10029     LEXSHARED *shared = PL_parser->lex_shared;
10030 #ifdef PERL_MAD
10031     I32 stuffstart = s - SvPVX(PL_linestr);
10032     char *tstart;
10033
10034     PL_realtokenstart = -1;
10035 #endif
10036
10037     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10038
10039     s += 2;
10040     d = PL_tokenbuf + 1;
10041     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10042     *PL_tokenbuf = '\n';
10043     peek = s;
10044     while (SPACE_OR_TAB(*peek))
10045         peek++;
10046     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10047         s = peek;
10048         term = *s++;
10049         s = delimcpy(d, e, s, PL_bufend, term, &len);
10050         if (s == PL_bufend)
10051             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10052         d += len;
10053         s++;
10054     }
10055     else {
10056         if (*s == '\\')
10057             /* <<\FOO is equivalent to <<'FOO' */
10058             s++, term = '\'';
10059         else
10060             term = '"';
10061         if (!isWORDCHAR_lazy_if(s,UTF))
10062             deprecate("bare << to mean <<\"\"");
10063         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10064             if (d < e)
10065                 *d++ = *s;
10066         }
10067     }
10068     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10069         Perl_croak(aTHX_ "Delimiter for here document is too long");
10070     *d++ = '\n';
10071     *d = '\0';
10072     len = d - PL_tokenbuf;
10073
10074 #ifdef PERL_MAD
10075     if (PL_madskills) {
10076         tstart = PL_tokenbuf + 1;
10077         PL_thisclose = newSVpvn(tstart, len - 1);
10078         tstart = SvPVX(PL_linestr) + stuffstart;
10079         PL_thisopen = newSVpvn(tstart, s - tstart);
10080         stuffstart = s - SvPVX(PL_linestr);
10081     }
10082 #endif
10083 #ifndef PERL_STRICT_CR
10084     d = strchr(s, '\r');
10085     if (d) {
10086         char * const olds = s;
10087         s = d;
10088         while (s < PL_bufend) {
10089             if (*s == '\r') {
10090                 *d++ = '\n';
10091                 if (*++s == '\n')
10092                     s++;
10093             }
10094             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10095                 *d++ = *s++;
10096                 s++;
10097             }
10098             else
10099                 *d++ = *s++;
10100         }
10101         *d = '\0';
10102         PL_bufend = d;
10103         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10104         s = olds;
10105     }
10106 #endif
10107 #ifdef PERL_MAD
10108     if (PL_madskills) {
10109         tstart = SvPVX(PL_linestr) + stuffstart;
10110         if (PL_thisstuff)
10111             sv_catpvn(PL_thisstuff, tstart, s - tstart);
10112         else
10113             PL_thisstuff = newSVpvn(tstart, s - tstart);
10114     }
10115
10116     stuffstart = s - SvPVX(PL_linestr);
10117 #endif
10118
10119     tmpstr = newSV_type(SVt_PVIV);
10120     SvGROW(tmpstr, 80);
10121     if (term == '\'') {
10122         op_type = OP_CONST;
10123         SvIV_set(tmpstr, -1);
10124     }
10125     else if (term == '`') {
10126         op_type = OP_BACKTICK;
10127         SvIV_set(tmpstr, '\\');
10128     }
10129
10130     PL_multi_start = origline + 1 + PL_parser->herelines;
10131     PL_multi_open = PL_multi_close = '<';
10132     /* inside a string eval or quote-like operator */
10133     if (!infile || PL_lex_inwhat) {
10134         SV *linestr;
10135         char *bufend;
10136         char * const olds = s;
10137         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10138         /* These two fields are not set until an inner lexing scope is
10139            entered.  But we need them set here. */
10140         shared->ls_bufptr  = s;
10141         shared->ls_linestr = PL_linestr;
10142         if (PL_lex_inwhat)
10143           /* Look for a newline.  If the current buffer does not have one,
10144              peek into the line buffer of the parent lexing scope, going
10145              up as many levels as necessary to find one with a newline
10146              after bufptr.
10147            */
10148           while (!(s = (char *)memchr(
10149                     (void *)shared->ls_bufptr, '\n',
10150                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10151                 ))) {
10152             shared = shared->ls_prev;
10153             /* shared is only null if we have gone beyond the outermost
10154                lexing scope.  In a file, we will have broken out of the
10155                loop in the previous iteration.  In an eval, the string buf-
10156                fer ends with "\n;", so the while condition above will have
10157                evaluated to false.  So shared can never be null. */
10158             assert(shared);
10159             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10160                most lexing scope.  In a file, shared->ls_linestr at that
10161                level is just one line, so there is no body to steal. */
10162             if (infile && !shared->ls_prev) {
10163                 s = olds;
10164                 goto streaming;
10165             }
10166           }
10167         else {  /* eval */
10168             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10169             assert(s);
10170         }
10171         linestr = shared->ls_linestr;
10172         bufend = SvEND(linestr);
10173         d = s;
10174         while (s < bufend - len + 1 &&
10175           memNE(s,PL_tokenbuf,len) ) {
10176             if (*s++ == '\n')
10177                 ++PL_parser->herelines;
10178         }
10179         if (s >= bufend - len + 1) {
10180             goto interminable;
10181         }
10182         sv_setpvn(tmpstr,d+1,s-d);
10183 #ifdef PERL_MAD
10184         if (PL_madskills) {
10185             if (PL_thisstuff)
10186                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10187             else
10188                 PL_thisstuff = newSVpvn(d + 1, s - d);
10189             stuffstart = s - SvPVX(PL_linestr);
10190         }
10191 #endif
10192         s += len - 1;
10193         /* the preceding stmt passes a newline */
10194         PL_parser->herelines++;
10195
10196         /* s now points to the newline after the heredoc terminator.
10197            d points to the newline before the body of the heredoc.
10198          */
10199
10200         /* We are going to modify linestr in place here, so set
10201            aside copies of the string if necessary for re-evals or
10202            (caller $n)[6]. */
10203         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10204            check shared->re_eval_str. */
10205         if (shared->re_eval_start || shared->re_eval_str) {
10206             /* Set aside the rest of the regexp */
10207             if (!shared->re_eval_str)
10208                 shared->re_eval_str =
10209                        newSVpvn(shared->re_eval_start,
10210                                 bufend - shared->re_eval_start);
10211             shared->re_eval_start -= s-d;
10212         }
10213         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10214             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10215             cx->blk_eval.cur_text == linestr)
10216         {
10217             cx->blk_eval.cur_text = newSVsv(linestr);
10218             SvSCREAM_on(cx->blk_eval.cur_text);
10219         }
10220         /* Copy everything from s onwards back to d. */
10221         Move(s,d,bufend-s + 1,char);
10222         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10223         /* Setting PL_bufend only applies when we have not dug deeper
10224            into other scopes, because sublex_done sets PL_bufend to
10225            SvEND(PL_linestr). */
10226         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10227         s = olds;
10228     }
10229     else
10230     {
10231       SV *linestr_save;
10232      streaming:
10233       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10234       term = PL_tokenbuf[1];
10235       len--;
10236       linestr_save = PL_linestr; /* must restore this afterwards */
10237       d = s;                     /* and this */
10238       PL_linestr = newSVpvs("");
10239       PL_bufend = SvPVX(PL_linestr);
10240       while (1) {
10241 #ifdef PERL_MAD
10242         if (PL_madskills) {
10243             tstart = SvPVX(PL_linestr) + stuffstart;
10244             if (PL_thisstuff)
10245                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10246             else
10247                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10248         }
10249 #endif
10250         PL_bufptr = PL_bufend;
10251         CopLINE_set(PL_curcop,
10252                     origline + 1 + PL_parser->herelines);
10253         if (!lex_next_chunk(LEX_NO_TERM)
10254          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10255             SvREFCNT_dec(linestr_save);
10256             goto interminable;
10257         }
10258         CopLINE_set(PL_curcop, origline);
10259         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10260             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10261             /* ^That should be enough to avoid this needing to grow:  */
10262             sv_catpvs(PL_linestr, "\n\0");
10263             assert(s == SvPVX(PL_linestr));
10264             PL_bufend = SvEND(PL_linestr);
10265         }
10266         s = PL_bufptr;
10267 #ifdef PERL_MAD
10268         stuffstart = s - SvPVX(PL_linestr);
10269 #endif
10270         PL_parser->herelines++;
10271         PL_last_lop = PL_last_uni = NULL;
10272 #ifndef PERL_STRICT_CR
10273         if (PL_bufend - PL_linestart >= 2) {
10274             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10275                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10276             {
10277                 PL_bufend[-2] = '\n';
10278                 PL_bufend--;
10279                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10280             }
10281             else if (PL_bufend[-1] == '\r')
10282                 PL_bufend[-1] = '\n';
10283         }
10284         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10285             PL_bufend[-1] = '\n';
10286 #endif
10287         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10288             SvREFCNT_dec(PL_linestr);
10289             PL_linestr = linestr_save;
10290             PL_linestart = SvPVX(linestr_save);
10291             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10292             s = d;
10293             break;
10294         }
10295         else {
10296             sv_catsv(tmpstr,PL_linestr);
10297         }
10298       }
10299     }
10300     PL_multi_end = origline + PL_parser->herelines;
10301     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10302         SvPV_shrink_to_cur(tmpstr);
10303     }
10304     if (!IN_BYTES) {
10305         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10306             SvUTF8_on(tmpstr);
10307         else if (PL_encoding)
10308             sv_recode_to_utf8(tmpstr, PL_encoding);
10309     }
10310     PL_lex_stuff = tmpstr;
10311     pl_yylval.ival = op_type;
10312     return s;
10313
10314   interminable:
10315     SvREFCNT_dec(tmpstr);
10316     CopLINE_set(PL_curcop, origline);
10317     missingterm(PL_tokenbuf + 1);
10318 }
10319
10320 /* scan_inputsymbol
10321    takes: current position in input buffer
10322    returns: new position in input buffer
10323    side-effects: pl_yylval and lex_op are set.
10324
10325    This code handles:
10326
10327    <>           read from ARGV
10328    <FH>         read from filehandle
10329    <pkg::FH>    read from package qualified filehandle
10330    <pkg'FH>     read from package qualified filehandle
10331    <$fh>        read from filehandle in $fh
10332    <*.h>        filename glob
10333
10334 */
10335
10336 STATIC char *
10337 S_scan_inputsymbol(pTHX_ char *start)
10338 {
10339     dVAR;
10340     char *s = start;            /* current position in buffer */
10341     char *end;
10342     I32 len;
10343     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10344     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10345
10346     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10347
10348     end = strchr(s, '\n');
10349     if (!end)
10350         end = PL_bufend;
10351     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10352
10353     /* die if we didn't have space for the contents of the <>,
10354        or if it didn't end, or if we see a newline
10355     */
10356
10357     if (len >= (I32)sizeof PL_tokenbuf)
10358         Perl_croak(aTHX_ "Excessively long <> operator");
10359     if (s >= end)
10360         Perl_croak(aTHX_ "Unterminated <> operator");
10361
10362     s++;
10363
10364     /* check for <$fh>
10365        Remember, only scalar variables are interpreted as filehandles by
10366        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10367        treated as a glob() call.
10368        This code makes use of the fact that except for the $ at the front,
10369        a scalar variable and a filehandle look the same.
10370     */
10371     if (*d == '$' && d[1]) d++;
10372
10373     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10374     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10375         d += UTF ? UTF8SKIP(d) : 1;
10376
10377     /* If we've tried to read what we allow filehandles to look like, and
10378        there's still text left, then it must be a glob() and not a getline.
10379        Use scan_str to pull out the stuff between the <> and treat it
10380        as nothing more than a string.
10381     */
10382
10383     if (d - PL_tokenbuf != len) {
10384         pl_yylval.ival = OP_GLOB;
10385         s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10386         if (!s)
10387            Perl_croak(aTHX_ "Glob not terminated");
10388         return s;
10389     }
10390     else {
10391         bool readline_overriden = FALSE;
10392         GV *gv_readline;
10393         /* we're in a filehandle read situation */
10394         d = PL_tokenbuf;
10395
10396         /* turn <> into <ARGV> */
10397         if (!len)
10398             Copy("ARGV",d,5,char);
10399
10400         /* Check whether readline() is overriden */
10401         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10402         if ((gv_readline = gv_override("readline",8)))
10403             readline_overriden = TRUE;
10404
10405         /* if <$fh>, create the ops to turn the variable into a
10406            filehandle
10407         */
10408         if (*d == '$') {
10409             /* try to find it in the pad for this block, otherwise find
10410                add symbol table ops
10411             */
10412             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10413             if (tmp != NOT_IN_PAD) {
10414                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10415                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10416                     HEK * const stashname = HvNAME_HEK(stash);
10417                     SV * const sym = sv_2mortal(newSVhek(stashname));
10418                     sv_catpvs(sym, "::");
10419                     sv_catpv(sym, d+1);
10420                     d = SvPVX(sym);
10421                     goto intro_sym;
10422                 }
10423                 else {
10424                     OP * const o = newOP(OP_PADSV, 0);
10425                     o->op_targ = tmp;
10426                     PL_lex_op = readline_overriden
10427                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10428                                 op_append_elem(OP_LIST, o,
10429                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10430                         : (OP*)newUNOP(OP_READLINE, 0, o);
10431                 }
10432             }
10433             else {
10434                 GV *gv;
10435                 ++d;
10436 intro_sym:
10437                 gv = gv_fetchpv(d,
10438                                 (PL_in_eval
10439                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10440                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10441                                 SVt_PV);
10442                 PL_lex_op = readline_overriden
10443                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10444                             op_append_elem(OP_LIST,
10445                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10446                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10447                     : (OP*)newUNOP(OP_READLINE, 0,
10448                             newUNOP(OP_RV2SV, 0,
10449                                 newGVOP(OP_GV, 0, gv)));
10450             }
10451             if (!readline_overriden)
10452                 PL_lex_op->op_flags |= OPf_SPECIAL;
10453             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10454             pl_yylval.ival = OP_NULL;
10455         }
10456
10457         /* If it's none of the above, it must be a literal filehandle
10458            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10459         else {
10460             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10461             PL_lex_op = readline_overriden
10462                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10463                         op_append_elem(OP_LIST,
10464                             newGVOP(OP_GV, 0, gv),
10465                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10466                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10467             pl_yylval.ival = OP_NULL;
10468         }
10469     }
10470
10471     return s;
10472 }
10473
10474
10475 /* scan_str
10476    takes:
10477         start                   position in buffer
10478         keep_quoted             preserve \ on the embedded delimiter(s)
10479         keep_delims             preserve the delimiters around the string
10480         re_reparse              compiling a run-time /(?{})/:
10481                                    collapse // to /,  and skip encoding src
10482         deprecate_escaped_meta  issue a deprecation warning for cer-
10483                                 tain paired metacharacters that appear
10484                                 escaped within it
10485         delimp                  if non-null, this is set to the position of
10486                                 the closing delimiter, or just after it if
10487                                 the closing and opening delimiters differ
10488                                 (i.e., the opening delimiter of a substitu-
10489                                 tion replacement)
10490    returns: position to continue reading from buffer
10491    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10492         updates the read buffer.
10493
10494    This subroutine pulls a string out of the input.  It is called for:
10495         q               single quotes           q(literal text)
10496         '               single quotes           'literal text'
10497         qq              double quotes           qq(interpolate $here please)
10498         "               double quotes           "interpolate $here please"
10499         qx              backticks               qx(/bin/ls -l)
10500         `               backticks               `/bin/ls -l`
10501         qw              quote words             @EXPORT_OK = qw( func() $spam )
10502         m//             regexp match            m/this/
10503         s///            regexp substitute       s/this/that/
10504         tr///           string transliterate    tr/this/that/
10505         y///            string transliterate    y/this/that/
10506         ($*@)           sub prototypes          sub foo ($)
10507         (stuff)         sub attr parameters     sub foo : attr(stuff)
10508         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10509
10510    In most of these cases (all but <>, patterns and transliterate)
10511    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10512    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10513    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10514    calls scan_str().
10515
10516    It skips whitespace before the string starts, and treats the first
10517    character as the delimiter.  If the delimiter is one of ([{< then
10518    the corresponding "close" character )]}> is used as the closing
10519    delimiter.  It allows quoting of delimiters, and if the string has
10520    balanced delimiters ([{<>}]) it allows nesting.
10521
10522    On success, the SV with the resulting string is put into lex_stuff or,
10523    if that is already non-NULL, into lex_repl. The second case occurs only
10524    when parsing the RHS of the special constructs s/// and tr/// (y///).
10525    For convenience, the terminating delimiter character is stuffed into
10526    SvIVX of the SV.
10527 */
10528
10529 STATIC char *
10530 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10531                  bool deprecate_escaped_meta, char **delimp
10532     )
10533 {
10534     dVAR;
10535     SV *sv;                     /* scalar value: string */
10536     const char *tmps;           /* temp string, used for delimiter matching */
10537     char *s = start;            /* current position in the buffer */
10538     char term;                  /* terminating character */
10539     char *to;                   /* current position in the sv's data */
10540     I32 brackets = 1;           /* bracket nesting level */
10541     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10542     I32 termcode;               /* terminating char. code */
10543     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10544     STRLEN termlen;             /* length of terminating string */
10545     int last_off = 0;           /* last position for nesting bracket */
10546     char *escaped_open = NULL;
10547     line_t herelines;
10548 #ifdef PERL_MAD
10549     int stuffstart;
10550     char *tstart;
10551 #endif
10552
10553     PERL_ARGS_ASSERT_SCAN_STR;
10554
10555     /* skip space before the delimiter */
10556     if (isSPACE(*s)) {
10557         s = PEEKSPACE(s);
10558     }
10559
10560 #ifdef PERL_MAD
10561     if (PL_realtokenstart >= 0) {
10562         stuffstart = PL_realtokenstart;
10563         PL_realtokenstart = -1;
10564     }
10565     else
10566         stuffstart = start - SvPVX(PL_linestr);
10567 #endif
10568     /* mark where we are, in case we need to report errors */
10569     CLINE;
10570
10571     /* after skipping whitespace, the next character is the terminator */
10572     term = *s;
10573     if (!UTF) {
10574         termcode = termstr[0] = term;
10575         termlen = 1;
10576     }
10577     else {
10578         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10579         Copy(s, termstr, termlen, U8);
10580         if (!UTF8_IS_INVARIANT(term))
10581             has_utf8 = TRUE;
10582     }
10583
10584     /* mark where we are */
10585     PL_multi_start = CopLINE(PL_curcop);
10586     PL_multi_open = term;
10587     herelines = PL_parser->herelines;
10588
10589     /* find corresponding closing delimiter */
10590     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10591         termcode = termstr[0] = term = tmps[5];
10592
10593     PL_multi_close = term;
10594
10595     /* A warning is raised if the input parameter requires it for escaped (by a
10596      * backslash) paired metacharacters {} [] and () when the delimiters are
10597      * those same characters, and the backslash is ineffective.  This doesn't
10598      * happen for <>, as they aren't metas. */
10599     if (deprecate_escaped_meta
10600         && (PL_multi_open == PL_multi_close
10601             || PL_multi_open == '<'
10602             || ! ckWARN_d(WARN_DEPRECATED)))
10603     {
10604         deprecate_escaped_meta = FALSE;
10605     }
10606
10607     /* create a new SV to hold the contents.  79 is the SV's initial length.
10608        What a random number. */
10609     sv = newSV_type(SVt_PVIV);
10610     SvGROW(sv, 80);
10611     SvIV_set(sv, termcode);
10612     (void)SvPOK_only(sv);               /* validate pointer */
10613
10614     /* move past delimiter and try to read a complete string */
10615     if (keep_delims)
10616         sv_catpvn(sv, s, termlen);
10617     s += termlen;
10618 #ifdef PERL_MAD
10619     tstart = SvPVX(PL_linestr) + stuffstart;
10620     if (PL_madskills && !PL_thisopen && !keep_delims) {
10621         PL_thisopen = newSVpvn(tstart, s - tstart);
10622         stuffstart = s - SvPVX(PL_linestr);
10623     }
10624 #endif
10625     for (;;) {
10626         if (PL_encoding && !UTF && !re_reparse) {
10627             bool cont = TRUE;
10628
10629             while (cont) {
10630                 int offset = s - SvPVX_const(PL_linestr);
10631                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10632                                            &offset, (char*)termstr, termlen);
10633                 const char *ns;
10634                 char *svlast;
10635
10636                 if (SvIsCOW(PL_linestr)) {
10637                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10638                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10639                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10640                     char *buf = SvPVX(PL_linestr);
10641                     bufend_pos = PL_parser->bufend - buf;
10642                     bufptr_pos = PL_parser->bufptr - buf;
10643                     oldbufptr_pos = PL_parser->oldbufptr - buf;
10644                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10645                     linestart_pos = PL_parser->linestart - buf;
10646                     last_uni_pos = PL_parser->last_uni
10647                         ? PL_parser->last_uni - buf
10648                         : 0;
10649                     last_lop_pos = PL_parser->last_lop
10650                         ? PL_parser->last_lop - buf
10651                         : 0;
10652                     re_eval_start_pos =
10653                         PL_parser->lex_shared->re_eval_start ?
10654                             PL_parser->lex_shared->re_eval_start - buf : 0;
10655                     s_pos = s - buf;
10656
10657                     sv_force_normal(PL_linestr);
10658
10659                     buf = SvPVX(PL_linestr);
10660                     PL_parser->bufend = buf + bufend_pos;
10661                     PL_parser->bufptr = buf + bufptr_pos;
10662                     PL_parser->oldbufptr = buf + oldbufptr_pos;
10663                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10664                     PL_parser->linestart = buf + linestart_pos;
10665                     if (PL_parser->last_uni)
10666                         PL_parser->last_uni = buf + last_uni_pos;
10667                     if (PL_parser->last_lop)
10668                         PL_parser->last_lop = buf + last_lop_pos;
10669                     if (PL_parser->lex_shared->re_eval_start)
10670                         PL_parser->lex_shared->re_eval_start  =
10671                             buf + re_eval_start_pos;
10672                     s = buf + s_pos;
10673                 }
10674                 ns = SvPVX_const(PL_linestr) + offset;
10675                 svlast = SvEND(sv) - 1;
10676
10677                 for (; s < ns; s++) {
10678                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10679                         COPLINE_INC_WITH_HERELINES;
10680                 }
10681                 if (!found)
10682                     goto read_more_line;
10683                 else {
10684                     /* handle quoted delimiters */
10685                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10686                         const char *t;
10687                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10688                             t--;
10689                         if ((svlast-1 - t) % 2) {
10690                             if (!keep_quoted) {
10691                                 *(svlast-1) = term;
10692                                 *svlast = '\0';
10693                                 SvCUR_set(sv, SvCUR(sv) - 1);
10694                             }
10695                             continue;
10696                         }
10697                     }
10698                     if (PL_multi_open == PL_multi_close) {
10699                         cont = FALSE;
10700                     }
10701                     else {
10702                         const char *t;
10703                         char *w;
10704                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10705                             /* At here, all closes are "was quoted" one,
10706                                so we don't check PL_multi_close. */
10707                             if (*t == '\\') {
10708                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10709                                     t++;
10710                                 else
10711                                     *w++ = *t++;
10712                             }
10713                             else if (*t == PL_multi_open)
10714                                 brackets++;
10715
10716                             *w = *t;
10717                         }
10718                         if (w < t) {
10719                             *w++ = term;
10720                             *w = '\0';
10721                             SvCUR_set(sv, w - SvPVX_const(sv));
10722                         }
10723                         last_off = w - SvPVX(sv);
10724                         if (--brackets <= 0)
10725                             cont = FALSE;
10726                     }
10727                 }
10728             }
10729             if (!keep_delims) {
10730                 SvCUR_set(sv, SvCUR(sv) - 1);
10731                 *SvEND(sv) = '\0';
10732             }
10733             break;
10734         }
10735
10736         /* extend sv if need be */
10737         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10738         /* set 'to' to the next character in the sv's string */
10739         to = SvPVX(sv)+SvCUR(sv);
10740
10741         /* if open delimiter is the close delimiter read unbridle */
10742         if (PL_multi_open == PL_multi_close) {
10743             for (; s < PL_bufend; s++,to++) {
10744                 /* embedded newlines increment the current line number */
10745                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10746                     COPLINE_INC_WITH_HERELINES;
10747                 /* handle quoted delimiters */
10748                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10749                     if (!keep_quoted
10750                         && (s[1] == term
10751                             || (re_reparse && s[1] == '\\'))
10752                     )
10753                         s++;
10754                     /* any other quotes are simply copied straight through */
10755                     else
10756                         *to++ = *s++;
10757                 }
10758                 /* terminate when run out of buffer (the for() condition), or
10759                    have found the terminator */
10760                 else if (*s == term) {
10761                     if (termlen == 1)
10762                         break;
10763                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10764                         break;
10765                 }
10766                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10767                     has_utf8 = TRUE;
10768                 *to = *s;
10769             }
10770         }
10771
10772         /* if the terminator isn't the same as the start character (e.g.,
10773            matched brackets), we have to allow more in the quoting, and
10774            be prepared for nested brackets.
10775         */
10776         else {
10777             /* read until we run out of string, or we find the terminator */
10778             for (; s < PL_bufend; s++,to++) {
10779                 /* embedded newlines increment the line count */
10780                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10781                     COPLINE_INC_WITH_HERELINES;
10782                 /* backslashes can escape the open or closing characters */
10783                 if (*s == '\\' && s+1 < PL_bufend) {
10784                     if (!keep_quoted &&
10785                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10786                     {
10787                         s++;
10788
10789                         /* Here, 'deprecate_escaped_meta' is true iff the
10790                          * delimiters are paired metacharacters, and 's' points
10791                          * to an occurrence of one of them within the string,
10792                          * which was preceded by a backslash.  If this is a
10793                          * context where the delimiter is also a metacharacter,
10794                          * the backslash is useless, and deprecated.  () and []
10795                          * are meta in any context. {} are meta only when
10796                          * appearing in a quantifier or in things like '\p{'
10797                          * (but '\\p{' isn't meta).  They also aren't meta
10798                          * unless there is a matching closed, escaped char
10799                          * later on within the string.  If 's' points to an
10800                          * open, set a flag; if to a close, test that flag, and
10801                          * raise a warning if it was set */
10802
10803                         if (deprecate_escaped_meta) {
10804                             if (*s == PL_multi_open) {
10805                                 if (*s != '{') {
10806                                     escaped_open = s;
10807                                 }
10808                                      /* Look for a closing '\}' */
10809                                 else if (regcurly(s, TRUE)) {
10810                                     escaped_open = s;
10811                                 }
10812                                      /* Look for e.g.  '\x{' */
10813                                 else if (s - start > 2
10814                                          && _generic_isCC(*(s-2),
10815                                              _CC_BACKSLASH_FOO_LBRACE_IS_META))
10816                                 { /* Exclude '\\x', '\\\\x', etc. */
10817                                     char *lookbehind = s - 4;
10818                                     bool is_meta = TRUE;
10819                                     while (lookbehind >= start
10820                                            && *lookbehind == '\\')
10821                                     {
10822                                         is_meta = ! is_meta;
10823                                         lookbehind--;
10824                                     }
10825                                     if (is_meta) {
10826                                         escaped_open = s;
10827                                     }
10828                                 }
10829                             }
10830                             else if (escaped_open) {
10831                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10832                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10833                                 escaped_open = NULL;
10834                             }
10835                         }
10836                     }
10837                     else
10838                         *to++ = *s++;
10839                 }
10840                 /* allow nested opens and closes */
10841                 else if (*s == PL_multi_close && --brackets <= 0)
10842                     break;
10843                 else if (*s == PL_multi_open)
10844                     brackets++;
10845                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10846                     has_utf8 = TRUE;
10847                 *to = *s;
10848             }
10849         }
10850         /* terminate the copied string and update the sv's end-of-string */
10851         *to = '\0';
10852         SvCUR_set(sv, to - SvPVX_const(sv));
10853
10854         /*
10855          * this next chunk reads more into the buffer if we're not done yet
10856          */
10857
10858         if (s < PL_bufend)
10859             break;              /* handle case where we are done yet :-) */
10860
10861 #ifndef PERL_STRICT_CR
10862         if (to - SvPVX_const(sv) >= 2) {
10863             if ((to[-2] == '\r' && to[-1] == '\n') ||
10864                 (to[-2] == '\n' && to[-1] == '\r'))
10865             {
10866                 to[-2] = '\n';
10867                 to--;
10868                 SvCUR_set(sv, to - SvPVX_const(sv));
10869             }
10870             else if (to[-1] == '\r')
10871                 to[-1] = '\n';
10872         }
10873         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10874             to[-1] = '\n';
10875 #endif
10876
10877      read_more_line:
10878         /* if we're out of file, or a read fails, bail and reset the current
10879            line marker so we can report where the unterminated string began
10880         */
10881 #ifdef PERL_MAD
10882         if (PL_madskills) {
10883             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10884             if (PL_thisstuff)
10885                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10886             else
10887                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10888         }
10889 #endif
10890         COPLINE_INC_WITH_HERELINES;
10891         PL_bufptr = PL_bufend;
10892         if (!lex_next_chunk(0)) {
10893             sv_free(sv);
10894             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10895             return NULL;
10896         }
10897         s = PL_bufptr;
10898 #ifdef PERL_MAD
10899         stuffstart = 0;
10900 #endif
10901     }
10902
10903     /* at this point, we have successfully read the delimited string */
10904
10905     if (!PL_encoding || UTF || re_reparse) {
10906 #ifdef PERL_MAD
10907         if (PL_madskills) {
10908             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10909             const int len = s - tstart;
10910             if (PL_thisstuff)
10911                 sv_catpvn(PL_thisstuff, tstart, len);
10912             else
10913                 PL_thisstuff = newSVpvn(tstart, len);
10914             if (!PL_thisclose && !keep_delims)
10915                 PL_thisclose = newSVpvn(s,termlen);
10916         }
10917 #endif
10918
10919         if (keep_delims)
10920             sv_catpvn(sv, s, termlen);
10921         s += termlen;
10922     }
10923 #ifdef PERL_MAD
10924     else {
10925         if (PL_madskills) {
10926             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10927             const int len = s - tstart - termlen;
10928             if (PL_thisstuff)
10929                 sv_catpvn(PL_thisstuff, tstart, len);
10930             else
10931                 PL_thisstuff = newSVpvn(tstart, len);
10932             if (!PL_thisclose && !keep_delims)
10933                 PL_thisclose = newSVpvn(s - termlen,termlen);
10934         }
10935     }
10936 #endif
10937     if (has_utf8 || (PL_encoding && !re_reparse))
10938         SvUTF8_on(sv);
10939
10940     PL_multi_end = CopLINE(PL_curcop);
10941     CopLINE_set(PL_curcop, PL_multi_start);
10942     PL_parser->herelines = herelines;
10943
10944     /* if we allocated too much space, give some back */
10945     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10946         SvLEN_set(sv, SvCUR(sv) + 1);
10947         SvPV_renew(sv, SvLEN(sv));
10948     }
10949
10950     /* decide whether this is the first or second quoted string we've read
10951        for this op
10952     */
10953
10954     if (PL_lex_stuff)
10955         PL_sublex_info.repl = sv;
10956     else
10957         PL_lex_stuff = sv;
10958     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10959     return s;
10960 }
10961
10962 /*
10963   scan_num
10964   takes: pointer to position in buffer
10965   returns: pointer to new position in buffer
10966   side-effects: builds ops for the constant in pl_yylval.op
10967
10968   Read a number in any of the formats that Perl accepts:
10969
10970   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10971   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10972   0b[01](_?[01])*
10973   0[0-7](_?[0-7])*
10974   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10975
10976   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10977   thing it reads.
10978
10979   If it reads a number without a decimal point or an exponent, it will
10980   try converting the number to an integer and see if it can do so
10981   without loss of precision.
10982 */
10983
10984 char *
10985 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10986 {
10987     dVAR;
10988     const char *s = start;      /* current position in buffer */
10989     char *d;                    /* destination in temp buffer */
10990     char *e;                    /* end of temp buffer */
10991     NV nv;                              /* number read, as a double */
10992     SV *sv = NULL;                      /* place to put the converted number */
10993     bool floatit;                       /* boolean: int or float? */
10994     const char *lastub = NULL;          /* position of last underbar */
10995     static const char* const number_too_long = "Number too long";
10996
10997     PERL_ARGS_ASSERT_SCAN_NUM;
10998
10999     /* We use the first character to decide what type of number this is */
11000
11001     switch (*s) {
11002     default:
11003         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11004
11005     /* if it starts with a 0, it could be an octal number, a decimal in
11006        0.13 disguise, or a hexadecimal number, or a binary number. */
11007     case '0':
11008         {
11009           /* variables:
11010              u          holds the "number so far"
11011              shift      the power of 2 of the base
11012                         (hex == 4, octal == 3, binary == 1)
11013              overflowed was the number more than we can hold?
11014
11015              Shift is used when we add a digit.  It also serves as an "are
11016              we in octal/hex/binary?" indicator to disallow hex characters
11017              when in octal mode.
11018            */
11019             NV n = 0.0;
11020             UV u = 0;
11021             I32 shift;
11022             bool overflowed = FALSE;
11023             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11024             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11025             static const char* const bases[5] =
11026               { "", "binary", "", "octal", "hexadecimal" };
11027             static const char* const Bases[5] =
11028               { "", "Binary", "", "Octal", "Hexadecimal" };
11029             static const char* const maxima[5] =
11030               { "",
11031                 "0b11111111111111111111111111111111",
11032                 "",
11033                 "037777777777",
11034                 "0xffffffff" };
11035             const char *base, *Base, *max;
11036
11037             /* check for hex */
11038             if (s[1] == 'x' || s[1] == 'X') {
11039                 shift = 4;
11040                 s += 2;
11041                 just_zero = FALSE;
11042             } else if (s[1] == 'b' || s[1] == 'B') {
11043                 shift = 1;
11044                 s += 2;
11045                 just_zero = FALSE;
11046             }
11047             /* check for a decimal in disguise */
11048             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11049                 goto decimal;
11050             /* so it must be octal */
11051             else {
11052                 shift = 3;
11053                 s++;
11054             }
11055
11056             if (*s == '_') {
11057                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11058                                "Misplaced _ in number");
11059                lastub = s++;
11060             }
11061
11062             base = bases[shift];
11063             Base = Bases[shift];
11064             max  = maxima[shift];
11065
11066             /* read the rest of the number */
11067             for (;;) {
11068                 /* x is used in the overflow test,
11069                    b is the digit we're adding on. */
11070                 UV x, b;
11071
11072                 switch (*s) {
11073
11074                 /* if we don't mention it, we're done */
11075                 default:
11076                     goto out;
11077
11078                 /* _ are ignored -- but warned about if consecutive */
11079                 case '_':
11080                     if (lastub && s == lastub + 1)
11081                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11082                                        "Misplaced _ in number");
11083                     lastub = s++;
11084                     break;
11085
11086                 /* 8 and 9 are not octal */
11087                 case '8': case '9':
11088                     if (shift == 3)
11089                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11090                     /* FALL THROUGH */
11091
11092                 /* octal digits */
11093                 case '2': case '3': case '4':
11094                 case '5': case '6': case '7':
11095                     if (shift == 1)
11096                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11097                     /* FALL THROUGH */
11098
11099                 case '0': case '1':
11100                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11101                     goto digit;
11102
11103                 /* hex digits */
11104                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11105                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11106                     /* make sure they said 0x */
11107                     if (shift != 4)
11108                         goto out;
11109                     b = (*s++ & 7) + 9;
11110
11111                     /* Prepare to put the digit we have onto the end
11112                        of the number so far.  We check for overflows.
11113                     */
11114
11115                   digit:
11116                     just_zero = FALSE;
11117                     if (!overflowed) {
11118                         x = u << shift; /* make room for the digit */
11119
11120                         if ((x >> shift) != u
11121                             && !(PL_hints & HINT_NEW_BINARY)) {
11122                             overflowed = TRUE;
11123                             n = (NV) u;
11124                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11125                                              "Integer overflow in %s number",
11126                                              base);
11127                         } else
11128                             u = x | b;          /* add the digit to the end */
11129                     }
11130                     if (overflowed) {
11131                         n *= nvshift[shift];
11132                         /* If an NV has not enough bits in its
11133                          * mantissa to represent an UV this summing of
11134                          * small low-order numbers is a waste of time
11135                          * (because the NV cannot preserve the
11136                          * low-order bits anyway): we could just
11137                          * remember when did we overflow and in the
11138                          * end just multiply n by the right
11139                          * amount. */
11140                         n += (NV) b;
11141                     }
11142                     break;
11143                 }
11144             }
11145
11146           /* if we get here, we had success: make a scalar value from
11147              the number.
11148           */
11149           out:
11150
11151             /* final misplaced underbar check */
11152             if (s[-1] == '_') {
11153                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11154             }
11155
11156             if (overflowed) {
11157                 if (n > 4294967295.0)
11158                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11159                                    "%s number > %s non-portable",
11160                                    Base, max);
11161                 sv = newSVnv(n);
11162             }
11163             else {
11164 #if UVSIZE > 4
11165                 if (u > 0xffffffff)
11166                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11167                                    "%s number > %s non-portable",
11168                                    Base, max);
11169 #endif
11170                 sv = newSVuv(u);
11171             }
11172             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11173                 sv = new_constant(start, s - start, "integer",
11174                                   sv, NULL, NULL, 0);
11175             else if (PL_hints & HINT_NEW_BINARY)
11176                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11177         }
11178         break;
11179
11180     /*
11181       handle decimal numbers.
11182       we're also sent here when we read a 0 as the first digit
11183     */
11184     case '1': case '2': case '3': case '4': case '5':
11185     case '6': case '7': case '8': case '9': case '.':
11186       decimal:
11187         d = PL_tokenbuf;
11188         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11189         floatit = FALSE;
11190
11191         /* read next group of digits and _ and copy into d */
11192         while (isDIGIT(*s) || *s == '_') {
11193             /* skip underscores, checking for misplaced ones
11194                if -w is on
11195             */
11196             if (*s == '_') {
11197                 if (lastub && s == lastub + 1)
11198                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11199                                    "Misplaced _ in number");
11200                 lastub = s++;
11201             }
11202             else {
11203                 /* check for end of fixed-length buffer */
11204                 if (d >= e)
11205                     Perl_croak(aTHX_ "%s", number_too_long);
11206                 /* if we're ok, copy the character */
11207                 *d++ = *s++;
11208             }
11209         }
11210
11211         /* final misplaced underbar check */
11212         if (lastub && s == lastub + 1) {
11213             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11214         }
11215
11216         /* read a decimal portion if there is one.  avoid
11217            3..5 being interpreted as the number 3. followed
11218            by .5
11219         */
11220         if (*s == '.' && s[1] != '.') {
11221             floatit = TRUE;
11222             *d++ = *s++;
11223
11224             if (*s == '_') {
11225                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11226                                "Misplaced _ in number");
11227                 lastub = s;
11228             }
11229
11230             /* copy, ignoring underbars, until we run out of digits.
11231             */
11232             for (; isDIGIT(*s) || *s == '_'; s++) {
11233                 /* fixed length buffer check */
11234                 if (d >= e)
11235                     Perl_croak(aTHX_ "%s", number_too_long);
11236                 if (*s == '_') {
11237                    if (lastub && s == lastub + 1)
11238                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11239                                       "Misplaced _ in number");
11240                    lastub = s;
11241                 }
11242                 else
11243                     *d++ = *s;
11244             }
11245             /* fractional part ending in underbar? */
11246             if (s[-1] == '_') {
11247                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11248                                "Misplaced _ in number");
11249             }
11250             if (*s == '.' && isDIGIT(s[1])) {
11251                 /* oops, it's really a v-string, but without the "v" */
11252                 s = start;
11253                 goto vstring;
11254             }
11255         }
11256
11257         /* read exponent part, if present */
11258         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11259             floatit = TRUE;
11260             s++;
11261
11262             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11263             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11264
11265             /* stray preinitial _ */
11266             if (*s == '_') {
11267                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11268                                "Misplaced _ in number");
11269                 lastub = s++;
11270             }
11271
11272             /* allow positive or negative exponent */
11273             if (*s == '+' || *s == '-')
11274                 *d++ = *s++;
11275
11276             /* stray initial _ */
11277             if (*s == '_') {
11278                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11279                                "Misplaced _ in number");
11280                 lastub = s++;
11281             }
11282
11283             /* read digits of exponent */
11284             while (isDIGIT(*s) || *s == '_') {
11285                 if (isDIGIT(*s)) {
11286                     if (d >= e)
11287                         Perl_croak(aTHX_ "%s", number_too_long);
11288                     *d++ = *s++;
11289                 }
11290                 else {
11291                    if (((lastub && s == lastub + 1) ||
11292                         (!isDIGIT(s[1]) && s[1] != '_')))
11293                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11294                                       "Misplaced _ in number");
11295                    lastub = s++;
11296                 }
11297             }
11298         }
11299
11300
11301         /*
11302            We try to do an integer conversion first if no characters
11303            indicating "float" have been found.
11304          */
11305
11306         if (!floatit) {
11307             UV uv;
11308             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11309
11310             if (flags == IS_NUMBER_IN_UV) {
11311               if (uv <= IV_MAX)
11312                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11313               else
11314                 sv = newSVuv(uv);
11315             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11316               if (uv <= (UV) IV_MIN)
11317                 sv = newSViv(-(IV)uv);
11318               else
11319                 floatit = TRUE;
11320             } else
11321               floatit = TRUE;
11322         }
11323         if (floatit) {
11324             STORE_NUMERIC_LOCAL_SET_STANDARD();
11325             /* terminate the string */
11326             *d = '\0';
11327             nv = Atof(PL_tokenbuf);
11328             RESTORE_NUMERIC_LOCAL();
11329             sv = newSVnv(nv);
11330         }
11331
11332         if ( floatit
11333              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11334             const char *const key = floatit ? "float" : "integer";
11335             const STRLEN keylen = floatit ? 5 : 7;
11336             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11337                                 key, keylen, sv, NULL, NULL, 0);
11338         }
11339         break;
11340
11341     /* if it starts with a v, it could be a v-string */
11342     case 'v':
11343 vstring:
11344                 sv = newSV(5); /* preallocate storage space */
11345                 ENTER_with_name("scan_vstring");
11346                 SAVEFREESV(sv);
11347                 s = scan_vstring(s, PL_bufend, sv);
11348                 SvREFCNT_inc_simple_void_NN(sv);
11349                 LEAVE_with_name("scan_vstring");
11350         break;
11351     }
11352
11353     /* make the op for the constant and return */
11354
11355     if (sv)
11356         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11357     else
11358         lvalp->opval = NULL;
11359
11360     return (char *)s;
11361 }
11362
11363 STATIC char *
11364 S_scan_formline(pTHX_ char *s)
11365 {
11366     dVAR;
11367     char *eol;
11368     char *t;
11369     SV * const stuff = newSVpvs("");
11370     bool needargs = FALSE;
11371     bool eofmt = FALSE;
11372 #ifdef PERL_MAD
11373     char *tokenstart = s;
11374     SV* savewhite = NULL;
11375
11376     if (PL_madskills) {
11377         savewhite = PL_thiswhite;
11378         PL_thiswhite = 0;
11379     }
11380 #endif
11381
11382     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11383
11384     while (!needargs) {
11385         if (*s == '.') {
11386             t = s+1;
11387 #ifdef PERL_STRICT_CR
11388             while (SPACE_OR_TAB(*t))
11389                 t++;
11390 #else
11391             while (SPACE_OR_TAB(*t) || *t == '\r')
11392                 t++;
11393 #endif
11394             if (*t == '\n' || t == PL_bufend) {
11395                 eofmt = TRUE;
11396                 break;
11397             }
11398         }
11399         eol = (char *) memchr(s,'\n',PL_bufend-s);
11400         if (!eol++)
11401                 eol = PL_bufend;
11402         if (*s != '#') {
11403             for (t = s; t < eol; t++) {
11404                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11405                     needargs = FALSE;
11406                     goto enough;        /* ~~ must be first line in formline */
11407                 }
11408                 if (*t == '@' || *t == '^')
11409                     needargs = TRUE;
11410             }
11411             if (eol > s) {
11412                 sv_catpvn(stuff, s, eol-s);
11413 #ifndef PERL_STRICT_CR
11414                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11415                     char *end = SvPVX(stuff) + SvCUR(stuff);
11416                     end[-2] = '\n';
11417                     end[-1] = '\0';
11418                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11419                 }
11420 #endif
11421             }
11422             else
11423               break;
11424         }
11425         s = (char*)eol;
11426         if ((PL_rsfp || PL_parser->filtered)
11427          && PL_parser->form_lex_state == LEX_NORMAL) {
11428             bool got_some;
11429 #ifdef PERL_MAD
11430             if (PL_madskills) {
11431                 if (PL_thistoken)
11432                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11433                 else
11434                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11435             }
11436 #endif
11437             PL_bufptr = PL_bufend;
11438             COPLINE_INC_WITH_HERELINES;
11439             got_some = lex_next_chunk(0);
11440             CopLINE_dec(PL_curcop);
11441             s = PL_bufptr;
11442 #ifdef PERL_MAD
11443             tokenstart = PL_bufptr;
11444 #endif
11445             if (!got_some)
11446                 break;
11447         }
11448         incline(s);
11449     }
11450   enough:
11451     if (!SvCUR(stuff) || needargs)
11452         PL_lex_state = PL_parser->form_lex_state;
11453     if (SvCUR(stuff)) {
11454         PL_expect = XSTATE;
11455         if (needargs) {
11456             const char *s2 = s;
11457             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11458                 || *s2 == 013)
11459                 s2++;
11460             if (*s2 == '{') {
11461                 start_force(PL_curforce);
11462                 PL_expect = XTERMBLOCK;
11463                 NEXTVAL_NEXTTOKE.ival = 0;
11464                 force_next(DO);
11465             }
11466             start_force(PL_curforce);
11467             NEXTVAL_NEXTTOKE.ival = 0;
11468             force_next(FORMLBRACK);
11469         }
11470         if (!IN_BYTES) {
11471             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11472                 SvUTF8_on(stuff);
11473             else if (PL_encoding)
11474                 sv_recode_to_utf8(stuff, PL_encoding);
11475         }
11476         start_force(PL_curforce);
11477         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11478         force_next(THING);
11479     }
11480     else {
11481         SvREFCNT_dec(stuff);
11482         if (eofmt)
11483             PL_lex_formbrack = 0;
11484     }
11485 #ifdef PERL_MAD
11486     if (PL_madskills) {
11487         if (PL_thistoken)
11488             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11489         else
11490             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11491         PL_thiswhite = savewhite;
11492     }
11493 #endif
11494     return s;
11495 }
11496
11497 I32
11498 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11499 {
11500     dVAR;
11501     const I32 oldsavestack_ix = PL_savestack_ix;
11502     CV* const outsidecv = PL_compcv;
11503
11504     SAVEI32(PL_subline);
11505     save_item(PL_subname);
11506     SAVESPTR(PL_compcv);
11507
11508     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11509     CvFLAGS(PL_compcv) |= flags;
11510
11511     PL_subline = CopLINE(PL_curcop);
11512     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11513     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11514     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11515     if (outsidecv && CvPADLIST(outsidecv))
11516         CvPADLIST(PL_compcv)->xpadl_outid =
11517             PadlistNAMES(CvPADLIST(outsidecv));
11518
11519     return oldsavestack_ix;
11520 }
11521
11522 static int
11523 S_yywarn(pTHX_ const char *const s, U32 flags)
11524 {
11525     dVAR;
11526
11527     PERL_ARGS_ASSERT_YYWARN;
11528
11529     PL_in_eval |= EVAL_WARNONLY;
11530     yyerror_pv(s, flags);
11531     PL_in_eval &= ~EVAL_WARNONLY;
11532     return 0;
11533 }
11534
11535 int
11536 Perl_yyerror(pTHX_ const char *const s)
11537 {
11538     PERL_ARGS_ASSERT_YYERROR;
11539     return yyerror_pvn(s, strlen(s), 0);
11540 }
11541
11542 int
11543 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11544 {
11545     PERL_ARGS_ASSERT_YYERROR_PV;
11546     return yyerror_pvn(s, strlen(s), flags);
11547 }
11548
11549 int
11550 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11551 {
11552     dVAR;
11553     const char *context = NULL;
11554     int contlen = -1;
11555     SV *msg;
11556     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11557     int yychar  = PL_parser->yychar;
11558
11559     PERL_ARGS_ASSERT_YYERROR_PVN;
11560
11561     if (!yychar || (yychar == ';' && !PL_rsfp))
11562         sv_catpvs(where_sv, "at EOF");
11563     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11564       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11565       PL_oldbufptr != PL_bufptr) {
11566         /*
11567                 Only for NetWare:
11568                 The code below is removed for NetWare because it abends/crashes on NetWare
11569                 when the script has error such as not having the closing quotes like:
11570                     if ($var eq "value)
11571                 Checking of white spaces is anyway done in NetWare code.
11572         */
11573 #ifndef NETWARE
11574         while (isSPACE(*PL_oldoldbufptr))
11575             PL_oldoldbufptr++;
11576 #endif
11577         context = PL_oldoldbufptr;
11578         contlen = PL_bufptr - PL_oldoldbufptr;
11579     }
11580     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11581       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11582         /*
11583                 Only for NetWare:
11584                 The code below is removed for NetWare because it abends/crashes on NetWare
11585                 when the script has error such as not having the closing quotes like:
11586                     if ($var eq "value)
11587                 Checking of white spaces is anyway done in NetWare code.
11588         */
11589 #ifndef NETWARE
11590         while (isSPACE(*PL_oldbufptr))
11591             PL_oldbufptr++;
11592 #endif
11593         context = PL_oldbufptr;
11594         contlen = PL_bufptr - PL_oldbufptr;
11595     }
11596     else if (yychar > 255)
11597         sv_catpvs(where_sv, "next token ???");
11598     else if (yychar == -2) { /* YYEMPTY */
11599         if (PL_lex_state == LEX_NORMAL ||
11600            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11601             sv_catpvs(where_sv, "at end of line");
11602         else if (PL_lex_inpat)
11603             sv_catpvs(where_sv, "within pattern");
11604         else
11605             sv_catpvs(where_sv, "within string");
11606     }
11607     else {
11608         sv_catpvs(where_sv, "next char ");
11609         if (yychar < 32)
11610             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11611         else if (isPRINT_LC(yychar)) {
11612             const char string = yychar;
11613             sv_catpvn(where_sv, &string, 1);
11614         }
11615         else
11616             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11617     }
11618     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11619     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11620         OutCopFILE(PL_curcop),
11621         (IV)(PL_parser->preambling == NOLINE
11622                ? CopLINE(PL_curcop)
11623                : PL_parser->preambling));
11624     if (context)
11625         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11626                              UTF8fARG(UTF, contlen, context));
11627     else
11628         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11629     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11630         Perl_sv_catpvf(aTHX_ msg,
11631         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11632                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11633         PL_multi_end = 0;
11634     }
11635     if (PL_in_eval & EVAL_WARNONLY) {
11636         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11637     }
11638     else
11639         qerror(msg);
11640     if (PL_error_count >= 10) {
11641         SV * errsv;
11642         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11643             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11644                        SVfARG(errsv), OutCopFILE(PL_curcop));
11645         else
11646             Perl_croak(aTHX_ "%s has too many errors.\n",
11647             OutCopFILE(PL_curcop));
11648     }
11649     PL_in_my = 0;
11650     PL_in_my_stash = NULL;
11651     return 0;
11652 }
11653
11654 STATIC char*
11655 S_swallow_bom(pTHX_ U8 *s)
11656 {
11657     dVAR;
11658     const STRLEN slen = SvCUR(PL_linestr);
11659
11660     PERL_ARGS_ASSERT_SWALLOW_BOM;
11661
11662     switch (s[0]) {
11663     case 0xFF:
11664         if (s[1] == 0xFE) {
11665             /* UTF-16 little-endian? (or UTF-32LE?) */
11666             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11667                 /* diag_listed_as: Unsupported script encoding %s */
11668                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11669 #ifndef PERL_NO_UTF16_FILTER
11670             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11671             s += 2;
11672             if (PL_bufend > (char*)s) {
11673                 s = add_utf16_textfilter(s, TRUE);
11674             }
11675 #else
11676             /* diag_listed_as: Unsupported script encoding %s */
11677             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11678 #endif
11679         }
11680         break;
11681     case 0xFE:
11682         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11683 #ifndef PERL_NO_UTF16_FILTER
11684             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11685             s += 2;
11686             if (PL_bufend > (char *)s) {
11687                 s = add_utf16_textfilter(s, FALSE);
11688             }
11689 #else
11690             /* diag_listed_as: Unsupported script encoding %s */
11691             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11692 #endif
11693         }
11694         break;
11695     case BOM_UTF8_FIRST_BYTE: {
11696         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11697         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11698             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11699             s += len + 1;                      /* UTF-8 */
11700         }
11701         break;
11702     }
11703     case 0:
11704         if (slen > 3) {
11705              if (s[1] == 0) {
11706                   if (s[2] == 0xFE && s[3] == 0xFF) {
11707                        /* UTF-32 big-endian */
11708                        /* diag_listed_as: Unsupported script encoding %s */
11709                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11710                   }
11711              }
11712              else if (s[2] == 0 && s[3] != 0) {
11713                   /* Leading bytes
11714                    * 00 xx 00 xx
11715                    * are a good indicator of UTF-16BE. */
11716 #ifndef PERL_NO_UTF16_FILTER
11717                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11718                   s = add_utf16_textfilter(s, FALSE);
11719 #else
11720                   /* diag_listed_as: Unsupported script encoding %s */
11721                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11722 #endif
11723              }
11724         }
11725
11726     default:
11727          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11728                   /* Leading bytes
11729                    * xx 00 xx 00
11730                    * are a good indicator of UTF-16LE. */
11731 #ifndef PERL_NO_UTF16_FILTER
11732               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11733               s = add_utf16_textfilter(s, TRUE);
11734 #else
11735               /* diag_listed_as: Unsupported script encoding %s */
11736               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11737 #endif
11738          }
11739     }
11740     return (char*)s;
11741 }
11742
11743
11744 #ifndef PERL_NO_UTF16_FILTER
11745 static I32
11746 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11747 {
11748     dVAR;
11749     SV *const filter = FILTER_DATA(idx);
11750     /* We re-use this each time round, throwing the contents away before we
11751        return.  */
11752     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11753     SV *const utf8_buffer = filter;
11754     IV status = IoPAGE(filter);
11755     const bool reverse = cBOOL(IoLINES(filter));
11756     I32 retval;
11757
11758     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11759
11760     /* As we're automatically added, at the lowest level, and hence only called
11761        from this file, we can be sure that we're not called in block mode. Hence
11762        don't bother writing code to deal with block mode.  */
11763     if (maxlen) {
11764         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11765     }
11766     if (status < 0) {
11767         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11768     }
11769     DEBUG_P(PerlIO_printf(Perl_debug_log,
11770                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11771                           FPTR2DPTR(void *, S_utf16_textfilter),
11772                           reverse ? 'l' : 'b', idx, maxlen, status,
11773                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11774
11775     while (1) {
11776         STRLEN chars;
11777         STRLEN have;
11778         I32 newlen;
11779         U8 *end;
11780         /* First, look in our buffer of existing UTF-8 data:  */
11781         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11782
11783         if (nl) {
11784             ++nl;
11785         } else if (status == 0) {
11786             /* EOF */
11787             IoPAGE(filter) = 0;
11788             nl = SvEND(utf8_buffer);
11789         }
11790         if (nl) {
11791             STRLEN got = nl - SvPVX(utf8_buffer);
11792             /* Did we have anything to append?  */
11793             retval = got != 0;
11794             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11795             /* Everything else in this code works just fine if SVp_POK isn't
11796                set.  This, however, needs it, and we need it to work, else
11797                we loop infinitely because the buffer is never consumed.  */
11798             sv_chop(utf8_buffer, nl);
11799             break;
11800         }
11801
11802         /* OK, not a complete line there, so need to read some more UTF-16.
11803            Read an extra octect if the buffer currently has an odd number. */
11804         while (1) {
11805             if (status <= 0)
11806                 break;
11807             if (SvCUR(utf16_buffer) >= 2) {
11808                 /* Location of the high octet of the last complete code point.
11809                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11810                    *coupled* with all the benefits of partial reads and
11811                    endianness.  */
11812                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11813                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11814
11815                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11816                     break;
11817                 }
11818
11819                 /* We have the first half of a surrogate. Read more.  */
11820                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11821             }
11822
11823             status = FILTER_READ(idx + 1, utf16_buffer,
11824                                  160 + (SvCUR(utf16_buffer) & 1));
11825             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11826             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11827             if (status < 0) {
11828                 /* Error */
11829                 IoPAGE(filter) = status;
11830                 return status;
11831             }
11832         }
11833
11834         chars = SvCUR(utf16_buffer) >> 1;
11835         have = SvCUR(utf8_buffer);
11836         SvGROW(utf8_buffer, have + chars * 3 + 1);
11837
11838         if (reverse) {
11839             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11840                                          (U8*)SvPVX_const(utf8_buffer) + have,
11841                                          chars * 2, &newlen);
11842         } else {
11843             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11844                                 (U8*)SvPVX_const(utf8_buffer) + have,
11845                                 chars * 2, &newlen);
11846         }
11847         SvCUR_set(utf8_buffer, have + newlen);
11848         *end = '\0';
11849
11850         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11851            it's private to us, and utf16_to_utf8{,reversed} take a
11852            (pointer,length) pair, rather than a NUL-terminated string.  */
11853         if(SvCUR(utf16_buffer) & 1) {
11854             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11855             SvCUR_set(utf16_buffer, 1);
11856         } else {
11857             SvCUR_set(utf16_buffer, 0);
11858         }
11859     }
11860     DEBUG_P(PerlIO_printf(Perl_debug_log,
11861                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11862                           status,
11863                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11864     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11865     return retval;
11866 }
11867
11868 static U8 *
11869 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11870 {
11871     SV *filter = filter_add(S_utf16_textfilter, NULL);
11872
11873     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11874
11875     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11876     sv_setpvs(filter, "");
11877     IoLINES(filter) = reversed;
11878     IoPAGE(filter) = 1; /* Not EOF */
11879
11880     /* Sadly, we have to return a valid pointer, come what may, so we have to
11881        ignore any error return from this.  */
11882     SvCUR_set(PL_linestr, 0);
11883     if (FILTER_READ(0, PL_linestr, 0)) {
11884         SvUTF8_on(PL_linestr);
11885     } else {
11886         SvUTF8_on(PL_linestr);
11887     }
11888     PL_bufend = SvEND(PL_linestr);
11889     return (U8*)SvPVX(PL_linestr);
11890 }
11891 #endif
11892
11893 /*
11894 Returns a pointer to the next character after the parsed
11895 vstring, as well as updating the passed in sv.
11896
11897 Function must be called like
11898
11899         sv = sv_2mortal(newSV(5));
11900         s = scan_vstring(s,e,sv);
11901
11902 where s and e are the start and end of the string.
11903 The sv should already be large enough to store the vstring
11904 passed in, for performance reasons.
11905
11906 This function may croak if fatal warnings are enabled in the
11907 calling scope, hence the sv_2mortal in the example (to prevent
11908 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11909 sv_2mortal.
11910
11911 */
11912
11913 char *
11914 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11915 {
11916     dVAR;
11917     const char *pos = s;
11918     const char *start = s;
11919
11920     PERL_ARGS_ASSERT_SCAN_VSTRING;
11921
11922     if (*pos == 'v') pos++;  /* get past 'v' */
11923     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11924         pos++;
11925     if ( *pos != '.') {
11926         /* this may not be a v-string if followed by => */
11927         const char *next = pos;
11928         while (next < e && isSPACE(*next))
11929             ++next;
11930         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11931             /* return string not v-string */
11932             sv_setpvn(sv,(char *)s,pos-s);
11933             return (char *)pos;
11934         }
11935     }
11936
11937     if (!isALPHA(*pos)) {
11938         U8 tmpbuf[UTF8_MAXBYTES+1];
11939
11940         if (*s == 'v')
11941             s++;  /* get past 'v' */
11942
11943         sv_setpvs(sv, "");
11944
11945         for (;;) {
11946             /* this is atoi() that tolerates underscores */
11947             U8 *tmpend;
11948             UV rev = 0;
11949             const char *end = pos;
11950             UV mult = 1;
11951             while (--end >= s) {
11952                 if (*end != '_') {
11953                     const UV orev = rev;
11954                     rev += (*end - '0') * mult;
11955                     mult *= 10;
11956                     if (orev > rev)
11957                         /* diag_listed_as: Integer overflow in %s number */
11958                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11959                                          "Integer overflow in decimal number");
11960                 }
11961             }
11962 #ifdef EBCDIC
11963             if (rev > 0x7FFFFFFF)
11964                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11965 #endif
11966             /* Append native character for the rev point */
11967             tmpend = uvchr_to_utf8(tmpbuf, rev);
11968             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11969             if (!UVCHR_IS_INVARIANT(rev))
11970                  SvUTF8_on(sv);
11971             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11972                  s = ++pos;
11973             else {
11974                  s = pos;
11975                  break;
11976             }
11977             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11978                  pos++;
11979         }
11980         SvPOK_on(sv);
11981         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11982         SvRMAGICAL_on(sv);
11983     }
11984     return (char *)s;
11985 }
11986
11987 int
11988 Perl_keyword_plugin_standard(pTHX_
11989         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11990 {
11991     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11992     PERL_UNUSED_CONTEXT;
11993     PERL_UNUSED_ARG(keyword_ptr);
11994     PERL_UNUSED_ARG(keyword_len);
11995     PERL_UNUSED_ARG(op_ptr);
11996     return KEYWORD_PLUGIN_DECLINE;
11997 }
11998
11999 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12000 static void
12001 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12002 {
12003     SAVEI32(PL_lex_brackets);
12004     if (PL_lex_brackets > 100)
12005         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12006     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12007     SAVEI32(PL_lex_allbrackets);
12008     PL_lex_allbrackets = 0;
12009     SAVEI8(PL_lex_fakeeof);
12010     PL_lex_fakeeof = (U8)fakeeof;
12011     if(yyparse(gramtype) && !PL_parser->error_count)
12012         qerror(Perl_mess(aTHX_ "Parse error"));
12013 }
12014
12015 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12016 static OP *
12017 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12018 {
12019     OP *o;
12020     ENTER;
12021     SAVEVPTR(PL_eval_root);
12022     PL_eval_root = NULL;
12023     parse_recdescent(gramtype, fakeeof);
12024     o = PL_eval_root;
12025     LEAVE;
12026     return o;
12027 }
12028
12029 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12030 static OP *
12031 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12032 {
12033     OP *exprop;
12034     if (flags & ~PARSE_OPTIONAL)
12035         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12036     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12037     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12038         if (!PL_parser->error_count)
12039             qerror(Perl_mess(aTHX_ "Parse error"));
12040         exprop = newOP(OP_NULL, 0);
12041     }
12042     return exprop;
12043 }
12044
12045 /*
12046 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12047
12048 Parse a Perl arithmetic expression.  This may contain operators of precedence
12049 down to the bit shift operators.  The expression must be followed (and thus
12050 terminated) either by a comparison or lower-precedence operator or by
12051 something that would normally terminate an expression such as semicolon.
12052 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12053 otherwise it is mandatory.  It is up to the caller to ensure that the
12054 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12055 the source of the code to be parsed and the lexical context for the
12056 expression.
12057
12058 The op tree representing the expression is returned.  If an optional
12059 expression is absent, a null pointer is returned, otherwise the pointer
12060 will be non-null.
12061
12062 If an error occurs in parsing or compilation, in most cases a valid op
12063 tree is returned anyway.  The error is reflected in the parser state,
12064 normally resulting in a single exception at the top level of parsing
12065 which covers all the compilation errors that occurred.  Some compilation
12066 errors, however, will throw an exception immediately.
12067
12068 =cut
12069 */
12070
12071 OP *
12072 Perl_parse_arithexpr(pTHX_ U32 flags)
12073 {
12074     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12075 }
12076
12077 /*
12078 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12079
12080 Parse a Perl term expression.  This may contain operators of precedence
12081 down to the assignment operators.  The expression must be followed (and thus
12082 terminated) either by a comma or lower-precedence operator or by
12083 something that would normally terminate an expression such as semicolon.
12084 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12085 otherwise it is mandatory.  It is up to the caller to ensure that the
12086 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12087 the source of the code to be parsed and the lexical context for the
12088 expression.
12089
12090 The op tree representing the expression is returned.  If an optional
12091 expression is absent, a null pointer is returned, otherwise the pointer
12092 will be non-null.
12093
12094 If an error occurs in parsing or compilation, in most cases a valid op
12095 tree is returned anyway.  The error is reflected in the parser state,
12096 normally resulting in a single exception at the top level of parsing
12097 which covers all the compilation errors that occurred.  Some compilation
12098 errors, however, will throw an exception immediately.
12099
12100 =cut
12101 */
12102
12103 OP *
12104 Perl_parse_termexpr(pTHX_ U32 flags)
12105 {
12106     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12107 }
12108
12109 /*
12110 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12111
12112 Parse a Perl list expression.  This may contain operators of precedence
12113 down to the comma operator.  The expression must be followed (and thus
12114 terminated) either by a low-precedence logic operator such as C<or> or by
12115 something that would normally terminate an expression such as semicolon.
12116 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12117 otherwise it is mandatory.  It is up to the caller to ensure that the
12118 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12119 the source of the code to be parsed and the lexical context for the
12120 expression.
12121
12122 The op tree representing the expression is returned.  If an optional
12123 expression is absent, a null pointer is returned, otherwise the pointer
12124 will be non-null.
12125
12126 If an error occurs in parsing or compilation, in most cases a valid op
12127 tree is returned anyway.  The error is reflected in the parser state,
12128 normally resulting in a single exception at the top level of parsing
12129 which covers all the compilation errors that occurred.  Some compilation
12130 errors, however, will throw an exception immediately.
12131
12132 =cut
12133 */
12134
12135 OP *
12136 Perl_parse_listexpr(pTHX_ U32 flags)
12137 {
12138     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12139 }
12140
12141 /*
12142 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12143
12144 Parse a single complete Perl expression.  This allows the full
12145 expression grammar, including the lowest-precedence operators such
12146 as C<or>.  The expression must be followed (and thus terminated) by a
12147 token that an expression would normally be terminated by: end-of-file,
12148 closing bracketing punctuation, semicolon, or one of the keywords that
12149 signals a postfix expression-statement modifier.  If I<flags> includes
12150 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12151 mandatory.  It is up to the caller to ensure that the dynamic parser
12152 state (L</PL_parser> et al) is correctly set to reflect the source of
12153 the code to be parsed and the lexical context for the expression.
12154
12155 The op tree representing the expression is returned.  If an optional
12156 expression is absent, a null pointer is returned, otherwise the pointer
12157 will be non-null.
12158
12159 If an error occurs in parsing or compilation, in most cases a valid op
12160 tree is returned anyway.  The error is reflected in the parser state,
12161 normally resulting in a single exception at the top level of parsing
12162 which covers all the compilation errors that occurred.  Some compilation
12163 errors, however, will throw an exception immediately.
12164
12165 =cut
12166 */
12167
12168 OP *
12169 Perl_parse_fullexpr(pTHX_ U32 flags)
12170 {
12171     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12172 }
12173
12174 /*
12175 =for apidoc Amx|OP *|parse_block|U32 flags
12176
12177 Parse a single complete Perl code block.  This consists of an opening
12178 brace, a sequence of statements, and a closing brace.  The block
12179 constitutes a lexical scope, so C<my> variables and various compile-time
12180 effects can be contained within it.  It is up to the caller to ensure
12181 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12182 reflect the source of the code to be parsed and the lexical context for
12183 the statement.
12184
12185 The op tree representing the code block is returned.  This is always a
12186 real op, never a null pointer.  It will normally be a C<lineseq> list,
12187 including C<nextstate> or equivalent ops.  No ops to construct any kind
12188 of runtime scope are included by virtue of it being a block.
12189
12190 If an error occurs in parsing or compilation, in most cases a valid op
12191 tree (most likely null) is returned anyway.  The error is reflected in
12192 the parser state, normally resulting in a single exception at the top
12193 level of parsing which covers all the compilation errors that occurred.
12194 Some compilation errors, however, will throw an exception immediately.
12195
12196 The I<flags> parameter is reserved for future use, and must always
12197 be zero.
12198
12199 =cut
12200 */
12201
12202 OP *
12203 Perl_parse_block(pTHX_ U32 flags)
12204 {
12205     if (flags)
12206         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12207     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12208 }
12209
12210 /*
12211 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12212
12213 Parse a single unadorned Perl statement.  This may be a normal imperative
12214 statement or a declaration that has compile-time effect.  It does not
12215 include any label or other affixture.  It is up to the caller to ensure
12216 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12217 reflect the source of the code to be parsed and the lexical context for
12218 the statement.
12219
12220 The op tree representing the statement is returned.  This may be a
12221 null pointer if the statement is null, for example if it was actually
12222 a subroutine definition (which has compile-time side effects).  If not
12223 null, it will be ops directly implementing the statement, suitable to
12224 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12225 equivalent op (except for those embedded in a scope contained entirely
12226 within the statement).
12227
12228 If an error occurs in parsing or compilation, in most cases a valid op
12229 tree (most likely null) is returned anyway.  The error is reflected in
12230 the parser state, normally resulting in a single exception at the top
12231 level of parsing which covers all the compilation errors that occurred.
12232 Some compilation errors, however, will throw an exception immediately.
12233
12234 The I<flags> parameter is reserved for future use, and must always
12235 be zero.
12236
12237 =cut
12238 */
12239
12240 OP *
12241 Perl_parse_barestmt(pTHX_ U32 flags)
12242 {
12243     if (flags)
12244         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12245     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12246 }
12247
12248 /*
12249 =for apidoc Amx|SV *|parse_label|U32 flags
12250
12251 Parse a single label, possibly optional, of the type that may prefix a
12252 Perl statement.  It is up to the caller to ensure that the dynamic parser
12253 state (L</PL_parser> et al) is correctly set to reflect the source of
12254 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12255 label is optional, otherwise it is mandatory.
12256
12257 The name of the label is returned in the form of a fresh scalar.  If an
12258 optional label is absent, a null pointer is returned.
12259
12260 If an error occurs in parsing, which can only occur if the label is
12261 mandatory, a valid label is returned anyway.  The error is reflected in
12262 the parser state, normally resulting in a single exception at the top
12263 level of parsing which covers all the compilation errors that occurred.
12264
12265 =cut
12266 */
12267
12268 SV *
12269 Perl_parse_label(pTHX_ U32 flags)
12270 {
12271     if (flags & ~PARSE_OPTIONAL)
12272         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12273     if (PL_lex_state == LEX_KNOWNEXT) {
12274         PL_parser->yychar = yylex();
12275         if (PL_parser->yychar == LABEL) {
12276             char * const lpv = pl_yylval.pval;
12277             STRLEN llen = strlen(lpv);
12278             PL_parser->yychar = YYEMPTY;
12279             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12280         } else {
12281             yyunlex();
12282             goto no_label;
12283         }
12284     } else {
12285         char *s, *t;
12286         STRLEN wlen, bufptr_pos;
12287         lex_read_space(0);
12288         t = s = PL_bufptr;
12289         if (!isIDFIRST_lazy_if(s, UTF))
12290             goto no_label;
12291         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12292         if (word_takes_any_delimeter(s, wlen))
12293             goto no_label;
12294         bufptr_pos = s - SvPVX(PL_linestr);
12295         PL_bufptr = t;
12296         lex_read_space(LEX_KEEP_PREVIOUS);
12297         t = PL_bufptr;
12298         s = SvPVX(PL_linestr) + bufptr_pos;
12299         if (t[0] == ':' && t[1] != ':') {
12300             PL_oldoldbufptr = PL_oldbufptr;
12301             PL_oldbufptr = s;
12302             PL_bufptr = t+1;
12303             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12304         } else {
12305             PL_bufptr = s;
12306             no_label:
12307             if (flags & PARSE_OPTIONAL) {
12308                 return NULL;
12309             } else {
12310                 qerror(Perl_mess(aTHX_ "Parse error"));
12311                 return newSVpvs("x");
12312             }
12313         }
12314     }
12315 }
12316
12317 /*
12318 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12319
12320 Parse a single complete Perl statement.  This may be a normal imperative
12321 statement or a declaration that has compile-time effect, and may include
12322 optional labels.  It is up to the caller to ensure that the dynamic
12323 parser state (L</PL_parser> et al) is correctly set to reflect the source
12324 of the code to be parsed and the lexical context for the statement.
12325
12326 The op tree representing the statement is returned.  This may be a
12327 null pointer if the statement is null, for example if it was actually
12328 a subroutine definition (which has compile-time side effects).  If not
12329 null, it will be the result of a L</newSTATEOP> call, normally including
12330 a C<nextstate> or equivalent op.
12331
12332 If an error occurs in parsing or compilation, in most cases a valid op
12333 tree (most likely null) is returned anyway.  The error is reflected in
12334 the parser state, normally resulting in a single exception at the top
12335 level of parsing which covers all the compilation errors that occurred.
12336 Some compilation errors, however, will throw an exception immediately.
12337
12338 The I<flags> parameter is reserved for future use, and must always
12339 be zero.
12340
12341 =cut
12342 */
12343
12344 OP *
12345 Perl_parse_fullstmt(pTHX_ U32 flags)
12346 {
12347     if (flags)
12348         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12349     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12350 }
12351
12352 /*
12353 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12354
12355 Parse a sequence of zero or more Perl statements.  These may be normal
12356 imperative statements, including optional labels, or declarations
12357 that have compile-time effect, or any mixture thereof.  The statement
12358 sequence ends when a closing brace or end-of-file is encountered in a
12359 place where a new statement could have validly started.  It is up to
12360 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12361 is correctly set to reflect the source of the code to be parsed and the
12362 lexical context for the statements.
12363
12364 The op tree representing the statement sequence is returned.  This may
12365 be a null pointer if the statements were all null, for example if there
12366 were no statements or if there were only subroutine definitions (which
12367 have compile-time side effects).  If not null, it will be a C<lineseq>
12368 list, normally including C<nextstate> or equivalent ops.
12369
12370 If an error occurs in parsing or compilation, in most cases a valid op
12371 tree is returned anyway.  The error is reflected in the parser state,
12372 normally resulting in a single exception at the top level of parsing
12373 which covers all the compilation errors that occurred.  Some compilation
12374 errors, however, will throw an exception immediately.
12375
12376 The I<flags> parameter is reserved for future use, and must always
12377 be zero.
12378
12379 =cut
12380 */
12381
12382 OP *
12383 Perl_parse_stmtseq(pTHX_ U32 flags)
12384 {
12385     OP *stmtseqop;
12386     I32 c;
12387     if (flags)
12388         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12389     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12390     c = lex_peek_unichar(0);
12391     if (c != -1 && c != /*{*/'}')
12392         qerror(Perl_mess(aTHX_ "Parse error"));
12393     return stmtseqop;
12394 }
12395
12396 #define lex_token_boundary() S_lex_token_boundary(aTHX)
12397 static void
12398 S_lex_token_boundary(pTHX)
12399 {
12400     PL_oldoldbufptr = PL_oldbufptr;
12401     PL_oldbufptr = PL_bufptr;
12402 }
12403
12404 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
12405 static OP *
12406 S_parse_opt_lexvar(pTHX)
12407 {
12408     I32 sigil, c;
12409     char *s, *d;
12410     OP *var;
12411     lex_token_boundary();
12412     sigil = lex_read_unichar(0);
12413     if (lex_peek_unichar(0) == '#') {
12414         qerror(Perl_mess(aTHX_ "Parse error"));
12415         return NULL;
12416     }
12417     lex_read_space(0);
12418     c = lex_peek_unichar(0);
12419     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
12420         return NULL;
12421     s = PL_bufptr;
12422     d = PL_tokenbuf + 1;
12423     PL_tokenbuf[0] = (char)sigil;
12424     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
12425     PL_bufptr = s;
12426     if (d == PL_tokenbuf+1)
12427         return NULL;
12428     *d = 0;
12429     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
12430                 OPf_MOD | (OPpLVAL_INTRO<<8));
12431     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
12432     return var;
12433 }
12434
12435 OP *
12436 Perl_parse_subsignature(pTHX)
12437 {
12438     I32 c;
12439     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
12440     OP *initops = NULL;
12441     lex_read_space(0);
12442     c = lex_peek_unichar(0);
12443     while (c != /*(*/')') {
12444         switch (c) {
12445             case '$': {
12446                 OP *var, *expr;
12447                 if (prev_type == 2)
12448                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12449                 var = parse_opt_lexvar();
12450                 expr = var ?
12451                     newBINOP(OP_AELEM, 0,
12452                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
12453                             OP_RV2AV),
12454                         newSVOP(OP_CONST, 0, newSViv(pos))) :
12455                     NULL;
12456                 lex_read_space(0);
12457                 c = lex_peek_unichar(0);
12458                 if (c == '=') {
12459                     lex_token_boundary();
12460                     lex_read_unichar(0);
12461                     lex_read_space(0);
12462                     c = lex_peek_unichar(0);
12463                     if (c == ',' || c == /*(*/')') {
12464                         if (var)
12465                             qerror(Perl_mess(aTHX_ "Optional parameter "
12466                                     "lacks default expression"));
12467                     } else {
12468                         OP *defexpr = parse_termexpr(0);
12469                         if (defexpr->op_type == OP_UNDEF &&
12470                                 !(defexpr->op_flags & OPf_KIDS)) {
12471                             op_free(defexpr);
12472                         } else {
12473                             OP *ifop =
12474                                 newBINOP(OP_GE, 0,
12475                                     scalar(newUNOP(OP_RV2AV, 0,
12476                                             newGVOP(OP_GV, 0, PL_defgv))),
12477                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
12478                             expr = var ?
12479                                 newCONDOP(0, ifop, expr, defexpr) :
12480                                 newLOGOP(OP_OR, 0, ifop, defexpr);
12481                         }
12482                     }
12483                     prev_type = 1;
12484                 } else {
12485                     if (prev_type == 1)
12486                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
12487                                 "follows optional parameter"));
12488                     prev_type = 0;
12489                     min_arity = pos + 1;
12490                 }
12491                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
12492                 if (expr)
12493                     initops = op_append_list(OP_LINESEQ, initops,
12494                                 newSTATEOP(0, NULL, expr));
12495                 max_arity = ++pos;
12496             } break;
12497             case '@':
12498             case '%': {
12499                 OP *var;
12500                 if (prev_type == 2)
12501                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12502                 var = parse_opt_lexvar();
12503                 if (c == '%') {
12504                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
12505                             newBINOP(OP_BIT_AND, 0,
12506                                 scalar(newUNOP(OP_RV2AV, 0,
12507                                     newGVOP(OP_GV, 0, PL_defgv))),
12508                                 newSVOP(OP_CONST, 0, newSViv(1))),
12509                             newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12510                                 newSVOP(OP_CONST, 0,
12511                                     newSVpvs("Odd name/value argument "
12512                                         "for subroutine"))));
12513                     if (pos != min_arity)
12514                         chkop = newLOGOP(OP_AND, 0,
12515                                     newBINOP(OP_GT, 0,
12516                                         scalar(newUNOP(OP_RV2AV, 0,
12517                                             newGVOP(OP_GV, 0, PL_defgv))),
12518                                         newSVOP(OP_CONST, 0, newSViv(pos))),
12519                                     chkop);
12520                     initops = op_append_list(OP_LINESEQ,
12521                                 newSTATEOP(0, NULL, chkop),
12522                                 initops);
12523                 }
12524                 if (var) {
12525                     OP *slice = pos ?
12526                         op_prepend_elem(OP_ASLICE,
12527                             newOP(OP_PUSHMARK, 0),
12528                             newLISTOP(OP_ASLICE, 0,
12529                                 list(newRANGE(0,
12530                                     newSVOP(OP_CONST, 0, newSViv(pos)),
12531                                     newUNOP(OP_AV2ARYLEN, 0,
12532                                         ref(newUNOP(OP_RV2AV, 0,
12533                                                 newGVOP(OP_GV, 0, PL_defgv)),
12534                                             OP_AV2ARYLEN)))),
12535                                 ref(newUNOP(OP_RV2AV, 0,
12536                                         newGVOP(OP_GV, 0, PL_defgv)),
12537                                     OP_ASLICE))) :
12538                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12539                     initops = op_append_list(OP_LINESEQ, initops,
12540                         newSTATEOP(0, NULL,
12541                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
12542                 }
12543                 prev_type = 2;
12544                 max_arity = -1;
12545             } break;
12546             default:
12547                 parse_error:
12548                 qerror(Perl_mess(aTHX_ "Parse error"));
12549                 return NULL;
12550         }
12551         lex_read_space(0);
12552         c = lex_peek_unichar(0);
12553         switch (c) {
12554             case /*(*/')': break;
12555             case ',':
12556                 do {
12557                     lex_token_boundary();
12558                     lex_read_unichar(0);
12559                     lex_read_space(0);
12560                     c = lex_peek_unichar(0);
12561                 } while (c == ',');
12562                 break;
12563             default:
12564                 goto parse_error;
12565         }
12566     }
12567     if (min_arity != 0) {
12568         initops = op_append_list(OP_LINESEQ,
12569             newSTATEOP(0, NULL,
12570                 newLOGOP(OP_OR, 0,
12571                     newBINOP(OP_GE, 0,
12572                         scalar(newUNOP(OP_RV2AV, 0,
12573                             newGVOP(OP_GV, 0, PL_defgv))),
12574                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
12575                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12576                         newSVOP(OP_CONST, 0,
12577                             newSVpvs("Too few arguments for subroutine"))))),
12578             initops);
12579     }
12580     if (max_arity != -1) {
12581         initops = op_append_list(OP_LINESEQ,
12582             newSTATEOP(0, NULL,
12583                 newLOGOP(OP_OR, 0,
12584                     newBINOP(OP_LE, 0,
12585                         scalar(newUNOP(OP_RV2AV, 0,
12586                             newGVOP(OP_GV, 0, PL_defgv))),
12587                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
12588                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12589                         newSVOP(OP_CONST, 0,
12590                             newSVpvs("Too many arguments for subroutine"))))),
12591             initops);
12592     }
12593     return initops;
12594 }
12595
12596 /*
12597  * Local variables:
12598  * c-indentation-style: bsd
12599  * c-basic-offset: 4
12600  * indent-tabs-mode: nil
12601  * End:
12602  *
12603  * ex: set ts=8 sts=4 sw=4 et:
12604  */