toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #include "keywords.h"
 186
 187 /* CLINE is a macro that ensures PL_copline has a sane value */
 188
 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 190
 191 #ifdef PERL_MAD
 192 #  define SKIPSPACE0(s) skipspace0(s)
 193 #  define SKIPSPACE1(s) skipspace1(s)
 194 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 195 #  define PEEKSPACE(s) skipspace2(s,0)
 196 #else
 197 #  define SKIPSPACE0(s) skipspace(s)
 198 #  define SKIPSPACE1(s) skipspace(s)
 199 #  define SKIPSPACE2(s,tsv) skipspace(s)
 200 #  define PEEKSPACE(s) skipspace(s)
 201 #endif
 202
 203 /*
 204  * Convenience functions to return different tokens and prime the
 205  * lexer for the next token.  They all take an argument.
 206  *
 207  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 208  * OPERATOR     : generic operator
 209  * AOPERATOR    : assignment operator
 210  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 211  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 212  * PREREF       : *EXPR where EXPR is not a simple identifier
 213  * TERM         : expression term
 214  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 215  * LOOPX        : loop exiting command (goto, last, dump, etc)
 216  * FTST         : file test operator
 217  * FUN0         : zero-argument function
 218  * FUN0OP       : zero-argument function, with its op created in this file
 219  * FUN1         : not used, except for not, which isn't a UNIOP
 220  * BOop         : bitwise or or xor
 221  * BAop         : bitwise and
 222  * SHop         : shift operator
 223  * PWop         : power operator
 224  * PMop         : pattern-matching operator
 225  * Aop          : addition-level operator
 226  * Mop          : multiplication-level operator
 227  * Eop          : equality-testing operator
 228  * Rop          : relational operator <= != gt
 229  *
 230  * Also see LOP and lop() below.
 231  */
 232
 233 #ifdef DEBUGGING /* Serve -DT. */
 234 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 235 #else
 236 #   define REPORT(retval) (retval)
 237 #endif
 238
 239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 248 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 249 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 250 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 251 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 252 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 253 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 254 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 255 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 256 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 257 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 258 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 259 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 260 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 261
 262 /* This bit of chicanery makes a unary function followed by
 263  * a parenthesis into a function with one argument, highest precedence.
 264  * The UNIDOR macro is for unary functions that can be followed by the //
 265  * operator (such as C<shift // 0>).
 266  */
 267 #define UNI3(f,x,have_x) { \
 268         pl_yylval.ival = f; \
 269         if (have_x) PL_expect = x; \
 270         PL_bufptr = s; \
 271         PL_last_uni = PL_oldbufptr; \
 272         PL_last_lop_op = f; \
 273         if (*s == '(') \
 274             return REPORT( (int)FUNC1 ); \
 275         s = PEEKSPACE(s); \
 276         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 277         }
 278 #define UNI(f)    UNI3(f,XTERM,1)
 279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 280 #define UNIPROTO(f,optional) { \
 281         if (optional) PL_last_uni = PL_oldbufptr; \
 282         OPERATOR(f); \
 283         }
 284
 285 #define UNIBRACK(f) UNI3(f,0,0)
 286
 287 /* grandfather return to old style */
 288 #define OLDLOP(f) \
 289         do { \
 290             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 291                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 292             pl_yylval.ival = (f); \
 293             PL_expect = XTERM; \
 294             PL_bufptr = s; \
 295             return (int)LSTOP; \
 296         } while(0)
 297
 298 #define COPLINE_INC_WITH_HERELINES                  \
 299     STMT_START {                                     \
 300         CopLINE_inc(PL_curcop);                       \
 301         if (PL_parser->herelines)                      \
 302             CopLINE(PL_curcop) += PL_parser->herelines, \
 303             PL_parser->herelines = 0;                    \
 304     } STMT_END
 305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 306  * is no sublex_push to follow. */
 307 #define COPLINE_SET_FROM_MULTI_END            \
 308     STMT_START {                               \
 309         CopLINE_set(PL_curcop, PL_multi_end);   \
 310         if (PL_multi_end != PL_multi_start)      \
 311             PL_parser->herelines = 0;             \
 312     } STMT_END
 313
 314
 315 #ifdef DEBUGGING
 316
 317 /* how to interpret the pl_yylval associated with the token */
 318 enum token_type {
 319     TOKENTYPE_NONE,
 320     TOKENTYPE_IVAL,
 321     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 322     TOKENTYPE_PVAL,
 323     TOKENTYPE_OPVAL
 324 };
 325
 326 static struct debug_tokens {
 327     const int token;
 328     enum token_type type;
 329     const char *name;
 330 } const debug_tokens[] =
 331 {
 332     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 333     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 334     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 335     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 336     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 337     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 338     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 339     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 340     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 341     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 342     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 343     { DO,               TOKENTYPE_NONE,         "DO" },
 344     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 345     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 346     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 347     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 348     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 349     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 350     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 351     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 352     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 353     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 354     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 355     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 356     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 357     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 358     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 359     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 360     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 361     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 362     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 363     { IF,               TOKENTYPE_IVAL,         "IF" },
 364     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 365     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 366     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 367     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 368     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 369     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 370     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 371     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 372     { MY,               TOKENTYPE_IVAL,         "MY" },
 373     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 374     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 375     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 376     { OROR,             TOKENTYPE_NONE,         "OROR" },
 377     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 378     { PEG,              TOKENTYPE_NONE,         "PEG" },
 379     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 380     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 381     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 382     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 383     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 384     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 385     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 386     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 387     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 388     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 389     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 390     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 391     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 392     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 393     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 394     { SUB,              TOKENTYPE_NONE,         "SUB" },
 395     { THING,            TOKENTYPE_OPVAL,        "THING" },
 396     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 397     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 398     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 399     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 400     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 401     { USE,              TOKENTYPE_IVAL,         "USE" },
 402     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 403     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 404     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 405     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 406     { 0,                TOKENTYPE_NONE,         NULL }
 407 };
 408
 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 410
 411 STATIC int
 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 413 {
 414     dVAR;
 415
 416     PERL_ARGS_ASSERT_TOKEREPORT;
 417
 418     if (DEBUG_T_TEST) {
 419         const char *name = NULL;
 420         enum token_type type = TOKENTYPE_NONE;
 421         const struct debug_tokens *p;
 422         SV* const report = newSVpvs("<== ");
 423
 424         for (p = debug_tokens; p->token; p++) {
 425             if (p->token == (int)rv) {
 426                 name = p->name;
 427                 type = p->type;
 428                 break;
 429             }
 430         }
 431         if (name)
 432             Perl_sv_catpv(aTHX_ report, name);
 433         else if ((char)rv > ' ' && (char)rv <= '~')
 434         {
 435             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 436             if ((char)rv == 'p')
 437                 sv_catpvs(report, " (pending identifier)");
 438         }
 439         else if (!rv)
 440             sv_catpvs(report, "EOF");
 441         else
 442             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 443         switch (type) {
 444         case TOKENTYPE_NONE:
 445             break;
 446         case TOKENTYPE_IVAL:
 447             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 448             break;
 449         case TOKENTYPE_OPNUM:
 450             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 451                                     PL_op_name[lvalp->ival]);
 452             break;
 453         case TOKENTYPE_PVAL:
 454             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 455             break;
 456         case TOKENTYPE_OPVAL:
 457             if (lvalp->opval) {
 458                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 459                                     PL_op_name[lvalp->opval->op_type]);
 460                 if (lvalp->opval->op_type == OP_CONST) {
 461                     Perl_sv_catpvf(aTHX_ report, " %s",
 462                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 463                 }
 464
 465             }
 466             else
 467                 sv_catpvs(report, "(opval=null)");
 468             break;
 469         }
 470         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 471     };
 472     return (int)rv;
 473 }
 474
 475
 476 /* print the buffer with suitable escapes */
 477
 478 STATIC void
 479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 480 {
 481     SV* const tmp = newSVpvs("");
 482
 483     PERL_ARGS_ASSERT_PRINTBUF;
 484
 485     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 486     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 487     GCC_DIAG_RESTORE;
 488     SvREFCNT_dec(tmp);
 489 }
 490
 491 #endif
 492
 493 static int
 494 S_deprecate_commaless_var_list(pTHX) {
 495     PL_expect = XTERM;
 496     deprecate("comma-less variable list");
 497     return REPORT(','); /* grandfather non-comma-format format */
 498 }
 499
 500 /*
 501  * S_ao
 502  *
 503  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 504  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 505  */
 506
 507 STATIC int
 508 S_ao(pTHX_ int toketype)
 509 {
 510     dVAR;
 511     if (*PL_bufptr == '=') {
 512         PL_bufptr++;
 513         if (toketype == ANDAND)
 514             pl_yylval.ival = OP_ANDASSIGN;
 515         else if (toketype == OROR)
 516             pl_yylval.ival = OP_ORASSIGN;
 517         else if (toketype == DORDOR)
 518             pl_yylval.ival = OP_DORASSIGN;
 519         toketype = ASSIGNOP;
 520     }
 521     return toketype;
 522 }
 523
 524 /*
 525  * S_no_op
 526  * When Perl expects an operator and finds something else, no_op
 527  * prints the warning.  It always prints "<something> found where
 528  * operator expected.  It prints "Missing semicolon on previous line?"
 529  * if the surprise occurs at the start of the line.  "do you need to
 530  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 531  * where the compiler doesn't know if foo is a method call or a function.
 532  * It prints "Missing operator before end of line" if there's nothing
 533  * after the missing operator, or "... before <...>" if there is something
 534  * after the missing operator.
 535  */
 536
 537 STATIC void
 538 S_no_op(pTHX_ const char *const what, char *s)
 539 {
 540     dVAR;
 541     char * const oldbp = PL_bufptr;
 542     const bool is_first = (PL_oldbufptr == PL_linestart);
 543
 544     PERL_ARGS_ASSERT_NO_OP;
 545
 546     if (!s)
 547         s = oldbp;
 548     else
 549         PL_bufptr = s;
 550     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 551     if (ckWARN_d(WARN_SYNTAX)) {
 552         if (is_first)
 553             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 554                     "\t(Missing semicolon on previous line?)\n");
 555         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 556             const char *t;
 557             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 558                                                             t += UTF ? UTF8SKIP(t) : 1)
 559                 NOOP;
 560             if (t < PL_bufptr && isSPACE(*t))
 561                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 562                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 563                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 564         }
 565         else {
 566             assert(s >= oldbp);
 567             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 568                     "\t(Missing operator before %"UTF8f"?)\n",
 569                      UTF8fARG(UTF, s - oldbp, oldbp));
 570         }
 571     }
 572     PL_bufptr = oldbp;
 573 }
 574
 575 /*
 576  * S_missingterm
 577  * Complain about missing quote/regexp/heredoc terminator.
 578  * If it's called with NULL then it cauterizes the line buffer.
 579  * If we're in a delimited string and the delimiter is a control
 580  * character, it's reformatted into a two-char sequence like ^C.
 581  * This is fatal.
 582  */
 583
 584 STATIC void
 585 S_missingterm(pTHX_ char *s)
 586 {
 587     dVAR;
 588     char tmpbuf[3];
 589     char q;
 590     if (s) {
 591         char * const nl = strrchr(s,'\n');
 592         if (nl)
 593             *nl = '\0';
 594     }
 595     else if ((U8) PL_multi_close < 32) {
 596         *tmpbuf = '^';
 597         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 598         tmpbuf[2] = '\0';
 599         s = tmpbuf;
 600     }
 601     else {
 602         *tmpbuf = (char)PL_multi_close;
 603         tmpbuf[1] = '\0';
 604         s = tmpbuf;
 605     }
 606     q = strchr(s,'"') ? '\'' : '"';
 607     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 608 }
 609
 610 #include "feature.h"
 611
 612 /*
 613  * Check whether the named feature is enabled.
 614  */
 615 bool
 616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 617 {
 618     dVAR;
 619     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 620
 621     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 622
 623     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 624
 625     if (namelen > MAX_FEATURE_LEN)
 626         return FALSE;
 627     memcpy(&he_name[8], name, namelen);
 628
 629     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 630                                      REFCOUNTED_HE_EXISTS));
 631 }
 632
 633 /*
 634  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 635  * utf16-to-utf8-reversed.
 636  */
 637
 638 #ifdef PERL_CR_FILTER
 639 static void
 640 strip_return(SV *sv)
 641 {
 642     const char *s = SvPVX_const(sv);
 643     const char * const e = s + SvCUR(sv);
 644
 645     PERL_ARGS_ASSERT_STRIP_RETURN;
 646
 647     /* outer loop optimized to do nothing if there are no CR-LFs */
 648     while (s < e) {
 649         if (*s++ == '\r' && *s == '\n') {
 650             /* hit a CR-LF, need to copy the rest */
 651             char *d = s - 1;
 652             *d++ = *s++;
 653             while (s < e) {
 654                 if (*s == '\r' && s[1] == '\n')
 655                     s++;
 656                 *d++ = *s++;
 657             }
 658             SvCUR(sv) -= s - d;
 659             return;
 660         }
 661     }
 662 }
 663
 664 STATIC I32
 665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 666 {
 667     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 668     if (count > 0 && !maxlen)
 669         strip_return(sv);
 670     return count;
 671 }
 672 #endif
 673
 674 /*
 675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 676
 677 Creates and initialises a new lexer/parser state object, supplying
 678 a context in which to lex and parse from a new source of Perl code.
 679 A pointer to the new state object is placed in L</PL_parser>.  An entry
 680 is made on the save stack so that upon unwinding the new state object
 681 will be destroyed and the former value of L</PL_parser> will be restored.
 682 Nothing else need be done to clean up the parsing context.
 683
 684 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 685 non-null, provides a string (in SV form) containing code to be parsed.
 686 A copy of the string is made, so subsequent modification of I<line>
 687 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 688 from which code will be read to be parsed.  If both are non-null, the
 689 code in I<line> comes first and must consist of complete lines of input,
 690 and I<rsfp> supplies the remainder of the source.
 691
 692 The I<flags> parameter is reserved for future use.  Currently it is only
 693 used by perl internally, so extensions should always pass zero.
 694
 695 =cut
 696 */
 697
 698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 699    can share filters with the current parser.
 700    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 701    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 702    destruction. This is used to handle the case of defaulting to reading the
 703    script from the standard input because no filename was given on the command
 704    line (without getting confused by situation where STDIN has been closed, so
 705    the script handle is opened on fd 0)  */
 706
 707 void
 708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 709 {
 710     dVAR;
 711     const char *s = NULL;
 712     yy_parser *parser, *oparser;
 713     if (flags && flags & ~LEX_START_FLAGS)
 714         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 715
 716     /* create and initialise a parser */
 717
 718     Newxz(parser, 1, yy_parser);
 719     parser->old_parser = oparser = PL_parser;
 720     PL_parser = parser;
 721
 722     parser->stack = NULL;
 723     parser->ps = NULL;
 724     parser->stack_size = 0;
 725
 726     /* on scope exit, free this parser and restore any outer one */
 727     SAVEPARSER(parser);
 728     parser->saved_curcop = PL_curcop;
 729
 730     /* initialise lexer state */
 731
 732 #ifdef PERL_MAD
 733     parser->curforce = -1;
 734 #else
 735     parser->nexttoke = 0;
 736 #endif
 737     parser->error_count = oparser ? oparser->error_count : 0;
 738     parser->copline = parser->preambling = NOLINE;
 739     parser->lex_state = LEX_NORMAL;
 740     parser->expect = XSTATE;
 741     parser->rsfp = rsfp;
 742     parser->rsfp_filters =
 743       !(flags & LEX_START_SAME_FILTER) || !oparser
 744         ? NULL
 745         : MUTABLE_AV(SvREFCNT_inc(
 746             oparser->rsfp_filters
 747              ? oparser->rsfp_filters
 748              : (oparser->rsfp_filters = newAV())
 749           ));
 750
 751     Newx(parser->lex_brackstack, 120, char);
 752     Newx(parser->lex_casestack, 12, char);
 753     *parser->lex_casestack = '\0';
 754     Newxz(parser->lex_shared, 1, LEXSHARED);
 755
 756     if (line) {
 757         STRLEN len;
 758         s = SvPV_const(line, len);
 759         parser->linestr = flags & LEX_START_COPIED
 760                             ? SvREFCNT_inc_simple_NN(line)
 761                             : newSVpvn_flags(s, len, SvUTF8(line));
 762         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 763     } else {
 764         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 765     }
 766     parser->oldoldbufptr =
 767         parser->oldbufptr =
 768         parser->bufptr =
 769         parser->linestart = SvPVX(parser->linestr);
 770     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 771     parser->last_lop = parser->last_uni = NULL;
 772
 773     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 774                                                         |LEX_DONT_CLOSE_RSFP));
 775     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 776                                                         |LEX_DONT_CLOSE_RSFP));
 777
 778     parser->in_pod = parser->filtered = 0;
 779 }
 780
 781
 782 /* delete a parser object */
 783
 784 void
 785 Perl_parser_free(pTHX_  const yy_parser *parser)
 786 {
 787     PERL_ARGS_ASSERT_PARSER_FREE;
 788
 789     PL_curcop = parser->saved_curcop;
 790     SvREFCNT_dec(parser->linestr);
 791
 792     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 793         PerlIO_clearerr(parser->rsfp);
 794     else if (parser->rsfp && (!parser->old_parser ||
 795                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 796         PerlIO_close(parser->rsfp);
 797     SvREFCNT_dec(parser->rsfp_filters);
 798     SvREFCNT_dec(parser->lex_stuff);
 799     SvREFCNT_dec(parser->sublex_info.repl);
 800
 801     Safefree(parser->lex_brackstack);
 802     Safefree(parser->lex_casestack);
 803     Safefree(parser->lex_shared);
 804     PL_parser = parser->old_parser;
 805     Safefree(parser);
 806 }
 807
 808 void
 809 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 810 {
 811 #ifdef PERL_MAD
 812     I32 nexttoke = parser->lasttoke;
 813 #else
 814     I32 nexttoke = parser->nexttoke;
 815 #endif
 816     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 817     while (nexttoke--) {
 818 #ifdef PERL_MAD
 819         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 820                                 & 0xffff)
 821          && parser->nexttoke[nexttoke].next_val.opval
 822          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 823          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 824                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 825                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 826         }
 827 #else
 828         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 829          && parser->nextval[nexttoke].opval
 830          && parser->nextval[nexttoke].opval->op_slabbed
 831          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 832             op_free(parser->nextval[nexttoke].opval);
 833             parser->nextval[nexttoke].opval = NULL;
 834         }
 835 #endif
 836     }
 837 }
 838
 839
 840 /*
 841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 842
 843 Buffer scalar containing the chunk currently under consideration of the
 844 text currently being lexed.  This is always a plain string scalar (for
 845 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 846 normal scalar means; instead refer to the buffer directly by the pointer
 847 variables described below.
 848
 849 The lexer maintains various C<char*> pointers to things in the
 850 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 851 reallocated, all of these pointers must be updated.  Don't attempt to
 852 do this manually, but rather use L</lex_grow_linestr> if you need to
 853 reallocate the buffer.
 854
 855 The content of the text chunk in the buffer is commonly exactly one
 856 complete line of input, up to and including a newline terminator,
 857 but there are situations where it is otherwise.  The octets of the
 858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 859 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 860 flag on this scalar, which may disagree with it.
 861
 862 For direct examination of the buffer, the variable
 863 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 865 of these pointers is usually preferable to examination of the scalar
 866 through normal scalar means.
 867
 868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 869
 870 Direct pointer to the end of the chunk of text currently being lexed, the
 871 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 872 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 873 always located at the end of the buffer, and does not count as part of
 874 the buffer's contents.
 875
 876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 877
 878 Points to the current position of lexing inside the lexer buffer.
 879 Characters around this point may be freely examined, within
 880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 881 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 883
 884 Lexing code (whether in the Perl core or not) moves this pointer past
 885 the characters that it consumes.  It is also expected to perform some
 886 bookkeeping whenever a newline character is consumed.  This movement
 887 can be more conveniently performed by the function L</lex_read_to>,
 888 which handles newlines appropriately.
 889
 890 Interpretation of the buffer's octets can be abstracted out by
 891 using the slightly higher-level functions L</lex_peek_unichar> and
 892 L</lex_read_unichar>.
 893
 894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 895
 896 Points to the start of the current line inside the lexer buffer.
 897 This is useful for indicating at which column an error occurred, and
 898 not much else.  This must be updated by any lexing code that consumes
 899 a newline; the function L</lex_read_to> handles this detail.
 900
 901 =cut
 902 */
 903
 904 /*
 905 =for apidoc Amx|bool|lex_bufutf8
 906
 907 Indicates whether the octets in the lexer buffer
 908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 909 of Unicode characters.  If not, they should be interpreted as Latin-1
 910 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 911
 912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 913 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 914 encoding.
 915
 916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 917 is significant, but not the whole story regarding the input character
 918 encoding.  Normally, when a file is being read, the scalar contains octets
 919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 920 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 923 is in effect.  This logic may change in the future; use this function
 924 instead of implementing the logic yourself.
 925
 926 =cut
 927 */
 928
 929 bool
 930 Perl_lex_bufutf8(pTHX)
 931 {
 932     return UTF;
 933 }
 934
 935 /*
 936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 937
 938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 939 at least I<len> octets (including terminating NUL).  Returns a
 940 pointer to the reallocated buffer.  This is necessary before making
 941 any direct modification of the buffer that would increase its length.
 942 L</lex_stuff_pvn> provides a more convenient way to insert text into
 943 the buffer.
 944
 945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 946 this function updates all of the lexer's variables that point directly
 947 into the buffer.
 948
 949 =cut
 950 */
 951
 952 char *
 953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 954 {
 955     SV *linestr;
 956     char *buf;
 957     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 958     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 959     linestr = PL_parser->linestr;
 960     buf = SvPVX(linestr);
 961     if (len <= SvLEN(linestr))
 962         return buf;
 963     bufend_pos = PL_parser->bufend - buf;
 964     bufptr_pos = PL_parser->bufptr - buf;
 965     oldbufptr_pos = PL_parser->oldbufptr - buf;
 966     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 967     linestart_pos = PL_parser->linestart - buf;
 968     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 969     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 970     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 971                             PL_parser->lex_shared->re_eval_start - buf : 0;
 972
 973     buf = sv_grow(linestr, len);
 974
 975     PL_parser->bufend = buf + bufend_pos;
 976     PL_parser->bufptr = buf + bufptr_pos;
 977     PL_parser->oldbufptr = buf + oldbufptr_pos;
 978     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 979     PL_parser->linestart = buf + linestart_pos;
 980     if (PL_parser->last_uni)
 981         PL_parser->last_uni = buf + last_uni_pos;
 982     if (PL_parser->last_lop)
 983         PL_parser->last_lop = buf + last_lop_pos;
 984     if (PL_parser->lex_shared->re_eval_start)
 985         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 986     return buf;
 987 }
 988
 989 /*
 990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 991
 992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 994 reallocating the buffer if necessary.  This means that lexing code that
 995 runs later will see the characters as if they had appeared in the input.
 996 It is not recommended to do this as part of normal parsing, and most
 997 uses of this facility run the risk of the inserted characters being
 998 interpreted in an unintended manner.
 999
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1007
1008 =cut
1009 */
1010
1011 void
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1013 {
1014     dVAR;
1015     char *bufptr;
1016     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017     if (flags & ~(LEX_STUFF_UTF8))
1018         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1019     if (UTF) {
1020         if (flags & LEX_STUFF_UTF8) {
1021             goto plain_copy;
1022         } else {
1023             STRLEN highhalf = 0;    /* Count of variants */
1024             const char *p, *e = pv+len;
1025             for (p = pv; p != e; p++) {
1026                 if (! UTF8_IS_INVARIANT(*p)) {
1027                     highhalf++;
1028                 }
1029             }
1030             if (!highhalf)
1031                 goto plain_copy;
1032             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033             bufptr = PL_parser->bufptr;
1034             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035             SvCUR_set(PL_parser->linestr,
1036                 SvCUR(PL_parser->linestr) + len+highhalf);
1037             PL_parser->bufend += len+highhalf;
1038             for (p = pv; p != e; p++) {
1039                 U8 c = (U8)*p;
1040                 if (! UTF8_IS_INVARIANT(c)) {
1041                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1043                 } else {
1044                     *bufptr++ = (char)c;
1045                 }
1046             }
1047         }
1048     } else {
1049         if (flags & LEX_STUFF_UTF8) {
1050             STRLEN highhalf = 0;
1051             const char *p, *e = pv+len;
1052             for (p = pv; p != e; p++) {
1053                 U8 c = (U8)*p;
1054                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056                                 "non-Latin-1 character into Latin-1 input");
1057                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1058                     p++;
1059                     highhalf++;
1060                 } else if (! UTF8_IS_INVARIANT(c)) {
1061                     /* malformed UTF-8 */
1062                     ENTER;
1063                     SAVESPTR(PL_warnhook);
1064                     PL_warnhook = PERL_WARNHOOK_FATAL;
1065                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1066                     LEAVE;
1067                 }
1068             }
1069             if (!highhalf)
1070                 goto plain_copy;
1071             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072             bufptr = PL_parser->bufptr;
1073             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074             SvCUR_set(PL_parser->linestr,
1075                 SvCUR(PL_parser->linestr) + len-highhalf);
1076             PL_parser->bufend += len-highhalf;
1077             p = pv;
1078             while (p < e) {
1079                 if (UTF8_IS_INVARIANT(*p)) {
1080                     *bufptr++ = *p;
1081                     p++;
1082                 }
1083                 else {
1084                     assert(p < e -1 );
1085                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1086                     p += 2;
1087                 }
1088             }
1089         } else {
1090           plain_copy:
1091             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092             bufptr = PL_parser->bufptr;
1093             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095             PL_parser->bufend += len;
1096             Copy(pv, bufptr, len, char);
1097         }
1098     }
1099 }
1100
1101 /*
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1103
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary.  This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1111
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul.  These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>.  The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1119
1120 =cut
1121 */
1122
1123 void
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1125 {
1126     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127     lex_stuff_pvn(pv, strlen(pv), flags);
1128 }
1129
1130 /*
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1132
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary.  This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1140
1141 The string to be inserted is the string value of I<sv>.  The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1146
1147 =cut
1148 */
1149
1150 void
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1152 {
1153     char *pv;
1154     STRLEN len;
1155     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1156     if (flags)
1157         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1158     pv = SvPV(sv, len);
1159     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1160 }
1161
1162 /*
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1164
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1169
1170 This is not the normal way to consume lexed text.  For that, use
1171 L</lex_read_to>.
1172
1173 =cut
1174 */
1175
1176 void
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1178 {
1179     char *buf, *bufend;
1180     STRLEN unstuff_len;
1181     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182     buf = PL_parser->bufptr;
1183     if (ptr < buf)
1184         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185     if (ptr == buf)
1186         return;
1187     bufend = PL_parser->bufend;
1188     if (ptr > bufend)
1189         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190     unstuff_len = ptr - buf;
1191     Move(ptr, buf, bufend+1-ptr, char);
1192     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193     PL_parser->bufend = bufend - unstuff_len;
1194 }
1195
1196 /*
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1198
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1203
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1207
1208 =cut
1209 */
1210
1211 void
1212 Perl_lex_read_to(pTHX_ char *ptr)
1213 {
1214     char *s;
1215     PERL_ARGS_ASSERT_LEX_READ_TO;
1216     s = PL_parser->bufptr;
1217     if (ptr < s || ptr > PL_parser->bufend)
1218         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219     for (; s != ptr; s++)
1220         if (*s == '\n') {
1221             COPLINE_INC_WITH_HERELINES;
1222             PL_parser->linestart = s+1;
1223         }
1224     PL_parser->bufptr = ptr;
1225 }
1226
1227 /*
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1229
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>.  The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately.  I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1235
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it.  However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1243
1244 =cut
1245 */
1246
1247 void
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1249 {
1250     char *buf;
1251     STRLEN discard_len;
1252     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253     buf = SvPVX(PL_parser->linestr);
1254     if (ptr < buf)
1255         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256     if (ptr == buf)
1257         return;
1258     if (ptr > PL_parser->bufptr)
1259         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260     discard_len = ptr - buf;
1261     if (PL_parser->oldbufptr < ptr)
1262         PL_parser->oldbufptr = ptr;
1263     if (PL_parser->oldoldbufptr < ptr)
1264         PL_parser->oldoldbufptr = ptr;
1265     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266         PL_parser->last_uni = NULL;
1267     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268         PL_parser->last_lop = NULL;
1269     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271     PL_parser->bufend -= discard_len;
1272     PL_parser->bufptr -= discard_len;
1273     PL_parser->oldbufptr -= discard_len;
1274     PL_parser->oldoldbufptr -= discard_len;
1275     if (PL_parser->last_uni)
1276         PL_parser->last_uni -= discard_len;
1277     if (PL_parser->last_lop)
1278         PL_parser->last_lop -= discard_len;
1279 }
1280
1281 /*
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1283
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more.  It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1289
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded.  If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1296
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1299
1300 =cut
1301 */
1302
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM  0x40000000
1305
1306 bool
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1308 {
1309     SV *linestr;
1310     char *buf;
1311     STRLEN old_bufend_pos, new_bufend_pos;
1312     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314     bool got_some_for_debugger = 0;
1315     bool got_some;
1316     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318     linestr = PL_parser->linestr;
1319     buf = SvPVX(linestr);
1320     if (!(flags & LEX_KEEP_PREVIOUS) &&
1321             PL_parser->bufptr == PL_parser->bufend) {
1322         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1323         linestart_pos = 0;
1324         if (PL_parser->last_uni != PL_parser->bufend)
1325             PL_parser->last_uni = NULL;
1326         if (PL_parser->last_lop != PL_parser->bufend)
1327             PL_parser->last_lop = NULL;
1328         last_uni_pos = last_lop_pos = 0;
1329         *buf = 0;
1330         SvCUR(linestr) = 0;
1331     } else {
1332         old_bufend_pos = PL_parser->bufend - buf;
1333         bufptr_pos = PL_parser->bufptr - buf;
1334         oldbufptr_pos = PL_parser->oldbufptr - buf;
1335         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336         linestart_pos = PL_parser->linestart - buf;
1337         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1339     }
1340     if (flags & LEX_FAKE_EOF) {
1341         goto eof;
1342     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1343         got_some = 0;
1344     } else if (filter_gets(linestr, old_bufend_pos)) {
1345         got_some = 1;
1346         got_some_for_debugger = 1;
1347     } else if (flags & LEX_NO_TERM) {
1348         got_some = 0;
1349     } else {
1350         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1351             sv_setpvs(linestr, "");
1352         eof:
1353         /* End of real input.  Close filehandle (unless it was STDIN),
1354          * then add implicit termination.
1355          */
1356         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357             PerlIO_clearerr(PL_parser->rsfp);
1358         else if (PL_parser->rsfp)
1359             (void)PerlIO_close(PL_parser->rsfp);
1360         PL_parser->rsfp = NULL;
1361         PL_parser->in_pod = PL_parser->filtered = 0;
1362 #ifdef PERL_MAD
1363         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1364             PL_faketokens = 1;
1365 #endif
1366         if (!PL_in_eval && PL_minus_p) {
1367             sv_catpvs(linestr,
1368                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369             PL_minus_n = PL_minus_p = 0;
1370         } else if (!PL_in_eval && PL_minus_n) {
1371             sv_catpvs(linestr, /*{*/";}");
1372             PL_minus_n = 0;
1373         } else
1374             sv_catpvs(linestr, ";");
1375         got_some = 1;
1376     }
1377     buf = SvPVX(linestr);
1378     new_bufend_pos = SvCUR(linestr);
1379     PL_parser->bufend = buf + new_bufend_pos;
1380     PL_parser->bufptr = buf + bufptr_pos;
1381     PL_parser->oldbufptr = buf + oldbufptr_pos;
1382     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383     PL_parser->linestart = buf + linestart_pos;
1384     if (PL_parser->last_uni)
1385         PL_parser->last_uni = buf + last_uni_pos;
1386     if (PL_parser->last_lop)
1387         PL_parser->last_lop = buf + last_lop_pos;
1388     if (PL_parser->preambling != NOLINE) {
1389         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390         PL_parser->preambling = NOLINE;
1391     }
1392     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393             PL_curstash != PL_debstash) {
1394         /* debugger active and we're not compiling the debugger code,
1395          * so store the line into the debugger's array of lines
1396          */
1397         update_debugger_info(NULL, buf+old_bufend_pos,
1398             new_bufend_pos-old_bufend_pos);
1399     }
1400     return got_some;
1401 }
1402
1403 /*
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1405
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text.  To consume the
1409 peeked character, use L</lex_read_unichar>.
1410
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in.  Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1415
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1418
1419 =cut
1420 */
1421
1422 I32
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1424 {
1425     dVAR;
1426     char *s, *bufend;
1427     if (flags & ~(LEX_KEEP_PREVIOUS))
1428         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429     s = PL_parser->bufptr;
1430     bufend = PL_parser->bufend;
1431     if (UTF) {
1432         U8 head;
1433         I32 unichar;
1434         STRLEN len, retlen;
1435         if (s == bufend) {
1436             if (!lex_next_chunk(flags))
1437                 return -1;
1438             s = PL_parser->bufptr;
1439             bufend = PL_parser->bufend;
1440         }
1441         head = (U8)*s;
1442         if (UTF8_IS_INVARIANT(head))
1443             return head;
1444         if (UTF8_IS_START(head)) {
1445             len = UTF8SKIP(&head);
1446             while ((STRLEN)(bufend-s) < len) {
1447                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1448                     break;
1449                 s = PL_parser->bufptr;
1450                 bufend = PL_parser->bufend;
1451             }
1452         }
1453         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454         if (retlen == (STRLEN)-1) {
1455             /* malformed UTF-8 */
1456             ENTER;
1457             SAVESPTR(PL_warnhook);
1458             PL_warnhook = PERL_WARNHOOK_FATAL;
1459             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1460             LEAVE;
1461         }
1462         return unichar;
1463     } else {
1464         if (s == bufend) {
1465             if (!lex_next_chunk(flags))
1466                 return -1;
1467             s = PL_parser->bufptr;
1468         }
1469         return (U8)*s;
1470     }
1471 }
1472
1473 /*
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1475
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text.  To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1481
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in.  Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1486
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1489
1490 =cut
1491 */
1492
1493 I32
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1495 {
1496     I32 c;
1497     if (flags & ~(LEX_KEEP_PREVIOUS))
1498         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499     c = lex_peek_unichar(flags);
1500     if (c != -1) {
1501         if (c == '\n')
1502             COPLINE_INC_WITH_HERELINES;
1503         if (UTF)
1504             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1505         else
1506             ++(PL_parser->bufptr);
1507     }
1508     return c;
1509 }
1510
1511 /*
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1513
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed.  The spaces may include ordinary whitespace characters and
1516 Perl-style comments.  C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1519
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in.  Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1524
1525 =cut
1526 */
1527
1528 #define LEX_NO_INCLINE    0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1530
1531 void
1532 Perl_lex_read_space(pTHX_ U32 flags)
1533 {
1534     char *s, *bufend;
1535     const bool can_incline = !(flags & LEX_NO_INCLINE);
1536     bool need_incline = 0;
1537     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1539 #ifdef PERL_MAD
1540     if (PL_skipwhite) {
1541         sv_free(PL_skipwhite);
1542         PL_skipwhite = NULL;
1543     }
1544     if (PL_madskills)
1545         PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547     s = PL_parser->bufptr;
1548     bufend = PL_parser->bufend;
1549     while (1) {
1550         char c = *s;
1551         if (c == '#') {
1552             do {
1553                 c = *++s;
1554             } while (!(c == '\n' || (c == 0 && s == bufend)));
1555         } else if (c == '\n') {
1556             s++;
1557             if (can_incline) {
1558                 PL_parser->linestart = s;
1559                 if (s == bufend)
1560                     need_incline = 1;
1561                 else
1562                     incline(s);
1563             }
1564         } else if (isSPACE(c)) {
1565             s++;
1566         } else if (c == 0 && s == bufend) {
1567             bool got_more;
1568             line_t l;
1569 #ifdef PERL_MAD
1570             if (PL_madskills)
1571                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573             if (flags & LEX_NO_NEXT_CHUNK)
1574                 break;
1575             PL_parser->bufptr = s;
1576             l = CopLINE(PL_curcop);
1577             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578             got_more = lex_next_chunk(flags);
1579             CopLINE_set(PL_curcop, l);
1580             s = PL_parser->bufptr;
1581             bufend = PL_parser->bufend;
1582             if (!got_more)
1583                 break;
1584             if (can_incline && need_incline && PL_parser->rsfp) {
1585                 incline(s);
1586                 need_incline = 0;
1587             }
1588         } else {
1589             break;
1590         }
1591     }
1592 #ifdef PERL_MAD
1593     if (PL_madskills)
1594         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596     PL_parser->bufptr = s;
1597 }
1598
1599 /*
1600
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1602
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1607
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1610 C<false>.
1611
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1613
1614 =cut
1615
1616  */
1617
1618 bool
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1620 {
1621     STRLEN len, origlen;
1622     char *p = proto ? SvPV(proto, len) : NULL;
1623     bool bad_proto = FALSE;
1624     bool in_brackets = FALSE;
1625     bool after_slash = FALSE;
1626     char greedy_proto = ' ';
1627     bool proto_after_greedy_proto = FALSE;
1628     bool must_be_last = FALSE;
1629     bool underscore = FALSE;
1630     bool bad_proto_after_underscore = FALSE;
1631
1632     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1633
1634     if (!proto)
1635         return TRUE;
1636
1637     origlen = len;
1638     for (; len--; p++) {
1639         if (!isSPACE(*p)) {
1640             if (must_be_last)
1641                 proto_after_greedy_proto = TRUE;
1642             if (underscore) {
1643                 if (!strchr(";@%", *p))
1644                     bad_proto_after_underscore = TRUE;
1645                 underscore = FALSE;
1646             }
1647             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1648                 bad_proto = TRUE;
1649             }
1650             else {
1651                 if (*p == '[')
1652                     in_brackets = TRUE;
1653                 else if (*p == ']')
1654                     in_brackets = FALSE;
1655                 else if ((*p == '@' || *p == '%') &&
1656                     !after_slash &&
1657                     !in_brackets ) {
1658                     must_be_last = TRUE;
1659                     greedy_proto = *p;
1660                 }
1661                 else if (*p == '_')
1662                     underscore = TRUE;
1663             }
1664             if (*p == '\\')
1665                 after_slash = TRUE;
1666             else
1667                 after_slash = FALSE;
1668         }
1669     }
1670
1671     if (warn) {
1672         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1673         p -= origlen;
1674         p = SvUTF8(proto)
1675             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676                              origlen, UNI_DISPLAY_ISPRINT)
1677             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1678
1679         if (proto_after_greedy_proto)
1680             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681                         "Prototype after '%c' for %"SVf" : %s",
1682                         greedy_proto, SVfARG(name), p);
1683         if (in_brackets)
1684             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685                         "Missing ']' in prototype for %"SVf" : %s",
1686                         SVfARG(name), p);
1687         if (bad_proto)
1688             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689                         "Illegal character in prototype for %"SVf" : %s",
1690                         SVfARG(name), p);
1691         if (bad_proto_after_underscore)
1692             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693                         "Illegal character after '_' in prototype for %"SVf" : %s",
1694                         SVfARG(name), p);
1695     }
1696
1697     return (! (proto_after_greedy_proto || bad_proto) );
1698 }
1699
1700 /*
1701  * S_incline
1702  * This subroutine has nothing to do with tilting, whether at windmills
1703  * or pinball tables.  Its name is short for "increment line".  It
1704  * increments the current line number in CopLINE(PL_curcop) and checks
1705  * to see whether the line starts with a comment of the form
1706  *    # line 500 "foo.pm"
1707  * If so, it sets the current line number and file to the values in the comment.
1708  */
1709
1710 STATIC void
1711 S_incline(pTHX_ const char *s)
1712 {
1713     dVAR;
1714     const char *t;
1715     const char *n;
1716     const char *e;
1717     line_t line_num;
1718
1719     PERL_ARGS_ASSERT_INCLINE;
1720
1721     COPLINE_INC_WITH_HERELINES;
1722     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723      && s+1 == PL_bufend && *s == ';') {
1724         /* fake newline in string eval */
1725         CopLINE_dec(PL_curcop);
1726         return;
1727     }
1728     if (*s++ != '#')
1729         return;
1730     while (SPACE_OR_TAB(*s))
1731         s++;
1732     if (strnEQ(s, "line", 4))
1733         s += 4;
1734     else
1735         return;
1736     if (SPACE_OR_TAB(*s))
1737         s++;
1738     else
1739         return;
1740     while (SPACE_OR_TAB(*s))
1741         s++;
1742     if (!isDIGIT(*s))
1743         return;
1744
1745     n = s;
1746     while (isDIGIT(*s))
1747         s++;
1748     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1749         return;
1750     while (SPACE_OR_TAB(*s))
1751         s++;
1752     if (*s == '"' && (t = strchr(s+1, '"'))) {
1753         s++;
1754         e = t + 1;
1755     }
1756     else {
1757         t = s;
1758         while (!isSPACE(*t))
1759             t++;
1760         e = t;
1761     }
1762     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1763         e++;
1764     if (*e != '\n' && *e != '\0')
1765         return;         /* false alarm */
1766
1767     line_num = atoi(n)-1;
1768
1769     if (t - s > 0) {
1770         const STRLEN len = t - s;
1771
1772         if (!PL_rsfp && !PL_parser->filtered) {
1773             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774              * to *{"::_<newfilename"} */
1775             /* However, the long form of evals is only turned on by the
1776                debugger - usually they're "(eval %lu)" */
1777             GV * const cfgv = CopFILEGV(PL_curcop);
1778             if (cfgv) {
1779                 char smallbuf[128];
1780                 STRLEN tmplen2 = len;
1781                 char *tmpbuf2;
1782                 GV *gv2;
1783
1784                 if (tmplen2 + 2 <= sizeof smallbuf)
1785                     tmpbuf2 = smallbuf;
1786                 else
1787                     Newx(tmpbuf2, tmplen2 + 2, char);
1788
1789                 tmpbuf2[0] = '_';
1790                 tmpbuf2[1] = '<';
1791
1792                 memcpy(tmpbuf2 + 2, s, tmplen2);
1793                 tmplen2 += 2;
1794
1795                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1796                 if (!isGV(gv2)) {
1797                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798                     /* adjust ${"::_<newfilename"} to store the new file name */
1799                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800                     /* The line number may differ. If that is the case,
1801                        alias the saved lines that are in the array.
1802                        Otherwise alias the whole array. */
1803                     if (CopLINE(PL_curcop) == line_num) {
1804                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1806                     }
1807                     else if (GvAV(cfgv)) {
1808                         AV * const av = GvAV(cfgv);
1809                         const I32 start = CopLINE(PL_curcop)+1;
1810                         I32 items = AvFILLp(av) - start;
1811                         if (items > 0) {
1812                             AV * const av2 = GvAVn(gv2);
1813                             SV **svp = AvARRAY(av) + start;
1814                             I32 l = (I32)line_num+1;
1815                             while (items--)
1816                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1817                         }
1818                     }
1819                 }
1820
1821                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1822             }
1823         }
1824         CopFILE_free(PL_curcop);
1825         CopFILE_setn(PL_curcop, s, len);
1826     }
1827     CopLINE_set(PL_curcop, line_num);
1828 }
1829
1830 #define skipspace(s) skipspace_flags(s, 0)
1831
1832 #ifdef PERL_MAD
1833 /* skip space before PL_thistoken */
1834
1835 STATIC char *
1836 S_skipspace0(pTHX_ char *s)
1837 {
1838     PERL_ARGS_ASSERT_SKIPSPACE0;
1839
1840     s = skipspace(s);
1841     if (!PL_madskills)
1842         return s;
1843     if (PL_skipwhite) {
1844         if (!PL_thiswhite)
1845             PL_thiswhite = newSVpvs("");
1846         sv_catsv(PL_thiswhite, PL_skipwhite);
1847         sv_free(PL_skipwhite);
1848         PL_skipwhite = 0;
1849     }
1850     PL_realtokenstart = s - SvPVX(PL_linestr);
1851     return s;
1852 }
1853
1854 /* skip space after PL_thistoken */
1855
1856 STATIC char *
1857 S_skipspace1(pTHX_ char *s)
1858 {
1859     const char *start = s;
1860     I32 startoff = start - SvPVX(PL_linestr);
1861
1862     PERL_ARGS_ASSERT_SKIPSPACE1;
1863
1864     s = skipspace(s);
1865     if (!PL_madskills)
1866         return s;
1867     start = SvPVX(PL_linestr) + startoff;
1868     if (!PL_thistoken && PL_realtokenstart >= 0) {
1869         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870         PL_thistoken = newSVpvn(tstart, start - tstart);
1871     }
1872     PL_realtokenstart = -1;
1873     if (PL_skipwhite) {
1874         if (!PL_nextwhite)
1875             PL_nextwhite = newSVpvs("");
1876         sv_catsv(PL_nextwhite, PL_skipwhite);
1877         sv_free(PL_skipwhite);
1878         PL_skipwhite = 0;
1879     }
1880     return s;
1881 }
1882
1883 STATIC char *
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1885 {
1886     char *start;
1887     const I32 startoff = s - SvPVX(PL_linestr);
1888
1889     PERL_ARGS_ASSERT_SKIPSPACE2;
1890
1891     s = skipspace(s);
1892     if (!PL_madskills || !svp)
1893         return s;
1894     start = SvPVX(PL_linestr) + startoff;
1895     if (!PL_thistoken && PL_realtokenstart >= 0) {
1896         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897         PL_thistoken = newSVpvn(tstart, start - tstart);
1898         PL_realtokenstart = -1;
1899     }
1900     if (PL_skipwhite) {
1901         if (!*svp)
1902             *svp = newSVpvs("");
1903         sv_setsv(*svp, PL_skipwhite);
1904         sv_free(PL_skipwhite);
1905         PL_skipwhite = 0;
1906     }
1907
1908     return s;
1909 }
1910 #endif
1911
1912 STATIC void
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1914 {
1915     AV *av = CopFILEAVx(PL_curcop);
1916     if (av) {
1917         SV * sv;
1918         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1919         else {
1920             sv = *av_fetch(av, 0, 1);
1921             SvUPGRADE(sv, SVt_PVMG);
1922         }
1923         if (!SvPOK(sv)) sv_setpvs(sv,"");
1924         if (orig_sv)
1925             sv_catsv(sv, orig_sv);
1926         else
1927             sv_catpvn(sv, buf, len);
1928         if (!SvIOK(sv)) {
1929             (void)SvIOK_on(sv);
1930             SvIV_set(sv, 0);
1931         }
1932         if (PL_parser->preambling == NOLINE)
1933             av_store(av, CopLINE(PL_curcop), sv);
1934     }
1935 }
1936
1937 /*
1938  * S_skipspace
1939  * Called to gobble the appropriate amount and type of whitespace.
1940  * Skips comments as well.
1941  */
1942
1943 STATIC char *
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1945 {
1946 #ifdef PERL_MAD
1947     char *start = s;
1948 #endif /* PERL_MAD */
1949     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1950 #ifdef PERL_MAD
1951     if (PL_skipwhite) {
1952         sv_free(PL_skipwhite);
1953         PL_skipwhite = NULL;
1954     }
1955 #endif /* PERL_MAD */
1956     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957         while (s < PL_bufend && SPACE_OR_TAB(*s))
1958             s++;
1959     } else {
1960         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1961         PL_bufptr = s;
1962         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964                     LEX_NO_NEXT_CHUNK : 0));
1965         s = PL_bufptr;
1966         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967         if (PL_linestart > PL_bufptr)
1968             PL_bufptr = PL_linestart;
1969         return s;
1970     }
1971 #ifdef PERL_MAD
1972     if (PL_madskills)
1973         PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1975     return s;
1976 }
1977
1978 /*
1979  * S_check_uni
1980  * Check the unary operators to ensure there's no ambiguity in how they're
1981  * used.  An ambiguous piece of code would be:
1982  *     rand + 5
1983  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1984  * the +5 is its argument.
1985  */
1986
1987 STATIC void
1988 S_check_uni(pTHX)
1989 {
1990     dVAR;
1991     const char *s;
1992     const char *t;
1993
1994     if (PL_oldoldbufptr != PL_last_uni)
1995         return;
1996     while (isSPACE(*PL_last_uni))
1997         PL_last_uni++;
1998     s = PL_last_uni;
1999     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2000         s++;
2001     if ((t = strchr(s, '(')) && t < PL_bufptr)
2002         return;
2003
2004     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006                      (int)(s - PL_last_uni), PL_last_uni);
2007 }
2008
2009 /*
2010  * LOP : macro to build a list operator.  Its behaviour has been replaced
2011  * with a subroutine, S_lop() for which LOP is just another name.
2012  */
2013
2014 #define LOP(f,x) return lop(f,x,s)
2015
2016 /*
2017  * S_lop
2018  * Build a list operator (or something that might be one).  The rules:
2019  *  - if we have a next token, then it's a list operator [why?]
2020  *  - if the next thing is an opening paren, then it's a function
2021  *  - else it's a list operator
2022  */
2023
2024 STATIC I32
2025 S_lop(pTHX_ I32 f, int x, char *s)
2026 {
2027     dVAR;
2028
2029     PERL_ARGS_ASSERT_LOP;
2030
2031     pl_yylval.ival = f;
2032     CLINE;
2033     PL_expect = x;
2034     PL_bufptr = s;
2035     PL_last_lop = PL_oldbufptr;
2036     PL_last_lop_op = (OPCODE)f;
2037 #ifdef PERL_MAD
2038     if (PL_lasttoke)
2039         goto lstop;
2040 #else
2041     if (PL_nexttoke)
2042         goto lstop;
2043 #endif
2044     if (*s == '(')
2045         return REPORT(FUNC);
2046     s = PEEKSPACE(s);
2047     if (*s == '(')
2048         return REPORT(FUNC);
2049     else {
2050         lstop:
2051         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053         return REPORT(LSTOP);
2054     }
2055 }
2056
2057 #ifdef PERL_MAD
2058  /*
2059  * S_start_force
2060  * Sets up for an eventual force_next().  start_force(0) basically does
2061  * an unshift, while start_force(-1) does a push.  yylex removes items
2062  * on the "pop" end.
2063  */
2064
2065 STATIC void
2066 S_start_force(pTHX_ int where)
2067 {
2068     int i;
2069
2070     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2071         where = PL_lasttoke;
2072     assert(PL_curforce < 0 || PL_curforce == where);
2073     if (PL_curforce != where) {
2074         for (i = PL_lasttoke; i > where; --i) {
2075             PL_nexttoke[i] = PL_nexttoke[i-1];
2076         }
2077         PL_lasttoke++;
2078     }
2079     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2080         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081     PL_curforce = where;
2082     if (PL_nextwhite) {
2083         if (PL_madskills)
2084             curmad('^', newSVpvs(""));
2085         CURMAD('_', PL_nextwhite);
2086     }
2087 }
2088
2089 STATIC void
2090 S_curmad(pTHX_ char slot, SV *sv)
2091 {
2092     MADPROP **where;
2093
2094     if (!sv)
2095         return;
2096     if (PL_curforce < 0)
2097         where = &PL_thismad;
2098     else
2099         where = &PL_nexttoke[PL_curforce].next_mad;
2100
2101     if (PL_faketokens)
2102         sv_setpvs(sv, "");
2103     else {
2104         if (!IN_BYTES) {
2105             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2106                 SvUTF8_on(sv);
2107             else if (PL_encoding) {
2108                 sv_recode_to_utf8(sv, PL_encoding);
2109             }
2110         }
2111     }
2112
2113     /* keep a slot open for the head of the list? */
2114     if (slot != '_' && *where && (*where)->mad_key == '^') {
2115         (*where)->mad_key = slot;
2116         sv_free(MUTABLE_SV(((*where)->mad_val)));
2117         (*where)->mad_val = (void*)sv;
2118     }
2119     else
2120         addmad(newMADsv(slot, sv), where, 0);
2121 }
2122 #else
2123 #  define start_force(where)    NOOP
2124 #  define curmad(slot, sv)      NOOP
2125 #endif
2126
2127 /*
2128  * S_force_next
2129  * When the lexer realizes it knows the next token (for instance,
2130  * it is reordering tokens for the parser) then it can call S_force_next
2131  * to know what token to return the next time the lexer is called.  Caller
2132  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133  * and possibly PL_expect to ensure the lexer handles the token correctly.
2134  */
2135
2136 STATIC void
2137 S_force_next(pTHX_ I32 type)
2138 {
2139     dVAR;
2140 #ifdef DEBUGGING
2141     if (DEBUG_T_TEST) {
2142         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143         tokereport(type, &NEXTVAL_NEXTTOKE);
2144     }
2145 #endif
2146 #ifdef PERL_MAD
2147     if (PL_curforce < 0)
2148         start_force(PL_lasttoke);
2149     PL_nexttoke[PL_curforce].next_type = type;
2150     if (PL_lex_state != LEX_KNOWNEXT)
2151         PL_lex_defer = PL_lex_state;
2152     PL_lex_state = LEX_KNOWNEXT;
2153     PL_lex_expect = PL_expect;
2154     PL_curforce = -1;
2155 #else
2156     PL_nexttype[PL_nexttoke] = type;
2157     PL_nexttoke++;
2158     if (PL_lex_state != LEX_KNOWNEXT) {
2159         PL_lex_defer = PL_lex_state;
2160         PL_lex_expect = PL_expect;
2161         PL_lex_state = LEX_KNOWNEXT;
2162     }
2163 #endif
2164 }
2165
2166 /*
2167  * S_postderef
2168  *
2169  * This subroutine handles postfix deref syntax after the arrow has already
2170  * been emitted.  @* $* etc. are emitted as two separate token right here.
2171  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172  * only the first, leaving yylex to find the next.
2173  */
2174
2175 static int
2176 S_postderef(pTHX_ int const funny, char const next)
2177 {
2178     dVAR;
2179     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180     assert(strchr("*[{", next));
2181     if (next == '*') {
2182         PL_expect = XOPERATOR;
2183         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185             PL_lex_state = LEX_INTERPEND;
2186             start_force(PL_curforce);
2187             force_next(POSTJOIN);
2188         }
2189         start_force(PL_curforce);
2190         force_next(next);
2191         PL_bufptr+=2;
2192     }
2193     else {
2194         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195          && !PL_lex_brackets)
2196             PL_lex_dojoin = 2;
2197         PL_expect = XOPERATOR;
2198         PL_bufptr++;
2199     }
2200     return funny;
2201 }
2202
2203 void
2204 Perl_yyunlex(pTHX)
2205 {
2206     int yyc = PL_parser->yychar;
2207     if (yyc != YYEMPTY) {
2208         if (yyc) {
2209             start_force(-1);
2210             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212                 PL_lex_allbrackets--;
2213                 PL_lex_brackets--;
2214                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215             } else if (yyc == '('/*)*/) {
2216                 PL_lex_allbrackets--;
2217                 yyc |= (2<<24);
2218             }
2219             force_next(yyc);
2220         }
2221         PL_parser->yychar = YYEMPTY;
2222     }
2223 }
2224
2225 STATIC SV *
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228     dVAR;
2229     SV * const sv = newSVpvn_utf8(start, len,
2230                                   !IN_BYTES
2231                                   && UTF
2232                                   && !is_ascii_string((const U8*)start, len)
2233                                   && is_utf8_string((const U8*)start, len));
2234     return sv;
2235 }
2236
2237 /*
2238  * S_force_word
2239  * When the lexer knows the next thing is a word (for instance, it has
2240  * just seen -> and it knows that the next char is a word char, then
2241  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2242  * lookahead.
2243  *
2244  * Arguments:
2245  *   char *start : buffer position (must be within PL_linestr)
2246  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247  *   int check_keyword : if true, Perl checks to make sure the word isn't
2248  *       a keyword (do this if the word is a label, e.g. goto FOO)
2249  *   int allow_pack : if true, : characters will also be allowed (require,
2250  *       use, etc. do this)
2251  *   int allow_initial_tick : used by the "sub" lexer only.
2252  */
2253
2254 STATIC char *
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2256 {
2257     dVAR;
2258     char *s;
2259     STRLEN len;
2260
2261     PERL_ARGS_ASSERT_FORCE_WORD;
2262
2263     start = SKIPSPACE1(start);
2264     s = start;
2265     if (isIDFIRST_lazy_if(s,UTF) ||
2266         (allow_pack && *s == ':') )
2267     {
2268         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269         if (check_keyword) {
2270           char *s2 = PL_tokenbuf;
2271           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2272             s2 += 6, len -= 6;
2273           if (keyword(s2, len, 0))
2274             return start;
2275         }
2276         start_force(PL_curforce);
2277         if (PL_madskills)
2278             curmad('X', newSVpvn(start,s-start));
2279         if (token == METHOD) {
2280             s = SKIPSPACE1(s);
2281             if (*s == '(')
2282                 PL_expect = XTERM;
2283             else {
2284                 PL_expect = XOPERATOR;
2285             }
2286         }
2287         if (PL_madskills)
2288             curmad('g', newSVpvs( "forced" ));
2289         NEXTVAL_NEXTTOKE.opval
2290             = (OP*)newSVOP(OP_CONST,0,
2291                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2293         force_next(token);
2294     }
2295     return s;
2296 }
2297
2298 /*
2299  * S_force_ident
2300  * Called when the lexer wants $foo *foo &foo etc, but the program
2301  * text only contains the "foo" portion.  The first argument is a pointer
2302  * to the "foo", and the second argument is the type symbol to prefix.
2303  * Forces the next token to be a "WORD".
2304  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2305  */
2306
2307 STATIC void
2308 S_force_ident(pTHX_ const char *s, int kind)
2309 {
2310     dVAR;
2311
2312     PERL_ARGS_ASSERT_FORCE_IDENT;
2313
2314     if (s[0]) {
2315         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317                                                                 UTF ? SVf_UTF8 : 0));
2318         start_force(PL_curforce);
2319         NEXTVAL_NEXTTOKE.opval = o;
2320         force_next(WORD);
2321         if (kind) {
2322             o->op_private = OPpCONST_ENTERED;
2323             /* XXX see note in pp_entereval() for why we forgo typo
2324                warnings if the symbol must be introduced in an eval.
2325                GSAR 96-10-12 */
2326             gv_fetchpvn_flags(s, len,
2327                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329                               kind == '$' ? SVt_PV :
2330                               kind == '@' ? SVt_PVAV :
2331                               kind == '%' ? SVt_PVHV :
2332                               SVt_PVGV
2333                               );
2334         }
2335     }
2336 }
2337
2338 static void
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2340 {
2341     start_force(PL_curforce);
2342     NEXTVAL_NEXTTOKE.ival = pit;
2343     force_next('p');
2344 }
2345
2346 NV
2347 Perl_str_to_version(pTHX_ SV *sv)
2348 {
2349     NV retval = 0.0;
2350     NV nshift = 1.0;
2351     STRLEN len;
2352     const char *start = SvPV_const(sv,len);
2353     const char * const end = start + len;
2354     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2355
2356     PERL_ARGS_ASSERT_STR_TO_VERSION;
2357
2358     while (start < end) {
2359         STRLEN skip;
2360         UV n;
2361         if (utf)
2362             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2363         else {
2364             n = *(U8*)start;
2365             skip = 1;
2366         }
2367         retval += ((NV)n)/nshift;
2368         start += skip;
2369         nshift *= 1000;
2370     }
2371     return retval;
2372 }
2373
2374 /*
2375  * S_force_version
2376  * Forces the next token to be a version number.
2377  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378  * and if "guessing" is TRUE, then no new token is created (and the caller
2379  * must use an alternative parsing method).
2380  */
2381
2382 STATIC char *
2383 S_force_version(pTHX_ char *s, int guessing)
2384 {
2385     dVAR;
2386     OP *version = NULL;
2387     char *d;
2388 #ifdef PERL_MAD
2389     I32 startoff = s - SvPVX(PL_linestr);
2390 #endif
2391
2392     PERL_ARGS_ASSERT_FORCE_VERSION;
2393
2394     s = SKIPSPACE1(s);
2395
2396     d = s;
2397     if (*d == 'v')
2398         d++;
2399     if (isDIGIT(*d)) {
2400         while (isDIGIT(*d) || *d == '_' || *d == '.')
2401             d++;
2402 #ifdef PERL_MAD
2403         if (PL_madskills) {
2404             start_force(PL_curforce);
2405             curmad('X', newSVpvn(s,d-s));
2406         }
2407 #endif
2408         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2409             SV *ver;
2410             s = scan_num(s, &pl_yylval);
2411             version = pl_yylval.opval;
2412             ver = cSVOPx(version)->op_sv;
2413             if (SvPOK(ver) && !SvNIOK(ver)) {
2414                 SvUPGRADE(ver, SVt_PVNV);
2415                 SvNV_set(ver, str_to_version(ver));
2416                 SvNOK_on(ver);          /* hint that it is a version */
2417             }
2418         }
2419         else if (guessing) {
2420 #ifdef PERL_MAD
2421             if (PL_madskills) {
2422                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2423                 PL_nextwhite = 0;
2424                 s = SvPVX(PL_linestr) + startoff;
2425             }
2426 #endif
2427             return s;
2428         }
2429     }
2430
2431 #ifdef PERL_MAD
2432     if (PL_madskills && !version) {
2433         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2434         PL_nextwhite = 0;
2435         s = SvPVX(PL_linestr) + startoff;
2436     }
2437 #endif
2438     /* NOTE: The parser sees the package name and the VERSION swapped */
2439     start_force(PL_curforce);
2440     NEXTVAL_NEXTTOKE.opval = version;
2441     force_next(WORD);
2442
2443     return s;
2444 }
2445
2446 /*
2447  * S_force_strict_version
2448  * Forces the next token to be a version number using strict syntax rules.
2449  */
2450
2451 STATIC char *
2452 S_force_strict_version(pTHX_ char *s)
2453 {
2454     dVAR;
2455     OP *version = NULL;
2456 #ifdef PERL_MAD
2457     I32 startoff = s - SvPVX(PL_linestr);
2458 #endif
2459     const char *errstr = NULL;
2460
2461     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2462
2463     while (isSPACE(*s)) /* leading whitespace */
2464         s++;
2465
2466     if (is_STRICT_VERSION(s,&errstr)) {
2467         SV *ver = newSV(0);
2468         s = (char *)scan_version(s, ver, 0);
2469         version = newSVOP(OP_CONST, 0, ver);
2470     }
2471     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2472             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2473     {
2474         PL_bufptr = s;
2475         if (errstr)
2476             yyerror(errstr); /* version required */
2477         return s;
2478     }
2479
2480 #ifdef PERL_MAD
2481     if (PL_madskills && !version) {
2482         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2483         PL_nextwhite = 0;
2484         s = SvPVX(PL_linestr) + startoff;
2485     }
2486 #endif
2487     /* NOTE: The parser sees the package name and the VERSION swapped */
2488     start_force(PL_curforce);
2489     NEXTVAL_NEXTTOKE.opval = version;
2490     force_next(WORD);
2491
2492     return s;
2493 }
2494
2495 /*
2496  * S_tokeq
2497  * Tokenize a quoted string passed in as an SV.  It finds the next
2498  * chunk, up to end of string or a backslash.  It may make a new
2499  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2500  * turns \\ into \.
2501  */
2502
2503 STATIC SV *
2504 S_tokeq(pTHX_ SV *sv)
2505 {
2506     dVAR;
2507     char *s;
2508     char *send;
2509     char *d;
2510     SV *pv = sv;
2511
2512     PERL_ARGS_ASSERT_TOKEQ;
2513
2514     assert (SvPOK(sv));
2515     assert (SvLEN(sv));
2516     assert (!SvIsCOW(sv));
2517     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2518         goto finish;
2519     s = SvPVX(sv);
2520     send = SvEND(sv);
2521     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2522     while (s < send && !(*s == '\\' && s[1] == '\\'))
2523         s++;
2524     if (s == send)
2525         goto finish;
2526     d = s;
2527     if ( PL_hints & HINT_NEW_STRING ) {
2528         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2529                             SVs_TEMP | SvUTF8(sv));
2530     }
2531     while (s < send) {
2532         if (*s == '\\') {
2533             if (s + 1 < send && (s[1] == '\\'))
2534                 s++;            /* all that, just for this */
2535         }
2536         *d++ = *s++;
2537     }
2538     *d = '\0';
2539     SvCUR_set(sv, d - SvPVX_const(sv));
2540   finish:
2541     if ( PL_hints & HINT_NEW_STRING )
2542        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2543     return sv;
2544 }
2545
2546 /*
2547  * Now come three functions related to double-quote context,
2548  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2549  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2550  * interact with PL_lex_state, and create fake ( ... ) argument lists
2551  * to handle functions and concatenation.
2552  * For example,
2553  *   "foo\lbar"
2554  * is tokenised as
2555  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2556  */
2557
2558 /*
2559  * S_sublex_start
2560  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2561  *
2562  * Pattern matching will set PL_lex_op to the pattern-matching op to
2563  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2564  *
2565  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2566  *
2567  * Everything else becomes a FUNC.
2568  *
2569  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2570  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2571  * call to S_sublex_push().
2572  */
2573
2574 STATIC I32
2575 S_sublex_start(pTHX)
2576 {
2577     dVAR;
2578     const I32 op_type = pl_yylval.ival;
2579
2580     if (op_type == OP_NULL) {
2581         pl_yylval.opval = PL_lex_op;
2582         PL_lex_op = NULL;
2583         return THING;
2584     }
2585     if (op_type == OP_CONST) {
2586         SV *sv = tokeq(PL_lex_stuff);
2587
2588         if (SvTYPE(sv) == SVt_PVIV) {
2589             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2590             STRLEN len;
2591             const char * const p = SvPV_const(sv, len);
2592             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2593             SvREFCNT_dec(sv);
2594             sv = nsv;
2595         }
2596         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2597         PL_lex_stuff = NULL;
2598         return THING;
2599     }
2600
2601     PL_sublex_info.super_state = PL_lex_state;
2602     PL_sublex_info.sub_inwhat = (U16)op_type;
2603     PL_sublex_info.sub_op = PL_lex_op;
2604     PL_lex_state = LEX_INTERPPUSH;
2605
2606     PL_expect = XTERM;
2607     if (PL_lex_op) {
2608         pl_yylval.opval = PL_lex_op;
2609         PL_lex_op = NULL;
2610         return PMFUNC;
2611     }
2612     else
2613         return FUNC;
2614 }
2615
2616 /*
2617  * S_sublex_push
2618  * Create a new scope to save the lexing state.  The scope will be
2619  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2620  * to the uc, lc, etc. found before.
2621  * Sets PL_lex_state to LEX_INTERPCONCAT.
2622  */
2623
2624 STATIC I32
2625 S_sublex_push(pTHX)
2626 {
2627     dVAR;
2628     LEXSHARED *shared;
2629     const bool is_heredoc = PL_multi_close == '<';
2630     ENTER;
2631
2632     PL_lex_state = PL_sublex_info.super_state;
2633     SAVEI8(PL_lex_dojoin);
2634     SAVEI32(PL_lex_brackets);
2635     SAVEI32(PL_lex_allbrackets);
2636     SAVEI32(PL_lex_formbrack);
2637     SAVEI8(PL_lex_fakeeof);
2638     SAVEI32(PL_lex_casemods);
2639     SAVEI32(PL_lex_starts);
2640     SAVEI8(PL_lex_state);
2641     SAVESPTR(PL_lex_repl);
2642     SAVEVPTR(PL_lex_inpat);
2643     SAVEI16(PL_lex_inwhat);
2644     if (is_heredoc)
2645     {
2646         SAVECOPLINE(PL_curcop);
2647         SAVEI32(PL_multi_end);
2648         SAVEI32(PL_parser->herelines);
2649         PL_parser->herelines = 0;
2650     }
2651     SAVEI8(PL_multi_close);
2652     SAVEPPTR(PL_bufptr);
2653     SAVEPPTR(PL_bufend);
2654     SAVEPPTR(PL_oldbufptr);
2655     SAVEPPTR(PL_oldoldbufptr);
2656     SAVEPPTR(PL_last_lop);
2657     SAVEPPTR(PL_last_uni);
2658     SAVEPPTR(PL_linestart);
2659     SAVESPTR(PL_linestr);
2660     SAVEGENERICPV(PL_lex_brackstack);
2661     SAVEGENERICPV(PL_lex_casestack);
2662     SAVEGENERICPV(PL_parser->lex_shared);
2663     SAVEBOOL(PL_parser->lex_re_reparsing);
2664     SAVEI32(PL_copline);
2665
2666     /* The here-doc parser needs to be able to peek into outer lexing
2667        scopes to find the body of the here-doc.  So we put PL_linestr and
2668        PL_bufptr into lex_shared, to ‘share’ those values.
2669      */
2670     PL_parser->lex_shared->ls_linestr = PL_linestr;
2671     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2672
2673     PL_linestr = PL_lex_stuff;
2674     PL_lex_repl = PL_sublex_info.repl;
2675     PL_lex_stuff = NULL;
2676     PL_sublex_info.repl = NULL;
2677
2678     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2679         = SvPVX(PL_linestr);
2680     PL_bufend += SvCUR(PL_linestr);
2681     PL_last_lop = PL_last_uni = NULL;
2682     SAVEFREESV(PL_linestr);
2683     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2684
2685     PL_lex_dojoin = FALSE;
2686     PL_lex_brackets = PL_lex_formbrack = 0;
2687     PL_lex_allbrackets = 0;
2688     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2689     Newx(PL_lex_brackstack, 120, char);
2690     Newx(PL_lex_casestack, 12, char);
2691     PL_lex_casemods = 0;
2692     *PL_lex_casestack = '\0';
2693     PL_lex_starts = 0;
2694     PL_lex_state = LEX_INTERPCONCAT;
2695     if (is_heredoc)
2696         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2697     PL_copline = NOLINE;
2698
2699     Newxz(shared, 1, LEXSHARED);
2700     shared->ls_prev = PL_parser->lex_shared;
2701     PL_parser->lex_shared = shared;
2702
2703     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2704     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2705     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2706         PL_lex_inpat = PL_sublex_info.sub_op;
2707     else
2708         PL_lex_inpat = NULL;
2709
2710     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2711     PL_in_eval &= ~EVAL_RE_REPARSING;
2712
2713     return '(';
2714 }
2715
2716 /*
2717  * S_sublex_done
2718  * Restores lexer state after a S_sublex_push.
2719  */
2720
2721 STATIC I32
2722 S_sublex_done(pTHX)
2723 {
2724     dVAR;
2725     if (!PL_lex_starts++) {
2726         SV * const sv = newSVpvs("");
2727         if (SvUTF8(PL_linestr))
2728             SvUTF8_on(sv);
2729         PL_expect = XOPERATOR;
2730         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2731         return THING;
2732     }
2733
2734     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2735         PL_lex_state = LEX_INTERPCASEMOD;
2736         return yylex();
2737     }
2738
2739     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2740     assert(PL_lex_inwhat != OP_TRANSR);
2741     if (PL_lex_repl) {
2742         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2743         PL_linestr = PL_lex_repl;
2744         PL_lex_inpat = 0;
2745         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2746         PL_bufend += SvCUR(PL_linestr);
2747         PL_last_lop = PL_last_uni = NULL;
2748         PL_lex_dojoin = FALSE;
2749         PL_lex_brackets = 0;
2750         PL_lex_allbrackets = 0;
2751         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2752         PL_lex_casemods = 0;
2753         *PL_lex_casestack = '\0';
2754         PL_lex_starts = 0;
2755         if (SvEVALED(PL_lex_repl)) {
2756             PL_lex_state = LEX_INTERPNORMAL;
2757             PL_lex_starts++;
2758             /*  we don't clear PL_lex_repl here, so that we can check later
2759                 whether this is an evalled subst; that means we rely on the
2760                 logic to ensure sublex_done() is called again only via the
2761                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2762         }
2763         else {
2764             PL_lex_state = LEX_INTERPCONCAT;
2765             PL_lex_repl = NULL;
2766         }
2767         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2768             CopLINE(PL_curcop) +=
2769                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2770                  + PL_parser->herelines;
2771             PL_parser->herelines = 0;
2772         }
2773         return ',';
2774     }
2775     else {
2776         const line_t l = CopLINE(PL_curcop);
2777 #ifdef PERL_MAD
2778         if (PL_madskills) {
2779             if (PL_thiswhite) {
2780                 if (!PL_endwhite)
2781                     PL_endwhite = newSVpvs("");
2782                 sv_catsv(PL_endwhite, PL_thiswhite);
2783                 PL_thiswhite = 0;
2784             }
2785             if (PL_thistoken)
2786                 sv_setpvs(PL_thistoken,"");
2787             else
2788                 PL_realtokenstart = -1;
2789         }
2790 #endif
2791         LEAVE;
2792         if (PL_multi_close == '<')
2793             PL_parser->herelines += l - PL_multi_end;
2794         PL_bufend = SvPVX(PL_linestr);
2795         PL_bufend += SvCUR(PL_linestr);
2796         PL_expect = XOPERATOR;
2797         PL_sublex_info.sub_inwhat = 0;
2798         return ')';
2799     }
2800 }
2801
2802 PERL_STATIC_INLINE SV*
2803 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2804 {
2805     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2806      * interior, hence to the "}".  Finds what the name resolves to, returning
2807      * an SV* containing it; NULL if no valid one found */
2808
2809     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2810
2811     HV * table;
2812     SV **cvp;
2813     SV *cv;
2814     SV *rv;
2815     HV *stash;
2816     const U8* first_bad_char_loc;
2817     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2818
2819     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2820
2821     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2822                                      e - backslash_ptr,
2823                                      &first_bad_char_loc))
2824     {
2825         /* If warnings are on, this will print a more detailed analysis of what
2826          * is wrong than the error message below */
2827         utf8n_to_uvchr(first_bad_char_loc,
2828                        e - ((char *) first_bad_char_loc),
2829                        NULL, 0);
2830
2831         /* We deliberately don't try to print the malformed character, which
2832          * might not print very well; it also may be just the first of many
2833          * malformations, so don't print what comes after it */
2834         yyerror(Perl_form(aTHX_
2835             "Malformed UTF-8 character immediately after '%.*s'",
2836             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2837         return NULL;
2838     }
2839
2840     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2841                         /* include the <}> */
2842                         e - backslash_ptr + 1);
2843     if (! SvPOK(res)) {
2844         SvREFCNT_dec_NN(res);
2845         return NULL;
2846     }
2847
2848     /* See if the charnames handler is the Perl core's, and if so, we can skip
2849      * the validation needed for a user-supplied one, as Perl's does its own
2850      * validation. */
2851     table = GvHV(PL_hintgv);             /* ^H */
2852     cvp = hv_fetchs(table, "charnames", FALSE);
2853     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2854         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2855     {
2856         const char * const name = HvNAME(stash);
2857         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2858          && strEQ(name, "_charnames")) {
2859            return res;
2860        }
2861     }
2862
2863     /* Here, it isn't Perl's charname handler.  We can't rely on a
2864      * user-supplied handler to validate the input name.  For non-ut8 input,
2865      * look to see that the first character is legal.  Then loop through the
2866      * rest checking that each is a continuation */
2867
2868     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2869      * the same thing */
2870
2871     if (! UTF) {
2872         if (! isALPHAU(*s)) {
2873             goto bad_charname;
2874         }
2875         s++;
2876         while (s < e) {
2877             if (! isCHARNAME_CONT(*s)) {
2878                 goto bad_charname;
2879             }
2880             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2881                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2882                            "A sequence of multiple spaces in a charnames "
2883                            "alias definition is deprecated");
2884             }
2885             s++;
2886         }
2887         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2888             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2889                         "Trailing white-space in a charnames alias "
2890                         "definition is deprecated");
2891         }
2892     }
2893     else {
2894         /* Similarly for utf8.  For invariants can check directly; for other
2895          * Latin1, can calculate their code point and check; otherwise  use a
2896          * swash */
2897         if (UTF8_IS_INVARIANT(*s)) {
2898             if (! isALPHAU(*s)) {
2899                 goto bad_charname;
2900             }
2901             s++;
2902         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2903             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2904                 goto bad_charname;
2905             }
2906             s += 2;
2907         }
2908         else {
2909             if (! PL_utf8_charname_begin) {
2910                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2911                 PL_utf8_charname_begin = _core_swash_init("utf8",
2912                                                         "_Perl_Charname_Begin",
2913                                                         &PL_sv_undef,
2914                                                         1, 0, NULL, &flags);
2915             }
2916             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2917                 goto bad_charname;
2918             }
2919             s += UTF8SKIP(s);
2920         }
2921
2922         while (s < e) {
2923             if (UTF8_IS_INVARIANT(*s)) {
2924                 if (! isCHARNAME_CONT(*s)) {
2925                     goto bad_charname;
2926                 }
2927                 if (*s == ' ' && *(s-1) == ' '
2928                  && ckWARN_d(WARN_DEPRECATED)) {
2929                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2930                                "A sequence of multiple spaces in a charnam"
2931                                "es alias definition is deprecated");
2932                 }
2933                 s++;
2934             }
2935             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2936                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2937                 {
2938                     goto bad_charname;
2939                 }
2940                 s += 2;
2941             }
2942             else {
2943                 if (! PL_utf8_charname_continue) {
2944                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2945                     PL_utf8_charname_continue = _core_swash_init("utf8",
2946                                                 "_Perl_Charname_Continue",
2947                                                 &PL_sv_undef,
2948                                                 1, 0, NULL, &flags);
2949                 }
2950                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2951                     goto bad_charname;
2952                 }
2953                 s += UTF8SKIP(s);
2954             }
2955         }
2956         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2957             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2958                        "Trailing white-space in a charnames alias "
2959                        "definition is deprecated");
2960         }
2961     }
2962
2963     if (SvUTF8(res)) { /* Don't accept malformed input */
2964         const U8* first_bad_char_loc;
2965         STRLEN len;
2966         const char* const str = SvPV_const(res, len);
2967         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2968             /* If warnings are on, this will print a more detailed analysis of
2969              * what is wrong than the error message below */
2970             utf8n_to_uvchr(first_bad_char_loc,
2971                            (char *) first_bad_char_loc - str,
2972                            NULL, 0);
2973
2974             /* We deliberately don't try to print the malformed character,
2975              * which might not print very well; it also may be just the first
2976              * of many malformations, so don't print what comes after it */
2977             yyerror_pv(
2978               Perl_form(aTHX_
2979                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2980                  (int) (e - backslash_ptr + 1), backslash_ptr,
2981                  (int) ((char *) first_bad_char_loc - str), str
2982               ),
2983               SVf_UTF8);
2984             return NULL;
2985         }
2986     }
2987
2988     return res;
2989
2990   bad_charname: {
2991         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2992
2993         /* The final %.*s makes sure that should the trailing NUL be missing
2994          * that this print won't run off the end of the string */
2995         yyerror_pv(
2996           Perl_form(aTHX_
2997             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2998             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2999             (int)(e - s + bad_char_size), s + bad_char_size
3000           ),
3001           UTF ? SVf_UTF8 : 0);
3002         return NULL;
3003     }
3004 }
3005
3006 /*
3007   scan_const
3008
3009   Extracts the next constant part of a pattern, double-quoted string,
3010   or transliteration.  This is terrifying code.
3011
3012   For example, in parsing the double-quoted string "ab\x63$d", it would
3013   stop at the '$' and return an OP_CONST containing 'abc'.
3014
3015   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3016   processing a pattern (PL_lex_inpat is true), a transliteration
3017   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3018
3019   Returns a pointer to the character scanned up to. If this is
3020   advanced from the start pointer supplied (i.e. if anything was
3021   successfully parsed), will leave an OP_CONST for the substring scanned
3022   in pl_yylval. Caller must intuit reason for not parsing further
3023   by looking at the next characters herself.
3024
3025   In patterns:
3026     expand:
3027       \N{FOO}  => \N{U+hex_for_character_FOO}
3028       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3029
3030     pass through:
3031         all other \-char, including \N and \N{ apart from \N{ABC}
3032
3033     stops on:
3034         @ and $ where it appears to be a var, but not for $ as tail anchor
3035         \l \L \u \U \Q \E
3036         (?{  or  (??{
3037
3038
3039   In transliterations:
3040     characters are VERY literal, except for - not at the start or end
3041     of the string, which indicates a range. If the range is in bytes,
3042     scan_const expands the range to the full set of intermediate
3043     characters. If the range is in utf8, the hyphen is replaced with
3044     a certain range mark which will be handled by pmtrans() in op.c.
3045
3046   In double-quoted strings:
3047     backslashes:
3048       double-quoted style: \r and \n
3049       constants: \x31, etc.
3050       deprecated backrefs: \1 (in substitution replacements)
3051       case and quoting: \U \Q \E
3052     stops on @ and $
3053
3054   scan_const does *not* construct ops to handle interpolated strings.
3055   It stops processing as soon as it finds an embedded $ or @ variable
3056   and leaves it to the caller to work out what's going on.
3057
3058   embedded arrays (whether in pattern or not) could be:
3059       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3060
3061   $ in double-quoted strings must be the symbol of an embedded scalar.
3062
3063   $ in pattern could be $foo or could be tail anchor.  Assumption:
3064   it's a tail anchor if $ is the last thing in the string, or if it's
3065   followed by one of "()| \r\n\t"
3066
3067   \1 (backreferences) are turned into $1 in substitutions
3068
3069   The structure of the code is
3070       while (there's a character to process) {
3071           handle transliteration ranges
3072           skip regexp comments /(?#comment)/ and codes /(?{code})/
3073           skip #-initiated comments in //x patterns
3074           check for embedded arrays
3075           check for embedded scalars
3076           if (backslash) {
3077               deprecate \1 in substitution replacements
3078               handle string-changing backslashes \l \U \Q \E, etc.
3079               switch (what was escaped) {
3080                   handle \- in a transliteration (becomes a literal -)
3081                   if a pattern and not \N{, go treat as regular character
3082                   handle \132 (octal characters)
3083                   handle \x15 and \x{1234} (hex characters)
3084                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3085                   handle \cV (control characters)
3086                   handle printf-style backslashes (\f, \r, \n, etc)
3087               } (end switch)
3088               continue
3089           } (end if backslash)
3090           handle regular character
3091     } (end while character to read)
3092
3093 */
3094
3095 STATIC char *
3096 S_scan_const(pTHX_ char *start)
3097 {
3098     dVAR;
3099     char *send = PL_bufend;             /* end of the constant */
3100     SV *sv = newSV(send - start);               /* sv for the constant.  See
3101                                                    note below on sizing. */
3102     char *s = start;                    /* start of the constant */
3103     char *d = SvPVX(sv);                /* destination for copies */
3104     bool dorange = FALSE;                       /* are we in a translit range? */
3105     bool didrange = FALSE;                      /* did we just finish a range? */
3106     bool in_charclass = FALSE;                  /* within /[...]/ */
3107     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3108     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3109                                                    to be UTF8?  But, this can
3110                                                    show as true when the source
3111                                                    isn't utf8, as for example
3112                                                    when it is entirely composed
3113                                                    of hex constants */
3114     SV *res;                            /* result from charnames */
3115
3116     /* Note on sizing:  The scanned constant is placed into sv, which is
3117      * initialized by newSV() assuming one byte of output for every byte of
3118      * input.  This routine expects newSV() to allocate an extra byte for a
3119      * trailing NUL, which this routine will append if it gets to the end of
3120      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3121      * CAPITAL LETTER A}), or more output than input if the constant ends up
3122      * recoded to utf8, but each time a construct is found that might increase
3123      * the needed size, SvGROW() is called.  Its size parameter each time is
3124      * based on the best guess estimate at the time, namely the length used so
3125      * far, plus the length the current construct will occupy, plus room for
3126      * the trailing NUL, plus one byte for every input byte still unscanned */
3127
3128     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3129                        before set */
3130 #ifdef EBCDIC
3131     UV literal_endpoint = 0;
3132     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3133 #endif
3134
3135     PERL_ARGS_ASSERT_SCAN_CONST;
3136
3137     assert(PL_lex_inwhat != OP_TRANSR);
3138     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3139         /* If we are doing a trans and we know we want UTF8 set expectation */
3140         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3141         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3142     }
3143
3144     /* Protect sv from errors and fatal warnings. */
3145     ENTER_with_name("scan_const");
3146     SAVEFREESV(sv);
3147
3148     while (s < send || dorange) {
3149
3150         /* get transliterations out of the way (they're most literal) */
3151         if (PL_lex_inwhat == OP_TRANS) {
3152             /* expand a range A-Z to the full set of characters.  AIE! */
3153             if (dorange) {
3154                 I32 i;                          /* current expanded character */
3155                 I32 min;                        /* first character in range */
3156                 I32 max;                        /* last character in range */
3157
3158 #ifdef EBCDIC
3159                 UV uvmax = 0;
3160 #endif
3161
3162                 if (has_utf8
3163 #ifdef EBCDIC
3164                     && !native_range
3165 #endif
3166                 ) {
3167                     char * const c = (char*)utf8_hop((U8*)d, -1);
3168                     char *e = d++;
3169                     while (e-- > c)
3170                         *(e + 1) = *e;
3171                     *c = (char) ILLEGAL_UTF8_BYTE;
3172                     /* mark the range as done, and continue */
3173                     dorange = FALSE;
3174                     didrange = TRUE;
3175                     continue;
3176                 }
3177
3178                 i = d - SvPVX_const(sv);                /* remember current offset */
3179 #ifdef EBCDIC
3180                 SvGROW(sv,
3181                        SvLEN(sv) + (has_utf8 ?
3182                                     (512 - UTF_CONTINUATION_MARK +
3183                                      UNISKIP(0x100))
3184                                     : 256));
3185                 /* How many two-byte within 0..255: 128 in UTF-8,
3186                  * 96 in UTF-8-mod. */
3187 #else
3188                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3189 #endif
3190                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3191 #ifdef EBCDIC
3192                 if (has_utf8) {
3193                     int j;
3194                     for (j = 0; j <= 1; j++) {
3195                         char * const c = (char*)utf8_hop((U8*)d, -1);
3196                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3197                         if (j)
3198                             min = (U8)uv;
3199                         else if (uv < 256)
3200                             max = (U8)uv;
3201                         else {
3202                             max = (U8)0xff; /* only to \xff */
3203                             uvmax = uv; /* \x{100} to uvmax */
3204                         }
3205                         d = c; /* eat endpoint chars */
3206                      }
3207                 }
3208                else {
3209 #endif
3210                    d -= 2;              /* eat the first char and the - */
3211                    min = (U8)*d;        /* first char in range */
3212                    max = (U8)d[1];      /* last char in range  */
3213 #ifdef EBCDIC
3214                }
3215 #endif
3216
3217                 if (min > max) {
3218                     Perl_croak(aTHX_
3219                                "Invalid range \"%c-%c\" in transliteration operator",
3220                                (char)min, (char)max);
3221                 }
3222
3223 #ifdef EBCDIC
3224                 if (literal_endpoint == 2 &&
3225                     ((isLOWER_A(min) && isLOWER_A(max)) ||
3226                      (isUPPER_A(min) && isUPPER_A(max))))
3227                 {
3228                     for (i = min; i <= max; i++) {
3229                         if (isALPHA_A(i))
3230                             *d++ = i;
3231                     }
3232                 }
3233                 else
3234 #endif
3235                     for (i = min; i <= max; i++)
3236 #ifdef EBCDIC
3237                         if (has_utf8) {
3238                             append_utf8_from_native_byte(i, &d);
3239                         }
3240                         else
3241 #endif
3242                             *d++ = (char)i;
3243
3244 #ifdef EBCDIC
3245                 if (uvmax) {
3246                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3247                     if (uvmax > 0x101)
3248                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3249                     if (uvmax > 0x100)
3250                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3251                 }
3252 #endif
3253
3254                 /* mark the range as done, and continue */
3255                 dorange = FALSE;
3256                 didrange = TRUE;
3257 #ifdef EBCDIC
3258                 literal_endpoint = 0;
3259 #endif
3260                 continue;
3261             }
3262
3263             /* range begins (ignore - as first or last char) */
3264             else if (*s == '-' && s+1 < send  && s != start) {
3265                 if (didrange) {
3266                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3267                 }
3268                 if (has_utf8
3269 #ifdef EBCDIC
3270                     && !native_range
3271 #endif
3272                     ) {
3273                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
3274                     s++;
3275                     continue;
3276                 }
3277                 dorange = TRUE;
3278                 s++;
3279             }
3280             else {
3281                 didrange = FALSE;
3282 #ifdef EBCDIC
3283                 literal_endpoint = 0;
3284                 native_range = TRUE;
3285 #endif
3286             }
3287         }
3288
3289         /* if we get here, we're not doing a transliteration */
3290
3291         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3292             char *s1 = s-1;
3293             int esc = 0;
3294             while (s1 >= start && *s1-- == '\\')
3295                 esc = !esc;
3296             if (!esc)
3297                 in_charclass = TRUE;
3298         }
3299
3300         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3301             char *s1 = s-1;
3302             int esc = 0;
3303             while (s1 >= start && *s1-- == '\\')
3304                 esc = !esc;
3305             if (!esc)
3306                 in_charclass = FALSE;
3307         }
3308
3309         /* skip for regexp comments /(?#comment)/, except for the last
3310          * char, which will be done separately.
3311          * Stop on (?{..}) and friends */
3312
3313         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3314             if (s[2] == '#') {
3315                 while (s+1 < send && *s != ')')
3316                     *d++ = *s++;
3317             }
3318             else if (!PL_lex_casemods &&
3319                      (    s[2] == '{' /* This should match regcomp.c */
3320                       || (s[2] == '?' && s[3] == '{')))
3321             {
3322                 break;
3323             }
3324         }
3325
3326         /* likewise skip #-initiated comments in //x patterns */
3327         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3328           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3329             while (s+1 < send && *s != '\n')
3330                 *d++ = *s++;
3331         }
3332
3333         /* no further processing of single-quoted regex */
3334         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3335             goto default_action;
3336
3337         /* check for embedded arrays
3338            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3339            */
3340         else if (*s == '@' && s[1]) {
3341             if (isWORDCHAR_lazy_if(s+1,UTF))
3342                 break;
3343             if (strchr(":'{$", s[1]))
3344                 break;
3345             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3346                 break; /* in regexp, neither @+ nor @- are interpolated */
3347         }
3348
3349         /* check for embedded scalars.  only stop if we're sure it's a
3350            variable.
3351         */
3352         else if (*s == '$') {
3353             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3354                 break;
3355             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3356                 if (s[1] == '\\') {
3357                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3358                                    "Possible unintended interpolation of $\\ in regex");
3359                 }
3360                 break;          /* in regexp, $ might be tail anchor */
3361             }
3362         }
3363
3364         /* End of else if chain - OP_TRANS rejoin rest */
3365
3366         /* backslashes */
3367         if (*s == '\\' && s+1 < send) {
3368             char* e;    /* Can be used for ending '}', etc. */
3369
3370             s++;
3371
3372             /* warn on \1 - \9 in substitution replacements, but note that \11
3373              * is an octal; and \19 is \1 followed by '9' */
3374             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3375                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3376             {
3377                 /* diag_listed_as: \%d better written as $%d */
3378                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3379                 *--s = '$';
3380                 break;
3381             }
3382
3383             /* string-change backslash escapes */
3384             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3385                 --s;
3386                 break;
3387             }
3388             /* In a pattern, process \N, but skip any other backslash escapes.
3389              * This is because we don't want to translate an escape sequence
3390              * into a meta symbol and have the regex compiler use the meta
3391              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3392              * in spite of this, we do have to process \N here while the proper
3393              * charnames handler is in scope.  See bugs #56444 and #62056.
3394              * There is a complication because \N in a pattern may also stand
3395              * for 'match a non-nl', and not mean a charname, in which case its
3396              * processing should be deferred to the regex compiler.  To be a
3397              * charname it must be followed immediately by a '{', and not look
3398              * like \N followed by a curly quantifier, i.e., not something like
3399              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3400              * quantifier */
3401             else if (PL_lex_inpat
3402                     && (*s != 'N'
3403                         || s[1] != '{'
3404                         || regcurly(s + 1, FALSE)))
3405             {
3406                 *d++ = '\\';
3407                 goto default_action;
3408             }
3409
3410             switch (*s) {
3411
3412             /* quoted - in transliterations */
3413             case '-':
3414                 if (PL_lex_inwhat == OP_TRANS) {
3415                     *d++ = *s++;
3416                     continue;
3417                 }
3418                 /* FALLTHROUGH */
3419             default:
3420                 {
3421                     if ((isALPHANUMERIC(*s)))
3422                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3423                                        "Unrecognized escape \\%c passed through",
3424                                        *s);
3425                     /* default action is to copy the quoted character */
3426                     goto default_action;
3427                 }
3428
3429             /* eg. \132 indicates the octal constant 0132 */
3430             case '0': case '1': case '2': case '3':
3431             case '4': case '5': case '6': case '7':
3432                 {
3433                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3434                     STRLEN len = 3;
3435                     uv = grok_oct(s, &len, &flags, NULL);
3436                     s += len;
3437                     if (len < 3 && s < send && isDIGIT(*s)
3438                         && ckWARN(WARN_MISC))
3439                     {
3440                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3441                                     "%s", form_short_octal_warning(s, len));
3442                     }
3443                 }
3444                 goto NUM_ESCAPE_INSERT;
3445
3446             /* eg. \o{24} indicates the octal constant \024 */
3447             case 'o':
3448                 {
3449                     const char* error;
3450
3451                     bool valid = grok_bslash_o(&s, &uv, &error,
3452                                                TRUE, /* Output warning */
3453                                                FALSE, /* Not strict */
3454                                                TRUE, /* Output warnings for
3455                                                          non-portables */
3456                                                UTF);
3457                     if (! valid) {
3458                         yyerror(error);
3459                         continue;
3460                     }
3461                     goto NUM_ESCAPE_INSERT;
3462                 }
3463
3464             /* eg. \x24 indicates the hex constant 0x24 */
3465             case 'x':
3466                 {
3467                     const char* error;
3468
3469                     bool valid = grok_bslash_x(&s, &uv, &error,
3470                                                TRUE, /* Output warning */
3471                                                FALSE, /* Not strict */
3472                                                TRUE,  /* Output warnings for
3473                                                          non-portables */
3474                                                UTF);
3475                     if (! valid) {
3476                         yyerror(error);
3477                         continue;
3478                     }
3479                 }
3480
3481               NUM_ESCAPE_INSERT:
3482                 /* Insert oct or hex escaped character.  There will always be
3483                  * enough room in sv since such escapes will be longer than any
3484                  * UTF-8 sequence they can end up as, except if they force us
3485                  * to recode the rest of the string into utf8 */
3486
3487                 /* Here uv is the ordinal of the next character being added */
3488                 if (!UVCHR_IS_INVARIANT(uv)) {
3489                     if (!has_utf8 && uv > 255) {
3490                         /* Might need to recode whatever we have accumulated so
3491                          * far if it contains any chars variant in utf8 or
3492                          * utf-ebcdic. */
3493
3494                         SvCUR_set(sv, d - SvPVX_const(sv));
3495                         SvPOK_on(sv);
3496                         *d = '\0';
3497                         /* See Note on sizing above.  */
3498                         sv_utf8_upgrade_flags_grow(sv,
3499                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3500                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3501                         d = SvPVX(sv) + SvCUR(sv);
3502                         has_utf8 = TRUE;
3503                     }
3504
3505                     if (has_utf8) {
3506                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3507                         if (PL_lex_inwhat == OP_TRANS &&
3508                             PL_sublex_info.sub_op) {
3509                             PL_sublex_info.sub_op->op_private |=
3510                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3511                                              : OPpTRANS_TO_UTF);
3512                         }
3513 #ifdef EBCDIC
3514                         if (uv > 255 && !dorange)
3515                             native_range = FALSE;
3516 #endif
3517                     }
3518                     else {
3519                         *d++ = (char)uv;
3520                     }
3521                 }
3522                 else {
3523                     *d++ = (char) uv;
3524                 }
3525                 continue;
3526
3527             case 'N':
3528                 /* In a non-pattern \N must be a named character, like \N{LATIN
3529                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3530                  * mean to match a non-newline.  For non-patterns, named
3531                  * characters are converted to their string equivalents. In
3532                  * patterns, named characters are not converted to their
3533                  * ultimate forms for the same reasons that other escapes
3534                  * aren't.  Instead, they are converted to the \N{U+...} form
3535                  * to get the value from the charnames that is in effect right
3536                  * now, while preserving the fact that it was a named character
3537                  * so that the regex compiler knows this */
3538
3539                 /* The structure of this section of code (besides checking for
3540                  * errors and upgrading to utf8) is:
3541                  *  Further disambiguate between the two meanings of \N, and if
3542                  *      not a charname, go process it elsewhere
3543                  *  If of form \N{U+...}, pass it through if a pattern;
3544                  *      otherwise convert to utf8
3545                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3546                  *  pattern; otherwise convert to utf8 */
3547
3548                 /* Here, s points to the 'N'; the test below is guaranteed to
3549                  * succeed if we are being called on a pattern as we already
3550                  * know from a test above that the next character is a '{'.
3551                  * On a non-pattern \N must mean 'named sequence, which
3552                  * requires braces */
3553                 s++;
3554                 if (*s != '{') {
3555                     yyerror("Missing braces on \\N{}");
3556                     continue;
3557                 }
3558                 s++;
3559
3560                 /* If there is no matching '}', it is an error. */
3561                 if (! (e = strchr(s, '}'))) {
3562                     if (! PL_lex_inpat) {
3563                         yyerror("Missing right brace on \\N{}");
3564                     } else {
3565                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3566                     }
3567                     continue;
3568                 }
3569
3570                 /* Here it looks like a named character */
3571
3572                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3573                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3574                                 | PERL_SCAN_DISALLOW_PREFIX;
3575                     STRLEN len;
3576
3577                     /* For \N{U+...}, the '...' is a unicode value even on
3578                      * EBCDIC machines */
3579                     s += 2;         /* Skip to next char after the 'U+' */
3580                     len = e - s;
3581                     uv = grok_hex(s, &len, &flags, NULL);
3582                     if (len == 0 || len != (STRLEN)(e - s)) {
3583                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3584                         s = e + 1;
3585                         continue;
3586                     }
3587
3588                     if (PL_lex_inpat) {
3589
3590                         /* On non-EBCDIC platforms, pass through to the regex
3591                          * compiler unchanged.  The reason we evaluated the
3592                          * number above is to make sure there wasn't a syntax
3593                          * error.  But on EBCDIC we convert to native so
3594                          * downstream code can continue to assume it's native
3595                          */
3596                         s -= 5;     /* Include the '\N{U+' */
3597 #ifdef EBCDIC
3598                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3599                                                                and the \0 */
3600                                     "\\N{U+%X}",
3601                                     (unsigned int) UNI_TO_NATIVE(uv));
3602 #else
3603                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3604                         d += e - s + 1;
3605 #endif
3606                     }
3607                     else {  /* Not a pattern: convert the hex to string */
3608
3609                          /* If destination is not in utf8, unconditionally
3610                           * recode it to be so.  This is because \N{} implies
3611                           * Unicode semantics, and scalars have to be in utf8
3612                           * to guarantee those semantics */
3613                         if (! has_utf8) {
3614                             SvCUR_set(sv, d - SvPVX_const(sv));
3615                             SvPOK_on(sv);
3616                             *d = '\0';
3617                             /* See Note on sizing above.  */
3618                             sv_utf8_upgrade_flags_grow(
3619                                         sv,
3620                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3621                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3622                             d = SvPVX(sv) + SvCUR(sv);
3623                             has_utf8 = TRUE;
3624                         }
3625
3626                         /* Add the (Unicode) code point to the output. */
3627                         if (UNI_IS_INVARIANT(uv)) {
3628                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3629                         }
3630                         else {
3631                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3632                         }
3633                     }
3634                 }
3635                 else /* Here is \N{NAME} but not \N{U+...}. */
3636                      if ((res = get_and_check_backslash_N_name(s, e)))
3637                 {
3638                     STRLEN len;
3639                     const char *str = SvPV_const(res, len);
3640                     if (PL_lex_inpat) {
3641
3642                         if (! len) { /* The name resolved to an empty string */
3643                             Copy("\\N{}", d, 4, char);
3644                             d += 4;
3645                         }
3646                         else {
3647                             /* In order to not lose information for the regex
3648                             * compiler, pass the result in the specially made
3649                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3650                             * the code points in hex of each character
3651                             * returned by charnames */
3652
3653                             const char *str_end = str + len;
3654                             const STRLEN off = d - SvPVX_const(sv);
3655
3656                             if (! SvUTF8(res)) {
3657                                 /* For the non-UTF-8 case, we can determine the
3658                                  * exact length needed without having to parse
3659                                  * through the string.  Each character takes up
3660                                  * 2 hex digits plus either a trailing dot or
3661                                  * the "}" */
3662                                 d = off + SvGROW(sv, off
3663                                                     + 3 * len
3664                                                     + 6 /* For the "\N{U+", and
3665                                                            trailing NUL */
3666                                                     + (STRLEN)(send - e));
3667                                 Copy("\\N{U+", d, 5, char);
3668                                 d += 5;
3669                                 while (str < str_end) {
3670                                     char hex_string[4];
3671                                     my_snprintf(hex_string, sizeof(hex_string),
3672                                                 "%02X.", (U8) *str);
3673                                     Copy(hex_string, d, 3, char);
3674                                     d += 3;
3675                                     str++;
3676                                 }
3677                                 d--;    /* We will overwrite below the final
3678                                            dot with a right brace */
3679                             }
3680                             else {
3681                                 STRLEN char_length; /* cur char's byte length */
3682
3683                                 /* and the number of bytes after this is
3684                                  * translated into hex digits */
3685                                 STRLEN output_length;
3686
3687                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3688                                  * for max('U+', '.'); and 1 for NUL */
3689                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3690
3691                                 /* Get the first character of the result. */
3692                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3693                                                         len,
3694                                                         &char_length,
3695                                                         UTF8_ALLOW_ANYUV);
3696                                 /* Convert first code point to hex, including
3697                                  * the boiler plate before it. */
3698                                 output_length =
3699                                     my_snprintf(hex_string, sizeof(hex_string),
3700                                                 "\\N{U+%X",
3701                                                 (unsigned int) uv);
3702
3703                                 /* Make sure there is enough space to hold it */
3704                                 d = off + SvGROW(sv, off
3705                                                     + output_length
3706                                                     + (STRLEN)(send - e)
3707                                                     + 2);       /* '}' + NUL */
3708                                 /* And output it */
3709                                 Copy(hex_string, d, output_length, char);
3710                                 d += output_length;
3711
3712                                 /* For each subsequent character, append dot and
3713                                 * its ordinal in hex */
3714                                 while ((str += char_length) < str_end) {
3715                                     const STRLEN off = d - SvPVX_const(sv);
3716                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3717                                                             str_end - str,
3718                                                             &char_length,
3719                                                             UTF8_ALLOW_ANYUV);
3720                                     output_length =
3721                                         my_snprintf(hex_string,
3722                                                     sizeof(hex_string),
3723                                                     ".%X",
3724                                                     (unsigned int) uv);
3725
3726                                     d = off + SvGROW(sv, off
3727                                                         + output_length
3728                                                         + (STRLEN)(send - e)
3729                                                         + 2);   /* '}' +  NUL */
3730                                     Copy(hex_string, d, output_length, char);
3731                                     d += output_length;
3732                                 }
3733                             }
3734
3735                             *d++ = '}'; /* Done.  Add the trailing brace */
3736                         }
3737                     }
3738                     else { /* Here, not in a pattern.  Convert the name to a
3739                             * string. */
3740
3741                          /* If destination is not in utf8, unconditionally
3742                           * recode it to be so.  This is because \N{} implies
3743                           * Unicode semantics, and scalars have to be in utf8
3744                           * to guarantee those semantics */
3745                         if (! has_utf8) {
3746                             SvCUR_set(sv, d - SvPVX_const(sv));
3747                             SvPOK_on(sv);
3748                             *d = '\0';
3749                             /* See Note on sizing above.  */
3750                             sv_utf8_upgrade_flags_grow(sv,
3751                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3752                                                 len + (STRLEN)(send - s) + 1);
3753                             d = SvPVX(sv) + SvCUR(sv);
3754                             has_utf8 = TRUE;
3755                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3756
3757                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3758                              * set correctly here). */
3759                             const STRLEN off = d - SvPVX_const(sv);
3760                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3761                         }
3762                         Copy(str, d, len, char);
3763                         d += len;
3764                     }
3765
3766                     SvREFCNT_dec(res);
3767
3768                 } /* End \N{NAME} */
3769 #ifdef EBCDIC
3770                 if (!dorange)
3771                     native_range = FALSE; /* \N{} is defined to be Unicode */
3772 #endif
3773                 s = e + 1;  /* Point to just after the '}' */
3774                 continue;
3775
3776             /* \c is a control character */
3777             case 'c':
3778                 s++;
3779                 if (s < send) {
3780                     *d++ = grok_bslash_c(*s++, 1);
3781                 }
3782                 else {
3783                     yyerror("Missing control char name in \\c");
3784                 }
3785                 continue;
3786
3787             /* printf-style backslashes, formfeeds, newlines, etc */
3788             case 'b':
3789                 *d++ = '\b';
3790                 break;
3791             case 'n':
3792                 *d++ = '\n';
3793                 break;
3794             case 'r':
3795                 *d++ = '\r';
3796                 break;
3797             case 'f':
3798                 *d++ = '\f';
3799                 break;
3800             case 't':
3801                 *d++ = '\t';
3802                 break;
3803             case 'e':
3804                 *d++ = ASCII_TO_NATIVE('\033');
3805                 break;
3806             case 'a':
3807                 *d++ = '\a';
3808                 break;
3809             } /* end switch */
3810
3811             s++;
3812             continue;
3813         } /* end if (backslash) */
3814 #ifdef EBCDIC
3815         else
3816             literal_endpoint++;
3817 #endif
3818
3819     default_action:
3820         /* If we started with encoded form, or already know we want it,
3821            then encode the next character */
3822         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3823             STRLEN len  = 1;
3824
3825
3826             /* One might think that it is wasted effort in the case of the
3827              * source being utf8 (this_utf8 == TRUE) to take the next character
3828              * in the source, convert it to an unsigned value, and then convert
3829              * it back again.  But the source has not been validated here.  The
3830              * routine that does the conversion checks for errors like
3831              * malformed utf8 */
3832
3833             const UV nextuv   = (this_utf8)
3834                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3835                                 : (UV) ((U8) *s);
3836             const STRLEN need = UNISKIP(nextuv);
3837             if (!has_utf8) {
3838                 SvCUR_set(sv, d - SvPVX_const(sv));
3839                 SvPOK_on(sv);
3840                 *d = '\0';
3841                 /* See Note on sizing above.  */
3842                 sv_utf8_upgrade_flags_grow(sv,
3843                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3844                                         need + (STRLEN)(send - s) + 1);
3845                 d = SvPVX(sv) + SvCUR(sv);
3846                 has_utf8 = TRUE;
3847             } else if (need > len) {
3848                 /* encoded value larger than old, may need extra space (NOTE:
3849                  * SvCUR() is not set correctly here).   See Note on sizing
3850                  * above.  */
3851                 const STRLEN off = d - SvPVX_const(sv);
3852                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3853             }
3854             s += len;
3855
3856             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3857 #ifdef EBCDIC
3858             if (uv > 255 && !dorange)
3859                 native_range = FALSE;
3860 #endif
3861         }
3862         else {
3863             *d++ = *s++;
3864         }
3865     } /* while loop to process each character */
3866
3867     /* terminate the string and set up the sv */
3868     *d = '\0';
3869     SvCUR_set(sv, d - SvPVX_const(sv));
3870     if (SvCUR(sv) >= SvLEN(sv))
3871         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3872                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3873
3874     SvPOK_on(sv);
3875     if (PL_encoding && !has_utf8) {
3876         sv_recode_to_utf8(sv, PL_encoding);
3877         if (SvUTF8(sv))
3878             has_utf8 = TRUE;
3879     }
3880     if (has_utf8) {
3881         SvUTF8_on(sv);
3882         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3883             PL_sublex_info.sub_op->op_private |=
3884                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3885         }
3886     }
3887
3888     /* shrink the sv if we allocated more than we used */
3889     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3890         SvPV_shrink_to_cur(sv);
3891     }
3892
3893     /* return the substring (via pl_yylval) only if we parsed anything */
3894     if (s > start) {
3895         char *s2 = start;
3896         for (; s2 < s; s2++) {
3897             if (*s2 == '\n')
3898                 COPLINE_INC_WITH_HERELINES;
3899         }
3900         SvREFCNT_inc_simple_void_NN(sv);
3901         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3902             && ! PL_parser->lex_re_reparsing)
3903         {
3904             const char *const key = PL_lex_inpat ? "qr" : "q";
3905             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3906             const char *type;
3907             STRLEN typelen;
3908
3909             if (PL_lex_inwhat == OP_TRANS) {
3910                 type = "tr";
3911                 typelen = 2;
3912             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3913                 type = "s";
3914                 typelen = 1;
3915             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3916                 type = "q";
3917                 typelen = 1;
3918             } else  {
3919                 type = "qq";
3920                 typelen = 2;
3921             }
3922
3923             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3924                                 type, typelen);
3925         }
3926         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3927     }
3928     LEAVE_with_name("scan_const");
3929     return s;
3930 }
3931
3932 /* S_intuit_more
3933  * Returns TRUE if there's more to the expression (e.g., a subscript),
3934  * FALSE otherwise.
3935  *
3936  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3937  *
3938  * ->[ and ->{ return TRUE
3939  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3940  * { and [ outside a pattern are always subscripts, so return TRUE
3941  * if we're outside a pattern and it's not { or [, then return FALSE
3942  * if we're in a pattern and the first char is a {
3943  *   {4,5} (any digits around the comma) returns FALSE
3944  * if we're in a pattern and the first char is a [
3945  *   [] returns FALSE
3946  *   [SOMETHING] has a funky algorithm to decide whether it's a
3947  *      character class or not.  It has to deal with things like
3948  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3949  * anything else returns TRUE
3950  */
3951
3952 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3953
3954 STATIC int
3955 S_intuit_more(pTHX_ char *s)
3956 {
3957     dVAR;
3958
3959     PERL_ARGS_ASSERT_INTUIT_MORE;
3960
3961     if (PL_lex_brackets)
3962         return TRUE;
3963     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3964         return TRUE;
3965     if (*s == '-' && s[1] == '>'
3966      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3967      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3968         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3969         return TRUE;
3970     if (*s != '{' && *s != '[')
3971         return FALSE;
3972     if (!PL_lex_inpat)
3973         return TRUE;
3974
3975     /* In a pattern, so maybe we have {n,m}. */
3976     if (*s == '{') {
3977         if (regcurly(s, FALSE)) {
3978             return FALSE;
3979         }
3980         return TRUE;
3981     }
3982
3983     /* On the other hand, maybe we have a character class */
3984
3985     s++;
3986     if (*s == ']' || *s == '^')
3987         return FALSE;
3988     else {
3989         /* this is terrifying, and it works */
3990         int weight;
3991         char seen[256];
3992         const char * const send = strchr(s,']');
3993         unsigned char un_char, last_un_char;
3994         char tmpbuf[sizeof PL_tokenbuf * 4];
3995
3996         if (!send)              /* has to be an expression */
3997             return TRUE;
3998         weight = 2;             /* let's weigh the evidence */
3999
4000         if (*s == '$')
4001             weight -= 3;
4002         else if (isDIGIT(*s)) {
4003             if (s[1] != ']') {
4004                 if (isDIGIT(s[1]) && s[2] == ']')
4005                     weight -= 10;
4006             }
4007             else
4008                 weight -= 100;
4009         }
4010         Zero(seen,256,char);
4011         un_char = 255;
4012         for (; s < send; s++) {
4013             last_un_char = un_char;
4014             un_char = (unsigned char)*s;
4015             switch (*s) {
4016             case '@':
4017             case '&':
4018             case '$':
4019                 weight -= seen[un_char] * 10;
4020                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4021                     int len;
4022                     char *tmp = PL_bufend;
4023                     PL_bufend = (char*)send;
4024                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4025                     PL_bufend = tmp;
4026                     len = (int)strlen(tmpbuf);
4027                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4028                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4029                         weight -= 100;
4030                     else
4031                         weight -= 10;
4032                 }
4033                 else if (*s == '$' && s[1] &&
4034                   strchr("[#!%*<>()-=",s[1])) {
4035                     if (/*{*/ strchr("])} =",s[2]))
4036                         weight -= 10;
4037                     else
4038                         weight -= 1;
4039                 }
4040                 break;
4041             case '\\':
4042                 un_char = 254;
4043                 if (s[1]) {
4044                     if (strchr("wds]",s[1]))
4045                         weight += 100;
4046                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4047                         weight += 1;
4048                     else if (strchr("rnftbxcav",s[1]))
4049                         weight += 40;
4050                     else if (isDIGIT(s[1])) {
4051                         weight += 40;
4052                         while (s[1] && isDIGIT(s[1]))
4053                             s++;
4054                     }
4055                 }
4056                 else
4057                     weight += 100;
4058                 break;
4059             case '-':
4060                 if (s[1] == '\\')
4061                     weight += 50;
4062                 if (strchr("aA01! ",last_un_char))
4063                     weight += 30;
4064                 if (strchr("zZ79~",s[1]))
4065                     weight += 30;
4066                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4067                     weight -= 5;        /* cope with negative subscript */
4068                 break;
4069             default:
4070                 if (!isWORDCHAR(last_un_char)
4071                     && !(last_un_char == '$' || last_un_char == '@'
4072                          || last_un_char == '&')
4073                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4074                     char *d = tmpbuf;
4075                     while (isALPHA(*s))
4076                         *d++ = *s++;
4077                     *d = '\0';
4078                     if (keyword(tmpbuf, d - tmpbuf, 0))
4079                         weight -= 150;
4080                 }
4081                 if (un_char == last_un_char + 1)
4082                     weight += 5;
4083                 weight -= seen[un_char];
4084                 break;
4085             }
4086             seen[un_char]++;
4087         }
4088         if (weight >= 0)        /* probably a character class */
4089             return FALSE;
4090     }
4091
4092     return TRUE;
4093 }
4094
4095 /*
4096  * S_intuit_method
4097  *
4098  * Does all the checking to disambiguate
4099  *   foo bar
4100  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4101  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4102  *
4103  * First argument is the stuff after the first token, e.g. "bar".
4104  *
4105  * Not a method if foo is a filehandle.
4106  * Not a method if foo is a subroutine prototyped to take a filehandle.
4107  * Not a method if it's really "Foo $bar"
4108  * Method if it's "foo $bar"
4109  * Not a method if it's really "print foo $bar"
4110  * Method if it's really "foo package::" (interpreted as package->foo)
4111  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4112  * Not a method if bar is a filehandle or package, but is quoted with
4113  *   =>
4114  */
4115
4116 STATIC int
4117 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4118 {
4119     dVAR;
4120     char *s = start + (*start == '$');
4121     char tmpbuf[sizeof PL_tokenbuf];
4122     STRLEN len;
4123     GV* indirgv;
4124 #ifdef PERL_MAD
4125     int soff;
4126 #endif
4127
4128     PERL_ARGS_ASSERT_INTUIT_METHOD;
4129
4130     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4131             return 0;
4132     if (cv && SvPOK(cv)) {
4133         const char *proto = CvPROTO(cv);
4134         if (proto) {
4135             while (*proto && (isSPACE(*proto) || *proto == ';'))
4136                 proto++;
4137             if (*proto == '*')
4138                 return 0;
4139         }
4140     }
4141
4142     if (*start == '$') {
4143         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4144                 isUPPER(*PL_tokenbuf))
4145             return 0;
4146 #ifdef PERL_MAD
4147         len = start - SvPVX(PL_linestr);
4148 #endif
4149         s = PEEKSPACE(s);
4150 #ifdef PERL_MAD
4151         start = SvPVX(PL_linestr) + len;
4152 #endif
4153         PL_bufptr = start;
4154         PL_expect = XREF;
4155         return *s == '(' ? FUNCMETH : METHOD;
4156     }
4157
4158     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4159     /* start is the beginning of the possible filehandle/object,
4160      * and s is the end of it
4161      * tmpbuf is a copy of it (but with single quotes as double colons)
4162      */
4163
4164     if (!keyword(tmpbuf, len, 0)) {
4165         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4166             len -= 2;
4167             tmpbuf[len] = '\0';
4168 #ifdef PERL_MAD
4169             soff = s - SvPVX(PL_linestr);
4170 #endif
4171             goto bare_package;
4172         }
4173         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4174         if (indirgv && GvCVu(indirgv))
4175             return 0;
4176         /* filehandle or package name makes it a method */
4177         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4178 #ifdef PERL_MAD
4179             soff = s - SvPVX(PL_linestr);
4180 #endif
4181             s = PEEKSPACE(s);
4182             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4183                 return 0;       /* no assumptions -- "=>" quotes bareword */
4184       bare_package:
4185             start_force(PL_curforce);
4186             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4187                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4188             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4189             if (PL_madskills)
4190                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4191                                                             ( UTF ? SVf_UTF8 : 0 )));
4192             PL_expect = XTERM;
4193             force_next(WORD);
4194             PL_bufptr = s;
4195 #ifdef PERL_MAD
4196             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4197 #endif
4198             return *s == '(' ? FUNCMETH : METHOD;
4199         }
4200     }
4201     return 0;
4202 }
4203
4204 /* Encoded script support. filter_add() effectively inserts a
4205  * 'pre-processing' function into the current source input stream.
4206  * Note that the filter function only applies to the current source file
4207  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4208  *
4209  * The datasv parameter (which may be NULL) can be used to pass
4210  * private data to this instance of the filter. The filter function
4211  * can recover the SV using the FILTER_DATA macro and use it to
4212  * store private buffers and state information.
4213  *
4214  * The supplied datasv parameter is upgraded to a PVIO type
4215  * and the IoDIRP/IoANY field is used to store the function pointer,
4216  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4217  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4218  * private use must be set using malloc'd pointers.
4219  */
4220
4221 SV *
4222 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4223 {
4224     dVAR;
4225     if (!funcp)
4226         return NULL;
4227
4228     if (!PL_parser)
4229         return NULL;
4230
4231     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4232         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4233
4234     if (!PL_rsfp_filters)
4235         PL_rsfp_filters = newAV();
4236     if (!datasv)
4237         datasv = newSV(0);
4238     SvUPGRADE(datasv, SVt_PVIO);
4239     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4240     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4241     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4242                           FPTR2DPTR(void *, IoANY(datasv)),
4243                           SvPV_nolen(datasv)));
4244     av_unshift(PL_rsfp_filters, 1);
4245     av_store(PL_rsfp_filters, 0, datasv) ;
4246     if (
4247         !PL_parser->filtered
4248      && PL_parser->lex_flags & LEX_EVALBYTES
4249      && PL_bufptr < PL_bufend
4250     ) {
4251         const char *s = PL_bufptr;
4252         while (s < PL_bufend) {
4253             if (*s == '\n') {
4254                 SV *linestr = PL_parser->linestr;
4255                 char *buf = SvPVX(linestr);
4256                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4257                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4258                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4259                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4260                 STRLEN const last_uni_pos =
4261                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4262                 STRLEN const last_lop_pos =
4263                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4264                 av_push(PL_rsfp_filters, linestr);
4265                 PL_parser->linestr =
4266                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4267                 buf = SvPVX(PL_parser->linestr);
4268                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4269                 PL_parser->bufptr = buf + bufptr_pos;
4270                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4271                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4272                 PL_parser->linestart = buf + linestart_pos;
4273                 if (PL_parser->last_uni)
4274                     PL_parser->last_uni = buf + last_uni_pos;
4275                 if (PL_parser->last_lop)
4276                     PL_parser->last_lop = buf + last_lop_pos;
4277                 SvLEN(linestr) = SvCUR(linestr);
4278                 SvCUR(linestr) = s-SvPVX(linestr);
4279                 PL_parser->filtered = 1;
4280                 break;
4281             }
4282             s++;
4283         }
4284     }
4285     return(datasv);
4286 }
4287
4288
4289 /* Delete most recently added instance of this filter function. */
4290 void
4291 Perl_filter_del(pTHX_ filter_t funcp)
4292 {
4293     dVAR;
4294     SV *datasv;
4295
4296     PERL_ARGS_ASSERT_FILTER_DEL;
4297
4298 #ifdef DEBUGGING
4299     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4300                           FPTR2DPTR(void*, funcp)));
4301 #endif
4302     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4303         return;
4304     /* if filter is on top of stack (usual case) just pop it off */
4305     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4306     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4307         sv_free(av_pop(PL_rsfp_filters));
4308
4309         return;
4310     }
4311     /* we need to search for the correct entry and clear it     */
4312     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4313 }
4314
4315
4316 /* Invoke the idxth filter function for the current rsfp.        */
4317 /* maxlen 0 = read one text line */
4318 I32
4319 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4320 {
4321     dVAR;
4322     filter_t funcp;
4323     SV *datasv = NULL;
4324     /* This API is bad. It should have been using unsigned int for maxlen.
4325        Not sure if we want to change the API, but if not we should sanity
4326        check the value here.  */
4327     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4328
4329     PERL_ARGS_ASSERT_FILTER_READ;
4330
4331     if (!PL_parser || !PL_rsfp_filters)
4332         return -1;
4333     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4334         /* Provide a default input filter to make life easy.    */
4335         /* Note that we append to the line. This is handy.      */
4336         DEBUG_P(PerlIO_printf(Perl_debug_log,
4337                               "filter_read %d: from rsfp\n", idx));
4338         if (correct_length) {
4339             /* Want a block */
4340             int len ;
4341             const int old_len = SvCUR(buf_sv);
4342
4343             /* ensure buf_sv is large enough */
4344             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4345             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4346                                    correct_length)) <= 0) {
4347                 if (PerlIO_error(PL_rsfp))
4348                     return -1;          /* error */
4349                 else
4350                     return 0 ;          /* end of file */
4351             }
4352             SvCUR_set(buf_sv, old_len + len) ;
4353             SvPVX(buf_sv)[old_len + len] = '\0';
4354         } else {
4355             /* Want a line */
4356             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4357                 if (PerlIO_error(PL_rsfp))
4358                     return -1;          /* error */
4359                 else
4360                     return 0 ;          /* end of file */
4361             }
4362         }
4363         return SvCUR(buf_sv);
4364     }
4365     /* Skip this filter slot if filter has been deleted */
4366     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4367         DEBUG_P(PerlIO_printf(Perl_debug_log,
4368                               "filter_read %d: skipped (filter deleted)\n",
4369                               idx));
4370         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4371     }
4372     if (SvTYPE(datasv) != SVt_PVIO) {
4373         if (correct_length) {
4374             /* Want a block */
4375             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4376             if (!remainder) return 0; /* eof */
4377             if (correct_length > remainder) correct_length = remainder;
4378             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4379             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4380         } else {
4381             /* Want a line */
4382             const char *s = SvEND(datasv);
4383             const char *send = SvPVX(datasv) + SvLEN(datasv);
4384             while (s < send) {
4385                 if (*s == '\n') {
4386                     s++;
4387                     break;
4388                 }
4389                 s++;
4390             }
4391             if (s == send) return 0; /* eof */
4392             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4393             SvCUR_set(datasv, s-SvPVX(datasv));
4394         }
4395         return SvCUR(buf_sv);
4396     }
4397     /* Get function pointer hidden within datasv        */
4398     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4399     DEBUG_P(PerlIO_printf(Perl_debug_log,
4400                           "filter_read %d: via function %p (%s)\n",
4401                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4402     /* Call function. The function is expected to       */
4403     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4404     /* Return: <0:error, =0:eof, >0:not eof             */
4405     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4406 }
4407
4408 STATIC char *
4409 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4410 {
4411     dVAR;
4412
4413     PERL_ARGS_ASSERT_FILTER_GETS;
4414
4415 #ifdef PERL_CR_FILTER
4416     if (!PL_rsfp_filters) {
4417         filter_add(S_cr_textfilter,NULL);
4418     }
4419 #endif
4420     if (PL_rsfp_filters) {
4421         if (!append)
4422             SvCUR_set(sv, 0);   /* start with empty line        */
4423         if (FILTER_READ(0, sv, 0) > 0)
4424             return ( SvPVX(sv) ) ;
4425         else
4426             return NULL ;
4427     }
4428     else
4429         return (sv_gets(sv, PL_rsfp, append));
4430 }
4431
4432 STATIC HV *
4433 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4434 {
4435     dVAR;
4436     GV *gv;
4437
4438     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4439
4440     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4441         return PL_curstash;
4442
4443     if (len > 2 &&
4444         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4445         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4446     {
4447         return GvHV(gv);                        /* Foo:: */
4448     }
4449
4450     /* use constant CLASS => 'MyClass' */
4451     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4452     if (gv && GvCV(gv)) {
4453         SV * const sv = cv_const_sv(GvCV(gv));
4454         if (sv)
4455             pkgname = SvPV_const(sv, len);
4456     }
4457
4458     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4459 }
4460
4461 #ifdef PERL_MAD
4462  /*
4463  * Perl_madlex
4464  * The intent of this yylex wrapper is to minimize the changes to the
4465  * tokener when we aren't interested in collecting madprops.  It remains
4466  * to be seen how successful this strategy will be...
4467  */
4468
4469 int
4470 Perl_madlex(pTHX)
4471 {
4472     int optype;
4473     char *s = PL_bufptr;
4474
4475     /* make sure PL_thiswhite is initialized */
4476     PL_thiswhite = 0;
4477     PL_thismad = 0;
4478
4479     /* previous token ate up our whitespace? */
4480     if (!PL_lasttoke && PL_nextwhite) {
4481         PL_thiswhite = PL_nextwhite;
4482         PL_nextwhite = 0;
4483     }
4484
4485     /* isolate the token, and figure out where it is without whitespace */
4486     PL_realtokenstart = -1;
4487     PL_thistoken = 0;
4488     optype = yylex();
4489     s = PL_bufptr;
4490     assert(PL_curforce < 0);
4491
4492     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4493         if (!PL_thistoken) {
4494             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4495                 PL_thistoken = newSVpvs("");
4496             else {
4497                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4498                 PL_thistoken = newSVpvn(tstart, s - tstart);
4499             }
4500         }
4501         if (PL_thismad) /* install head */
4502             CURMAD('X', PL_thistoken);
4503     }
4504
4505     /* last whitespace of a sublex? */
4506     if (optype == ')' && PL_endwhite) {
4507         CURMAD('X', PL_endwhite);
4508     }
4509
4510     if (!PL_thismad) {
4511
4512         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4513         if (!PL_thiswhite && !PL_endwhite && !optype) {
4514             sv_free(PL_thistoken);
4515             PL_thistoken = 0;
4516             return 0;
4517         }
4518
4519         /* put off final whitespace till peg */
4520         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4521             PL_nextwhite = PL_thiswhite;
4522             PL_thiswhite = 0;
4523         }
4524         else if (PL_thisopen) {
4525             CURMAD('q', PL_thisopen);
4526             if (PL_thistoken)
4527                 sv_free(PL_thistoken);
4528             PL_thistoken = 0;
4529         }
4530         else {
4531             /* Store actual token text as madprop X */
4532             CURMAD('X', PL_thistoken);
4533         }
4534
4535         if (PL_thiswhite) {
4536             /* add preceding whitespace as madprop _ */
4537             CURMAD('_', PL_thiswhite);
4538         }
4539
4540         if (PL_thisstuff) {
4541             /* add quoted material as madprop = */
4542             CURMAD('=', PL_thisstuff);
4543         }
4544
4545         if (PL_thisclose) {
4546             /* add terminating quote as madprop Q */
4547             CURMAD('Q', PL_thisclose);
4548         }
4549     }
4550
4551     /* special processing based on optype */
4552
4553     switch (optype) {
4554
4555     /* opval doesn't need a TOKEN since it can already store mp */
4556     case WORD:
4557     case METHOD:
4558     case FUNCMETH:
4559     case THING:
4560     case PMFUNC:
4561     case PRIVATEREF:
4562     case FUNC0SUB:
4563     case UNIOPSUB:
4564     case LSTOPSUB:
4565         if (pl_yylval.opval)
4566             append_madprops(PL_thismad, pl_yylval.opval, 0);
4567         PL_thismad = 0;
4568         return optype;
4569
4570     /* fake EOF */
4571     case 0:
4572         optype = PEG;
4573         if (PL_endwhite) {
4574             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4575             PL_endwhite = 0;
4576         }
4577         break;
4578
4579     /* pval */
4580     case LABEL:
4581         break;
4582
4583     case ']':
4584     case '}':
4585         if (PL_faketokens)
4586             break;
4587         /* remember any fake bracket that lexer is about to discard */
4588         if (PL_lex_brackets == 1 &&
4589             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4590         {
4591             s = PL_bufptr;
4592             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4593                 s++;
4594             if (*s == '}') {
4595                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4596                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4597                 PL_thiswhite = 0;
4598                 PL_bufptr = s - 1;
4599                 break;  /* don't bother looking for trailing comment */
4600             }
4601             else
4602                 s = PL_bufptr;
4603         }
4604         if (optype == ']')
4605             break;
4606         /* FALLTHROUGH */
4607
4608     /* attach a trailing comment to its statement instead of next token */
4609     case ';':
4610         if (PL_faketokens)
4611             break;
4612         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4613             s = PL_bufptr;
4614             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4615                 s++;
4616             if (*s == '\n' || *s == '#') {
4617                 while (s < PL_bufend && *s != '\n')
4618                     s++;
4619                 if (s < PL_bufend)
4620                     s++;
4621                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4622                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4623                 PL_thiswhite = 0;
4624                 PL_bufptr = s;
4625             }
4626         }
4627         break;
4628
4629     /* ival */
4630     default:
4631         break;
4632
4633     }
4634
4635     /* Create new token struct.  Note: opvals return early above. */
4636     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4637     PL_thismad = 0;
4638     return optype;
4639 }
4640 #endif
4641
4642 STATIC char *
4643 S_tokenize_use(pTHX_ int is_use, char *s) {
4644     dVAR;
4645
4646     PERL_ARGS_ASSERT_TOKENIZE_USE;
4647
4648     if (PL_expect != XSTATE)
4649         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4650                     is_use ? "use" : "no"));
4651     PL_expect = XTERM;
4652     s = SKIPSPACE1(s);
4653     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4654         s = force_version(s, TRUE);
4655         if (*s == ';' || *s == '}'
4656                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4657             start_force(PL_curforce);
4658             NEXTVAL_NEXTTOKE.opval = NULL;
4659             force_next(WORD);
4660         }
4661         else if (*s == 'v') {
4662             s = force_word(s,WORD,FALSE,TRUE);
4663             s = force_version(s, FALSE);
4664         }
4665     }
4666     else {
4667         s = force_word(s,WORD,FALSE,TRUE);
4668         s = force_version(s, FALSE);
4669     }
4670     pl_yylval.ival = is_use;
4671     return s;
4672 }
4673 #ifdef DEBUGGING
4674     static const char* const exp_name[] =
4675         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4676           "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4677         };
4678 #endif
4679
4680 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4681 STATIC bool
4682 S_word_takes_any_delimeter(char *p, STRLEN len)
4683 {
4684     return (len == 1 && strchr("msyq", p[0])) ||
4685            (len == 2 && (
4686             (p[0] == 't' && p[1] == 'r') ||
4687             (p[0] == 'q' && strchr("qwxr", p[1]))));
4688 }
4689
4690 static void
4691 S_check_scalar_slice(pTHX_ char *s)
4692 {
4693     s++;
4694     while (*s == ' ' || *s == '\t') s++;
4695     if (*s == 'q' && s[1] == 'w'
4696      && !isWORDCHAR_lazy_if(s+2,UTF))
4697         return;
4698     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4699         s += UTF ? UTF8SKIP(s) : 1;
4700     if (*s == '}' || *s == ']')
4701         pl_yylval.ival = OPpSLICEWARNING;
4702 }
4703
4704 /*
4705   yylex
4706
4707   Works out what to call the token just pulled out of the input
4708   stream.  The yacc parser takes care of taking the ops we return and
4709   stitching them into a tree.
4710
4711   Returns:
4712     The type of the next token
4713
4714   Structure:
4715       Switch based on the current state:
4716           - if we already built the token before, use it
4717           - if we have a case modifier in a string, deal with that
4718           - handle other cases of interpolation inside a string
4719           - scan the next line if we are inside a format
4720       In the normal state switch on the next character:
4721           - default:
4722             if alphabetic, go to key lookup
4723             unrecoginized character - croak
4724           - 0/4/26: handle end-of-line or EOF
4725           - cases for whitespace
4726           - \n and #: handle comments and line numbers
4727           - various operators, brackets and sigils
4728           - numbers
4729           - quotes
4730           - 'v': vstrings (or go to key lookup)
4731           - 'x' repetition operator (or go to key lookup)
4732           - other ASCII alphanumerics (key lookup begins here):
4733               word before => ?
4734               keyword plugin
4735               scan built-in keyword (but do nothing with it yet)
4736               check for statement label
4737               check for lexical subs
4738                   goto just_a_word if there is one
4739               see whether built-in keyword is overridden
4740               switch on keyword number:
4741                   - default: just_a_word:
4742                       not a built-in keyword; handle bareword lookup
4743                       disambiguate between method and sub call
4744                       fall back to bareword
4745                   - cases for built-in keywords
4746 */
4747
4748
4749 int
4750 Perl_yylex(pTHX)
4751 {
4752     dVAR;
4753     char *s = PL_bufptr;
4754     char *d;
4755     STRLEN len;
4756     bool bof = FALSE;
4757     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4758     U8 formbrack = 0;
4759     U32 fake_eof = 0;
4760
4761     /* orig_keyword, gvp, and gv are initialized here because
4762      * jump to the label just_a_word_zero can bypass their
4763      * initialization later. */
4764     I32 orig_keyword = 0;
4765     GV *gv = NULL;
4766     GV **gvp = NULL;
4767
4768     DEBUG_T( {
4769         SV* tmp = newSVpvs("");
4770         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4771             (IV)CopLINE(PL_curcop),
4772             lex_state_names[PL_lex_state],
4773             exp_name[PL_expect],
4774             pv_display(tmp, s, strlen(s), 0, 60));
4775         SvREFCNT_dec(tmp);
4776     } );
4777
4778     switch (PL_lex_state) {
4779     case LEX_NORMAL:
4780     case LEX_INTERPNORMAL:
4781         break;
4782
4783     /* when we've already built the next token, just pull it out of the queue */
4784     case LEX_KNOWNEXT:
4785 #ifdef PERL_MAD
4786         PL_lasttoke--;
4787         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4788         if (PL_madskills) {
4789             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4790             PL_nexttoke[PL_lasttoke].next_mad = 0;
4791             if (PL_thismad && PL_thismad->mad_key == '_') {
4792                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4793                 PL_thismad->mad_val = 0;
4794                 mad_free(PL_thismad);
4795                 PL_thismad = 0;
4796             }
4797         }
4798         if (!PL_lasttoke) {
4799             PL_lex_state = PL_lex_defer;
4800             PL_expect = PL_lex_expect;
4801             PL_lex_defer = LEX_NORMAL;
4802             if (!PL_nexttoke[PL_lasttoke].next_type)
4803                 return yylex();
4804         }
4805 #else
4806         PL_nexttoke--;
4807         pl_yylval = PL_nextval[PL_nexttoke];
4808         if (!PL_nexttoke) {
4809             PL_lex_state = PL_lex_defer;
4810             PL_expect = PL_lex_expect;
4811             PL_lex_defer = LEX_NORMAL;
4812         }
4813 #endif
4814         {
4815             I32 next_type;
4816 #ifdef PERL_MAD
4817             next_type = PL_nexttoke[PL_lasttoke].next_type;
4818 #else
4819             next_type = PL_nexttype[PL_nexttoke];
4820 #endif
4821             if (next_type & (7<<24)) {
4822                 if (next_type & (1<<24)) {
4823                     if (PL_lex_brackets > 100)
4824                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4825                     PL_lex_brackstack[PL_lex_brackets++] =
4826                         (char) ((next_type >> 16) & 0xff);
4827                 }
4828                 if (next_type & (2<<24))
4829                     PL_lex_allbrackets++;
4830                 if (next_type & (4<<24))
4831                     PL_lex_allbrackets--;
4832                 next_type &= 0xffff;
4833             }
4834             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4835         }
4836
4837     /* interpolated case modifiers like \L \U, including \Q and \E.
4838        when we get here, PL_bufptr is at the \
4839     */
4840     case LEX_INTERPCASEMOD:
4841 #ifdef DEBUGGING
4842         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4843             Perl_croak(aTHX_
4844                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4845                        PL_bufptr, PL_bufend, *PL_bufptr);
4846 #endif
4847         /* handle \E or end of string */
4848         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4849             /* if at a \E */
4850             if (PL_lex_casemods) {
4851                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4852                 PL_lex_casestack[PL_lex_casemods] = '\0';
4853
4854                 if (PL_bufptr != PL_bufend
4855                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4856                         || oldmod == 'F')) {
4857                     PL_bufptr += 2;
4858                     PL_lex_state = LEX_INTERPCONCAT;
4859 #ifdef PERL_MAD
4860                     if (PL_madskills)
4861                         PL_thistoken = newSVpvs("\\E");
4862 #endif
4863                 }
4864                 PL_lex_allbrackets--;
4865                 return REPORT(')');
4866             }
4867             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4868                /* Got an unpaired \E */
4869                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4870                         "Useless use of \\E");
4871             }
4872 #ifdef PERL_MAD
4873             while (PL_bufptr != PL_bufend &&
4874               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4875                 if (PL_madskills) {
4876                   if (!PL_thiswhite)
4877                     PL_thiswhite = newSVpvs("");
4878                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4879                 }
4880                 PL_bufptr += 2;
4881             }
4882 #else
4883             if (PL_bufptr != PL_bufend)
4884                 PL_bufptr += 2;
4885 #endif
4886             PL_lex_state = LEX_INTERPCONCAT;
4887             return yylex();
4888         }
4889         else {
4890             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4891               "### Saw case modifier\n"); });
4892             s = PL_bufptr + 1;
4893             if (s[1] == '\\' && s[2] == 'E') {
4894 #ifdef PERL_MAD
4895                 if (PL_madskills) {
4896                   if (!PL_thiswhite)
4897                     PL_thiswhite = newSVpvs("");
4898                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4899                 }
4900 #endif
4901                 PL_bufptr = s + 3;
4902                 PL_lex_state = LEX_INTERPCONCAT;
4903                 return yylex();
4904             }
4905             else {
4906                 I32 tmp;
4907                 if (!PL_madskills) /* when just compiling don't need correct */
4908                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4909                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4910                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4911                     (strchr(PL_lex_casestack, 'L')
4912                         || strchr(PL_lex_casestack, 'U')
4913                         || strchr(PL_lex_casestack, 'F'))) {
4914                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4915                     PL_lex_allbrackets--;
4916                     return REPORT(')');
4917                 }
4918                 if (PL_lex_casemods > 10)
4919                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4920                 PL_lex_casestack[PL_lex_casemods++] = *s;
4921                 PL_lex_casestack[PL_lex_casemods] = '\0';
4922                 PL_lex_state = LEX_INTERPCONCAT;
4923                 start_force(PL_curforce);
4924                 NEXTVAL_NEXTTOKE.ival = 0;
4925                 force_next((2<<24)|'(');
4926                 start_force(PL_curforce);
4927                 if (*s == 'l')
4928                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4929                 else if (*s == 'u')
4930                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4931                 else if (*s == 'L')
4932                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4933                 else if (*s == 'U')
4934                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4935                 else if (*s == 'Q')
4936                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4937                 else if (*s == 'F')
4938                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4939                 else
4940                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4941                 if (PL_madskills) {
4942                     SV* const tmpsv = newSVpvs("\\ ");
4943                     /* replace the space with the character we want to escape
4944                      */
4945                     SvPVX(tmpsv)[1] = *s;
4946                     curmad('_', tmpsv);
4947                 }
4948                 PL_bufptr = s + 1;
4949             }
4950             force_next(FUNC);
4951             if (PL_lex_starts) {
4952                 s = PL_bufptr;
4953                 PL_lex_starts = 0;
4954 #ifdef PERL_MAD
4955                 if (PL_madskills) {
4956                     if (PL_thistoken)
4957                         sv_free(PL_thistoken);
4958                     PL_thistoken = newSVpvs("");
4959                 }
4960 #endif
4961                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4962                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4963                     OPERATOR(',');
4964                 else
4965                     Aop(OP_CONCAT);
4966             }
4967             else
4968                 return yylex();
4969         }
4970
4971     case LEX_INTERPPUSH:
4972         return REPORT(sublex_push());
4973
4974     case LEX_INTERPSTART:
4975         if (PL_bufptr == PL_bufend)
4976             return REPORT(sublex_done());
4977         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4978               "### Interpolated variable\n"); });
4979         PL_expect = XTERM;
4980         /* for /@a/, we leave the joining for the regex engine to do
4981          * (unless we're within \Q etc) */
4982         PL_lex_dojoin = (*PL_bufptr == '@'
4983                             && (!PL_lex_inpat || PL_lex_casemods));
4984         PL_lex_state = LEX_INTERPNORMAL;
4985         if (PL_lex_dojoin) {
4986             start_force(PL_curforce);
4987             NEXTVAL_NEXTTOKE.ival = 0;
4988             force_next(',');
4989             start_force(PL_curforce);
4990             force_ident("\"", '$');
4991             start_force(PL_curforce);
4992             NEXTVAL_NEXTTOKE.ival = 0;
4993             force_next('$');
4994             start_force(PL_curforce);
4995             NEXTVAL_NEXTTOKE.ival = 0;
4996             force_next((2<<24)|'(');
4997             start_force(PL_curforce);
4998             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4999             force_next(FUNC);
5000         }
5001         /* Convert (?{...}) and friends to 'do {...}' */
5002         if (PL_lex_inpat && *PL_bufptr == '(') {
5003             PL_parser->lex_shared->re_eval_start = PL_bufptr;
5004             PL_bufptr += 2;
5005             if (*PL_bufptr != '{')
5006                 PL_bufptr++;
5007             start_force(PL_curforce);
5008             /* XXX probably need a CURMAD(something) here */
5009             PL_expect = XTERMBLOCK;
5010             force_next(DO);
5011         }
5012
5013         if (PL_lex_starts++) {
5014             s = PL_bufptr;
5015 #ifdef PERL_MAD
5016             if (PL_madskills) {
5017                 if (PL_thistoken)
5018                     sv_free(PL_thistoken);
5019                 PL_thistoken = newSVpvs("");
5020             }
5021 #endif
5022             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5023             if (!PL_lex_casemods && PL_lex_inpat)
5024                 OPERATOR(',');
5025             else
5026                 Aop(OP_CONCAT);
5027         }
5028         return yylex();
5029
5030     case LEX_INTERPENDMAYBE:
5031         if (intuit_more(PL_bufptr)) {
5032             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5033             break;
5034         }
5035         /* FALLTHROUGH */
5036
5037     case LEX_INTERPEND:
5038         if (PL_lex_dojoin) {
5039             const U8 dojoin_was = PL_lex_dojoin;
5040             PL_lex_dojoin = FALSE;
5041             PL_lex_state = LEX_INTERPCONCAT;
5042 #ifdef PERL_MAD
5043             if (PL_madskills) {
5044                 if (PL_thistoken)
5045                     sv_free(PL_thistoken);
5046                 PL_thistoken = newSVpvs("");
5047             }
5048 #endif
5049             PL_lex_allbrackets--;
5050             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5051         }
5052         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5053             && SvEVALED(PL_lex_repl))
5054         {
5055             if (PL_bufptr != PL_bufend)
5056                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5057             PL_lex_repl = NULL;
5058         }
5059         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5060            re_eval_str.  If the here-doc body’s length equals the previous
5061            value of re_eval_start, re_eval_start will now be null.  So
5062            check re_eval_str as well. */
5063         if (PL_parser->lex_shared->re_eval_start
5064          || PL_parser->lex_shared->re_eval_str) {
5065             SV *sv;
5066             if (*PL_bufptr != ')')
5067                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5068             PL_bufptr++;
5069             /* having compiled a (?{..}) expression, return the original
5070              * text too, as a const */
5071             if (PL_parser->lex_shared->re_eval_str) {
5072                 sv = PL_parser->lex_shared->re_eval_str;
5073                 PL_parser->lex_shared->re_eval_str = NULL;
5074                 SvCUR_set(sv,
5075                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5076                 SvPV_shrink_to_cur(sv);
5077             }
5078             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5079                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5080             start_force(PL_curforce);
5081             /* XXX probably need a CURMAD(something) here */
5082             NEXTVAL_NEXTTOKE.opval =
5083                     (OP*)newSVOP(OP_CONST, 0,
5084                                  sv);
5085             force_next(THING);
5086             PL_parser->lex_shared->re_eval_start = NULL;
5087             PL_expect = XTERM;
5088             return REPORT(',');
5089         }
5090
5091         /* FALLTHROUGH */
5092     case LEX_INTERPCONCAT:
5093 #ifdef DEBUGGING
5094         if (PL_lex_brackets)
5095             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5096                        (long) PL_lex_brackets);
5097 #endif
5098         if (PL_bufptr == PL_bufend)
5099             return REPORT(sublex_done());
5100
5101         /* m'foo' still needs to be parsed for possible (?{...}) */
5102         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5103             SV *sv = newSVsv(PL_linestr);
5104             sv = tokeq(sv);
5105             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5106             s = PL_bufend;
5107         }
5108         else {
5109             s = scan_const(PL_bufptr);
5110             if (*s == '\\')
5111                 PL_lex_state = LEX_INTERPCASEMOD;
5112             else
5113                 PL_lex_state = LEX_INTERPSTART;
5114         }
5115
5116         if (s != PL_bufptr) {
5117             start_force(PL_curforce);
5118             if (PL_madskills) {
5119                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5120             }
5121             NEXTVAL_NEXTTOKE = pl_yylval;
5122             PL_expect = XTERM;
5123             force_next(THING);
5124             if (PL_lex_starts++) {
5125 #ifdef PERL_MAD
5126                 if (PL_madskills) {
5127                     if (PL_thistoken)
5128                         sv_free(PL_thistoken);
5129                     PL_thistoken = newSVpvs("");
5130                 }
5131 #endif
5132                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5133                 if (!PL_lex_casemods && PL_lex_inpat)
5134                     OPERATOR(',');
5135                 else
5136                     Aop(OP_CONCAT);
5137             }
5138             else {
5139                 PL_bufptr = s;
5140                 return yylex();
5141             }
5142         }
5143
5144         return yylex();
5145     case LEX_FORMLINE:
5146         s = scan_formline(PL_bufptr);
5147         if (!PL_lex_formbrack)
5148         {
5149             formbrack = 1;
5150             goto rightbracket;
5151         }
5152         PL_bufptr = s;
5153         return yylex();
5154     }
5155
5156     /* We really do *not* want PL_linestr ever becoming a COW. */
5157     assert (!SvIsCOW(PL_linestr));
5158     s = PL_bufptr;
5159     PL_oldoldbufptr = PL_oldbufptr;
5160     PL_oldbufptr = s;
5161     PL_parser->saw_infix_sigil = 0;
5162
5163   retry:
5164 #ifdef PERL_MAD
5165     if (PL_thistoken) {
5166         sv_free(PL_thistoken);
5167         PL_thistoken = 0;
5168     }
5169     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5170 #endif
5171     switch (*s) {
5172     default:
5173         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5174             goto keylookup;
5175         {
5176         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5177         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5178                                                     UTF8SKIP(s),
5179                                                     SVs_TEMP | SVf_UTF8),
5180                                             10, UNI_DISPLAY_ISPRINT)
5181                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5182         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5183         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5184             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5185         } else {
5186             d = PL_linestart;
5187         }
5188         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5189                           UTF8fARG(UTF, (s - d), d),
5190                          (int) len + 1);
5191     }
5192     case 4:
5193     case 26:
5194         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5195     case 0:
5196 #ifdef PERL_MAD
5197         if (PL_madskills)
5198             PL_faketokens = 0;
5199 #endif
5200         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5201             PL_last_uni = 0;
5202             PL_last_lop = 0;
5203             if (PL_lex_brackets &&
5204                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5205                 yyerror((const char *)
5206                         (PL_lex_formbrack
5207                          ? "Format not terminated"
5208                          : "Missing right curly or square bracket"));
5209             }
5210             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5211                         "### Tokener got EOF\n");
5212             } );
5213             TOKEN(0);
5214         }
5215         if (s++ < PL_bufend)
5216             goto retry;                 /* ignore stray nulls */
5217         PL_last_uni = 0;
5218         PL_last_lop = 0;
5219         if (!PL_in_eval && !PL_preambled) {
5220             PL_preambled = TRUE;
5221 #ifdef PERL_MAD
5222             if (PL_madskills)
5223                 PL_faketokens = 1;
5224 #endif
5225             if (PL_perldb) {
5226                 /* Generate a string of Perl code to load the debugger.
5227                  * If PERL5DB is set, it will return the contents of that,
5228                  * otherwise a compile-time require of perl5db.pl.  */
5229
5230                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5231
5232                 if (pdb) {
5233                     sv_setpv(PL_linestr, pdb);
5234                     sv_catpvs(PL_linestr,";");
5235                 } else {
5236                     SETERRNO(0,SS_NORMAL);
5237                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5238                 }
5239                 PL_parser->preambling = CopLINE(PL_curcop);
5240             } else
5241                 sv_setpvs(PL_linestr,"");
5242             if (PL_preambleav) {
5243                 SV **svp = AvARRAY(PL_preambleav);
5244                 SV **const end = svp + AvFILLp(PL_preambleav);
5245                 while(svp <= end) {
5246                     sv_catsv(PL_linestr, *svp);
5247                     ++svp;
5248                     sv_catpvs(PL_linestr, ";");
5249                 }
5250                 sv_free(MUTABLE_SV(PL_preambleav));
5251                 PL_preambleav = NULL;
5252             }
5253             if (PL_minus_E)
5254                 sv_catpvs(PL_linestr,
5255                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5256             if (PL_minus_n || PL_minus_p) {
5257                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5258                 if (PL_minus_l)
5259                     sv_catpvs(PL_linestr,"chomp;");
5260                 if (PL_minus_a) {
5261                     if (PL_minus_F) {
5262                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5263                              || *PL_splitstr == '"')
5264                               && strchr(PL_splitstr + 1, *PL_splitstr))
5265                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5266                         else {
5267                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5268                                bytes can be used as quoting characters.  :-) */
5269                             const char *splits = PL_splitstr;
5270                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5271                             do {
5272                                 /* Need to \ \s  */
5273                                 if (*splits == '\\')
5274                                     sv_catpvn(PL_linestr, splits, 1);
5275                                 sv_catpvn(PL_linestr, splits, 1);
5276                             } while (*splits++);
5277                             /* This loop will embed the trailing NUL of
5278                                PL_linestr as the last thing it does before
5279                                terminating.  */
5280                             sv_catpvs(PL_linestr, ");");
5281                         }
5282                     }
5283                     else
5284                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5285                 }
5286             }
5287             sv_catpvs(PL_linestr, "\n");
5288             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5289             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5290             PL_last_lop = PL_last_uni = NULL;
5291             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5292                 update_debugger_info(PL_linestr, NULL, 0);
5293             goto retry;
5294         }
5295         do {
5296             fake_eof = 0;
5297             bof = PL_rsfp ? TRUE : FALSE;
5298             if (0) {
5299               fake_eof:
5300                 fake_eof = LEX_FAKE_EOF;
5301             }
5302             PL_bufptr = PL_bufend;
5303             COPLINE_INC_WITH_HERELINES;
5304             if (!lex_next_chunk(fake_eof)) {
5305                 CopLINE_dec(PL_curcop);
5306                 s = PL_bufptr;
5307                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5308             }
5309             CopLINE_dec(PL_curcop);
5310 #ifdef PERL_MAD
5311             if (!PL_rsfp)
5312                 PL_realtokenstart = -1;
5313 #endif
5314             s = PL_bufptr;
5315             /* If it looks like the start of a BOM or raw UTF-16,
5316              * check if it in fact is. */
5317             if (bof && PL_rsfp &&
5318                      (*s == 0 ||
5319                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5320                       *(U8*)s >= 0xFE ||
5321                       s[1] == 0)) {
5322                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5323                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5324 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5325                 /* offset may include swallowed CR */
5326                 if (!bof)
5327                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5328 #endif
5329                 if (bof) {
5330                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5331                     s = swallow_bom((U8*)s);
5332                 }
5333             }
5334             if (PL_parser->in_pod) {
5335                 /* Incest with pod. */
5336 #ifdef PERL_MAD
5337                 if (PL_madskills)
5338                     sv_catsv(PL_thiswhite, PL_linestr);
5339 #endif
5340                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5341                     sv_setpvs(PL_linestr, "");
5342                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5343                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5344                     PL_last_lop = PL_last_uni = NULL;
5345                     PL_parser->in_pod = 0;
5346                 }
5347             }
5348             if (PL_rsfp || PL_parser->filtered)
5349                 incline(s);
5350         } while (PL_parser->in_pod);
5351         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5352         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5353         PL_last_lop = PL_last_uni = NULL;
5354         if (CopLINE(PL_curcop) == 1) {
5355             while (s < PL_bufend && isSPACE(*s))
5356                 s++;
5357             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5358                 s++;
5359 #ifdef PERL_MAD
5360             if (PL_madskills)
5361                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5362 #endif
5363             d = NULL;
5364             if (!PL_in_eval) {
5365                 if (*s == '#' && *(s+1) == '!')
5366                     d = s + 2;
5367 #ifdef ALTERNATE_SHEBANG
5368                 else {
5369                     static char const as[] = ALTERNATE_SHEBANG;
5370                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5371                         d = s + (sizeof(as) - 1);
5372                 }
5373 #endif /* ALTERNATE_SHEBANG */
5374             }
5375             if (d) {
5376                 char *ipath;
5377                 char *ipathend;
5378
5379                 while (isSPACE(*d))
5380                     d++;
5381                 ipath = d;
5382                 while (*d && !isSPACE(*d))
5383                     d++;
5384                 ipathend = d;
5385
5386 #ifdef ARG_ZERO_IS_SCRIPT
5387                 if (ipathend > ipath) {
5388                     /*
5389                      * HP-UX (at least) sets argv[0] to the script name,
5390                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5391                      * at least, set argv[0] to the basename of the Perl
5392                      * interpreter. So, having found "#!", we'll set it right.
5393                      */
5394                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5395                                                     SVt_PV)); /* $^X */
5396                     assert(SvPOK(x) || SvGMAGICAL(x));
5397                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5398                         sv_setpvn(x, ipath, ipathend - ipath);
5399                         SvSETMAGIC(x);
5400                     }
5401                     else {
5402                         STRLEN blen;
5403                         STRLEN llen;
5404                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5405                         const char * const lstart = SvPV_const(x,llen);
5406                         if (llen < blen) {
5407                             bstart += blen - llen;
5408                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5409                                 sv_setpvn(x, ipath, ipathend - ipath);
5410                                 SvSETMAGIC(x);
5411                             }
5412                         }
5413                     }
5414                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5415                 }
5416 #endif /* ARG_ZERO_IS_SCRIPT */
5417
5418                 /*
5419                  * Look for options.
5420                  */
5421                 d = instr(s,"perl -");
5422                 if (!d) {
5423                     d = instr(s,"perl");
5424 #if defined(DOSISH)
5425                     /* avoid getting into infinite loops when shebang
5426                      * line contains "Perl" rather than "perl" */
5427                     if (!d) {
5428                         for (d = ipathend-4; d >= ipath; --d) {
5429                             if ((*d == 'p' || *d == 'P')
5430                                 && !ibcmp(d, "perl", 4))
5431                             {
5432                                 break;
5433                             }
5434                         }
5435                         if (d < ipath)
5436                             d = NULL;
5437                     }
5438 #endif
5439                 }
5440 #ifdef ALTERNATE_SHEBANG
5441                 /*
5442                  * If the ALTERNATE_SHEBANG on this system starts with a
5443                  * character that can be part of a Perl expression, then if
5444                  * we see it but not "perl", we're probably looking at the
5445                  * start of Perl code, not a request to hand off to some
5446                  * other interpreter.  Similarly, if "perl" is there, but
5447                  * not in the first 'word' of the line, we assume the line
5448                  * contains the start of the Perl program.
5449                  */
5450                 if (d && *s != '#') {
5451                     const char *c = ipath;
5452                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5453                         c++;
5454                     if (c < d)
5455                         d = NULL;       /* "perl" not in first word; ignore */
5456                     else
5457                         *s = '#';       /* Don't try to parse shebang line */
5458                 }
5459 #endif /* ALTERNATE_SHEBANG */
5460                 if (!d &&
5461                     *s == '#' &&
5462                     ipathend > ipath &&
5463                     !PL_minus_c &&
5464                     !instr(s,"indir") &&
5465                     instr(PL_origargv[0],"perl"))
5466                 {
5467                     dVAR;
5468                     char **newargv;
5469
5470                     *ipathend = '\0';
5471                     s = ipathend + 1;
5472                     while (s < PL_bufend && isSPACE(*s))
5473                         s++;
5474                     if (s < PL_bufend) {
5475                         Newx(newargv,PL_origargc+3,char*);
5476                         newargv[1] = s;
5477                         while (s < PL_bufend && !isSPACE(*s))
5478                             s++;
5479                         *s = '\0';
5480                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5481                     }
5482                     else
5483                         newargv = PL_origargv;
5484                     newargv[0] = ipath;
5485                     PERL_FPU_PRE_EXEC
5486                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5487                     PERL_FPU_POST_EXEC
5488                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5489                 }
5490                 if (d) {
5491                     while (*d && !isSPACE(*d))
5492                         d++;
5493                     while (SPACE_OR_TAB(*d))
5494                         d++;
5495
5496                     if (*d++ == '-') {
5497                         const bool switches_done = PL_doswitches;
5498                         const U32 oldpdb = PL_perldb;
5499                         const bool oldn = PL_minus_n;
5500                         const bool oldp = PL_minus_p;
5501                         const char *d1 = d;
5502
5503                         do {
5504                             bool baduni = FALSE;
5505                             if (*d1 == 'C') {
5506                                 const char *d2 = d1 + 1;
5507                                 if (parse_unicode_opts((const char **)&d2)
5508                                     != PL_unicode)
5509                                     baduni = TRUE;
5510                             }
5511                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5512                                 const char * const m = d1;
5513                                 while (*d1 && !isSPACE(*d1))
5514                                     d1++;
5515                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5516                                       (int)(d1 - m), m);
5517                             }
5518                             d1 = moreswitches(d1);
5519                         } while (d1);
5520                         if (PL_doswitches && !switches_done) {
5521                             int argc = PL_origargc;
5522                             char **argv = PL_origargv;
5523                             do {
5524                                 argc--,argv++;
5525                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5526                             init_argv_symbols(argc,argv);
5527                         }
5528                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5529                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5530                               /* if we have already added "LINE: while (<>) {",
5531                                  we must not do it again */
5532                         {
5533                             sv_setpvs(PL_linestr, "");
5534                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5535                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5536                             PL_last_lop = PL_last_uni = NULL;
5537                             PL_preambled = FALSE;
5538                             if (PERLDB_LINE || PERLDB_SAVESRC)
5539                                 (void)gv_fetchfile(PL_origfilename);
5540                             goto retry;
5541                         }
5542                     }
5543                 }
5544             }
5545         }
5546         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5547             PL_lex_state = LEX_FORMLINE;
5548             start_force(PL_curforce);
5549             NEXTVAL_NEXTTOKE.ival = 0;
5550             force_next(FORMRBRACK);
5551             TOKEN(';');
5552         }
5553         goto retry;
5554     case '\r':
5555 #ifdef PERL_STRICT_CR
5556         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5557         Perl_croak(aTHX_
5558       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5559 #endif
5560     case ' ': case '\t': case '\f': case 013:
5561 #ifdef PERL_MAD
5562         PL_realtokenstart = -1;
5563         if (PL_madskills) {
5564           if (!PL_thiswhite)
5565             PL_thiswhite = newSVpvs("");
5566           sv_catpvn(PL_thiswhite, s, 1);
5567         }
5568 #endif
5569         s++;
5570         goto retry;
5571     case '#':
5572     case '\n':
5573 #ifdef PERL_MAD
5574         PL_realtokenstart = -1;
5575         if (PL_madskills)
5576             PL_faketokens = 0;
5577 #endif
5578         if (PL_lex_state != LEX_NORMAL ||
5579              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5580             if (*s == '#' && s == PL_linestart && PL_in_eval
5581              && !PL_rsfp && !PL_parser->filtered) {
5582                 /* handle eval qq[#line 1 "foo"\n ...] */
5583                 CopLINE_dec(PL_curcop);
5584                 incline(s);
5585             }
5586             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5587                 s = SKIPSPACE0(s);
5588                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5589                     incline(s);
5590             }
5591             else {
5592                 const bool in_comment = *s == '#';
5593                 d = s;
5594                 while (d < PL_bufend && *d != '\n')
5595                     d++;
5596                 if (d < PL_bufend)
5597                     d++;
5598                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5599                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5600                                d, PL_bufend);
5601 #ifdef PERL_MAD
5602                 if (PL_madskills)
5603                     PL_thiswhite = newSVpvn(s, d - s);
5604 #endif
5605                 s = d;
5606                 if (in_comment && d == PL_bufend
5607                  && PL_lex_state == LEX_INTERPNORMAL
5608                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5609                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5610                 else incline(s);
5611             }
5612             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5613                 PL_lex_state = LEX_FORMLINE;
5614                 start_force(PL_curforce);
5615                 NEXTVAL_NEXTTOKE.ival = 0;
5616                 force_next(FORMRBRACK);
5617                 TOKEN(';');
5618             }
5619         }
5620         else {
5621 #ifdef PERL_MAD
5622             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5623                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5624                     PL_faketokens = 0;
5625                     s = SKIPSPACE0(s);
5626                     TOKEN(PEG); /* make sure any #! line is accessible */
5627                 }
5628                 s = SKIPSPACE0(s);
5629             }
5630             else {
5631 #endif
5632                     if (PL_madskills) d = s;
5633                     while (s < PL_bufend && *s != '\n')
5634                         s++;
5635                     if (s < PL_bufend)
5636                     {
5637                         s++;
5638                         if (s < PL_bufend)
5639                             incline(s);
5640                     }
5641                     else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5642                       Perl_croak(aTHX_ "panic: input overflow");
5643 #ifdef PERL_MAD
5644                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5645                         if (!PL_thiswhite)
5646                             PL_thiswhite = newSVpvs("");
5647                         if (CopLINE(PL_curcop) == 1) {
5648                             sv_setpvs(PL_thiswhite, "");
5649                             PL_faketokens = 0;
5650                         }
5651                         sv_catpvn(PL_thiswhite, d, s - d);
5652                     }
5653             }
5654 #endif
5655         }
5656         goto retry;
5657     case '-':
5658         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5659             I32 ftst = 0;
5660             char tmp;
5661
5662             s++;
5663             PL_bufptr = s;
5664             tmp = *s++;
5665
5666             while (s < PL_bufend && SPACE_OR_TAB(*s))
5667                 s++;
5668
5669             if (strnEQ(s,"=>",2)) {
5670                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5671                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5672                 OPERATOR('-');          /* unary minus */
5673             }
5674             switch (tmp) {
5675             case 'r': ftst = OP_FTEREAD;        break;
5676             case 'w': ftst = OP_FTEWRITE;       break;
5677             case 'x': ftst = OP_FTEEXEC;        break;
5678             case 'o': ftst = OP_FTEOWNED;       break;
5679             case 'R': ftst = OP_FTRREAD;        break;
5680             case 'W': ftst = OP_FTRWRITE;       break;
5681             case 'X': ftst = OP_FTREXEC;        break;
5682             case 'O': ftst = OP_FTROWNED;       break;
5683             case 'e': ftst = OP_FTIS;           break;
5684             case 'z': ftst = OP_FTZERO;         break;
5685             case 's': ftst = OP_FTSIZE;         break;
5686             case 'f': ftst = OP_FTFILE;         break;
5687             case 'd': ftst = OP_FTDIR;          break;
5688             case 'l': ftst = OP_FTLINK;         break;
5689             case 'p': ftst = OP_FTPIPE;         break;
5690             case 'S': ftst = OP_FTSOCK;         break;
5691             case 'u': ftst = OP_FTSUID;         break;
5692             case 'g': ftst = OP_FTSGID;         break;
5693             case 'k': ftst = OP_FTSVTX;         break;
5694             case 'b': ftst = OP_FTBLK;          break;
5695             case 'c': ftst = OP_FTCHR;          break;
5696             case 't': ftst = OP_FTTTY;          break;
5697             case 'T': ftst = OP_FTTEXT;         break;
5698             case 'B': ftst = OP_FTBINARY;       break;
5699             case 'M': case 'A': case 'C':
5700                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5701                 switch (tmp) {
5702                 case 'M': ftst = OP_FTMTIME;    break;
5703                 case 'A': ftst = OP_FTATIME;    break;
5704                 case 'C': ftst = OP_FTCTIME;    break;
5705                 default:                        break;
5706                 }
5707                 break;
5708             default:
5709                 break;
5710             }
5711             if (ftst) {
5712                 PL_last_uni = PL_oldbufptr;
5713                 PL_last_lop_op = (OPCODE)ftst;
5714                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5715                         "### Saw file test %c\n", (int)tmp);
5716                 } );
5717                 FTST(ftst);
5718             }
5719             else {
5720                 /* Assume it was a minus followed by a one-letter named
5721                  * subroutine call (or a -bareword), then. */
5722                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5723                         "### '-%c' looked like a file test but was not\n",
5724                         (int) tmp);
5725                 } );
5726                 s = --PL_bufptr;
5727             }
5728         }
5729         {
5730             const char tmp = *s++;
5731             if (*s == tmp) {
5732                 s++;
5733                 if (PL_expect == XOPERATOR)
5734                     TERM(POSTDEC);
5735                 else
5736                     OPERATOR(PREDEC);
5737             }
5738             else if (*s == '>') {
5739                 s++;
5740                 s = SKIPSPACE1(s);
5741                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5742                     ((*s == '$' || *s == '&') && s[1] == '*')
5743                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5744                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5745                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5746                  ))
5747                 {
5748                     Perl_ck_warner_d(aTHX_
5749                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5750                         "Postfix dereference is experimental"
5751                     );
5752                     PL_expect = XPOSTDEREF;
5753                     TOKEN(ARROW);
5754                 }
5755                 if (isIDFIRST_lazy_if(s,UTF)) {
5756                     s = force_word(s,METHOD,FALSE,TRUE);
5757                     TOKEN(ARROW);
5758                 }
5759                 else if (*s == '$')
5760                     OPERATOR(ARROW);
5761                 else
5762                     TERM(ARROW);
5763             }
5764             if (PL_expect == XOPERATOR) {
5765                 if (*s == '=' && !PL_lex_allbrackets &&
5766                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5767                     s--;
5768                     TOKEN(0);
5769                 }
5770                 Aop(OP_SUBTRACT);
5771             }
5772             else {
5773                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5774                     check_uni();
5775                 OPERATOR('-');          /* unary minus */
5776             }
5777         }
5778
5779     case '+':
5780         {
5781             const char tmp = *s++;
5782             if (*s == tmp) {
5783                 s++;
5784                 if (PL_expect == XOPERATOR)
5785                     TERM(POSTINC);
5786                 else
5787                     OPERATOR(PREINC);
5788             }
5789             if (PL_expect == XOPERATOR) {
5790                 if (*s == '=' && !PL_lex_allbrackets &&
5791                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5792                     s--;
5793                     TOKEN(0);
5794                 }
5795                 Aop(OP_ADD);
5796             }
5797             else {
5798                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5799                     check_uni();
5800                 OPERATOR('+');
5801             }
5802         }
5803
5804     case '*':
5805         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5806         if (PL_expect != XOPERATOR) {
5807             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5808             PL_expect = XOPERATOR;
5809             force_ident(PL_tokenbuf, '*');
5810             if (!*PL_tokenbuf)
5811                 PREREF('*');
5812             TERM('*');
5813         }
5814         s++;
5815         if (*s == '*') {
5816             s++;
5817             if (*s == '=' && !PL_lex_allbrackets &&
5818                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5819                 s -= 2;
5820                 TOKEN(0);
5821             }
5822             PWop(OP_POW);
5823         }
5824         if (*s == '=' && !PL_lex_allbrackets &&
5825                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5826             s--;
5827             TOKEN(0);
5828         }
5829         PL_parser->saw_infix_sigil = 1;
5830         Mop(OP_MULTIPLY);
5831
5832     case '%':
5833     {
5834         if (PL_expect == XOPERATOR) {
5835             if (s[1] == '=' && !PL_lex_allbrackets &&
5836                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5837                 TOKEN(0);
5838             ++s;
5839             PL_parser->saw_infix_sigil = 1;
5840             Mop(OP_MODULO);
5841         }
5842         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5843         PL_tokenbuf[0] = '%';
5844         s = scan_ident(s, PL_tokenbuf + 1,
5845                 sizeof PL_tokenbuf - 1, FALSE);
5846         pl_yylval.ival = 0;
5847         if (!PL_tokenbuf[1]) {
5848             PREREF('%');
5849         }
5850         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5851             if (*s == '[')
5852                 PL_tokenbuf[0] = '@';
5853         }
5854         PL_expect = XOPERATOR;
5855         force_ident_maybe_lex('%');
5856         TERM('%');
5857     }
5858     case '^':
5859         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5860                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5861             TOKEN(0);
5862         s++;
5863         BOop(OP_BIT_XOR);
5864     case '[':
5865         if (PL_lex_brackets > 100)
5866             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5867         PL_lex_brackstack[PL_lex_brackets++] = 0;
5868         PL_lex_allbrackets++;
5869         {
5870             const char tmp = *s++;
5871             OPERATOR(tmp);
5872         }
5873     case '~':
5874         if (s[1] == '~'
5875             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5876         {
5877             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5878                 TOKEN(0);
5879             s += 2;
5880             Perl_ck_warner_d(aTHX_
5881                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5882                 "Smartmatch is experimental");
5883             Eop(OP_SMARTMATCH);
5884         }
5885         s++;
5886         OPERATOR('~');
5887     case ',':
5888         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5889             TOKEN(0);
5890         s++;
5891         OPERATOR(',');
5892     case ':':
5893         if (s[1] == ':') {
5894             len = 0;
5895             goto just_a_word_zero_gv;
5896         }
5897         s++;
5898         switch (PL_expect) {
5899             OP *attrs;
5900 #ifdef PERL_MAD
5901             I32 stuffstart;
5902 #endif
5903         case XOPERATOR:
5904             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5905                 break;
5906             PL_bufptr = s;      /* update in case we back off */
5907             if (*s == '=') {
5908                 Perl_croak(aTHX_
5909                            "Use of := for an empty attribute list is not allowed");
5910             }
5911             goto grabattrs;
5912         case XATTRBLOCK:
5913             PL_expect = XBLOCK;
5914             goto grabattrs;
5915         case XATTRTERM:
5916             PL_expect = XTERMBLOCK;
5917          grabattrs:
5918 #ifdef PERL_MAD
5919             stuffstart = s - SvPVX(PL_linestr) - 1;
5920 #endif
5921             s = PEEKSPACE(s);
5922             attrs = NULL;
5923             while (isIDFIRST_lazy_if(s,UTF)) {
5924                 I32 tmp;
5925                 SV *sv;
5926                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5927                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5928                     if (tmp < 0) tmp = -tmp;
5929                     switch (tmp) {
5930                     case KEY_or:
5931                     case KEY_and:
5932                     case KEY_for:
5933                     case KEY_foreach:
5934                     case KEY_unless:
5935                     case KEY_if:
5936                     case KEY_while:
5937                     case KEY_until:
5938                         goto got_attrs;
5939                     default:
5940                         break;
5941                     }
5942                 }
5943                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5944                 if (*d == '(') {
5945                     d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5946                     COPLINE_SET_FROM_MULTI_END;
5947                     if (!d) {
5948                         /* MUST advance bufptr here to avoid bogus
5949                            "at end of line" context messages from yyerror().
5950                          */
5951                         PL_bufptr = s + len;
5952                         yyerror("Unterminated attribute parameter in attribute list");
5953                         if (attrs)
5954                             op_free(attrs);
5955                         sv_free(sv);
5956                         return REPORT(0);       /* EOF indicator */
5957                     }
5958                 }
5959                 if (PL_lex_stuff) {
5960                     sv_catsv(sv, PL_lex_stuff);
5961                     attrs = op_append_elem(OP_LIST, attrs,
5962                                         newSVOP(OP_CONST, 0, sv));
5963                     SvREFCNT_dec(PL_lex_stuff);
5964                     PL_lex_stuff = NULL;
5965                 }
5966                 else {
5967                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5968                         sv_free(sv);
5969                         if (PL_in_my == KEY_our) {
5970                             deprecate(":unique");
5971                         }
5972                         else
5973                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5974                     }
5975
5976                     /* NOTE: any CV attrs applied here need to be part of
5977                        the CVf_BUILTIN_ATTRS define in cv.h! */
5978                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5979                         sv_free(sv);
5980                         CvLVALUE_on(PL_compcv);
5981                     }
5982                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5983                         sv_free(sv);
5984                         deprecate(":locked");
5985                     }
5986                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5987                         sv_free(sv);
5988                         CvMETHOD_on(PL_compcv);
5989                     }
5990                     /* After we've set the flags, it could be argued that
5991                        we don't need to do the attributes.pm-based setting
5992                        process, and shouldn't bother appending recognized
5993                        flags.  To experiment with that, uncomment the
5994                        following "else".  (Note that's already been
5995                        uncommented.  That keeps the above-applied built-in
5996                        attributes from being intercepted (and possibly
5997                        rejected) by a package's attribute routines, but is
5998                        justified by the performance win for the common case
5999                        of applying only built-in attributes.) */
6000                     else
6001                         attrs = op_append_elem(OP_LIST, attrs,
6002                                             newSVOP(OP_CONST, 0,
6003                                                     sv));
6004                 }
6005                 s = PEEKSPACE(d);
6006                 if (*s == ':' && s[1] != ':')
6007                     s = PEEKSPACE(s+1);
6008                 else if (s == d)
6009                     break;      /* require real whitespace or :'s */
6010                 /* XXX losing whitespace on sequential attributes here */
6011             }
6012             {
6013                 if (*s != ';' && *s != '}' &&
6014                     !(PL_expect == XOPERATOR
6015                         ? (*s == '=' ||  *s == ')')
6016                         : (*s == '{' ||  *s == '('))) {
6017                     const char q = ((*s == '\'') ? '"' : '\'');
6018                     /* If here for an expression, and parsed no attrs, back
6019                        off. */
6020                     if (PL_expect == XOPERATOR && !attrs) {
6021                         s = PL_bufptr;
6022                         break;
6023                     }
6024                     /* MUST advance bufptr here to avoid bogus "at end of line"
6025                        context messages from yyerror().
6026                     */
6027                     PL_bufptr = s;
6028                     yyerror( (const char *)
6029                              (*s
6030                               ? Perl_form(aTHX_ "Invalid separator character "
6031                                           "%c%c%c in attribute list", q, *s, q)
6032                               : "Unterminated attribute list" ) );
6033                     if (attrs)
6034                         op_free(attrs);
6035                     OPERATOR(':');
6036                 }
6037             }
6038         got_attrs:
6039             if (attrs) {
6040                 start_force(PL_curforce);
6041                 NEXTVAL_NEXTTOKE.opval = attrs;
6042                 CURMAD('_', PL_nextwhite);
6043                 force_next(THING);
6044             }
6045 #ifdef PERL_MAD
6046             if (PL_madskills) {
6047                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6048                                      (s - SvPVX(PL_linestr)) - stuffstart);
6049             }
6050 #endif
6051             TOKEN(COLONATTR);
6052         }
6053         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6054             s--;
6055             TOKEN(0);
6056         }
6057         PL_lex_allbrackets--;
6058         OPERATOR(':');
6059     case '(':
6060         s++;
6061         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6062             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6063         else
6064             PL_expect = XTERM;
6065         s = SKIPSPACE1(s);
6066         PL_lex_allbrackets++;
6067         TOKEN('(');
6068     case ';':
6069         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6070             TOKEN(0);
6071         CLINE;
6072         s++;
6073         OPERATOR(';');
6074     case ')':
6075         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6076             TOKEN(0);
6077         s++;
6078         PL_lex_allbrackets--;
6079         s = SKIPSPACE1(s);
6080         if (*s == '{')
6081             PREBLOCK(')');
6082         TERM(')');
6083     case ']':
6084         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6085             TOKEN(0);
6086         s++;
6087         if (PL_lex_brackets <= 0)
6088             /* diag_listed_as: Unmatched right %s bracket */
6089             yyerror("Unmatched right square bracket");
6090         else
6091             --PL_lex_brackets;
6092         PL_lex_allbrackets--;
6093         if (PL_lex_state == LEX_INTERPNORMAL) {
6094             if (PL_lex_brackets == 0) {
6095                 if (*s == '-' && s[1] == '>')
6096                     PL_lex_state = LEX_INTERPENDMAYBE;
6097                 else if (*s != '[' && *s != '{')
6098                     PL_lex_state = LEX_INTERPEND;
6099             }
6100         }
6101         TERM(']');
6102     case '{':
6103         s++;
6104       leftbracket:
6105         if (PL_lex_brackets > 100) {
6106             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6107         }
6108         switch (PL_expect) {
6109         case XTERM:
6110             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6111             PL_lex_allbrackets++;
6112             OPERATOR(HASHBRACK);
6113         case XOPERATOR:
6114             while (s < PL_bufend && SPACE_OR_TAB(*s))
6115                 s++;
6116             d = s;
6117             PL_tokenbuf[0] = '\0';
6118             if (d < PL_bufend && *d == '-') {
6119                 PL_tokenbuf[0] = '-';
6120                 d++;
6121                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6122                     d++;
6123             }
6124             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6125                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6126                               FALSE, &len);
6127                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6128                     d++;
6129                 if (*d == '}') {
6130                     const char minus = (PL_tokenbuf[0] == '-');
6131                     s = force_word(s + minus, WORD, FALSE, TRUE);
6132                     if (minus)
6133                         force_next('-');
6134                 }
6135             }
6136             /* FALLTHROUGH */
6137         case XATTRBLOCK:
6138         case XBLOCK:
6139             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6140             PL_lex_allbrackets++;
6141             PL_expect = XSTATE;
6142             break;
6143         case XATTRTERM:
6144         case XTERMBLOCK:
6145             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6146             PL_lex_allbrackets++;
6147             PL_expect = XSTATE;
6148             break;
6149         default: {
6150                 const char *t;
6151                 if (PL_oldoldbufptr == PL_last_lop)
6152                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6153                 else
6154                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6155                 PL_lex_allbrackets++;
6156                 s = SKIPSPACE1(s);
6157                 if (*s == '}') {
6158                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6159                         PL_expect = XTERM;
6160                         /* This hack is to get the ${} in the message. */
6161                         PL_bufptr = s+1;
6162                         yyerror("syntax error");
6163                         break;
6164                     }
6165                     OPERATOR(HASHBRACK);
6166                 }
6167                 /* This hack serves to disambiguate a pair of curlies
6168                  * as being a block or an anon hash.  Normally, expectation
6169                  * determines that, but in cases where we're not in a
6170                  * position to expect anything in particular (like inside
6171                  * eval"") we have to resolve the ambiguity.  This code
6172                  * covers the case where the first term in the curlies is a
6173                  * quoted string.  Most other cases need to be explicitly
6174                  * disambiguated by prepending a "+" before the opening
6175                  * curly in order to force resolution as an anon hash.
6176                  *
6177                  * XXX should probably propagate the outer expectation
6178                  * into eval"" to rely less on this hack, but that could
6179                  * potentially break current behavior of eval"".
6180                  * GSAR 97-07-21
6181                  */
6182                 t = s;
6183                 if (*s == '\'' || *s == '"' || *s == '`') {
6184                     /* common case: get past first string, handling escapes */
6185                     for (t++; t < PL_bufend && *t != *s;)
6186                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6187                             t++;
6188                     t++;
6189                 }
6190                 else if (*s == 'q') {
6191                     if (++t < PL_bufend
6192                         && (!isWORDCHAR(*t)
6193                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6194                                 && !isWORDCHAR(*t))))
6195                     {
6196                         /* skip q//-like construct */
6197                         const char *tmps;
6198                         char open, close, term;
6199                         I32 brackets = 1;
6200
6201                         while (t < PL_bufend && isSPACE(*t))
6202                             t++;
6203                         /* check for q => */
6204                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6205                             OPERATOR(HASHBRACK);
6206                         }
6207                         term = *t;
6208                         open = term;
6209                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6210                             term = tmps[5];
6211                         close = term;
6212                         if (open == close)
6213                             for (t++; t < PL_bufend; t++) {
6214                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6215                                     t++;
6216                                 else if (*t == open)
6217                                     break;
6218                             }
6219                         else {
6220                             for (t++; t < PL_bufend; t++) {
6221                                 if (*t == '\\' && t+1 < PL_bufend)
6222                                     t++;
6223                                 else if (*t == close && --brackets <= 0)
6224                                     break;
6225                                 else if (*t == open)
6226                                     brackets++;
6227                             }
6228                         }
6229                         t++;
6230                     }
6231                     else
6232                         /* skip plain q word */
6233                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6234                              t += UTF8SKIP(t);
6235                 }
6236                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6237                     t += UTF8SKIP(t);
6238                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6239                          t += UTF8SKIP(t);
6240                 }
6241                 while (t < PL_bufend && isSPACE(*t))
6242                     t++;
6243                 /* if comma follows first term, call it an anon hash */
6244                 /* XXX it could be a comma expression with loop modifiers */
6245                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6246                                    || (*t == '=' && t[1] == '>')))
6247                     OPERATOR(HASHBRACK);
6248                 if (PL_expect == XREF)
6249                     PL_expect = XTERM;
6250                 else {
6251                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6252                     PL_expect = XSTATE;
6253                 }
6254             }
6255             break;
6256         }
6257         pl_yylval.ival = CopLINE(PL_curcop);
6258         if (isSPACE(*s) || *s == '#')
6259             PL_copline = NOLINE;   /* invalidate current command line number */
6260         TOKEN(formbrack ? '=' : '{');
6261     case '}':
6262         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6263             TOKEN(0);
6264       rightbracket:
6265         s++;
6266         if (PL_lex_brackets <= 0)
6267             /* diag_listed_as: Unmatched right %s bracket */
6268             yyerror("Unmatched right curly bracket");
6269         else
6270             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6271         PL_lex_allbrackets--;
6272         if (PL_lex_state == LEX_INTERPNORMAL) {
6273             if (PL_lex_brackets == 0) {
6274                 if (PL_expect & XFAKEBRACK) {
6275                     PL_expect &= XENUMMASK;
6276                     PL_lex_state = LEX_INTERPEND;
6277                     PL_bufptr = s;
6278 #if 0
6279                     if (PL_madskills) {
6280                         if (!PL_thiswhite)
6281                             PL_thiswhite = newSVpvs("");
6282                         sv_catpvs(PL_thiswhite,"}");
6283                     }
6284 #endif
6285                     return yylex();     /* ignore fake brackets */
6286                 }
6287                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6288                  && SvEVALED(PL_lex_repl))
6289                     PL_lex_state = LEX_INTERPEND;
6290                 else if (*s == '-' && s[1] == '>')
6291                     PL_lex_state = LEX_INTERPENDMAYBE;
6292                 else if (*s != '[' && *s != '{')
6293                     PL_lex_state = LEX_INTERPEND;
6294             }
6295         }
6296         if (PL_expect & XFAKEBRACK) {
6297             PL_expect &= XENUMMASK;
6298             PL_bufptr = s;
6299             return yylex();             /* ignore fake brackets */
6300         }
6301         start_force(PL_curforce);
6302         if (PL_madskills) {
6303             curmad('X', newSVpvn(s-1,1));
6304             CURMAD('_', PL_thiswhite);
6305         }
6306         force_next(formbrack ? '.' : '}');
6307         if (formbrack) LEAVE;
6308 #ifdef PERL_MAD
6309         if (PL_madskills && !PL_thistoken)
6310             PL_thistoken = newSVpvs("");
6311 #endif
6312         if (formbrack == 2) { /* means . where arguments were expected */
6313             start_force(PL_curforce);
6314             force_next(';');
6315             TOKEN(FORMRBRACK);
6316         }
6317         TOKEN(';');
6318     case '&':
6319         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6320         s++;
6321         if (*s++ == '&') {
6322             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6323                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6324                 s -= 2;
6325                 TOKEN(0);
6326             }
6327             AOPERATOR(ANDAND);
6328         }
6329         s--;
6330         if (PL_expect == XOPERATOR) {
6331             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6332                 && isIDFIRST_lazy_if(s,UTF))
6333             {
6334                 CopLINE_dec(PL_curcop);
6335                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6336                 CopLINE_inc(PL_curcop);
6337             }
6338             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6339                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6340                 s--;
6341                 TOKEN(0);
6342             }
6343             PL_parser->saw_infix_sigil = 1;
6344             BAop(OP_BIT_AND);
6345         }
6346
6347         PL_tokenbuf[0] = '&';
6348         s = scan_ident(s - 1, PL_tokenbuf + 1,
6349                        sizeof PL_tokenbuf - 1, TRUE);
6350         if (PL_tokenbuf[1]) {
6351             PL_expect = XOPERATOR;
6352             force_ident_maybe_lex('&');
6353         }
6354         else
6355             PREREF('&');
6356         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6357         TERM('&');
6358
6359     case '|':
6360         s++;
6361         if (*s++ == '|') {
6362             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6363                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6364                 s -= 2;
6365                 TOKEN(0);
6366             }
6367             AOPERATOR(OROR);
6368         }
6369         s--;
6370         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6371                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6372             s--;
6373             TOKEN(0);
6374         }
6375         BOop(OP_BIT_OR);
6376     case '=':
6377         s++;
6378         {
6379             const char tmp = *s++;
6380             if (tmp == '=') {
6381                 if (!PL_lex_allbrackets &&
6382                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6383                     s -= 2;
6384                     TOKEN(0);
6385                 }
6386                 Eop(OP_EQ);
6387             }
6388             if (tmp == '>') {
6389                 if (!PL_lex_allbrackets &&
6390                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6391                     s -= 2;
6392                     TOKEN(0);
6393                 }
6394                 OPERATOR(',');
6395             }
6396             if (tmp == '~')
6397                 PMop(OP_MATCH);
6398             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6399                 && strchr("+-*/%.^&|<",tmp))
6400                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6401                             "Reversed %c= operator",(int)tmp);
6402             s--;
6403             if (PL_expect == XSTATE && isALPHA(tmp) &&
6404                 (s == PL_linestart+1 || s[-2] == '\n') )
6405                 {
6406                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6407                         || PL_lex_state != LEX_NORMAL) {
6408                         d = PL_bufend;
6409                         while (s < d) {
6410                             if (*s++ == '\n') {
6411                                 incline(s);
6412                                 if (strnEQ(s,"=cut",4)) {
6413                                     s = strchr(s,'\n');
6414                                     if (s)
6415                                         s++;
6416                                     else
6417                                         s = d;
6418                                     incline(s);
6419                                     goto retry;
6420                                 }
6421                             }
6422                         }
6423                         goto retry;
6424                     }
6425 #ifdef PERL_MAD
6426                     if (PL_madskills) {
6427                         if (!PL_thiswhite)
6428                             PL_thiswhite = newSVpvs("");
6429                         sv_catpvn(PL_thiswhite, PL_linestart,
6430                                   PL_bufend - PL_linestart);
6431                     }
6432 #endif
6433                     s = PL_bufend;
6434                     PL_parser->in_pod = 1;
6435                     goto retry;
6436                 }
6437         }
6438         if (PL_expect == XBLOCK) {
6439             const char *t = s;
6440 #ifdef PERL_STRICT_CR
6441             while (SPACE_OR_TAB(*t))
6442 #else
6443             while (SPACE_OR_TAB(*t) || *t == '\r')
6444 #endif
6445                 t++;
6446             if (*t == '\n' || *t == '#') {
6447                 formbrack = 1;
6448                 ENTER;
6449                 SAVEI8(PL_parser->form_lex_state);
6450                 SAVEI32(PL_lex_formbrack);
6451                 PL_parser->form_lex_state = PL_lex_state;
6452                 PL_lex_formbrack = PL_lex_brackets + 1;
6453                 goto leftbracket;
6454             }
6455         }
6456         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6457             s--;
6458             TOKEN(0);
6459         }
6460         pl_yylval.ival = 0;
6461         OPERATOR(ASSIGNOP);
6462     case '!':
6463         s++;
6464         {
6465             const char tmp = *s++;
6466             if (tmp == '=') {
6467                 /* was this !=~ where !~ was meant?
6468                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6469
6470                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6471                     const char *t = s+1;
6472
6473                     while (t < PL_bufend && isSPACE(*t))
6474                         ++t;
6475
6476                     if (*t == '/' || *t == '?' ||
6477                         ((*t == 'm' || *t == 's' || *t == 'y')
6478                          && !isWORDCHAR(t[1])) ||
6479                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6480                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6481                                     "!=~ should be !~");
6482                 }
6483                 if (!PL_lex_allbrackets &&
6484                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6485                     s -= 2;
6486                     TOKEN(0);
6487                 }
6488                 Eop(OP_NE);
6489             }
6490             if (tmp == '~')
6491                 PMop(OP_NOT);
6492         }
6493         s--;
6494         OPERATOR('!');
6495     case '<':
6496         if (PL_expect != XOPERATOR) {
6497             if (s[1] != '<' && !strchr(s,'>'))
6498                 check_uni();
6499             if (s[1] == '<')
6500                 s = scan_heredoc(s);
6501             else
6502                 s = scan_inputsymbol(s);
6503             PL_expect = XOPERATOR;
6504             TOKEN(sublex_start());
6505         }
6506         s++;
6507         {
6508             char tmp = *s++;
6509             if (tmp == '<') {
6510                 if (*s == '=' && !PL_lex_allbrackets &&
6511                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6512                     s -= 2;
6513                     TOKEN(0);
6514                 }
6515                 SHop(OP_LEFT_SHIFT);
6516             }
6517             if (tmp == '=') {
6518                 tmp = *s++;
6519                 if (tmp == '>') {
6520                     if (!PL_lex_allbrackets &&
6521                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6522                         s -= 3;
6523                         TOKEN(0);
6524                     }
6525                     Eop(OP_NCMP);
6526                 }
6527                 s--;
6528                 if (!PL_lex_allbrackets &&
6529                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6530                     s -= 2;
6531                     TOKEN(0);
6532                 }
6533                 Rop(OP_LE);
6534             }
6535         }
6536         s--;
6537         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6538             s--;
6539             TOKEN(0);
6540         }
6541         Rop(OP_LT);
6542     case '>':
6543         s++;
6544         {
6545             const char tmp = *s++;
6546             if (tmp == '>') {
6547                 if (*s == '=' && !PL_lex_allbrackets &&
6548                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6549                     s -= 2;
6550                     TOKEN(0);
6551                 }
6552                 SHop(OP_RIGHT_SHIFT);
6553             }
6554             else if (tmp == '=') {
6555                 if (!PL_lex_allbrackets &&
6556                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6557                     s -= 2;
6558                     TOKEN(0);
6559                 }
6560                 Rop(OP_GE);
6561             }
6562         }
6563         s--;
6564         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6565             s--;
6566             TOKEN(0);
6567         }
6568         Rop(OP_GT);
6569
6570     case '$':
6571         CLINE;
6572
6573         if (PL_expect == XOPERATOR) {
6574             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6575                 return deprecate_commaless_var_list();
6576             }
6577         }
6578         else if (PL_expect == XPOSTDEREF) {
6579             if (s[1] == '#') {
6580                 s++;
6581                 POSTDEREF(DOLSHARP);
6582             }
6583             POSTDEREF('$');
6584         }
6585
6586         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6587             PL_tokenbuf[0] = '@';
6588             s = scan_ident(s + 1, PL_tokenbuf + 1,
6589                            sizeof PL_tokenbuf - 1, FALSE);
6590             if (PL_expect == XOPERATOR)
6591                 no_op("Array length", s);
6592             if (!PL_tokenbuf[1])
6593                 PREREF(DOLSHARP);
6594             PL_expect = XOPERATOR;
6595             force_ident_maybe_lex('#');
6596             TOKEN(DOLSHARP);
6597         }
6598
6599         PL_tokenbuf[0] = '$';
6600         s = scan_ident(s, PL_tokenbuf + 1,
6601                        sizeof PL_tokenbuf - 1, FALSE);
6602         if (PL_expect == XOPERATOR)
6603             no_op("Scalar", s);
6604         if (!PL_tokenbuf[1]) {
6605             if (s == PL_bufend)
6606                 yyerror("Final $ should be \\$ or $name");
6607             PREREF('$');
6608         }
6609
6610         d = s;
6611         {
6612             const char tmp = *s;
6613             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6614                 s = SKIPSPACE1(s);
6615
6616             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6617                 && intuit_more(s)) {
6618                 if (*s == '[') {
6619                     PL_tokenbuf[0] = '@';
6620                     if (ckWARN(WARN_SYNTAX)) {
6621                         char *t = s+1;
6622
6623                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6624                             t++;
6625                         if (*t++ == ',') {
6626                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6627                             while (t < PL_bufend && *t != ']')
6628                                 t++;
6629                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6630                                         "Multidimensional syntax %.*s not supported",
6631                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6632                         }
6633                     }
6634                 }
6635                 else if (*s == '{') {
6636                     char *t;
6637                     PL_tokenbuf[0] = '%';
6638                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6639                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6640                         {
6641                             char tmpbuf[sizeof PL_tokenbuf];
6642                             do {
6643                                 t++;
6644                             } while (isSPACE(*t));
6645                             if (isIDFIRST_lazy_if(t,UTF)) {
6646                                 STRLEN len;
6647                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6648                                               &len);
6649                                 while (isSPACE(*t))
6650                                     t++;
6651                                 if (*t == ';'
6652                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6653                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6654                                         "You need to quote \"%"UTF8f"\"",
6655                                          UTF8fARG(UTF, len, tmpbuf));
6656                             }
6657                         }
6658                 }
6659             }
6660
6661             PL_expect = XOPERATOR;
6662             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6663                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6664                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6665                     PL_expect = XOPERATOR;
6666                 else if (strchr("$@\"'`q", *s))
6667                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6668                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6669                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6670                 else if (isIDFIRST_lazy_if(s,UTF)) {
6671                     char tmpbuf[sizeof PL_tokenbuf];
6672                     int t2;
6673                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6674                     if ((t2 = keyword(tmpbuf, len, 0))) {
6675                         /* binary operators exclude handle interpretations */
6676                         switch (t2) {
6677                         case -KEY_x:
6678                         case -KEY_eq:
6679                         case -KEY_ne:
6680                         case -KEY_gt:
6681                         case -KEY_lt:
6682                         case -KEY_ge:
6683                         case -KEY_le:
6684                         case -KEY_cmp:
6685                             break;
6686                         default:
6687                             PL_expect = XTERM;  /* e.g. print $fh length() */
6688                             break;
6689                         }
6690                     }
6691                     else {
6692                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6693                     }
6694                 }
6695                 else if (isDIGIT(*s))
6696                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6697                 else if (*s == '.' && isDIGIT(s[1]))
6698                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6699                 else if ((*s == '?' || *s == '-' || *s == '+')
6700                          && !isSPACE(s[1]) && s[1] != '=')
6701                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6702                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6703                          && s[1] != '/')
6704                     PL_expect = XTERM;          /* e.g. print $fh /.../
6705                                                    XXX except DORDOR operator
6706                                                 */
6707                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6708                          && s[2] != '=')
6709                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6710             }
6711         }
6712         force_ident_maybe_lex('$');
6713         TOKEN('$');
6714
6715     case '@':
6716         if (PL_expect == XOPERATOR)
6717             no_op("Array", s);
6718         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6719         PL_tokenbuf[0] = '@';
6720         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6721         pl_yylval.ival = 0;
6722         if (!PL_tokenbuf[1]) {
6723             PREREF('@');
6724         }
6725         if (PL_lex_state == LEX_NORMAL)
6726             s = SKIPSPACE1(s);
6727         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6728             if (*s == '{')
6729                 PL_tokenbuf[0] = '%';
6730
6731             /* Warn about @ where they meant $. */
6732             if (*s == '[' || *s == '{') {
6733                 if (ckWARN(WARN_SYNTAX)) {
6734                     S_check_scalar_slice(aTHX_ s);
6735                 }
6736             }
6737         }
6738         PL_expect = XOPERATOR;
6739         force_ident_maybe_lex('@');
6740         TERM('@');
6741
6742      case '/':                  /* may be division, defined-or, or pattern */
6743         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6744             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6745                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6746                 TOKEN(0);
6747             s += 2;
6748             AOPERATOR(DORDOR);
6749         }
6750         /* FALLTHROUGH */
6751      case '?':                  /* may either be conditional or pattern */
6752         if (PL_expect == XOPERATOR) {
6753              char tmp = *s++;
6754              if(tmp == '?') {
6755                 if (!PL_lex_allbrackets &&
6756                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6757                     s--;
6758                     TOKEN(0);
6759                 }
6760                 PL_lex_allbrackets++;
6761                 OPERATOR('?');
6762              }
6763              else {
6764                  tmp = *s++;
6765                  if(tmp == '/') {
6766                      /* A // operator. */
6767                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6768                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6769                                             LEX_FAKEEOF_LOGIC)) {
6770                         s -= 2;
6771                         TOKEN(0);
6772                     }
6773                     AOPERATOR(DORDOR);
6774                  }
6775                  else {
6776                      s--;
6777                      if (*s == '=' && !PL_lex_allbrackets &&
6778                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6779                          s--;
6780                          TOKEN(0);
6781                      }
6782                      Mop(OP_DIVIDE);
6783                  }
6784              }
6785          }
6786          else {
6787              /* Disable warning on "study /blah/" */
6788              if (PL_oldoldbufptr == PL_last_uni
6789               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6790                   || memNE(PL_last_uni, "study", 5)
6791                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6792               ))
6793                  check_uni();
6794              if (*s == '?')
6795                  deprecate("?PATTERN? without explicit operator");
6796              s = scan_pat(s,OP_MATCH);
6797              TERM(sublex_start());
6798          }
6799
6800     case '.':
6801         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6802 #ifdef PERL_STRICT_CR
6803             && s[1] == '\n'
6804 #else
6805             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6806 #endif
6807             && (s == PL_linestart || s[-1] == '\n') )
6808         {
6809             PL_expect = XSTATE;
6810             formbrack = 2; /* dot seen where arguments expected */
6811             goto rightbracket;
6812         }
6813         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6814             s += 3;
6815             OPERATOR(YADAYADA);
6816         }
6817         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6818             char tmp = *s++;
6819             if (*s == tmp) {
6820                 if (!PL_lex_allbrackets &&
6821                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6822                     s--;
6823                     TOKEN(0);
6824                 }
6825                 s++;
6826                 if (*s == tmp) {
6827                     s++;
6828                     pl_yylval.ival = OPf_SPECIAL;
6829                 }
6830                 else
6831                     pl_yylval.ival = 0;
6832                 OPERATOR(DOTDOT);
6833             }
6834             if (*s == '=' && !PL_lex_allbrackets &&
6835                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6836                 s--;
6837                 TOKEN(0);
6838             }
6839             Aop(OP_CONCAT);
6840         }
6841         /* FALLTHROUGH */
6842     case '0': case '1': case '2': case '3': case '4':
6843     case '5': case '6': case '7': case '8': case '9':
6844         s = scan_num(s, &pl_yylval);
6845         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6846         if (PL_expect == XOPERATOR)
6847             no_op("Number",s);
6848         TERM(THING);
6849
6850     case '\'':
6851         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6852         if (!s)
6853             missingterm(NULL);
6854         COPLINE_SET_FROM_MULTI_END;
6855         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6856         if (PL_expect == XOPERATOR) {
6857             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6858                 return deprecate_commaless_var_list();
6859             }
6860             else
6861                 no_op("String",s);
6862         }
6863         pl_yylval.ival = OP_CONST;
6864         TERM(sublex_start());
6865
6866     case '"':
6867         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6868         DEBUG_T( {
6869             if (s)
6870                 printbuf("### Saw string before %s\n", s);
6871             else
6872                 PerlIO_printf(Perl_debug_log,
6873                              "### Saw unterminated string\n");
6874         } );
6875         if (PL_expect == XOPERATOR) {
6876             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6877                 return deprecate_commaless_var_list();
6878             }
6879             else
6880                 no_op("String",s);
6881         }
6882         if (!s)
6883             missingterm(NULL);
6884         pl_yylval.ival = OP_CONST;
6885         /* FIXME. I think that this can be const if char *d is replaced by
6886            more localised variables.  */
6887         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6888             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6889                 pl_yylval.ival = OP_STRINGIFY;
6890                 break;
6891             }
6892         }
6893         if (pl_yylval.ival == OP_CONST)
6894             COPLINE_SET_FROM_MULTI_END;
6895         TERM(sublex_start());
6896
6897     case '`':
6898         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6899         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6900         if (PL_expect == XOPERATOR)
6901             no_op("Backticks",s);
6902         if (!s)
6903             missingterm(NULL);
6904         pl_yylval.ival = OP_BACKTICK;
6905         TERM(sublex_start());
6906
6907     case '\\':
6908         s++;
6909         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6910          && isDIGIT(*s))
6911             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6912                            *s, *s);
6913         if (PL_expect == XOPERATOR)
6914             no_op("Backslash",s);
6915         OPERATOR(REFGEN);
6916
6917     case 'v':
6918         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6919             char *start = s + 2;
6920             while (isDIGIT(*start) || *start == '_')
6921                 start++;
6922             if (*start == '.' && isDIGIT(start[1])) {
6923                 s = scan_num(s, &pl_yylval);
6924                 TERM(THING);
6925             }
6926             else if ((*start == ':' && start[1] == ':')
6927                   || (PL_expect == XSTATE && *start == ':'))
6928                 goto keylookup;
6929             else if (PL_expect == XSTATE) {
6930                 d = start;
6931                 while (d < PL_bufend && isSPACE(*d)) d++;
6932                 if (*d == ':') goto keylookup;
6933             }
6934             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6935             if (!isALPHA(*start) && (PL_expect == XTERM
6936                         || PL_expect == XREF || PL_expect == XSTATE
6937                         || PL_expect == XTERMORDORDOR)) {
6938                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6939                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6940                 if (!gv) {
6941                     s = scan_num(s, &pl_yylval);
6942                     TERM(THING);
6943                 }
6944             }
6945         }
6946         goto keylookup;
6947     case 'x':
6948         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6949             s++;
6950             Mop(OP_REPEAT);
6951         }
6952         goto keylookup;
6953
6954     case '_':
6955     case 'a': case 'A':
6956     case 'b': case 'B':
6957     case 'c': case 'C':
6958     case 'd': case 'D':
6959     case 'e': case 'E':
6960     case 'f': case 'F':
6961     case 'g': case 'G':
6962     case 'h': case 'H':
6963     case 'i': case 'I':
6964     case 'j': case 'J':
6965     case 'k': case 'K':
6966     case 'l': case 'L':
6967     case 'm': case 'M':
6968     case 'n': case 'N':
6969     case 'o': case 'O':
6970     case 'p': case 'P':
6971     case 'q': case 'Q':
6972     case 'r': case 'R':
6973     case 's': case 'S':
6974     case 't': case 'T':
6975     case 'u': case 'U':
6976               case 'V':
6977     case 'w': case 'W':
6978               case 'X':
6979     case 'y': case 'Y':
6980     case 'z': case 'Z':
6981
6982       keylookup: {
6983         bool anydelim;
6984         bool lex;
6985         I32 tmp;
6986         SV *sv;
6987         CV *cv;
6988         PADOFFSET off;
6989         OP *rv2cv_op;
6990
6991         lex = FALSE;
6992         orig_keyword = 0;
6993         off = 0;
6994         sv = NULL;
6995         cv = NULL;
6996         gv = NULL;
6997         gvp = NULL;
6998         rv2cv_op = NULL;
6999
7000         PL_bufptr = s;
7001         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7002
7003         /* Some keywords can be followed by any delimiter, including ':' */
7004         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7005
7006         /* x::* is just a word, unless x is "CORE" */
7007         if (!anydelim && *s == ':' && s[1] == ':') {
7008             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7009             goto just_a_word;
7010         }
7011
7012         d = s;
7013         while (d < PL_bufend && isSPACE(*d))
7014                 d++;    /* no comments skipped here, or s### is misparsed */
7015
7016         /* Is this a word before a => operator? */
7017         if (*d == '=' && d[1] == '>') {
7018           fat_arrow:
7019             CLINE;
7020             pl_yylval.opval
7021                 = (OP*)newSVOP(OP_CONST, 0,
7022                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7023             pl_yylval.opval->op_private = OPpCONST_BARE;
7024             TERM(WORD);
7025         }
7026
7027         /* Check for plugged-in keyword */
7028         {
7029             OP *o;
7030             int result;
7031             char *saved_bufptr = PL_bufptr;
7032             PL_bufptr = s;
7033             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7034             s = PL_bufptr;
7035             if (result == KEYWORD_PLUGIN_DECLINE) {
7036                 /* not a plugged-in keyword */
7037                 PL_bufptr = saved_bufptr;
7038             } else if (result == KEYWORD_PLUGIN_STMT) {
7039                 pl_yylval.opval = o;
7040                 CLINE;
7041                 PL_expect = XSTATE;
7042                 return REPORT(PLUGSTMT);
7043             } else if (result == KEYWORD_PLUGIN_EXPR) {
7044                 pl_yylval.opval = o;
7045                 CLINE;
7046                 PL_expect = XOPERATOR;
7047                 return REPORT(PLUGEXPR);
7048             } else {
7049                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7050                                         PL_tokenbuf);
7051             }
7052         }
7053
7054         /* Check for built-in keyword */
7055         tmp = keyword(PL_tokenbuf, len, 0);
7056
7057         /* Is this a label? */
7058         if (!anydelim && PL_expect == XSTATE
7059               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7060             s = d + 1;
7061             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7062             pl_yylval.pval[len] = '\0';
7063             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7064             CLINE;
7065             TOKEN(LABEL);
7066         }
7067
7068         /* Check for lexical sub */
7069         if (PL_expect != XOPERATOR) {
7070             char tmpbuf[sizeof PL_tokenbuf + 1];
7071             *tmpbuf = '&';
7072             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7073             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7074             if (off != NOT_IN_PAD) {
7075                 assert(off); /* we assume this is boolean-true below */
7076                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7077                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7078                     HEK * const stashname = HvNAME_HEK(stash);
7079                     sv = newSVhek(stashname);
7080                     sv_catpvs(sv, "::");
7081                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7082                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7083                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7084                                     SVt_PVCV);
7085                     off = 0;
7086                     if (!gv) {
7087                         sv_free(sv);
7088                         sv = NULL;
7089                         goto just_a_word;
7090                     }
7091                 }
7092                 else {
7093                     rv2cv_op = newOP(OP_PADANY, 0);
7094                     rv2cv_op->op_targ = off;
7095                     cv = find_lexical_cv(off);
7096                 }
7097                 lex = TRUE;
7098                 goto just_a_word;
7099             }
7100             off = 0;
7101         }
7102
7103         if (tmp < 0) {                  /* second-class keyword? */
7104             GV *ogv = NULL;     /* override (winner) */
7105             GV *hgv = NULL;     /* hidden (loser) */
7106             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7107                 CV *cv;
7108                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7109                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7110                                             SVt_PVCV)) &&
7111                     (cv = GvCVu(gv)))
7112                 {
7113                     if (GvIMPORTED_CV(gv))
7114                         ogv = gv;
7115                     else if (! CvMETHOD(cv))
7116                         hgv = gv;
7117                 }
7118                 if (!ogv &&
7119                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7120                                           len, FALSE)) &&
7121                     (gv = *gvp) && (
7122                         isGV_with_GP(gv)
7123                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
7124                             :   SvPCS_IMPORTED(gv)
7125                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7126                                          len, 0), 1)
7127                    ))
7128                 {
7129                     ogv = gv;
7130                 }
7131             }
7132             if (ogv) {
7133                 orig_keyword = tmp;
7134                 tmp = 0;                /* overridden by import or by GLOBAL */
7135             }
7136             else if (gv && !gvp
7137                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7138                      && GvCVu(gv))
7139             {
7140                 tmp = 0;                /* any sub overrides "weak" keyword */
7141             }
7142             else {                      /* no override */
7143                 tmp = -tmp;
7144                 if (tmp == KEY_dump) {
7145                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7146                                    "dump() better written as CORE::dump()");
7147                 }
7148                 gv = NULL;
7149                 gvp = 0;
7150                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7151                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7152                                    "Ambiguous call resolved as CORE::%s(), "
7153                                    "qualify as such or use &",
7154                                    GvENAME(hgv));
7155             }
7156         }
7157
7158         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7159          && (!anydelim || *s != '#')) {
7160             /* no override, and not s### either; skipspace is safe here
7161              * check for => on following line */
7162             bool arrow;
7163             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7164             STRLEN   soff = s         - SvPVX(PL_linestr);
7165             s = skipspace_flags(s, LEX_NO_INCLINE);
7166             arrow = *s == '=' && s[1] == '>';
7167             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7168             s         = SvPVX(PL_linestr) +   soff;
7169             if (arrow)
7170                 goto fat_arrow;
7171         }
7172
7173       reserved_word:
7174         switch (tmp) {
7175
7176         default:                        /* not a keyword */
7177             /* Trade off - by using this evil construction we can pull the
7178                variable gv into the block labelled keylookup. If not, then
7179                we have to give it function scope so that the goto from the
7180                earlier ':' case doesn't bypass the initialisation.  */
7181             if (0) {
7182             just_a_word_zero_gv:
7183                 sv = NULL;
7184                 cv = NULL;
7185                 gv = NULL;
7186                 gvp = NULL;
7187                 rv2cv_op = NULL;
7188                 orig_keyword = 0;
7189                 lex = 0;
7190                 off = 0;
7191             }
7192           just_a_word: {
7193                 int pkgname = 0;
7194                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7195                 const char penultchar =
7196                     lastchar && PL_bufptr - 2 >= PL_linestart
7197                          ? PL_bufptr[-2]
7198                          : 0;
7199 #ifdef PERL_MAD
7200                 SV *nextPL_nextwhite = 0;
7201 #endif
7202
7203
7204                 /* Get the rest if it looks like a package qualifier */
7205
7206                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7207                     STRLEN morelen;
7208                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7209                                   TRUE, &morelen);
7210                     if (!morelen)
7211                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7212                                 UTF8fARG(UTF, len, PL_tokenbuf),
7213                                 *s == '\'' ? "'" : "::");
7214                     len += morelen;
7215                     pkgname = 1;
7216                 }
7217
7218                 if (PL_expect == XOPERATOR) {
7219                     if (PL_bufptr == PL_linestart) {
7220                         CopLINE_dec(PL_curcop);
7221                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7222                         CopLINE_inc(PL_curcop);
7223                     }
7224                     else
7225                         no_op("Bareword",s);
7226                 }
7227
7228                 /* Look for a subroutine with this name in current package,
7229                    unless this is a lexical sub, or name is "Foo::",
7230                    in which case Foo is a bareword
7231                    (and a package name). */
7232
7233                 if (len > 2 && !PL_madskills &&
7234                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7235                 {
7236                     if (ckWARN(WARN_BAREWORD)
7237                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7238                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7239                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7240                            UTF8fARG(UTF, len, PL_tokenbuf));
7241                     len -= 2;
7242                     PL_tokenbuf[len] = '\0';
7243                     gv = NULL;
7244                     gvp = 0;
7245                 }
7246                 else {
7247                     if (!lex && !gv) {
7248                         /* Mustn't actually add anything to a symbol table.
7249                            But also don't want to "initialise" any placeholder
7250                            constants that might already be there into full
7251                            blown PVGVs with attached PVCV.  */
7252                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7253                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7254                                                SVt_PVCV);
7255                     }
7256                     len = 0;
7257                 }
7258
7259                 /* if we saw a global override before, get the right name */
7260
7261                 if (!sv)
7262                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7263                     len ? len : strlen(PL_tokenbuf));
7264                 if (gvp) {
7265                     SV * const tmp_sv = sv;
7266                     sv = newSVpvs("CORE::GLOBAL::");
7267                     sv_catsv(sv, tmp_sv);
7268                     SvREFCNT_dec(tmp_sv);
7269                 }
7270
7271 #ifdef PERL_MAD
7272                 if (PL_madskills && !PL_thistoken) {
7273                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7274                     PL_thistoken = newSVpvn(start,s - start);
7275                     PL_realtokenstart = s - SvPVX(PL_linestr);
7276                 }
7277 #endif
7278
7279                 /* Presume this is going to be a bareword of some sort. */
7280                 CLINE;
7281                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7282                 pl_yylval.opval->op_private = OPpCONST_BARE;
7283
7284                 /* And if "Foo::", then that's what it certainly is. */
7285                 if (len)
7286                     goto safe_bareword;
7287
7288                 if (!off)
7289                 {
7290                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7291                     const_op->op_private = OPpCONST_BARE;
7292                     rv2cv_op = newCVREF(0, const_op);
7293                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7294                 }
7295
7296                 /* See if it's the indirect object for a list operator. */
7297
7298                 if (PL_oldoldbufptr &&
7299                     PL_oldoldbufptr < PL_bufptr &&
7300                     (PL_oldoldbufptr == PL_last_lop
7301                      || PL_oldoldbufptr == PL_last_uni) &&
7302                     /* NO SKIPSPACE BEFORE HERE! */
7303                     (PL_expect == XREF ||
7304                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7305                 {
7306                     bool immediate_paren = *s == '(';
7307
7308                     /* (Now we can afford to cross potential line boundary.) */
7309                     s = SKIPSPACE2(s,nextPL_nextwhite);
7310 #ifdef PERL_MAD
7311                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7312 #endif
7313
7314                     /* Two barewords in a row may indicate method call. */
7315
7316                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7317                         (tmp = intuit_method(s, gv, cv))) {
7318                         op_free(rv2cv_op);
7319                         if (tmp == METHOD && !PL_lex_allbrackets &&
7320                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7321                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7322                         return REPORT(tmp);
7323                     }
7324
7325                     /* If not a declared subroutine, it's an indirect object. */
7326                     /* (But it's an indir obj regardless for sort.) */
7327                     /* Also, if "_" follows a filetest operator, it's a bareword */
7328
7329                     if (
7330                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7331                          (!cv &&
7332                         (PL_last_lop_op != OP_MAPSTART &&
7333                          PL_last_lop_op != OP_GREPSTART))))
7334                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7335                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7336                        )
7337                     {
7338                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7339                         goto bareword;
7340                     }
7341                 }
7342
7343                 PL_expect = XOPERATOR;
7344 #ifdef PERL_MAD
7345                 if (isSPACE(*s))
7346                     s = SKIPSPACE2(s,nextPL_nextwhite);
7347                 PL_nextwhite = nextPL_nextwhite;
7348 #else
7349                 s = skipspace(s);
7350 #endif
7351
7352                 /* Is this a word before a => operator? */
7353                 if (*s == '=' && s[1] == '>' && !pkgname) {
7354                     op_free(rv2cv_op);
7355                     CLINE;
7356                     /* This is our own scalar, created a few lines above,
7357                        so this is safe. */
7358                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7359                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7360                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7361                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7362                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7363                     TERM(WORD);
7364                 }
7365
7366                 /* If followed by a paren, it's certainly a subroutine. */
7367                 if (*s == '(') {
7368                     CLINE;
7369                     if (cv) {
7370                         d = s + 1;
7371                         while (SPACE_OR_TAB(*d))
7372                             d++;
7373                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7374                             s = d + 1;
7375                             goto its_constant;
7376                         }
7377                     }
7378 #ifdef PERL_MAD
7379                     if (PL_madskills) {
7380                         PL_nextwhite = PL_thiswhite;
7381                         PL_thiswhite = 0;
7382                     }
7383                     start_force(PL_curforce);
7384 #endif
7385                     NEXTVAL_NEXTTOKE.opval =
7386                         off ? rv2cv_op : pl_yylval.opval;
7387                     PL_expect = XOPERATOR;
7388 #ifdef PERL_MAD
7389                     if (PL_madskills) {
7390                         PL_nextwhite = nextPL_nextwhite;
7391                         curmad('X', PL_thistoken);
7392                         PL_thistoken = newSVpvs("");
7393                     }
7394 #endif
7395                     if (off)
7396                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7397                     else op_free(rv2cv_op),        force_next(WORD);
7398                     pl_yylval.ival = 0;
7399                     TOKEN('&');
7400                 }
7401
7402                 /* If followed by var or block, call it a method (unless sub) */
7403
7404                 if ((*s == '$' || *s == '{') && !cv) {
7405                     op_free(rv2cv_op);
7406                     PL_last_lop = PL_oldbufptr;
7407                     PL_last_lop_op = OP_METHOD;
7408                     if (!PL_lex_allbrackets &&
7409                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7410                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7411                     PREBLOCK(METHOD);
7412                 }
7413
7414                 /* If followed by a bareword, see if it looks like indir obj. */
7415
7416                 if (!orig_keyword
7417                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7418                         && (tmp = intuit_method(s, gv, cv))) {
7419                     op_free(rv2cv_op);
7420                     if (tmp == METHOD && !PL_lex_allbrackets &&
7421                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7422                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7423                     return REPORT(tmp);
7424                 }
7425
7426                 /* Not a method, so call it a subroutine (if defined) */
7427
7428                 if (cv) {
7429                     if (lastchar == '-' && penultchar != '-') {
7430                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7431                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7432                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7433                              UTF8fARG(UTF, l, PL_tokenbuf),
7434                              UTF8fARG(UTF, l, PL_tokenbuf));
7435                     }
7436                     /* Check for a constant sub */
7437                     if ((sv = cv_const_sv_or_av(cv))) {
7438                   its_constant:
7439                         op_free(rv2cv_op);
7440                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7441                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7442                         if (SvTYPE(sv) == SVt_PVAV)
7443                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7444                                                       pl_yylval.opval);
7445                         else {
7446                             pl_yylval.opval->op_private = 0;
7447                             pl_yylval.opval->op_folded = 1;
7448                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7449                         }
7450                         TOKEN(WORD);
7451                     }
7452
7453                     op_free(pl_yylval.opval);
7454                     pl_yylval.opval =
7455                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7456                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7457                     PL_last_lop = PL_oldbufptr;
7458                     PL_last_lop_op = OP_ENTERSUB;
7459                     /* Is there a prototype? */
7460                     if (
7461 #ifdef PERL_MAD
7462                         cv &&
7463 #endif
7464                         SvPOK(cv))
7465                     {
7466                         STRLEN protolen = CvPROTOLEN(cv);
7467                         const char *proto = CvPROTO(cv);
7468                         bool optional;
7469                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7470                         if (!protolen)
7471                             TERM(FUNC0SUB);
7472                         if ((optional = *proto == ';'))
7473                           do
7474                             proto++;
7475                           while (*proto == ';');
7476                         if (
7477                             (
7478                                 (
7479                                     *proto == '$' || *proto == '_'
7480                                  || *proto == '*' || *proto == '+'
7481                                 )
7482                              && proto[1] == '\0'
7483                             )
7484                          || (
7485                              *proto == '\\' && proto[1] && proto[2] == '\0'
7486                             )
7487                         )
7488                             UNIPROTO(UNIOPSUB,optional);
7489                         if (*proto == '\\' && proto[1] == '[') {
7490                             const char *p = proto + 2;
7491                             while(*p && *p != ']')
7492                                 ++p;
7493                             if(*p == ']' && !p[1])
7494                                 UNIPROTO(UNIOPSUB,optional);
7495                         }
7496                         if (*proto == '&' && *s == '{') {
7497                             if (PL_curstash)
7498                                 sv_setpvs(PL_subname, "__ANON__");
7499                             else
7500                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7501                             if (!PL_lex_allbrackets &&
7502                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7503                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7504                             PREBLOCK(LSTOPSUB);
7505                         }
7506                     }
7507 #ifdef PERL_MAD
7508                     {
7509                         if (PL_madskills) {
7510                             PL_nextwhite = PL_thiswhite;
7511                             PL_thiswhite = 0;
7512                         }
7513                         start_force(PL_curforce);
7514                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7515                         PL_expect = XTERM;
7516                         if (PL_madskills) {
7517                             PL_nextwhite = nextPL_nextwhite;
7518                             curmad('X', PL_thistoken);
7519                             PL_thistoken = newSVpvs("");
7520                         }
7521                         force_next(off ? PRIVATEREF : WORD);
7522                         if (!PL_lex_allbrackets &&
7523                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7524                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7525                         TOKEN(NOAMP);
7526                     }
7527                 }
7528
7529                 /* Guess harder when madskills require "best effort". */
7530                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7531                     int probable_sub = 0;
7532                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7533                         probable_sub = 1;
7534                     else if (isALPHA(*s)) {
7535                         char tmpbuf[1024];
7536                         STRLEN tmplen;
7537                         d = s;
7538                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7539                         if (!keyword(tmpbuf, tmplen, 0))
7540                             probable_sub = 1;
7541                         else {
7542                             while (d < PL_bufend && isSPACE(*d))
7543                                 d++;
7544                             if (*d == '=' && d[1] == '>')
7545                                 probable_sub = 1;
7546                         }
7547                     }
7548                     if (probable_sub) {
7549                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7550                                         SVt_PVCV);
7551                         op_free(pl_yylval.opval);
7552                         pl_yylval.opval =
7553                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7554                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7555                         PL_last_lop = PL_oldbufptr;
7556                         PL_last_lop_op = OP_ENTERSUB;
7557                         PL_nextwhite = PL_thiswhite;
7558                         PL_thiswhite = 0;
7559                         start_force(PL_curforce);
7560                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7561                         PL_expect = XTERM;
7562                         PL_nextwhite = nextPL_nextwhite;
7563                         curmad('X', PL_thistoken);
7564                         PL_thistoken = newSVpvs("");
7565                         force_next(off ? PRIVATEREF : WORD);
7566                         if (!PL_lex_allbrackets &&
7567                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7568                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7569                         TOKEN(NOAMP);
7570                     }
7571 #else
7572                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7573                     PL_expect = XTERM;
7574                     force_next(off ? PRIVATEREF : WORD);
7575                     if (!PL_lex_allbrackets &&
7576                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7577                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7578                     TOKEN(NOAMP);
7579 #endif
7580                 }
7581
7582                 /* Call it a bare word */
7583
7584                 if (PL_hints & HINT_STRICT_SUBS)
7585                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7586                 else {
7587                 bareword:
7588                     /* after "print" and similar functions (corresponding to
7589                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7590                      * a filehandle should be subject to "strict subs".
7591                      * Likewise for the optional indirect-object argument to system
7592                      * or exec, which can't be a bareword */
7593                     if ((PL_last_lop_op == OP_PRINT
7594                             || PL_last_lop_op == OP_PRTF
7595                             || PL_last_lop_op == OP_SAY
7596                             || PL_last_lop_op == OP_SYSTEM
7597                             || PL_last_lop_op == OP_EXEC)
7598                             && (PL_hints & HINT_STRICT_SUBS))
7599                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7600                     if (lastchar != '-') {
7601                         if (ckWARN(WARN_RESERVED)) {
7602                             d = PL_tokenbuf;
7603                             while (isLOWER(*d))
7604                                 d++;
7605                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7606                             {
7607                                 /* PL_warn_reserved is constant */
7608                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7609                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7610                                        PL_tokenbuf);
7611                                 GCC_DIAG_RESTORE;
7612                             }
7613                         }
7614                     }
7615                 }
7616                 op_free(rv2cv_op);
7617
7618             safe_bareword:
7619                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7620                  && saw_infix_sigil) {
7621                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7622                                      "Operator or semicolon missing before %c%"UTF8f,
7623                                      lastchar,
7624                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7625                                               PL_tokenbuf));
7626                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7627                                      "Ambiguous use of %c resolved as operator %c",
7628                                      lastchar, lastchar);
7629                 }
7630                 TOKEN(WORD);
7631             }
7632
7633         case KEY___FILE__:
7634             FUN0OP(
7635                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7636             );
7637
7638         case KEY___LINE__:
7639             FUN0OP(
7640                 (OP*)newSVOP(OP_CONST, 0,
7641                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7642             );
7643
7644         case KEY___PACKAGE__:
7645             FUN0OP(
7646                 (OP*)newSVOP(OP_CONST, 0,
7647                                         (PL_curstash
7648                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7649                                          : &PL_sv_undef))
7650             );
7651
7652         case KEY___DATA__:
7653         case KEY___END__: {
7654             GV *gv;
7655             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7656                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7657                                         ? PL_curstash
7658                                         : PL_defstash;
7659                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7660                 if (!isGV(gv))
7661                     gv_init(gv,stash,"DATA",4,0);
7662                 GvMULTI_on(gv);
7663                 if (!GvIO(gv))
7664                     GvIOp(gv) = newIO();
7665                 IoIFP(GvIOp(gv)) = PL_rsfp;
7666 #if defined(HAS_FCNTL) && defined(F_SETFD)
7667                 {
7668                     const int fd = PerlIO_fileno(PL_rsfp);
7669                     fcntl(fd,F_SETFD,fd >= 3);
7670                 }
7671 #endif
7672                 /* Mark this internal pseudo-handle as clean */
7673                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7674                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7675                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7676                 else
7677                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7678 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7679                 /* if the script was opened in binmode, we need to revert
7680                  * it to text mode for compatibility; but only iff it has CRs
7681                  * XXX this is a questionable hack at best. */
7682                 if (PL_bufend-PL_bufptr > 2
7683                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7684                 {
7685                     Off_t loc = 0;
7686                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7687                         loc = PerlIO_tell(PL_rsfp);
7688                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7689                     }
7690 #ifdef NETWARE
7691                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7692 #else
7693                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7694 #endif  /* NETWARE */
7695                         if (loc > 0)
7696                             PerlIO_seek(PL_rsfp, loc, 0);
7697                     }
7698                 }
7699 #endif
7700 #ifdef PERLIO_LAYERS
7701                 if (!IN_BYTES) {
7702                     if (UTF)
7703                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7704                     else if (PL_encoding) {
7705                         SV *name;
7706                         dSP;
7707                         ENTER;
7708                         SAVETMPS;
7709                         PUSHMARK(sp);
7710                         XPUSHs(PL_encoding);
7711                         PUTBACK;
7712                         call_method("name", G_SCALAR);
7713                         SPAGAIN;
7714                         name = POPs;
7715                         PUTBACK;
7716                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7717                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7718                                                       SVfARG(name)));
7719                         FREETMPS;
7720                         LEAVE;
7721                     }
7722                 }
7723 #endif
7724 #ifdef PERL_MAD
7725                 if (PL_madskills) {
7726                     if (PL_realtokenstart >= 0) {
7727                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7728                         if (!PL_endwhite)
7729                             PL_endwhite = newSVpvs("");
7730                         sv_catsv(PL_endwhite, PL_thiswhite);
7731                         PL_thiswhite = 0;
7732                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7733                         PL_realtokenstart = -1;
7734                     }
7735                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7736                            != NULL) ;
7737                 }
7738 #endif
7739                 PL_rsfp = NULL;
7740             }
7741             goto fake_eof;
7742         }
7743
7744         case KEY___SUB__:
7745             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7746
7747         case KEY_AUTOLOAD:
7748         case KEY_DESTROY:
7749         case KEY_BEGIN:
7750         case KEY_UNITCHECK:
7751         case KEY_CHECK:
7752         case KEY_INIT:
7753         case KEY_END:
7754             if (PL_expect == XSTATE) {
7755                 s = PL_bufptr;
7756                 goto really_sub;
7757             }
7758             goto just_a_word;
7759
7760         case_KEY_CORE:
7761             {
7762                 STRLEN olen = len;
7763                 d = s;
7764                 s += 2;
7765                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7766                 if ((*s == ':' && s[1] == ':')
7767                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7768                 {
7769                     s = d;
7770                     len = olen;
7771                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7772                     goto just_a_word;
7773                 }
7774                 if (!tmp)
7775                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7776                                       UTF8fARG(UTF, len, PL_tokenbuf));
7777                 if (tmp < 0)
7778                     tmp = -tmp;
7779                 else if (tmp == KEY_require || tmp == KEY_do
7780                       || tmp == KEY_glob)
7781                     /* that's a way to remember we saw "CORE::" */
7782                     orig_keyword = tmp;
7783                 goto reserved_word;
7784             }
7785
7786         case KEY_abs:
7787             UNI(OP_ABS);
7788
7789         case KEY_alarm:
7790             UNI(OP_ALARM);
7791
7792         case KEY_accept:
7793             LOP(OP_ACCEPT,XTERM);
7794
7795         case KEY_and:
7796             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7797                 return REPORT(0);
7798             OPERATOR(ANDOP);
7799
7800         case KEY_atan2:
7801             LOP(OP_ATAN2,XTERM);
7802
7803         case KEY_bind:
7804             LOP(OP_BIND,XTERM);
7805
7806         case KEY_binmode:
7807             LOP(OP_BINMODE,XTERM);
7808
7809         case KEY_bless:
7810             LOP(OP_BLESS,XTERM);
7811
7812         case KEY_break:
7813             FUN0(OP_BREAK);
7814
7815         case KEY_chop:
7816             UNI(OP_CHOP);
7817
7818         case KEY_continue:
7819                     /* We have to disambiguate the two senses of
7820                       "continue". If the next token is a '{' then
7821                       treat it as the start of a continue block;
7822                       otherwise treat it as a control operator.
7823                      */
7824                     s = skipspace(s);
7825                     if (*s == '{')
7826             PREBLOCK(CONTINUE);
7827                     else
7828                         FUN0(OP_CONTINUE);
7829
7830         case KEY_chdir:
7831             /* may use HOME */
7832             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7833             UNI(OP_CHDIR);
7834
7835         case KEY_close:
7836             UNI(OP_CLOSE);
7837
7838         case KEY_closedir:
7839             UNI(OP_CLOSEDIR);
7840
7841         case KEY_cmp:
7842             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7843                 return REPORT(0);
7844             Eop(OP_SCMP);
7845
7846         case KEY_caller:
7847             UNI(OP_CALLER);
7848
7849         case KEY_crypt:
7850 #ifdef FCRYPT
7851             if (!PL_cryptseen) {
7852                 PL_cryptseen = TRUE;
7853                 init_des();
7854             }
7855 #endif
7856             LOP(OP_CRYPT,XTERM);
7857
7858         case KEY_chmod:
7859             LOP(OP_CHMOD,XTERM);
7860
7861         case KEY_chown:
7862             LOP(OP_CHOWN,XTERM);
7863
7864         case KEY_connect:
7865             LOP(OP_CONNECT,XTERM);
7866
7867         case KEY_chr:
7868             UNI(OP_CHR);
7869
7870         case KEY_cos:
7871             UNI(OP_COS);
7872
7873         case KEY_chroot:
7874             UNI(OP_CHROOT);
7875
7876         case KEY_default:
7877             PREBLOCK(DEFAULT);
7878
7879         case KEY_do:
7880             s = SKIPSPACE1(s);
7881             if (*s == '{')
7882                 PRETERMBLOCK(DO);
7883             if (*s != '\'') {
7884                 *PL_tokenbuf = '&';
7885                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7886                               1, &len);
7887                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7888                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7889                     d = SKIPSPACE1(d);
7890                     if (*d == '(') {
7891                         force_ident_maybe_lex('&');
7892                         s = d;
7893                     }
7894                 }
7895             }
7896             if (orig_keyword == KEY_do) {
7897                 orig_keyword = 0;
7898                 pl_yylval.ival = 1;
7899             }
7900             else
7901                 pl_yylval.ival = 0;
7902             OPERATOR(DO);
7903
7904         case KEY_die:
7905             PL_hints |= HINT_BLOCK_SCOPE;
7906             LOP(OP_DIE,XTERM);
7907
7908         case KEY_defined:
7909             UNI(OP_DEFINED);
7910
7911         case KEY_delete:
7912             UNI(OP_DELETE);
7913
7914         case KEY_dbmopen:
7915             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7916                               STR_WITH_LEN("NDBM_File::"),
7917                               STR_WITH_LEN("DB_File::"),
7918                               STR_WITH_LEN("GDBM_File::"),
7919                               STR_WITH_LEN("SDBM_File::"),
7920                               STR_WITH_LEN("ODBM_File::"),
7921                               NULL);
7922             LOP(OP_DBMOPEN,XTERM);
7923
7924         case KEY_dbmclose:
7925             UNI(OP_DBMCLOSE);
7926
7927         case KEY_dump:
7928             PL_expect = XOPERATOR;
7929             s = force_word(s,WORD,TRUE,FALSE);
7930             LOOPX(OP_DUMP);
7931
7932         case KEY_else:
7933             PREBLOCK(ELSE);
7934
7935         case KEY_elsif:
7936             pl_yylval.ival = CopLINE(PL_curcop);
7937             OPERATOR(ELSIF);
7938
7939         case KEY_eq:
7940             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7941                 return REPORT(0);
7942             Eop(OP_SEQ);
7943
7944         case KEY_exists:
7945             UNI(OP_EXISTS);
7946
7947         case KEY_exit:
7948             if (PL_madskills)
7949                 UNI(OP_INT);
7950             UNI(OP_EXIT);
7951
7952         case KEY_eval:
7953             s = SKIPSPACE1(s);
7954             if (*s == '{') { /* block eval */
7955                 PL_expect = XTERMBLOCK;
7956                 UNIBRACK(OP_ENTERTRY);
7957             }
7958             else { /* string eval */
7959                 PL_expect = XTERM;
7960                 UNIBRACK(OP_ENTEREVAL);
7961             }
7962
7963         case KEY_evalbytes:
7964             PL_expect = XTERM;
7965             UNIBRACK(-OP_ENTEREVAL);
7966
7967         case KEY_eof:
7968             UNI(OP_EOF);
7969
7970         case KEY_exp:
7971             UNI(OP_EXP);
7972
7973         case KEY_each:
7974             UNI(OP_EACH);
7975
7976         case KEY_exec:
7977             LOP(OP_EXEC,XREF);
7978
7979         case KEY_endhostent:
7980             FUN0(OP_EHOSTENT);
7981
7982         case KEY_endnetent:
7983             FUN0(OP_ENETENT);
7984
7985         case KEY_endservent:
7986             FUN0(OP_ESERVENT);
7987
7988         case KEY_endprotoent:
7989             FUN0(OP_EPROTOENT);
7990
7991         case KEY_endpwent:
7992             FUN0(OP_EPWENT);
7993
7994         case KEY_endgrent:
7995             FUN0(OP_EGRENT);
7996
7997         case KEY_for:
7998         case KEY_foreach:
7999             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8000                 return REPORT(0);
8001             pl_yylval.ival = CopLINE(PL_curcop);
8002             s = SKIPSPACE1(s);
8003             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8004                 char *p = s;
8005 #ifdef PERL_MAD
8006                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8007 #endif
8008
8009                 if ((PL_bufend - p) >= 3 &&
8010                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8011                     p += 2;
8012                 else if ((PL_bufend - p) >= 4 &&
8013                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8014                     p += 3;
8015                 p = PEEKSPACE(p);
8016                 /* skip optional package name, as in "for my abc $x (..)" */
8017                 if (isIDFIRST_lazy_if(p,UTF)) {
8018                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8019                     p = PEEKSPACE(p);
8020                 }
8021                 if (*p != '$')
8022                     Perl_croak(aTHX_ "Missing $ on loop variable");
8023 #ifdef PERL_MAD
8024                 s = SvPVX(PL_linestr) + soff;
8025 #endif
8026             }
8027             OPERATOR(FOR);
8028
8029         case KEY_formline:
8030             LOP(OP_FORMLINE,XTERM);
8031
8032         case KEY_fork:
8033             FUN0(OP_FORK);
8034
8035         case KEY_fc:
8036             UNI(OP_FC);
8037
8038         case KEY_fcntl:
8039             LOP(OP_FCNTL,XTERM);
8040
8041         case KEY_fileno:
8042             UNI(OP_FILENO);
8043
8044         case KEY_flock:
8045             LOP(OP_FLOCK,XTERM);
8046
8047         case KEY_gt:
8048             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8049                 return REPORT(0);
8050             Rop(OP_SGT);
8051
8052         case KEY_ge:
8053             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8054                 return REPORT(0);
8055             Rop(OP_SGE);
8056
8057         case KEY_grep:
8058             LOP(OP_GREPSTART, XREF);
8059
8060         case KEY_goto:
8061             PL_expect = XOPERATOR;
8062             s = force_word(s,WORD,TRUE,FALSE);
8063             LOOPX(OP_GOTO);
8064
8065         case KEY_gmtime:
8066             UNI(OP_GMTIME);
8067
8068         case KEY_getc:
8069             UNIDOR(OP_GETC);
8070
8071         case KEY_getppid:
8072             FUN0(OP_GETPPID);
8073
8074         case KEY_getpgrp:
8075             UNI(OP_GETPGRP);
8076
8077         case KEY_getpriority:
8078             LOP(OP_GETPRIORITY,XTERM);
8079
8080         case KEY_getprotobyname:
8081             UNI(OP_GPBYNAME);
8082
8083         case KEY_getprotobynumber:
8084             LOP(OP_GPBYNUMBER,XTERM);
8085
8086         case KEY_getprotoent:
8087             FUN0(OP_GPROTOENT);
8088
8089         case KEY_getpwent:
8090             FUN0(OP_GPWENT);
8091
8092         case KEY_getpwnam:
8093             UNI(OP_GPWNAM);
8094
8095         case KEY_getpwuid:
8096             UNI(OP_GPWUID);
8097
8098         case KEY_getpeername:
8099             UNI(OP_GETPEERNAME);
8100
8101         case KEY_gethostbyname:
8102             UNI(OP_GHBYNAME);
8103
8104         case KEY_gethostbyaddr:
8105             LOP(OP_GHBYADDR,XTERM);
8106
8107         case KEY_gethostent:
8108             FUN0(OP_GHOSTENT);
8109
8110         case KEY_getnetbyname:
8111             UNI(OP_GNBYNAME);
8112
8113         case KEY_getnetbyaddr:
8114             LOP(OP_GNBYADDR,XTERM);
8115
8116         case KEY_getnetent:
8117             FUN0(OP_GNETENT);
8118
8119         case KEY_getservbyname:
8120             LOP(OP_GSBYNAME,XTERM);
8121
8122         case KEY_getservbyport:
8123             LOP(OP_GSBYPORT,XTERM);
8124
8125         case KEY_getservent:
8126             FUN0(OP_GSERVENT);
8127
8128         case KEY_getsockname:
8129             UNI(OP_GETSOCKNAME);
8130
8131         case KEY_getsockopt:
8132             LOP(OP_GSOCKOPT,XTERM);
8133
8134         case KEY_getgrent:
8135             FUN0(OP_GGRENT);
8136
8137         case KEY_getgrnam:
8138             UNI(OP_GGRNAM);
8139
8140         case KEY_getgrgid:
8141             UNI(OP_GGRGID);
8142
8143         case KEY_getlogin:
8144             FUN0(OP_GETLOGIN);
8145
8146         case KEY_given:
8147             pl_yylval.ival = CopLINE(PL_curcop);
8148             Perl_ck_warner_d(aTHX_
8149                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8150                 "given is experimental");
8151             OPERATOR(GIVEN);
8152
8153         case KEY_glob:
8154             LOP(
8155              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8156              XTERM
8157             );
8158
8159         case KEY_hex:
8160             UNI(OP_HEX);
8161
8162         case KEY_if:
8163             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8164                 return REPORT(0);
8165             pl_yylval.ival = CopLINE(PL_curcop);
8166             OPERATOR(IF);
8167
8168         case KEY_index:
8169             LOP(OP_INDEX,XTERM);
8170
8171         case KEY_int:
8172             UNI(OP_INT);
8173
8174         case KEY_ioctl:
8175             LOP(OP_IOCTL,XTERM);
8176
8177         case KEY_join:
8178             LOP(OP_JOIN,XTERM);
8179
8180         case KEY_keys:
8181             UNI(OP_KEYS);
8182
8183         case KEY_kill:
8184             LOP(OP_KILL,XTERM);
8185
8186         case KEY_last:
8187             PL_expect = XOPERATOR;
8188             s = force_word(s,WORD,TRUE,FALSE);
8189             LOOPX(OP_LAST);
8190
8191         case KEY_lc:
8192             UNI(OP_LC);
8193
8194         case KEY_lcfirst:
8195             UNI(OP_LCFIRST);
8196
8197         case KEY_local:
8198             pl_yylval.ival = 0;
8199             OPERATOR(LOCAL);
8200
8201         case KEY_length:
8202             UNI(OP_LENGTH);
8203
8204         case KEY_lt:
8205             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8206                 return REPORT(0);
8207             Rop(OP_SLT);
8208
8209         case KEY_le:
8210             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8211                 return REPORT(0);
8212             Rop(OP_SLE);
8213
8214         case KEY_localtime:
8215             UNI(OP_LOCALTIME);
8216
8217         case KEY_log:
8218             UNI(OP_LOG);
8219
8220         case KEY_link:
8221             LOP(OP_LINK,XTERM);
8222
8223         case KEY_listen:
8224             LOP(OP_LISTEN,XTERM);
8225
8226         case KEY_lock:
8227             UNI(OP_LOCK);
8228
8229         case KEY_lstat:
8230             UNI(OP_LSTAT);
8231
8232         case KEY_m:
8233             s = scan_pat(s,OP_MATCH);
8234             TERM(sublex_start());
8235
8236         case KEY_map:
8237             LOP(OP_MAPSTART, XREF);
8238
8239         case KEY_mkdir:
8240             LOP(OP_MKDIR,XTERM);
8241
8242         case KEY_msgctl:
8243             LOP(OP_MSGCTL,XTERM);
8244
8245         case KEY_msgget:
8246             LOP(OP_MSGGET,XTERM);
8247
8248         case KEY_msgrcv:
8249             LOP(OP_MSGRCV,XTERM);
8250
8251         case KEY_msgsnd:
8252             LOP(OP_MSGSND,XTERM);
8253
8254         case KEY_our:
8255         case KEY_my:
8256         case KEY_state:
8257             PL_in_my = (U16)tmp;
8258             s = SKIPSPACE1(s);
8259             if (isIDFIRST_lazy_if(s,UTF)) {
8260 #ifdef PERL_MAD
8261                 char* start = s;
8262 #endif
8263                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8264                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8265                 {
8266                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8267                         Perl_croak(aTHX_
8268                                   "Experimental \"%s\" subs not enabled",
8269                                    tmp == KEY_my    ? "my"    :
8270                                    tmp == KEY_state ? "state" : "our");
8271                     Perl_ck_warner_d(aTHX_
8272                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8273                         "The lexical_subs feature is experimental");
8274                     goto really_sub;
8275                 }
8276                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8277                 if (!PL_in_my_stash) {
8278                     char tmpbuf[1024];
8279                     PL_bufptr = s;
8280                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8281                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8282                 }
8283 #ifdef PERL_MAD
8284                 if (PL_madskills) {     /* just add type to declarator token */
8285                     sv_catsv(PL_thistoken, PL_nextwhite);
8286                     PL_nextwhite = 0;
8287                     sv_catpvn(PL_thistoken, start, s - start);
8288                 }
8289 #endif
8290             }
8291             pl_yylval.ival = 1;
8292             OPERATOR(MY);
8293
8294         case KEY_next:
8295             PL_expect = XOPERATOR;
8296             s = force_word(s,WORD,TRUE,FALSE);
8297             LOOPX(OP_NEXT);
8298
8299         case KEY_ne:
8300             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8301                 return REPORT(0);
8302             Eop(OP_SNE);
8303
8304         case KEY_no:
8305             s = tokenize_use(0, s);
8306             TERM(USE);
8307
8308         case KEY_not:
8309             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8310                 FUN1(OP_NOT);
8311             else {
8312                 if (!PL_lex_allbrackets &&
8313                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8314                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8315                 OPERATOR(NOTOP);
8316             }
8317
8318         case KEY_open:
8319             s = SKIPSPACE1(s);
8320             if (isIDFIRST_lazy_if(s,UTF)) {
8321           const char *t;
8322           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8323               &len);
8324                 for (t=d; isSPACE(*t);)
8325                     t++;
8326                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8327                     /* [perl #16184] */
8328                     && !(t[0] == '=' && t[1] == '>')
8329                     && !(t[0] == ':' && t[1] == ':')
8330                     && !keyword(s, d-s, 0)
8331                 ) {
8332                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8333                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8334                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8335                 }
8336             }
8337             LOP(OP_OPEN,XTERM);
8338
8339         case KEY_or:
8340             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8341                 return REPORT(0);
8342             pl_yylval.ival = OP_OR;
8343             OPERATOR(OROP);
8344
8345         case KEY_ord:
8346             UNI(OP_ORD);
8347
8348         case KEY_oct:
8349             UNI(OP_OCT);
8350
8351         case KEY_opendir:
8352             LOP(OP_OPEN_DIR,XTERM);
8353
8354         case KEY_print:
8355             checkcomma(s,PL_tokenbuf,"filehandle");
8356             LOP(OP_PRINT,XREF);
8357
8358         case KEY_printf:
8359             checkcomma(s,PL_tokenbuf,"filehandle");
8360             LOP(OP_PRTF,XREF);
8361
8362         case KEY_prototype:
8363             UNI(OP_PROTOTYPE);
8364
8365         case KEY_push:
8366             LOP(OP_PUSH,XTERM);
8367
8368         case KEY_pop:
8369             UNIDOR(OP_POP);
8370
8371         case KEY_pos:
8372             UNIDOR(OP_POS);
8373
8374         case KEY_pack:
8375             LOP(OP_PACK,XTERM);
8376
8377         case KEY_package:
8378             s = force_word(s,WORD,FALSE,TRUE);
8379             s = SKIPSPACE1(s);
8380             s = force_strict_version(s);
8381             PL_lex_expect = XBLOCK;
8382             OPERATOR(PACKAGE);
8383
8384         case KEY_pipe:
8385             LOP(OP_PIPE_OP,XTERM);
8386
8387         case KEY_q:
8388             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8389             if (!s)
8390                 missingterm(NULL);
8391             COPLINE_SET_FROM_MULTI_END;
8392             pl_yylval.ival = OP_CONST;
8393             TERM(sublex_start());
8394
8395         case KEY_quotemeta:
8396             UNI(OP_QUOTEMETA);
8397
8398         case KEY_qw: {
8399             OP *words = NULL;
8400             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8401             if (!s)
8402                 missingterm(NULL);
8403             COPLINE_SET_FROM_MULTI_END;
8404             PL_expect = XOPERATOR;
8405             if (SvCUR(PL_lex_stuff)) {
8406                 int warned_comma = !ckWARN(WARN_QW);
8407                 int warned_comment = warned_comma;
8408                 d = SvPV_force(PL_lex_stuff, len);
8409                 while (len) {
8410                     for (; isSPACE(*d) && len; --len, ++d)
8411                         /**/;
8412                     if (len) {
8413                         SV *sv;
8414                         const char *b = d;
8415                         if (!warned_comma || !warned_comment) {
8416                             for (; !isSPACE(*d) && len; --len, ++d) {
8417                                 if (!warned_comma && *d == ',') {
8418                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8419                                         "Possible attempt to separate words with commas");
8420                                     ++warned_comma;
8421                                 }
8422                                 else if (!warned_comment && *d == '#') {
8423                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8424                                         "Possible attempt to put comments in qw() list");
8425                                     ++warned_comment;
8426                                 }
8427                             }
8428                         }
8429                         else {
8430                             for (; !isSPACE(*d) && len; --len, ++d)
8431                                 /**/;
8432                         }
8433                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8434                         words = op_append_elem(OP_LIST, words,
8435                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8436                     }
8437                 }
8438             }
8439             if (!words)
8440                 words = newNULLLIST();
8441             if (PL_lex_stuff) {
8442                 SvREFCNT_dec(PL_lex_stuff);
8443                 PL_lex_stuff = NULL;
8444             }
8445             PL_expect = XOPERATOR;
8446             pl_yylval.opval = sawparens(words);
8447             TOKEN(QWLIST);
8448         }
8449
8450         case KEY_qq:
8451             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8452             if (!s)
8453                 missingterm(NULL);
8454             pl_yylval.ival = OP_STRINGIFY;
8455             if (SvIVX(PL_lex_stuff) == '\'')
8456                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8457             TERM(sublex_start());
8458
8459         case KEY_qr:
8460             s = scan_pat(s,OP_QR);
8461             TERM(sublex_start());
8462
8463         case KEY_qx:
8464             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8465             if (!s)
8466                 missingterm(NULL);
8467             pl_yylval.ival = OP_BACKTICK;
8468             TERM(sublex_start());
8469
8470         case KEY_return:
8471             OLDLOP(OP_RETURN);
8472
8473         case KEY_require:
8474             s = SKIPSPACE1(s);
8475             PL_expect = XOPERATOR;
8476             if (isDIGIT(*s)) {
8477                 s = force_version(s, FALSE);
8478             }
8479             else if (*s != 'v' || !isDIGIT(s[1])
8480                     || (s = force_version(s, TRUE), *s == 'v'))
8481             {
8482                 *PL_tokenbuf = '\0';
8483                 s = force_word(s,WORD,TRUE,TRUE);
8484                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8485                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8486                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8487                 else if (*s == '<')
8488                     yyerror("<> should be quotes");
8489             }
8490             if (orig_keyword == KEY_require) {
8491                 orig_keyword = 0;
8492                 pl_yylval.ival = 1;
8493             }
8494             else
8495                 pl_yylval.ival = 0;
8496             PL_expect = XTERM;
8497             PL_bufptr = s;
8498             PL_last_uni = PL_oldbufptr;
8499             PL_last_lop_op = OP_REQUIRE;
8500             s = skipspace(s);
8501             return REPORT( (int)REQUIRE );
8502
8503         case KEY_reset:
8504             UNI(OP_RESET);
8505
8506         case KEY_redo:
8507             PL_expect = XOPERATOR;
8508             s = force_word(s,WORD,TRUE,FALSE);
8509             LOOPX(OP_REDO);
8510
8511         case KEY_rename:
8512             LOP(OP_RENAME,XTERM);
8513
8514         case KEY_rand:
8515             UNI(OP_RAND);
8516
8517         case KEY_rmdir:
8518             UNI(OP_RMDIR);
8519
8520         case KEY_rindex:
8521             LOP(OP_RINDEX,XTERM);
8522
8523         case KEY_read:
8524             LOP(OP_READ,XTERM);
8525
8526         case KEY_readdir:
8527             UNI(OP_READDIR);
8528
8529         case KEY_readline:
8530             UNIDOR(OP_READLINE);
8531
8532         case KEY_readpipe:
8533             UNIDOR(OP_BACKTICK);
8534
8535         case KEY_rewinddir:
8536             UNI(OP_REWINDDIR);
8537
8538         case KEY_recv:
8539             LOP(OP_RECV,XTERM);
8540
8541         case KEY_reverse:
8542             LOP(OP_REVERSE,XTERM);
8543
8544         case KEY_readlink:
8545             UNIDOR(OP_READLINK);
8546
8547         case KEY_ref:
8548             UNI(OP_REF);
8549
8550         case KEY_s:
8551             s = scan_subst(s);
8552             if (pl_yylval.opval)
8553                 TERM(sublex_start());
8554             else
8555                 TOKEN(1);       /* force error */
8556
8557         case KEY_say:
8558             checkcomma(s,PL_tokenbuf,"filehandle");
8559             LOP(OP_SAY,XREF);
8560
8561         case KEY_chomp:
8562             UNI(OP_CHOMP);
8563
8564         case KEY_scalar:
8565             UNI(OP_SCALAR);
8566
8567         case KEY_select:
8568             LOP(OP_SELECT,XTERM);
8569
8570         case KEY_seek:
8571             LOP(OP_SEEK,XTERM);
8572
8573         case KEY_semctl:
8574             LOP(OP_SEMCTL,XTERM);
8575
8576         case KEY_semget:
8577             LOP(OP_SEMGET,XTERM);
8578
8579         case KEY_semop:
8580             LOP(OP_SEMOP,XTERM);
8581
8582         case KEY_send:
8583             LOP(OP_SEND,XTERM);
8584
8585         case KEY_setpgrp:
8586             LOP(OP_SETPGRP,XTERM);
8587
8588         case KEY_setpriority:
8589             LOP(OP_SETPRIORITY,XTERM);
8590
8591         case KEY_sethostent:
8592             UNI(OP_SHOSTENT);
8593
8594         case KEY_setnetent:
8595             UNI(OP_SNETENT);
8596
8597         case KEY_setservent:
8598             UNI(OP_SSERVENT);
8599
8600         case KEY_setprotoent:
8601             UNI(OP_SPROTOENT);
8602
8603         case KEY_setpwent:
8604             FUN0(OP_SPWENT);
8605
8606         case KEY_setgrent:
8607             FUN0(OP_SGRENT);
8608
8609         case KEY_seekdir:
8610             LOP(OP_SEEKDIR,XTERM);
8611
8612         case KEY_setsockopt:
8613             LOP(OP_SSOCKOPT,XTERM);
8614
8615         case KEY_shift:
8616             UNIDOR(OP_SHIFT);
8617
8618         case KEY_shmctl:
8619             LOP(OP_SHMCTL,XTERM);
8620
8621         case KEY_shmget:
8622             LOP(OP_SHMGET,XTERM);
8623
8624         case KEY_shmread:
8625             LOP(OP_SHMREAD,XTERM);
8626
8627         case KEY_shmwrite:
8628             LOP(OP_SHMWRITE,XTERM);
8629
8630         case KEY_shutdown:
8631             LOP(OP_SHUTDOWN,XTERM);
8632
8633         case KEY_sin:
8634             UNI(OP_SIN);
8635
8636         case KEY_sleep:
8637             UNI(OP_SLEEP);
8638
8639         case KEY_socket:
8640             LOP(OP_SOCKET,XTERM);
8641
8642         case KEY_socketpair:
8643             LOP(OP_SOCKPAIR,XTERM);
8644
8645         case KEY_sort:
8646             checkcomma(s,PL_tokenbuf,"subroutine name");
8647             s = SKIPSPACE1(s);
8648             PL_expect = XTERM;
8649             s = force_word(s,WORD,TRUE,TRUE);
8650             LOP(OP_SORT,XREF);
8651
8652         case KEY_split:
8653             LOP(OP_SPLIT,XTERM);
8654
8655         case KEY_sprintf:
8656             LOP(OP_SPRINTF,XTERM);
8657
8658         case KEY_splice:
8659             LOP(OP_SPLICE,XTERM);
8660
8661         case KEY_sqrt:
8662             UNI(OP_SQRT);
8663
8664         case KEY_srand:
8665             UNI(OP_SRAND);
8666
8667         case KEY_stat:
8668             UNI(OP_STAT);
8669
8670         case KEY_study:
8671             UNI(OP_STUDY);
8672
8673         case KEY_substr:
8674             LOP(OP_SUBSTR,XTERM);
8675
8676         case KEY_format:
8677         case KEY_sub:
8678           really_sub:
8679             {
8680                 char * const tmpbuf = PL_tokenbuf + 1;
8681                 expectation attrful;
8682                 bool have_name, have_proto;
8683                 const int key = tmp;
8684 #ifndef PERL_MAD
8685                 SV *format_name = NULL;
8686 #endif
8687
8688 #ifdef PERL_MAD
8689                 SV *tmpwhite = 0;
8690
8691                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8692                 SV *subtoken = PL_madskills
8693                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8694                    : NULL;
8695                 PL_thistoken = 0;
8696
8697                 d = s;
8698                 s = SKIPSPACE2(s,tmpwhite);
8699 #else
8700                 d = s;
8701                 s = skipspace(s);
8702 #endif
8703
8704                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8705                     (*s == ':' && s[1] == ':'))
8706                 {
8707 #ifdef PERL_MAD
8708                     SV *nametoke = NULL;
8709 #endif
8710
8711                     PL_expect = XBLOCK;
8712                     attrful = XATTRBLOCK;
8713                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8714                                   &len);
8715 #ifdef PERL_MAD
8716                     if (PL_madskills)
8717                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8718 #else
8719                     if (key == KEY_format)
8720                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8721 #endif
8722                     *PL_tokenbuf = '&';
8723                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8724                      || pad_findmy_pvn(
8725                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8726                         ) != NOT_IN_PAD)
8727                         sv_setpvn(PL_subname, tmpbuf, len);
8728                     else {
8729                         sv_setsv(PL_subname,PL_curstname);
8730                         sv_catpvs(PL_subname,"::");
8731                         sv_catpvn(PL_subname,tmpbuf,len);
8732                     }
8733                     if (SvUTF8(PL_linestr))
8734                         SvUTF8_on(PL_subname);
8735                     have_name = TRUE;
8736
8737
8738 #ifdef PERL_MAD
8739                     start_force(0);
8740                     CURMAD('X', nametoke);
8741                     CURMAD('_', tmpwhite);
8742                     force_ident_maybe_lex('&');
8743
8744                     s = SKIPSPACE2(d,tmpwhite);
8745 #else
8746                     s = skipspace(d);
8747 #endif
8748                 }
8749                 else {
8750                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8751                     {
8752                         *d = '\0';
8753                         /* diag_listed_as: Missing name in "%s sub" */
8754                         Perl_croak(aTHX_
8755                                   "Missing name in \"%s\"", PL_bufptr);
8756                     }
8757                     PL_expect = XTERMBLOCK;
8758                     attrful = XATTRTERM;
8759                     sv_setpvs(PL_subname,"?");
8760                     have_name = FALSE;
8761                 }
8762
8763                 if (key == KEY_format) {
8764 #ifdef PERL_MAD
8765                     PL_thistoken = subtoken;
8766                     s = d;
8767 #else
8768                     if (format_name) {
8769                         start_force(PL_curforce);
8770                         NEXTVAL_NEXTTOKE.opval
8771                             = (OP*)newSVOP(OP_CONST,0, format_name);
8772                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8773                         force_next(WORD);
8774                     }
8775 #endif
8776                     PREBLOCK(FORMAT);
8777                 }
8778
8779                 /* Look for a prototype */
8780                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8781                     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8782                     COPLINE_SET_FROM_MULTI_END;
8783                     if (!s)
8784                         Perl_croak(aTHX_ "Prototype not terminated");
8785                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8786                     have_proto = TRUE;
8787
8788 #ifdef PERL_MAD
8789                     start_force(0);
8790                     CURMAD('q', PL_thisopen);
8791                     CURMAD('_', tmpwhite);
8792                     CURMAD('=', PL_thisstuff);
8793                     CURMAD('Q', PL_thisclose);
8794                     NEXTVAL_NEXTTOKE.opval =
8795                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8796                     PL_lex_stuff = NULL;
8797                     force_next(THING);
8798
8799                     s = SKIPSPACE2(s,tmpwhite);
8800 #else
8801                     s = skipspace(s);
8802 #endif
8803                 }
8804                 else
8805                     have_proto = FALSE;
8806
8807                 if (*s == ':' && s[1] != ':')
8808                     PL_expect = attrful;
8809                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8810                     if (!have_name)
8811                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8812                     else if (*s != ';' && *s != '}')
8813                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8814                 }
8815
8816 #ifdef PERL_MAD
8817                 start_force(0);
8818                 if (tmpwhite) {
8819                     if (PL_madskills)
8820                         curmad('^', newSVpvs(""));
8821                     CURMAD('_', tmpwhite);
8822                 }
8823                 force_next(0);
8824
8825                 PL_thistoken = subtoken;
8826                 PERL_UNUSED_VAR(have_proto);
8827 #else
8828                 if (have_proto) {
8829                     NEXTVAL_NEXTTOKE.opval =
8830                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8831                     PL_lex_stuff = NULL;
8832                     force_next(THING);
8833                 }
8834 #endif
8835                 if (!have_name) {
8836                     if (PL_curstash)
8837                         sv_setpvs(PL_subname, "__ANON__");
8838                     else
8839                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8840                     TOKEN(ANONSUB);
8841                 }
8842 #ifndef PERL_MAD
8843                 force_ident_maybe_lex('&');
8844 #endif
8845                 TOKEN(SUB);
8846             }
8847
8848         case KEY_system:
8849             LOP(OP_SYSTEM,XREF);
8850
8851         case KEY_symlink:
8852             LOP(OP_SYMLINK,XTERM);
8853
8854         case KEY_syscall:
8855             LOP(OP_SYSCALL,XTERM);
8856
8857         case KEY_sysopen:
8858             LOP(OP_SYSOPEN,XTERM);
8859
8860         case KEY_sysseek:
8861             LOP(OP_SYSSEEK,XTERM);
8862
8863         case KEY_sysread:
8864             LOP(OP_SYSREAD,XTERM);
8865
8866         case KEY_syswrite:
8867             LOP(OP_SYSWRITE,XTERM);
8868
8869         case KEY_tr:
8870         case KEY_y:
8871             s = scan_trans(s);
8872             TERM(sublex_start());
8873
8874         case KEY_tell:
8875             UNI(OP_TELL);
8876
8877         case KEY_telldir:
8878             UNI(OP_TELLDIR);
8879
8880         case KEY_tie:
8881             LOP(OP_TIE,XTERM);
8882
8883         case KEY_tied:
8884             UNI(OP_TIED);
8885
8886         case KEY_time:
8887             FUN0(OP_TIME);
8888
8889         case KEY_times:
8890             FUN0(OP_TMS);
8891
8892         case KEY_truncate:
8893             LOP(OP_TRUNCATE,XTERM);
8894
8895         case KEY_uc:
8896             UNI(OP_UC);
8897
8898         case KEY_ucfirst:
8899             UNI(OP_UCFIRST);
8900
8901         case KEY_untie:
8902             UNI(OP_UNTIE);
8903
8904         case KEY_until:
8905             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8906                 return REPORT(0);
8907             pl_yylval.ival = CopLINE(PL_curcop);
8908             OPERATOR(UNTIL);
8909
8910         case KEY_unless:
8911             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8912                 return REPORT(0);
8913             pl_yylval.ival = CopLINE(PL_curcop);
8914             OPERATOR(UNLESS);
8915
8916         case KEY_unlink:
8917             LOP(OP_UNLINK,XTERM);
8918
8919         case KEY_undef:
8920             UNIDOR(OP_UNDEF);
8921
8922         case KEY_unpack:
8923             LOP(OP_UNPACK,XTERM);
8924
8925         case KEY_utime:
8926             LOP(OP_UTIME,XTERM);
8927
8928         case KEY_umask:
8929             UNIDOR(OP_UMASK);
8930
8931         case KEY_unshift:
8932             LOP(OP_UNSHIFT,XTERM);
8933
8934         case KEY_use:
8935             s = tokenize_use(1, s);
8936             OPERATOR(USE);
8937
8938         case KEY_values:
8939             UNI(OP_VALUES);
8940
8941         case KEY_vec:
8942             LOP(OP_VEC,XTERM);
8943
8944         case KEY_when:
8945             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8946                 return REPORT(0);
8947             pl_yylval.ival = CopLINE(PL_curcop);
8948             Perl_ck_warner_d(aTHX_
8949                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8950                 "when is experimental");
8951             OPERATOR(WHEN);
8952
8953         case KEY_while:
8954             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8955                 return REPORT(0);
8956             pl_yylval.ival = CopLINE(PL_curcop);
8957             OPERATOR(WHILE);
8958
8959         case KEY_warn:
8960             PL_hints |= HINT_BLOCK_SCOPE;
8961             LOP(OP_WARN,XTERM);
8962
8963         case KEY_wait:
8964             FUN0(OP_WAIT);
8965
8966         case KEY_waitpid:
8967             LOP(OP_WAITPID,XTERM);
8968
8969         case KEY_wantarray:
8970             FUN0(OP_WANTARRAY);
8971
8972         case KEY_write:
8973             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8974              * we use the same number on EBCDIC */
8975             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8976             UNI(OP_ENTERWRITE);
8977
8978         case KEY_x:
8979             if (PL_expect == XOPERATOR) {
8980                 if (*s == '=' && !PL_lex_allbrackets &&
8981                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8982                     return REPORT(0);
8983                 Mop(OP_REPEAT);
8984             }
8985             check_uni();
8986             goto just_a_word;
8987
8988         case KEY_xor:
8989             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8990                 return REPORT(0);
8991             pl_yylval.ival = OP_XOR;
8992             OPERATOR(OROP);
8993         }
8994     }}
8995 }
8996
8997 /*
8998   S_pending_ident
8999
9000   Looks up an identifier in the pad or in a package
9001
9002   Returns:
9003     PRIVATEREF if this is a lexical name.
9004     WORD       if this belongs to a package.
9005
9006   Structure:
9007       if we're in a my declaration
9008           croak if they tried to say my($foo::bar)
9009           build the ops for a my() declaration
9010       if it's an access to a my() variable
9011           build ops for access to a my() variable
9012       if in a dq string, and they've said @foo and we can't find @foo
9013           warn
9014       build ops for a bareword
9015 */
9016
9017 static int
9018 S_pending_ident(pTHX)
9019 {
9020     dVAR;
9021     PADOFFSET tmp = 0;
9022     const char pit = (char)pl_yylval.ival;
9023     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9024     /* All routes through this function want to know if there is a colon.  */
9025     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9026
9027     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9028           "### Pending identifier '%s'\n", PL_tokenbuf); });
9029
9030     /* if we're in a my(), we can't allow dynamics here.
9031        $foo'bar has already been turned into $foo::bar, so
9032        just check for colons.
9033
9034        if it's a legal name, the OP is a PADANY.
9035     */
9036     if (PL_in_my) {
9037         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9038             if (has_colon)
9039                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9040                                   "variable %s in \"our\"",
9041                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9042             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9043         }
9044         else {
9045             if (has_colon) {
9046                 /* PL_no_myglob is constant */
9047                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9048                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9049                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9050                             UTF ? SVf_UTF8 : 0);
9051                 GCC_DIAG_RESTORE;
9052             }
9053
9054             pl_yylval.opval = newOP(OP_PADANY, 0);
9055             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9056                                                         UTF ? SVf_UTF8 : 0);
9057             return PRIVATEREF;
9058         }
9059     }
9060
9061     /*
9062        build the ops for accesses to a my() variable.
9063     */
9064
9065     if (!has_colon) {
9066         if (!PL_in_my)
9067             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9068                                     UTF ? SVf_UTF8 : 0);
9069         if (tmp != NOT_IN_PAD) {
9070             /* might be an "our" variable" */
9071             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9072                 /* build ops for a bareword */
9073                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9074                 HEK * const stashname = HvNAME_HEK(stash);
9075                 SV *  const sym = newSVhek(stashname);
9076                 sv_catpvs(sym, "::");
9077                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9078                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9079                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9080                 if (pit != '&')
9081                   gv_fetchsv(sym,
9082                     (PL_in_eval
9083                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9084                         : GV_ADDMULTI
9085                     ),
9086                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9087                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9088                      : SVt_PVHV));
9089                 return WORD;
9090             }
9091
9092             pl_yylval.opval = newOP(OP_PADANY, 0);
9093             pl_yylval.opval->op_targ = tmp;
9094             return PRIVATEREF;
9095         }
9096     }
9097
9098     /*
9099        Whine if they've said @foo in a doublequoted string,
9100        and @foo isn't a variable we can find in the symbol
9101        table.
9102     */
9103     if (ckWARN(WARN_AMBIGUOUS) &&
9104         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9105         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9106                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9107         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9108                 /* DO NOT warn for @- and @+ */
9109                 && !( PL_tokenbuf[2] == '\0' &&
9110                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9111            )
9112         {
9113             /* Downgraded from fatal to warning 20000522 mjd */
9114             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9115                         "Possible unintended interpolation of %"UTF8f
9116                         " in string",
9117                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9118         }
9119     }
9120
9121     /* build ops for a bareword */
9122     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9123                                    newSVpvn_flags(PL_tokenbuf + 1,
9124                                                       tokenbuf_len - 1,
9125                                                       UTF ? SVf_UTF8 : 0 ));
9126     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9127     if (pit != '&')
9128         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9129                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9130                      | ( UTF ? SVf_UTF8 : 0 ),
9131                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9132                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9133                       : SVt_PVHV));
9134     return WORD;
9135 }
9136
9137 STATIC void
9138 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9139 {
9140     dVAR;
9141
9142     PERL_ARGS_ASSERT_CHECKCOMMA;
9143
9144     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9145         if (ckWARN(WARN_SYNTAX)) {
9146             int level = 1;
9147             const char *w;
9148             for (w = s+2; *w && level; w++) {
9149                 if (*w == '(')
9150                     ++level;
9151                 else if (*w == ')')
9152                     --level;
9153             }
9154             while (isSPACE(*w))
9155                 ++w;
9156             /* the list of chars below is for end of statements or
9157              * block / parens, boolean operators (&&, ||, //) and branch
9158              * constructs (or, and, if, until, unless, while, err, for).
9159              * Not a very solid hack... */
9160             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9161                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9162                             "%s (...) interpreted as function",name);
9163         }
9164     }
9165     while (s < PL_bufend && isSPACE(*s))
9166         s++;
9167     if (*s == '(')
9168         s++;
9169     while (s < PL_bufend && isSPACE(*s))
9170         s++;
9171     if (isIDFIRST_lazy_if(s,UTF)) {
9172         const char * const w = s;
9173         s += UTF ? UTF8SKIP(s) : 1;
9174         while (isWORDCHAR_lazy_if(s,UTF))
9175             s += UTF ? UTF8SKIP(s) : 1;
9176         while (s < PL_bufend && isSPACE(*s))
9177             s++;
9178         if (*s == ',') {
9179             GV* gv;
9180             if (keyword(w, s - w, 0))
9181                 return;
9182
9183             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9184             if (gv && GvCVu(gv))
9185                 return;
9186             Perl_croak(aTHX_ "No comma allowed after %s", what);
9187         }
9188     }
9189 }
9190
9191 /* S_new_constant(): do any overload::constant lookup.
9192
9193    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9194    Best used as sv=new_constant(..., sv, ...).
9195    If s, pv are NULL, calls subroutine with one argument,
9196    and <type> is used with error messages only.
9197    <type> is assumed to be well formed UTF-8 */
9198
9199 STATIC SV *
9200 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9201                SV *sv, SV *pv, const char *type, STRLEN typelen)
9202 {
9203     dVAR; dSP;
9204     HV * table = GvHV(PL_hintgv);                /* ^H */
9205     SV *res;
9206     SV *errsv = NULL;
9207     SV **cvp;
9208     SV *cv, *typesv;
9209     const char *why1 = "", *why2 = "", *why3 = "";
9210
9211     PERL_ARGS_ASSERT_NEW_CONSTANT;
9212     /* We assume that this is true: */
9213     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9214     assert(type || s);
9215
9216     /* charnames doesn't work well if there have been errors found */
9217     if (PL_error_count > 0 && *key == 'c')
9218     {
9219         SvREFCNT_dec_NN(sv);
9220         return &PL_sv_undef;
9221     }
9222
9223     sv_2mortal(sv);                     /* Parent created it permanently */
9224     if (!table
9225         || ! (PL_hints & HINT_LOCALIZE_HH)
9226         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9227         || ! SvOK(*cvp))
9228     {
9229         char *msg;
9230
9231         /* Here haven't found what we're looking for.  If it is charnames,
9232          * perhaps it needs to be loaded.  Try doing that before giving up */
9233         if (*key == 'c') {
9234             Perl_load_module(aTHX_
9235                             0,
9236                             newSVpvs("_charnames"),
9237                              /* version parameter; no need to specify it, as if
9238                               * we get too early a version, will fail anyway,
9239                               * not being able to find '_charnames' */
9240                             NULL,
9241                             newSVpvs(":full"),
9242                             newSVpvs(":short"),
9243                             NULL);
9244             assert(sp == PL_stack_sp);
9245             table = GvHV(PL_hintgv);
9246             if (table
9247                 && (PL_hints & HINT_LOCALIZE_HH)
9248                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9249                 && SvOK(*cvp))
9250             {
9251                 goto now_ok;
9252             }
9253         }
9254         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9255             msg = Perl_form(aTHX_
9256                                "Constant(%.*s) unknown",
9257                                 (int)(type ? typelen : len),
9258                                 (type ? type: s));
9259         }
9260         else {
9261             why1 = "$^H{";
9262             why2 = key;
9263             why3 = "} is not defined";
9264         report:
9265             if (*key == 'c') {
9266                 msg = Perl_form(aTHX_
9267                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9268                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9269                       );
9270             }
9271             else {
9272                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9273                                     (int)(type ? typelen : len),
9274                                     (type ? type: s), why1, why2, why3);
9275             }
9276         }
9277         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9278         return SvREFCNT_inc_simple_NN(sv);
9279     }
9280 now_ok:
9281     cv = *cvp;
9282     if (!pv && s)
9283         pv = newSVpvn_flags(s, len, SVs_TEMP);
9284     if (type && pv)
9285         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9286     else
9287         typesv = &PL_sv_undef;
9288
9289     PUSHSTACKi(PERLSI_OVERLOAD);
9290     ENTER ;
9291     SAVETMPS;
9292
9293     PUSHMARK(SP) ;
9294     EXTEND(sp, 3);
9295     if (pv)
9296         PUSHs(pv);
9297     PUSHs(sv);
9298     if (pv)
9299         PUSHs(typesv);
9300     PUTBACK;
9301     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9302
9303     SPAGAIN ;
9304
9305     /* Check the eval first */
9306     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9307         STRLEN errlen;
9308         const char * errstr;
9309         sv_catpvs(errsv, "Propagated");
9310         errstr = SvPV_const(errsv, errlen);
9311         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9312         (void)POPs;
9313         res = SvREFCNT_inc_simple_NN(sv);
9314     }
9315     else {
9316         res = POPs;
9317         SvREFCNT_inc_simple_void_NN(res);
9318     }
9319
9320     PUTBACK ;
9321     FREETMPS ;
9322     LEAVE ;
9323     POPSTACK;
9324
9325     if (!SvOK(res)) {
9326         why1 = "Call to &{$^H{";
9327         why2 = key;
9328         why3 = "}} did not return a defined value";
9329         sv = res;
9330         (void)sv_2mortal(sv);
9331         goto report;
9332     }
9333
9334     return res;
9335 }
9336
9337 PERL_STATIC_INLINE void
9338 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9339     dVAR;
9340     PERL_ARGS_ASSERT_PARSE_IDENT;
9341
9342     for (;;) {
9343         if (*d >= e)
9344             Perl_croak(aTHX_ "%s", ident_too_long);
9345         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9346              /* The UTF-8 case must come first, otherwise things
9347              * like c\N{COMBINING TILDE} would start failing, as the
9348              * isWORDCHAR_A case below would gobble the 'c' up.
9349              */
9350
9351             char *t = *s + UTF8SKIP(*s);
9352             while (isIDCONT_utf8((U8*)t))
9353                 t += UTF8SKIP(t);
9354             if (*d + (t - *s) > e)
9355                 Perl_croak(aTHX_ "%s", ident_too_long);
9356             Copy(*s, *d, t - *s, char);
9357             *d += t - *s;
9358             *s = t;
9359         }
9360         else if ( isWORDCHAR_A(**s) ) {
9361             do {
9362                 *(*d)++ = *(*s)++;
9363             } while (isWORDCHAR_A(**s) && *d < e);
9364         }
9365         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9366             *(*d)++ = ':';
9367             *(*d)++ = ':';
9368             (*s)++;
9369         }
9370         else if (allow_package && **s == ':' && (*s)[1] == ':'
9371            /* Disallow things like Foo::$bar. For the curious, this is
9372             * the code path that triggers the "Bad name after" warning
9373             * when looking for barewords.
9374             */
9375            && (*s)[2] != '$') {
9376             *(*d)++ = *(*s)++;
9377             *(*d)++ = *(*s)++;
9378         }
9379         else
9380             break;
9381     }
9382     return;
9383 }
9384
9385 /* Returns a NUL terminated string, with the length of the string written to
9386    *slp
9387    */
9388 STATIC char *
9389 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9390 {
9391     dVAR;
9392     char *d = dest;
9393     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9394     bool is_utf8 = cBOOL(UTF);
9395
9396     PERL_ARGS_ASSERT_SCAN_WORD;
9397
9398     parse_ident(&s, &d, e, allow_package, is_utf8);
9399     *d = '\0';
9400     *slp = d - dest;
9401     return s;
9402 }
9403
9404 STATIC char *
9405 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9406 {
9407     dVAR;
9408     I32 herelines = PL_parser->herelines;
9409     SSize_t bracket = -1;
9410     char funny = *s++;
9411     char *d = dest;
9412     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9413     bool is_utf8 = cBOOL(UTF);
9414     I32 orig_copline = 0, tmp_copline = 0;
9415
9416     PERL_ARGS_ASSERT_SCAN_IDENT;
9417
9418     if (isSPACE(*s))
9419         s = PEEKSPACE(s);
9420     if (isDIGIT(*s)) {
9421         while (isDIGIT(*s)) {
9422             if (d >= e)
9423                 Perl_croak(aTHX_ "%s", ident_too_long);
9424             *d++ = *s++;
9425         }
9426     }
9427     else {
9428         parse_ident(&s, &d, e, 1, is_utf8);
9429     }
9430     *d = '\0';
9431     d = dest;
9432     if (*d) {
9433         /* Either a digit variable, or parse_ident() found an identifier
9434            (anything valid as a bareword), so job done and return.  */
9435         if (PL_lex_state != LEX_NORMAL)
9436             PL_lex_state = LEX_INTERPENDMAYBE;
9437         return s;
9438     }
9439     if (*s == '$' && s[1] &&
9440       (isIDFIRST_lazy_if(s+1,is_utf8)
9441          || isDIGIT_A((U8)s[1])
9442          || s[1] == '$'
9443          || s[1] == '{'
9444          || strnEQ(s+1,"::",2)) )
9445     {
9446         /* Dereferencing a value in a scalar variable.
9447            The alternatives are different syntaxes for a scalar variable.
9448            Using ' as a leading package separator isn't allowed. :: is.   */
9449         return s;
9450     }
9451     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9452     if (*s == '{') {
9453         bracket = s - SvPVX(PL_linestr);
9454         s++;
9455         orig_copline = CopLINE(PL_curcop);
9456         if (s < PL_bufend && isSPACE(*s)) {
9457             s = PEEKSPACE(s);
9458         }
9459     }
9460
9461 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9462  * iff Unicode semantics are to be used.  The legal ones are any of:
9463  *  a) ASCII digits
9464  *  b) ASCII punctuation
9465  *  c) When not under Unicode rules, any upper Latin1 character
9466  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9467  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
9468  *     the \s ones. */
9469 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
9470                                    || isDIGIT_A((U8)(d))                    \
9471                                    || (!(u) && !isASCII((U8)(d)))           \
9472                                    || ((((U8)(d)) < 32)                     \
9473                                        && (((((U8)(d)) >= 14)               \
9474                                            || (((U8)(d)) <= 8 && (d) != 0) \
9475                                            || (((U8)(d)) == 13))))          \
9476                                    || (((U8)(d)) == toCTRL('?')))
9477     if (s < PL_bufend
9478         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9479     {
9480         if ( isCNTRL_A((U8)*s) ) {
9481             deprecate("literal control characters in variable names");
9482         }
9483
9484         if (is_utf8) {
9485             const STRLEN skip = UTF8SKIP(s);
9486             STRLEN i;
9487             d[skip] = '\0';
9488             for ( i = 0; i < skip; i++ )
9489                 d[i] = *s++;
9490         }
9491         else {
9492             *d = *s++;
9493             d[1] = '\0';
9494         }
9495     }
9496     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9497     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9498         *d = toCTRL(*s);
9499         s++;
9500     }
9501     /* Warn about ambiguous code after unary operators if {...} notation isn't
9502        used.  There's no difference in ambiguity; it's merely a heuristic
9503        about when not to warn.  */
9504     else if (ck_uni && bracket == -1)
9505         check_uni();
9506     if (bracket != -1) {
9507         /* If we were processing {...} notation then...  */
9508         if (isIDFIRST_lazy_if(d,is_utf8)) {
9509             /* if it starts as a valid identifier, assume that it is one.
9510                (the later check for } being at the expected point will trap
9511                cases where this doesn't pan out.)  */
9512         d += is_utf8 ? UTF8SKIP(d) : 1;
9513         parse_ident(&s, &d, e, 1, is_utf8);
9514             *d = '\0';
9515             tmp_copline = CopLINE(PL_curcop);
9516             if (s < PL_bufend && isSPACE(*s)) {
9517                 s = PEEKSPACE(s);
9518             }
9519             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9520                 /* ${foo[0]} and ${foo{bar}} notation.  */
9521                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9522                     const char * const brack =
9523                         (const char *)
9524                         ((*s == '[') ? "[...]" : "{...}");
9525                     orig_copline = CopLINE(PL_curcop);
9526                     CopLINE_set(PL_curcop, tmp_copline);
9527    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9528                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9529                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9530                         funny, dest, brack, funny, dest, brack);
9531                     CopLINE_set(PL_curcop, orig_copline);
9532                 }
9533                 bracket++;
9534                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9535                 PL_lex_allbrackets++;
9536                 return s;
9537             }
9538         }
9539         /* Handle extended ${^Foo} variables
9540          * 1999-02-27 mjd-perl-patch@plover.com */
9541         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9542                  && isWORDCHAR(*s))
9543         {
9544             d++;
9545             while (isWORDCHAR(*s) && d < e) {
9546                 *d++ = *s++;
9547             }
9548             if (d >= e)
9549                 Perl_croak(aTHX_ "%s", ident_too_long);
9550             *d = '\0';
9551         }
9552
9553         if ( !tmp_copline )
9554             tmp_copline = CopLINE(PL_curcop);
9555         if (s < PL_bufend && isSPACE(*s)) {
9556             s = PEEKSPACE(s);
9557         }
9558
9559         /* Expect to find a closing } after consuming any trailing whitespace.
9560          */
9561         if (*s == '}') {
9562             s++;
9563             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9564                 PL_lex_state = LEX_INTERPEND;
9565                 PL_expect = XREF;
9566             }
9567             if (PL_lex_state == LEX_NORMAL) {
9568                 if (ckWARN(WARN_AMBIGUOUS) &&
9569                     (keyword(dest, d - dest, 0)
9570                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9571                 {
9572                     SV *tmp = newSVpvn_flags( dest, d - dest,
9573                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9574                     if (funny == '#')
9575                         funny = '@';
9576                     orig_copline = CopLINE(PL_curcop);
9577                     CopLINE_set(PL_curcop, tmp_copline);
9578                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9579                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9580                         funny, tmp, funny, tmp);
9581                     CopLINE_set(PL_curcop, orig_copline);
9582                 }
9583             }
9584         }
9585         else {
9586             /* Didn't find the closing } at the point we expected, so restore
9587                state such that the next thing to process is the opening { and */
9588             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9589             CopLINE_set(PL_curcop, orig_copline);
9590             PL_parser->herelines = herelines;
9591             *dest = '\0';
9592         }
9593     }
9594     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9595         PL_lex_state = LEX_INTERPEND;
9596     return s;
9597 }
9598
9599 static bool
9600 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9601
9602     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9603      * the parse starting at 's', based on the subset that are valid in this
9604      * context input to this routine in 'valid_flags'. Advances s.  Returns
9605      * TRUE if the input should be treated as a valid flag, so the next char
9606      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9607      * first call on the current regex.  This routine will set it to any
9608      * charset modifier found.  The caller shouldn't change it.  This way,
9609      * another charset modifier encountered in the parse can be detected as an
9610      * error, as we have decided to allow only one */
9611
9612     const char c = **s;
9613     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9614
9615     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9616         if (isWORDCHAR_lazy_if(*s, UTF)) {
9617             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9618                        UTF ? SVf_UTF8 : 0);
9619             (*s) += charlen;
9620             /* Pretend that it worked, so will continue processing before
9621              * dieing */
9622             return TRUE;
9623         }
9624         return FALSE;
9625     }
9626
9627     switch (c) {
9628
9629         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9630         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9631         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9632         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9633         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9634         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9635         case LOCALE_PAT_MOD:
9636             if (*charset) {
9637                 goto multiple_charsets;
9638             }
9639             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9640             *charset = c;
9641             break;
9642         case UNICODE_PAT_MOD:
9643             if (*charset) {
9644                 goto multiple_charsets;
9645             }
9646             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9647             *charset = c;
9648             break;
9649         case ASCII_RESTRICT_PAT_MOD:
9650             if (! *charset) {
9651                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9652             }
9653             else {
9654
9655                 /* Error if previous modifier wasn't an 'a', but if it was, see
9656                  * if, and accept, a second occurrence (only) */
9657                 if (*charset != 'a'
9658                     || get_regex_charset(*pmfl)
9659                         != REGEX_ASCII_RESTRICTED_CHARSET)
9660                 {
9661                         goto multiple_charsets;
9662                 }
9663                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9664             }
9665             *charset = c;
9666             break;
9667         case DEPENDS_PAT_MOD:
9668             if (*charset) {
9669                 goto multiple_charsets;
9670             }
9671             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9672             *charset = c;
9673             break;
9674     }
9675
9676     (*s)++;
9677     return TRUE;
9678
9679     multiple_charsets:
9680         if (*charset != c) {
9681             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9682         }
9683         else if (c == 'a') {
9684   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9685             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9686         }
9687         else {
9688             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9689         }
9690
9691         /* Pretend that it worked, so will continue processing before dieing */
9692         (*s)++;
9693         return TRUE;
9694 }
9695
9696 STATIC char *
9697 S_scan_pat(pTHX_ char *start, I32 type)
9698 {
9699     dVAR;
9700     PMOP *pm;
9701     char *s;
9702     const char * const valid_flags =
9703         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9704     char charset = '\0';    /* character set modifier */
9705 #ifdef PERL_MAD
9706     char *modstart;
9707 #endif
9708
9709     PERL_ARGS_ASSERT_SCAN_PAT;
9710
9711     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9712                        TRUE /* look for escaped bracketed metas */, NULL);
9713
9714     if (!s) {
9715         const char * const delimiter = skipspace(start);
9716         Perl_croak(aTHX_
9717                    (const char *)
9718                    (*delimiter == '?'
9719                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9720                     : "Search pattern not terminated" ));
9721     }
9722
9723     pm = (PMOP*)newPMOP(type, 0);
9724     if (PL_multi_open == '?') {
9725         /* This is the only point in the code that sets PMf_ONCE:  */
9726         pm->op_pmflags |= PMf_ONCE;
9727
9728         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9729            allows us to restrict the list needed by reset to just the ??
9730            matches.  */
9731         assert(type != OP_TRANS);
9732         if (PL_curstash) {
9733             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9734             U32 elements;
9735             if (!mg) {
9736                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9737                                  0);
9738             }
9739             elements = mg->mg_len / sizeof(PMOP**);
9740             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9741             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9742             mg->mg_len = elements * sizeof(PMOP**);
9743             PmopSTASH_set(pm,PL_curstash);
9744         }
9745     }
9746 #ifdef PERL_MAD
9747     modstart = s;
9748 #endif
9749
9750     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9751      * anon CV. False positives like qr/[(?{]/ are harmless */
9752
9753     if (type == OP_QR) {
9754         STRLEN len;
9755         char *e, *p = SvPV(PL_lex_stuff, len);
9756         e = p + len;
9757         for (; p < e; p++) {
9758             if (p[0] == '(' && p[1] == '?'
9759                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9760             {
9761                 pm->op_pmflags |= PMf_HAS_CV;
9762                 break;
9763             }
9764         }
9765         pm->op_pmflags |= PMf_IS_QR;
9766     }
9767
9768     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9769 #ifdef PERL_MAD
9770     if (PL_madskills && modstart != s) {
9771         SV* tmptoken = newSVpvn(modstart, s - modstart);
9772         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9773     }
9774 #endif
9775     /* issue a warning if /c is specified,but /g is not */
9776     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9777     {
9778         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9779                        "Use of /c modifier is meaningless without /g" );
9780     }
9781
9782     PL_lex_op = (OP*)pm;
9783     pl_yylval.ival = OP_MATCH;
9784     return s;
9785 }
9786
9787 STATIC char *
9788 S_scan_subst(pTHX_ char *start)
9789 {
9790     dVAR;
9791     char *s;
9792     PMOP *pm;
9793     I32 first_start;
9794     line_t first_line;
9795     I32 es = 0;
9796     char charset = '\0';    /* character set modifier */
9797 #ifdef PERL_MAD
9798     char *modstart;
9799 #endif
9800     char *t;
9801
9802     PERL_ARGS_ASSERT_SCAN_SUBST;
9803
9804     pl_yylval.ival = OP_NULL;
9805
9806     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9807                  TRUE /* look for escaped bracketed metas */, &t);
9808
9809     if (!s)
9810         Perl_croak(aTHX_ "Substitution pattern not terminated");
9811
9812     s = t;
9813 #ifdef PERL_MAD
9814     if (PL_madskills) {
9815         CURMAD('q', PL_thisopen);
9816         CURMAD('_', PL_thiswhite);
9817         CURMAD('E', PL_thisstuff);
9818         CURMAD('Q', PL_thisclose);
9819         PL_realtokenstart = s - SvPVX(PL_linestr);
9820     }
9821 #endif
9822
9823     first_start = PL_multi_start;
9824     first_line = CopLINE(PL_curcop);
9825     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9826     if (!s) {
9827         if (PL_lex_stuff) {
9828             SvREFCNT_dec(PL_lex_stuff);
9829             PL_lex_stuff = NULL;
9830         }
9831         Perl_croak(aTHX_ "Substitution replacement not terminated");
9832     }
9833     PL_multi_start = first_start;       /* so whole substitution is taken together */
9834
9835     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9836
9837 #ifdef PERL_MAD
9838     if (PL_madskills) {
9839         CURMAD('z', PL_thisopen);
9840         CURMAD('R', PL_thisstuff);
9841         CURMAD('Z', PL_thisclose);
9842     }
9843     modstart = s;
9844 #endif
9845
9846     while (*s) {
9847         if (*s == EXEC_PAT_MOD) {
9848             s++;
9849             es++;
9850         }
9851         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9852         {
9853             break;
9854         }
9855     }
9856
9857 #ifdef PERL_MAD
9858     if (PL_madskills) {
9859         if (modstart != s)
9860             curmad('m', newSVpvn(modstart, s - modstart));
9861         append_madprops(PL_thismad, (OP*)pm, 0);
9862         PL_thismad = 0;
9863     }
9864 #endif
9865     if ((pm->op_pmflags & PMf_CONTINUE)) {
9866         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9867     }
9868
9869     if (es) {
9870         SV * const repl = newSVpvs("");
9871
9872         PL_multi_end = 0;
9873         pm->op_pmflags |= PMf_EVAL;
9874         while (es-- > 0) {
9875             if (es)
9876                 sv_catpvs(repl, "eval ");
9877             else
9878                 sv_catpvs(repl, "do ");
9879         }
9880         sv_catpvs(repl, "{");
9881         sv_catsv(repl, PL_sublex_info.repl);
9882         sv_catpvs(repl, "}");
9883         SvEVALED_on(repl);
9884         SvREFCNT_dec(PL_sublex_info.repl);
9885         PL_sublex_info.repl = repl;
9886     }
9887     if (CopLINE(PL_curcop) != first_line) {
9888         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9889         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9890             CopLINE(PL_curcop) - first_line;
9891         CopLINE_set(PL_curcop, first_line);
9892     }
9893
9894     PL_lex_op = (OP*)pm;
9895     pl_yylval.ival = OP_SUBST;
9896     return s;
9897 }
9898
9899 STATIC char *
9900 S_scan_trans(pTHX_ char *start)
9901 {
9902     dVAR;
9903     char* s;
9904     OP *o;
9905     U8 squash;
9906     U8 del;
9907     U8 complement;
9908     bool nondestruct = 0;
9909 #ifdef PERL_MAD
9910     char *modstart;
9911 #endif
9912     char *t;
9913
9914     PERL_ARGS_ASSERT_SCAN_TRANS;
9915
9916     pl_yylval.ival = OP_NULL;
9917
9918     s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9919     if (!s)
9920         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9921
9922     s = t;
9923 #ifdef PERL_MAD
9924     if (PL_madskills) {
9925         CURMAD('q', PL_thisopen);
9926         CURMAD('_', PL_thiswhite);
9927         CURMAD('E', PL_thisstuff);
9928         CURMAD('Q', PL_thisclose);
9929         PL_realtokenstart = s - SvPVX(PL_linestr);
9930     }
9931 #endif
9932
9933     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9934     if (!s) {
9935         if (PL_lex_stuff) {
9936             SvREFCNT_dec(PL_lex_stuff);
9937             PL_lex_stuff = NULL;
9938         }
9939         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9940     }
9941     if (PL_madskills) {
9942         CURMAD('z', PL_thisopen);
9943         CURMAD('R', PL_thisstuff);
9944         CURMAD('Z', PL_thisclose);
9945     }
9946
9947     complement = del = squash = 0;
9948 #ifdef PERL_MAD
9949     modstart = s;
9950 #endif
9951     while (1) {
9952         switch (*s) {
9953         case 'c':
9954             complement = OPpTRANS_COMPLEMENT;
9955             break;
9956         case 'd':
9957             del = OPpTRANS_DELETE;
9958             break;
9959         case 's':
9960             squash = OPpTRANS_SQUASH;
9961             break;
9962         case 'r':
9963             nondestruct = 1;
9964             break;
9965         default:
9966             goto no_more;
9967         }
9968         s++;
9969     }
9970   no_more:
9971
9972     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9973     o->op_private &= ~OPpTRANS_ALL;
9974     o->op_private |= del|squash|complement|
9975       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9976       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9977
9978     PL_lex_op = o;
9979     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9980
9981 #ifdef PERL_MAD
9982     if (PL_madskills) {
9983         if (modstart != s)
9984             curmad('m', newSVpvn(modstart, s - modstart));
9985         append_madprops(PL_thismad, o, 0);
9986         PL_thismad = 0;
9987     }
9988 #endif
9989
9990     return s;
9991 }
9992
9993 /* scan_heredoc
9994    Takes a pointer to the first < in <<FOO.
9995    Returns a pointer to the byte following <<FOO.
9996
9997    This function scans a heredoc, which involves different methods
9998    depending on whether we are in a string eval, quoted construct, etc.
9999    This is because PL_linestr could containing a single line of input, or
10000    a whole string being evalled, or the contents of the current quote-
10001    like operator.
10002
10003    The two basic methods are:
10004     - Steal lines from the input stream
10005     - Scan the heredoc in PL_linestr and remove it therefrom
10006
10007    In a file scope or filtered eval, the first method is used; in a
10008    string eval, the second.
10009
10010    In a quote-like operator, we have to choose between the two,
10011    depending on where we can find a newline.  We peek into outer lex-
10012    ing scopes until we find one with a newline in it.  If we reach the
10013    outermost lexing scope and it is a file, we use the stream method.
10014    Otherwise it is treated as an eval.
10015 */
10016
10017 STATIC char *
10018 S_scan_heredoc(pTHX_ char *s)
10019 {
10020     dVAR;
10021     I32 op_type = OP_SCALAR;
10022     I32 len;
10023     SV *tmpstr;
10024     char term;
10025     char *d;
10026     char *e;
10027     char *peek;
10028     const bool infile = PL_rsfp || PL_parser->filtered;
10029     const line_t origline = CopLINE(PL_curcop);
10030     LEXSHARED *shared = PL_parser->lex_shared;
10031 #ifdef PERL_MAD
10032     I32 stuffstart = s - SvPVX(PL_linestr);
10033     char *tstart;
10034
10035     PL_realtokenstart = -1;
10036 #endif
10037
10038     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10039
10040     s += 2;
10041     d = PL_tokenbuf + 1;
10042     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10043     *PL_tokenbuf = '\n';
10044     peek = s;
10045     while (SPACE_OR_TAB(*peek))
10046         peek++;
10047     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10048         s = peek;
10049         term = *s++;
10050         s = delimcpy(d, e, s, PL_bufend, term, &len);
10051         if (s == PL_bufend)
10052             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10053         d += len;
10054         s++;
10055     }
10056     else {
10057         if (*s == '\\')
10058             /* <<\FOO is equivalent to <<'FOO' */
10059             s++, term = '\'';
10060         else
10061             term = '"';
10062         if (!isWORDCHAR_lazy_if(s,UTF))
10063             deprecate("bare << to mean <<\"\"");
10064         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10065             if (d < e)
10066                 *d++ = *s;
10067         }
10068     }
10069     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10070         Perl_croak(aTHX_ "Delimiter for here document is too long");
10071     *d++ = '\n';
10072     *d = '\0';
10073     len = d - PL_tokenbuf;
10074
10075 #ifdef PERL_MAD
10076     if (PL_madskills) {
10077         tstart = PL_tokenbuf + 1;
10078         PL_thisclose = newSVpvn(tstart, len - 1);
10079         tstart = SvPVX(PL_linestr) + stuffstart;
10080         PL_thisopen = newSVpvn(tstart, s - tstart);
10081         stuffstart = s - SvPVX(PL_linestr);
10082     }
10083 #endif
10084 #ifndef PERL_STRICT_CR
10085     d = strchr(s, '\r');
10086     if (d) {
10087         char * const olds = s;
10088         s = d;
10089         while (s < PL_bufend) {
10090             if (*s == '\r') {
10091                 *d++ = '\n';
10092                 if (*++s == '\n')
10093                     s++;
10094             }
10095             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10096                 *d++ = *s++;
10097                 s++;
10098             }
10099             else
10100                 *d++ = *s++;
10101         }
10102         *d = '\0';
10103         PL_bufend = d;
10104         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10105         s = olds;
10106     }
10107 #endif
10108 #ifdef PERL_MAD
10109     if (PL_madskills) {
10110         tstart = SvPVX(PL_linestr) + stuffstart;
10111         if (PL_thisstuff)
10112             sv_catpvn(PL_thisstuff, tstart, s - tstart);
10113         else
10114             PL_thisstuff = newSVpvn(tstart, s - tstart);
10115     }
10116
10117     stuffstart = s - SvPVX(PL_linestr);
10118 #endif
10119
10120     tmpstr = newSV_type(SVt_PVIV);
10121     SvGROW(tmpstr, 80);
10122     if (term == '\'') {
10123         op_type = OP_CONST;
10124         SvIV_set(tmpstr, -1);
10125     }
10126     else if (term == '`') {
10127         op_type = OP_BACKTICK;
10128         SvIV_set(tmpstr, '\\');
10129     }
10130
10131     PL_multi_start = origline + 1 + PL_parser->herelines;
10132     PL_multi_open = PL_multi_close = '<';
10133     /* inside a string eval or quote-like operator */
10134     if (!infile || PL_lex_inwhat) {
10135         SV *linestr;
10136         char *bufend;
10137         char * const olds = s;
10138         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10139         /* These two fields are not set until an inner lexing scope is
10140            entered.  But we need them set here. */
10141         shared->ls_bufptr  = s;
10142         shared->ls_linestr = PL_linestr;
10143         if (PL_lex_inwhat)
10144           /* Look for a newline.  If the current buffer does not have one,
10145              peek into the line buffer of the parent lexing scope, going
10146              up as many levels as necessary to find one with a newline
10147              after bufptr.
10148            */
10149           while (!(s = (char *)memchr(
10150                     (void *)shared->ls_bufptr, '\n',
10151                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10152                 ))) {
10153             shared = shared->ls_prev;
10154             /* shared is only null if we have gone beyond the outermost
10155                lexing scope.  In a file, we will have broken out of the
10156                loop in the previous iteration.  In an eval, the string buf-
10157                fer ends with "\n;", so the while condition above will have
10158                evaluated to false.  So shared can never be null. */
10159             assert(shared);
10160             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10161                most lexing scope.  In a file, shared->ls_linestr at that
10162                level is just one line, so there is no body to steal. */
10163             if (infile && !shared->ls_prev) {
10164                 s = olds;
10165                 goto streaming;
10166             }
10167           }
10168         else {  /* eval */
10169             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10170             assert(s);
10171         }
10172         linestr = shared->ls_linestr;
10173         bufend = SvEND(linestr);
10174         d = s;
10175         while (s < bufend - len + 1 &&
10176           memNE(s,PL_tokenbuf,len) ) {
10177             if (*s++ == '\n')
10178                 ++PL_parser->herelines;
10179         }
10180         if (s >= bufend - len + 1) {
10181             goto interminable;
10182         }
10183         sv_setpvn(tmpstr,d+1,s-d);
10184 #ifdef PERL_MAD
10185         if (PL_madskills) {
10186             if (PL_thisstuff)
10187                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10188             else
10189                 PL_thisstuff = newSVpvn(d + 1, s - d);
10190             stuffstart = s - SvPVX(PL_linestr);
10191         }
10192 #endif
10193         s += len - 1;
10194         /* the preceding stmt passes a newline */
10195         PL_parser->herelines++;
10196
10197         /* s now points to the newline after the heredoc terminator.
10198            d points to the newline before the body of the heredoc.
10199          */
10200
10201         /* We are going to modify linestr in place here, so set
10202            aside copies of the string if necessary for re-evals or
10203            (caller $n)[6]. */
10204         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10205            check shared->re_eval_str. */
10206         if (shared->re_eval_start || shared->re_eval_str) {
10207             /* Set aside the rest of the regexp */
10208             if (!shared->re_eval_str)
10209                 shared->re_eval_str =
10210                        newSVpvn(shared->re_eval_start,
10211                                 bufend - shared->re_eval_start);
10212             shared->re_eval_start -= s-d;
10213         }
10214         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10215             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10216             cx->blk_eval.cur_text == linestr)
10217         {
10218             cx->blk_eval.cur_text = newSVsv(linestr);
10219             SvSCREAM_on(cx->blk_eval.cur_text);
10220         }
10221         /* Copy everything from s onwards back to d. */
10222         Move(s,d,bufend-s + 1,char);
10223         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10224         /* Setting PL_bufend only applies when we have not dug deeper
10225            into other scopes, because sublex_done sets PL_bufend to
10226            SvEND(PL_linestr). */
10227         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10228         s = olds;
10229     }
10230     else
10231     {
10232       SV *linestr_save;
10233      streaming:
10234       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10235       term = PL_tokenbuf[1];
10236       len--;
10237       linestr_save = PL_linestr; /* must restore this afterwards */
10238       d = s;                     /* and this */
10239       PL_linestr = newSVpvs("");
10240       PL_bufend = SvPVX(PL_linestr);
10241       while (1) {
10242 #ifdef PERL_MAD
10243         if (PL_madskills) {
10244             tstart = SvPVX(PL_linestr) + stuffstart;
10245             if (PL_thisstuff)
10246                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10247             else
10248                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10249         }
10250 #endif
10251         PL_bufptr = PL_bufend;
10252         CopLINE_set(PL_curcop,
10253                     origline + 1 + PL_parser->herelines);
10254         if (!lex_next_chunk(LEX_NO_TERM)
10255          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10256             SvREFCNT_dec(linestr_save);
10257             goto interminable;
10258         }
10259         CopLINE_set(PL_curcop, origline);
10260         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10261             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10262             /* ^That should be enough to avoid this needing to grow:  */
10263             sv_catpvs(PL_linestr, "\n\0");
10264             assert(s == SvPVX(PL_linestr));
10265             PL_bufend = SvEND(PL_linestr);
10266         }
10267         s = PL_bufptr;
10268 #ifdef PERL_MAD
10269         stuffstart = s - SvPVX(PL_linestr);
10270 #endif
10271         PL_parser->herelines++;
10272         PL_last_lop = PL_last_uni = NULL;
10273 #ifndef PERL_STRICT_CR
10274         if (PL_bufend - PL_linestart >= 2) {
10275             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10276                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10277             {
10278                 PL_bufend[-2] = '\n';
10279                 PL_bufend--;
10280                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10281             }
10282             else if (PL_bufend[-1] == '\r')
10283                 PL_bufend[-1] = '\n';
10284         }
10285         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10286             PL_bufend[-1] = '\n';
10287 #endif
10288         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10289             SvREFCNT_dec(PL_linestr);
10290             PL_linestr = linestr_save;
10291             PL_linestart = SvPVX(linestr_save);
10292             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10293             s = d;
10294             break;
10295         }
10296         else {
10297             sv_catsv(tmpstr,PL_linestr);
10298         }
10299       }
10300     }
10301     PL_multi_end = origline + PL_parser->herelines;
10302     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10303         SvPV_shrink_to_cur(tmpstr);
10304     }
10305     if (!IN_BYTES) {
10306         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10307             SvUTF8_on(tmpstr);
10308         else if (PL_encoding)
10309             sv_recode_to_utf8(tmpstr, PL_encoding);
10310     }
10311     PL_lex_stuff = tmpstr;
10312     pl_yylval.ival = op_type;
10313     return s;
10314
10315   interminable:
10316     SvREFCNT_dec(tmpstr);
10317     CopLINE_set(PL_curcop, origline);
10318     missingterm(PL_tokenbuf + 1);
10319 }
10320
10321 /* scan_inputsymbol
10322    takes: current position in input buffer
10323    returns: new position in input buffer
10324    side-effects: pl_yylval and lex_op are set.
10325
10326    This code handles:
10327
10328    <>           read from ARGV
10329    <FH>         read from filehandle
10330    <pkg::FH>    read from package qualified filehandle
10331    <pkg'FH>     read from package qualified filehandle
10332    <$fh>        read from filehandle in $fh
10333    <*.h>        filename glob
10334
10335 */
10336
10337 STATIC char *
10338 S_scan_inputsymbol(pTHX_ char *start)
10339 {
10340     dVAR;
10341     char *s = start;            /* current position in buffer */
10342     char *end;
10343     I32 len;
10344     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10345     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10346
10347     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10348
10349     end = strchr(s, '\n');
10350     if (!end)
10351         end = PL_bufend;
10352     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10353
10354     /* die if we didn't have space for the contents of the <>,
10355        or if it didn't end, or if we see a newline
10356     */
10357
10358     if (len >= (I32)sizeof PL_tokenbuf)
10359         Perl_croak(aTHX_ "Excessively long <> operator");
10360     if (s >= end)
10361         Perl_croak(aTHX_ "Unterminated <> operator");
10362
10363     s++;
10364
10365     /* check for <$fh>
10366        Remember, only scalar variables are interpreted as filehandles by
10367        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10368        treated as a glob() call.
10369        This code makes use of the fact that except for the $ at the front,
10370        a scalar variable and a filehandle look the same.
10371     */
10372     if (*d == '$' && d[1]) d++;
10373
10374     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10375     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10376         d += UTF ? UTF8SKIP(d) : 1;
10377
10378     /* If we've tried to read what we allow filehandles to look like, and
10379        there's still text left, then it must be a glob() and not a getline.
10380        Use scan_str to pull out the stuff between the <> and treat it
10381        as nothing more than a string.
10382     */
10383
10384     if (d - PL_tokenbuf != len) {
10385         pl_yylval.ival = OP_GLOB;
10386         s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10387         if (!s)
10388            Perl_croak(aTHX_ "Glob not terminated");
10389         return s;
10390     }
10391     else {
10392         bool readline_overriden = FALSE;
10393         GV *gv_readline;
10394         /* we're in a filehandle read situation */
10395         d = PL_tokenbuf;
10396
10397         /* turn <> into <ARGV> */
10398         if (!len)
10399             Copy("ARGV",d,5,char);
10400
10401         /* Check whether readline() is overriden */
10402         if ((gv_readline = gv_override("readline",8)))
10403             readline_overriden = TRUE;
10404
10405         /* if <$fh>, create the ops to turn the variable into a
10406            filehandle
10407         */
10408         if (*d == '$') {
10409             /* try to find it in the pad for this block, otherwise find
10410                add symbol table ops
10411             */
10412             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10413             if (tmp != NOT_IN_PAD) {
10414                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10415                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10416                     HEK * const stashname = HvNAME_HEK(stash);
10417                     SV * const sym = sv_2mortal(newSVhek(stashname));
10418                     sv_catpvs(sym, "::");
10419                     sv_catpv(sym, d+1);
10420                     d = SvPVX(sym);
10421                     goto intro_sym;
10422                 }
10423                 else {
10424                     OP * const o = newOP(OP_PADSV, 0);
10425                     o->op_targ = tmp;
10426                     PL_lex_op = readline_overriden
10427                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10428                                 op_append_elem(OP_LIST, o,
10429                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10430                         : (OP*)newUNOP(OP_READLINE, 0, o);
10431                 }
10432             }
10433             else {
10434                 GV *gv;
10435                 ++d;
10436 intro_sym:
10437                 gv = gv_fetchpv(d,
10438                                 (PL_in_eval
10439                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10440                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10441                                 SVt_PV);
10442                 PL_lex_op = readline_overriden
10443                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10444                             op_append_elem(OP_LIST,
10445                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10446                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10447                     : (OP*)newUNOP(OP_READLINE, 0,
10448                             newUNOP(OP_RV2SV, 0,
10449                                 newGVOP(OP_GV, 0, gv)));
10450             }
10451             if (!readline_overriden)
10452                 PL_lex_op->op_flags |= OPf_SPECIAL;
10453             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10454             pl_yylval.ival = OP_NULL;
10455         }
10456
10457         /* If it's none of the above, it must be a literal filehandle
10458            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10459         else {
10460             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10461             PL_lex_op = readline_overriden
10462                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10463                         op_append_elem(OP_LIST,
10464                             newGVOP(OP_GV, 0, gv),
10465                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10466                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10467             pl_yylval.ival = OP_NULL;
10468         }
10469     }
10470
10471     return s;
10472 }
10473
10474
10475 /* scan_str
10476    takes:
10477         start                   position in buffer
10478         keep_quoted             preserve \ on the embedded delimiter(s)
10479         keep_delims             preserve the delimiters around the string
10480         re_reparse              compiling a run-time /(?{})/:
10481                                    collapse // to /,  and skip encoding src
10482         deprecate_escaped_meta  issue a deprecation warning for cer-
10483                                 tain paired metacharacters that appear
10484                                 escaped within it
10485         delimp                  if non-null, this is set to the position of
10486                                 the closing delimiter, or just after it if
10487                                 the closing and opening delimiters differ
10488                                 (i.e., the opening delimiter of a substitu-
10489                                 tion replacement)
10490    returns: position to continue reading from buffer
10491    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10492         updates the read buffer.
10493
10494    This subroutine pulls a string out of the input.  It is called for:
10495         q               single quotes           q(literal text)
10496         '               single quotes           'literal text'
10497         qq              double quotes           qq(interpolate $here please)
10498         "               double quotes           "interpolate $here please"
10499         qx              backticks               qx(/bin/ls -l)
10500         `               backticks               `/bin/ls -l`
10501         qw              quote words             @EXPORT_OK = qw( func() $spam )
10502         m//             regexp match            m/this/
10503         s///            regexp substitute       s/this/that/
10504         tr///           string transliterate    tr/this/that/
10505         y///            string transliterate    y/this/that/
10506         ($*@)           sub prototypes          sub foo ($)
10507         (stuff)         sub attr parameters     sub foo : attr(stuff)
10508         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10509
10510    In most of these cases (all but <>, patterns and transliterate)
10511    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10512    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10513    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10514    calls scan_str().
10515
10516    It skips whitespace before the string starts, and treats the first
10517    character as the delimiter.  If the delimiter is one of ([{< then
10518    the corresponding "close" character )]}> is used as the closing
10519    delimiter.  It allows quoting of delimiters, and if the string has
10520    balanced delimiters ([{<>}]) it allows nesting.
10521
10522    On success, the SV with the resulting string is put into lex_stuff or,
10523    if that is already non-NULL, into lex_repl. The second case occurs only
10524    when parsing the RHS of the special constructs s/// and tr/// (y///).
10525    For convenience, the terminating delimiter character is stuffed into
10526    SvIVX of the SV.
10527 */
10528
10529 STATIC char *
10530 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10531                  bool deprecate_escaped_meta, char **delimp
10532     )
10533 {
10534     dVAR;
10535     SV *sv;                     /* scalar value: string */
10536     const char *tmps;           /* temp string, used for delimiter matching */
10537     char *s = start;            /* current position in the buffer */
10538     char term;                  /* terminating character */
10539     char *to;                   /* current position in the sv's data */
10540     I32 brackets = 1;           /* bracket nesting level */
10541     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10542     I32 termcode;               /* terminating char. code */
10543     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10544     STRLEN termlen;             /* length of terminating string */
10545     int last_off = 0;           /* last position for nesting bracket */
10546     char *escaped_open = NULL;
10547     line_t herelines;
10548 #ifdef PERL_MAD
10549     int stuffstart;
10550     char *tstart;
10551 #endif
10552
10553     PERL_ARGS_ASSERT_SCAN_STR;
10554
10555     /* skip space before the delimiter */
10556     if (isSPACE(*s)) {
10557         s = PEEKSPACE(s);
10558     }
10559
10560 #ifdef PERL_MAD
10561     if (PL_realtokenstart >= 0) {
10562         stuffstart = PL_realtokenstart;
10563         PL_realtokenstart = -1;
10564     }
10565     else
10566         stuffstart = start - SvPVX(PL_linestr);
10567 #endif
10568     /* mark where we are, in case we need to report errors */
10569     CLINE;
10570
10571     /* after skipping whitespace, the next character is the terminator */
10572     term = *s;
10573     if (!UTF) {
10574         termcode = termstr[0] = term;
10575         termlen = 1;
10576     }
10577     else {
10578         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10579         Copy(s, termstr, termlen, U8);
10580         if (!UTF8_IS_INVARIANT(term))
10581             has_utf8 = TRUE;
10582     }
10583
10584     /* mark where we are */
10585     PL_multi_start = CopLINE(PL_curcop);
10586     PL_multi_open = term;
10587     herelines = PL_parser->herelines;
10588
10589     /* find corresponding closing delimiter */
10590     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10591         termcode = termstr[0] = term = tmps[5];
10592
10593     PL_multi_close = term;
10594
10595     /* A warning is raised if the input parameter requires it for escaped (by a
10596      * backslash) paired metacharacters {} [] and () when the delimiters are
10597      * those same characters, and the backslash is ineffective.  This doesn't
10598      * happen for <>, as they aren't metas. */
10599     if (deprecate_escaped_meta
10600         && (PL_multi_open == PL_multi_close
10601             || PL_multi_open == '<'
10602             || ! ckWARN_d(WARN_DEPRECATED)))
10603     {
10604         deprecate_escaped_meta = FALSE;
10605     }
10606
10607     /* create a new SV to hold the contents.  79 is the SV's initial length.
10608        What a random number. */
10609     sv = newSV_type(SVt_PVIV);
10610     SvGROW(sv, 80);
10611     SvIV_set(sv, termcode);
10612     (void)SvPOK_only(sv);               /* validate pointer */
10613
10614     /* move past delimiter and try to read a complete string */
10615     if (keep_delims)
10616         sv_catpvn(sv, s, termlen);
10617     s += termlen;
10618 #ifdef PERL_MAD
10619     tstart = SvPVX(PL_linestr) + stuffstart;
10620     if (PL_madskills && !PL_thisopen && !keep_delims) {
10621         PL_thisopen = newSVpvn(tstart, s - tstart);
10622         stuffstart = s - SvPVX(PL_linestr);
10623     }
10624 #endif
10625     for (;;) {
10626         if (PL_encoding && !UTF && !re_reparse) {
10627             bool cont = TRUE;
10628
10629             while (cont) {
10630                 int offset = s - SvPVX_const(PL_linestr);
10631                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10632                                            &offset, (char*)termstr, termlen);
10633                 const char *ns;
10634                 char *svlast;
10635
10636                 if (SvIsCOW(PL_linestr)) {
10637                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10638                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10639                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10640                     char *buf = SvPVX(PL_linestr);
10641                     bufend_pos = PL_parser->bufend - buf;
10642                     bufptr_pos = PL_parser->bufptr - buf;
10643                     oldbufptr_pos = PL_parser->oldbufptr - buf;
10644                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10645                     linestart_pos = PL_parser->linestart - buf;
10646                     last_uni_pos = PL_parser->last_uni
10647                         ? PL_parser->last_uni - buf
10648                         : 0;
10649                     last_lop_pos = PL_parser->last_lop
10650                         ? PL_parser->last_lop - buf
10651                         : 0;
10652                     re_eval_start_pos =
10653                         PL_parser->lex_shared->re_eval_start ?
10654                             PL_parser->lex_shared->re_eval_start - buf : 0;
10655                     s_pos = s - buf;
10656
10657                     sv_force_normal(PL_linestr);
10658
10659                     buf = SvPVX(PL_linestr);
10660                     PL_parser->bufend = buf + bufend_pos;
10661                     PL_parser->bufptr = buf + bufptr_pos;
10662                     PL_parser->oldbufptr = buf + oldbufptr_pos;
10663                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10664                     PL_parser->linestart = buf + linestart_pos;
10665                     if (PL_parser->last_uni)
10666                         PL_parser->last_uni = buf + last_uni_pos;
10667                     if (PL_parser->last_lop)
10668                         PL_parser->last_lop = buf + last_lop_pos;
10669                     if (PL_parser->lex_shared->re_eval_start)
10670                         PL_parser->lex_shared->re_eval_start  =
10671                             buf + re_eval_start_pos;
10672                     s = buf + s_pos;
10673                 }
10674                 ns = SvPVX_const(PL_linestr) + offset;
10675                 svlast = SvEND(sv) - 1;
10676
10677                 for (; s < ns; s++) {
10678                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10679                         COPLINE_INC_WITH_HERELINES;
10680                 }
10681                 if (!found)
10682                     goto read_more_line;
10683                 else {
10684                     /* handle quoted delimiters */
10685                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10686                         const char *t;
10687                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10688                             t--;
10689                         if ((svlast-1 - t) % 2) {
10690                             if (!keep_quoted) {
10691                                 *(svlast-1) = term;
10692                                 *svlast = '\0';
10693                                 SvCUR_set(sv, SvCUR(sv) - 1);
10694                             }
10695                             continue;
10696                         }
10697                     }
10698                     if (PL_multi_open == PL_multi_close) {
10699                         cont = FALSE;
10700                     }
10701                     else {
10702                         const char *t;
10703                         char *w;
10704                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10705                             /* At here, all closes are "was quoted" one,
10706                                so we don't check PL_multi_close. */
10707                             if (*t == '\\') {
10708                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10709                                     t++;
10710                                 else
10711                                     *w++ = *t++;
10712                             }
10713                             else if (*t == PL_multi_open)
10714                                 brackets++;
10715
10716                             *w = *t;
10717                         }
10718                         if (w < t) {
10719                             *w++ = term;
10720                             *w = '\0';
10721                             SvCUR_set(sv, w - SvPVX_const(sv));
10722                         }
10723                         last_off = w - SvPVX(sv);
10724                         if (--brackets <= 0)
10725                             cont = FALSE;
10726                     }
10727                 }
10728             }
10729             if (!keep_delims) {
10730                 SvCUR_set(sv, SvCUR(sv) - 1);
10731                 *SvEND(sv) = '\0';
10732             }
10733             break;
10734         }
10735
10736         /* extend sv if need be */
10737         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10738         /* set 'to' to the next character in the sv's string */
10739         to = SvPVX(sv)+SvCUR(sv);
10740
10741         /* if open delimiter is the close delimiter read unbridle */
10742         if (PL_multi_open == PL_multi_close) {
10743             for (; s < PL_bufend; s++,to++) {
10744                 /* embedded newlines increment the current line number */
10745                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10746                     COPLINE_INC_WITH_HERELINES;
10747                 /* handle quoted delimiters */
10748                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10749                     if (!keep_quoted
10750                         && (s[1] == term
10751                             || (re_reparse && s[1] == '\\'))
10752                     )
10753                         s++;
10754                     /* any other quotes are simply copied straight through */
10755                     else
10756                         *to++ = *s++;
10757                 }
10758                 /* terminate when run out of buffer (the for() condition), or
10759                    have found the terminator */
10760                 else if (*s == term) {
10761                     if (termlen == 1)
10762                         break;
10763                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10764                         break;
10765                 }
10766                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10767                     has_utf8 = TRUE;
10768                 *to = *s;
10769             }
10770         }
10771
10772         /* if the terminator isn't the same as the start character (e.g.,
10773            matched brackets), we have to allow more in the quoting, and
10774            be prepared for nested brackets.
10775         */
10776         else {
10777             /* read until we run out of string, or we find the terminator */
10778             for (; s < PL_bufend; s++,to++) {
10779                 /* embedded newlines increment the line count */
10780                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10781                     COPLINE_INC_WITH_HERELINES;
10782                 /* backslashes can escape the open or closing characters */
10783                 if (*s == '\\' && s+1 < PL_bufend) {
10784                     if (!keep_quoted &&
10785                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10786                     {
10787                         s++;
10788
10789                         /* Here, 'deprecate_escaped_meta' is true iff the
10790                          * delimiters are paired metacharacters, and 's' points
10791                          * to an occurrence of one of them within the string,
10792                          * which was preceded by a backslash.  If this is a
10793                          * context where the delimiter is also a metacharacter,
10794                          * the backslash is useless, and deprecated.  () and []
10795                          * are meta in any context. {} are meta only when
10796                          * appearing in a quantifier or in things like '\p{'
10797                          * (but '\\p{' isn't meta).  They also aren't meta
10798                          * unless there is a matching closed, escaped char
10799                          * later on within the string.  If 's' points to an
10800                          * open, set a flag; if to a close, test that flag, and
10801                          * raise a warning if it was set */
10802
10803                         if (deprecate_escaped_meta) {
10804                             if (*s == PL_multi_open) {
10805                                 if (*s != '{') {
10806                                     escaped_open = s;
10807                                 }
10808                                      /* Look for a closing '\}' */
10809                                 else if (regcurly(s, TRUE)) {
10810                                     escaped_open = s;
10811                                 }
10812                                      /* Look for e.g.  '\x{' */
10813                                 else if (s - start > 2
10814                                          && _generic_isCC(*(s-2),
10815                                              _CC_BACKSLASH_FOO_LBRACE_IS_META))
10816                                 { /* Exclude '\\x', '\\\\x', etc. */
10817                                     char *lookbehind = s - 4;
10818                                     bool is_meta = TRUE;
10819                                     while (lookbehind >= start
10820                                            && *lookbehind == '\\')
10821                                     {
10822                                         is_meta = ! is_meta;
10823                                         lookbehind--;
10824                                     }
10825                                     if (is_meta) {
10826                                         escaped_open = s;
10827                                     }
10828                                 }
10829                             }
10830                             else if (escaped_open) {
10831                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10832                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10833                                 escaped_open = NULL;
10834                             }
10835                         }
10836                     }
10837                     else
10838                         *to++ = *s++;
10839                 }
10840                 /* allow nested opens and closes */
10841                 else if (*s == PL_multi_close && --brackets <= 0)
10842                     break;
10843                 else if (*s == PL_multi_open)
10844                     brackets++;
10845                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10846                     has_utf8 = TRUE;
10847                 *to = *s;
10848             }
10849         }
10850         /* terminate the copied string and update the sv's end-of-string */
10851         *to = '\0';
10852         SvCUR_set(sv, to - SvPVX_const(sv));
10853
10854         /*
10855          * this next chunk reads more into the buffer if we're not done yet
10856          */
10857
10858         if (s < PL_bufend)
10859             break;              /* handle case where we are done yet :-) */
10860
10861 #ifndef PERL_STRICT_CR
10862         if (to - SvPVX_const(sv) >= 2) {
10863             if ((to[-2] == '\r' && to[-1] == '\n') ||
10864                 (to[-2] == '\n' && to[-1] == '\r'))
10865             {
10866                 to[-2] = '\n';
10867                 to--;
10868                 SvCUR_set(sv, to - SvPVX_const(sv));
10869             }
10870             else if (to[-1] == '\r')
10871                 to[-1] = '\n';
10872         }
10873         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10874             to[-1] = '\n';
10875 #endif
10876
10877      read_more_line:
10878         /* if we're out of file, or a read fails, bail and reset the current
10879            line marker so we can report where the unterminated string began
10880         */
10881 #ifdef PERL_MAD
10882         if (PL_madskills) {
10883             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10884             if (PL_thisstuff)
10885                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10886             else
10887                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10888         }
10889 #endif
10890         COPLINE_INC_WITH_HERELINES;
10891         PL_bufptr = PL_bufend;
10892         if (!lex_next_chunk(0)) {
10893             sv_free(sv);
10894             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10895             return NULL;
10896         }
10897         s = PL_bufptr;
10898 #ifdef PERL_MAD
10899         stuffstart = 0;
10900 #endif
10901     }
10902
10903     /* at this point, we have successfully read the delimited string */
10904
10905     if (!PL_encoding || UTF || re_reparse) {
10906 #ifdef PERL_MAD
10907         if (PL_madskills) {
10908             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10909             const int len = s - tstart;
10910             if (PL_thisstuff)
10911                 sv_catpvn(PL_thisstuff, tstart, len);
10912             else
10913                 PL_thisstuff = newSVpvn(tstart, len);
10914             if (!PL_thisclose && !keep_delims)
10915                 PL_thisclose = newSVpvn(s,termlen);
10916         }
10917 #endif
10918
10919         if (keep_delims)
10920             sv_catpvn(sv, s, termlen);
10921         s += termlen;
10922     }
10923 #ifdef PERL_MAD
10924     else {
10925         if (PL_madskills) {
10926             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10927             const int len = s - tstart - termlen;
10928             if (PL_thisstuff)
10929                 sv_catpvn(PL_thisstuff, tstart, len);
10930             else
10931                 PL_thisstuff = newSVpvn(tstart, len);
10932             if (!PL_thisclose && !keep_delims)
10933                 PL_thisclose = newSVpvn(s - termlen,termlen);
10934         }
10935     }
10936 #endif
10937     if (has_utf8 || (PL_encoding && !re_reparse))
10938         SvUTF8_on(sv);
10939
10940     PL_multi_end = CopLINE(PL_curcop);
10941     CopLINE_set(PL_curcop, PL_multi_start);
10942     PL_parser->herelines = herelines;
10943
10944     /* if we allocated too much space, give some back */
10945     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10946         SvLEN_set(sv, SvCUR(sv) + 1);
10947         SvPV_renew(sv, SvLEN(sv));
10948     }
10949
10950     /* decide whether this is the first or second quoted string we've read
10951        for this op
10952     */
10953
10954     if (PL_lex_stuff)
10955         PL_sublex_info.repl = sv;
10956     else
10957         PL_lex_stuff = sv;
10958     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10959     return s;
10960 }
10961
10962 /*
10963   scan_num
10964   takes: pointer to position in buffer
10965   returns: pointer to new position in buffer
10966   side-effects: builds ops for the constant in pl_yylval.op
10967
10968   Read a number in any of the formats that Perl accepts:
10969
10970   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10971   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10972   0b[01](_?[01])*
10973   0[0-7](_?[0-7])*
10974   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10975
10976   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10977   thing it reads.
10978
10979   If it reads a number without a decimal point or an exponent, it will
10980   try converting the number to an integer and see if it can do so
10981   without loss of precision.
10982 */
10983
10984 char *
10985 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10986 {
10987     dVAR;
10988     const char *s = start;      /* current position in buffer */
10989     char *d;                    /* destination in temp buffer */
10990     char *e;                    /* end of temp buffer */
10991     NV nv;                              /* number read, as a double */
10992     SV *sv = NULL;                      /* place to put the converted number */
10993     bool floatit;                       /* boolean: int or float? */
10994     const char *lastub = NULL;          /* position of last underbar */
10995     static const char* const number_too_long = "Number too long";
10996
10997     PERL_ARGS_ASSERT_SCAN_NUM;
10998
10999     /* We use the first character to decide what type of number this is */
11000
11001     switch (*s) {
11002     default:
11003         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11004
11005     /* if it starts with a 0, it could be an octal number, a decimal in
11006        0.13 disguise, or a hexadecimal number, or a binary number. */
11007     case '0':
11008         {
11009           /* variables:
11010              u          holds the "number so far"
11011              shift      the power of 2 of the base
11012                         (hex == 4, octal == 3, binary == 1)
11013              overflowed was the number more than we can hold?
11014
11015              Shift is used when we add a digit.  It also serves as an "are
11016              we in octal/hex/binary?" indicator to disallow hex characters
11017              when in octal mode.
11018            */
11019             NV n = 0.0;
11020             UV u = 0;
11021             I32 shift;
11022             bool overflowed = FALSE;
11023             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11024             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11025             static const char* const bases[5] =
11026               { "", "binary", "", "octal", "hexadecimal" };
11027             static const char* const Bases[5] =
11028               { "", "Binary", "", "Octal", "Hexadecimal" };
11029             static const char* const maxima[5] =
11030               { "",
11031                 "0b11111111111111111111111111111111",
11032                 "",
11033                 "037777777777",
11034                 "0xffffffff" };
11035             const char *base, *Base, *max;
11036
11037             /* check for hex */
11038             if (s[1] == 'x' || s[1] == 'X') {
11039                 shift = 4;
11040                 s += 2;
11041                 just_zero = FALSE;
11042             } else if (s[1] == 'b' || s[1] == 'B') {
11043                 shift = 1;
11044                 s += 2;
11045                 just_zero = FALSE;
11046             }
11047             /* check for a decimal in disguise */
11048             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11049                 goto decimal;
11050             /* so it must be octal */
11051             else {
11052                 shift = 3;
11053                 s++;
11054             }
11055
11056             if (*s == '_') {
11057                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11058                                "Misplaced _ in number");
11059                lastub = s++;
11060             }
11061
11062             base = bases[shift];
11063             Base = Bases[shift];
11064             max  = maxima[shift];
11065
11066             /* read the rest of the number */
11067             for (;;) {
11068                 /* x is used in the overflow test,
11069                    b is the digit we're adding on. */
11070                 UV x, b;
11071
11072                 switch (*s) {
11073
11074                 /* if we don't mention it, we're done */
11075                 default:
11076                     goto out;
11077
11078                 /* _ are ignored -- but warned about if consecutive */
11079                 case '_':
11080                     if (lastub && s == lastub + 1)
11081                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11082                                        "Misplaced _ in number");
11083                     lastub = s++;
11084                     break;
11085
11086                 /* 8 and 9 are not octal */
11087                 case '8': case '9':
11088                     if (shift == 3)
11089                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11090                     /* FALLTHROUGH */
11091
11092                 /* octal digits */
11093                 case '2': case '3': case '4':
11094                 case '5': case '6': case '7':
11095                     if (shift == 1)
11096                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11097                     /* FALLTHROUGH */
11098
11099                 case '0': case '1':
11100                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11101                     goto digit;
11102
11103                 /* hex digits */
11104                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11105                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11106                     /* make sure they said 0x */
11107                     if (shift != 4)
11108                         goto out;
11109                     b = (*s++ & 7) + 9;
11110
11111                     /* Prepare to put the digit we have onto the end
11112                        of the number so far.  We check for overflows.
11113                     */
11114
11115                   digit:
11116                     just_zero = FALSE;
11117                     if (!overflowed) {
11118                         x = u << shift; /* make room for the digit */
11119
11120                         if ((x >> shift) != u
11121                             && !(PL_hints & HINT_NEW_BINARY)) {
11122                             overflowed = TRUE;
11123                             n = (NV) u;
11124                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11125                                              "Integer overflow in %s number",
11126                                              base);
11127                         } else
11128                             u = x | b;          /* add the digit to the end */
11129                     }
11130                     if (overflowed) {
11131                         n *= nvshift[shift];
11132                         /* If an NV has not enough bits in its
11133                          * mantissa to represent an UV this summing of
11134                          * small low-order numbers is a waste of time
11135                          * (because the NV cannot preserve the
11136                          * low-order bits anyway): we could just
11137                          * remember when did we overflow and in the
11138                          * end just multiply n by the right
11139                          * amount. */
11140                         n += (NV) b;
11141                     }
11142                     break;
11143                 }
11144             }
11145
11146           /* if we get here, we had success: make a scalar value from
11147              the number.
11148           */
11149           out:
11150
11151             /* final misplaced underbar check */
11152             if (s[-1] == '_') {
11153                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11154             }
11155
11156             if (overflowed) {
11157                 if (n > 4294967295.0)
11158                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11159                                    "%s number > %s non-portable",
11160                                    Base, max);
11161                 sv = newSVnv(n);
11162             }
11163             else {
11164 #if UVSIZE > 4
11165                 if (u > 0xffffffff)
11166                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11167                                    "%s number > %s non-portable",
11168                                    Base, max);
11169 #endif
11170                 sv = newSVuv(u);
11171             }
11172             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11173                 sv = new_constant(start, s - start, "integer",
11174                                   sv, NULL, NULL, 0);
11175             else if (PL_hints & HINT_NEW_BINARY)
11176                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11177         }
11178         break;
11179
11180     /*
11181       handle decimal numbers.
11182       we're also sent here when we read a 0 as the first digit
11183     */
11184     case '1': case '2': case '3': case '4': case '5':
11185     case '6': case '7': case '8': case '9': case '.':
11186       decimal:
11187         d = PL_tokenbuf;
11188         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11189         floatit = FALSE;
11190
11191         /* read next group of digits and _ and copy into d */
11192         while (isDIGIT(*s) || *s == '_') {
11193             /* skip underscores, checking for misplaced ones
11194                if -w is on
11195             */
11196             if (*s == '_') {
11197                 if (lastub && s == lastub + 1)
11198                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11199                                    "Misplaced _ in number");
11200                 lastub = s++;
11201             }
11202             else {
11203                 /* check for end of fixed-length buffer */
11204                 if (d >= e)
11205                     Perl_croak(aTHX_ "%s", number_too_long);
11206                 /* if we're ok, copy the character */
11207                 *d++ = *s++;
11208             }
11209         }
11210
11211         /* final misplaced underbar check */
11212         if (lastub && s == lastub + 1) {
11213             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11214         }
11215
11216         /* read a decimal portion if there is one.  avoid
11217            3..5 being interpreted as the number 3. followed
11218            by .5
11219         */
11220         if (*s == '.' && s[1] != '.') {
11221             floatit = TRUE;
11222             *d++ = *s++;
11223
11224             if (*s == '_') {
11225                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11226                                "Misplaced _ in number");
11227                 lastub = s;
11228             }
11229
11230             /* copy, ignoring underbars, until we run out of digits.
11231             */
11232             for (; isDIGIT(*s) || *s == '_'; s++) {
11233                 /* fixed length buffer check */
11234                 if (d >= e)
11235                     Perl_croak(aTHX_ "%s", number_too_long);
11236                 if (*s == '_') {
11237                    if (lastub && s == lastub + 1)
11238                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11239                                       "Misplaced _ in number");
11240                    lastub = s;
11241                 }
11242                 else
11243                     *d++ = *s;
11244             }
11245             /* fractional part ending in underbar? */
11246             if (s[-1] == '_') {
11247                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11248                                "Misplaced _ in number");
11249             }
11250             if (*s == '.' && isDIGIT(s[1])) {
11251                 /* oops, it's really a v-string, but without the "v" */
11252                 s = start;
11253                 goto vstring;
11254             }
11255         }
11256
11257         /* read exponent part, if present */
11258         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11259             floatit = TRUE;
11260             s++;
11261
11262             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11263             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11264
11265             /* stray preinitial _ */
11266             if (*s == '_') {
11267                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11268                                "Misplaced _ in number");
11269                 lastub = s++;
11270             }
11271
11272             /* allow positive or negative exponent */
11273             if (*s == '+' || *s == '-')
11274                 *d++ = *s++;
11275
11276             /* stray initial _ */
11277             if (*s == '_') {
11278                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11279                                "Misplaced _ in number");
11280                 lastub = s++;
11281             }
11282
11283             /* read digits of exponent */
11284             while (isDIGIT(*s) || *s == '_') {
11285                 if (isDIGIT(*s)) {
11286                     if (d >= e)
11287                         Perl_croak(aTHX_ "%s", number_too_long);
11288                     *d++ = *s++;
11289                 }
11290                 else {
11291                    if (((lastub && s == lastub + 1) ||
11292                         (!isDIGIT(s[1]) && s[1] != '_')))
11293                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11294                                       "Misplaced _ in number");
11295                    lastub = s++;
11296                 }
11297             }
11298         }
11299
11300
11301         /*
11302            We try to do an integer conversion first if no characters
11303            indicating "float" have been found.
11304          */
11305
11306         if (!floatit) {
11307             UV uv;
11308             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11309
11310             if (flags == IS_NUMBER_IN_UV) {
11311               if (uv <= IV_MAX)
11312                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11313               else
11314                 sv = newSVuv(uv);
11315             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11316               if (uv <= (UV) IV_MIN)
11317                 sv = newSViv(-(IV)uv);
11318               else
11319                 floatit = TRUE;
11320             } else
11321               floatit = TRUE;
11322         }
11323         if (floatit) {
11324             STORE_NUMERIC_LOCAL_SET_STANDARD();
11325             /* terminate the string */
11326             *d = '\0';
11327             nv = Atof(PL_tokenbuf);
11328             RESTORE_NUMERIC_LOCAL();
11329             sv = newSVnv(nv);
11330         }
11331
11332         if ( floatit
11333              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11334             const char *const key = floatit ? "float" : "integer";
11335             const STRLEN keylen = floatit ? 5 : 7;
11336             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11337                                 key, keylen, sv, NULL, NULL, 0);
11338         }
11339         break;
11340
11341     /* if it starts with a v, it could be a v-string */
11342     case 'v':
11343 vstring:
11344                 sv = newSV(5); /* preallocate storage space */
11345                 ENTER_with_name("scan_vstring");
11346                 SAVEFREESV(sv);
11347                 s = scan_vstring(s, PL_bufend, sv);
11348                 SvREFCNT_inc_simple_void_NN(sv);
11349                 LEAVE_with_name("scan_vstring");
11350         break;
11351     }
11352
11353     /* make the op for the constant and return */
11354
11355     if (sv)
11356         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11357     else
11358         lvalp->opval = NULL;
11359
11360     return (char *)s;
11361 }
11362
11363 STATIC char *
11364 S_scan_formline(pTHX_ char *s)
11365 {
11366     dVAR;
11367     char *eol;
11368     char *t;
11369     SV * const stuff = newSVpvs("");
11370     bool needargs = FALSE;
11371     bool eofmt = FALSE;
11372 #ifdef PERL_MAD
11373     char *tokenstart = s;
11374     SV* savewhite = NULL;
11375
11376     if (PL_madskills) {
11377         savewhite = PL_thiswhite;
11378         PL_thiswhite = 0;
11379     }
11380 #endif
11381
11382     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11383
11384     while (!needargs) {
11385         if (*s == '.') {
11386             t = s+1;
11387 #ifdef PERL_STRICT_CR
11388             while (SPACE_OR_TAB(*t))
11389                 t++;
11390 #else
11391             while (SPACE_OR_TAB(*t) || *t == '\r')
11392                 t++;
11393 #endif
11394             if (*t == '\n' || t == PL_bufend) {
11395                 eofmt = TRUE;
11396                 break;
11397             }
11398         }
11399         eol = (char *) memchr(s,'\n',PL_bufend-s);
11400         if (!eol++)
11401                 eol = PL_bufend;
11402         if (*s != '#') {
11403             for (t = s; t < eol; t++) {
11404                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11405                     needargs = FALSE;
11406                     goto enough;        /* ~~ must be first line in formline */
11407                 }
11408                 if (*t == '@' || *t == '^')
11409                     needargs = TRUE;
11410             }
11411             if (eol > s) {
11412                 sv_catpvn(stuff, s, eol-s);
11413 #ifndef PERL_STRICT_CR
11414                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11415                     char *end = SvPVX(stuff) + SvCUR(stuff);
11416                     end[-2] = '\n';
11417                     end[-1] = '\0';
11418                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11419                 }
11420 #endif
11421             }
11422             else
11423               break;
11424         }
11425         s = (char*)eol;
11426         if ((PL_rsfp || PL_parser->filtered)
11427          && PL_parser->form_lex_state == LEX_NORMAL) {
11428             bool got_some;
11429 #ifdef PERL_MAD
11430             if (PL_madskills) {
11431                 if (PL_thistoken)
11432                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11433                 else
11434                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11435             }
11436 #endif
11437             PL_bufptr = PL_bufend;
11438             COPLINE_INC_WITH_HERELINES;
11439             got_some = lex_next_chunk(0);
11440             CopLINE_dec(PL_curcop);
11441             s = PL_bufptr;
11442 #ifdef PERL_MAD
11443             tokenstart = PL_bufptr;
11444 #endif
11445             if (!got_some)
11446                 break;
11447         }
11448         incline(s);
11449     }
11450   enough:
11451     if (!SvCUR(stuff) || needargs)
11452         PL_lex_state = PL_parser->form_lex_state;
11453     if (SvCUR(stuff)) {
11454         PL_expect = XSTATE;
11455         if (needargs) {
11456             const char *s2 = s;
11457             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11458                 || *s2 == 013)
11459                 s2++;
11460             if (*s2 == '{') {
11461                 start_force(PL_curforce);
11462                 PL_expect = XTERMBLOCK;
11463                 NEXTVAL_NEXTTOKE.ival = 0;
11464                 force_next(DO);
11465             }
11466             start_force(PL_curforce);
11467             NEXTVAL_NEXTTOKE.ival = 0;
11468             force_next(FORMLBRACK);
11469         }
11470         if (!IN_BYTES) {
11471             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11472                 SvUTF8_on(stuff);
11473             else if (PL_encoding)
11474                 sv_recode_to_utf8(stuff, PL_encoding);
11475         }
11476         start_force(PL_curforce);
11477         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11478         force_next(THING);
11479     }
11480     else {
11481         SvREFCNT_dec(stuff);
11482         if (eofmt)
11483             PL_lex_formbrack = 0;
11484     }
11485 #ifdef PERL_MAD
11486     if (PL_madskills) {
11487         if (PL_thistoken)
11488             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11489         else
11490             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11491         PL_thiswhite = savewhite;
11492     }
11493 #endif
11494     return s;
11495 }
11496
11497 I32
11498 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11499 {
11500     dVAR;
11501     const I32 oldsavestack_ix = PL_savestack_ix;
11502     CV* const outsidecv = PL_compcv;
11503
11504     SAVEI32(PL_subline);
11505     save_item(PL_subname);
11506     SAVESPTR(PL_compcv);
11507
11508     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11509     CvFLAGS(PL_compcv) |= flags;
11510
11511     PL_subline = CopLINE(PL_curcop);
11512     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11513     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11514     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11515     if (outsidecv && CvPADLIST(outsidecv))
11516         CvPADLIST(PL_compcv)->xpadl_outid =
11517             PadlistNAMES(CvPADLIST(outsidecv));
11518
11519     return oldsavestack_ix;
11520 }
11521
11522 static int
11523 S_yywarn(pTHX_ const char *const s, U32 flags)
11524 {
11525     dVAR;
11526
11527     PERL_ARGS_ASSERT_YYWARN;
11528
11529     PL_in_eval |= EVAL_WARNONLY;
11530     yyerror_pv(s, flags);
11531     PL_in_eval &= ~EVAL_WARNONLY;
11532     return 0;
11533 }
11534
11535 int
11536 Perl_yyerror(pTHX_ const char *const s)
11537 {
11538     PERL_ARGS_ASSERT_YYERROR;
11539     return yyerror_pvn(s, strlen(s), 0);
11540 }
11541
11542 int
11543 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11544 {
11545     PERL_ARGS_ASSERT_YYERROR_PV;
11546     return yyerror_pvn(s, strlen(s), flags);
11547 }
11548
11549 int
11550 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11551 {
11552     dVAR;
11553     const char *context = NULL;
11554     int contlen = -1;
11555     SV *msg;
11556     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11557     int yychar  = PL_parser->yychar;
11558
11559     PERL_ARGS_ASSERT_YYERROR_PVN;
11560
11561     if (!yychar || (yychar == ';' && !PL_rsfp))
11562         sv_catpvs(where_sv, "at EOF");
11563     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11564       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11565       PL_oldbufptr != PL_bufptr) {
11566         /*
11567                 Only for NetWare:
11568                 The code below is removed for NetWare because it abends/crashes on NetWare
11569                 when the script has error such as not having the closing quotes like:
11570                     if ($var eq "value)
11571                 Checking of white spaces is anyway done in NetWare code.
11572         */
11573 #ifndef NETWARE
11574         while (isSPACE(*PL_oldoldbufptr))
11575             PL_oldoldbufptr++;
11576 #endif
11577         context = PL_oldoldbufptr;
11578         contlen = PL_bufptr - PL_oldoldbufptr;
11579     }
11580     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11581       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11582         /*
11583                 Only for NetWare:
11584                 The code below is removed for NetWare because it abends/crashes on NetWare
11585                 when the script has error such as not having the closing quotes like:
11586                     if ($var eq "value)
11587                 Checking of white spaces is anyway done in NetWare code.
11588         */
11589 #ifndef NETWARE
11590         while (isSPACE(*PL_oldbufptr))
11591             PL_oldbufptr++;
11592 #endif
11593         context = PL_oldbufptr;
11594         contlen = PL_bufptr - PL_oldbufptr;
11595     }
11596     else if (yychar > 255)
11597         sv_catpvs(where_sv, "next token ???");
11598     else if (yychar == -2) { /* YYEMPTY */
11599         if (PL_lex_state == LEX_NORMAL ||
11600            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11601             sv_catpvs(where_sv, "at end of line");
11602         else if (PL_lex_inpat)
11603             sv_catpvs(where_sv, "within pattern");
11604         else
11605             sv_catpvs(where_sv, "within string");
11606     }
11607     else {
11608         sv_catpvs(where_sv, "next char ");
11609         if (yychar < 32)
11610             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11611         else if (isPRINT_LC(yychar)) {
11612             const char string = yychar;
11613             sv_catpvn(where_sv, &string, 1);
11614         }
11615         else
11616             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11617     }
11618     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11619     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11620         OutCopFILE(PL_curcop),
11621         (IV)(PL_parser->preambling == NOLINE
11622                ? CopLINE(PL_curcop)
11623                : PL_parser->preambling));
11624     if (context)
11625         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11626                              UTF8fARG(UTF, contlen, context));
11627     else
11628         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11629     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11630         Perl_sv_catpvf(aTHX_ msg,
11631         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11632                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11633         PL_multi_end = 0;
11634     }
11635     if (PL_in_eval & EVAL_WARNONLY) {
11636         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11637     }
11638     else
11639         qerror(msg);
11640     if (PL_error_count >= 10) {
11641         SV * errsv;
11642         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11643             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11644                        SVfARG(errsv), OutCopFILE(PL_curcop));
11645         else
11646             Perl_croak(aTHX_ "%s has too many errors.\n",
11647             OutCopFILE(PL_curcop));
11648     }
11649     PL_in_my = 0;
11650     PL_in_my_stash = NULL;
11651     return 0;
11652 }
11653
11654 STATIC char*
11655 S_swallow_bom(pTHX_ U8 *s)
11656 {
11657     dVAR;
11658     const STRLEN slen = SvCUR(PL_linestr);
11659
11660     PERL_ARGS_ASSERT_SWALLOW_BOM;
11661
11662     switch (s[0]) {
11663     case 0xFF:
11664         if (s[1] == 0xFE) {
11665             /* UTF-16 little-endian? (or UTF-32LE?) */
11666             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11667                 /* diag_listed_as: Unsupported script encoding %s */
11668                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11669 #ifndef PERL_NO_UTF16_FILTER
11670             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11671             s += 2;
11672             if (PL_bufend > (char*)s) {
11673                 s = add_utf16_textfilter(s, TRUE);
11674             }
11675 #else
11676             /* diag_listed_as: Unsupported script encoding %s */
11677             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11678 #endif
11679         }
11680         break;
11681     case 0xFE:
11682         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11683 #ifndef PERL_NO_UTF16_FILTER
11684             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11685             s += 2;
11686             if (PL_bufend > (char *)s) {
11687                 s = add_utf16_textfilter(s, FALSE);
11688             }
11689 #else
11690             /* diag_listed_as: Unsupported script encoding %s */
11691             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11692 #endif
11693         }
11694         break;
11695     case BOM_UTF8_FIRST_BYTE: {
11696         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11697         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11698             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11699             s += len + 1;                      /* UTF-8 */
11700         }
11701         break;
11702     }
11703     case 0:
11704         if (slen > 3) {
11705              if (s[1] == 0) {
11706                   if (s[2] == 0xFE && s[3] == 0xFF) {
11707                        /* UTF-32 big-endian */
11708                        /* diag_listed_as: Unsupported script encoding %s */
11709                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11710                   }
11711              }
11712              else if (s[2] == 0 && s[3] != 0) {
11713                   /* Leading bytes
11714                    * 00 xx 00 xx
11715                    * are a good indicator of UTF-16BE. */
11716 #ifndef PERL_NO_UTF16_FILTER
11717                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11718                   s = add_utf16_textfilter(s, FALSE);
11719 #else
11720                   /* diag_listed_as: Unsupported script encoding %s */
11721                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11722 #endif
11723              }
11724         }
11725         break;
11726
11727     default:
11728          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11729                   /* Leading bytes
11730                    * xx 00 xx 00
11731                    * are a good indicator of UTF-16LE. */
11732 #ifndef PERL_NO_UTF16_FILTER
11733               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11734               s = add_utf16_textfilter(s, TRUE);
11735 #else
11736               /* diag_listed_as: Unsupported script encoding %s */
11737               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11738 #endif
11739          }
11740     }
11741     return (char*)s;
11742 }
11743
11744
11745 #ifndef PERL_NO_UTF16_FILTER
11746 static I32
11747 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11748 {
11749     dVAR;
11750     SV *const filter = FILTER_DATA(idx);
11751     /* We re-use this each time round, throwing the contents away before we
11752        return.  */
11753     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11754     SV *const utf8_buffer = filter;
11755     IV status = IoPAGE(filter);
11756     const bool reverse = cBOOL(IoLINES(filter));
11757     I32 retval;
11758
11759     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11760
11761     /* As we're automatically added, at the lowest level, and hence only called
11762        from this file, we can be sure that we're not called in block mode. Hence
11763        don't bother writing code to deal with block mode.  */
11764     if (maxlen) {
11765         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11766     }
11767     if (status < 0) {
11768         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11769     }
11770     DEBUG_P(PerlIO_printf(Perl_debug_log,
11771                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11772                           FPTR2DPTR(void *, S_utf16_textfilter),
11773                           reverse ? 'l' : 'b', idx, maxlen, status,
11774                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11775
11776     while (1) {
11777         STRLEN chars;
11778         STRLEN have;
11779         I32 newlen;
11780         U8 *end;
11781         /* First, look in our buffer of existing UTF-8 data:  */
11782         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11783
11784         if (nl) {
11785             ++nl;
11786         } else if (status == 0) {
11787             /* EOF */
11788             IoPAGE(filter) = 0;
11789             nl = SvEND(utf8_buffer);
11790         }
11791         if (nl) {
11792             STRLEN got = nl - SvPVX(utf8_buffer);
11793             /* Did we have anything to append?  */
11794             retval = got != 0;
11795             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11796             /* Everything else in this code works just fine if SVp_POK isn't
11797                set.  This, however, needs it, and we need it to work, else
11798                we loop infinitely because the buffer is never consumed.  */
11799             sv_chop(utf8_buffer, nl);
11800             break;
11801         }
11802
11803         /* OK, not a complete line there, so need to read some more UTF-16.
11804            Read an extra octect if the buffer currently has an odd number. */
11805         while (1) {
11806             if (status <= 0)
11807                 break;
11808             if (SvCUR(utf16_buffer) >= 2) {
11809                 /* Location of the high octet of the last complete code point.
11810                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11811                    *coupled* with all the benefits of partial reads and
11812                    endianness.  */
11813                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11814                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11815
11816                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11817                     break;
11818                 }
11819
11820                 /* We have the first half of a surrogate. Read more.  */
11821                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11822             }
11823
11824             status = FILTER_READ(idx + 1, utf16_buffer,
11825                                  160 + (SvCUR(utf16_buffer) & 1));
11826             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11827             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11828             if (status < 0) {
11829                 /* Error */
11830                 IoPAGE(filter) = status;
11831                 return status;
11832             }
11833         }
11834
11835         chars = SvCUR(utf16_buffer) >> 1;
11836         have = SvCUR(utf8_buffer);
11837         SvGROW(utf8_buffer, have + chars * 3 + 1);
11838
11839         if (reverse) {
11840             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11841                                          (U8*)SvPVX_const(utf8_buffer) + have,
11842                                          chars * 2, &newlen);
11843         } else {
11844             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11845                                 (U8*)SvPVX_const(utf8_buffer) + have,
11846                                 chars * 2, &newlen);
11847         }
11848         SvCUR_set(utf8_buffer, have + newlen);
11849         *end = '\0';
11850
11851         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11852            it's private to us, and utf16_to_utf8{,reversed} take a
11853            (pointer,length) pair, rather than a NUL-terminated string.  */
11854         if(SvCUR(utf16_buffer) & 1) {
11855             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11856             SvCUR_set(utf16_buffer, 1);
11857         } else {
11858             SvCUR_set(utf16_buffer, 0);
11859         }
11860     }
11861     DEBUG_P(PerlIO_printf(Perl_debug_log,
11862                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11863                           status,
11864                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11865     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11866     return retval;
11867 }
11868
11869 static U8 *
11870 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11871 {
11872     SV *filter = filter_add(S_utf16_textfilter, NULL);
11873
11874     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11875
11876     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11877     sv_setpvs(filter, "");
11878     IoLINES(filter) = reversed;
11879     IoPAGE(filter) = 1; /* Not EOF */
11880
11881     /* Sadly, we have to return a valid pointer, come what may, so we have to
11882        ignore any error return from this.  */
11883     SvCUR_set(PL_linestr, 0);
11884     if (FILTER_READ(0, PL_linestr, 0)) {
11885         SvUTF8_on(PL_linestr);
11886     } else {
11887         SvUTF8_on(PL_linestr);
11888     }
11889     PL_bufend = SvEND(PL_linestr);
11890     return (U8*)SvPVX(PL_linestr);
11891 }
11892 #endif
11893
11894 /*
11895 Returns a pointer to the next character after the parsed
11896 vstring, as well as updating the passed in sv.
11897
11898 Function must be called like
11899
11900         sv = sv_2mortal(newSV(5));
11901         s = scan_vstring(s,e,sv);
11902
11903 where s and e are the start and end of the string.
11904 The sv should already be large enough to store the vstring
11905 passed in, for performance reasons.
11906
11907 This function may croak if fatal warnings are enabled in the
11908 calling scope, hence the sv_2mortal in the example (to prevent
11909 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11910 sv_2mortal.
11911
11912 */
11913
11914 char *
11915 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11916 {
11917     dVAR;
11918     const char *pos = s;
11919     const char *start = s;
11920
11921     PERL_ARGS_ASSERT_SCAN_VSTRING;
11922
11923     if (*pos == 'v') pos++;  /* get past 'v' */
11924     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11925         pos++;
11926     if ( *pos != '.') {
11927         /* this may not be a v-string if followed by => */
11928         const char *next = pos;
11929         while (next < e && isSPACE(*next))
11930             ++next;
11931         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11932             /* return string not v-string */
11933             sv_setpvn(sv,(char *)s,pos-s);
11934             return (char *)pos;
11935         }
11936     }
11937
11938     if (!isALPHA(*pos)) {
11939         U8 tmpbuf[UTF8_MAXBYTES+1];
11940
11941         if (*s == 'v')
11942             s++;  /* get past 'v' */
11943
11944         sv_setpvs(sv, "");
11945
11946         for (;;) {
11947             /* this is atoi() that tolerates underscores */
11948             U8 *tmpend;
11949             UV rev = 0;
11950             const char *end = pos;
11951             UV mult = 1;
11952             while (--end >= s) {
11953                 if (*end != '_') {
11954                     const UV orev = rev;
11955                     rev += (*end - '0') * mult;
11956                     mult *= 10;
11957                     if (orev > rev)
11958                         /* diag_listed_as: Integer overflow in %s number */
11959                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11960                                          "Integer overflow in decimal number");
11961                 }
11962             }
11963 #ifdef EBCDIC
11964             if (rev > 0x7FFFFFFF)
11965                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11966 #endif
11967             /* Append native character for the rev point */
11968             tmpend = uvchr_to_utf8(tmpbuf, rev);
11969             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11970             if (!UVCHR_IS_INVARIANT(rev))
11971                  SvUTF8_on(sv);
11972             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11973                  s = ++pos;
11974             else {
11975                  s = pos;
11976                  break;
11977             }
11978             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11979                  pos++;
11980         }
11981         SvPOK_on(sv);
11982         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11983         SvRMAGICAL_on(sv);
11984     }
11985     return (char *)s;
11986 }
11987
11988 int
11989 Perl_keyword_plugin_standard(pTHX_
11990         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11991 {
11992     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11993     PERL_UNUSED_CONTEXT;
11994     PERL_UNUSED_ARG(keyword_ptr);
11995     PERL_UNUSED_ARG(keyword_len);
11996     PERL_UNUSED_ARG(op_ptr);
11997     return KEYWORD_PLUGIN_DECLINE;
11998 }
11999
12000 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12001 static void
12002 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12003 {
12004     SAVEI32(PL_lex_brackets);
12005     if (PL_lex_brackets > 100)
12006         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12007     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12008     SAVEI32(PL_lex_allbrackets);
12009     PL_lex_allbrackets = 0;
12010     SAVEI8(PL_lex_fakeeof);
12011     PL_lex_fakeeof = (U8)fakeeof;
12012     if(yyparse(gramtype) && !PL_parser->error_count)
12013         qerror(Perl_mess(aTHX_ "Parse error"));
12014 }
12015
12016 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12017 static OP *
12018 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12019 {
12020     OP *o;
12021     ENTER;
12022     SAVEVPTR(PL_eval_root);
12023     PL_eval_root = NULL;
12024     parse_recdescent(gramtype, fakeeof);
12025     o = PL_eval_root;
12026     LEAVE;
12027     return o;
12028 }
12029
12030 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12031 static OP *
12032 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12033 {
12034     OP *exprop;
12035     if (flags & ~PARSE_OPTIONAL)
12036         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12037     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12038     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12039         if (!PL_parser->error_count)
12040             qerror(Perl_mess(aTHX_ "Parse error"));
12041         exprop = newOP(OP_NULL, 0);
12042     }
12043     return exprop;
12044 }
12045
12046 /*
12047 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12048
12049 Parse a Perl arithmetic expression.  This may contain operators of precedence
12050 down to the bit shift operators.  The expression must be followed (and thus
12051 terminated) either by a comparison or lower-precedence operator or by
12052 something that would normally terminate an expression such as semicolon.
12053 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12054 otherwise it is mandatory.  It is up to the caller to ensure that the
12055 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12056 the source of the code to be parsed and the lexical context for the
12057 expression.
12058
12059 The op tree representing the expression is returned.  If an optional
12060 expression is absent, a null pointer is returned, otherwise the pointer
12061 will be non-null.
12062
12063 If an error occurs in parsing or compilation, in most cases a valid op
12064 tree is returned anyway.  The error is reflected in the parser state,
12065 normally resulting in a single exception at the top level of parsing
12066 which covers all the compilation errors that occurred.  Some compilation
12067 errors, however, will throw an exception immediately.
12068
12069 =cut
12070 */
12071
12072 OP *
12073 Perl_parse_arithexpr(pTHX_ U32 flags)
12074 {
12075     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12076 }
12077
12078 /*
12079 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12080
12081 Parse a Perl term expression.  This may contain operators of precedence
12082 down to the assignment operators.  The expression must be followed (and thus
12083 terminated) either by a comma or lower-precedence operator or by
12084 something that would normally terminate an expression such as semicolon.
12085 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12086 otherwise it is mandatory.  It is up to the caller to ensure that the
12087 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12088 the source of the code to be parsed and the lexical context for the
12089 expression.
12090
12091 The op tree representing the expression is returned.  If an optional
12092 expression is absent, a null pointer is returned, otherwise the pointer
12093 will be non-null.
12094
12095 If an error occurs in parsing or compilation, in most cases a valid op
12096 tree is returned anyway.  The error is reflected in the parser state,
12097 normally resulting in a single exception at the top level of parsing
12098 which covers all the compilation errors that occurred.  Some compilation
12099 errors, however, will throw an exception immediately.
12100
12101 =cut
12102 */
12103
12104 OP *
12105 Perl_parse_termexpr(pTHX_ U32 flags)
12106 {
12107     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12108 }
12109
12110 /*
12111 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12112
12113 Parse a Perl list expression.  This may contain operators of precedence
12114 down to the comma operator.  The expression must be followed (and thus
12115 terminated) either by a low-precedence logic operator such as C<or> or by
12116 something that would normally terminate an expression such as semicolon.
12117 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12118 otherwise it is mandatory.  It is up to the caller to ensure that the
12119 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12120 the source of the code to be parsed and the lexical context for the
12121 expression.
12122
12123 The op tree representing the expression is returned.  If an optional
12124 expression is absent, a null pointer is returned, otherwise the pointer
12125 will be non-null.
12126
12127 If an error occurs in parsing or compilation, in most cases a valid op
12128 tree is returned anyway.  The error is reflected in the parser state,
12129 normally resulting in a single exception at the top level of parsing
12130 which covers all the compilation errors that occurred.  Some compilation
12131 errors, however, will throw an exception immediately.
12132
12133 =cut
12134 */
12135
12136 OP *
12137 Perl_parse_listexpr(pTHX_ U32 flags)
12138 {
12139     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12140 }
12141
12142 /*
12143 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12144
12145 Parse a single complete Perl expression.  This allows the full
12146 expression grammar, including the lowest-precedence operators such
12147 as C<or>.  The expression must be followed (and thus terminated) by a
12148 token that an expression would normally be terminated by: end-of-file,
12149 closing bracketing punctuation, semicolon, or one of the keywords that
12150 signals a postfix expression-statement modifier.  If I<flags> includes
12151 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12152 mandatory.  It is up to the caller to ensure that the dynamic parser
12153 state (L</PL_parser> et al) is correctly set to reflect the source of
12154 the code to be parsed and the lexical context for the expression.
12155
12156 The op tree representing the expression is returned.  If an optional
12157 expression is absent, a null pointer is returned, otherwise the pointer
12158 will be non-null.
12159
12160 If an error occurs in parsing or compilation, in most cases a valid op
12161 tree is returned anyway.  The error is reflected in the parser state,
12162 normally resulting in a single exception at the top level of parsing
12163 which covers all the compilation errors that occurred.  Some compilation
12164 errors, however, will throw an exception immediately.
12165
12166 =cut
12167 */
12168
12169 OP *
12170 Perl_parse_fullexpr(pTHX_ U32 flags)
12171 {
12172     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12173 }
12174
12175 /*
12176 =for apidoc Amx|OP *|parse_block|U32 flags
12177
12178 Parse a single complete Perl code block.  This consists of an opening
12179 brace, a sequence of statements, and a closing brace.  The block
12180 constitutes a lexical scope, so C<my> variables and various compile-time
12181 effects can be contained within it.  It is up to the caller to ensure
12182 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12183 reflect the source of the code to be parsed and the lexical context for
12184 the statement.
12185
12186 The op tree representing the code block is returned.  This is always a
12187 real op, never a null pointer.  It will normally be a C<lineseq> list,
12188 including C<nextstate> or equivalent ops.  No ops to construct any kind
12189 of runtime scope are included by virtue of it being a block.
12190
12191 If an error occurs in parsing or compilation, in most cases a valid op
12192 tree (most likely null) is returned anyway.  The error is reflected in
12193 the parser state, normally resulting in a single exception at the top
12194 level of parsing which covers all the compilation errors that occurred.
12195 Some compilation errors, however, will throw an exception immediately.
12196
12197 The I<flags> parameter is reserved for future use, and must always
12198 be zero.
12199
12200 =cut
12201 */
12202
12203 OP *
12204 Perl_parse_block(pTHX_ U32 flags)
12205 {
12206     if (flags)
12207         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12208     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12209 }
12210
12211 /*
12212 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12213
12214 Parse a single unadorned Perl statement.  This may be a normal imperative
12215 statement or a declaration that has compile-time effect.  It does not
12216 include any label or other affixture.  It is up to the caller to ensure
12217 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12218 reflect the source of the code to be parsed and the lexical context for
12219 the statement.
12220
12221 The op tree representing the statement is returned.  This may be a
12222 null pointer if the statement is null, for example if it was actually
12223 a subroutine definition (which has compile-time side effects).  If not
12224 null, it will be ops directly implementing the statement, suitable to
12225 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12226 equivalent op (except for those embedded in a scope contained entirely
12227 within the statement).
12228
12229 If an error occurs in parsing or compilation, in most cases a valid op
12230 tree (most likely null) is returned anyway.  The error is reflected in
12231 the parser state, normally resulting in a single exception at the top
12232 level of parsing which covers all the compilation errors that occurred.
12233 Some compilation errors, however, will throw an exception immediately.
12234
12235 The I<flags> parameter is reserved for future use, and must always
12236 be zero.
12237
12238 =cut
12239 */
12240
12241 OP *
12242 Perl_parse_barestmt(pTHX_ U32 flags)
12243 {
12244     if (flags)
12245         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12246     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12247 }
12248
12249 /*
12250 =for apidoc Amx|SV *|parse_label|U32 flags
12251
12252 Parse a single label, possibly optional, of the type that may prefix a
12253 Perl statement.  It is up to the caller to ensure that the dynamic parser
12254 state (L</PL_parser> et al) is correctly set to reflect the source of
12255 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12256 label is optional, otherwise it is mandatory.
12257
12258 The name of the label is returned in the form of a fresh scalar.  If an
12259 optional label is absent, a null pointer is returned.
12260
12261 If an error occurs in parsing, which can only occur if the label is
12262 mandatory, a valid label is returned anyway.  The error is reflected in
12263 the parser state, normally resulting in a single exception at the top
12264 level of parsing which covers all the compilation errors that occurred.
12265
12266 =cut
12267 */
12268
12269 SV *
12270 Perl_parse_label(pTHX_ U32 flags)
12271 {
12272     if (flags & ~PARSE_OPTIONAL)
12273         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12274     if (PL_lex_state == LEX_KNOWNEXT) {
12275         PL_parser->yychar = yylex();
12276         if (PL_parser->yychar == LABEL) {
12277             char * const lpv = pl_yylval.pval;
12278             STRLEN llen = strlen(lpv);
12279             PL_parser->yychar = YYEMPTY;
12280             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12281         } else {
12282             yyunlex();
12283             goto no_label;
12284         }
12285     } else {
12286         char *s, *t;
12287         STRLEN wlen, bufptr_pos;
12288         lex_read_space(0);
12289         t = s = PL_bufptr;
12290         if (!isIDFIRST_lazy_if(s, UTF))
12291             goto no_label;
12292         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12293         if (word_takes_any_delimeter(s, wlen))
12294             goto no_label;
12295         bufptr_pos = s - SvPVX(PL_linestr);
12296         PL_bufptr = t;
12297         lex_read_space(LEX_KEEP_PREVIOUS);
12298         t = PL_bufptr;
12299         s = SvPVX(PL_linestr) + bufptr_pos;
12300         if (t[0] == ':' && t[1] != ':') {
12301             PL_oldoldbufptr = PL_oldbufptr;
12302             PL_oldbufptr = s;
12303             PL_bufptr = t+1;
12304             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12305         } else {
12306             PL_bufptr = s;
12307             no_label:
12308             if (flags & PARSE_OPTIONAL) {
12309                 return NULL;
12310             } else {
12311                 qerror(Perl_mess(aTHX_ "Parse error"));
12312                 return newSVpvs("x");
12313             }
12314         }
12315     }
12316 }
12317
12318 /*
12319 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12320
12321 Parse a single complete Perl statement.  This may be a normal imperative
12322 statement or a declaration that has compile-time effect, and may include
12323 optional labels.  It is up to the caller to ensure that the dynamic
12324 parser state (L</PL_parser> et al) is correctly set to reflect the source
12325 of the code to be parsed and the lexical context for the statement.
12326
12327 The op tree representing the statement is returned.  This may be a
12328 null pointer if the statement is null, for example if it was actually
12329 a subroutine definition (which has compile-time side effects).  If not
12330 null, it will be the result of a L</newSTATEOP> call, normally including
12331 a C<nextstate> or equivalent op.
12332
12333 If an error occurs in parsing or compilation, in most cases a valid op
12334 tree (most likely null) is returned anyway.  The error is reflected in
12335 the parser state, normally resulting in a single exception at the top
12336 level of parsing which covers all the compilation errors that occurred.
12337 Some compilation errors, however, will throw an exception immediately.
12338
12339 The I<flags> parameter is reserved for future use, and must always
12340 be zero.
12341
12342 =cut
12343 */
12344
12345 OP *
12346 Perl_parse_fullstmt(pTHX_ U32 flags)
12347 {
12348     if (flags)
12349         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12350     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12351 }
12352
12353 /*
12354 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12355
12356 Parse a sequence of zero or more Perl statements.  These may be normal
12357 imperative statements, including optional labels, or declarations
12358 that have compile-time effect, or any mixture thereof.  The statement
12359 sequence ends when a closing brace or end-of-file is encountered in a
12360 place where a new statement could have validly started.  It is up to
12361 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12362 is correctly set to reflect the source of the code to be parsed and the
12363 lexical context for the statements.
12364
12365 The op tree representing the statement sequence is returned.  This may
12366 be a null pointer if the statements were all null, for example if there
12367 were no statements or if there were only subroutine definitions (which
12368 have compile-time side effects).  If not null, it will be a C<lineseq>
12369 list, normally including C<nextstate> or equivalent ops.
12370
12371 If an error occurs in parsing or compilation, in most cases a valid op
12372 tree is returned anyway.  The error is reflected in the parser state,
12373 normally resulting in a single exception at the top level of parsing
12374 which covers all the compilation errors that occurred.  Some compilation
12375 errors, however, will throw an exception immediately.
12376
12377 The I<flags> parameter is reserved for future use, and must always
12378 be zero.
12379
12380 =cut
12381 */
12382
12383 OP *
12384 Perl_parse_stmtseq(pTHX_ U32 flags)
12385 {
12386     OP *stmtseqop;
12387     I32 c;
12388     if (flags)
12389         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12390     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12391     c = lex_peek_unichar(0);
12392     if (c != -1 && c != /*{*/'}')
12393         qerror(Perl_mess(aTHX_ "Parse error"));
12394     return stmtseqop;
12395 }
12396
12397 #define lex_token_boundary() S_lex_token_boundary(aTHX)
12398 static void
12399 S_lex_token_boundary(pTHX)
12400 {
12401     PL_oldoldbufptr = PL_oldbufptr;
12402     PL_oldbufptr = PL_bufptr;
12403 }
12404
12405 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
12406 static OP *
12407 S_parse_opt_lexvar(pTHX)
12408 {
12409     I32 sigil, c;
12410     char *s, *d;
12411     OP *var;
12412     lex_token_boundary();
12413     sigil = lex_read_unichar(0);
12414     if (lex_peek_unichar(0) == '#') {
12415         qerror(Perl_mess(aTHX_ "Parse error"));
12416         return NULL;
12417     }
12418     lex_read_space(0);
12419     c = lex_peek_unichar(0);
12420     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
12421         return NULL;
12422     s = PL_bufptr;
12423     d = PL_tokenbuf + 1;
12424     PL_tokenbuf[0] = (char)sigil;
12425     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
12426     PL_bufptr = s;
12427     if (d == PL_tokenbuf+1)
12428         return NULL;
12429     *d = 0;
12430     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
12431                 OPf_MOD | (OPpLVAL_INTRO<<8));
12432     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
12433     return var;
12434 }
12435
12436 OP *
12437 Perl_parse_subsignature(pTHX)
12438 {
12439     I32 c;
12440     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
12441     OP *initops = NULL;
12442     lex_read_space(0);
12443     c = lex_peek_unichar(0);
12444     while (c != /*(*/')') {
12445         switch (c) {
12446             case '$': {
12447                 OP *var, *expr;
12448                 if (prev_type == 2)
12449                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12450                 var = parse_opt_lexvar();
12451                 expr = var ?
12452                     newBINOP(OP_AELEM, 0,
12453                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
12454                             OP_RV2AV),
12455                         newSVOP(OP_CONST, 0, newSViv(pos))) :
12456                     NULL;
12457                 lex_read_space(0);
12458                 c = lex_peek_unichar(0);
12459                 if (c == '=') {
12460                     lex_token_boundary();
12461                     lex_read_unichar(0);
12462                     lex_read_space(0);
12463                     c = lex_peek_unichar(0);
12464                     if (c == ',' || c == /*(*/')') {
12465                         if (var)
12466                             qerror(Perl_mess(aTHX_ "Optional parameter "
12467                                     "lacks default expression"));
12468                     } else {
12469                         OP *defexpr = parse_termexpr(0);
12470                         if (defexpr->op_type == OP_UNDEF &&
12471                                 !(defexpr->op_flags & OPf_KIDS)) {
12472                             op_free(defexpr);
12473                         } else {
12474                             OP *ifop =
12475                                 newBINOP(OP_GE, 0,
12476                                     scalar(newUNOP(OP_RV2AV, 0,
12477                                             newGVOP(OP_GV, 0, PL_defgv))),
12478                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
12479                             expr = var ?
12480                                 newCONDOP(0, ifop, expr, defexpr) :
12481                                 newLOGOP(OP_OR, 0, ifop, defexpr);
12482                         }
12483                     }
12484                     prev_type = 1;
12485                 } else {
12486                     if (prev_type == 1)
12487                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
12488                                 "follows optional parameter"));
12489                     prev_type = 0;
12490                     min_arity = pos + 1;
12491                 }
12492                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
12493                 if (expr)
12494                     initops = op_append_list(OP_LINESEQ, initops,
12495                                 newSTATEOP(0, NULL, expr));
12496                 max_arity = ++pos;
12497             } break;
12498             case '@':
12499             case '%': {
12500                 OP *var;
12501                 if (prev_type == 2)
12502                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12503                 var = parse_opt_lexvar();
12504                 if (c == '%') {
12505                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
12506                             newBINOP(OP_BIT_AND, 0,
12507                                 scalar(newUNOP(OP_RV2AV, 0,
12508                                     newGVOP(OP_GV, 0, PL_defgv))),
12509                                 newSVOP(OP_CONST, 0, newSViv(1))),
12510                             newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12511                                 newSVOP(OP_CONST, 0,
12512                                     newSVpvs("Odd name/value argument "
12513                                         "for subroutine"))));
12514                     if (pos != min_arity)
12515                         chkop = newLOGOP(OP_AND, 0,
12516                                     newBINOP(OP_GT, 0,
12517                                         scalar(newUNOP(OP_RV2AV, 0,
12518                                             newGVOP(OP_GV, 0, PL_defgv))),
12519                                         newSVOP(OP_CONST, 0, newSViv(pos))),
12520                                     chkop);
12521                     initops = op_append_list(OP_LINESEQ,
12522                                 newSTATEOP(0, NULL, chkop),
12523                                 initops);
12524                 }
12525                 if (var) {
12526                     OP *slice = pos ?
12527                         op_prepend_elem(OP_ASLICE,
12528                             newOP(OP_PUSHMARK, 0),
12529                             newLISTOP(OP_ASLICE, 0,
12530                                 list(newRANGE(0,
12531                                     newSVOP(OP_CONST, 0, newSViv(pos)),
12532                                     newUNOP(OP_AV2ARYLEN, 0,
12533                                         ref(newUNOP(OP_RV2AV, 0,
12534                                                 newGVOP(OP_GV, 0, PL_defgv)),
12535                                             OP_AV2ARYLEN)))),
12536                                 ref(newUNOP(OP_RV2AV, 0,
12537                                         newGVOP(OP_GV, 0, PL_defgv)),
12538                                     OP_ASLICE))) :
12539                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12540                     initops = op_append_list(OP_LINESEQ, initops,
12541                         newSTATEOP(0, NULL,
12542                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
12543                 }
12544                 prev_type = 2;
12545                 max_arity = -1;
12546             } break;
12547             default:
12548                 parse_error:
12549                 qerror(Perl_mess(aTHX_ "Parse error"));
12550                 return NULL;
12551         }
12552         lex_read_space(0);
12553         c = lex_peek_unichar(0);
12554         switch (c) {
12555             case /*(*/')': break;
12556             case ',':
12557                 do {
12558                     lex_token_boundary();
12559                     lex_read_unichar(0);
12560                     lex_read_space(0);
12561                     c = lex_peek_unichar(0);
12562                 } while (c == ',');
12563                 break;
12564             default:
12565                 goto parse_error;
12566         }
12567     }
12568     if (min_arity != 0) {
12569         initops = op_append_list(OP_LINESEQ,
12570             newSTATEOP(0, NULL,
12571                 newLOGOP(OP_OR, 0,
12572                     newBINOP(OP_GE, 0,
12573                         scalar(newUNOP(OP_RV2AV, 0,
12574                             newGVOP(OP_GV, 0, PL_defgv))),
12575                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
12576                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12577                         newSVOP(OP_CONST, 0,
12578                             newSVpvs("Too few arguments for subroutine"))))),
12579             initops);
12580     }
12581     if (max_arity != -1) {
12582         initops = op_append_list(OP_LINESEQ,
12583             newSTATEOP(0, NULL,
12584                 newLOGOP(OP_OR, 0,
12585                     newBINOP(OP_LE, 0,
12586                         scalar(newUNOP(OP_RV2AV, 0,
12587                             newGVOP(OP_GV, 0, PL_defgv))),
12588                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
12589                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12590                         newSVOP(OP_CONST, 0,
12591                             newSVpvs("Too many arguments for subroutine"))))),
12592             initops);
12593     }
12594     return initops;
12595 }
12596
12597 /*
12598  * Local variables:
12599  * c-indentation-style: bsd
12600  * c-basic-offset: 4
12601  * indent-tabs-mode: nil
12602  * End:
12603  *
12604  * ex: set ts=8 sts=4 sw=4 et:
12605  */