toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #include "keywords.h"
 186
 187 /* CLINE is a macro that ensures PL_copline has a sane value */
 188
 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 190
 191 #ifdef PERL_MAD
 192 #  define SKIPSPACE0(s) skipspace0(s)
 193 #  define SKIPSPACE1(s) skipspace1(s)
 194 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 195 #  define PEEKSPACE(s) skipspace2(s,0)
 196 #else
 197 #  define SKIPSPACE0(s) skipspace(s)
 198 #  define SKIPSPACE1(s) skipspace(s)
 199 #  define SKIPSPACE2(s,tsv) skipspace(s)
 200 #  define PEEKSPACE(s) skipspace(s)
 201 #endif
 202
 203 /*
 204  * Convenience functions to return different tokens and prime the
 205  * lexer for the next token.  They all take an argument.
 206  *
 207  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 208  * OPERATOR     : generic operator
 209  * AOPERATOR    : assignment operator
 210  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 211  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 212  * PREREF       : *EXPR where EXPR is not a simple identifier
 213  * TERM         : expression term
 214  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 215  * LOOPX        : loop exiting command (goto, last, dump, etc)
 216  * FTST         : file test operator
 217  * FUN0         : zero-argument function
 218  * FUN0OP       : zero-argument function, with its op created in this file
 219  * FUN1         : not used, except for not, which isn't a UNIOP
 220  * BOop         : bitwise or or xor
 221  * BAop         : bitwise and
 222  * SHop         : shift operator
 223  * PWop         : power operator
 224  * PMop         : pattern-matching operator
 225  * Aop          : addition-level operator
 226  * Mop          : multiplication-level operator
 227  * Eop          : equality-testing operator
 228  * Rop          : relational operator <= != gt
 229  *
 230  * Also see LOP and lop() below.
 231  */
 232
 233 #ifdef DEBUGGING /* Serve -DT. */
 234 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 235 #else
 236 #   define REPORT(retval) (retval)
 237 #endif
 238
 239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 248 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 249 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 250 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 251 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 252 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 253 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 254 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 255 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 256 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 257 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 258 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 259 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 260 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 261
 262 /* This bit of chicanery makes a unary function followed by
 263  * a parenthesis into a function with one argument, highest precedence.
 264  * The UNIDOR macro is for unary functions that can be followed by the //
 265  * operator (such as C<shift // 0>).
 266  */
 267 #define UNI3(f,x,have_x) { \
 268         pl_yylval.ival = f; \
 269         if (have_x) PL_expect = x; \
 270         PL_bufptr = s; \
 271         PL_last_uni = PL_oldbufptr; \
 272         PL_last_lop_op = f; \
 273         if (*s == '(') \
 274             return REPORT( (int)FUNC1 ); \
 275         s = PEEKSPACE(s); \
 276         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 277         }
 278 #define UNI(f)    UNI3(f,XTERM,1)
 279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 280 #define UNIPROTO(f,optional) { \
 281         if (optional) PL_last_uni = PL_oldbufptr; \
 282         OPERATOR(f); \
 283         }
 284
 285 #define UNIBRACK(f) UNI3(f,0,0)
 286
 287 /* grandfather return to old style */
 288 #define OLDLOP(f) \
 289         do { \
 290             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 291                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 292             pl_yylval.ival = (f); \
 293             PL_expect = XTERM; \
 294             PL_bufptr = s; \
 295             return (int)LSTOP; \
 296         } while(0)
 297
 298 #define COPLINE_INC_WITH_HERELINES                  \
 299     STMT_START {                                     \
 300         CopLINE_inc(PL_curcop);                       \
 301         if (PL_parser->herelines)                      \
 302             CopLINE(PL_curcop) += PL_parser->herelines, \
 303             PL_parser->herelines = 0;                    \
 304     } STMT_END
 305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 306  * is no sublex_push to follow. */
 307 #define COPLINE_SET_FROM_MULTI_END            \
 308     STMT_START {                               \
 309         CopLINE_set(PL_curcop, PL_multi_end);   \
 310         if (PL_multi_end != PL_multi_start)      \
 311             PL_parser->herelines = 0;             \
 312     } STMT_END
 313
 314
 315 #ifdef DEBUGGING
 316
 317 /* how to interpret the pl_yylval associated with the token */
 318 enum token_type {
 319     TOKENTYPE_NONE,
 320     TOKENTYPE_IVAL,
 321     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 322     TOKENTYPE_PVAL,
 323     TOKENTYPE_OPVAL
 324 };
 325
 326 static struct debug_tokens {
 327     const int token;
 328     enum token_type type;
 329     const char *name;
 330 } const debug_tokens[] =
 331 {
 332     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 333     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 334     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 335     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 336     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 337     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 338     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 339     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 340     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 341     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 342     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 343     { DO,               TOKENTYPE_NONE,         "DO" },
 344     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 345     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 346     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 347     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 348     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 349     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 350     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 351     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 352     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 353     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 354     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 355     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 356     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 357     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 358     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 359     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 360     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 361     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 362     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 363     { IF,               TOKENTYPE_IVAL,         "IF" },
 364     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 365     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 366     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 367     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 368     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 369     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 370     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 371     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 372     { MY,               TOKENTYPE_IVAL,         "MY" },
 373     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 374     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 375     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 376     { OROR,             TOKENTYPE_NONE,         "OROR" },
 377     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 378     { PEG,              TOKENTYPE_NONE,         "PEG" },
 379     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 380     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 381     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 382     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 383     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 384     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 385     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 386     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 387     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 388     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 389     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 390     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 391     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 392     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 393     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 394     { SUB,              TOKENTYPE_NONE,         "SUB" },
 395     { THING,            TOKENTYPE_OPVAL,        "THING" },
 396     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 397     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 398     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 399     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 400     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 401     { USE,              TOKENTYPE_IVAL,         "USE" },
 402     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 403     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 404     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 405     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 406     { 0,                TOKENTYPE_NONE,         NULL }
 407 };
 408
 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 410
 411 STATIC int
 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 413 {
 414     dVAR;
 415
 416     PERL_ARGS_ASSERT_TOKEREPORT;
 417
 418     if (DEBUG_T_TEST) {
 419         const char *name = NULL;
 420         enum token_type type = TOKENTYPE_NONE;
 421         const struct debug_tokens *p;
 422         SV* const report = newSVpvs("<== ");
 423
 424         for (p = debug_tokens; p->token; p++) {
 425             if (p->token == (int)rv) {
 426                 name = p->name;
 427                 type = p->type;
 428                 break;
 429             }
 430         }
 431         if (name)
 432             Perl_sv_catpv(aTHX_ report, name);
 433         else if ((char)rv > ' ' && (char)rv <= '~')
 434         {
 435             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 436             if ((char)rv == 'p')
 437                 sv_catpvs(report, " (pending identifier)");
 438         }
 439         else if (!rv)
 440             sv_catpvs(report, "EOF");
 441         else
 442             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 443         switch (type) {
 444         case TOKENTYPE_NONE:
 445             break;
 446         case TOKENTYPE_IVAL:
 447             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 448             break;
 449         case TOKENTYPE_OPNUM:
 450             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 451                                     PL_op_name[lvalp->ival]);
 452             break;
 453         case TOKENTYPE_PVAL:
 454             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 455             break;
 456         case TOKENTYPE_OPVAL:
 457             if (lvalp->opval) {
 458                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 459                                     PL_op_name[lvalp->opval->op_type]);
 460                 if (lvalp->opval->op_type == OP_CONST) {
 461                     Perl_sv_catpvf(aTHX_ report, " %s",
 462                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 463                 }
 464
 465             }
 466             else
 467                 sv_catpvs(report, "(opval=null)");
 468             break;
 469         }
 470         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 471     };
 472     return (int)rv;
 473 }
 474
 475
 476 /* print the buffer with suitable escapes */
 477
 478 STATIC void
 479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 480 {
 481     SV* const tmp = newSVpvs("");
 482
 483     PERL_ARGS_ASSERT_PRINTBUF;
 484
 485     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 486     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 487     GCC_DIAG_RESTORE;
 488     SvREFCNT_dec(tmp);
 489 }
 490
 491 #endif
 492
 493 static int
 494 S_deprecate_commaless_var_list(pTHX) {
 495     PL_expect = XTERM;
 496     deprecate("comma-less variable list");
 497     return REPORT(','); /* grandfather non-comma-format format */
 498 }
 499
 500 /*
 501  * S_ao
 502  *
 503  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 504  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 505  */
 506
 507 STATIC int
 508 S_ao(pTHX_ int toketype)
 509 {
 510     dVAR;
 511     if (*PL_bufptr == '=') {
 512         PL_bufptr++;
 513         if (toketype == ANDAND)
 514             pl_yylval.ival = OP_ANDASSIGN;
 515         else if (toketype == OROR)
 516             pl_yylval.ival = OP_ORASSIGN;
 517         else if (toketype == DORDOR)
 518             pl_yylval.ival = OP_DORASSIGN;
 519         toketype = ASSIGNOP;
 520     }
 521     return toketype;
 522 }
 523
 524 /*
 525  * S_no_op
 526  * When Perl expects an operator and finds something else, no_op
 527  * prints the warning.  It always prints "<something> found where
 528  * operator expected.  It prints "Missing semicolon on previous line?"
 529  * if the surprise occurs at the start of the line.  "do you need to
 530  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 531  * where the compiler doesn't know if foo is a method call or a function.
 532  * It prints "Missing operator before end of line" if there's nothing
 533  * after the missing operator, or "... before <...>" if there is something
 534  * after the missing operator.
 535  */
 536
 537 STATIC void
 538 S_no_op(pTHX_ const char *const what, char *s)
 539 {
 540     dVAR;
 541     char * const oldbp = PL_bufptr;
 542     const bool is_first = (PL_oldbufptr == PL_linestart);
 543
 544     PERL_ARGS_ASSERT_NO_OP;
 545
 546     if (!s)
 547         s = oldbp;
 548     else
 549         PL_bufptr = s;
 550     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 551     if (ckWARN_d(WARN_SYNTAX)) {
 552         if (is_first)
 553             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 554                     "\t(Missing semicolon on previous line?)\n");
 555         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 556             const char *t;
 557             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 558                                                             t += UTF ? UTF8SKIP(t) : 1)
 559                 NOOP;
 560             if (t < PL_bufptr && isSPACE(*t))
 561                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 562                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 563                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 564         }
 565         else {
 566             assert(s >= oldbp);
 567             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 568                     "\t(Missing operator before %"UTF8f"?)\n",
 569                      UTF8fARG(UTF, s - oldbp, oldbp));
 570         }
 571     }
 572     PL_bufptr = oldbp;
 573 }
 574
 575 /*
 576  * S_missingterm
 577  * Complain about missing quote/regexp/heredoc terminator.
 578  * If it's called with NULL then it cauterizes the line buffer.
 579  * If we're in a delimited string and the delimiter is a control
 580  * character, it's reformatted into a two-char sequence like ^C.
 581  * This is fatal.
 582  */
 583
 584 STATIC void
 585 S_missingterm(pTHX_ char *s)
 586 {
 587     dVAR;
 588     char tmpbuf[3];
 589     char q;
 590     if (s) {
 591         char * const nl = strrchr(s,'\n');
 592         if (nl)
 593             *nl = '\0';
 594     }
 595     else if ((U8) PL_multi_close < 32) {
 596         *tmpbuf = '^';
 597         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 598         tmpbuf[2] = '\0';
 599         s = tmpbuf;
 600     }
 601     else {
 602         *tmpbuf = (char)PL_multi_close;
 603         tmpbuf[1] = '\0';
 604         s = tmpbuf;
 605     }
 606     q = strchr(s,'"') ? '\'' : '"';
 607     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 608 }
 609
 610 #include "feature.h"
 611
 612 /*
 613  * Check whether the named feature is enabled.
 614  */
 615 bool
 616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 617 {
 618     dVAR;
 619     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 620
 621     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 622
 623     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 624
 625     if (namelen > MAX_FEATURE_LEN)
 626         return FALSE;
 627     memcpy(&he_name[8], name, namelen);
 628
 629     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 630                                      REFCOUNTED_HE_EXISTS));
 631 }
 632
 633 /*
 634  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 635  * utf16-to-utf8-reversed.
 636  */
 637
 638 #ifdef PERL_CR_FILTER
 639 static void
 640 strip_return(SV *sv)
 641 {
 642     const char *s = SvPVX_const(sv);
 643     const char * const e = s + SvCUR(sv);
 644
 645     PERL_ARGS_ASSERT_STRIP_RETURN;
 646
 647     /* outer loop optimized to do nothing if there are no CR-LFs */
 648     while (s < e) {
 649         if (*s++ == '\r' && *s == '\n') {
 650             /* hit a CR-LF, need to copy the rest */
 651             char *d = s - 1;
 652             *d++ = *s++;
 653             while (s < e) {
 654                 if (*s == '\r' && s[1] == '\n')
 655                     s++;
 656                 *d++ = *s++;
 657             }
 658             SvCUR(sv) -= s - d;
 659             return;
 660         }
 661     }
 662 }
 663
 664 STATIC I32
 665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 666 {
 667     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 668     if (count > 0 && !maxlen)
 669         strip_return(sv);
 670     return count;
 671 }
 672 #endif
 673
 674 /*
 675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 676
 677 Creates and initialises a new lexer/parser state object, supplying
 678 a context in which to lex and parse from a new source of Perl code.
 679 A pointer to the new state object is placed in L</PL_parser>.  An entry
 680 is made on the save stack so that upon unwinding the new state object
 681 will be destroyed and the former value of L</PL_parser> will be restored.
 682 Nothing else need be done to clean up the parsing context.
 683
 684 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 685 non-null, provides a string (in SV form) containing code to be parsed.
 686 A copy of the string is made, so subsequent modification of I<line>
 687 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 688 from which code will be read to be parsed.  If both are non-null, the
 689 code in I<line> comes first and must consist of complete lines of input,
 690 and I<rsfp> supplies the remainder of the source.
 691
 692 The I<flags> parameter is reserved for future use.  Currently it is only
 693 used by perl internally, so extensions should always pass zero.
 694
 695 =cut
 696 */
 697
 698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 699    can share filters with the current parser.
 700    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 701    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 702    destruction. This is used to handle the case of defaulting to reading the
 703    script from the standard input because no filename was given on the command
 704    line (without getting confused by situation where STDIN has been closed, so
 705    the script handle is opened on fd 0)  */
 706
 707 void
 708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 709 {
 710     dVAR;
 711     const char *s = NULL;
 712     yy_parser *parser, *oparser;
 713     if (flags && flags & ~LEX_START_FLAGS)
 714         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 715
 716     /* create and initialise a parser */
 717
 718     Newxz(parser, 1, yy_parser);
 719     parser->old_parser = oparser = PL_parser;
 720     PL_parser = parser;
 721
 722     parser->stack = NULL;
 723     parser->ps = NULL;
 724     parser->stack_size = 0;
 725
 726     /* on scope exit, free this parser and restore any outer one */
 727     SAVEPARSER(parser);
 728     parser->saved_curcop = PL_curcop;
 729
 730     /* initialise lexer state */
 731
 732 #ifdef PERL_MAD
 733     parser->curforce = -1;
 734 #else
 735     parser->nexttoke = 0;
 736 #endif
 737     parser->error_count = oparser ? oparser->error_count : 0;
 738     parser->copline = parser->preambling = NOLINE;
 739     parser->lex_state = LEX_NORMAL;
 740     parser->expect = XSTATE;
 741     parser->rsfp = rsfp;
 742     parser->rsfp_filters =
 743       !(flags & LEX_START_SAME_FILTER) || !oparser
 744         ? NULL
 745         : MUTABLE_AV(SvREFCNT_inc(
 746             oparser->rsfp_filters
 747              ? oparser->rsfp_filters
 748              : (oparser->rsfp_filters = newAV())
 749           ));
 750
 751     Newx(parser->lex_brackstack, 120, char);
 752     Newx(parser->lex_casestack, 12, char);
 753     *parser->lex_casestack = '\0';
 754     Newxz(parser->lex_shared, 1, LEXSHARED);
 755
 756     if (line) {
 757         STRLEN len;
 758         s = SvPV_const(line, len);
 759         parser->linestr = flags & LEX_START_COPIED
 760                             ? SvREFCNT_inc_simple_NN(line)
 761                             : newSVpvn_flags(s, len, SvUTF8(line));
 762         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 763     } else {
 764         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 765     }
 766     parser->oldoldbufptr =
 767         parser->oldbufptr =
 768         parser->bufptr =
 769         parser->linestart = SvPVX(parser->linestr);
 770     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 771     parser->last_lop = parser->last_uni = NULL;
 772
 773     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 774                                                         |LEX_DONT_CLOSE_RSFP));
 775     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 776                                                         |LEX_DONT_CLOSE_RSFP));
 777
 778     parser->in_pod = parser->filtered = 0;
 779 }
 780
 781
 782 /* delete a parser object */
 783
 784 void
 785 Perl_parser_free(pTHX_  const yy_parser *parser)
 786 {
 787     PERL_ARGS_ASSERT_PARSER_FREE;
 788
 789     PL_curcop = parser->saved_curcop;
 790     SvREFCNT_dec(parser->linestr);
 791
 792     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 793         PerlIO_clearerr(parser->rsfp);
 794     else if (parser->rsfp && (!parser->old_parser ||
 795                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 796         PerlIO_close(parser->rsfp);
 797     SvREFCNT_dec(parser->rsfp_filters);
 798     SvREFCNT_dec(parser->lex_stuff);
 799     SvREFCNT_dec(parser->sublex_info.repl);
 800
 801     Safefree(parser->lex_brackstack);
 802     Safefree(parser->lex_casestack);
 803     Safefree(parser->lex_shared);
 804     PL_parser = parser->old_parser;
 805     Safefree(parser);
 806 }
 807
 808 void
 809 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 810 {
 811 #ifdef PERL_MAD
 812     I32 nexttoke = parser->lasttoke;
 813 #else
 814     I32 nexttoke = parser->nexttoke;
 815 #endif
 816     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 817     while (nexttoke--) {
 818 #ifdef PERL_MAD
 819         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 820                                 & 0xffff)
 821          && parser->nexttoke[nexttoke].next_val.opval
 822          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 823          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 824                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 825                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 826         }
 827 #else
 828         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 829          && parser->nextval[nexttoke].opval
 830          && parser->nextval[nexttoke].opval->op_slabbed
 831          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 832             op_free(parser->nextval[nexttoke].opval);
 833             parser->nextval[nexttoke].opval = NULL;
 834         }
 835 #endif
 836     }
 837 }
 838
 839
 840 /*
 841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 842
 843 Buffer scalar containing the chunk currently under consideration of the
 844 text currently being lexed.  This is always a plain string scalar (for
 845 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 846 normal scalar means; instead refer to the buffer directly by the pointer
 847 variables described below.
 848
 849 The lexer maintains various C<char*> pointers to things in the
 850 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 851 reallocated, all of these pointers must be updated.  Don't attempt to
 852 do this manually, but rather use L</lex_grow_linestr> if you need to
 853 reallocate the buffer.
 854
 855 The content of the text chunk in the buffer is commonly exactly one
 856 complete line of input, up to and including a newline terminator,
 857 but there are situations where it is otherwise.  The octets of the
 858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 859 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 860 flag on this scalar, which may disagree with it.
 861
 862 For direct examination of the buffer, the variable
 863 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 865 of these pointers is usually preferable to examination of the scalar
 866 through normal scalar means.
 867
 868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 869
 870 Direct pointer to the end of the chunk of text currently being lexed, the
 871 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 872 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 873 always located at the end of the buffer, and does not count as part of
 874 the buffer's contents.
 875
 876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 877
 878 Points to the current position of lexing inside the lexer buffer.
 879 Characters around this point may be freely examined, within
 880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 881 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 883
 884 Lexing code (whether in the Perl core or not) moves this pointer past
 885 the characters that it consumes.  It is also expected to perform some
 886 bookkeeping whenever a newline character is consumed.  This movement
 887 can be more conveniently performed by the function L</lex_read_to>,
 888 which handles newlines appropriately.
 889
 890 Interpretation of the buffer's octets can be abstracted out by
 891 using the slightly higher-level functions L</lex_peek_unichar> and
 892 L</lex_read_unichar>.
 893
 894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 895
 896 Points to the start of the current line inside the lexer buffer.
 897 This is useful for indicating at which column an error occurred, and
 898 not much else.  This must be updated by any lexing code that consumes
 899 a newline; the function L</lex_read_to> handles this detail.
 900
 901 =cut
 902 */
 903
 904 /*
 905 =for apidoc Amx|bool|lex_bufutf8
 906
 907 Indicates whether the octets in the lexer buffer
 908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 909 of Unicode characters.  If not, they should be interpreted as Latin-1
 910 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 911
 912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 913 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 914 encoding.
 915
 916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 917 is significant, but not the whole story regarding the input character
 918 encoding.  Normally, when a file is being read, the scalar contains octets
 919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 920 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 923 is in effect.  This logic may change in the future; use this function
 924 instead of implementing the logic yourself.
 925
 926 =cut
 927 */
 928
 929 bool
 930 Perl_lex_bufutf8(pTHX)
 931 {
 932     return UTF;
 933 }
 934
 935 /*
 936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 937
 938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 939 at least I<len> octets (including terminating NUL).  Returns a
 940 pointer to the reallocated buffer.  This is necessary before making
 941 any direct modification of the buffer that would increase its length.
 942 L</lex_stuff_pvn> provides a more convenient way to insert text into
 943 the buffer.
 944
 945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 946 this function updates all of the lexer's variables that point directly
 947 into the buffer.
 948
 949 =cut
 950 */
 951
 952 char *
 953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 954 {
 955     SV *linestr;
 956     char *buf;
 957     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 958     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 959     linestr = PL_parser->linestr;
 960     buf = SvPVX(linestr);
 961     if (len <= SvLEN(linestr))
 962         return buf;
 963     bufend_pos = PL_parser->bufend - buf;
 964     bufptr_pos = PL_parser->bufptr - buf;
 965     oldbufptr_pos = PL_parser->oldbufptr - buf;
 966     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 967     linestart_pos = PL_parser->linestart - buf;
 968     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 969     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 970     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 971                             PL_parser->lex_shared->re_eval_start - buf : 0;
 972
 973     buf = sv_grow(linestr, len);
 974
 975     PL_parser->bufend = buf + bufend_pos;
 976     PL_parser->bufptr = buf + bufptr_pos;
 977     PL_parser->oldbufptr = buf + oldbufptr_pos;
 978     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 979     PL_parser->linestart = buf + linestart_pos;
 980     if (PL_parser->last_uni)
 981         PL_parser->last_uni = buf + last_uni_pos;
 982     if (PL_parser->last_lop)
 983         PL_parser->last_lop = buf + last_lop_pos;
 984     if (PL_parser->lex_shared->re_eval_start)
 985         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 986     return buf;
 987 }
 988
 989 /*
 990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 991
 992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 994 reallocating the buffer if necessary.  This means that lexing code that
 995 runs later will see the characters as if they had appeared in the input.
 996 It is not recommended to do this as part of normal parsing, and most
 997 uses of this facility run the risk of the inserted characters being
 998 interpreted in an unintended manner.
 999
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1007
1008 =cut
1009 */
1010
1011 void
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1013 {
1014     dVAR;
1015     char *bufptr;
1016     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017     if (flags & ~(LEX_STUFF_UTF8))
1018         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1019     if (UTF) {
1020         if (flags & LEX_STUFF_UTF8) {
1021             goto plain_copy;
1022         } else {
1023             STRLEN highhalf = 0;    /* Count of variants */
1024             const char *p, *e = pv+len;
1025             for (p = pv; p != e; p++) {
1026                 if (! UTF8_IS_INVARIANT(*p)) {
1027                     highhalf++;
1028                 }
1029             }
1030             if (!highhalf)
1031                 goto plain_copy;
1032             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033             bufptr = PL_parser->bufptr;
1034             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035             SvCUR_set(PL_parser->linestr,
1036                 SvCUR(PL_parser->linestr) + len+highhalf);
1037             PL_parser->bufend += len+highhalf;
1038             for (p = pv; p != e; p++) {
1039                 U8 c = (U8)*p;
1040                 if (! UTF8_IS_INVARIANT(c)) {
1041                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1043                 } else {
1044                     *bufptr++ = (char)c;
1045                 }
1046             }
1047         }
1048     } else {
1049         if (flags & LEX_STUFF_UTF8) {
1050             STRLEN highhalf = 0;
1051             const char *p, *e = pv+len;
1052             for (p = pv; p != e; p++) {
1053                 U8 c = (U8)*p;
1054                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056                                 "non-Latin-1 character into Latin-1 input");
1057                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1058                     p++;
1059                     highhalf++;
1060                 } else if (! UTF8_IS_INVARIANT(c)) {
1061                     /* malformed UTF-8 */
1062                     ENTER;
1063                     SAVESPTR(PL_warnhook);
1064                     PL_warnhook = PERL_WARNHOOK_FATAL;
1065                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1066                     LEAVE;
1067                 }
1068             }
1069             if (!highhalf)
1070                 goto plain_copy;
1071             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072             bufptr = PL_parser->bufptr;
1073             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074             SvCUR_set(PL_parser->linestr,
1075                 SvCUR(PL_parser->linestr) + len-highhalf);
1076             PL_parser->bufend += len-highhalf;
1077             p = pv;
1078             while (p < e) {
1079                 if (UTF8_IS_INVARIANT(*p)) {
1080                     *bufptr++ = *p;
1081                     p++;
1082                 }
1083                 else {
1084                     assert(p < e -1 );
1085                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1086                     p += 2;
1087                 }
1088             }
1089         } else {
1090           plain_copy:
1091             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092             bufptr = PL_parser->bufptr;
1093             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095             PL_parser->bufend += len;
1096             Copy(pv, bufptr, len, char);
1097         }
1098     }
1099 }
1100
1101 /*
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1103
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary.  This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1111
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul.  These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>.  The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1119
1120 =cut
1121 */
1122
1123 void
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1125 {
1126     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127     lex_stuff_pvn(pv, strlen(pv), flags);
1128 }
1129
1130 /*
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1132
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary.  This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1140
1141 The string to be inserted is the string value of I<sv>.  The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1146
1147 =cut
1148 */
1149
1150 void
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1152 {
1153     char *pv;
1154     STRLEN len;
1155     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1156     if (flags)
1157         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1158     pv = SvPV(sv, len);
1159     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1160 }
1161
1162 /*
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1164
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1169
1170 This is not the normal way to consume lexed text.  For that, use
1171 L</lex_read_to>.
1172
1173 =cut
1174 */
1175
1176 void
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1178 {
1179     char *buf, *bufend;
1180     STRLEN unstuff_len;
1181     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182     buf = PL_parser->bufptr;
1183     if (ptr < buf)
1184         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185     if (ptr == buf)
1186         return;
1187     bufend = PL_parser->bufend;
1188     if (ptr > bufend)
1189         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190     unstuff_len = ptr - buf;
1191     Move(ptr, buf, bufend+1-ptr, char);
1192     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193     PL_parser->bufend = bufend - unstuff_len;
1194 }
1195
1196 /*
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1198
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1203
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1207
1208 =cut
1209 */
1210
1211 void
1212 Perl_lex_read_to(pTHX_ char *ptr)
1213 {
1214     char *s;
1215     PERL_ARGS_ASSERT_LEX_READ_TO;
1216     s = PL_parser->bufptr;
1217     if (ptr < s || ptr > PL_parser->bufend)
1218         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219     for (; s != ptr; s++)
1220         if (*s == '\n') {
1221             COPLINE_INC_WITH_HERELINES;
1222             PL_parser->linestart = s+1;
1223         }
1224     PL_parser->bufptr = ptr;
1225 }
1226
1227 /*
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1229
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>.  The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately.  I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1235
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it.  However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1243
1244 =cut
1245 */
1246
1247 void
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1249 {
1250     char *buf;
1251     STRLEN discard_len;
1252     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253     buf = SvPVX(PL_parser->linestr);
1254     if (ptr < buf)
1255         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256     if (ptr == buf)
1257         return;
1258     if (ptr > PL_parser->bufptr)
1259         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260     discard_len = ptr - buf;
1261     if (PL_parser->oldbufptr < ptr)
1262         PL_parser->oldbufptr = ptr;
1263     if (PL_parser->oldoldbufptr < ptr)
1264         PL_parser->oldoldbufptr = ptr;
1265     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266         PL_parser->last_uni = NULL;
1267     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268         PL_parser->last_lop = NULL;
1269     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271     PL_parser->bufend -= discard_len;
1272     PL_parser->bufptr -= discard_len;
1273     PL_parser->oldbufptr -= discard_len;
1274     PL_parser->oldoldbufptr -= discard_len;
1275     if (PL_parser->last_uni)
1276         PL_parser->last_uni -= discard_len;
1277     if (PL_parser->last_lop)
1278         PL_parser->last_lop -= discard_len;
1279 }
1280
1281 /*
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1283
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more.  It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1289
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded.  If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1296
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1299
1300 =cut
1301 */
1302
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM  0x40000000
1305
1306 bool
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1308 {
1309     SV *linestr;
1310     char *buf;
1311     STRLEN old_bufend_pos, new_bufend_pos;
1312     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314     bool got_some_for_debugger = 0;
1315     bool got_some;
1316     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318     linestr = PL_parser->linestr;
1319     buf = SvPVX(linestr);
1320     if (!(flags & LEX_KEEP_PREVIOUS) &&
1321             PL_parser->bufptr == PL_parser->bufend) {
1322         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1323         linestart_pos = 0;
1324         if (PL_parser->last_uni != PL_parser->bufend)
1325             PL_parser->last_uni = NULL;
1326         if (PL_parser->last_lop != PL_parser->bufend)
1327             PL_parser->last_lop = NULL;
1328         last_uni_pos = last_lop_pos = 0;
1329         *buf = 0;
1330         SvCUR(linestr) = 0;
1331     } else {
1332         old_bufend_pos = PL_parser->bufend - buf;
1333         bufptr_pos = PL_parser->bufptr - buf;
1334         oldbufptr_pos = PL_parser->oldbufptr - buf;
1335         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336         linestart_pos = PL_parser->linestart - buf;
1337         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1339     }
1340     if (flags & LEX_FAKE_EOF) {
1341         goto eof;
1342     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1343         got_some = 0;
1344     } else if (filter_gets(linestr, old_bufend_pos)) {
1345         got_some = 1;
1346         got_some_for_debugger = 1;
1347     } else if (flags & LEX_NO_TERM) {
1348         got_some = 0;
1349     } else {
1350         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1351             sv_setpvs(linestr, "");
1352         eof:
1353         /* End of real input.  Close filehandle (unless it was STDIN),
1354          * then add implicit termination.
1355          */
1356         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357             PerlIO_clearerr(PL_parser->rsfp);
1358         else if (PL_parser->rsfp)
1359             (void)PerlIO_close(PL_parser->rsfp);
1360         PL_parser->rsfp = NULL;
1361         PL_parser->in_pod = PL_parser->filtered = 0;
1362 #ifdef PERL_MAD
1363         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1364             PL_faketokens = 1;
1365 #endif
1366         if (!PL_in_eval && PL_minus_p) {
1367             sv_catpvs(linestr,
1368                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369             PL_minus_n = PL_minus_p = 0;
1370         } else if (!PL_in_eval && PL_minus_n) {
1371             sv_catpvs(linestr, /*{*/";}");
1372             PL_minus_n = 0;
1373         } else
1374             sv_catpvs(linestr, ";");
1375         got_some = 1;
1376     }
1377     buf = SvPVX(linestr);
1378     new_bufend_pos = SvCUR(linestr);
1379     PL_parser->bufend = buf + new_bufend_pos;
1380     PL_parser->bufptr = buf + bufptr_pos;
1381     PL_parser->oldbufptr = buf + oldbufptr_pos;
1382     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383     PL_parser->linestart = buf + linestart_pos;
1384     if (PL_parser->last_uni)
1385         PL_parser->last_uni = buf + last_uni_pos;
1386     if (PL_parser->last_lop)
1387         PL_parser->last_lop = buf + last_lop_pos;
1388     if (PL_parser->preambling != NOLINE) {
1389         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390         PL_parser->preambling = NOLINE;
1391     }
1392     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393             PL_curstash != PL_debstash) {
1394         /* debugger active and we're not compiling the debugger code,
1395          * so store the line into the debugger's array of lines
1396          */
1397         update_debugger_info(NULL, buf+old_bufend_pos,
1398             new_bufend_pos-old_bufend_pos);
1399     }
1400     return got_some;
1401 }
1402
1403 /*
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1405
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text.  To consume the
1409 peeked character, use L</lex_read_unichar>.
1410
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in.  Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1415
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1418
1419 =cut
1420 */
1421
1422 I32
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1424 {
1425     dVAR;
1426     char *s, *bufend;
1427     if (flags & ~(LEX_KEEP_PREVIOUS))
1428         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429     s = PL_parser->bufptr;
1430     bufend = PL_parser->bufend;
1431     if (UTF) {
1432         U8 head;
1433         I32 unichar;
1434         STRLEN len, retlen;
1435         if (s == bufend) {
1436             if (!lex_next_chunk(flags))
1437                 return -1;
1438             s = PL_parser->bufptr;
1439             bufend = PL_parser->bufend;
1440         }
1441         head = (U8)*s;
1442         if (UTF8_IS_INVARIANT(head))
1443             return head;
1444         if (UTF8_IS_START(head)) {
1445             len = UTF8SKIP(&head);
1446             while ((STRLEN)(bufend-s) < len) {
1447                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1448                     break;
1449                 s = PL_parser->bufptr;
1450                 bufend = PL_parser->bufend;
1451             }
1452         }
1453         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454         if (retlen == (STRLEN)-1) {
1455             /* malformed UTF-8 */
1456             ENTER;
1457             SAVESPTR(PL_warnhook);
1458             PL_warnhook = PERL_WARNHOOK_FATAL;
1459             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1460             LEAVE;
1461         }
1462         return unichar;
1463     } else {
1464         if (s == bufend) {
1465             if (!lex_next_chunk(flags))
1466                 return -1;
1467             s = PL_parser->bufptr;
1468         }
1469         return (U8)*s;
1470     }
1471 }
1472
1473 /*
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1475
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text.  To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1481
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in.  Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1486
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1489
1490 =cut
1491 */
1492
1493 I32
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1495 {
1496     I32 c;
1497     if (flags & ~(LEX_KEEP_PREVIOUS))
1498         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499     c = lex_peek_unichar(flags);
1500     if (c != -1) {
1501         if (c == '\n')
1502             COPLINE_INC_WITH_HERELINES;
1503         if (UTF)
1504             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1505         else
1506             ++(PL_parser->bufptr);
1507     }
1508     return c;
1509 }
1510
1511 /*
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1513
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed.  The spaces may include ordinary whitespace characters and
1516 Perl-style comments.  C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1519
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in.  Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1524
1525 =cut
1526 */
1527
1528 #define LEX_NO_INCLINE    0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1530
1531 void
1532 Perl_lex_read_space(pTHX_ U32 flags)
1533 {
1534     char *s, *bufend;
1535     const bool can_incline = !(flags & LEX_NO_INCLINE);
1536     bool need_incline = 0;
1537     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1539 #ifdef PERL_MAD
1540     if (PL_skipwhite) {
1541         sv_free(PL_skipwhite);
1542         PL_skipwhite = NULL;
1543     }
1544     if (PL_madskills)
1545         PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547     s = PL_parser->bufptr;
1548     bufend = PL_parser->bufend;
1549     while (1) {
1550         char c = *s;
1551         if (c == '#') {
1552             do {
1553                 c = *++s;
1554             } while (!(c == '\n' || (c == 0 && s == bufend)));
1555         } else if (c == '\n') {
1556             s++;
1557             if (can_incline) {
1558                 PL_parser->linestart = s;
1559                 if (s == bufend)
1560                     need_incline = 1;
1561                 else
1562                     incline(s);
1563             }
1564         } else if (isSPACE(c)) {
1565             s++;
1566         } else if (c == 0 && s == bufend) {
1567             bool got_more;
1568             line_t l;
1569 #ifdef PERL_MAD
1570             if (PL_madskills)
1571                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573             if (flags & LEX_NO_NEXT_CHUNK)
1574                 break;
1575             PL_parser->bufptr = s;
1576             l = CopLINE(PL_curcop);
1577             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578             got_more = lex_next_chunk(flags);
1579             CopLINE_set(PL_curcop, l);
1580             s = PL_parser->bufptr;
1581             bufend = PL_parser->bufend;
1582             if (!got_more)
1583                 break;
1584             if (can_incline && need_incline && PL_parser->rsfp) {
1585                 incline(s);
1586                 need_incline = 0;
1587             }
1588         } else {
1589             break;
1590         }
1591     }
1592 #ifdef PERL_MAD
1593     if (PL_madskills)
1594         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596     PL_parser->bufptr = s;
1597 }
1598
1599 /*
1600
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1602
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1607
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1610 C<false>.
1611
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1613
1614 =cut
1615
1616  */
1617
1618 bool
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1620 {
1621     STRLEN len, origlen;
1622     char *p = proto ? SvPV(proto, len) : NULL;
1623     bool bad_proto = FALSE;
1624     bool in_brackets = FALSE;
1625     bool after_slash = FALSE;
1626     char greedy_proto = ' ';
1627     bool proto_after_greedy_proto = FALSE;
1628     bool must_be_last = FALSE;
1629     bool underscore = FALSE;
1630     bool bad_proto_after_underscore = FALSE;
1631
1632     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1633
1634     if (!proto)
1635         return TRUE;
1636
1637     origlen = len;
1638     for (; len--; p++) {
1639         if (!isSPACE(*p)) {
1640             if (must_be_last)
1641                 proto_after_greedy_proto = TRUE;
1642             if (underscore) {
1643                 if (!strchr(";@%", *p))
1644                     bad_proto_after_underscore = TRUE;
1645                 underscore = FALSE;
1646             }
1647             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1648                 bad_proto = TRUE;
1649             }
1650             else {
1651                 if (*p == '[')
1652                     in_brackets = TRUE;
1653                 else if (*p == ']')
1654                     in_brackets = FALSE;
1655                 else if ((*p == '@' || *p == '%') &&
1656                     !after_slash &&
1657                     !in_brackets ) {
1658                     must_be_last = TRUE;
1659                     greedy_proto = *p;
1660                 }
1661                 else if (*p == '_')
1662                     underscore = TRUE;
1663             }
1664             if (*p == '\\')
1665                 after_slash = TRUE;
1666             else
1667                 after_slash = FALSE;
1668         }
1669     }
1670
1671     if (warn) {
1672         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1673         p -= origlen;
1674         p = SvUTF8(proto)
1675             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676                              origlen, UNI_DISPLAY_ISPRINT)
1677             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1678
1679         if (proto_after_greedy_proto)
1680             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681                         "Prototype after '%c' for %"SVf" : %s",
1682                         greedy_proto, SVfARG(name), p);
1683         if (in_brackets)
1684             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685                         "Missing ']' in prototype for %"SVf" : %s",
1686                         SVfARG(name), p);
1687         if (bad_proto)
1688             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689                         "Illegal character in prototype for %"SVf" : %s",
1690                         SVfARG(name), p);
1691         if (bad_proto_after_underscore)
1692             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693                         "Illegal character after '_' in prototype for %"SVf" : %s",
1694                         SVfARG(name), p);
1695     }
1696
1697     return (! (proto_after_greedy_proto || bad_proto) );
1698 }
1699
1700 /*
1701  * S_incline
1702  * This subroutine has nothing to do with tilting, whether at windmills
1703  * or pinball tables.  Its name is short for "increment line".  It
1704  * increments the current line number in CopLINE(PL_curcop) and checks
1705  * to see whether the line starts with a comment of the form
1706  *    # line 500 "foo.pm"
1707  * If so, it sets the current line number and file to the values in the comment.
1708  */
1709
1710 STATIC void
1711 S_incline(pTHX_ const char *s)
1712 {
1713     dVAR;
1714     const char *t;
1715     const char *n;
1716     const char *e;
1717     line_t line_num;
1718
1719     PERL_ARGS_ASSERT_INCLINE;
1720
1721     COPLINE_INC_WITH_HERELINES;
1722     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723      && s+1 == PL_bufend && *s == ';') {
1724         /* fake newline in string eval */
1725         CopLINE_dec(PL_curcop);
1726         return;
1727     }
1728     if (*s++ != '#')
1729         return;
1730     while (SPACE_OR_TAB(*s))
1731         s++;
1732     if (strnEQ(s, "line", 4))
1733         s += 4;
1734     else
1735         return;
1736     if (SPACE_OR_TAB(*s))
1737         s++;
1738     else
1739         return;
1740     while (SPACE_OR_TAB(*s))
1741         s++;
1742     if (!isDIGIT(*s))
1743         return;
1744
1745     n = s;
1746     while (isDIGIT(*s))
1747         s++;
1748     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1749         return;
1750     while (SPACE_OR_TAB(*s))
1751         s++;
1752     if (*s == '"' && (t = strchr(s+1, '"'))) {
1753         s++;
1754         e = t + 1;
1755     }
1756     else {
1757         t = s;
1758         while (!isSPACE(*t))
1759             t++;
1760         e = t;
1761     }
1762     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1763         e++;
1764     if (*e != '\n' && *e != '\0')
1765         return;         /* false alarm */
1766
1767     line_num = atoi(n)-1;
1768
1769     if (t - s > 0) {
1770         const STRLEN len = t - s;
1771
1772         if (!PL_rsfp && !PL_parser->filtered) {
1773             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774              * to *{"::_<newfilename"} */
1775             /* However, the long form of evals is only turned on by the
1776                debugger - usually they're "(eval %lu)" */
1777             GV * const cfgv = CopFILEGV(PL_curcop);
1778             if (cfgv) {
1779                 char smallbuf[128];
1780                 STRLEN tmplen2 = len;
1781                 char *tmpbuf2;
1782                 GV *gv2;
1783
1784                 if (tmplen2 + 2 <= sizeof smallbuf)
1785                     tmpbuf2 = smallbuf;
1786                 else
1787                     Newx(tmpbuf2, tmplen2 + 2, char);
1788
1789                 tmpbuf2[0] = '_';
1790                 tmpbuf2[1] = '<';
1791
1792                 memcpy(tmpbuf2 + 2, s, tmplen2);
1793                 tmplen2 += 2;
1794
1795                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1796                 if (!isGV(gv2)) {
1797                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798                     /* adjust ${"::_<newfilename"} to store the new file name */
1799                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800                     /* The line number may differ. If that is the case,
1801                        alias the saved lines that are in the array.
1802                        Otherwise alias the whole array. */
1803                     if (CopLINE(PL_curcop) == line_num) {
1804                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1806                     }
1807                     else if (GvAV(cfgv)) {
1808                         AV * const av = GvAV(cfgv);
1809                         const I32 start = CopLINE(PL_curcop)+1;
1810                         I32 items = AvFILLp(av) - start;
1811                         if (items > 0) {
1812                             AV * const av2 = GvAVn(gv2);
1813                             SV **svp = AvARRAY(av) + start;
1814                             I32 l = (I32)line_num+1;
1815                             while (items--)
1816                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1817                         }
1818                     }
1819                 }
1820
1821                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1822             }
1823         }
1824         CopFILE_free(PL_curcop);
1825         CopFILE_setn(PL_curcop, s, len);
1826     }
1827     CopLINE_set(PL_curcop, line_num);
1828 }
1829
1830 #define skipspace(s) skipspace_flags(s, 0)
1831
1832 #ifdef PERL_MAD
1833 /* skip space before PL_thistoken */
1834
1835 STATIC char *
1836 S_skipspace0(pTHX_ char *s)
1837 {
1838     PERL_ARGS_ASSERT_SKIPSPACE0;
1839
1840     s = skipspace(s);
1841     if (!PL_madskills)
1842         return s;
1843     if (PL_skipwhite) {
1844         if (!PL_thiswhite)
1845             PL_thiswhite = newSVpvs("");
1846         sv_catsv(PL_thiswhite, PL_skipwhite);
1847         sv_free(PL_skipwhite);
1848         PL_skipwhite = 0;
1849     }
1850     PL_realtokenstart = s - SvPVX(PL_linestr);
1851     return s;
1852 }
1853
1854 /* skip space after PL_thistoken */
1855
1856 STATIC char *
1857 S_skipspace1(pTHX_ char *s)
1858 {
1859     const char *start = s;
1860     I32 startoff = start - SvPVX(PL_linestr);
1861
1862     PERL_ARGS_ASSERT_SKIPSPACE1;
1863
1864     s = skipspace(s);
1865     if (!PL_madskills)
1866         return s;
1867     start = SvPVX(PL_linestr) + startoff;
1868     if (!PL_thistoken && PL_realtokenstart >= 0) {
1869         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870         PL_thistoken = newSVpvn(tstart, start - tstart);
1871     }
1872     PL_realtokenstart = -1;
1873     if (PL_skipwhite) {
1874         if (!PL_nextwhite)
1875             PL_nextwhite = newSVpvs("");
1876         sv_catsv(PL_nextwhite, PL_skipwhite);
1877         sv_free(PL_skipwhite);
1878         PL_skipwhite = 0;
1879     }
1880     return s;
1881 }
1882
1883 STATIC char *
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1885 {
1886     char *start;
1887     const I32 startoff = s - SvPVX(PL_linestr);
1888
1889     PERL_ARGS_ASSERT_SKIPSPACE2;
1890
1891     s = skipspace(s);
1892     if (!PL_madskills || !svp)
1893         return s;
1894     start = SvPVX(PL_linestr) + startoff;
1895     if (!PL_thistoken && PL_realtokenstart >= 0) {
1896         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897         PL_thistoken = newSVpvn(tstart, start - tstart);
1898         PL_realtokenstart = -1;
1899     }
1900     if (PL_skipwhite) {
1901         if (!*svp)
1902             *svp = newSVpvs("");
1903         sv_setsv(*svp, PL_skipwhite);
1904         sv_free(PL_skipwhite);
1905         PL_skipwhite = 0;
1906     }
1907
1908     return s;
1909 }
1910 #endif
1911
1912 STATIC void
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1914 {
1915     AV *av = CopFILEAVx(PL_curcop);
1916     if (av) {
1917         SV * sv;
1918         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1919         else {
1920             sv = *av_fetch(av, 0, 1);
1921             SvUPGRADE(sv, SVt_PVMG);
1922         }
1923         if (!SvPOK(sv)) sv_setpvs(sv,"");
1924         if (orig_sv)
1925             sv_catsv(sv, orig_sv);
1926         else
1927             sv_catpvn(sv, buf, len);
1928         if (!SvIOK(sv)) {
1929             (void)SvIOK_on(sv);
1930             SvIV_set(sv, 0);
1931         }
1932         if (PL_parser->preambling == NOLINE)
1933             av_store(av, CopLINE(PL_curcop), sv);
1934     }
1935 }
1936
1937 /*
1938  * S_skipspace
1939  * Called to gobble the appropriate amount and type of whitespace.
1940  * Skips comments as well.
1941  */
1942
1943 STATIC char *
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1945 {
1946 #ifdef PERL_MAD
1947     char *start = s;
1948 #endif /* PERL_MAD */
1949     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1950 #ifdef PERL_MAD
1951     if (PL_skipwhite) {
1952         sv_free(PL_skipwhite);
1953         PL_skipwhite = NULL;
1954     }
1955 #endif /* PERL_MAD */
1956     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957         while (s < PL_bufend && SPACE_OR_TAB(*s))
1958             s++;
1959     } else {
1960         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1961         PL_bufptr = s;
1962         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964                     LEX_NO_NEXT_CHUNK : 0));
1965         s = PL_bufptr;
1966         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967         if (PL_linestart > PL_bufptr)
1968             PL_bufptr = PL_linestart;
1969         return s;
1970     }
1971 #ifdef PERL_MAD
1972     if (PL_madskills)
1973         PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1975     return s;
1976 }
1977
1978 /*
1979  * S_check_uni
1980  * Check the unary operators to ensure there's no ambiguity in how they're
1981  * used.  An ambiguous piece of code would be:
1982  *     rand + 5
1983  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1984  * the +5 is its argument.
1985  */
1986
1987 STATIC void
1988 S_check_uni(pTHX)
1989 {
1990     dVAR;
1991     const char *s;
1992     const char *t;
1993
1994     if (PL_oldoldbufptr != PL_last_uni)
1995         return;
1996     while (isSPACE(*PL_last_uni))
1997         PL_last_uni++;
1998     s = PL_last_uni;
1999     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2000         s++;
2001     if ((t = strchr(s, '(')) && t < PL_bufptr)
2002         return;
2003
2004     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006                      (int)(s - PL_last_uni), PL_last_uni);
2007 }
2008
2009 /*
2010  * LOP : macro to build a list operator.  Its behaviour has been replaced
2011  * with a subroutine, S_lop() for which LOP is just another name.
2012  */
2013
2014 #define LOP(f,x) return lop(f,x,s)
2015
2016 /*
2017  * S_lop
2018  * Build a list operator (or something that might be one).  The rules:
2019  *  - if we have a next token, then it's a list operator [why?]
2020  *  - if the next thing is an opening paren, then it's a function
2021  *  - else it's a list operator
2022  */
2023
2024 STATIC I32
2025 S_lop(pTHX_ I32 f, int x, char *s)
2026 {
2027     dVAR;
2028
2029     PERL_ARGS_ASSERT_LOP;
2030
2031     pl_yylval.ival = f;
2032     CLINE;
2033     PL_expect = x;
2034     PL_bufptr = s;
2035     PL_last_lop = PL_oldbufptr;
2036     PL_last_lop_op = (OPCODE)f;
2037 #ifdef PERL_MAD
2038     if (PL_lasttoke)
2039         goto lstop;
2040 #else
2041     if (PL_nexttoke)
2042         goto lstop;
2043 #endif
2044     if (*s == '(')
2045         return REPORT(FUNC);
2046     s = PEEKSPACE(s);
2047     if (*s == '(')
2048         return REPORT(FUNC);
2049     else {
2050         lstop:
2051         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053         return REPORT(LSTOP);
2054     }
2055 }
2056
2057 #ifdef PERL_MAD
2058  /*
2059  * S_start_force
2060  * Sets up for an eventual force_next().  start_force(0) basically does
2061  * an unshift, while start_force(-1) does a push.  yylex removes items
2062  * on the "pop" end.
2063  */
2064
2065 STATIC void
2066 S_start_force(pTHX_ int where)
2067 {
2068     int i;
2069
2070     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2071         where = PL_lasttoke;
2072     assert(PL_curforce < 0 || PL_curforce == where);
2073     if (PL_curforce != where) {
2074         for (i = PL_lasttoke; i > where; --i) {
2075             PL_nexttoke[i] = PL_nexttoke[i-1];
2076         }
2077         PL_lasttoke++;
2078     }
2079     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2080         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081     PL_curforce = where;
2082     if (PL_nextwhite) {
2083         if (PL_madskills)
2084             curmad('^', newSVpvs(""));
2085         CURMAD('_', PL_nextwhite);
2086     }
2087 }
2088
2089 STATIC void
2090 S_curmad(pTHX_ char slot, SV *sv)
2091 {
2092     MADPROP **where;
2093
2094     if (!sv)
2095         return;
2096     if (PL_curforce < 0)
2097         where = &PL_thismad;
2098     else
2099         where = &PL_nexttoke[PL_curforce].next_mad;
2100
2101     if (PL_faketokens)
2102         sv_setpvs(sv, "");
2103     else {
2104         if (!IN_BYTES) {
2105             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2106                 SvUTF8_on(sv);
2107             else if (PL_encoding) {
2108                 sv_recode_to_utf8(sv, PL_encoding);
2109             }
2110         }
2111     }
2112
2113     /* keep a slot open for the head of the list? */
2114     if (slot != '_' && *where && (*where)->mad_key == '^') {
2115         (*where)->mad_key = slot;
2116         sv_free(MUTABLE_SV(((*where)->mad_val)));
2117         (*where)->mad_val = (void*)sv;
2118     }
2119     else
2120         addmad(newMADsv(slot, sv), where, 0);
2121 }
2122 #else
2123 #  define start_force(where)    NOOP
2124 #  define curmad(slot, sv)      NOOP
2125 #endif
2126
2127 /*
2128  * S_force_next
2129  * When the lexer realizes it knows the next token (for instance,
2130  * it is reordering tokens for the parser) then it can call S_force_next
2131  * to know what token to return the next time the lexer is called.  Caller
2132  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133  * and possibly PL_expect to ensure the lexer handles the token correctly.
2134  */
2135
2136 STATIC void
2137 S_force_next(pTHX_ I32 type)
2138 {
2139     dVAR;
2140 #ifdef DEBUGGING
2141     if (DEBUG_T_TEST) {
2142         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143         tokereport(type, &NEXTVAL_NEXTTOKE);
2144     }
2145 #endif
2146 #ifdef PERL_MAD
2147     if (PL_curforce < 0)
2148         start_force(PL_lasttoke);
2149     PL_nexttoke[PL_curforce].next_type = type;
2150     if (PL_lex_state != LEX_KNOWNEXT)
2151         PL_lex_defer = PL_lex_state;
2152     PL_lex_state = LEX_KNOWNEXT;
2153     PL_lex_expect = PL_expect;
2154     PL_curforce = -1;
2155 #else
2156     PL_nexttype[PL_nexttoke] = type;
2157     PL_nexttoke++;
2158     if (PL_lex_state != LEX_KNOWNEXT) {
2159         PL_lex_defer = PL_lex_state;
2160         PL_lex_expect = PL_expect;
2161         PL_lex_state = LEX_KNOWNEXT;
2162     }
2163 #endif
2164 }
2165
2166 /*
2167  * S_postderef
2168  *
2169  * This subroutine handles postfix deref syntax after the arrow has already
2170  * been emitted.  @* $* etc. are emitted as two separate token right here.
2171  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172  * only the first, leaving yylex to find the next.
2173  */
2174
2175 static int
2176 S_postderef(pTHX_ int const funny, char const next)
2177 {
2178     dVAR;
2179     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180     assert(strchr("*[{", next));
2181     if (next == '*') {
2182         PL_expect = XOPERATOR;
2183         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185             PL_lex_state = LEX_INTERPEND;
2186             start_force(PL_curforce);
2187             force_next(POSTJOIN);
2188         }
2189         start_force(PL_curforce);
2190         force_next(next);
2191         PL_bufptr+=2;
2192     }
2193     else {
2194         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195          && !PL_lex_brackets)
2196             PL_lex_dojoin = 2;
2197         PL_expect = XOPERATOR;
2198         PL_bufptr++;
2199     }
2200     return funny;
2201 }
2202
2203 void
2204 Perl_yyunlex(pTHX)
2205 {
2206     int yyc = PL_parser->yychar;
2207     if (yyc != YYEMPTY) {
2208         if (yyc) {
2209             start_force(-1);
2210             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212                 PL_lex_allbrackets--;
2213                 PL_lex_brackets--;
2214                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215             } else if (yyc == '('/*)*/) {
2216                 PL_lex_allbrackets--;
2217                 yyc |= (2<<24);
2218             }
2219             force_next(yyc);
2220         }
2221         PL_parser->yychar = YYEMPTY;
2222     }
2223 }
2224
2225 STATIC SV *
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228     dVAR;
2229     SV * const sv = newSVpvn_utf8(start, len,
2230                                   !IN_BYTES
2231                                   && UTF
2232                                   && !is_ascii_string((const U8*)start, len)
2233                                   && is_utf8_string((const U8*)start, len));
2234     return sv;
2235 }
2236
2237 /*
2238  * S_force_word
2239  * When the lexer knows the next thing is a word (for instance, it has
2240  * just seen -> and it knows that the next char is a word char, then
2241  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2242  * lookahead.
2243  *
2244  * Arguments:
2245  *   char *start : buffer position (must be within PL_linestr)
2246  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247  *   int check_keyword : if true, Perl checks to make sure the word isn't
2248  *       a keyword (do this if the word is a label, e.g. goto FOO)
2249  *   int allow_pack : if true, : characters will also be allowed (require,
2250  *       use, etc. do this)
2251  *   int allow_initial_tick : used by the "sub" lexer only.
2252  */
2253
2254 STATIC char *
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2256 {
2257     dVAR;
2258     char *s;
2259     STRLEN len;
2260
2261     PERL_ARGS_ASSERT_FORCE_WORD;
2262
2263     start = SKIPSPACE1(start);
2264     s = start;
2265     if (isIDFIRST_lazy_if(s,UTF) ||
2266         (allow_pack && *s == ':') )
2267     {
2268         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269         if (check_keyword) {
2270           char *s2 = PL_tokenbuf;
2271           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2272             s2 += 6, len -= 6;
2273           if (keyword(s2, len, 0))
2274             return start;
2275         }
2276         start_force(PL_curforce);
2277         if (PL_madskills)
2278             curmad('X', newSVpvn(start,s-start));
2279         if (token == METHOD) {
2280             s = SKIPSPACE1(s);
2281             if (*s == '(')
2282                 PL_expect = XTERM;
2283             else {
2284                 PL_expect = XOPERATOR;
2285             }
2286         }
2287         if (PL_madskills)
2288             curmad('g', newSVpvs( "forced" ));
2289         NEXTVAL_NEXTTOKE.opval
2290             = (OP*)newSVOP(OP_CONST,0,
2291                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2293         force_next(token);
2294     }
2295     return s;
2296 }
2297
2298 /*
2299  * S_force_ident
2300  * Called when the lexer wants $foo *foo &foo etc, but the program
2301  * text only contains the "foo" portion.  The first argument is a pointer
2302  * to the "foo", and the second argument is the type symbol to prefix.
2303  * Forces the next token to be a "WORD".
2304  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2305  */
2306
2307 STATIC void
2308 S_force_ident(pTHX_ const char *s, int kind)
2309 {
2310     dVAR;
2311
2312     PERL_ARGS_ASSERT_FORCE_IDENT;
2313
2314     if (s[0]) {
2315         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317                                                                 UTF ? SVf_UTF8 : 0));
2318         start_force(PL_curforce);
2319         NEXTVAL_NEXTTOKE.opval = o;
2320         force_next(WORD);
2321         if (kind) {
2322             o->op_private = OPpCONST_ENTERED;
2323             /* XXX see note in pp_entereval() for why we forgo typo
2324                warnings if the symbol must be introduced in an eval.
2325                GSAR 96-10-12 */
2326             gv_fetchpvn_flags(s, len,
2327                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329                               kind == '$' ? SVt_PV :
2330                               kind == '@' ? SVt_PVAV :
2331                               kind == '%' ? SVt_PVHV :
2332                               SVt_PVGV
2333                               );
2334         }
2335     }
2336 }
2337
2338 static void
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2340 {
2341     start_force(PL_curforce);
2342     NEXTVAL_NEXTTOKE.ival = pit;
2343     force_next('p');
2344 }
2345
2346 NV
2347 Perl_str_to_version(pTHX_ SV *sv)
2348 {
2349     NV retval = 0.0;
2350     NV nshift = 1.0;
2351     STRLEN len;
2352     const char *start = SvPV_const(sv,len);
2353     const char * const end = start + len;
2354     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2355
2356     PERL_ARGS_ASSERT_STR_TO_VERSION;
2357
2358     while (start < end) {
2359         STRLEN skip;
2360         UV n;
2361         if (utf)
2362             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2363         else {
2364             n = *(U8*)start;
2365             skip = 1;
2366         }
2367         retval += ((NV)n)/nshift;
2368         start += skip;
2369         nshift *= 1000;
2370     }
2371     return retval;
2372 }
2373
2374 /*
2375  * S_force_version
2376  * Forces the next token to be a version number.
2377  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378  * and if "guessing" is TRUE, then no new token is created (and the caller
2379  * must use an alternative parsing method).
2380  */
2381
2382 STATIC char *
2383 S_force_version(pTHX_ char *s, int guessing)
2384 {
2385     dVAR;
2386     OP *version = NULL;
2387     char *d;
2388 #ifdef PERL_MAD
2389     I32 startoff = s - SvPVX(PL_linestr);
2390 #endif
2391
2392     PERL_ARGS_ASSERT_FORCE_VERSION;
2393
2394     s = SKIPSPACE1(s);
2395
2396     d = s;
2397     if (*d == 'v')
2398         d++;
2399     if (isDIGIT(*d)) {
2400         while (isDIGIT(*d) || *d == '_' || *d == '.')
2401             d++;
2402 #ifdef PERL_MAD
2403         if (PL_madskills) {
2404             start_force(PL_curforce);
2405             curmad('X', newSVpvn(s,d-s));
2406         }
2407 #endif
2408         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2409             SV *ver;
2410 #ifdef USE_LOCALE_NUMERIC
2411             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2412             setlocale(LC_NUMERIC, "C");
2413 #endif
2414             s = scan_num(s, &pl_yylval);
2415 #ifdef USE_LOCALE_NUMERIC
2416             setlocale(LC_NUMERIC, loc);
2417             Safefree(loc);
2418 #endif
2419             version = pl_yylval.opval;
2420             ver = cSVOPx(version)->op_sv;
2421             if (SvPOK(ver) && !SvNIOK(ver)) {
2422                 SvUPGRADE(ver, SVt_PVNV);
2423                 SvNV_set(ver, str_to_version(ver));
2424                 SvNOK_on(ver);          /* hint that it is a version */
2425             }
2426         }
2427         else if (guessing) {
2428 #ifdef PERL_MAD
2429             if (PL_madskills) {
2430                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2431                 PL_nextwhite = 0;
2432                 s = SvPVX(PL_linestr) + startoff;
2433             }
2434 #endif
2435             return s;
2436         }
2437     }
2438
2439 #ifdef PERL_MAD
2440     if (PL_madskills && !version) {
2441         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2442         PL_nextwhite = 0;
2443         s = SvPVX(PL_linestr) + startoff;
2444     }
2445 #endif
2446     /* NOTE: The parser sees the package name and the VERSION swapped */
2447     start_force(PL_curforce);
2448     NEXTVAL_NEXTTOKE.opval = version;
2449     force_next(WORD);
2450
2451     return s;
2452 }
2453
2454 /*
2455  * S_force_strict_version
2456  * Forces the next token to be a version number using strict syntax rules.
2457  */
2458
2459 STATIC char *
2460 S_force_strict_version(pTHX_ char *s)
2461 {
2462     dVAR;
2463     OP *version = NULL;
2464 #ifdef PERL_MAD
2465     I32 startoff = s - SvPVX(PL_linestr);
2466 #endif
2467     const char *errstr = NULL;
2468
2469     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2470
2471     while (isSPACE(*s)) /* leading whitespace */
2472         s++;
2473
2474     if (is_STRICT_VERSION(s,&errstr)) {
2475         SV *ver = newSV(0);
2476         s = (char *)scan_version(s, ver, 0);
2477         version = newSVOP(OP_CONST, 0, ver);
2478     }
2479     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2480             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2481     {
2482         PL_bufptr = s;
2483         if (errstr)
2484             yyerror(errstr); /* version required */
2485         return s;
2486     }
2487
2488 #ifdef PERL_MAD
2489     if (PL_madskills && !version) {
2490         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2491         PL_nextwhite = 0;
2492         s = SvPVX(PL_linestr) + startoff;
2493     }
2494 #endif
2495     /* NOTE: The parser sees the package name and the VERSION swapped */
2496     start_force(PL_curforce);
2497     NEXTVAL_NEXTTOKE.opval = version;
2498     force_next(WORD);
2499
2500     return s;
2501 }
2502
2503 /*
2504  * S_tokeq
2505  * Tokenize a quoted string passed in as an SV.  It finds the next
2506  * chunk, up to end of string or a backslash.  It may make a new
2507  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2508  * turns \\ into \.
2509  */
2510
2511 STATIC SV *
2512 S_tokeq(pTHX_ SV *sv)
2513 {
2514     dVAR;
2515     char *s;
2516     char *send;
2517     char *d;
2518     SV *pv = sv;
2519
2520     PERL_ARGS_ASSERT_TOKEQ;
2521
2522     assert (SvPOK(sv));
2523     assert (SvLEN(sv));
2524     assert (!SvIsCOW(sv));
2525     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2526         goto finish;
2527     s = SvPVX(sv);
2528     send = SvEND(sv);
2529     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2530     while (s < send && !(*s == '\\' && s[1] == '\\'))
2531         s++;
2532     if (s == send)
2533         goto finish;
2534     d = s;
2535     if ( PL_hints & HINT_NEW_STRING ) {
2536         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2537                             SVs_TEMP | SvUTF8(sv));
2538     }
2539     while (s < send) {
2540         if (*s == '\\') {
2541             if (s + 1 < send && (s[1] == '\\'))
2542                 s++;            /* all that, just for this */
2543         }
2544         *d++ = *s++;
2545     }
2546     *d = '\0';
2547     SvCUR_set(sv, d - SvPVX_const(sv));
2548   finish:
2549     if ( PL_hints & HINT_NEW_STRING )
2550        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2551     return sv;
2552 }
2553
2554 /*
2555  * Now come three functions related to double-quote context,
2556  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2557  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2558  * interact with PL_lex_state, and create fake ( ... ) argument lists
2559  * to handle functions and concatenation.
2560  * For example,
2561  *   "foo\lbar"
2562  * is tokenised as
2563  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2564  */
2565
2566 /*
2567  * S_sublex_start
2568  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2569  *
2570  * Pattern matching will set PL_lex_op to the pattern-matching op to
2571  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2572  *
2573  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2574  *
2575  * Everything else becomes a FUNC.
2576  *
2577  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2578  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2579  * call to S_sublex_push().
2580  */
2581
2582 STATIC I32
2583 S_sublex_start(pTHX)
2584 {
2585     dVAR;
2586     const I32 op_type = pl_yylval.ival;
2587
2588     if (op_type == OP_NULL) {
2589         pl_yylval.opval = PL_lex_op;
2590         PL_lex_op = NULL;
2591         return THING;
2592     }
2593     if (op_type == OP_CONST) {
2594         SV *sv = tokeq(PL_lex_stuff);
2595
2596         if (SvTYPE(sv) == SVt_PVIV) {
2597             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2598             STRLEN len;
2599             const char * const p = SvPV_const(sv, len);
2600             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2601             SvREFCNT_dec(sv);
2602             sv = nsv;
2603         }
2604         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2605         PL_lex_stuff = NULL;
2606         return THING;
2607     }
2608
2609     PL_sublex_info.super_state = PL_lex_state;
2610     PL_sublex_info.sub_inwhat = (U16)op_type;
2611     PL_sublex_info.sub_op = PL_lex_op;
2612     PL_lex_state = LEX_INTERPPUSH;
2613
2614     PL_expect = XTERM;
2615     if (PL_lex_op) {
2616         pl_yylval.opval = PL_lex_op;
2617         PL_lex_op = NULL;
2618         return PMFUNC;
2619     }
2620     else
2621         return FUNC;
2622 }
2623
2624 /*
2625  * S_sublex_push
2626  * Create a new scope to save the lexing state.  The scope will be
2627  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2628  * to the uc, lc, etc. found before.
2629  * Sets PL_lex_state to LEX_INTERPCONCAT.
2630  */
2631
2632 STATIC I32
2633 S_sublex_push(pTHX)
2634 {
2635     dVAR;
2636     LEXSHARED *shared;
2637     const bool is_heredoc = PL_multi_close == '<';
2638     ENTER;
2639
2640     PL_lex_state = PL_sublex_info.super_state;
2641     SAVEI8(PL_lex_dojoin);
2642     SAVEI32(PL_lex_brackets);
2643     SAVEI32(PL_lex_allbrackets);
2644     SAVEI32(PL_lex_formbrack);
2645     SAVEI8(PL_lex_fakeeof);
2646     SAVEI32(PL_lex_casemods);
2647     SAVEI32(PL_lex_starts);
2648     SAVEI8(PL_lex_state);
2649     SAVESPTR(PL_lex_repl);
2650     SAVEVPTR(PL_lex_inpat);
2651     SAVEI16(PL_lex_inwhat);
2652     if (is_heredoc)
2653     {
2654         SAVECOPLINE(PL_curcop);
2655         SAVEI32(PL_multi_end);
2656         SAVEI32(PL_parser->herelines);
2657         PL_parser->herelines = 0;
2658     }
2659     SAVEI8(PL_multi_close);
2660     SAVEPPTR(PL_bufptr);
2661     SAVEPPTR(PL_bufend);
2662     SAVEPPTR(PL_oldbufptr);
2663     SAVEPPTR(PL_oldoldbufptr);
2664     SAVEPPTR(PL_last_lop);
2665     SAVEPPTR(PL_last_uni);
2666     SAVEPPTR(PL_linestart);
2667     SAVESPTR(PL_linestr);
2668     SAVEGENERICPV(PL_lex_brackstack);
2669     SAVEGENERICPV(PL_lex_casestack);
2670     SAVEGENERICPV(PL_parser->lex_shared);
2671     SAVEBOOL(PL_parser->lex_re_reparsing);
2672     SAVEI32(PL_copline);
2673
2674     /* The here-doc parser needs to be able to peek into outer lexing
2675        scopes to find the body of the here-doc.  So we put PL_linestr and
2676        PL_bufptr into lex_shared, to ‘share’ those values.
2677      */
2678     PL_parser->lex_shared->ls_linestr = PL_linestr;
2679     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2680
2681     PL_linestr = PL_lex_stuff;
2682     PL_lex_repl = PL_sublex_info.repl;
2683     PL_lex_stuff = NULL;
2684     PL_sublex_info.repl = NULL;
2685
2686     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2687         = SvPVX(PL_linestr);
2688     PL_bufend += SvCUR(PL_linestr);
2689     PL_last_lop = PL_last_uni = NULL;
2690     SAVEFREESV(PL_linestr);
2691     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2692
2693     PL_lex_dojoin = FALSE;
2694     PL_lex_brackets = PL_lex_formbrack = 0;
2695     PL_lex_allbrackets = 0;
2696     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2697     Newx(PL_lex_brackstack, 120, char);
2698     Newx(PL_lex_casestack, 12, char);
2699     PL_lex_casemods = 0;
2700     *PL_lex_casestack = '\0';
2701     PL_lex_starts = 0;
2702     PL_lex_state = LEX_INTERPCONCAT;
2703     if (is_heredoc)
2704         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2705     PL_copline = NOLINE;
2706
2707     Newxz(shared, 1, LEXSHARED);
2708     shared->ls_prev = PL_parser->lex_shared;
2709     PL_parser->lex_shared = shared;
2710
2711     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2712     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2713     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2714         PL_lex_inpat = PL_sublex_info.sub_op;
2715     else
2716         PL_lex_inpat = NULL;
2717
2718     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2719     PL_in_eval &= ~EVAL_RE_REPARSING;
2720
2721     return '(';
2722 }
2723
2724 /*
2725  * S_sublex_done
2726  * Restores lexer state after a S_sublex_push.
2727  */
2728
2729 STATIC I32
2730 S_sublex_done(pTHX)
2731 {
2732     dVAR;
2733     if (!PL_lex_starts++) {
2734         SV * const sv = newSVpvs("");
2735         if (SvUTF8(PL_linestr))
2736             SvUTF8_on(sv);
2737         PL_expect = XOPERATOR;
2738         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2739         return THING;
2740     }
2741
2742     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2743         PL_lex_state = LEX_INTERPCASEMOD;
2744         return yylex();
2745     }
2746
2747     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2748     assert(PL_lex_inwhat != OP_TRANSR);
2749     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2750         PL_linestr = PL_lex_repl;
2751         PL_lex_inpat = 0;
2752         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2753         PL_bufend += SvCUR(PL_linestr);
2754         PL_last_lop = PL_last_uni = NULL;
2755         PL_lex_dojoin = FALSE;
2756         PL_lex_brackets = 0;
2757         PL_lex_allbrackets = 0;
2758         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2759         PL_lex_casemods = 0;
2760         *PL_lex_casestack = '\0';
2761         PL_lex_starts = 0;
2762         if (SvEVALED(PL_lex_repl)) {
2763             PL_lex_state = LEX_INTERPNORMAL;
2764             PL_lex_starts++;
2765             /*  we don't clear PL_lex_repl here, so that we can check later
2766                 whether this is an evalled subst; that means we rely on the
2767                 logic to ensure sublex_done() is called again only via the
2768                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2769         }
2770         else {
2771             PL_lex_state = LEX_INTERPCONCAT;
2772             PL_lex_repl = NULL;
2773         }
2774         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2775             CopLINE(PL_curcop) +=
2776                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2777                  + PL_parser->herelines;
2778             PL_parser->herelines = 0;
2779         }
2780         return ',';
2781     }
2782     else {
2783         const line_t l = CopLINE(PL_curcop);
2784 #ifdef PERL_MAD
2785         if (PL_madskills) {
2786             if (PL_thiswhite) {
2787                 if (!PL_endwhite)
2788                     PL_endwhite = newSVpvs("");
2789                 sv_catsv(PL_endwhite, PL_thiswhite);
2790                 PL_thiswhite = 0;
2791             }
2792             if (PL_thistoken)
2793                 sv_setpvs(PL_thistoken,"");
2794             else
2795                 PL_realtokenstart = -1;
2796         }
2797 #endif
2798         LEAVE;
2799         if (PL_multi_close == '<')
2800             PL_parser->herelines += l - PL_multi_end;
2801         PL_bufend = SvPVX(PL_linestr);
2802         PL_bufend += SvCUR(PL_linestr);
2803         PL_expect = XOPERATOR;
2804         PL_sublex_info.sub_inwhat = 0;
2805         return ')';
2806     }
2807 }
2808
2809 PERL_STATIC_INLINE SV*
2810 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2811 {
2812     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2813      * interior, hence to the "}".  Finds what the name resolves to, returning
2814      * an SV* containing it; NULL if no valid one found */
2815
2816     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2817
2818     HV * table;
2819     SV **cvp;
2820     SV *cv;
2821     SV *rv;
2822     HV *stash;
2823     const U8* first_bad_char_loc;
2824     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2825
2826     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2827
2828     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2829                                      e - backslash_ptr,
2830                                      &first_bad_char_loc))
2831     {
2832         /* If warnings are on, this will print a more detailed analysis of what
2833          * is wrong than the error message below */
2834         utf8n_to_uvchr(first_bad_char_loc,
2835                        e - ((char *) first_bad_char_loc),
2836                        NULL, 0);
2837
2838         /* We deliberately don't try to print the malformed character, which
2839          * might not print very well; it also may be just the first of many
2840          * malformations, so don't print what comes after it */
2841         yyerror(Perl_form(aTHX_
2842             "Malformed UTF-8 character immediately after '%.*s'",
2843             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2844         return NULL;
2845     }
2846
2847     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2848                         /* include the <}> */
2849                         e - backslash_ptr + 1);
2850     if (! SvPOK(res)) {
2851         SvREFCNT_dec_NN(res);
2852         return NULL;
2853     }
2854
2855     /* See if the charnames handler is the Perl core's, and if so, we can skip
2856      * the validation needed for a user-supplied one, as Perl's does its own
2857      * validation. */
2858     table = GvHV(PL_hintgv);             /* ^H */
2859     cvp = hv_fetchs(table, "charnames", FALSE);
2860     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2861         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2862     {
2863         const char * const name = HvNAME(stash);
2864         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2865          && strEQ(name, "_charnames")) {
2866            return res;
2867        }
2868     }
2869
2870     /* Here, it isn't Perl's charname handler.  We can't rely on a
2871      * user-supplied handler to validate the input name.  For non-ut8 input,
2872      * look to see that the first character is legal.  Then loop through the
2873      * rest checking that each is a continuation */
2874
2875     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2876      * the same thing */
2877
2878     if (! UTF) {
2879         if (! isALPHAU(*s)) {
2880             goto bad_charname;
2881         }
2882         s++;
2883         while (s < e) {
2884             if (! isCHARNAME_CONT(*s)) {
2885                 goto bad_charname;
2886             }
2887             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2888                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2889                            "A sequence of multiple spaces in a charnames "
2890                            "alias definition is deprecated");
2891             }
2892             s++;
2893         }
2894         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2895             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2896                         "Trailing white-space in a charnames alias "
2897                         "definition is deprecated");
2898         }
2899     }
2900     else {
2901         /* Similarly for utf8.  For invariants can check directly; for other
2902          * Latin1, can calculate their code point and check; otherwise  use a
2903          * swash */
2904         if (UTF8_IS_INVARIANT(*s)) {
2905             if (! isALPHAU(*s)) {
2906                 goto bad_charname;
2907             }
2908             s++;
2909         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2910             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2911                 goto bad_charname;
2912             }
2913             s += 2;
2914         }
2915         else {
2916             if (! PL_utf8_charname_begin) {
2917                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2918                 PL_utf8_charname_begin = _core_swash_init("utf8",
2919                                                         "_Perl_Charname_Begin",
2920                                                         &PL_sv_undef,
2921                                                         1, 0, NULL, &flags);
2922             }
2923             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2924                 goto bad_charname;
2925             }
2926             s += UTF8SKIP(s);
2927         }
2928
2929         while (s < e) {
2930             if (UTF8_IS_INVARIANT(*s)) {
2931                 if (! isCHARNAME_CONT(*s)) {
2932                     goto bad_charname;
2933                 }
2934                 if (*s == ' ' && *(s-1) == ' '
2935                  && ckWARN_d(WARN_DEPRECATED)) {
2936                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2937                                "A sequence of multiple spaces in a charnam"
2938                                "es alias definition is deprecated");
2939                 }
2940                 s++;
2941             }
2942             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2943                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2944                 {
2945                     goto bad_charname;
2946                 }
2947                 s += 2;
2948             }
2949             else {
2950                 if (! PL_utf8_charname_continue) {
2951                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2952                     PL_utf8_charname_continue = _core_swash_init("utf8",
2953                                                 "_Perl_Charname_Continue",
2954                                                 &PL_sv_undef,
2955                                                 1, 0, NULL, &flags);
2956                 }
2957                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2958                     goto bad_charname;
2959                 }
2960                 s += UTF8SKIP(s);
2961             }
2962         }
2963         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2964             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2965                        "Trailing white-space in a charnames alias "
2966                        "definition is deprecated");
2967         }
2968     }
2969
2970     if (SvUTF8(res)) { /* Don't accept malformed input */
2971         const U8* first_bad_char_loc;
2972         STRLEN len;
2973         const char* const str = SvPV_const(res, len);
2974         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2975             /* If warnings are on, this will print a more detailed analysis of
2976              * what is wrong than the error message below */
2977             utf8n_to_uvchr(first_bad_char_loc,
2978                            (char *) first_bad_char_loc - str,
2979                            NULL, 0);
2980
2981             /* We deliberately don't try to print the malformed character,
2982              * which might not print very well; it also may be just the first
2983              * of many malformations, so don't print what comes after it */
2984             yyerror_pv(
2985               Perl_form(aTHX_
2986                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2987                  (int) (e - backslash_ptr + 1), backslash_ptr,
2988                  (int) ((char *) first_bad_char_loc - str), str
2989               ),
2990               SVf_UTF8);
2991             return NULL;
2992         }
2993     }
2994
2995     return res;
2996
2997   bad_charname: {
2998         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2999
3000         /* The final %.*s makes sure that should the trailing NUL be missing
3001          * that this print won't run off the end of the string */
3002         yyerror_pv(
3003           Perl_form(aTHX_
3004             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3005             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3006             (int)(e - s + bad_char_size), s + bad_char_size
3007           ),
3008           UTF ? SVf_UTF8 : 0);
3009         return NULL;
3010     }
3011 }
3012
3013 /*
3014   scan_const
3015
3016   Extracts the next constant part of a pattern, double-quoted string,
3017   or transliteration.  This is terrifying code.
3018
3019   For example, in parsing the double-quoted string "ab\x63$d", it would
3020   stop at the '$' and return an OP_CONST containing 'abc'.
3021
3022   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3023   processing a pattern (PL_lex_inpat is true), a transliteration
3024   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3025
3026   Returns a pointer to the character scanned up to. If this is
3027   advanced from the start pointer supplied (i.e. if anything was
3028   successfully parsed), will leave an OP_CONST for the substring scanned
3029   in pl_yylval. Caller must intuit reason for not parsing further
3030   by looking at the next characters herself.
3031
3032   In patterns:
3033     expand:
3034       \N{FOO}  => \N{U+hex_for_character_FOO}
3035       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3036
3037     pass through:
3038         all other \-char, including \N and \N{ apart from \N{ABC}
3039
3040     stops on:
3041         @ and $ where it appears to be a var, but not for $ as tail anchor
3042         \l \L \u \U \Q \E
3043         (?{  or  (??{
3044
3045
3046   In transliterations:
3047     characters are VERY literal, except for - not at the start or end
3048     of the string, which indicates a range. If the range is in bytes,
3049     scan_const expands the range to the full set of intermediate
3050     characters. If the range is in utf8, the hyphen is replaced with
3051     a certain range mark which will be handled by pmtrans() in op.c.
3052
3053   In double-quoted strings:
3054     backslashes:
3055       double-quoted style: \r and \n
3056       constants: \x31, etc.
3057       deprecated backrefs: \1 (in substitution replacements)
3058       case and quoting: \U \Q \E
3059     stops on @ and $
3060
3061   scan_const does *not* construct ops to handle interpolated strings.
3062   It stops processing as soon as it finds an embedded $ or @ variable
3063   and leaves it to the caller to work out what's going on.
3064
3065   embedded arrays (whether in pattern or not) could be:
3066       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3067
3068   $ in double-quoted strings must be the symbol of an embedded scalar.
3069
3070   $ in pattern could be $foo or could be tail anchor.  Assumption:
3071   it's a tail anchor if $ is the last thing in the string, or if it's
3072   followed by one of "()| \r\n\t"
3073
3074   \1 (backreferences) are turned into $1 in substitutions
3075
3076   The structure of the code is
3077       while (there's a character to process) {
3078           handle transliteration ranges
3079           skip regexp comments /(?#comment)/ and codes /(?{code})/
3080           skip #-initiated comments in //x patterns
3081           check for embedded arrays
3082           check for embedded scalars
3083           if (backslash) {
3084               deprecate \1 in substitution replacements
3085               handle string-changing backslashes \l \U \Q \E, etc.
3086               switch (what was escaped) {
3087                   handle \- in a transliteration (becomes a literal -)
3088                   if a pattern and not \N{, go treat as regular character
3089                   handle \132 (octal characters)
3090                   handle \x15 and \x{1234} (hex characters)
3091                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3092                   handle \cV (control characters)
3093                   handle printf-style backslashes (\f, \r, \n, etc)
3094               } (end switch)
3095               continue
3096           } (end if backslash)
3097           handle regular character
3098     } (end while character to read)
3099
3100 */
3101
3102 STATIC char *
3103 S_scan_const(pTHX_ char *start)
3104 {
3105     dVAR;
3106     char *send = PL_bufend;             /* end of the constant */
3107     SV *sv = newSV(send - start);               /* sv for the constant.  See
3108                                                    note below on sizing. */
3109     char *s = start;                    /* start of the constant */
3110     char *d = SvPVX(sv);                /* destination for copies */
3111     bool dorange = FALSE;                       /* are we in a translit range? */
3112     bool didrange = FALSE;                      /* did we just finish a range? */
3113     bool in_charclass = FALSE;                  /* within /[...]/ */
3114     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3115     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3116                                                    to be UTF8?  But, this can
3117                                                    show as true when the source
3118                                                    isn't utf8, as for example
3119                                                    when it is entirely composed
3120                                                    of hex constants */
3121     SV *res;                            /* result from charnames */
3122
3123     /* Note on sizing:  The scanned constant is placed into sv, which is
3124      * initialized by newSV() assuming one byte of output for every byte of
3125      * input.  This routine expects newSV() to allocate an extra byte for a
3126      * trailing NUL, which this routine will append if it gets to the end of
3127      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3128      * CAPITAL LETTER A}), or more output than input if the constant ends up
3129      * recoded to utf8, but each time a construct is found that might increase
3130      * the needed size, SvGROW() is called.  Its size parameter each time is
3131      * based on the best guess estimate at the time, namely the length used so
3132      * far, plus the length the current construct will occupy, plus room for
3133      * the trailing NUL, plus one byte for every input byte still unscanned */
3134
3135     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3136                        before set */
3137 #ifdef EBCDIC
3138     UV literal_endpoint = 0;
3139     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3140 #endif
3141
3142     PERL_ARGS_ASSERT_SCAN_CONST;
3143
3144     assert(PL_lex_inwhat != OP_TRANSR);
3145     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3146         /* If we are doing a trans and we know we want UTF8 set expectation */
3147         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3148         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3149     }
3150
3151     /* Protect sv from errors and fatal warnings. */
3152     ENTER_with_name("scan_const");
3153     SAVEFREESV(sv);
3154
3155     while (s < send || dorange) {
3156
3157         /* get transliterations out of the way (they're most literal) */
3158         if (PL_lex_inwhat == OP_TRANS) {
3159             /* expand a range A-Z to the full set of characters.  AIE! */
3160             if (dorange) {
3161                 I32 i;                          /* current expanded character */
3162                 I32 min;                        /* first character in range */
3163                 I32 max;                        /* last character in range */
3164
3165 #ifdef EBCDIC
3166                 UV uvmax = 0;
3167 #endif
3168
3169                 if (has_utf8
3170 #ifdef EBCDIC
3171                     && !native_range
3172 #endif
3173                 ) {
3174                     char * const c = (char*)utf8_hop((U8*)d, -1);
3175                     char *e = d++;
3176                     while (e-- > c)
3177                         *(e + 1) = *e;
3178                     *c = (char) ILLEGAL_UTF8_BYTE;
3179                     /* mark the range as done, and continue */
3180                     dorange = FALSE;
3181                     didrange = TRUE;
3182                     continue;
3183                 }
3184
3185                 i = d - SvPVX_const(sv);                /* remember current offset */
3186 #ifdef EBCDIC
3187                 SvGROW(sv,
3188                        SvLEN(sv) + (has_utf8 ?
3189                                     (512 - UTF_CONTINUATION_MARK +
3190                                      UNISKIP(0x100))
3191                                     : 256));
3192                 /* How many two-byte within 0..255: 128 in UTF-8,
3193                  * 96 in UTF-8-mod. */
3194 #else
3195                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3196 #endif
3197                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3198 #ifdef EBCDIC
3199                 if (has_utf8) {
3200                     int j;
3201                     for (j = 0; j <= 1; j++) {
3202                         char * const c = (char*)utf8_hop((U8*)d, -1);
3203                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3204                         if (j)
3205                             min = (U8)uv;
3206                         else if (uv < 256)
3207                             max = (U8)uv;
3208                         else {
3209                             max = (U8)0xff; /* only to \xff */
3210                             uvmax = uv; /* \x{100} to uvmax */
3211                         }
3212                         d = c; /* eat endpoint chars */
3213                      }
3214                 }
3215                else {
3216 #endif
3217                    d -= 2;              /* eat the first char and the - */
3218                    min = (U8)*d;        /* first char in range */
3219                    max = (U8)d[1];      /* last char in range  */
3220 #ifdef EBCDIC
3221                }
3222 #endif
3223
3224                 if (min > max) {
3225                     Perl_croak(aTHX_
3226                                "Invalid range \"%c-%c\" in transliteration operator",
3227                                (char)min, (char)max);
3228                 }
3229
3230 #ifdef EBCDIC
3231                 if (literal_endpoint == 2 &&
3232                     ((isLOWER_A(min) && isLOWER_A(max)) ||
3233                      (isUPPER_A(min) && isUPPER_A(max))))
3234                 {
3235                     for (i = min; i <= max; i++) {
3236                         if (isALPHA_A(i))
3237                             *d++ = i;
3238                     }
3239                 }
3240                 else
3241 #endif
3242                     for (i = min; i <= max; i++)
3243 #ifdef EBCDIC
3244                         if (has_utf8) {
3245                             append_utf8_from_native_byte(i, &d);
3246                         }
3247                         else
3248 #endif
3249                             *d++ = (char)i;
3250
3251 #ifdef EBCDIC
3252                 if (uvmax) {
3253                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3254                     if (uvmax > 0x101)
3255                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3256                     if (uvmax > 0x100)
3257                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3258                 }
3259 #endif
3260
3261                 /* mark the range as done, and continue */
3262                 dorange = FALSE;
3263                 didrange = TRUE;
3264 #ifdef EBCDIC
3265                 literal_endpoint = 0;
3266 #endif
3267                 continue;
3268             }
3269
3270             /* range begins (ignore - as first or last char) */
3271             else if (*s == '-' && s+1 < send  && s != start) {
3272                 if (didrange) {
3273                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3274                 }
3275                 if (has_utf8
3276 #ifdef EBCDIC
3277                     && !native_range
3278 #endif
3279                     ) {
3280                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
3281                     s++;
3282                     continue;
3283                 }
3284                 dorange = TRUE;
3285                 s++;
3286             }
3287             else {
3288                 didrange = FALSE;
3289 #ifdef EBCDIC
3290                 literal_endpoint = 0;
3291                 native_range = TRUE;
3292 #endif
3293             }
3294         }
3295
3296         /* if we get here, we're not doing a transliteration */
3297
3298         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3299             char *s1 = s-1;
3300             int esc = 0;
3301             while (s1 >= start && *s1-- == '\\')
3302                 esc = !esc;
3303             if (!esc)
3304                 in_charclass = TRUE;
3305         }
3306
3307         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3308             char *s1 = s-1;
3309             int esc = 0;
3310             while (s1 >= start && *s1-- == '\\')
3311                 esc = !esc;
3312             if (!esc)
3313                 in_charclass = FALSE;
3314         }
3315
3316         /* skip for regexp comments /(?#comment)/, except for the last
3317          * char, which will be done separately.
3318          * Stop on (?{..}) and friends */
3319
3320         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3321             if (s[2] == '#') {
3322                 while (s+1 < send && *s != ')')
3323                     *d++ = *s++;
3324             }
3325             else if (!PL_lex_casemods &&
3326                      (    s[2] == '{' /* This should match regcomp.c */
3327                       || (s[2] == '?' && s[3] == '{')))
3328             {
3329                 break;
3330             }
3331         }
3332
3333         /* likewise skip #-initiated comments in //x patterns */
3334         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3335           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3336             while (s+1 < send && *s != '\n')
3337                 *d++ = *s++;
3338         }
3339
3340         /* no further processing of single-quoted regex */
3341         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3342             goto default_action;
3343
3344         /* check for embedded arrays
3345            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3346            */
3347         else if (*s == '@' && s[1]) {
3348             if (isWORDCHAR_lazy_if(s+1,UTF))
3349                 break;
3350             if (strchr(":'{$", s[1]))
3351                 break;
3352             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3353                 break; /* in regexp, neither @+ nor @- are interpolated */
3354         }
3355
3356         /* check for embedded scalars.  only stop if we're sure it's a
3357            variable.
3358         */
3359         else if (*s == '$') {
3360             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3361                 break;
3362             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3363                 if (s[1] == '\\') {
3364                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3365                                    "Possible unintended interpolation of $\\ in regex");
3366                 }
3367                 break;          /* in regexp, $ might be tail anchor */
3368             }
3369         }
3370
3371         /* End of else if chain - OP_TRANS rejoin rest */
3372
3373         /* backslashes */
3374         if (*s == '\\' && s+1 < send) {
3375             char* e;    /* Can be used for ending '}', etc. */
3376
3377             s++;
3378
3379             /* warn on \1 - \9 in substitution replacements, but note that \11
3380              * is an octal; and \19 is \1 followed by '9' */
3381             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3382                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3383             {
3384                 /* diag_listed_as: \%d better written as $%d */
3385                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3386                 *--s = '$';
3387                 break;
3388             }
3389
3390             /* string-change backslash escapes */
3391             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3392                 --s;
3393                 break;
3394             }
3395             /* In a pattern, process \N, but skip any other backslash escapes.
3396              * This is because we don't want to translate an escape sequence
3397              * into a meta symbol and have the regex compiler use the meta
3398              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3399              * in spite of this, we do have to process \N here while the proper
3400              * charnames handler is in scope.  See bugs #56444 and #62056.
3401              * There is a complication because \N in a pattern may also stand
3402              * for 'match a non-nl', and not mean a charname, in which case its
3403              * processing should be deferred to the regex compiler.  To be a
3404              * charname it must be followed immediately by a '{', and not look
3405              * like \N followed by a curly quantifier, i.e., not something like
3406              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3407              * quantifier */
3408             else if (PL_lex_inpat
3409                     && (*s != 'N'
3410                         || s[1] != '{'
3411                         || regcurly(s + 1, FALSE)))
3412             {
3413                 *d++ = '\\';
3414                 goto default_action;
3415             }
3416
3417             switch (*s) {
3418
3419             /* quoted - in transliterations */
3420             case '-':
3421                 if (PL_lex_inwhat == OP_TRANS) {
3422                     *d++ = *s++;
3423                     continue;
3424                 }
3425                 /* FALL THROUGH */
3426             default:
3427                 {
3428                     if ((isALPHANUMERIC(*s)))
3429                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3430                                        "Unrecognized escape \\%c passed through",
3431                                        *s);
3432                     /* default action is to copy the quoted character */
3433                     goto default_action;
3434                 }
3435
3436             /* eg. \132 indicates the octal constant 0132 */
3437             case '0': case '1': case '2': case '3':
3438             case '4': case '5': case '6': case '7':
3439                 {
3440                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3441                     STRLEN len = 3;
3442                     uv = grok_oct(s, &len, &flags, NULL);
3443                     s += len;
3444                     if (len < 3 && s < send && isDIGIT(*s)
3445                         && ckWARN(WARN_MISC))
3446                     {
3447                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3448                                     "%s", form_short_octal_warning(s, len));
3449                     }
3450                 }
3451                 goto NUM_ESCAPE_INSERT;
3452
3453             /* eg. \o{24} indicates the octal constant \024 */
3454             case 'o':
3455                 {
3456                     const char* error;
3457
3458                     bool valid = grok_bslash_o(&s, &uv, &error,
3459                                                TRUE, /* Output warning */
3460                                                FALSE, /* Not strict */
3461                                                TRUE, /* Output warnings for
3462                                                          non-portables */
3463                                                UTF);
3464                     if (! valid) {
3465                         yyerror(error);
3466                         continue;
3467                     }
3468                     goto NUM_ESCAPE_INSERT;
3469                 }
3470
3471             /* eg. \x24 indicates the hex constant 0x24 */
3472             case 'x':
3473                 {
3474                     const char* error;
3475
3476                     bool valid = grok_bslash_x(&s, &uv, &error,
3477                                                TRUE, /* Output warning */
3478                                                FALSE, /* Not strict */
3479                                                TRUE,  /* Output warnings for
3480                                                          non-portables */
3481                                                UTF);
3482                     if (! valid) {
3483                         yyerror(error);
3484                         continue;
3485                     }
3486                 }
3487
3488               NUM_ESCAPE_INSERT:
3489                 /* Insert oct or hex escaped character.  There will always be
3490                  * enough room in sv since such escapes will be longer than any
3491                  * UTF-8 sequence they can end up as, except if they force us
3492                  * to recode the rest of the string into utf8 */
3493
3494                 /* Here uv is the ordinal of the next character being added */
3495                 if (!UVCHR_IS_INVARIANT(uv)) {
3496                     if (!has_utf8 && uv > 255) {
3497                         /* Might need to recode whatever we have accumulated so
3498                          * far if it contains any chars variant in utf8 or
3499                          * utf-ebcdic. */
3500
3501                         SvCUR_set(sv, d - SvPVX_const(sv));
3502                         SvPOK_on(sv);
3503                         *d = '\0';
3504                         /* See Note on sizing above.  */
3505                         sv_utf8_upgrade_flags_grow(sv,
3506                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3507                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3508                         d = SvPVX(sv) + SvCUR(sv);
3509                         has_utf8 = TRUE;
3510                     }
3511
3512                     if (has_utf8) {
3513                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3514                         if (PL_lex_inwhat == OP_TRANS &&
3515                             PL_sublex_info.sub_op) {
3516                             PL_sublex_info.sub_op->op_private |=
3517                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3518                                              : OPpTRANS_TO_UTF);
3519                         }
3520 #ifdef EBCDIC
3521                         if (uv > 255 && !dorange)
3522                             native_range = FALSE;
3523 #endif
3524                     }
3525                     else {
3526                         *d++ = (char)uv;
3527                     }
3528                 }
3529                 else {
3530                     *d++ = (char) uv;
3531                 }
3532                 continue;
3533
3534             case 'N':
3535                 /* In a non-pattern \N must be a named character, like \N{LATIN
3536                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3537                  * mean to match a non-newline.  For non-patterns, named
3538                  * characters are converted to their string equivalents. In
3539                  * patterns, named characters are not converted to their
3540                  * ultimate forms for the same reasons that other escapes
3541                  * aren't.  Instead, they are converted to the \N{U+...} form
3542                  * to get the value from the charnames that is in effect right
3543                  * now, while preserving the fact that it was a named character
3544                  * so that the regex compiler knows this */
3545
3546                 /* The structure of this section of code (besides checking for
3547                  * errors and upgrading to utf8) is:
3548                  *  Further disambiguate between the two meanings of \N, and if
3549                  *      not a charname, go process it elsewhere
3550                  *  If of form \N{U+...}, pass it through if a pattern;
3551                  *      otherwise convert to utf8
3552                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3553                  *  pattern; otherwise convert to utf8 */
3554
3555                 /* Here, s points to the 'N'; the test below is guaranteed to
3556                  * succeed if we are being called on a pattern as we already
3557                  * know from a test above that the next character is a '{'.
3558                  * On a non-pattern \N must mean 'named sequence, which
3559                  * requires braces */
3560                 s++;
3561                 if (*s != '{') {
3562                     yyerror("Missing braces on \\N{}");
3563                     continue;
3564                 }
3565                 s++;
3566
3567                 /* If there is no matching '}', it is an error. */
3568                 if (! (e = strchr(s, '}'))) {
3569                     if (! PL_lex_inpat) {
3570                         yyerror("Missing right brace on \\N{}");
3571                     } else {
3572                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3573                     }
3574                     continue;
3575                 }
3576
3577                 /* Here it looks like a named character */
3578
3579                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3580                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3581                                 | PERL_SCAN_DISALLOW_PREFIX;
3582                     STRLEN len;
3583
3584                     /* For \N{U+...}, the '...' is a unicode value even on
3585                      * EBCDIC machines */
3586                     s += 2;         /* Skip to next char after the 'U+' */
3587                     len = e - s;
3588                     uv = grok_hex(s, &len, &flags, NULL);
3589                     if (len == 0 || len != (STRLEN)(e - s)) {
3590                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3591                         s = e + 1;
3592                         continue;
3593                     }
3594
3595                     if (PL_lex_inpat) {
3596
3597                         /* On non-EBCDIC platforms, pass through to the regex
3598                          * compiler unchanged.  The reason we evaluated the
3599                          * number above is to make sure there wasn't a syntax
3600                          * error.  But on EBCDIC we convert to native so
3601                          * downstream code can continue to assume it's native
3602                          */
3603                         s -= 5;     /* Include the '\N{U+' */
3604 #ifdef EBCDIC
3605                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3606                                                                and the \0 */
3607                                     "\\N{U+%X}",
3608                                     (unsigned int) UNI_TO_NATIVE(uv));
3609 #else
3610                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3611                         d += e - s + 1;
3612 #endif
3613                     }
3614                     else {  /* Not a pattern: convert the hex to string */
3615
3616                          /* If destination is not in utf8, unconditionally
3617                           * recode it to be so.  This is because \N{} implies
3618                           * Unicode semantics, and scalars have to be in utf8
3619                           * to guarantee those semantics */
3620                         if (! has_utf8) {
3621                             SvCUR_set(sv, d - SvPVX_const(sv));
3622                             SvPOK_on(sv);
3623                             *d = '\0';
3624                             /* See Note on sizing above.  */
3625                             sv_utf8_upgrade_flags_grow(
3626                                         sv,
3627                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3628                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3629                             d = SvPVX(sv) + SvCUR(sv);
3630                             has_utf8 = TRUE;
3631                         }
3632
3633                         /* Add the (Unicode) code point to the output. */
3634                         if (UNI_IS_INVARIANT(uv)) {
3635                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3636                         }
3637                         else {
3638                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3639                         }
3640                     }
3641                 }
3642                 else /* Here is \N{NAME} but not \N{U+...}. */
3643                      if ((res = get_and_check_backslash_N_name(s, e)))
3644                 {
3645                     STRLEN len;
3646                     const char *str = SvPV_const(res, len);
3647                     if (PL_lex_inpat) {
3648
3649                         if (! len) { /* The name resolved to an empty string */
3650                             Copy("\\N{}", d, 4, char);
3651                             d += 4;
3652                         }
3653                         else {
3654                             /* In order to not lose information for the regex
3655                             * compiler, pass the result in the specially made
3656                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3657                             * the code points in hex of each character
3658                             * returned by charnames */
3659
3660                             const char *str_end = str + len;
3661                             const STRLEN off = d - SvPVX_const(sv);
3662
3663                             if (! SvUTF8(res)) {
3664                                 /* For the non-UTF-8 case, we can determine the
3665                                  * exact length needed without having to parse
3666                                  * through the string.  Each character takes up
3667                                  * 2 hex digits plus either a trailing dot or
3668                                  * the "}" */
3669                                 d = off + SvGROW(sv, off
3670                                                     + 3 * len
3671                                                     + 6 /* For the "\N{U+", and
3672                                                            trailing NUL */
3673                                                     + (STRLEN)(send - e));
3674                                 Copy("\\N{U+", d, 5, char);
3675                                 d += 5;
3676                                 while (str < str_end) {
3677                                     char hex_string[4];
3678                                     my_snprintf(hex_string, sizeof(hex_string),
3679                                                 "%02X.", (U8) *str);
3680                                     Copy(hex_string, d, 3, char);
3681                                     d += 3;
3682                                     str++;
3683                                 }
3684                                 d--;    /* We will overwrite below the final
3685                                            dot with a right brace */
3686                             }
3687                             else {
3688                                 STRLEN char_length; /* cur char's byte length */
3689
3690                                 /* and the number of bytes after this is
3691                                  * translated into hex digits */
3692                                 STRLEN output_length;
3693
3694                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3695                                  * for max('U+', '.'); and 1 for NUL */
3696                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3697
3698                                 /* Get the first character of the result. */
3699                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3700                                                         len,
3701                                                         &char_length,
3702                                                         UTF8_ALLOW_ANYUV);
3703                                 /* Convert first code point to hex, including
3704                                  * the boiler plate before it. */
3705                                 output_length =
3706                                     my_snprintf(hex_string, sizeof(hex_string),
3707                                                 "\\N{U+%X",
3708                                                 (unsigned int) uv);
3709
3710                                 /* Make sure there is enough space to hold it */
3711                                 d = off + SvGROW(sv, off
3712                                                     + output_length
3713                                                     + (STRLEN)(send - e)
3714                                                     + 2);       /* '}' + NUL */
3715                                 /* And output it */
3716                                 Copy(hex_string, d, output_length, char);
3717                                 d += output_length;
3718
3719                                 /* For each subsequent character, append dot and
3720                                 * its ordinal in hex */
3721                                 while ((str += char_length) < str_end) {
3722                                     const STRLEN off = d - SvPVX_const(sv);
3723                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3724                                                             str_end - str,
3725                                                             &char_length,
3726                                                             UTF8_ALLOW_ANYUV);
3727                                     output_length =
3728                                         my_snprintf(hex_string,
3729                                                     sizeof(hex_string),
3730                                                     ".%X",
3731                                                     (unsigned int) uv);
3732
3733                                     d = off + SvGROW(sv, off
3734                                                         + output_length
3735                                                         + (STRLEN)(send - e)
3736                                                         + 2);   /* '}' +  NUL */
3737                                     Copy(hex_string, d, output_length, char);
3738                                     d += output_length;
3739                                 }
3740                             }
3741
3742                             *d++ = '}'; /* Done.  Add the trailing brace */
3743                         }
3744                     }
3745                     else { /* Here, not in a pattern.  Convert the name to a
3746                             * string. */
3747
3748                          /* If destination is not in utf8, unconditionally
3749                           * recode it to be so.  This is because \N{} implies
3750                           * Unicode semantics, and scalars have to be in utf8
3751                           * to guarantee those semantics */
3752                         if (! has_utf8) {
3753                             SvCUR_set(sv, d - SvPVX_const(sv));
3754                             SvPOK_on(sv);
3755                             *d = '\0';
3756                             /* See Note on sizing above.  */
3757                             sv_utf8_upgrade_flags_grow(sv,
3758                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3759                                                 len + (STRLEN)(send - s) + 1);
3760                             d = SvPVX(sv) + SvCUR(sv);
3761                             has_utf8 = TRUE;
3762                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3763
3764                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3765                              * set correctly here). */
3766                             const STRLEN off = d - SvPVX_const(sv);
3767                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3768                         }
3769                         Copy(str, d, len, char);
3770                         d += len;
3771                     }
3772
3773                     SvREFCNT_dec(res);
3774
3775                 } /* End \N{NAME} */
3776 #ifdef EBCDIC
3777                 if (!dorange)
3778                     native_range = FALSE; /* \N{} is defined to be Unicode */
3779 #endif
3780                 s = e + 1;  /* Point to just after the '}' */
3781                 continue;
3782
3783             /* \c is a control character */
3784             case 'c':
3785                 s++;
3786                 if (s < send) {
3787                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3788                 }
3789                 else {
3790                     yyerror("Missing control char name in \\c");
3791                 }
3792                 continue;
3793
3794             /* printf-style backslashes, formfeeds, newlines, etc */
3795             case 'b':
3796                 *d++ = '\b';
3797                 break;
3798             case 'n':
3799                 *d++ = '\n';
3800                 break;
3801             case 'r':
3802                 *d++ = '\r';
3803                 break;
3804             case 'f':
3805                 *d++ = '\f';
3806                 break;
3807             case 't':
3808                 *d++ = '\t';
3809                 break;
3810             case 'e':
3811                 *d++ = ASCII_TO_NATIVE('\033');
3812                 break;
3813             case 'a':
3814                 *d++ = '\a';
3815                 break;
3816             } /* end switch */
3817
3818             s++;
3819             continue;
3820         } /* end if (backslash) */
3821 #ifdef EBCDIC
3822         else
3823             literal_endpoint++;
3824 #endif
3825
3826     default_action:
3827         /* If we started with encoded form, or already know we want it,
3828            then encode the next character */
3829         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3830             STRLEN len  = 1;
3831
3832
3833             /* One might think that it is wasted effort in the case of the
3834              * source being utf8 (this_utf8 == TRUE) to take the next character
3835              * in the source, convert it to an unsigned value, and then convert
3836              * it back again.  But the source has not been validated here.  The
3837              * routine that does the conversion checks for errors like
3838              * malformed utf8 */
3839
3840             const UV nextuv   = (this_utf8)
3841                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3842                                 : (UV) ((U8) *s);
3843             const STRLEN need = UNISKIP(nextuv);
3844             if (!has_utf8) {
3845                 SvCUR_set(sv, d - SvPVX_const(sv));
3846                 SvPOK_on(sv);
3847                 *d = '\0';
3848                 /* See Note on sizing above.  */
3849                 sv_utf8_upgrade_flags_grow(sv,
3850                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3851                                         need + (STRLEN)(send - s) + 1);
3852                 d = SvPVX(sv) + SvCUR(sv);
3853                 has_utf8 = TRUE;
3854             } else if (need > len) {
3855                 /* encoded value larger than old, may need extra space (NOTE:
3856                  * SvCUR() is not set correctly here).   See Note on sizing
3857                  * above.  */
3858                 const STRLEN off = d - SvPVX_const(sv);
3859                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3860             }
3861             s += len;
3862
3863             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3864 #ifdef EBCDIC
3865             if (uv > 255 && !dorange)
3866                 native_range = FALSE;
3867 #endif
3868         }
3869         else {
3870             *d++ = *s++;
3871         }
3872     } /* while loop to process each character */
3873
3874     /* terminate the string and set up the sv */
3875     *d = '\0';
3876     SvCUR_set(sv, d - SvPVX_const(sv));
3877     if (SvCUR(sv) >= SvLEN(sv))
3878         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3879                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3880
3881     SvPOK_on(sv);
3882     if (PL_encoding && !has_utf8) {
3883         sv_recode_to_utf8(sv, PL_encoding);
3884         if (SvUTF8(sv))
3885             has_utf8 = TRUE;
3886     }
3887     if (has_utf8) {
3888         SvUTF8_on(sv);
3889         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3890             PL_sublex_info.sub_op->op_private |=
3891                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3892         }
3893     }
3894
3895     /* shrink the sv if we allocated more than we used */
3896     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3897         SvPV_shrink_to_cur(sv);
3898     }
3899
3900     /* return the substring (via pl_yylval) only if we parsed anything */
3901     if (s > start) {
3902         char *s2 = start;
3903         for (; s2 < s; s2++) {
3904             if (*s2 == '\n')
3905                 COPLINE_INC_WITH_HERELINES;
3906         }
3907         SvREFCNT_inc_simple_void_NN(sv);
3908         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3909             && ! PL_parser->lex_re_reparsing)
3910         {
3911             const char *const key = PL_lex_inpat ? "qr" : "q";
3912             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3913             const char *type;
3914             STRLEN typelen;
3915
3916             if (PL_lex_inwhat == OP_TRANS) {
3917                 type = "tr";
3918                 typelen = 2;
3919             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3920                 type = "s";
3921                 typelen = 1;
3922             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3923                 type = "q";
3924                 typelen = 1;
3925             } else  {
3926                 type = "qq";
3927                 typelen = 2;
3928             }
3929
3930             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3931                                 type, typelen);
3932         }
3933         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3934     }
3935     LEAVE_with_name("scan_const");
3936     return s;
3937 }
3938
3939 /* S_intuit_more
3940  * Returns TRUE if there's more to the expression (e.g., a subscript),
3941  * FALSE otherwise.
3942  *
3943  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3944  *
3945  * ->[ and ->{ return TRUE
3946  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3947  * { and [ outside a pattern are always subscripts, so return TRUE
3948  * if we're outside a pattern and it's not { or [, then return FALSE
3949  * if we're in a pattern and the first char is a {
3950  *   {4,5} (any digits around the comma) returns FALSE
3951  * if we're in a pattern and the first char is a [
3952  *   [] returns FALSE
3953  *   [SOMETHING] has a funky algorithm to decide whether it's a
3954  *      character class or not.  It has to deal with things like
3955  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3956  * anything else returns TRUE
3957  */
3958
3959 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3960
3961 STATIC int
3962 S_intuit_more(pTHX_ char *s)
3963 {
3964     dVAR;
3965
3966     PERL_ARGS_ASSERT_INTUIT_MORE;
3967
3968     if (PL_lex_brackets)
3969         return TRUE;
3970     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3971         return TRUE;
3972     if (*s == '-' && s[1] == '>'
3973      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3974      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3975         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3976         return TRUE;
3977     if (*s != '{' && *s != '[')
3978         return FALSE;
3979     if (!PL_lex_inpat)
3980         return TRUE;
3981
3982     /* In a pattern, so maybe we have {n,m}. */
3983     if (*s == '{') {
3984         if (regcurly(s, FALSE)) {
3985             return FALSE;
3986         }
3987         return TRUE;
3988     }
3989
3990     /* On the other hand, maybe we have a character class */
3991
3992     s++;
3993     if (*s == ']' || *s == '^')
3994         return FALSE;
3995     else {
3996         /* this is terrifying, and it works */
3997         int weight;
3998         char seen[256];
3999         const char * const send = strchr(s,']');
4000         unsigned char un_char, last_un_char;
4001         char tmpbuf[sizeof PL_tokenbuf * 4];
4002
4003         if (!send)              /* has to be an expression */
4004             return TRUE;
4005         weight = 2;             /* let's weigh the evidence */
4006
4007         if (*s == '$')
4008             weight -= 3;
4009         else if (isDIGIT(*s)) {
4010             if (s[1] != ']') {
4011                 if (isDIGIT(s[1]) && s[2] == ']')
4012                     weight -= 10;
4013             }
4014             else
4015                 weight -= 100;
4016         }
4017         Zero(seen,256,char);
4018         un_char = 255;
4019         for (; s < send; s++) {
4020             last_un_char = un_char;
4021             un_char = (unsigned char)*s;
4022             switch (*s) {
4023             case '@':
4024             case '&':
4025             case '$':
4026                 weight -= seen[un_char] * 10;
4027                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4028                     int len;
4029                     char *tmp = PL_bufend;
4030                     PL_bufend = (char*)send;
4031                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4032                     PL_bufend = tmp;
4033                     len = (int)strlen(tmpbuf);
4034                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4035                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4036                         weight -= 100;
4037                     else
4038                         weight -= 10;
4039                 }
4040                 else if (*s == '$' && s[1] &&
4041                   strchr("[#!%*<>()-=",s[1])) {
4042                     if (/*{*/ strchr("])} =",s[2]))
4043                         weight -= 10;
4044                     else
4045                         weight -= 1;
4046                 }
4047                 break;
4048             case '\\':
4049                 un_char = 254;
4050                 if (s[1]) {
4051                     if (strchr("wds]",s[1]))
4052                         weight += 100;
4053                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4054                         weight += 1;
4055                     else if (strchr("rnftbxcav",s[1]))
4056                         weight += 40;
4057                     else if (isDIGIT(s[1])) {
4058                         weight += 40;
4059                         while (s[1] && isDIGIT(s[1]))
4060                             s++;
4061                     }
4062                 }
4063                 else
4064                     weight += 100;
4065                 break;
4066             case '-':
4067                 if (s[1] == '\\')
4068                     weight += 50;
4069                 if (strchr("aA01! ",last_un_char))
4070                     weight += 30;
4071                 if (strchr("zZ79~",s[1]))
4072                     weight += 30;
4073                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4074                     weight -= 5;        /* cope with negative subscript */
4075                 break;
4076             default:
4077                 if (!isWORDCHAR(last_un_char)
4078                     && !(last_un_char == '$' || last_un_char == '@'
4079                          || last_un_char == '&')
4080                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4081                     char *d = tmpbuf;
4082                     while (isALPHA(*s))
4083                         *d++ = *s++;
4084                     *d = '\0';
4085                     if (keyword(tmpbuf, d - tmpbuf, 0))
4086                         weight -= 150;
4087                 }
4088                 if (un_char == last_un_char + 1)
4089                     weight += 5;
4090                 weight -= seen[un_char];
4091                 break;
4092             }
4093             seen[un_char]++;
4094         }
4095         if (weight >= 0)        /* probably a character class */
4096             return FALSE;
4097     }
4098
4099     return TRUE;
4100 }
4101
4102 /*
4103  * S_intuit_method
4104  *
4105  * Does all the checking to disambiguate
4106  *   foo bar
4107  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4108  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4109  *
4110  * First argument is the stuff after the first token, e.g. "bar".
4111  *
4112  * Not a method if foo is a filehandle.
4113  * Not a method if foo is a subroutine prototyped to take a filehandle.
4114  * Not a method if it's really "Foo $bar"
4115  * Method if it's "foo $bar"
4116  * Not a method if it's really "print foo $bar"
4117  * Method if it's really "foo package::" (interpreted as package->foo)
4118  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4119  * Not a method if bar is a filehandle or package, but is quoted with
4120  *   =>
4121  */
4122
4123 STATIC int
4124 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4125 {
4126     dVAR;
4127     char *s = start + (*start == '$');
4128     char tmpbuf[sizeof PL_tokenbuf];
4129     STRLEN len;
4130     GV* indirgv;
4131 #ifdef PERL_MAD
4132     int soff;
4133 #endif
4134
4135     PERL_ARGS_ASSERT_INTUIT_METHOD;
4136
4137     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4138             return 0;
4139     if (cv && SvPOK(cv)) {
4140         const char *proto = CvPROTO(cv);
4141         if (proto) {
4142             while (*proto && (isSPACE(*proto) || *proto == ';'))
4143                 proto++;
4144             if (*proto == '*')
4145                 return 0;
4146         }
4147     }
4148
4149     if (*start == '$') {
4150         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4151                 isUPPER(*PL_tokenbuf))
4152             return 0;
4153 #ifdef PERL_MAD
4154         len = start - SvPVX(PL_linestr);
4155 #endif
4156         s = PEEKSPACE(s);
4157 #ifdef PERL_MAD
4158         start = SvPVX(PL_linestr) + len;
4159 #endif
4160         PL_bufptr = start;
4161         PL_expect = XREF;
4162         return *s == '(' ? FUNCMETH : METHOD;
4163     }
4164
4165     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4166     /* start is the beginning of the possible filehandle/object,
4167      * and s is the end of it
4168      * tmpbuf is a copy of it (but with single quotes as double colons)
4169      */
4170
4171     if (!keyword(tmpbuf, len, 0)) {
4172         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4173             len -= 2;
4174             tmpbuf[len] = '\0';
4175 #ifdef PERL_MAD
4176             soff = s - SvPVX(PL_linestr);
4177 #endif
4178             goto bare_package;
4179         }
4180         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4181         if (indirgv && GvCVu(indirgv))
4182             return 0;
4183         /* filehandle or package name makes it a method */
4184         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4185 #ifdef PERL_MAD
4186             soff = s - SvPVX(PL_linestr);
4187 #endif
4188             s = PEEKSPACE(s);
4189             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4190                 return 0;       /* no assumptions -- "=>" quotes bareword */
4191       bare_package:
4192             start_force(PL_curforce);
4193             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4194                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4195             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4196             if (PL_madskills)
4197                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4198                                                             ( UTF ? SVf_UTF8 : 0 )));
4199             PL_expect = XTERM;
4200             force_next(WORD);
4201             PL_bufptr = s;
4202 #ifdef PERL_MAD
4203             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4204 #endif
4205             return *s == '(' ? FUNCMETH : METHOD;
4206         }
4207     }
4208     return 0;
4209 }
4210
4211 /* Encoded script support. filter_add() effectively inserts a
4212  * 'pre-processing' function into the current source input stream.
4213  * Note that the filter function only applies to the current source file
4214  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4215  *
4216  * The datasv parameter (which may be NULL) can be used to pass
4217  * private data to this instance of the filter. The filter function
4218  * can recover the SV using the FILTER_DATA macro and use it to
4219  * store private buffers and state information.
4220  *
4221  * The supplied datasv parameter is upgraded to a PVIO type
4222  * and the IoDIRP/IoANY field is used to store the function pointer,
4223  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4224  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4225  * private use must be set using malloc'd pointers.
4226  */
4227
4228 SV *
4229 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4230 {
4231     dVAR;
4232     if (!funcp)
4233         return NULL;
4234
4235     if (!PL_parser)
4236         return NULL;
4237
4238     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4239         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4240
4241     if (!PL_rsfp_filters)
4242         PL_rsfp_filters = newAV();
4243     if (!datasv)
4244         datasv = newSV(0);
4245     SvUPGRADE(datasv, SVt_PVIO);
4246     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4247     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4248     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4249                           FPTR2DPTR(void *, IoANY(datasv)),
4250                           SvPV_nolen(datasv)));
4251     av_unshift(PL_rsfp_filters, 1);
4252     av_store(PL_rsfp_filters, 0, datasv) ;
4253     if (
4254         !PL_parser->filtered
4255      && PL_parser->lex_flags & LEX_EVALBYTES
4256      && PL_bufptr < PL_bufend
4257     ) {
4258         const char *s = PL_bufptr;
4259         while (s < PL_bufend) {
4260             if (*s == '\n') {
4261                 SV *linestr = PL_parser->linestr;
4262                 char *buf = SvPVX(linestr);
4263                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4264                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4265                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4266                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4267                 STRLEN const last_uni_pos =
4268                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4269                 STRLEN const last_lop_pos =
4270                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4271                 av_push(PL_rsfp_filters, linestr);
4272                 PL_parser->linestr =
4273                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4274                 buf = SvPVX(PL_parser->linestr);
4275                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4276                 PL_parser->bufptr = buf + bufptr_pos;
4277                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4278                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4279                 PL_parser->linestart = buf + linestart_pos;
4280                 if (PL_parser->last_uni)
4281                     PL_parser->last_uni = buf + last_uni_pos;
4282                 if (PL_parser->last_lop)
4283                     PL_parser->last_lop = buf + last_lop_pos;
4284                 SvLEN(linestr) = SvCUR(linestr);
4285                 SvCUR(linestr) = s-SvPVX(linestr);
4286                 PL_parser->filtered = 1;
4287                 break;
4288             }
4289             s++;
4290         }
4291     }
4292     return(datasv);
4293 }
4294
4295
4296 /* Delete most recently added instance of this filter function. */
4297 void
4298 Perl_filter_del(pTHX_ filter_t funcp)
4299 {
4300     dVAR;
4301     SV *datasv;
4302
4303     PERL_ARGS_ASSERT_FILTER_DEL;
4304
4305 #ifdef DEBUGGING
4306     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4307                           FPTR2DPTR(void*, funcp)));
4308 #endif
4309     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4310         return;
4311     /* if filter is on top of stack (usual case) just pop it off */
4312     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4313     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4314         sv_free(av_pop(PL_rsfp_filters));
4315
4316         return;
4317     }
4318     /* we need to search for the correct entry and clear it     */
4319     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4320 }
4321
4322
4323 /* Invoke the idxth filter function for the current rsfp.        */
4324 /* maxlen 0 = read one text line */
4325 I32
4326 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4327 {
4328     dVAR;
4329     filter_t funcp;
4330     SV *datasv = NULL;
4331     /* This API is bad. It should have been using unsigned int for maxlen.
4332        Not sure if we want to change the API, but if not we should sanity
4333        check the value here.  */
4334     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4335
4336     PERL_ARGS_ASSERT_FILTER_READ;
4337
4338     if (!PL_parser || !PL_rsfp_filters)
4339         return -1;
4340     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4341         /* Provide a default input filter to make life easy.    */
4342         /* Note that we append to the line. This is handy.      */
4343         DEBUG_P(PerlIO_printf(Perl_debug_log,
4344                               "filter_read %d: from rsfp\n", idx));
4345         if (correct_length) {
4346             /* Want a block */
4347             int len ;
4348             const int old_len = SvCUR(buf_sv);
4349
4350             /* ensure buf_sv is large enough */
4351             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4352             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4353                                    correct_length)) <= 0) {
4354                 if (PerlIO_error(PL_rsfp))
4355                     return -1;          /* error */
4356                 else
4357                     return 0 ;          /* end of file */
4358             }
4359             SvCUR_set(buf_sv, old_len + len) ;
4360             SvPVX(buf_sv)[old_len + len] = '\0';
4361         } else {
4362             /* Want a line */
4363             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4364                 if (PerlIO_error(PL_rsfp))
4365                     return -1;          /* error */
4366                 else
4367                     return 0 ;          /* end of file */
4368             }
4369         }
4370         return SvCUR(buf_sv);
4371     }
4372     /* Skip this filter slot if filter has been deleted */
4373     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4374         DEBUG_P(PerlIO_printf(Perl_debug_log,
4375                               "filter_read %d: skipped (filter deleted)\n",
4376                               idx));
4377         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4378     }
4379     if (SvTYPE(datasv) != SVt_PVIO) {
4380         if (correct_length) {
4381             /* Want a block */
4382             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4383             if (!remainder) return 0; /* eof */
4384             if (correct_length > remainder) correct_length = remainder;
4385             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4386             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4387         } else {
4388             /* Want a line */
4389             const char *s = SvEND(datasv);
4390             const char *send = SvPVX(datasv) + SvLEN(datasv);
4391             while (s < send) {
4392                 if (*s == '\n') {
4393                     s++;
4394                     break;
4395                 }
4396                 s++;
4397             }
4398             if (s == send) return 0; /* eof */
4399             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4400             SvCUR_set(datasv, s-SvPVX(datasv));
4401         }
4402         return SvCUR(buf_sv);
4403     }
4404     /* Get function pointer hidden within datasv        */
4405     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4406     DEBUG_P(PerlIO_printf(Perl_debug_log,
4407                           "filter_read %d: via function %p (%s)\n",
4408                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4409     /* Call function. The function is expected to       */
4410     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4411     /* Return: <0:error, =0:eof, >0:not eof             */
4412     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4413 }
4414
4415 STATIC char *
4416 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4417 {
4418     dVAR;
4419
4420     PERL_ARGS_ASSERT_FILTER_GETS;
4421
4422 #ifdef PERL_CR_FILTER
4423     if (!PL_rsfp_filters) {
4424         filter_add(S_cr_textfilter,NULL);
4425     }
4426 #endif
4427     if (PL_rsfp_filters) {
4428         if (!append)
4429             SvCUR_set(sv, 0);   /* start with empty line        */
4430         if (FILTER_READ(0, sv, 0) > 0)
4431             return ( SvPVX(sv) ) ;
4432         else
4433             return NULL ;
4434     }
4435     else
4436         return (sv_gets(sv, PL_rsfp, append));
4437 }
4438
4439 STATIC HV *
4440 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4441 {
4442     dVAR;
4443     GV *gv;
4444
4445     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4446
4447     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4448         return PL_curstash;
4449
4450     if (len > 2 &&
4451         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4452         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4453     {
4454         return GvHV(gv);                        /* Foo:: */
4455     }
4456
4457     /* use constant CLASS => 'MyClass' */
4458     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4459     if (gv && GvCV(gv)) {
4460         SV * const sv = cv_const_sv(GvCV(gv));
4461         if (sv)
4462             pkgname = SvPV_const(sv, len);
4463     }
4464
4465     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4466 }
4467
4468 #ifdef PERL_MAD
4469  /*
4470  * Perl_madlex
4471  * The intent of this yylex wrapper is to minimize the changes to the
4472  * tokener when we aren't interested in collecting madprops.  It remains
4473  * to be seen how successful this strategy will be...
4474  */
4475
4476 int
4477 Perl_madlex(pTHX)
4478 {
4479     int optype;
4480     char *s = PL_bufptr;
4481
4482     /* make sure PL_thiswhite is initialized */
4483     PL_thiswhite = 0;
4484     PL_thismad = 0;
4485
4486     /* previous token ate up our whitespace? */
4487     if (!PL_lasttoke && PL_nextwhite) {
4488         PL_thiswhite = PL_nextwhite;
4489         PL_nextwhite = 0;
4490     }
4491
4492     /* isolate the token, and figure out where it is without whitespace */
4493     PL_realtokenstart = -1;
4494     PL_thistoken = 0;
4495     optype = yylex();
4496     s = PL_bufptr;
4497     assert(PL_curforce < 0);
4498
4499     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4500         if (!PL_thistoken) {
4501             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4502                 PL_thistoken = newSVpvs("");
4503             else {
4504                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4505                 PL_thistoken = newSVpvn(tstart, s - tstart);
4506             }
4507         }
4508         if (PL_thismad) /* install head */
4509             CURMAD('X', PL_thistoken);
4510     }
4511
4512     /* last whitespace of a sublex? */
4513     if (optype == ')' && PL_endwhite) {
4514         CURMAD('X', PL_endwhite);
4515     }
4516
4517     if (!PL_thismad) {
4518
4519         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4520         if (!PL_thiswhite && !PL_endwhite && !optype) {
4521             sv_free(PL_thistoken);
4522             PL_thistoken = 0;
4523             return 0;
4524         }
4525
4526         /* put off final whitespace till peg */
4527         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4528             PL_nextwhite = PL_thiswhite;
4529             PL_thiswhite = 0;
4530         }
4531         else if (PL_thisopen) {
4532             CURMAD('q', PL_thisopen);
4533             if (PL_thistoken)
4534                 sv_free(PL_thistoken);
4535             PL_thistoken = 0;
4536         }
4537         else {
4538             /* Store actual token text as madprop X */
4539             CURMAD('X', PL_thistoken);
4540         }
4541
4542         if (PL_thiswhite) {
4543             /* add preceding whitespace as madprop _ */
4544             CURMAD('_', PL_thiswhite);
4545         }
4546
4547         if (PL_thisstuff) {
4548             /* add quoted material as madprop = */
4549             CURMAD('=', PL_thisstuff);
4550         }
4551
4552         if (PL_thisclose) {
4553             /* add terminating quote as madprop Q */
4554             CURMAD('Q', PL_thisclose);
4555         }
4556     }
4557
4558     /* special processing based on optype */
4559
4560     switch (optype) {
4561
4562     /* opval doesn't need a TOKEN since it can already store mp */
4563     case WORD:
4564     case METHOD:
4565     case FUNCMETH:
4566     case THING:
4567     case PMFUNC:
4568     case PRIVATEREF:
4569     case FUNC0SUB:
4570     case UNIOPSUB:
4571     case LSTOPSUB:
4572         if (pl_yylval.opval)
4573             append_madprops(PL_thismad, pl_yylval.opval, 0);
4574         PL_thismad = 0;
4575         return optype;
4576
4577     /* fake EOF */
4578     case 0:
4579         optype = PEG;
4580         if (PL_endwhite) {
4581             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4582             PL_endwhite = 0;
4583         }
4584         break;
4585
4586     /* pval */
4587     case LABEL:
4588         break;
4589
4590     case ']':
4591     case '}':
4592         if (PL_faketokens)
4593             break;
4594         /* remember any fake bracket that lexer is about to discard */
4595         if (PL_lex_brackets == 1 &&
4596             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4597         {
4598             s = PL_bufptr;
4599             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4600                 s++;
4601             if (*s == '}') {
4602                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4603                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4604                 PL_thiswhite = 0;
4605                 PL_bufptr = s - 1;
4606                 break;  /* don't bother looking for trailing comment */
4607             }
4608             else
4609                 s = PL_bufptr;
4610         }
4611         if (optype == ']')
4612             break;
4613         /* FALLTHROUGH */
4614
4615     /* attach a trailing comment to its statement instead of next token */
4616     case ';':
4617         if (PL_faketokens)
4618             break;
4619         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4620             s = PL_bufptr;
4621             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4622                 s++;
4623             if (*s == '\n' || *s == '#') {
4624                 while (s < PL_bufend && *s != '\n')
4625                     s++;
4626                 if (s < PL_bufend)
4627                     s++;
4628                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4629                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4630                 PL_thiswhite = 0;
4631                 PL_bufptr = s;
4632             }
4633         }
4634         break;
4635
4636     /* ival */
4637     default:
4638         break;
4639
4640     }
4641
4642     /* Create new token struct.  Note: opvals return early above. */
4643     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4644     PL_thismad = 0;
4645     return optype;
4646 }
4647 #endif
4648
4649 STATIC char *
4650 S_tokenize_use(pTHX_ int is_use, char *s) {
4651     dVAR;
4652
4653     PERL_ARGS_ASSERT_TOKENIZE_USE;
4654
4655     if (PL_expect != XSTATE)
4656         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4657                     is_use ? "use" : "no"));
4658     PL_expect = XTERM;
4659     s = SKIPSPACE1(s);
4660     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4661         s = force_version(s, TRUE);
4662         if (*s == ';' || *s == '}'
4663                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4664             start_force(PL_curforce);
4665             NEXTVAL_NEXTTOKE.opval = NULL;
4666             force_next(WORD);
4667         }
4668         else if (*s == 'v') {
4669             s = force_word(s,WORD,FALSE,TRUE);
4670             s = force_version(s, FALSE);
4671         }
4672     }
4673     else {
4674         s = force_word(s,WORD,FALSE,TRUE);
4675         s = force_version(s, FALSE);
4676     }
4677     pl_yylval.ival = is_use;
4678     return s;
4679 }
4680 #ifdef DEBUGGING
4681     static const char* const exp_name[] =
4682         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4683           "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4684         };
4685 #endif
4686
4687 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4688 STATIC bool
4689 S_word_takes_any_delimeter(char *p, STRLEN len)
4690 {
4691     return (len == 1 && strchr("msyq", p[0])) ||
4692            (len == 2 && (
4693             (p[0] == 't' && p[1] == 'r') ||
4694             (p[0] == 'q' && strchr("qwxr", p[1]))));
4695 }
4696
4697 static void
4698 S_check_scalar_slice(pTHX_ char *s)
4699 {
4700     s++;
4701     while (*s == ' ' || *s == '\t') s++;
4702     if (*s == 'q' && s[1] == 'w'
4703      && !isWORDCHAR_lazy_if(s+2,UTF))
4704         return;
4705     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4706         s += UTF ? UTF8SKIP(s) : 1;
4707     if (*s == '}' || *s == ']')
4708         pl_yylval.ival = OPpSLICEWARNING;
4709 }
4710
4711 /*
4712   yylex
4713
4714   Works out what to call the token just pulled out of the input
4715   stream.  The yacc parser takes care of taking the ops we return and
4716   stitching them into a tree.
4717
4718   Returns:
4719     The type of the next token
4720
4721   Structure:
4722       Switch based on the current state:
4723           - if we already built the token before, use it
4724           - if we have a case modifier in a string, deal with that
4725           - handle other cases of interpolation inside a string
4726           - scan the next line if we are inside a format
4727       In the normal state switch on the next character:
4728           - default:
4729             if alphabetic, go to key lookup
4730             unrecoginized character - croak
4731           - 0/4/26: handle end-of-line or EOF
4732           - cases for whitespace
4733           - \n and #: handle comments and line numbers
4734           - various operators, brackets and sigils
4735           - numbers
4736           - quotes
4737           - 'v': vstrings (or go to key lookup)
4738           - 'x' repetition operator (or go to key lookup)
4739           - other ASCII alphanumerics (key lookup begins here):
4740               word before => ?
4741               keyword plugin
4742               scan built-in keyword (but do nothing with it yet)
4743               check for statement label
4744               check for lexical subs
4745                   goto just_a_word if there is one
4746               see whether built-in keyword is overridden
4747               switch on keyword number:
4748                   - default: just_a_word:
4749                       not a built-in keyword; handle bareword lookup
4750                       disambiguate between method and sub call
4751                       fall back to bareword
4752                   - cases for built-in keywords
4753 */
4754
4755
4756 int
4757 Perl_yylex(pTHX)
4758 {
4759     dVAR;
4760     char *s = PL_bufptr;
4761     char *d;
4762     STRLEN len;
4763     bool bof = FALSE;
4764     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4765     U8 formbrack = 0;
4766     U32 fake_eof = 0;
4767
4768     /* orig_keyword, gvp, and gv are initialized here because
4769      * jump to the label just_a_word_zero can bypass their
4770      * initialization later. */
4771     I32 orig_keyword = 0;
4772     GV *gv = NULL;
4773     GV **gvp = NULL;
4774
4775     DEBUG_T( {
4776         SV* tmp = newSVpvs("");
4777         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4778             (IV)CopLINE(PL_curcop),
4779             lex_state_names[PL_lex_state],
4780             exp_name[PL_expect],
4781             pv_display(tmp, s, strlen(s), 0, 60));
4782         SvREFCNT_dec(tmp);
4783     } );
4784
4785     switch (PL_lex_state) {
4786     case LEX_NORMAL:
4787     case LEX_INTERPNORMAL:
4788         break;
4789
4790     /* when we've already built the next token, just pull it out of the queue */
4791     case LEX_KNOWNEXT:
4792 #ifdef PERL_MAD
4793         PL_lasttoke--;
4794         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4795         if (PL_madskills) {
4796             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4797             PL_nexttoke[PL_lasttoke].next_mad = 0;
4798             if (PL_thismad && PL_thismad->mad_key == '_') {
4799                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4800                 PL_thismad->mad_val = 0;
4801                 mad_free(PL_thismad);
4802                 PL_thismad = 0;
4803             }
4804         }
4805         if (!PL_lasttoke) {
4806             PL_lex_state = PL_lex_defer;
4807             PL_expect = PL_lex_expect;
4808             PL_lex_defer = LEX_NORMAL;
4809             if (!PL_nexttoke[PL_lasttoke].next_type)
4810                 return yylex();
4811         }
4812 #else
4813         PL_nexttoke--;
4814         pl_yylval = PL_nextval[PL_nexttoke];
4815         if (!PL_nexttoke) {
4816             PL_lex_state = PL_lex_defer;
4817             PL_expect = PL_lex_expect;
4818             PL_lex_defer = LEX_NORMAL;
4819         }
4820 #endif
4821         {
4822             I32 next_type;
4823 #ifdef PERL_MAD
4824             next_type = PL_nexttoke[PL_lasttoke].next_type;
4825 #else
4826             next_type = PL_nexttype[PL_nexttoke];
4827 #endif
4828             if (next_type & (7<<24)) {
4829                 if (next_type & (1<<24)) {
4830                     if (PL_lex_brackets > 100)
4831                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4832                     PL_lex_brackstack[PL_lex_brackets++] =
4833                         (char) ((next_type >> 16) & 0xff);
4834                 }
4835                 if (next_type & (2<<24))
4836                     PL_lex_allbrackets++;
4837                 if (next_type & (4<<24))
4838                     PL_lex_allbrackets--;
4839                 next_type &= 0xffff;
4840             }
4841             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4842         }
4843
4844     /* interpolated case modifiers like \L \U, including \Q and \E.
4845        when we get here, PL_bufptr is at the \
4846     */
4847     case LEX_INTERPCASEMOD:
4848 #ifdef DEBUGGING
4849         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4850             Perl_croak(aTHX_
4851                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4852                        PL_bufptr, PL_bufend, *PL_bufptr);
4853 #endif
4854         /* handle \E or end of string */
4855         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4856             /* if at a \E */
4857             if (PL_lex_casemods) {
4858                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4859                 PL_lex_casestack[PL_lex_casemods] = '\0';
4860
4861                 if (PL_bufptr != PL_bufend
4862                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4863                         || oldmod == 'F')) {
4864                     PL_bufptr += 2;
4865                     PL_lex_state = LEX_INTERPCONCAT;
4866 #ifdef PERL_MAD
4867                     if (PL_madskills)
4868                         PL_thistoken = newSVpvs("\\E");
4869 #endif
4870                 }
4871                 PL_lex_allbrackets--;
4872                 return REPORT(')');
4873             }
4874             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4875                /* Got an unpaired \E */
4876                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4877                         "Useless use of \\E");
4878             }
4879 #ifdef PERL_MAD
4880             while (PL_bufptr != PL_bufend &&
4881               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4882                 if (PL_madskills) {
4883                   if (!PL_thiswhite)
4884                     PL_thiswhite = newSVpvs("");
4885                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4886                 }
4887                 PL_bufptr += 2;
4888             }
4889 #else
4890             if (PL_bufptr != PL_bufend)
4891                 PL_bufptr += 2;
4892 #endif
4893             PL_lex_state = LEX_INTERPCONCAT;
4894             return yylex();
4895         }
4896         else {
4897             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4898               "### Saw case modifier\n"); });
4899             s = PL_bufptr + 1;
4900             if (s[1] == '\\' && s[2] == 'E') {
4901 #ifdef PERL_MAD
4902                 if (PL_madskills) {
4903                   if (!PL_thiswhite)
4904                     PL_thiswhite = newSVpvs("");
4905                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4906                 }
4907 #endif
4908                 PL_bufptr = s + 3;
4909                 PL_lex_state = LEX_INTERPCONCAT;
4910                 return yylex();
4911             }
4912             else {
4913                 I32 tmp;
4914                 if (!PL_madskills) /* when just compiling don't need correct */
4915                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4916                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4917                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4918                     (strchr(PL_lex_casestack, 'L')
4919                         || strchr(PL_lex_casestack, 'U')
4920                         || strchr(PL_lex_casestack, 'F'))) {
4921                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4922                     PL_lex_allbrackets--;
4923                     return REPORT(')');
4924                 }
4925                 if (PL_lex_casemods > 10)
4926                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4927                 PL_lex_casestack[PL_lex_casemods++] = *s;
4928                 PL_lex_casestack[PL_lex_casemods] = '\0';
4929                 PL_lex_state = LEX_INTERPCONCAT;
4930                 start_force(PL_curforce);
4931                 NEXTVAL_NEXTTOKE.ival = 0;
4932                 force_next((2<<24)|'(');
4933                 start_force(PL_curforce);
4934                 if (*s == 'l')
4935                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4936                 else if (*s == 'u')
4937                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4938                 else if (*s == 'L')
4939                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4940                 else if (*s == 'U')
4941                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4942                 else if (*s == 'Q')
4943                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4944                 else if (*s == 'F')
4945                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4946                 else
4947                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4948                 if (PL_madskills) {
4949                     SV* const tmpsv = newSVpvs("\\ ");
4950                     /* replace the space with the character we want to escape
4951                      */
4952                     SvPVX(tmpsv)[1] = *s;
4953                     curmad('_', tmpsv);
4954                 }
4955                 PL_bufptr = s + 1;
4956             }
4957             force_next(FUNC);
4958             if (PL_lex_starts) {
4959                 s = PL_bufptr;
4960                 PL_lex_starts = 0;
4961 #ifdef PERL_MAD
4962                 if (PL_madskills) {
4963                     if (PL_thistoken)
4964                         sv_free(PL_thistoken);
4965                     PL_thistoken = newSVpvs("");
4966                 }
4967 #endif
4968                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4969                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4970                     OPERATOR(',');
4971                 else
4972                     Aop(OP_CONCAT);
4973             }
4974             else
4975                 return yylex();
4976         }
4977
4978     case LEX_INTERPPUSH:
4979         return REPORT(sublex_push());
4980
4981     case LEX_INTERPSTART:
4982         if (PL_bufptr == PL_bufend)
4983             return REPORT(sublex_done());
4984         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4985               "### Interpolated variable\n"); });
4986         PL_expect = XTERM;
4987         /* for /@a/, we leave the joining for the regex engine to do
4988          * (unless we're within \Q etc) */
4989         PL_lex_dojoin = (*PL_bufptr == '@'
4990                             && (!PL_lex_inpat || PL_lex_casemods));
4991         PL_lex_state = LEX_INTERPNORMAL;
4992         if (PL_lex_dojoin) {
4993             start_force(PL_curforce);
4994             NEXTVAL_NEXTTOKE.ival = 0;
4995             force_next(',');
4996             start_force(PL_curforce);
4997             force_ident("\"", '$');
4998             start_force(PL_curforce);
4999             NEXTVAL_NEXTTOKE.ival = 0;
5000             force_next('$');
5001             start_force(PL_curforce);
5002             NEXTVAL_NEXTTOKE.ival = 0;
5003             force_next((2<<24)|'(');
5004             start_force(PL_curforce);
5005             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
5006             force_next(FUNC);
5007         }
5008         /* Convert (?{...}) and friends to 'do {...}' */
5009         if (PL_lex_inpat && *PL_bufptr == '(') {
5010             PL_parser->lex_shared->re_eval_start = PL_bufptr;
5011             PL_bufptr += 2;
5012             if (*PL_bufptr != '{')
5013                 PL_bufptr++;
5014             start_force(PL_curforce);
5015             /* XXX probably need a CURMAD(something) here */
5016             PL_expect = XTERMBLOCK;
5017             force_next(DO);
5018         }
5019
5020         if (PL_lex_starts++) {
5021             s = PL_bufptr;
5022 #ifdef PERL_MAD
5023             if (PL_madskills) {
5024                 if (PL_thistoken)
5025                     sv_free(PL_thistoken);
5026                 PL_thistoken = newSVpvs("");
5027             }
5028 #endif
5029             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5030             if (!PL_lex_casemods && PL_lex_inpat)
5031                 OPERATOR(',');
5032             else
5033                 Aop(OP_CONCAT);
5034         }
5035         return yylex();
5036
5037     case LEX_INTERPENDMAYBE:
5038         if (intuit_more(PL_bufptr)) {
5039             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5040             break;
5041         }
5042         /* FALL THROUGH */
5043
5044     case LEX_INTERPEND:
5045         if (PL_lex_dojoin) {
5046             const U8 dojoin_was = PL_lex_dojoin;
5047             PL_lex_dojoin = FALSE;
5048             PL_lex_state = LEX_INTERPCONCAT;
5049 #ifdef PERL_MAD
5050             if (PL_madskills) {
5051                 if (PL_thistoken)
5052                     sv_free(PL_thistoken);
5053                 PL_thistoken = newSVpvs("");
5054             }
5055 #endif
5056             PL_lex_allbrackets--;
5057             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5058         }
5059         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5060             && SvEVALED(PL_lex_repl))
5061         {
5062             if (PL_bufptr != PL_bufend)
5063                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5064             PL_lex_repl = NULL;
5065         }
5066         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5067            re_eval_str.  If the here-doc body’s length equals the previous
5068            value of re_eval_start, re_eval_start will now be null.  So
5069            check re_eval_str as well. */
5070         if (PL_parser->lex_shared->re_eval_start
5071          || PL_parser->lex_shared->re_eval_str) {
5072             SV *sv;
5073             if (*PL_bufptr != ')')
5074                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5075             PL_bufptr++;
5076             /* having compiled a (?{..}) expression, return the original
5077              * text too, as a const */
5078             if (PL_parser->lex_shared->re_eval_str) {
5079                 sv = PL_parser->lex_shared->re_eval_str;
5080                 PL_parser->lex_shared->re_eval_str = NULL;
5081                 SvCUR_set(sv,
5082                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5083                 SvPV_shrink_to_cur(sv);
5084             }
5085             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5086                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5087             start_force(PL_curforce);
5088             /* XXX probably need a CURMAD(something) here */
5089             NEXTVAL_NEXTTOKE.opval =
5090                     (OP*)newSVOP(OP_CONST, 0,
5091                                  sv);
5092             force_next(THING);
5093             PL_parser->lex_shared->re_eval_start = NULL;
5094             PL_expect = XTERM;
5095             return REPORT(',');
5096         }
5097
5098         /* FALLTHROUGH */
5099     case LEX_INTERPCONCAT:
5100 #ifdef DEBUGGING
5101         if (PL_lex_brackets)
5102             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5103                        (long) PL_lex_brackets);
5104 #endif
5105         if (PL_bufptr == PL_bufend)
5106             return REPORT(sublex_done());
5107
5108         /* m'foo' still needs to be parsed for possible (?{...}) */
5109         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5110             SV *sv = newSVsv(PL_linestr);
5111             sv = tokeq(sv);
5112             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5113             s = PL_bufend;
5114         }
5115         else {
5116             s = scan_const(PL_bufptr);
5117             if (*s == '\\')
5118                 PL_lex_state = LEX_INTERPCASEMOD;
5119             else
5120                 PL_lex_state = LEX_INTERPSTART;
5121         }
5122
5123         if (s != PL_bufptr) {
5124             start_force(PL_curforce);
5125             if (PL_madskills) {
5126                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5127             }
5128             NEXTVAL_NEXTTOKE = pl_yylval;
5129             PL_expect = XTERM;
5130             force_next(THING);
5131             if (PL_lex_starts++) {
5132 #ifdef PERL_MAD
5133                 if (PL_madskills) {
5134                     if (PL_thistoken)
5135                         sv_free(PL_thistoken);
5136                     PL_thistoken = newSVpvs("");
5137                 }
5138 #endif
5139                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5140                 if (!PL_lex_casemods && PL_lex_inpat)
5141                     OPERATOR(',');
5142                 else
5143                     Aop(OP_CONCAT);
5144             }
5145             else {
5146                 PL_bufptr = s;
5147                 return yylex();
5148             }
5149         }
5150
5151         return yylex();
5152     case LEX_FORMLINE:
5153         s = scan_formline(PL_bufptr);
5154         if (!PL_lex_formbrack)
5155         {
5156             formbrack = 1;
5157             goto rightbracket;
5158         }
5159         PL_bufptr = s;
5160         return yylex();
5161     }
5162
5163     /* We really do *not* want PL_linestr ever becoming a COW. */
5164     assert (!SvIsCOW(PL_linestr));
5165     s = PL_bufptr;
5166     PL_oldoldbufptr = PL_oldbufptr;
5167     PL_oldbufptr = s;
5168     PL_parser->saw_infix_sigil = 0;
5169
5170   retry:
5171 #ifdef PERL_MAD
5172     if (PL_thistoken) {
5173         sv_free(PL_thistoken);
5174         PL_thistoken = 0;
5175     }
5176     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5177 #endif
5178     switch (*s) {
5179     default:
5180         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5181             goto keylookup;
5182         {
5183         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5184         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5185                                                     UTF8SKIP(s),
5186                                                     SVs_TEMP | SVf_UTF8),
5187                                             10, UNI_DISPLAY_ISPRINT)
5188                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5189         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5190         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5191             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5192         } else {
5193             d = PL_linestart;
5194         }
5195         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5196                           UTF8fARG(UTF, (s - d), d),
5197                          (int) len + 1);
5198     }
5199     case 4:
5200     case 26:
5201         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5202     case 0:
5203 #ifdef PERL_MAD
5204         if (PL_madskills)
5205             PL_faketokens = 0;
5206 #endif
5207         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5208             PL_last_uni = 0;
5209             PL_last_lop = 0;
5210             if (PL_lex_brackets &&
5211                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5212                 yyerror((const char *)
5213                         (PL_lex_formbrack
5214                          ? "Format not terminated"
5215                          : "Missing right curly or square bracket"));
5216             }
5217             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5218                         "### Tokener got EOF\n");
5219             } );
5220             TOKEN(0);
5221         }
5222         if (s++ < PL_bufend)
5223             goto retry;                 /* ignore stray nulls */
5224         PL_last_uni = 0;
5225         PL_last_lop = 0;
5226         if (!PL_in_eval && !PL_preambled) {
5227             PL_preambled = TRUE;
5228 #ifdef PERL_MAD
5229             if (PL_madskills)
5230                 PL_faketokens = 1;
5231 #endif
5232             if (PL_perldb) {
5233                 /* Generate a string of Perl code to load the debugger.
5234                  * If PERL5DB is set, it will return the contents of that,
5235                  * otherwise a compile-time require of perl5db.pl.  */
5236
5237                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5238
5239                 if (pdb) {
5240                     sv_setpv(PL_linestr, pdb);
5241                     sv_catpvs(PL_linestr,";");
5242                 } else {
5243                     SETERRNO(0,SS_NORMAL);
5244                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5245                 }
5246                 PL_parser->preambling = CopLINE(PL_curcop);
5247             } else
5248                 sv_setpvs(PL_linestr,"");
5249             if (PL_preambleav) {
5250                 SV **svp = AvARRAY(PL_preambleav);
5251                 SV **const end = svp + AvFILLp(PL_preambleav);
5252                 while(svp <= end) {
5253                     sv_catsv(PL_linestr, *svp);
5254                     ++svp;
5255                     sv_catpvs(PL_linestr, ";");
5256                 }
5257                 sv_free(MUTABLE_SV(PL_preambleav));
5258                 PL_preambleav = NULL;
5259             }
5260             if (PL_minus_E)
5261                 sv_catpvs(PL_linestr,
5262                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5263             if (PL_minus_n || PL_minus_p) {
5264                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5265                 if (PL_minus_l)
5266                     sv_catpvs(PL_linestr,"chomp;");
5267                 if (PL_minus_a) {
5268                     if (PL_minus_F) {
5269                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5270                              || *PL_splitstr == '"')
5271                               && strchr(PL_splitstr + 1, *PL_splitstr))
5272                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5273                         else {
5274                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5275                                bytes can be used as quoting characters.  :-) */
5276                             const char *splits = PL_splitstr;
5277                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5278                             do {
5279                                 /* Need to \ \s  */
5280                                 if (*splits == '\\')
5281                                     sv_catpvn(PL_linestr, splits, 1);
5282                                 sv_catpvn(PL_linestr, splits, 1);
5283                             } while (*splits++);
5284                             /* This loop will embed the trailing NUL of
5285                                PL_linestr as the last thing it does before
5286                                terminating.  */
5287                             sv_catpvs(PL_linestr, ");");
5288                         }
5289                     }
5290                     else
5291                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5292                 }
5293             }
5294             sv_catpvs(PL_linestr, "\n");
5295             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5296             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5297             PL_last_lop = PL_last_uni = NULL;
5298             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5299                 update_debugger_info(PL_linestr, NULL, 0);
5300             goto retry;
5301         }
5302         do {
5303             fake_eof = 0;
5304             bof = PL_rsfp ? TRUE : FALSE;
5305             if (0) {
5306               fake_eof:
5307                 fake_eof = LEX_FAKE_EOF;
5308             }
5309             PL_bufptr = PL_bufend;
5310             COPLINE_INC_WITH_HERELINES;
5311             if (!lex_next_chunk(fake_eof)) {
5312                 CopLINE_dec(PL_curcop);
5313                 s = PL_bufptr;
5314                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5315             }
5316             CopLINE_dec(PL_curcop);
5317 #ifdef PERL_MAD
5318             if (!PL_rsfp)
5319                 PL_realtokenstart = -1;
5320 #endif
5321             s = PL_bufptr;
5322             /* If it looks like the start of a BOM or raw UTF-16,
5323              * check if it in fact is. */
5324             if (bof && PL_rsfp &&
5325                      (*s == 0 ||
5326                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5327                       *(U8*)s >= 0xFE ||
5328                       s[1] == 0)) {
5329                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5330                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5331 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5332                 /* offset may include swallowed CR */
5333                 if (!bof)
5334                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5335 #endif
5336                 if (bof) {
5337                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5338                     s = swallow_bom((U8*)s);
5339                 }
5340             }
5341             if (PL_parser->in_pod) {
5342                 /* Incest with pod. */
5343 #ifdef PERL_MAD
5344                 if (PL_madskills)
5345                     sv_catsv(PL_thiswhite, PL_linestr);
5346 #endif
5347                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5348                     sv_setpvs(PL_linestr, "");
5349                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5350                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5351                     PL_last_lop = PL_last_uni = NULL;
5352                     PL_parser->in_pod = 0;
5353                 }
5354             }
5355             if (PL_rsfp || PL_parser->filtered)
5356                 incline(s);
5357         } while (PL_parser->in_pod);
5358         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5359         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5360         PL_last_lop = PL_last_uni = NULL;
5361         if (CopLINE(PL_curcop) == 1) {
5362             while (s < PL_bufend && isSPACE(*s))
5363                 s++;
5364             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5365                 s++;
5366 #ifdef PERL_MAD
5367             if (PL_madskills)
5368                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5369 #endif
5370             d = NULL;
5371             if (!PL_in_eval) {
5372                 if (*s == '#' && *(s+1) == '!')
5373                     d = s + 2;
5374 #ifdef ALTERNATE_SHEBANG
5375                 else {
5376                     static char const as[] = ALTERNATE_SHEBANG;
5377                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5378                         d = s + (sizeof(as) - 1);
5379                 }
5380 #endif /* ALTERNATE_SHEBANG */
5381             }
5382             if (d) {
5383                 char *ipath;
5384                 char *ipathend;
5385
5386                 while (isSPACE(*d))
5387                     d++;
5388                 ipath = d;
5389                 while (*d && !isSPACE(*d))
5390                     d++;
5391                 ipathend = d;
5392
5393 #ifdef ARG_ZERO_IS_SCRIPT
5394                 if (ipathend > ipath) {
5395                     /*
5396                      * HP-UX (at least) sets argv[0] to the script name,
5397                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5398                      * at least, set argv[0] to the basename of the Perl
5399                      * interpreter. So, having found "#!", we'll set it right.
5400                      */
5401                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5402                                                     SVt_PV)); /* $^X */
5403                     assert(SvPOK(x) || SvGMAGICAL(x));
5404                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5405                         sv_setpvn(x, ipath, ipathend - ipath);
5406                         SvSETMAGIC(x);
5407                     }
5408                     else {
5409                         STRLEN blen;
5410                         STRLEN llen;
5411                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5412                         const char * const lstart = SvPV_const(x,llen);
5413                         if (llen < blen) {
5414                             bstart += blen - llen;
5415                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5416                                 sv_setpvn(x, ipath, ipathend - ipath);
5417                                 SvSETMAGIC(x);
5418                             }
5419                         }
5420                     }
5421                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5422                 }
5423 #endif /* ARG_ZERO_IS_SCRIPT */
5424
5425                 /*
5426                  * Look for options.
5427                  */
5428                 d = instr(s,"perl -");
5429                 if (!d) {
5430                     d = instr(s,"perl");
5431 #if defined(DOSISH)
5432                     /* avoid getting into infinite loops when shebang
5433                      * line contains "Perl" rather than "perl" */
5434                     if (!d) {
5435                         for (d = ipathend-4; d >= ipath; --d) {
5436                             if ((*d == 'p' || *d == 'P')
5437                                 && !ibcmp(d, "perl", 4))
5438                             {
5439                                 break;
5440                             }
5441                         }
5442                         if (d < ipath)
5443                             d = NULL;
5444                     }
5445 #endif
5446                 }
5447 #ifdef ALTERNATE_SHEBANG
5448                 /*
5449                  * If the ALTERNATE_SHEBANG on this system starts with a
5450                  * character that can be part of a Perl expression, then if
5451                  * we see it but not "perl", we're probably looking at the
5452                  * start of Perl code, not a request to hand off to some
5453                  * other interpreter.  Similarly, if "perl" is there, but
5454                  * not in the first 'word' of the line, we assume the line
5455                  * contains the start of the Perl program.
5456                  */
5457                 if (d && *s != '#') {
5458                     const char *c = ipath;
5459                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5460                         c++;
5461                     if (c < d)
5462                         d = NULL;       /* "perl" not in first word; ignore */
5463                     else
5464                         *s = '#';       /* Don't try to parse shebang line */
5465                 }
5466 #endif /* ALTERNATE_SHEBANG */
5467                 if (!d &&
5468                     *s == '#' &&
5469                     ipathend > ipath &&
5470                     !PL_minus_c &&
5471                     !instr(s,"indir") &&
5472                     instr(PL_origargv[0],"perl"))
5473                 {
5474                     dVAR;
5475                     char **newargv;
5476
5477                     *ipathend = '\0';
5478                     s = ipathend + 1;
5479                     while (s < PL_bufend && isSPACE(*s))
5480                         s++;
5481                     if (s < PL_bufend) {
5482                         Newx(newargv,PL_origargc+3,char*);
5483                         newargv[1] = s;
5484                         while (s < PL_bufend && !isSPACE(*s))
5485                             s++;
5486                         *s = '\0';
5487                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5488                     }
5489                     else
5490                         newargv = PL_origargv;
5491                     newargv[0] = ipath;
5492                     PERL_FPU_PRE_EXEC
5493                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5494                     PERL_FPU_POST_EXEC
5495                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5496                 }
5497                 if (d) {
5498                     while (*d && !isSPACE(*d))
5499                         d++;
5500                     while (SPACE_OR_TAB(*d))
5501                         d++;
5502
5503                     if (*d++ == '-') {
5504                         const bool switches_done = PL_doswitches;
5505                         const U32 oldpdb = PL_perldb;
5506                         const bool oldn = PL_minus_n;
5507                         const bool oldp = PL_minus_p;
5508                         const char *d1 = d;
5509
5510                         do {
5511                             bool baduni = FALSE;
5512                             if (*d1 == 'C') {
5513                                 const char *d2 = d1 + 1;
5514                                 if (parse_unicode_opts((const char **)&d2)
5515                                     != PL_unicode)
5516                                     baduni = TRUE;
5517                             }
5518                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5519                                 const char * const m = d1;
5520                                 while (*d1 && !isSPACE(*d1))
5521                                     d1++;
5522                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5523                                       (int)(d1 - m), m);
5524                             }
5525                             d1 = moreswitches(d1);
5526                         } while (d1);
5527                         if (PL_doswitches && !switches_done) {
5528                             int argc = PL_origargc;
5529                             char **argv = PL_origargv;
5530                             do {
5531                                 argc--,argv++;
5532                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5533                             init_argv_symbols(argc,argv);
5534                         }
5535                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5536                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5537                               /* if we have already added "LINE: while (<>) {",
5538                                  we must not do it again */
5539                         {
5540                             sv_setpvs(PL_linestr, "");
5541                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5542                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5543                             PL_last_lop = PL_last_uni = NULL;
5544                             PL_preambled = FALSE;
5545                             if (PERLDB_LINE || PERLDB_SAVESRC)
5546                                 (void)gv_fetchfile(PL_origfilename);
5547                             goto retry;
5548                         }
5549                     }
5550                 }
5551             }
5552         }
5553         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5554             PL_lex_state = LEX_FORMLINE;
5555             start_force(PL_curforce);
5556             NEXTVAL_NEXTTOKE.ival = 0;
5557             force_next(FORMRBRACK);
5558             TOKEN(';');
5559         }
5560         goto retry;
5561     case '\r':
5562 #ifdef PERL_STRICT_CR
5563         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5564         Perl_croak(aTHX_
5565       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5566 #endif
5567     case ' ': case '\t': case '\f': case 013:
5568 #ifdef PERL_MAD
5569         PL_realtokenstart = -1;
5570         if (PL_madskills) {
5571           if (!PL_thiswhite)
5572             PL_thiswhite = newSVpvs("");
5573           sv_catpvn(PL_thiswhite, s, 1);
5574         }
5575 #endif
5576         s++;
5577         goto retry;
5578     case '#':
5579     case '\n':
5580 #ifdef PERL_MAD
5581         PL_realtokenstart = -1;
5582         if (PL_madskills)
5583             PL_faketokens = 0;
5584 #endif
5585         if (PL_lex_state != LEX_NORMAL ||
5586              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5587             if (*s == '#' && s == PL_linestart && PL_in_eval
5588              && !PL_rsfp && !PL_parser->filtered) {
5589                 /* handle eval qq[#line 1 "foo"\n ...] */
5590                 CopLINE_dec(PL_curcop);
5591                 incline(s);
5592             }
5593             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5594                 s = SKIPSPACE0(s);
5595                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5596                     incline(s);
5597             }
5598             else {
5599                 const bool in_comment = *s == '#';
5600                 d = s;
5601                 while (d < PL_bufend && *d != '\n')
5602                     d++;
5603                 if (d < PL_bufend)
5604                     d++;
5605                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5606                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5607                                d, PL_bufend);
5608 #ifdef PERL_MAD
5609                 if (PL_madskills)
5610                     PL_thiswhite = newSVpvn(s, d - s);
5611 #endif
5612                 s = d;
5613                 if (in_comment && d == PL_bufend
5614                  && PL_lex_state == LEX_INTERPNORMAL
5615                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5616                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5617                 else incline(s);
5618             }
5619             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5620                 PL_lex_state = LEX_FORMLINE;
5621                 start_force(PL_curforce);
5622                 NEXTVAL_NEXTTOKE.ival = 0;
5623                 force_next(FORMRBRACK);
5624                 TOKEN(';');
5625             }
5626         }
5627         else {
5628 #ifdef PERL_MAD
5629             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5630                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5631                     PL_faketokens = 0;
5632                     s = SKIPSPACE0(s);
5633                     TOKEN(PEG); /* make sure any #! line is accessible */
5634                 }
5635                 s = SKIPSPACE0(s);
5636             }
5637             else {
5638 #endif
5639                     if (PL_madskills) d = s;
5640                     while (s < PL_bufend && *s != '\n')
5641                         s++;
5642                     if (s < PL_bufend)
5643                     {
5644                         s++;
5645                         if (s < PL_bufend)
5646                             incline(s);
5647                     }
5648                     else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5649                       Perl_croak(aTHX_ "panic: input overflow");
5650 #ifdef PERL_MAD
5651                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5652                         if (!PL_thiswhite)
5653                             PL_thiswhite = newSVpvs("");
5654                         if (CopLINE(PL_curcop) == 1) {
5655                             sv_setpvs(PL_thiswhite, "");
5656                             PL_faketokens = 0;
5657                         }
5658                         sv_catpvn(PL_thiswhite, d, s - d);
5659                     }
5660             }
5661 #endif
5662         }
5663         goto retry;
5664     case '-':
5665         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5666             I32 ftst = 0;
5667             char tmp;
5668
5669             s++;
5670             PL_bufptr = s;
5671             tmp = *s++;
5672
5673             while (s < PL_bufend && SPACE_OR_TAB(*s))
5674                 s++;
5675
5676             if (strnEQ(s,"=>",2)) {
5677                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5678                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5679                 OPERATOR('-');          /* unary minus */
5680             }
5681             switch (tmp) {
5682             case 'r': ftst = OP_FTEREAD;        break;
5683             case 'w': ftst = OP_FTEWRITE;       break;
5684             case 'x': ftst = OP_FTEEXEC;        break;
5685             case 'o': ftst = OP_FTEOWNED;       break;
5686             case 'R': ftst = OP_FTRREAD;        break;
5687             case 'W': ftst = OP_FTRWRITE;       break;
5688             case 'X': ftst = OP_FTREXEC;        break;
5689             case 'O': ftst = OP_FTROWNED;       break;
5690             case 'e': ftst = OP_FTIS;           break;
5691             case 'z': ftst = OP_FTZERO;         break;
5692             case 's': ftst = OP_FTSIZE;         break;
5693             case 'f': ftst = OP_FTFILE;         break;
5694             case 'd': ftst = OP_FTDIR;          break;
5695             case 'l': ftst = OP_FTLINK;         break;
5696             case 'p': ftst = OP_FTPIPE;         break;
5697             case 'S': ftst = OP_FTSOCK;         break;
5698             case 'u': ftst = OP_FTSUID;         break;
5699             case 'g': ftst = OP_FTSGID;         break;
5700             case 'k': ftst = OP_FTSVTX;         break;
5701             case 'b': ftst = OP_FTBLK;          break;
5702             case 'c': ftst = OP_FTCHR;          break;
5703             case 't': ftst = OP_FTTTY;          break;
5704             case 'T': ftst = OP_FTTEXT;         break;
5705             case 'B': ftst = OP_FTBINARY;       break;
5706             case 'M': case 'A': case 'C':
5707                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5708                 switch (tmp) {
5709                 case 'M': ftst = OP_FTMTIME;    break;
5710                 case 'A': ftst = OP_FTATIME;    break;
5711                 case 'C': ftst = OP_FTCTIME;    break;
5712                 default:                        break;
5713                 }
5714                 break;
5715             default:
5716                 break;
5717             }
5718             if (ftst) {
5719                 PL_last_uni = PL_oldbufptr;
5720                 PL_last_lop_op = (OPCODE)ftst;
5721                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5722                         "### Saw file test %c\n", (int)tmp);
5723                 } );
5724                 FTST(ftst);
5725             }
5726             else {
5727                 /* Assume it was a minus followed by a one-letter named
5728                  * subroutine call (or a -bareword), then. */
5729                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5730                         "### '-%c' looked like a file test but was not\n",
5731                         (int) tmp);
5732                 } );
5733                 s = --PL_bufptr;
5734             }
5735         }
5736         {
5737             const char tmp = *s++;
5738             if (*s == tmp) {
5739                 s++;
5740                 if (PL_expect == XOPERATOR)
5741                     TERM(POSTDEC);
5742                 else
5743                     OPERATOR(PREDEC);
5744             }
5745             else if (*s == '>') {
5746                 s++;
5747                 s = SKIPSPACE1(s);
5748                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5749                     ((*s == '$' || *s == '&') && s[1] == '*')
5750                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5751                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5752                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5753                  ))
5754                 {
5755                     Perl_ck_warner_d(aTHX_
5756                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5757                         "Postfix dereference is experimental"
5758                     );
5759                     PL_expect = XPOSTDEREF;
5760                     TOKEN(ARROW);
5761                 }
5762                 if (isIDFIRST_lazy_if(s,UTF)) {
5763                     s = force_word(s,METHOD,FALSE,TRUE);
5764                     TOKEN(ARROW);
5765                 }
5766                 else if (*s == '$')
5767                     OPERATOR(ARROW);
5768                 else
5769                     TERM(ARROW);
5770             }
5771             if (PL_expect == XOPERATOR) {
5772                 if (*s == '=' && !PL_lex_allbrackets &&
5773                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5774                     s--;
5775                     TOKEN(0);
5776                 }
5777                 Aop(OP_SUBTRACT);
5778             }
5779             else {
5780                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5781                     check_uni();
5782                 OPERATOR('-');          /* unary minus */
5783             }
5784         }
5785
5786     case '+':
5787         {
5788             const char tmp = *s++;
5789             if (*s == tmp) {
5790                 s++;
5791                 if (PL_expect == XOPERATOR)
5792                     TERM(POSTINC);
5793                 else
5794                     OPERATOR(PREINC);
5795             }
5796             if (PL_expect == XOPERATOR) {
5797                 if (*s == '=' && !PL_lex_allbrackets &&
5798                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5799                     s--;
5800                     TOKEN(0);
5801                 }
5802                 Aop(OP_ADD);
5803             }
5804             else {
5805                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5806                     check_uni();
5807                 OPERATOR('+');
5808             }
5809         }
5810
5811     case '*':
5812         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5813         if (PL_expect != XOPERATOR) {
5814             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5815             PL_expect = XOPERATOR;
5816             force_ident(PL_tokenbuf, '*');
5817             if (!*PL_tokenbuf)
5818                 PREREF('*');
5819             TERM('*');
5820         }
5821         s++;
5822         if (*s == '*') {
5823             s++;
5824             if (*s == '=' && !PL_lex_allbrackets &&
5825                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5826                 s -= 2;
5827                 TOKEN(0);
5828             }
5829             PWop(OP_POW);
5830         }
5831         if (*s == '=' && !PL_lex_allbrackets &&
5832                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5833             s--;
5834             TOKEN(0);
5835         }
5836         PL_parser->saw_infix_sigil = 1;
5837         Mop(OP_MULTIPLY);
5838
5839     case '%':
5840     {
5841         if (PL_expect == XOPERATOR) {
5842             if (s[1] == '=' && !PL_lex_allbrackets &&
5843                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5844                 TOKEN(0);
5845             ++s;
5846             PL_parser->saw_infix_sigil = 1;
5847             Mop(OP_MODULO);
5848         }
5849         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5850         PL_tokenbuf[0] = '%';
5851         s = scan_ident(s, PL_tokenbuf + 1,
5852                 sizeof PL_tokenbuf - 1, FALSE);
5853         pl_yylval.ival = 0;
5854         if (!PL_tokenbuf[1]) {
5855             PREREF('%');
5856         }
5857         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5858             if (*s == '[')
5859                 PL_tokenbuf[0] = '@';
5860         }
5861         PL_expect = XOPERATOR;
5862         force_ident_maybe_lex('%');
5863         TERM('%');
5864     }
5865     case '^':
5866         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5867                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5868             TOKEN(0);
5869         s++;
5870         BOop(OP_BIT_XOR);
5871     case '[':
5872         if (PL_lex_brackets > 100)
5873             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5874         PL_lex_brackstack[PL_lex_brackets++] = 0;
5875         PL_lex_allbrackets++;
5876         {
5877             const char tmp = *s++;
5878             OPERATOR(tmp);
5879         }
5880     case '~':
5881         if (s[1] == '~'
5882             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5883         {
5884             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5885                 TOKEN(0);
5886             s += 2;
5887             Perl_ck_warner_d(aTHX_
5888                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5889                 "Smartmatch is experimental");
5890             Eop(OP_SMARTMATCH);
5891         }
5892         s++;
5893         OPERATOR('~');
5894     case ',':
5895         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5896             TOKEN(0);
5897         s++;
5898         OPERATOR(',');
5899     case ':':
5900         if (s[1] == ':') {
5901             len = 0;
5902             goto just_a_word_zero_gv;
5903         }
5904         s++;
5905         switch (PL_expect) {
5906             OP *attrs;
5907 #ifdef PERL_MAD
5908             I32 stuffstart;
5909 #endif
5910         case XOPERATOR:
5911             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5912                 break;
5913             PL_bufptr = s;      /* update in case we back off */
5914             if (*s == '=') {
5915                 Perl_croak(aTHX_
5916                            "Use of := for an empty attribute list is not allowed");
5917             }
5918             goto grabattrs;
5919         case XATTRBLOCK:
5920             PL_expect = XBLOCK;
5921             goto grabattrs;
5922         case XATTRTERM:
5923             PL_expect = XTERMBLOCK;
5924          grabattrs:
5925 #ifdef PERL_MAD
5926             stuffstart = s - SvPVX(PL_linestr) - 1;
5927 #endif
5928             s = PEEKSPACE(s);
5929             attrs = NULL;
5930             while (isIDFIRST_lazy_if(s,UTF)) {
5931                 I32 tmp;
5932                 SV *sv;
5933                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5934                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5935                     if (tmp < 0) tmp = -tmp;
5936                     switch (tmp) {
5937                     case KEY_or:
5938                     case KEY_and:
5939                     case KEY_for:
5940                     case KEY_foreach:
5941                     case KEY_unless:
5942                     case KEY_if:
5943                     case KEY_while:
5944                     case KEY_until:
5945                         goto got_attrs;
5946                     default:
5947                         break;
5948                     }
5949                 }
5950                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5951                 if (*d == '(') {
5952                     d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5953                     COPLINE_SET_FROM_MULTI_END;
5954                     if (!d) {
5955                         /* MUST advance bufptr here to avoid bogus
5956                            "at end of line" context messages from yyerror().
5957                          */
5958                         PL_bufptr = s + len;
5959                         yyerror("Unterminated attribute parameter in attribute list");
5960                         if (attrs)
5961                             op_free(attrs);
5962                         sv_free(sv);
5963                         return REPORT(0);       /* EOF indicator */
5964                     }
5965                 }
5966                 if (PL_lex_stuff) {
5967                     sv_catsv(sv, PL_lex_stuff);
5968                     attrs = op_append_elem(OP_LIST, attrs,
5969                                         newSVOP(OP_CONST, 0, sv));
5970                     SvREFCNT_dec(PL_lex_stuff);
5971                     PL_lex_stuff = NULL;
5972                 }
5973                 else {
5974                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5975                         sv_free(sv);
5976                         if (PL_in_my == KEY_our) {
5977                             deprecate(":unique");
5978                         }
5979                         else
5980                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5981                     }
5982
5983                     /* NOTE: any CV attrs applied here need to be part of
5984                        the CVf_BUILTIN_ATTRS define in cv.h! */
5985                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5986                         sv_free(sv);
5987                         CvLVALUE_on(PL_compcv);
5988                     }
5989                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5990                         sv_free(sv);
5991                         deprecate(":locked");
5992                     }
5993                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5994                         sv_free(sv);
5995                         CvMETHOD_on(PL_compcv);
5996                     }
5997                     /* After we've set the flags, it could be argued that
5998                        we don't need to do the attributes.pm-based setting
5999                        process, and shouldn't bother appending recognized
6000                        flags.  To experiment with that, uncomment the
6001                        following "else".  (Note that's already been
6002                        uncommented.  That keeps the above-applied built-in
6003                        attributes from being intercepted (and possibly
6004                        rejected) by a package's attribute routines, but is
6005                        justified by the performance win for the common case
6006                        of applying only built-in attributes.) */
6007                     else
6008                         attrs = op_append_elem(OP_LIST, attrs,
6009                                             newSVOP(OP_CONST, 0,
6010                                                     sv));
6011                 }
6012                 s = PEEKSPACE(d);
6013                 if (*s == ':' && s[1] != ':')
6014                     s = PEEKSPACE(s+1);
6015                 else if (s == d)
6016                     break;      /* require real whitespace or :'s */
6017                 /* XXX losing whitespace on sequential attributes here */
6018             }
6019             {
6020                 const char tmp
6021                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6022                 if (*s != ';' && *s != '}' && *s != tmp
6023                     && (tmp != '=' || *s != ')')) {
6024                     const char q = ((*s == '\'') ? '"' : '\'');
6025                     /* If here for an expression, and parsed no attrs, back
6026                        off. */
6027                     if (tmp == '=' && !attrs) {
6028                         s = PL_bufptr;
6029                         break;
6030                     }
6031                     /* MUST advance bufptr here to avoid bogus "at end of line"
6032                        context messages from yyerror().
6033                     */
6034                     PL_bufptr = s;
6035                     yyerror( (const char *)
6036                              (*s
6037                               ? Perl_form(aTHX_ "Invalid separator character "
6038                                           "%c%c%c in attribute list", q, *s, q)
6039                               : "Unterminated attribute list" ) );
6040                     if (attrs)
6041                         op_free(attrs);
6042                     OPERATOR(':');
6043                 }
6044             }
6045         got_attrs:
6046             if (attrs) {
6047                 start_force(PL_curforce);
6048                 NEXTVAL_NEXTTOKE.opval = attrs;
6049                 CURMAD('_', PL_nextwhite);
6050                 force_next(THING);
6051             }
6052 #ifdef PERL_MAD
6053             if (PL_madskills) {
6054                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6055                                      (s - SvPVX(PL_linestr)) - stuffstart);
6056             }
6057 #endif
6058             TOKEN(COLONATTR);
6059         }
6060         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6061             s--;
6062             TOKEN(0);
6063         }
6064         PL_lex_allbrackets--;
6065         OPERATOR(':');
6066     case '(':
6067         s++;
6068         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6069             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6070         else
6071             PL_expect = XTERM;
6072         s = SKIPSPACE1(s);
6073         PL_lex_allbrackets++;
6074         TOKEN('(');
6075     case ';':
6076         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6077             TOKEN(0);
6078         CLINE;
6079         s++;
6080         OPERATOR(';');
6081     case ')':
6082         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6083             TOKEN(0);
6084         s++;
6085         PL_lex_allbrackets--;
6086         s = SKIPSPACE1(s);
6087         if (*s == '{')
6088             PREBLOCK(')');
6089         TERM(')');
6090     case ']':
6091         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6092             TOKEN(0);
6093         s++;
6094         if (PL_lex_brackets <= 0)
6095             /* diag_listed_as: Unmatched right %s bracket */
6096             yyerror("Unmatched right square bracket");
6097         else
6098             --PL_lex_brackets;
6099         PL_lex_allbrackets--;
6100         if (PL_lex_state == LEX_INTERPNORMAL) {
6101             if (PL_lex_brackets == 0) {
6102                 if (*s == '-' && s[1] == '>')
6103                     PL_lex_state = LEX_INTERPENDMAYBE;
6104                 else if (*s != '[' && *s != '{')
6105                     PL_lex_state = LEX_INTERPEND;
6106             }
6107         }
6108         TERM(']');
6109     case '{':
6110         s++;
6111       leftbracket:
6112         if (PL_lex_brackets > 100) {
6113             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6114         }
6115         switch (PL_expect) {
6116         case XTERM:
6117             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6118             PL_lex_allbrackets++;
6119             OPERATOR(HASHBRACK);
6120         case XOPERATOR:
6121             while (s < PL_bufend && SPACE_OR_TAB(*s))
6122                 s++;
6123             d = s;
6124             PL_tokenbuf[0] = '\0';
6125             if (d < PL_bufend && *d == '-') {
6126                 PL_tokenbuf[0] = '-';
6127                 d++;
6128                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6129                     d++;
6130             }
6131             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6132                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6133                               FALSE, &len);
6134                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6135                     d++;
6136                 if (*d == '}') {
6137                     const char minus = (PL_tokenbuf[0] == '-');
6138                     s = force_word(s + minus, WORD, FALSE, TRUE);
6139                     if (minus)
6140                         force_next('-');
6141                 }
6142             }
6143             /* FALL THROUGH */
6144         case XATTRBLOCK:
6145         case XBLOCK:
6146             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6147             PL_lex_allbrackets++;
6148             PL_expect = XSTATE;
6149             break;
6150         case XATTRTERM:
6151         case XTERMBLOCK:
6152             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153             PL_lex_allbrackets++;
6154             PL_expect = XSTATE;
6155             break;
6156         default: {
6157                 const char *t;
6158                 if (PL_oldoldbufptr == PL_last_lop)
6159                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6160                 else
6161                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6162                 PL_lex_allbrackets++;
6163                 s = SKIPSPACE1(s);
6164                 if (*s == '}') {
6165                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6166                         PL_expect = XTERM;
6167                         /* This hack is to get the ${} in the message. */
6168                         PL_bufptr = s+1;
6169                         yyerror("syntax error");
6170                         break;
6171                     }
6172                     OPERATOR(HASHBRACK);
6173                 }
6174                 /* This hack serves to disambiguate a pair of curlies
6175                  * as being a block or an anon hash.  Normally, expectation
6176                  * determines that, but in cases where we're not in a
6177                  * position to expect anything in particular (like inside
6178                  * eval"") we have to resolve the ambiguity.  This code
6179                  * covers the case where the first term in the curlies is a
6180                  * quoted string.  Most other cases need to be explicitly
6181                  * disambiguated by prepending a "+" before the opening
6182                  * curly in order to force resolution as an anon hash.
6183                  *
6184                  * XXX should probably propagate the outer expectation
6185                  * into eval"" to rely less on this hack, but that could
6186                  * potentially break current behavior of eval"".
6187                  * GSAR 97-07-21
6188                  */
6189                 t = s;
6190                 if (*s == '\'' || *s == '"' || *s == '`') {
6191                     /* common case: get past first string, handling escapes */
6192                     for (t++; t < PL_bufend && *t != *s;)
6193                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6194                             t++;
6195                     t++;
6196                 }
6197                 else if (*s == 'q') {
6198                     if (++t < PL_bufend
6199                         && (!isWORDCHAR(*t)
6200                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6201                                 && !isWORDCHAR(*t))))
6202                     {
6203                         /* skip q//-like construct */
6204                         const char *tmps;
6205                         char open, close, term;
6206                         I32 brackets = 1;
6207
6208                         while (t < PL_bufend && isSPACE(*t))
6209                             t++;
6210                         /* check for q => */
6211                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6212                             OPERATOR(HASHBRACK);
6213                         }
6214                         term = *t;
6215                         open = term;
6216                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6217                             term = tmps[5];
6218                         close = term;
6219                         if (open == close)
6220                             for (t++; t < PL_bufend; t++) {
6221                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6222                                     t++;
6223                                 else if (*t == open)
6224                                     break;
6225                             }
6226                         else {
6227                             for (t++; t < PL_bufend; t++) {
6228                                 if (*t == '\\' && t+1 < PL_bufend)
6229                                     t++;
6230                                 else if (*t == close && --brackets <= 0)
6231                                     break;
6232                                 else if (*t == open)
6233                                     brackets++;
6234                             }
6235                         }
6236                         t++;
6237                     }
6238                     else
6239                         /* skip plain q word */
6240                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6241                              t += UTF8SKIP(t);
6242                 }
6243                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6244                     t += UTF8SKIP(t);
6245                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6246                          t += UTF8SKIP(t);
6247                 }
6248                 while (t < PL_bufend && isSPACE(*t))
6249                     t++;
6250                 /* if comma follows first term, call it an anon hash */
6251                 /* XXX it could be a comma expression with loop modifiers */
6252                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6253                                    || (*t == '=' && t[1] == '>')))
6254                     OPERATOR(HASHBRACK);
6255                 if (PL_expect == XREF)
6256                     PL_expect = XTERM;
6257                 else {
6258                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6259                     PL_expect = XSTATE;
6260                 }
6261             }
6262             break;
6263         }
6264         pl_yylval.ival = CopLINE(PL_curcop);
6265         if (isSPACE(*s) || *s == '#')
6266             PL_copline = NOLINE;   /* invalidate current command line number */
6267         TOKEN(formbrack ? '=' : '{');
6268     case '}':
6269         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6270             TOKEN(0);
6271       rightbracket:
6272         s++;
6273         if (PL_lex_brackets <= 0)
6274             /* diag_listed_as: Unmatched right %s bracket */
6275             yyerror("Unmatched right curly bracket");
6276         else
6277             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6278         PL_lex_allbrackets--;
6279         if (PL_lex_state == LEX_INTERPNORMAL) {
6280             if (PL_lex_brackets == 0) {
6281                 if (PL_expect & XFAKEBRACK) {
6282                     PL_expect &= XENUMMASK;
6283                     PL_lex_state = LEX_INTERPEND;
6284                     PL_bufptr = s;
6285 #if 0
6286                     if (PL_madskills) {
6287                         if (!PL_thiswhite)
6288                             PL_thiswhite = newSVpvs("");
6289                         sv_catpvs(PL_thiswhite,"}");
6290                     }
6291 #endif
6292                     return yylex();     /* ignore fake brackets */
6293                 }
6294                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6295                  && SvEVALED(PL_lex_repl))
6296                     PL_lex_state = LEX_INTERPEND;
6297                 else if (*s == '-' && s[1] == '>')
6298                     PL_lex_state = LEX_INTERPENDMAYBE;
6299                 else if (*s != '[' && *s != '{')
6300                     PL_lex_state = LEX_INTERPEND;
6301             }
6302         }
6303         if (PL_expect & XFAKEBRACK) {
6304             PL_expect &= XENUMMASK;
6305             PL_bufptr = s;
6306             return yylex();             /* ignore fake brackets */
6307         }
6308         start_force(PL_curforce);
6309         if (PL_madskills) {
6310             curmad('X', newSVpvn(s-1,1));
6311             CURMAD('_', PL_thiswhite);
6312         }
6313         force_next(formbrack ? '.' : '}');
6314         if (formbrack) LEAVE;
6315 #ifdef PERL_MAD
6316         if (PL_madskills && !PL_thistoken)
6317             PL_thistoken = newSVpvs("");
6318 #endif
6319         if (formbrack == 2) { /* means . where arguments were expected */
6320             start_force(PL_curforce);
6321             force_next(';');
6322             TOKEN(FORMRBRACK);
6323         }
6324         TOKEN(';');
6325     case '&':
6326         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6327         s++;
6328         if (*s++ == '&') {
6329             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6330                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6331                 s -= 2;
6332                 TOKEN(0);
6333             }
6334             AOPERATOR(ANDAND);
6335         }
6336         s--;
6337         if (PL_expect == XOPERATOR) {
6338             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6339                 && isIDFIRST_lazy_if(s,UTF))
6340             {
6341                 CopLINE_dec(PL_curcop);
6342                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6343                 CopLINE_inc(PL_curcop);
6344             }
6345             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6346                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6347                 s--;
6348                 TOKEN(0);
6349             }
6350             PL_parser->saw_infix_sigil = 1;
6351             BAop(OP_BIT_AND);
6352         }
6353
6354         PL_tokenbuf[0] = '&';
6355         s = scan_ident(s - 1, PL_tokenbuf + 1,
6356                        sizeof PL_tokenbuf - 1, TRUE);
6357         if (PL_tokenbuf[1]) {
6358             PL_expect = XOPERATOR;
6359             force_ident_maybe_lex('&');
6360         }
6361         else
6362             PREREF('&');
6363         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6364         TERM('&');
6365
6366     case '|':
6367         s++;
6368         if (*s++ == '|') {
6369             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6370                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6371                 s -= 2;
6372                 TOKEN(0);
6373             }
6374             AOPERATOR(OROR);
6375         }
6376         s--;
6377         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6378                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6379             s--;
6380             TOKEN(0);
6381         }
6382         BOop(OP_BIT_OR);
6383     case '=':
6384         s++;
6385         {
6386             const char tmp = *s++;
6387             if (tmp == '=') {
6388                 if (!PL_lex_allbrackets &&
6389                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6390                     s -= 2;
6391                     TOKEN(0);
6392                 }
6393                 Eop(OP_EQ);
6394             }
6395             if (tmp == '>') {
6396                 if (!PL_lex_allbrackets &&
6397                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6398                     s -= 2;
6399                     TOKEN(0);
6400                 }
6401                 OPERATOR(',');
6402             }
6403             if (tmp == '~')
6404                 PMop(OP_MATCH);
6405             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6406                 && strchr("+-*/%.^&|<",tmp))
6407                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6408                             "Reversed %c= operator",(int)tmp);
6409             s--;
6410             if (PL_expect == XSTATE && isALPHA(tmp) &&
6411                 (s == PL_linestart+1 || s[-2] == '\n') )
6412                 {
6413                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6414                         || PL_lex_state != LEX_NORMAL) {
6415                         d = PL_bufend;
6416                         while (s < d) {
6417                             if (*s++ == '\n') {
6418                                 incline(s);
6419                                 if (strnEQ(s,"=cut",4)) {
6420                                     s = strchr(s,'\n');
6421                                     if (s)
6422                                         s++;
6423                                     else
6424                                         s = d;
6425                                     incline(s);
6426                                     goto retry;
6427                                 }
6428                             }
6429                         }
6430                         goto retry;
6431                     }
6432 #ifdef PERL_MAD
6433                     if (PL_madskills) {
6434                         if (!PL_thiswhite)
6435                             PL_thiswhite = newSVpvs("");
6436                         sv_catpvn(PL_thiswhite, PL_linestart,
6437                                   PL_bufend - PL_linestart);
6438                     }
6439 #endif
6440                     s = PL_bufend;
6441                     PL_parser->in_pod = 1;
6442                     goto retry;
6443                 }
6444         }
6445         if (PL_expect == XBLOCK) {
6446             const char *t = s;
6447 #ifdef PERL_STRICT_CR
6448             while (SPACE_OR_TAB(*t))
6449 #else
6450             while (SPACE_OR_TAB(*t) || *t == '\r')
6451 #endif
6452                 t++;
6453             if (*t == '\n' || *t == '#') {
6454                 formbrack = 1;
6455                 ENTER;
6456                 SAVEI8(PL_parser->form_lex_state);
6457                 SAVEI32(PL_lex_formbrack);
6458                 PL_parser->form_lex_state = PL_lex_state;
6459                 PL_lex_formbrack = PL_lex_brackets + 1;
6460                 goto leftbracket;
6461             }
6462         }
6463         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6464             s--;
6465             TOKEN(0);
6466         }
6467         pl_yylval.ival = 0;
6468         OPERATOR(ASSIGNOP);
6469     case '!':
6470         s++;
6471         {
6472             const char tmp = *s++;
6473             if (tmp == '=') {
6474                 /* was this !=~ where !~ was meant?
6475                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6476
6477                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6478                     const char *t = s+1;
6479
6480                     while (t < PL_bufend && isSPACE(*t))
6481                         ++t;
6482
6483                     if (*t == '/' || *t == '?' ||
6484                         ((*t == 'm' || *t == 's' || *t == 'y')
6485                          && !isWORDCHAR(t[1])) ||
6486                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6487                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6488                                     "!=~ should be !~");
6489                 }
6490                 if (!PL_lex_allbrackets &&
6491                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6492                     s -= 2;
6493                     TOKEN(0);
6494                 }
6495                 Eop(OP_NE);
6496             }
6497             if (tmp == '~')
6498                 PMop(OP_NOT);
6499         }
6500         s--;
6501         OPERATOR('!');
6502     case '<':
6503         if (PL_expect != XOPERATOR) {
6504             if (s[1] != '<' && !strchr(s,'>'))
6505                 check_uni();
6506             if (s[1] == '<')
6507                 s = scan_heredoc(s);
6508             else
6509                 s = scan_inputsymbol(s);
6510             PL_expect = XOPERATOR;
6511             TOKEN(sublex_start());
6512         }
6513         s++;
6514         {
6515             char tmp = *s++;
6516             if (tmp == '<') {
6517                 if (*s == '=' && !PL_lex_allbrackets &&
6518                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6519                     s -= 2;
6520                     TOKEN(0);
6521                 }
6522                 SHop(OP_LEFT_SHIFT);
6523             }
6524             if (tmp == '=') {
6525                 tmp = *s++;
6526                 if (tmp == '>') {
6527                     if (!PL_lex_allbrackets &&
6528                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6529                         s -= 3;
6530                         TOKEN(0);
6531                     }
6532                     Eop(OP_NCMP);
6533                 }
6534                 s--;
6535                 if (!PL_lex_allbrackets &&
6536                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6537                     s -= 2;
6538                     TOKEN(0);
6539                 }
6540                 Rop(OP_LE);
6541             }
6542         }
6543         s--;
6544         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6545             s--;
6546             TOKEN(0);
6547         }
6548         Rop(OP_LT);
6549     case '>':
6550         s++;
6551         {
6552             const char tmp = *s++;
6553             if (tmp == '>') {
6554                 if (*s == '=' && !PL_lex_allbrackets &&
6555                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6556                     s -= 2;
6557                     TOKEN(0);
6558                 }
6559                 SHop(OP_RIGHT_SHIFT);
6560             }
6561             else if (tmp == '=') {
6562                 if (!PL_lex_allbrackets &&
6563                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6564                     s -= 2;
6565                     TOKEN(0);
6566                 }
6567                 Rop(OP_GE);
6568             }
6569         }
6570         s--;
6571         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6572             s--;
6573             TOKEN(0);
6574         }
6575         Rop(OP_GT);
6576
6577     case '$':
6578         CLINE;
6579
6580         if (PL_expect == XOPERATOR) {
6581             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6582                 return deprecate_commaless_var_list();
6583             }
6584         }
6585         else if (PL_expect == XPOSTDEREF) {
6586             if (s[1] == '#') {
6587                 s++;
6588                 POSTDEREF(DOLSHARP);
6589             }
6590             POSTDEREF('$');
6591         }
6592
6593         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6594             PL_tokenbuf[0] = '@';
6595             s = scan_ident(s + 1, PL_tokenbuf + 1,
6596                            sizeof PL_tokenbuf - 1, FALSE);
6597             if (PL_expect == XOPERATOR)
6598                 no_op("Array length", s);
6599             if (!PL_tokenbuf[1])
6600                 PREREF(DOLSHARP);
6601             PL_expect = XOPERATOR;
6602             force_ident_maybe_lex('#');
6603             TOKEN(DOLSHARP);
6604         }
6605
6606         PL_tokenbuf[0] = '$';
6607         s = scan_ident(s, PL_tokenbuf + 1,
6608                        sizeof PL_tokenbuf - 1, FALSE);
6609         if (PL_expect == XOPERATOR)
6610             no_op("Scalar", s);
6611         if (!PL_tokenbuf[1]) {
6612             if (s == PL_bufend)
6613                 yyerror("Final $ should be \\$ or $name");
6614             PREREF('$');
6615         }
6616
6617         d = s;
6618         {
6619             const char tmp = *s;
6620             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6621                 s = SKIPSPACE1(s);
6622
6623             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6624                 && intuit_more(s)) {
6625                 if (*s == '[') {
6626                     PL_tokenbuf[0] = '@';
6627                     if (ckWARN(WARN_SYNTAX)) {
6628                         char *t = s+1;
6629
6630                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6631                             t++;
6632                         if (*t++ == ',') {
6633                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6634                             while (t < PL_bufend && *t != ']')
6635                                 t++;
6636                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6637                                         "Multidimensional syntax %.*s not supported",
6638                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6639                         }
6640                     }
6641                 }
6642                 else if (*s == '{') {
6643                     char *t;
6644                     PL_tokenbuf[0] = '%';
6645                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6646                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6647                         {
6648                             char tmpbuf[sizeof PL_tokenbuf];
6649                             do {
6650                                 t++;
6651                             } while (isSPACE(*t));
6652                             if (isIDFIRST_lazy_if(t,UTF)) {
6653                                 STRLEN len;
6654                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6655                                               &len);
6656                                 while (isSPACE(*t))
6657                                     t++;
6658                                 if (*t == ';'
6659                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6660                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6661                                         "You need to quote \"%"UTF8f"\"",
6662                                          UTF8fARG(UTF, len, tmpbuf));
6663                             }
6664                         }
6665                 }
6666             }
6667
6668             PL_expect = XOPERATOR;
6669             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6670                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6671                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6672                     PL_expect = XOPERATOR;
6673                 else if (strchr("$@\"'`q", *s))
6674                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6675                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6676                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6677                 else if (isIDFIRST_lazy_if(s,UTF)) {
6678                     char tmpbuf[sizeof PL_tokenbuf];
6679                     int t2;
6680                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6681                     if ((t2 = keyword(tmpbuf, len, 0))) {
6682                         /* binary operators exclude handle interpretations */
6683                         switch (t2) {
6684                         case -KEY_x:
6685                         case -KEY_eq:
6686                         case -KEY_ne:
6687                         case -KEY_gt:
6688                         case -KEY_lt:
6689                         case -KEY_ge:
6690                         case -KEY_le:
6691                         case -KEY_cmp:
6692                             break;
6693                         default:
6694                             PL_expect = XTERM;  /* e.g. print $fh length() */
6695                             break;
6696                         }
6697                     }
6698                     else {
6699                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6700                     }
6701                 }
6702                 else if (isDIGIT(*s))
6703                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6704                 else if (*s == '.' && isDIGIT(s[1]))
6705                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6706                 else if ((*s == '?' || *s == '-' || *s == '+')
6707                          && !isSPACE(s[1]) && s[1] != '=')
6708                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6709                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6710                          && s[1] != '/')
6711                     PL_expect = XTERM;          /* e.g. print $fh /.../
6712                                                    XXX except DORDOR operator
6713                                                 */
6714                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6715                          && s[2] != '=')
6716                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6717             }
6718         }
6719         force_ident_maybe_lex('$');
6720         TOKEN('$');
6721
6722     case '@':
6723         if (PL_expect == XOPERATOR)
6724             no_op("Array", s);
6725         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6726         PL_tokenbuf[0] = '@';
6727         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6728         pl_yylval.ival = 0;
6729         if (!PL_tokenbuf[1]) {
6730             PREREF('@');
6731         }
6732         if (PL_lex_state == LEX_NORMAL)
6733             s = SKIPSPACE1(s);
6734         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6735             if (*s == '{')
6736                 PL_tokenbuf[0] = '%';
6737
6738             /* Warn about @ where they meant $. */
6739             if (*s == '[' || *s == '{') {
6740                 if (ckWARN(WARN_SYNTAX)) {
6741                     S_check_scalar_slice(aTHX_ s);
6742                 }
6743             }
6744         }
6745         PL_expect = XOPERATOR;
6746         force_ident_maybe_lex('@');
6747         TERM('@');
6748
6749      case '/':                  /* may be division, defined-or, or pattern */
6750         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6751             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6752                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6753                 TOKEN(0);
6754             s += 2;
6755             AOPERATOR(DORDOR);
6756         }
6757      case '?':                  /* may either be conditional or pattern */
6758         if (PL_expect == XOPERATOR) {
6759              char tmp = *s++;
6760              if(tmp == '?') {
6761                 if (!PL_lex_allbrackets &&
6762                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6763                     s--;
6764                     TOKEN(0);
6765                 }
6766                 PL_lex_allbrackets++;
6767                 OPERATOR('?');
6768              }
6769              else {
6770                  tmp = *s++;
6771                  if(tmp == '/') {
6772                      /* A // operator. */
6773                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6774                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6775                                             LEX_FAKEEOF_LOGIC)) {
6776                         s -= 2;
6777                         TOKEN(0);
6778                     }
6779                     AOPERATOR(DORDOR);
6780                  }
6781                  else {
6782                      s--;
6783                      if (*s == '=' && !PL_lex_allbrackets &&
6784                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6785                          s--;
6786                          TOKEN(0);
6787                      }
6788                      Mop(OP_DIVIDE);
6789                  }
6790              }
6791          }
6792          else {
6793              /* Disable warning on "study /blah/" */
6794              if (PL_oldoldbufptr == PL_last_uni
6795               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6796                   || memNE(PL_last_uni, "study", 5)
6797                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6798               ))
6799                  check_uni();
6800              if (*s == '?')
6801                  deprecate("?PATTERN? without explicit operator");
6802              s = scan_pat(s,OP_MATCH);
6803              TERM(sublex_start());
6804          }
6805
6806     case '.':
6807         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6808 #ifdef PERL_STRICT_CR
6809             && s[1] == '\n'
6810 #else
6811             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6812 #endif
6813             && (s == PL_linestart || s[-1] == '\n') )
6814         {
6815             PL_expect = XSTATE;
6816             formbrack = 2; /* dot seen where arguments expected */
6817             goto rightbracket;
6818         }
6819         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6820             s += 3;
6821             OPERATOR(YADAYADA);
6822         }
6823         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6824             char tmp = *s++;
6825             if (*s == tmp) {
6826                 if (!PL_lex_allbrackets &&
6827                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6828                     s--;
6829                     TOKEN(0);
6830                 }
6831                 s++;
6832                 if (*s == tmp) {
6833                     s++;
6834                     pl_yylval.ival = OPf_SPECIAL;
6835                 }
6836                 else
6837                     pl_yylval.ival = 0;
6838                 OPERATOR(DOTDOT);
6839             }
6840             if (*s == '=' && !PL_lex_allbrackets &&
6841                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6842                 s--;
6843                 TOKEN(0);
6844             }
6845             Aop(OP_CONCAT);
6846         }
6847         /* FALL THROUGH */
6848     case '0': case '1': case '2': case '3': case '4':
6849     case '5': case '6': case '7': case '8': case '9':
6850         s = scan_num(s, &pl_yylval);
6851         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6852         if (PL_expect == XOPERATOR)
6853             no_op("Number",s);
6854         TERM(THING);
6855
6856     case '\'':
6857         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6858         COPLINE_SET_FROM_MULTI_END;
6859         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6860         if (PL_expect == XOPERATOR) {
6861             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6862                 return deprecate_commaless_var_list();
6863             }
6864             else
6865                 no_op("String",s);
6866         }
6867         if (!s)
6868             missingterm(NULL);
6869         pl_yylval.ival = OP_CONST;
6870         TERM(sublex_start());
6871
6872     case '"':
6873         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6874         DEBUG_T( {
6875             if (s)
6876                 printbuf("### Saw string before %s\n", s);
6877             else
6878                 PerlIO_printf(Perl_debug_log,
6879                              "### Saw unterminated string\n");
6880         } );
6881         if (PL_expect == XOPERATOR) {
6882             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6883                 return deprecate_commaless_var_list();
6884             }
6885             else
6886                 no_op("String",s);
6887         }
6888         if (!s)
6889             missingterm(NULL);
6890         pl_yylval.ival = OP_CONST;
6891         /* FIXME. I think that this can be const if char *d is replaced by
6892            more localised variables.  */
6893         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6894             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6895                 pl_yylval.ival = OP_STRINGIFY;
6896                 break;
6897             }
6898         }
6899         if (pl_yylval.ival == OP_CONST)
6900             COPLINE_SET_FROM_MULTI_END;
6901         TERM(sublex_start());
6902
6903     case '`':
6904         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6905         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6906         if (PL_expect == XOPERATOR)
6907             no_op("Backticks",s);
6908         if (!s)
6909             missingterm(NULL);
6910         pl_yylval.ival = OP_BACKTICK;
6911         TERM(sublex_start());
6912
6913     case '\\':
6914         s++;
6915         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6916          && isDIGIT(*s))
6917             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6918                            *s, *s);
6919         if (PL_expect == XOPERATOR)
6920             no_op("Backslash",s);
6921         OPERATOR(REFGEN);
6922
6923     case 'v':
6924         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6925             char *start = s + 2;
6926             while (isDIGIT(*start) || *start == '_')
6927                 start++;
6928             if (*start == '.' && isDIGIT(start[1])) {
6929                 s = scan_num(s, &pl_yylval);
6930                 TERM(THING);
6931             }
6932             else if ((*start == ':' && start[1] == ':')
6933                   || (PL_expect == XSTATE && *start == ':'))
6934                 goto keylookup;
6935             else if (PL_expect == XSTATE) {
6936                 d = start;
6937                 while (d < PL_bufend && isSPACE(*d)) d++;
6938                 if (*d == ':') goto keylookup;
6939             }
6940             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6941             if (!isALPHA(*start) && (PL_expect == XTERM
6942                         || PL_expect == XREF || PL_expect == XSTATE
6943                         || PL_expect == XTERMORDORDOR)) {
6944                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6945                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6946                 if (!gv) {
6947                     s = scan_num(s, &pl_yylval);
6948                     TERM(THING);
6949                 }
6950             }
6951         }
6952         goto keylookup;
6953     case 'x':
6954         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6955             s++;
6956             Mop(OP_REPEAT);
6957         }
6958         goto keylookup;
6959
6960     case '_':
6961     case 'a': case 'A':
6962     case 'b': case 'B':
6963     case 'c': case 'C':
6964     case 'd': case 'D':
6965     case 'e': case 'E':
6966     case 'f': case 'F':
6967     case 'g': case 'G':
6968     case 'h': case 'H':
6969     case 'i': case 'I':
6970     case 'j': case 'J':
6971     case 'k': case 'K':
6972     case 'l': case 'L':
6973     case 'm': case 'M':
6974     case 'n': case 'N':
6975     case 'o': case 'O':
6976     case 'p': case 'P':
6977     case 'q': case 'Q':
6978     case 'r': case 'R':
6979     case 's': case 'S':
6980     case 't': case 'T':
6981     case 'u': case 'U':
6982               case 'V':
6983     case 'w': case 'W':
6984               case 'X':
6985     case 'y': case 'Y':
6986     case 'z': case 'Z':
6987
6988       keylookup: {
6989         bool anydelim;
6990         bool lex;
6991         I32 tmp;
6992         SV *sv;
6993         CV *cv;
6994         PADOFFSET off;
6995         OP *rv2cv_op;
6996
6997         lex = FALSE;
6998         orig_keyword = 0;
6999         off = 0;
7000         sv = NULL;
7001         cv = NULL;
7002         gv = NULL;
7003         gvp = NULL;
7004         rv2cv_op = NULL;
7005
7006         PL_bufptr = s;
7007         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7008
7009         /* Some keywords can be followed by any delimiter, including ':' */
7010         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7011
7012         /* x::* is just a word, unless x is "CORE" */
7013         if (!anydelim && *s == ':' && s[1] == ':') {
7014             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7015             goto just_a_word;
7016         }
7017
7018         d = s;
7019         while (d < PL_bufend && isSPACE(*d))
7020                 d++;    /* no comments skipped here, or s### is misparsed */
7021
7022         /* Is this a word before a => operator? */
7023         if (*d == '=' && d[1] == '>') {
7024           fat_arrow:
7025             CLINE;
7026             pl_yylval.opval
7027                 = (OP*)newSVOP(OP_CONST, 0,
7028                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7029             pl_yylval.opval->op_private = OPpCONST_BARE;
7030             TERM(WORD);
7031         }
7032
7033         /* Check for plugged-in keyword */
7034         {
7035             OP *o;
7036             int result;
7037             char *saved_bufptr = PL_bufptr;
7038             PL_bufptr = s;
7039             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7040             s = PL_bufptr;
7041             if (result == KEYWORD_PLUGIN_DECLINE) {
7042                 /* not a plugged-in keyword */
7043                 PL_bufptr = saved_bufptr;
7044             } else if (result == KEYWORD_PLUGIN_STMT) {
7045                 pl_yylval.opval = o;
7046                 CLINE;
7047                 PL_expect = XSTATE;
7048                 return REPORT(PLUGSTMT);
7049             } else if (result == KEYWORD_PLUGIN_EXPR) {
7050                 pl_yylval.opval = o;
7051                 CLINE;
7052                 PL_expect = XOPERATOR;
7053                 return REPORT(PLUGEXPR);
7054             } else {
7055                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7056                                         PL_tokenbuf);
7057             }
7058         }
7059
7060         /* Check for built-in keyword */
7061         tmp = keyword(PL_tokenbuf, len, 0);
7062
7063         /* Is this a label? */
7064         if (!anydelim && PL_expect == XSTATE
7065               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7066             s = d + 1;
7067             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7068             pl_yylval.pval[len] = '\0';
7069             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7070             CLINE;
7071             TOKEN(LABEL);
7072         }
7073
7074         /* Check for lexical sub */
7075         if (PL_expect != XOPERATOR) {
7076             char tmpbuf[sizeof PL_tokenbuf + 1];
7077             *tmpbuf = '&';
7078             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7079             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7080             if (off != NOT_IN_PAD) {
7081                 assert(off); /* we assume this is boolean-true below */
7082                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7083                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7084                     HEK * const stashname = HvNAME_HEK(stash);
7085                     sv = newSVhek(stashname);
7086                     sv_catpvs(sv, "::");
7087                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7088                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7089                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7090                                     SVt_PVCV);
7091                     off = 0;
7092                     if (!gv) {
7093                         sv_free(sv);
7094                         sv = NULL;
7095                         goto just_a_word;
7096                     }
7097                 }
7098                 else {
7099                     rv2cv_op = newOP(OP_PADANY, 0);
7100                     rv2cv_op->op_targ = off;
7101                     cv = find_lexical_cv(off);
7102                 }
7103                 lex = TRUE;
7104                 goto just_a_word;
7105             }
7106             off = 0;
7107         }
7108
7109         if (tmp < 0) {                  /* second-class keyword? */
7110             GV *ogv = NULL;     /* override (winner) */
7111             GV *hgv = NULL;     /* hidden (loser) */
7112             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7113                 CV *cv;
7114                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7115                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7116                                             SVt_PVCV)) &&
7117                     (cv = GvCVu(gv)))
7118                 {
7119                     if (GvIMPORTED_CV(gv))
7120                         ogv = gv;
7121                     else if (! CvMETHOD(cv))
7122                         hgv = gv;
7123                 }
7124                 if (!ogv &&
7125                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7126                                           len, FALSE)) &&
7127                     (gv = *gvp) && (
7128                         isGV_with_GP(gv)
7129                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
7130                             :   SvPCS_IMPORTED(gv)
7131                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7132                                          len, 0), 1)
7133                    ))
7134                 {
7135                     ogv = gv;
7136                 }
7137             }
7138             if (ogv) {
7139                 orig_keyword = tmp;
7140                 tmp = 0;                /* overridden by import or by GLOBAL */
7141             }
7142             else if (gv && !gvp
7143                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7144                      && GvCVu(gv))
7145             {
7146                 tmp = 0;                /* any sub overrides "weak" keyword */
7147             }
7148             else {                      /* no override */
7149                 tmp = -tmp;
7150                 if (tmp == KEY_dump) {
7151                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7152                                    "dump() better written as CORE::dump()");
7153                 }
7154                 gv = NULL;
7155                 gvp = 0;
7156                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7157                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7158                                    "Ambiguous call resolved as CORE::%s(), "
7159                                    "qualify as such or use &",
7160                                    GvENAME(hgv));
7161             }
7162         }
7163
7164         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7165          && (!anydelim || *s != '#')) {
7166             /* no override, and not s### either; skipspace is safe here
7167              * check for => on following line */
7168             bool arrow;
7169             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7170             STRLEN   soff = s         - SvPVX(PL_linestr);
7171             s = skipspace_flags(s, LEX_NO_INCLINE);
7172             arrow = *s == '=' && s[1] == '>';
7173             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7174             s         = SvPVX(PL_linestr) +   soff;
7175             if (arrow)
7176                 goto fat_arrow;
7177         }
7178
7179       reserved_word:
7180         switch (tmp) {
7181
7182         default:                        /* not a keyword */
7183             /* Trade off - by using this evil construction we can pull the
7184                variable gv into the block labelled keylookup. If not, then
7185                we have to give it function scope so that the goto from the
7186                earlier ':' case doesn't bypass the initialisation.  */
7187             if (0) {
7188             just_a_word_zero_gv:
7189                 sv = NULL;
7190                 cv = NULL;
7191                 gv = NULL;
7192                 gvp = NULL;
7193                 rv2cv_op = NULL;
7194                 orig_keyword = 0;
7195                 lex = 0;
7196                 off = 0;
7197             }
7198           just_a_word: {
7199                 int pkgname = 0;
7200                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7201                 const char penultchar =
7202                     lastchar && PL_bufptr - 2 >= PL_linestart
7203                          ? PL_bufptr[-2]
7204                          : 0;
7205 #ifdef PERL_MAD
7206                 SV *nextPL_nextwhite = 0;
7207 #endif
7208
7209
7210                 /* Get the rest if it looks like a package qualifier */
7211
7212                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7213                     STRLEN morelen;
7214                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7215                                   TRUE, &morelen);
7216                     if (!morelen)
7217                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7218                                 UTF8fARG(UTF, len, PL_tokenbuf),
7219                                 *s == '\'' ? "'" : "::");
7220                     len += morelen;
7221                     pkgname = 1;
7222                 }
7223
7224                 if (PL_expect == XOPERATOR) {
7225                     if (PL_bufptr == PL_linestart) {
7226                         CopLINE_dec(PL_curcop);
7227                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7228                         CopLINE_inc(PL_curcop);
7229                     }
7230                     else
7231                         no_op("Bareword",s);
7232                 }
7233
7234                 /* Look for a subroutine with this name in current package,
7235                    unless this is a lexical sub, or name is "Foo::",
7236                    in which case Foo is a bareword
7237                    (and a package name). */
7238
7239                 if (len > 2 && !PL_madskills &&
7240                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7241                 {
7242                     if (ckWARN(WARN_BAREWORD)
7243                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7244                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7245                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7246                            UTF8fARG(UTF, len, PL_tokenbuf));
7247                     len -= 2;
7248                     PL_tokenbuf[len] = '\0';
7249                     gv = NULL;
7250                     gvp = 0;
7251                 }
7252                 else {
7253                     if (!lex && !gv) {
7254                         /* Mustn't actually add anything to a symbol table.
7255                            But also don't want to "initialise" any placeholder
7256                            constants that might already be there into full
7257                            blown PVGVs with attached PVCV.  */
7258                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7259                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7260                                                SVt_PVCV);
7261                     }
7262                     len = 0;
7263                 }
7264
7265                 /* if we saw a global override before, get the right name */
7266
7267                 if (!sv)
7268                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7269                     len ? len : strlen(PL_tokenbuf));
7270                 if (gvp) {
7271                     SV * const tmp_sv = sv;
7272                     sv = newSVpvs("CORE::GLOBAL::");
7273                     sv_catsv(sv, tmp_sv);
7274                     SvREFCNT_dec(tmp_sv);
7275                 }
7276
7277 #ifdef PERL_MAD
7278                 if (PL_madskills && !PL_thistoken) {
7279                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7280                     PL_thistoken = newSVpvn(start,s - start);
7281                     PL_realtokenstart = s - SvPVX(PL_linestr);
7282                 }
7283 #endif
7284
7285                 /* Presume this is going to be a bareword of some sort. */
7286                 CLINE;
7287                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7288                 pl_yylval.opval->op_private = OPpCONST_BARE;
7289
7290                 /* And if "Foo::", then that's what it certainly is. */
7291                 if (len)
7292                     goto safe_bareword;
7293
7294                 if (!off)
7295                 {
7296                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7297                     const_op->op_private = OPpCONST_BARE;
7298                     rv2cv_op = newCVREF(0, const_op);
7299                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7300                 }
7301
7302                 /* See if it's the indirect object for a list operator. */
7303
7304                 if (PL_oldoldbufptr &&
7305                     PL_oldoldbufptr < PL_bufptr &&
7306                     (PL_oldoldbufptr == PL_last_lop
7307                      || PL_oldoldbufptr == PL_last_uni) &&
7308                     /* NO SKIPSPACE BEFORE HERE! */
7309                     (PL_expect == XREF ||
7310                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7311                 {
7312                     bool immediate_paren = *s == '(';
7313
7314                     /* (Now we can afford to cross potential line boundary.) */
7315                     s = SKIPSPACE2(s,nextPL_nextwhite);
7316 #ifdef PERL_MAD
7317                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7318 #endif
7319
7320                     /* Two barewords in a row may indicate method call. */
7321
7322                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7323                         (tmp = intuit_method(s, gv, cv))) {
7324                         op_free(rv2cv_op);
7325                         if (tmp == METHOD && !PL_lex_allbrackets &&
7326                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7327                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7328                         return REPORT(tmp);
7329                     }
7330
7331                     /* If not a declared subroutine, it's an indirect object. */
7332                     /* (But it's an indir obj regardless for sort.) */
7333                     /* Also, if "_" follows a filetest operator, it's a bareword */
7334
7335                     if (
7336                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7337                          (!cv &&
7338                         (PL_last_lop_op != OP_MAPSTART &&
7339                          PL_last_lop_op != OP_GREPSTART))))
7340                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7341                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7342                        )
7343                     {
7344                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7345                         goto bareword;
7346                     }
7347                 }
7348
7349                 PL_expect = XOPERATOR;
7350 #ifdef PERL_MAD
7351                 if (isSPACE(*s))
7352                     s = SKIPSPACE2(s,nextPL_nextwhite);
7353                 PL_nextwhite = nextPL_nextwhite;
7354 #else
7355                 s = skipspace(s);
7356 #endif
7357
7358                 /* Is this a word before a => operator? */
7359                 if (*s == '=' && s[1] == '>' && !pkgname) {
7360                     op_free(rv2cv_op);
7361                     CLINE;
7362                     /* This is our own scalar, created a few lines above,
7363                        so this is safe. */
7364                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7365                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7366                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7367                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7368                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7369                     TERM(WORD);
7370                 }
7371
7372                 /* If followed by a paren, it's certainly a subroutine. */
7373                 if (*s == '(') {
7374                     CLINE;
7375                     if (cv) {
7376                         d = s + 1;
7377                         while (SPACE_OR_TAB(*d))
7378                             d++;
7379                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7380                             s = d + 1;
7381                             goto its_constant;
7382                         }
7383                     }
7384 #ifdef PERL_MAD
7385                     if (PL_madskills) {
7386                         PL_nextwhite = PL_thiswhite;
7387                         PL_thiswhite = 0;
7388                     }
7389                     start_force(PL_curforce);
7390 #endif
7391                     NEXTVAL_NEXTTOKE.opval =
7392                         off ? rv2cv_op : pl_yylval.opval;
7393                     PL_expect = XOPERATOR;
7394 #ifdef PERL_MAD
7395                     if (PL_madskills) {
7396                         PL_nextwhite = nextPL_nextwhite;
7397                         curmad('X', PL_thistoken);
7398                         PL_thistoken = newSVpvs("");
7399                     }
7400 #endif
7401                     if (off)
7402                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7403                     else op_free(rv2cv_op),        force_next(WORD);
7404                     pl_yylval.ival = 0;
7405                     TOKEN('&');
7406                 }
7407
7408                 /* If followed by var or block, call it a method (unless sub) */
7409
7410                 if ((*s == '$' || *s == '{') && !cv) {
7411                     op_free(rv2cv_op);
7412                     PL_last_lop = PL_oldbufptr;
7413                     PL_last_lop_op = OP_METHOD;
7414                     if (!PL_lex_allbrackets &&
7415                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7416                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7417                     PREBLOCK(METHOD);
7418                 }
7419
7420                 /* If followed by a bareword, see if it looks like indir obj. */
7421
7422                 if (!orig_keyword
7423                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7424                         && (tmp = intuit_method(s, gv, cv))) {
7425                     op_free(rv2cv_op);
7426                     if (tmp == METHOD && !PL_lex_allbrackets &&
7427                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7428                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7429                     return REPORT(tmp);
7430                 }
7431
7432                 /* Not a method, so call it a subroutine (if defined) */
7433
7434                 if (cv) {
7435                     if (lastchar == '-' && penultchar != '-') {
7436                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7437                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7438                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7439                              UTF8fARG(UTF, l, PL_tokenbuf),
7440                              UTF8fARG(UTF, l, PL_tokenbuf));
7441                     }
7442                     /* Check for a constant sub */
7443                     if ((sv = cv_const_sv_or_av(cv))) {
7444                   its_constant:
7445                         op_free(rv2cv_op);
7446                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7447                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7448                         if (SvTYPE(sv) == SVt_PVAV)
7449                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7450                                                       pl_yylval.opval);
7451                         else {
7452                             pl_yylval.opval->op_private = 0;
7453                             pl_yylval.opval->op_folded = 1;
7454                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7455                         }
7456                         TOKEN(WORD);
7457                     }
7458
7459                     op_free(pl_yylval.opval);
7460                     pl_yylval.opval =
7461                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7462                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7463                     PL_last_lop = PL_oldbufptr;
7464                     PL_last_lop_op = OP_ENTERSUB;
7465                     /* Is there a prototype? */
7466                     if (
7467 #ifdef PERL_MAD
7468                         cv &&
7469 #endif
7470                         SvPOK(cv))
7471                     {
7472                         STRLEN protolen = CvPROTOLEN(cv);
7473                         const char *proto = CvPROTO(cv);
7474                         bool optional;
7475                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7476                         if (!protolen)
7477                             TERM(FUNC0SUB);
7478                         if ((optional = *proto == ';'))
7479                           do
7480                             proto++;
7481                           while (*proto == ';');
7482                         if (
7483                             (
7484                                 (
7485                                     *proto == '$' || *proto == '_'
7486                                  || *proto == '*' || *proto == '+'
7487                                 )
7488                              && proto[1] == '\0'
7489                             )
7490                          || (
7491                              *proto == '\\' && proto[1] && proto[2] == '\0'
7492                             )
7493                         )
7494                             UNIPROTO(UNIOPSUB,optional);
7495                         if (*proto == '\\' && proto[1] == '[') {
7496                             const char *p = proto + 2;
7497                             while(*p && *p != ']')
7498                                 ++p;
7499                             if(*p == ']' && !p[1])
7500                                 UNIPROTO(UNIOPSUB,optional);
7501                         }
7502                         if (*proto == '&' && *s == '{') {
7503                             if (PL_curstash)
7504                                 sv_setpvs(PL_subname, "__ANON__");
7505                             else
7506                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7507                             if (!PL_lex_allbrackets &&
7508                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7509                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7510                             PREBLOCK(LSTOPSUB);
7511                         }
7512                     }
7513 #ifdef PERL_MAD
7514                     {
7515                         if (PL_madskills) {
7516                             PL_nextwhite = PL_thiswhite;
7517                             PL_thiswhite = 0;
7518                         }
7519                         start_force(PL_curforce);
7520                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7521                         PL_expect = XTERM;
7522                         if (PL_madskills) {
7523                             PL_nextwhite = nextPL_nextwhite;
7524                             curmad('X', PL_thistoken);
7525                             PL_thistoken = newSVpvs("");
7526                         }
7527                         force_next(off ? PRIVATEREF : WORD);
7528                         if (!PL_lex_allbrackets &&
7529                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7530                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7531                         TOKEN(NOAMP);
7532                     }
7533                 }
7534
7535                 /* Guess harder when madskills require "best effort". */
7536                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7537                     int probable_sub = 0;
7538                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7539                         probable_sub = 1;
7540                     else if (isALPHA(*s)) {
7541                         char tmpbuf[1024];
7542                         STRLEN tmplen;
7543                         d = s;
7544                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7545                         if (!keyword(tmpbuf, tmplen, 0))
7546                             probable_sub = 1;
7547                         else {
7548                             while (d < PL_bufend && isSPACE(*d))
7549                                 d++;
7550                             if (*d == '=' && d[1] == '>')
7551                                 probable_sub = 1;
7552                         }
7553                     }
7554                     if (probable_sub) {
7555                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7556                                         SVt_PVCV);
7557                         op_free(pl_yylval.opval);
7558                         pl_yylval.opval =
7559                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7560                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7561                         PL_last_lop = PL_oldbufptr;
7562                         PL_last_lop_op = OP_ENTERSUB;
7563                         PL_nextwhite = PL_thiswhite;
7564                         PL_thiswhite = 0;
7565                         start_force(PL_curforce);
7566                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7567                         PL_expect = XTERM;
7568                         PL_nextwhite = nextPL_nextwhite;
7569                         curmad('X', PL_thistoken);
7570                         PL_thistoken = newSVpvs("");
7571                         force_next(off ? PRIVATEREF : WORD);
7572                         if (!PL_lex_allbrackets &&
7573                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7574                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7575                         TOKEN(NOAMP);
7576                     }
7577 #else
7578                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7579                     PL_expect = XTERM;
7580                     force_next(off ? PRIVATEREF : WORD);
7581                     if (!PL_lex_allbrackets &&
7582                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7583                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7584                     TOKEN(NOAMP);
7585 #endif
7586                 }
7587
7588                 /* Call it a bare word */
7589
7590                 if (PL_hints & HINT_STRICT_SUBS)
7591                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7592                 else {
7593                 bareword:
7594                     /* after "print" and similar functions (corresponding to
7595                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7596                      * a filehandle should be subject to "strict subs".
7597                      * Likewise for the optional indirect-object argument to system
7598                      * or exec, which can't be a bareword */
7599                     if ((PL_last_lop_op == OP_PRINT
7600                             || PL_last_lop_op == OP_PRTF
7601                             || PL_last_lop_op == OP_SAY
7602                             || PL_last_lop_op == OP_SYSTEM
7603                             || PL_last_lop_op == OP_EXEC)
7604                             && (PL_hints & HINT_STRICT_SUBS))
7605                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7606                     if (lastchar != '-') {
7607                         if (ckWARN(WARN_RESERVED)) {
7608                             d = PL_tokenbuf;
7609                             while (isLOWER(*d))
7610                                 d++;
7611                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7612                             {
7613                                 /* PL_warn_reserved is constant */
7614                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7615                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7616                                        PL_tokenbuf);
7617                                 GCC_DIAG_RESTORE;
7618                             }
7619                         }
7620                     }
7621                 }
7622                 op_free(rv2cv_op);
7623
7624             safe_bareword:
7625                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7626                  && saw_infix_sigil) {
7627                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7628                                      "Operator or semicolon missing before %c%"UTF8f,
7629                                      lastchar,
7630                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7631                                               PL_tokenbuf));
7632                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7633                                      "Ambiguous use of %c resolved as operator %c",
7634                                      lastchar, lastchar);
7635                 }
7636                 TOKEN(WORD);
7637             }
7638
7639         case KEY___FILE__:
7640             FUN0OP(
7641                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7642             );
7643
7644         case KEY___LINE__:
7645             FUN0OP(
7646                 (OP*)newSVOP(OP_CONST, 0,
7647                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7648             );
7649
7650         case KEY___PACKAGE__:
7651             FUN0OP(
7652                 (OP*)newSVOP(OP_CONST, 0,
7653                                         (PL_curstash
7654                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7655                                          : &PL_sv_undef))
7656             );
7657
7658         case KEY___DATA__:
7659         case KEY___END__: {
7660             GV *gv;
7661             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7662                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7663                                         ? PL_curstash
7664                                         : PL_defstash;
7665                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7666                 if (!isGV(gv))
7667                     gv_init(gv,stash,"DATA",4,0);
7668                 GvMULTI_on(gv);
7669                 if (!GvIO(gv))
7670                     GvIOp(gv) = newIO();
7671                 IoIFP(GvIOp(gv)) = PL_rsfp;
7672 #if defined(HAS_FCNTL) && defined(F_SETFD)
7673                 {
7674                     const int fd = PerlIO_fileno(PL_rsfp);
7675                     fcntl(fd,F_SETFD,fd >= 3);
7676                 }
7677 #endif
7678                 /* Mark this internal pseudo-handle as clean */
7679                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7680                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7681                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7682                 else
7683                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7684 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7685                 /* if the script was opened in binmode, we need to revert
7686                  * it to text mode for compatibility; but only iff it has CRs
7687                  * XXX this is a questionable hack at best. */
7688                 if (PL_bufend-PL_bufptr > 2
7689                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7690                 {
7691                     Off_t loc = 0;
7692                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7693                         loc = PerlIO_tell(PL_rsfp);
7694                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7695                     }
7696 #ifdef NETWARE
7697                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7698 #else
7699                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7700 #endif  /* NETWARE */
7701                         if (loc > 0)
7702                             PerlIO_seek(PL_rsfp, loc, 0);
7703                     }
7704                 }
7705 #endif
7706 #ifdef PERLIO_LAYERS
7707                 if (!IN_BYTES) {
7708                     if (UTF)
7709                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7710                     else if (PL_encoding) {
7711                         SV *name;
7712                         dSP;
7713                         ENTER;
7714                         SAVETMPS;
7715                         PUSHMARK(sp);
7716                         EXTEND(SP, 1);
7717                         XPUSHs(PL_encoding);
7718                         PUTBACK;
7719                         call_method("name", G_SCALAR);
7720                         SPAGAIN;
7721                         name = POPs;
7722                         PUTBACK;
7723                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7724                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7725                                                       SVfARG(name)));
7726                         FREETMPS;
7727                         LEAVE;
7728                     }
7729                 }
7730 #endif
7731 #ifdef PERL_MAD
7732                 if (PL_madskills) {
7733                     if (PL_realtokenstart >= 0) {
7734                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7735                         if (!PL_endwhite)
7736                             PL_endwhite = newSVpvs("");
7737                         sv_catsv(PL_endwhite, PL_thiswhite);
7738                         PL_thiswhite = 0;
7739                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7740                         PL_realtokenstart = -1;
7741                     }
7742                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7743                            != NULL) ;
7744                 }
7745 #endif
7746                 PL_rsfp = NULL;
7747             }
7748             goto fake_eof;
7749         }
7750
7751         case KEY___SUB__:
7752             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7753
7754         case KEY_AUTOLOAD:
7755         case KEY_DESTROY:
7756         case KEY_BEGIN:
7757         case KEY_UNITCHECK:
7758         case KEY_CHECK:
7759         case KEY_INIT:
7760         case KEY_END:
7761             if (PL_expect == XSTATE) {
7762                 s = PL_bufptr;
7763                 goto really_sub;
7764             }
7765             goto just_a_word;
7766
7767         case_KEY_CORE:
7768             {
7769                 STRLEN olen = len;
7770                 d = s;
7771                 s += 2;
7772                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7773                 if ((*s == ':' && s[1] == ':')
7774                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7775                 {
7776                     s = d;
7777                     len = olen;
7778                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7779                     goto just_a_word;
7780                 }
7781                 if (!tmp)
7782                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7783                                       UTF8fARG(UTF, len, PL_tokenbuf));
7784                 if (tmp < 0)
7785                     tmp = -tmp;
7786                 else if (tmp == KEY_require || tmp == KEY_do
7787                       || tmp == KEY_glob)
7788                     /* that's a way to remember we saw "CORE::" */
7789                     orig_keyword = tmp;
7790                 goto reserved_word;
7791             }
7792
7793         case KEY_abs:
7794             UNI(OP_ABS);
7795
7796         case KEY_alarm:
7797             UNI(OP_ALARM);
7798
7799         case KEY_accept:
7800             LOP(OP_ACCEPT,XTERM);
7801
7802         case KEY_and:
7803             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7804                 return REPORT(0);
7805             OPERATOR(ANDOP);
7806
7807         case KEY_atan2:
7808             LOP(OP_ATAN2,XTERM);
7809
7810         case KEY_bind:
7811             LOP(OP_BIND,XTERM);
7812
7813         case KEY_binmode:
7814             LOP(OP_BINMODE,XTERM);
7815
7816         case KEY_bless:
7817             LOP(OP_BLESS,XTERM);
7818
7819         case KEY_break:
7820             FUN0(OP_BREAK);
7821
7822         case KEY_chop:
7823             UNI(OP_CHOP);
7824
7825         case KEY_continue:
7826                     /* We have to disambiguate the two senses of
7827                       "continue". If the next token is a '{' then
7828                       treat it as the start of a continue block;
7829                       otherwise treat it as a control operator.
7830                      */
7831                     s = skipspace(s);
7832                     if (*s == '{')
7833             PREBLOCK(CONTINUE);
7834                     else
7835                         FUN0(OP_CONTINUE);
7836
7837         case KEY_chdir:
7838             /* may use HOME */
7839             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7840             UNI(OP_CHDIR);
7841
7842         case KEY_close:
7843             UNI(OP_CLOSE);
7844
7845         case KEY_closedir:
7846             UNI(OP_CLOSEDIR);
7847
7848         case KEY_cmp:
7849             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7850                 return REPORT(0);
7851             Eop(OP_SCMP);
7852
7853         case KEY_caller:
7854             UNI(OP_CALLER);
7855
7856         case KEY_crypt:
7857 #ifdef FCRYPT
7858             if (!PL_cryptseen) {
7859                 PL_cryptseen = TRUE;
7860                 init_des();
7861             }
7862 #endif
7863             LOP(OP_CRYPT,XTERM);
7864
7865         case KEY_chmod:
7866             LOP(OP_CHMOD,XTERM);
7867
7868         case KEY_chown:
7869             LOP(OP_CHOWN,XTERM);
7870
7871         case KEY_connect:
7872             LOP(OP_CONNECT,XTERM);
7873
7874         case KEY_chr:
7875             UNI(OP_CHR);
7876
7877         case KEY_cos:
7878             UNI(OP_COS);
7879
7880         case KEY_chroot:
7881             UNI(OP_CHROOT);
7882
7883         case KEY_default:
7884             PREBLOCK(DEFAULT);
7885
7886         case KEY_do:
7887             s = SKIPSPACE1(s);
7888             if (*s == '{')
7889                 PRETERMBLOCK(DO);
7890             if (*s != '\'') {
7891                 *PL_tokenbuf = '&';
7892                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7893                               1, &len);
7894                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7895                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7896                     d = SKIPSPACE1(d);
7897                     if (*d == '(') {
7898                         force_ident_maybe_lex('&');
7899                         s = d;
7900                     }
7901                 }
7902             }
7903             if (orig_keyword == KEY_do) {
7904                 orig_keyword = 0;
7905                 pl_yylval.ival = 1;
7906             }
7907             else
7908                 pl_yylval.ival = 0;
7909             OPERATOR(DO);
7910
7911         case KEY_die:
7912             PL_hints |= HINT_BLOCK_SCOPE;
7913             LOP(OP_DIE,XTERM);
7914
7915         case KEY_defined:
7916             UNI(OP_DEFINED);
7917
7918         case KEY_delete:
7919             UNI(OP_DELETE);
7920
7921         case KEY_dbmopen:
7922             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7923                               STR_WITH_LEN("NDBM_File::"),
7924                               STR_WITH_LEN("DB_File::"),
7925                               STR_WITH_LEN("GDBM_File::"),
7926                               STR_WITH_LEN("SDBM_File::"),
7927                               STR_WITH_LEN("ODBM_File::"),
7928                               NULL);
7929             LOP(OP_DBMOPEN,XTERM);
7930
7931         case KEY_dbmclose:
7932             UNI(OP_DBMCLOSE);
7933
7934         case KEY_dump:
7935             PL_expect = XOPERATOR;
7936             s = force_word(s,WORD,TRUE,FALSE);
7937             LOOPX(OP_DUMP);
7938
7939         case KEY_else:
7940             PREBLOCK(ELSE);
7941
7942         case KEY_elsif:
7943             pl_yylval.ival = CopLINE(PL_curcop);
7944             OPERATOR(ELSIF);
7945
7946         case KEY_eq:
7947             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7948                 return REPORT(0);
7949             Eop(OP_SEQ);
7950
7951         case KEY_exists:
7952             UNI(OP_EXISTS);
7953
7954         case KEY_exit:
7955             if (PL_madskills)
7956                 UNI(OP_INT);
7957             UNI(OP_EXIT);
7958
7959         case KEY_eval:
7960             s = SKIPSPACE1(s);
7961             if (*s == '{') { /* block eval */
7962                 PL_expect = XTERMBLOCK;
7963                 UNIBRACK(OP_ENTERTRY);
7964             }
7965             else { /* string eval */
7966                 PL_expect = XTERM;
7967                 UNIBRACK(OP_ENTEREVAL);
7968             }
7969
7970         case KEY_evalbytes:
7971             PL_expect = XTERM;
7972             UNIBRACK(-OP_ENTEREVAL);
7973
7974         case KEY_eof:
7975             UNI(OP_EOF);
7976
7977         case KEY_exp:
7978             UNI(OP_EXP);
7979
7980         case KEY_each:
7981             UNI(OP_EACH);
7982
7983         case KEY_exec:
7984             LOP(OP_EXEC,XREF);
7985
7986         case KEY_endhostent:
7987             FUN0(OP_EHOSTENT);
7988
7989         case KEY_endnetent:
7990             FUN0(OP_ENETENT);
7991
7992         case KEY_endservent:
7993             FUN0(OP_ESERVENT);
7994
7995         case KEY_endprotoent:
7996             FUN0(OP_EPROTOENT);
7997
7998         case KEY_endpwent:
7999             FUN0(OP_EPWENT);
8000
8001         case KEY_endgrent:
8002             FUN0(OP_EGRENT);
8003
8004         case KEY_for:
8005         case KEY_foreach:
8006             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8007                 return REPORT(0);
8008             pl_yylval.ival = CopLINE(PL_curcop);
8009             s = SKIPSPACE1(s);
8010             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8011                 char *p = s;
8012 #ifdef PERL_MAD
8013                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8014 #endif
8015
8016                 if ((PL_bufend - p) >= 3 &&
8017                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8018                     p += 2;
8019                 else if ((PL_bufend - p) >= 4 &&
8020                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8021                     p += 3;
8022                 p = PEEKSPACE(p);
8023                 /* skip optional package name, as in "for my abc $x (..)" */
8024                 if (isIDFIRST_lazy_if(p,UTF)) {
8025                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8026                     p = PEEKSPACE(p);
8027                 }
8028                 if (*p != '$')
8029                     Perl_croak(aTHX_ "Missing $ on loop variable");
8030 #ifdef PERL_MAD
8031                 s = SvPVX(PL_linestr) + soff;
8032 #endif
8033             }
8034             OPERATOR(FOR);
8035
8036         case KEY_formline:
8037             LOP(OP_FORMLINE,XTERM);
8038
8039         case KEY_fork:
8040             FUN0(OP_FORK);
8041
8042         case KEY_fc:
8043             UNI(OP_FC);
8044
8045         case KEY_fcntl:
8046             LOP(OP_FCNTL,XTERM);
8047
8048         case KEY_fileno:
8049             UNI(OP_FILENO);
8050
8051         case KEY_flock:
8052             LOP(OP_FLOCK,XTERM);
8053
8054         case KEY_gt:
8055             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8056                 return REPORT(0);
8057             Rop(OP_SGT);
8058
8059         case KEY_ge:
8060             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8061                 return REPORT(0);
8062             Rop(OP_SGE);
8063
8064         case KEY_grep:
8065             LOP(OP_GREPSTART, XREF);
8066
8067         case KEY_goto:
8068             PL_expect = XOPERATOR;
8069             s = force_word(s,WORD,TRUE,FALSE);
8070             LOOPX(OP_GOTO);
8071
8072         case KEY_gmtime:
8073             UNI(OP_GMTIME);
8074
8075         case KEY_getc:
8076             UNIDOR(OP_GETC);
8077
8078         case KEY_getppid:
8079             FUN0(OP_GETPPID);
8080
8081         case KEY_getpgrp:
8082             UNI(OP_GETPGRP);
8083
8084         case KEY_getpriority:
8085             LOP(OP_GETPRIORITY,XTERM);
8086
8087         case KEY_getprotobyname:
8088             UNI(OP_GPBYNAME);
8089
8090         case KEY_getprotobynumber:
8091             LOP(OP_GPBYNUMBER,XTERM);
8092
8093         case KEY_getprotoent:
8094             FUN0(OP_GPROTOENT);
8095
8096         case KEY_getpwent:
8097             FUN0(OP_GPWENT);
8098
8099         case KEY_getpwnam:
8100             UNI(OP_GPWNAM);
8101
8102         case KEY_getpwuid:
8103             UNI(OP_GPWUID);
8104
8105         case KEY_getpeername:
8106             UNI(OP_GETPEERNAME);
8107
8108         case KEY_gethostbyname:
8109             UNI(OP_GHBYNAME);
8110
8111         case KEY_gethostbyaddr:
8112             LOP(OP_GHBYADDR,XTERM);
8113
8114         case KEY_gethostent:
8115             FUN0(OP_GHOSTENT);
8116
8117         case KEY_getnetbyname:
8118             UNI(OP_GNBYNAME);
8119
8120         case KEY_getnetbyaddr:
8121             LOP(OP_GNBYADDR,XTERM);
8122
8123         case KEY_getnetent:
8124             FUN0(OP_GNETENT);
8125
8126         case KEY_getservbyname:
8127             LOP(OP_GSBYNAME,XTERM);
8128
8129         case KEY_getservbyport:
8130             LOP(OP_GSBYPORT,XTERM);
8131
8132         case KEY_getservent:
8133             FUN0(OP_GSERVENT);
8134
8135         case KEY_getsockname:
8136             UNI(OP_GETSOCKNAME);
8137
8138         case KEY_getsockopt:
8139             LOP(OP_GSOCKOPT,XTERM);
8140
8141         case KEY_getgrent:
8142             FUN0(OP_GGRENT);
8143
8144         case KEY_getgrnam:
8145             UNI(OP_GGRNAM);
8146
8147         case KEY_getgrgid:
8148             UNI(OP_GGRGID);
8149
8150         case KEY_getlogin:
8151             FUN0(OP_GETLOGIN);
8152
8153         case KEY_given:
8154             pl_yylval.ival = CopLINE(PL_curcop);
8155             Perl_ck_warner_d(aTHX_
8156                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8157                 "given is experimental");
8158             OPERATOR(GIVEN);
8159
8160         case KEY_glob:
8161             LOP(
8162              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8163              XTERM
8164             );
8165
8166         case KEY_hex:
8167             UNI(OP_HEX);
8168
8169         case KEY_if:
8170             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8171                 return REPORT(0);
8172             pl_yylval.ival = CopLINE(PL_curcop);
8173             OPERATOR(IF);
8174
8175         case KEY_index:
8176             LOP(OP_INDEX,XTERM);
8177
8178         case KEY_int:
8179             UNI(OP_INT);
8180
8181         case KEY_ioctl:
8182             LOP(OP_IOCTL,XTERM);
8183
8184         case KEY_join:
8185             LOP(OP_JOIN,XTERM);
8186
8187         case KEY_keys:
8188             UNI(OP_KEYS);
8189
8190         case KEY_kill:
8191             LOP(OP_KILL,XTERM);
8192
8193         case KEY_last:
8194             PL_expect = XOPERATOR;
8195             s = force_word(s,WORD,TRUE,FALSE);
8196             LOOPX(OP_LAST);
8197
8198         case KEY_lc:
8199             UNI(OP_LC);
8200
8201         case KEY_lcfirst:
8202             UNI(OP_LCFIRST);
8203
8204         case KEY_local:
8205             pl_yylval.ival = 0;
8206             OPERATOR(LOCAL);
8207
8208         case KEY_length:
8209             UNI(OP_LENGTH);
8210
8211         case KEY_lt:
8212             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8213                 return REPORT(0);
8214             Rop(OP_SLT);
8215
8216         case KEY_le:
8217             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8218                 return REPORT(0);
8219             Rop(OP_SLE);
8220
8221         case KEY_localtime:
8222             UNI(OP_LOCALTIME);
8223
8224         case KEY_log:
8225             UNI(OP_LOG);
8226
8227         case KEY_link:
8228             LOP(OP_LINK,XTERM);
8229
8230         case KEY_listen:
8231             LOP(OP_LISTEN,XTERM);
8232
8233         case KEY_lock:
8234             UNI(OP_LOCK);
8235
8236         case KEY_lstat:
8237             UNI(OP_LSTAT);
8238
8239         case KEY_m:
8240             s = scan_pat(s,OP_MATCH);
8241             TERM(sublex_start());
8242
8243         case KEY_map:
8244             LOP(OP_MAPSTART, XREF);
8245
8246         case KEY_mkdir:
8247             LOP(OP_MKDIR,XTERM);
8248
8249         case KEY_msgctl:
8250             LOP(OP_MSGCTL,XTERM);
8251
8252         case KEY_msgget:
8253             LOP(OP_MSGGET,XTERM);
8254
8255         case KEY_msgrcv:
8256             LOP(OP_MSGRCV,XTERM);
8257
8258         case KEY_msgsnd:
8259             LOP(OP_MSGSND,XTERM);
8260
8261         case KEY_our:
8262         case KEY_my:
8263         case KEY_state:
8264             PL_in_my = (U16)tmp;
8265             s = SKIPSPACE1(s);
8266             if (isIDFIRST_lazy_if(s,UTF)) {
8267 #ifdef PERL_MAD
8268                 char* start = s;
8269 #endif
8270                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8271                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8272                 {
8273                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8274                         Perl_croak(aTHX_
8275                                   "Experimental \"%s\" subs not enabled",
8276                                    tmp == KEY_my    ? "my"    :
8277                                    tmp == KEY_state ? "state" : "our");
8278                     Perl_ck_warner_d(aTHX_
8279                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8280                         "The lexical_subs feature is experimental");
8281                     goto really_sub;
8282                 }
8283                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8284                 if (!PL_in_my_stash) {
8285                     char tmpbuf[1024];
8286                     PL_bufptr = s;
8287                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8288                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8289                 }
8290 #ifdef PERL_MAD
8291                 if (PL_madskills) {     /* just add type to declarator token */
8292                     sv_catsv(PL_thistoken, PL_nextwhite);
8293                     PL_nextwhite = 0;
8294                     sv_catpvn(PL_thistoken, start, s - start);
8295                 }
8296 #endif
8297             }
8298             pl_yylval.ival = 1;
8299             OPERATOR(MY);
8300
8301         case KEY_next:
8302             PL_expect = XOPERATOR;
8303             s = force_word(s,WORD,TRUE,FALSE);
8304             LOOPX(OP_NEXT);
8305
8306         case KEY_ne:
8307             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8308                 return REPORT(0);
8309             Eop(OP_SNE);
8310
8311         case KEY_no:
8312             s = tokenize_use(0, s);
8313             TERM(USE);
8314
8315         case KEY_not:
8316             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8317                 FUN1(OP_NOT);
8318             else {
8319                 if (!PL_lex_allbrackets &&
8320                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8321                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8322                 OPERATOR(NOTOP);
8323             }
8324
8325         case KEY_open:
8326             s = SKIPSPACE1(s);
8327             if (isIDFIRST_lazy_if(s,UTF)) {
8328           const char *t;
8329           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8330               &len);
8331                 for (t=d; isSPACE(*t);)
8332                     t++;
8333                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8334                     /* [perl #16184] */
8335                     && !(t[0] == '=' && t[1] == '>')
8336                     && !(t[0] == ':' && t[1] == ':')
8337                     && !keyword(s, d-s, 0)
8338                 ) {
8339                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8340                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8341                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8342                 }
8343             }
8344             LOP(OP_OPEN,XTERM);
8345
8346         case KEY_or:
8347             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8348                 return REPORT(0);
8349             pl_yylval.ival = OP_OR;
8350             OPERATOR(OROP);
8351
8352         case KEY_ord:
8353             UNI(OP_ORD);
8354
8355         case KEY_oct:
8356             UNI(OP_OCT);
8357
8358         case KEY_opendir:
8359             LOP(OP_OPEN_DIR,XTERM);
8360
8361         case KEY_print:
8362             checkcomma(s,PL_tokenbuf,"filehandle");
8363             LOP(OP_PRINT,XREF);
8364
8365         case KEY_printf:
8366             checkcomma(s,PL_tokenbuf,"filehandle");
8367             LOP(OP_PRTF,XREF);
8368
8369         case KEY_prototype:
8370             UNI(OP_PROTOTYPE);
8371
8372         case KEY_push:
8373             LOP(OP_PUSH,XTERM);
8374
8375         case KEY_pop:
8376             UNIDOR(OP_POP);
8377
8378         case KEY_pos:
8379             UNIDOR(OP_POS);
8380
8381         case KEY_pack:
8382             LOP(OP_PACK,XTERM);
8383
8384         case KEY_package:
8385             s = force_word(s,WORD,FALSE,TRUE);
8386             s = SKIPSPACE1(s);
8387             s = force_strict_version(s);
8388             PL_lex_expect = XBLOCK;
8389             OPERATOR(PACKAGE);
8390
8391         case KEY_pipe:
8392             LOP(OP_PIPE_OP,XTERM);
8393
8394         case KEY_q:
8395             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8396             COPLINE_SET_FROM_MULTI_END;
8397             if (!s)
8398                 missingterm(NULL);
8399             pl_yylval.ival = OP_CONST;
8400             TERM(sublex_start());
8401
8402         case KEY_quotemeta:
8403             UNI(OP_QUOTEMETA);
8404
8405         case KEY_qw: {
8406             OP *words = NULL;
8407             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8408             COPLINE_SET_FROM_MULTI_END;
8409             if (!s)
8410                 missingterm(NULL);
8411             PL_expect = XOPERATOR;
8412             if (SvCUR(PL_lex_stuff)) {
8413                 int warned_comma = !ckWARN(WARN_QW);
8414                 int warned_comment = warned_comma;
8415                 d = SvPV_force(PL_lex_stuff, len);
8416                 while (len) {
8417                     for (; isSPACE(*d) && len; --len, ++d)
8418                         /**/;
8419                     if (len) {
8420                         SV *sv;
8421                         const char *b = d;
8422                         if (!warned_comma || !warned_comment) {
8423                             for (; !isSPACE(*d) && len; --len, ++d) {
8424                                 if (!warned_comma && *d == ',') {
8425                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8426                                         "Possible attempt to separate words with commas");
8427                                     ++warned_comma;
8428                                 }
8429                                 else if (!warned_comment && *d == '#') {
8430                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8431                                         "Possible attempt to put comments in qw() list");
8432                                     ++warned_comment;
8433                                 }
8434                             }
8435                         }
8436                         else {
8437                             for (; !isSPACE(*d) && len; --len, ++d)
8438                                 /**/;
8439                         }
8440                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8441                         words = op_append_elem(OP_LIST, words,
8442                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8443                     }
8444                 }
8445             }
8446             if (!words)
8447                 words = newNULLLIST();
8448             if (PL_lex_stuff) {
8449                 SvREFCNT_dec(PL_lex_stuff);
8450                 PL_lex_stuff = NULL;
8451             }
8452             PL_expect = XOPERATOR;
8453             pl_yylval.opval = sawparens(words);
8454             TOKEN(QWLIST);
8455         }
8456
8457         case KEY_qq:
8458             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8459             if (!s)
8460                 missingterm(NULL);
8461             pl_yylval.ival = OP_STRINGIFY;
8462             if (SvIVX(PL_lex_stuff) == '\'')
8463                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8464             TERM(sublex_start());
8465
8466         case KEY_qr:
8467             s = scan_pat(s,OP_QR);
8468             TERM(sublex_start());
8469
8470         case KEY_qx:
8471             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8472             if (!s)
8473                 missingterm(NULL);
8474             pl_yylval.ival = OP_BACKTICK;
8475             TERM(sublex_start());
8476
8477         case KEY_return:
8478             OLDLOP(OP_RETURN);
8479
8480         case KEY_require:
8481             s = SKIPSPACE1(s);
8482             PL_expect = XOPERATOR;
8483             if (isDIGIT(*s)) {
8484                 s = force_version(s, FALSE);
8485             }
8486             else if (*s != 'v' || !isDIGIT(s[1])
8487                     || (s = force_version(s, TRUE), *s == 'v'))
8488             {
8489                 *PL_tokenbuf = '\0';
8490                 s = force_word(s,WORD,TRUE,TRUE);
8491                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8492                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8493                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8494                 else if (*s == '<')
8495                     yyerror("<> should be quotes");
8496             }
8497             if (orig_keyword == KEY_require) {
8498                 orig_keyword = 0;
8499                 pl_yylval.ival = 1;
8500             }
8501             else
8502                 pl_yylval.ival = 0;
8503             PL_expect = XTERM;
8504             PL_bufptr = s;
8505             PL_last_uni = PL_oldbufptr;
8506             PL_last_lop_op = OP_REQUIRE;
8507             s = skipspace(s);
8508             return REPORT( (int)REQUIRE );
8509
8510         case KEY_reset:
8511             UNI(OP_RESET);
8512
8513         case KEY_redo:
8514             PL_expect = XOPERATOR;
8515             s = force_word(s,WORD,TRUE,FALSE);
8516             LOOPX(OP_REDO);
8517
8518         case KEY_rename:
8519             LOP(OP_RENAME,XTERM);
8520
8521         case KEY_rand:
8522             UNI(OP_RAND);
8523
8524         case KEY_rmdir:
8525             UNI(OP_RMDIR);
8526
8527         case KEY_rindex:
8528             LOP(OP_RINDEX,XTERM);
8529
8530         case KEY_read:
8531             LOP(OP_READ,XTERM);
8532
8533         case KEY_readdir:
8534             UNI(OP_READDIR);
8535
8536         case KEY_readline:
8537             UNIDOR(OP_READLINE);
8538
8539         case KEY_readpipe:
8540             UNIDOR(OP_BACKTICK);
8541
8542         case KEY_rewinddir:
8543             UNI(OP_REWINDDIR);
8544
8545         case KEY_recv:
8546             LOP(OP_RECV,XTERM);
8547
8548         case KEY_reverse:
8549             LOP(OP_REVERSE,XTERM);
8550
8551         case KEY_readlink:
8552             UNIDOR(OP_READLINK);
8553
8554         case KEY_ref:
8555             UNI(OP_REF);
8556
8557         case KEY_s:
8558             s = scan_subst(s);
8559             if (pl_yylval.opval)
8560                 TERM(sublex_start());
8561             else
8562                 TOKEN(1);       /* force error */
8563
8564         case KEY_say:
8565             checkcomma(s,PL_tokenbuf,"filehandle");
8566             LOP(OP_SAY,XREF);
8567
8568         case KEY_chomp:
8569             UNI(OP_CHOMP);
8570
8571         case KEY_scalar:
8572             UNI(OP_SCALAR);
8573
8574         case KEY_select:
8575             LOP(OP_SELECT,XTERM);
8576
8577         case KEY_seek:
8578             LOP(OP_SEEK,XTERM);
8579
8580         case KEY_semctl:
8581             LOP(OP_SEMCTL,XTERM);
8582
8583         case KEY_semget:
8584             LOP(OP_SEMGET,XTERM);
8585
8586         case KEY_semop:
8587             LOP(OP_SEMOP,XTERM);
8588
8589         case KEY_send:
8590             LOP(OP_SEND,XTERM);
8591
8592         case KEY_setpgrp:
8593             LOP(OP_SETPGRP,XTERM);
8594
8595         case KEY_setpriority:
8596             LOP(OP_SETPRIORITY,XTERM);
8597
8598         case KEY_sethostent:
8599             UNI(OP_SHOSTENT);
8600
8601         case KEY_setnetent:
8602             UNI(OP_SNETENT);
8603
8604         case KEY_setservent:
8605             UNI(OP_SSERVENT);
8606
8607         case KEY_setprotoent:
8608             UNI(OP_SPROTOENT);
8609
8610         case KEY_setpwent:
8611             FUN0(OP_SPWENT);
8612
8613         case KEY_setgrent:
8614             FUN0(OP_SGRENT);
8615
8616         case KEY_seekdir:
8617             LOP(OP_SEEKDIR,XTERM);
8618
8619         case KEY_setsockopt:
8620             LOP(OP_SSOCKOPT,XTERM);
8621
8622         case KEY_shift:
8623             UNIDOR(OP_SHIFT);
8624
8625         case KEY_shmctl:
8626             LOP(OP_SHMCTL,XTERM);
8627
8628         case KEY_shmget:
8629             LOP(OP_SHMGET,XTERM);
8630
8631         case KEY_shmread:
8632             LOP(OP_SHMREAD,XTERM);
8633
8634         case KEY_shmwrite:
8635             LOP(OP_SHMWRITE,XTERM);
8636
8637         case KEY_shutdown:
8638             LOP(OP_SHUTDOWN,XTERM);
8639
8640         case KEY_sin:
8641             UNI(OP_SIN);
8642
8643         case KEY_sleep:
8644             UNI(OP_SLEEP);
8645
8646         case KEY_socket:
8647             LOP(OP_SOCKET,XTERM);
8648
8649         case KEY_socketpair:
8650             LOP(OP_SOCKPAIR,XTERM);
8651
8652         case KEY_sort:
8653             checkcomma(s,PL_tokenbuf,"subroutine name");
8654             s = SKIPSPACE1(s);
8655             PL_expect = XTERM;
8656             s = force_word(s,WORD,TRUE,TRUE);
8657             LOP(OP_SORT,XREF);
8658
8659         case KEY_split:
8660             LOP(OP_SPLIT,XTERM);
8661
8662         case KEY_sprintf:
8663             LOP(OP_SPRINTF,XTERM);
8664
8665         case KEY_splice:
8666             LOP(OP_SPLICE,XTERM);
8667
8668         case KEY_sqrt:
8669             UNI(OP_SQRT);
8670
8671         case KEY_srand:
8672             UNI(OP_SRAND);
8673
8674         case KEY_stat:
8675             UNI(OP_STAT);
8676
8677         case KEY_study:
8678             UNI(OP_STUDY);
8679
8680         case KEY_substr:
8681             LOP(OP_SUBSTR,XTERM);
8682
8683         case KEY_format:
8684         case KEY_sub:
8685           really_sub:
8686             {
8687                 char * const tmpbuf = PL_tokenbuf + 1;
8688                 expectation attrful;
8689                 bool have_name, have_proto;
8690                 const int key = tmp;
8691 #ifndef PERL_MAD
8692                 SV *format_name = NULL;
8693 #endif
8694
8695 #ifdef PERL_MAD
8696                 SV *tmpwhite = 0;
8697
8698                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8699                 SV *subtoken = PL_madskills
8700                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8701                    : NULL;
8702                 PL_thistoken = 0;
8703
8704                 d = s;
8705                 s = SKIPSPACE2(s,tmpwhite);
8706 #else
8707                 d = s;
8708                 s = skipspace(s);
8709 #endif
8710
8711                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8712                     (*s == ':' && s[1] == ':'))
8713                 {
8714 #ifdef PERL_MAD
8715                     SV *nametoke = NULL;
8716 #endif
8717
8718                     PL_expect = XBLOCK;
8719                     attrful = XATTRBLOCK;
8720                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8721                                   &len);
8722 #ifdef PERL_MAD
8723                     if (PL_madskills)
8724                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8725 #else
8726                     if (key == KEY_format)
8727                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8728 #endif
8729                     *PL_tokenbuf = '&';
8730                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8731                      || pad_findmy_pvn(
8732                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8733                         ) != NOT_IN_PAD)
8734                         sv_setpvn(PL_subname, tmpbuf, len);
8735                     else {
8736                         sv_setsv(PL_subname,PL_curstname);
8737                         sv_catpvs(PL_subname,"::");
8738                         sv_catpvn(PL_subname,tmpbuf,len);
8739                     }
8740                     if (SvUTF8(PL_linestr))
8741                         SvUTF8_on(PL_subname);
8742                     have_name = TRUE;
8743
8744
8745 #ifdef PERL_MAD
8746                     start_force(0);
8747                     CURMAD('X', nametoke);
8748                     CURMAD('_', tmpwhite);
8749                     force_ident_maybe_lex('&');
8750
8751                     s = SKIPSPACE2(d,tmpwhite);
8752 #else
8753                     s = skipspace(d);
8754 #endif
8755                 }
8756                 else {
8757                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8758                     {
8759                         *d = '\0';
8760                         /* diag_listed_as: Missing name in "%s sub" */
8761                         Perl_croak(aTHX_
8762                                   "Missing name in \"%s\"", PL_bufptr);
8763                     }
8764                     PL_expect = XTERMBLOCK;
8765                     attrful = XATTRTERM;
8766                     sv_setpvs(PL_subname,"?");
8767                     have_name = FALSE;
8768                 }
8769
8770                 if (key == KEY_format) {
8771 #ifdef PERL_MAD
8772                     PL_thistoken = subtoken;
8773                     s = d;
8774 #else
8775                     if (format_name) {
8776                         start_force(PL_curforce);
8777                         NEXTVAL_NEXTTOKE.opval
8778                             = (OP*)newSVOP(OP_CONST,0, format_name);
8779                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8780                         force_next(WORD);
8781                     }
8782 #endif
8783                     PREBLOCK(FORMAT);
8784                 }
8785
8786                 /* Look for a prototype */
8787                 if (*s == '(') {
8788                     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8789                     COPLINE_SET_FROM_MULTI_END;
8790                     if (!s)
8791                         Perl_croak(aTHX_ "Prototype not terminated");
8792                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8793                     have_proto = TRUE;
8794
8795 #ifdef PERL_MAD
8796                     start_force(0);
8797                     CURMAD('q', PL_thisopen);
8798                     CURMAD('_', tmpwhite);
8799                     CURMAD('=', PL_thisstuff);
8800                     CURMAD('Q', PL_thisclose);
8801                     NEXTVAL_NEXTTOKE.opval =
8802                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8803                     PL_lex_stuff = NULL;
8804                     force_next(THING);
8805
8806                     s = SKIPSPACE2(s,tmpwhite);
8807 #else
8808                     s = skipspace(s);
8809 #endif
8810                 }
8811                 else
8812                     have_proto = FALSE;
8813
8814                 if (*s == ':' && s[1] != ':')
8815                     PL_expect = attrful;
8816                 else if (*s != '{' && key == KEY_sub) {
8817                     if (!have_name)
8818                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8819                     else if (*s != ';' && *s != '}')
8820                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8821                 }
8822
8823 #ifdef PERL_MAD
8824                 start_force(0);
8825                 if (tmpwhite) {
8826                     if (PL_madskills)
8827                         curmad('^', newSVpvs(""));
8828                     CURMAD('_', tmpwhite);
8829                 }
8830                 force_next(0);
8831
8832                 PL_thistoken = subtoken;
8833                 PERL_UNUSED_VAR(have_proto);
8834 #else
8835                 if (have_proto) {
8836                     NEXTVAL_NEXTTOKE.opval =
8837                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8838                     PL_lex_stuff = NULL;
8839                     force_next(THING);
8840                 }
8841 #endif
8842                 if (!have_name) {
8843                     if (PL_curstash)
8844                         sv_setpvs(PL_subname, "__ANON__");
8845                     else
8846                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8847                     TOKEN(ANONSUB);
8848                 }
8849 #ifndef PERL_MAD
8850                 force_ident_maybe_lex('&');
8851 #endif
8852                 TOKEN(SUB);
8853             }
8854
8855         case KEY_system:
8856             LOP(OP_SYSTEM,XREF);
8857
8858         case KEY_symlink:
8859             LOP(OP_SYMLINK,XTERM);
8860
8861         case KEY_syscall:
8862             LOP(OP_SYSCALL,XTERM);
8863
8864         case KEY_sysopen:
8865             LOP(OP_SYSOPEN,XTERM);
8866
8867         case KEY_sysseek:
8868             LOP(OP_SYSSEEK,XTERM);
8869
8870         case KEY_sysread:
8871             LOP(OP_SYSREAD,XTERM);
8872
8873         case KEY_syswrite:
8874             LOP(OP_SYSWRITE,XTERM);
8875
8876         case KEY_tr:
8877         case KEY_y:
8878             s = scan_trans(s);
8879             TERM(sublex_start());
8880
8881         case KEY_tell:
8882             UNI(OP_TELL);
8883
8884         case KEY_telldir:
8885             UNI(OP_TELLDIR);
8886
8887         case KEY_tie:
8888             LOP(OP_TIE,XTERM);
8889
8890         case KEY_tied:
8891             UNI(OP_TIED);
8892
8893         case KEY_time:
8894             FUN0(OP_TIME);
8895
8896         case KEY_times:
8897             FUN0(OP_TMS);
8898
8899         case KEY_truncate:
8900             LOP(OP_TRUNCATE,XTERM);
8901
8902         case KEY_uc:
8903             UNI(OP_UC);
8904
8905         case KEY_ucfirst:
8906             UNI(OP_UCFIRST);
8907
8908         case KEY_untie:
8909             UNI(OP_UNTIE);
8910
8911         case KEY_until:
8912             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8913                 return REPORT(0);
8914             pl_yylval.ival = CopLINE(PL_curcop);
8915             OPERATOR(UNTIL);
8916
8917         case KEY_unless:
8918             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8919                 return REPORT(0);
8920             pl_yylval.ival = CopLINE(PL_curcop);
8921             OPERATOR(UNLESS);
8922
8923         case KEY_unlink:
8924             LOP(OP_UNLINK,XTERM);
8925
8926         case KEY_undef:
8927             UNIDOR(OP_UNDEF);
8928
8929         case KEY_unpack:
8930             LOP(OP_UNPACK,XTERM);
8931
8932         case KEY_utime:
8933             LOP(OP_UTIME,XTERM);
8934
8935         case KEY_umask:
8936             UNIDOR(OP_UMASK);
8937
8938         case KEY_unshift:
8939             LOP(OP_UNSHIFT,XTERM);
8940
8941         case KEY_use:
8942             s = tokenize_use(1, s);
8943             OPERATOR(USE);
8944
8945         case KEY_values:
8946             UNI(OP_VALUES);
8947
8948         case KEY_vec:
8949             LOP(OP_VEC,XTERM);
8950
8951         case KEY_when:
8952             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8953                 return REPORT(0);
8954             pl_yylval.ival = CopLINE(PL_curcop);
8955             Perl_ck_warner_d(aTHX_
8956                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8957                 "when is experimental");
8958             OPERATOR(WHEN);
8959
8960         case KEY_while:
8961             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8962                 return REPORT(0);
8963             pl_yylval.ival = CopLINE(PL_curcop);
8964             OPERATOR(WHILE);
8965
8966         case KEY_warn:
8967             PL_hints |= HINT_BLOCK_SCOPE;
8968             LOP(OP_WARN,XTERM);
8969
8970         case KEY_wait:
8971             FUN0(OP_WAIT);
8972
8973         case KEY_waitpid:
8974             LOP(OP_WAITPID,XTERM);
8975
8976         case KEY_wantarray:
8977             FUN0(OP_WANTARRAY);
8978
8979         case KEY_write:
8980             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8981              * we use the same number on EBCDIC */
8982             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8983             UNI(OP_ENTERWRITE);
8984
8985         case KEY_x:
8986             if (PL_expect == XOPERATOR) {
8987                 if (*s == '=' && !PL_lex_allbrackets &&
8988                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8989                     return REPORT(0);
8990                 Mop(OP_REPEAT);
8991             }
8992             check_uni();
8993             goto just_a_word;
8994
8995         case KEY_xor:
8996             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8997                 return REPORT(0);
8998             pl_yylval.ival = OP_XOR;
8999             OPERATOR(OROP);
9000         }
9001     }}
9002 }
9003
9004 /*
9005   S_pending_ident
9006
9007   Looks up an identifier in the pad or in a package
9008
9009   Returns:
9010     PRIVATEREF if this is a lexical name.
9011     WORD       if this belongs to a package.
9012
9013   Structure:
9014       if we're in a my declaration
9015           croak if they tried to say my($foo::bar)
9016           build the ops for a my() declaration
9017       if it's an access to a my() variable
9018           build ops for access to a my() variable
9019       if in a dq string, and they've said @foo and we can't find @foo
9020           warn
9021       build ops for a bareword
9022 */
9023
9024 static int
9025 S_pending_ident(pTHX)
9026 {
9027     dVAR;
9028     PADOFFSET tmp = 0;
9029     const char pit = (char)pl_yylval.ival;
9030     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9031     /* All routes through this function want to know if there is a colon.  */
9032     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9033
9034     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9035           "### Pending identifier '%s'\n", PL_tokenbuf); });
9036
9037     /* if we're in a my(), we can't allow dynamics here.
9038        $foo'bar has already been turned into $foo::bar, so
9039        just check for colons.
9040
9041        if it's a legal name, the OP is a PADANY.
9042     */
9043     if (PL_in_my) {
9044         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9045             if (has_colon)
9046                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9047                                   "variable %s in \"our\"",
9048                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9049             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9050         }
9051         else {
9052             if (has_colon) {
9053                 /* PL_no_myglob is constant */
9054                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9055                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9056                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9057                             UTF ? SVf_UTF8 : 0);
9058                 GCC_DIAG_RESTORE;
9059             }
9060
9061             pl_yylval.opval = newOP(OP_PADANY, 0);
9062             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9063                                                         UTF ? SVf_UTF8 : 0);
9064             return PRIVATEREF;
9065         }
9066     }
9067
9068     /*
9069        build the ops for accesses to a my() variable.
9070     */
9071
9072     if (!has_colon) {
9073         if (!PL_in_my)
9074             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9075                                     UTF ? SVf_UTF8 : 0);
9076         if (tmp != NOT_IN_PAD) {
9077             /* might be an "our" variable" */
9078             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9079                 /* build ops for a bareword */
9080                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9081                 HEK * const stashname = HvNAME_HEK(stash);
9082                 SV *  const sym = newSVhek(stashname);
9083                 sv_catpvs(sym, "::");
9084                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9085                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9086                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9087                 if (pit != '&')
9088                   gv_fetchsv(sym,
9089                     (PL_in_eval
9090                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9091                         : GV_ADDMULTI
9092                     ),
9093                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9094                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9095                      : SVt_PVHV));
9096                 return WORD;
9097             }
9098
9099             pl_yylval.opval = newOP(OP_PADANY, 0);
9100             pl_yylval.opval->op_targ = tmp;
9101             return PRIVATEREF;
9102         }
9103     }
9104
9105     /*
9106        Whine if they've said @foo in a doublequoted string,
9107        and @foo isn't a variable we can find in the symbol
9108        table.
9109     */
9110     if (ckWARN(WARN_AMBIGUOUS) &&
9111         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9112         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9113                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9114         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9115                 /* DO NOT warn for @- and @+ */
9116                 && !( PL_tokenbuf[2] == '\0' &&
9117                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9118            )
9119         {
9120             /* Downgraded from fatal to warning 20000522 mjd */
9121             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9122                         "Possible unintended interpolation of %"UTF8f
9123                         " in string",
9124                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9125         }
9126     }
9127
9128     /* build ops for a bareword */
9129     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9130                                    newSVpvn_flags(PL_tokenbuf + 1,
9131                                                       tokenbuf_len - 1,
9132                                                       UTF ? SVf_UTF8 : 0 ));
9133     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9134     if (pit != '&')
9135         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9136                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9137                      | ( UTF ? SVf_UTF8 : 0 ),
9138                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9139                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9140                       : SVt_PVHV));
9141     return WORD;
9142 }
9143
9144 STATIC void
9145 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9146 {
9147     dVAR;
9148
9149     PERL_ARGS_ASSERT_CHECKCOMMA;
9150
9151     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9152         if (ckWARN(WARN_SYNTAX)) {
9153             int level = 1;
9154             const char *w;
9155             for (w = s+2; *w && level; w++) {
9156                 if (*w == '(')
9157                     ++level;
9158                 else if (*w == ')')
9159                     --level;
9160             }
9161             while (isSPACE(*w))
9162                 ++w;
9163             /* the list of chars below is for end of statements or
9164              * block / parens, boolean operators (&&, ||, //) and branch
9165              * constructs (or, and, if, until, unless, while, err, for).
9166              * Not a very solid hack... */
9167             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9168                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9169                             "%s (...) interpreted as function",name);
9170         }
9171     }
9172     while (s < PL_bufend && isSPACE(*s))
9173         s++;
9174     if (*s == '(')
9175         s++;
9176     while (s < PL_bufend && isSPACE(*s))
9177         s++;
9178     if (isIDFIRST_lazy_if(s,UTF)) {
9179         const char * const w = s;
9180         s += UTF ? UTF8SKIP(s) : 1;
9181         while (isWORDCHAR_lazy_if(s,UTF))
9182             s += UTF ? UTF8SKIP(s) : 1;
9183         while (s < PL_bufend && isSPACE(*s))
9184             s++;
9185         if (*s == ',') {
9186             GV* gv;
9187             if (keyword(w, s - w, 0))
9188                 return;
9189
9190             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9191             if (gv && GvCVu(gv))
9192                 return;
9193             Perl_croak(aTHX_ "No comma allowed after %s", what);
9194         }
9195     }
9196 }
9197
9198 /* S_new_constant(): do any overload::constant lookup.
9199
9200    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9201    Best used as sv=new_constant(..., sv, ...).
9202    If s, pv are NULL, calls subroutine with one argument,
9203    and <type> is used with error messages only.
9204    <type> is assumed to be well formed UTF-8 */
9205
9206 STATIC SV *
9207 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9208                SV *sv, SV *pv, const char *type, STRLEN typelen)
9209 {
9210     dVAR; dSP;
9211     HV * table = GvHV(PL_hintgv);                /* ^H */
9212     SV *res;
9213     SV *errsv = NULL;
9214     SV **cvp;
9215     SV *cv, *typesv;
9216     const char *why1 = "", *why2 = "", *why3 = "";
9217
9218     PERL_ARGS_ASSERT_NEW_CONSTANT;
9219     /* We assume that this is true: */
9220     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9221     assert(type || s);
9222
9223     /* charnames doesn't work well if there have been errors found */
9224     if (PL_error_count > 0 && *key == 'c')
9225     {
9226         SvREFCNT_dec_NN(sv);
9227         return &PL_sv_undef;
9228     }
9229
9230     sv_2mortal(sv);                     /* Parent created it permanently */
9231     if (!table
9232         || ! (PL_hints & HINT_LOCALIZE_HH)
9233         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9234         || ! SvOK(*cvp))
9235     {
9236         char *msg;
9237
9238         /* Here haven't found what we're looking for.  If it is charnames,
9239          * perhaps it needs to be loaded.  Try doing that before giving up */
9240         if (*key == 'c') {
9241             Perl_load_module(aTHX_
9242                             0,
9243                             newSVpvs("_charnames"),
9244                              /* version parameter; no need to specify it, as if
9245                               * we get too early a version, will fail anyway,
9246                               * not being able to find '_charnames' */
9247                             NULL,
9248                             newSVpvs(":full"),
9249                             newSVpvs(":short"),
9250                             NULL);
9251             assert(sp == PL_stack_sp);
9252             table = GvHV(PL_hintgv);
9253             if (table
9254                 && (PL_hints & HINT_LOCALIZE_HH)
9255                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9256                 && SvOK(*cvp))
9257             {
9258                 goto now_ok;
9259             }
9260         }
9261         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9262             msg = Perl_form(aTHX_
9263                                "Constant(%.*s) unknown",
9264                                 (int)(type ? typelen : len),
9265                                 (type ? type: s));
9266         }
9267         else {
9268             why1 = "$^H{";
9269             why2 = key;
9270             why3 = "} is not defined";
9271         report:
9272             if (*key == 'c') {
9273                 msg = Perl_form(aTHX_
9274                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9275                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9276                       );
9277             }
9278             else {
9279                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9280                                     (int)(type ? typelen : len),
9281                                     (type ? type: s), why1, why2, why3);
9282             }
9283         }
9284         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9285         return SvREFCNT_inc_simple_NN(sv);
9286     }
9287 now_ok:
9288     cv = *cvp;
9289     if (!pv && s)
9290         pv = newSVpvn_flags(s, len, SVs_TEMP);
9291     if (type && pv)
9292         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9293     else
9294         typesv = &PL_sv_undef;
9295
9296     PUSHSTACKi(PERLSI_OVERLOAD);
9297     ENTER ;
9298     SAVETMPS;
9299
9300     PUSHMARK(SP) ;
9301     EXTEND(sp, 3);
9302     if (pv)
9303         PUSHs(pv);
9304     PUSHs(sv);
9305     if (pv)
9306         PUSHs(typesv);
9307     PUTBACK;
9308     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9309
9310     SPAGAIN ;
9311
9312     /* Check the eval first */
9313     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9314         STRLEN errlen;
9315         const char * errstr;
9316         sv_catpvs(errsv, "Propagated");
9317         errstr = SvPV_const(errsv, errlen);
9318         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9319         (void)POPs;
9320         res = SvREFCNT_inc_simple_NN(sv);
9321     }
9322     else {
9323         res = POPs;
9324         SvREFCNT_inc_simple_void_NN(res);
9325     }
9326
9327     PUTBACK ;
9328     FREETMPS ;
9329     LEAVE ;
9330     POPSTACK;
9331
9332     if (!SvOK(res)) {
9333         why1 = "Call to &{$^H{";
9334         why2 = key;
9335         why3 = "}} did not return a defined value";
9336         sv = res;
9337         (void)sv_2mortal(sv);
9338         goto report;
9339     }
9340
9341     return res;
9342 }
9343
9344 PERL_STATIC_INLINE void
9345 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9346     dVAR;
9347     PERL_ARGS_ASSERT_PARSE_IDENT;
9348
9349     for (;;) {
9350         if (*d >= e)
9351             Perl_croak(aTHX_ "%s", ident_too_long);
9352         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9353              /* The UTF-8 case must come first, otherwise things
9354              * like c\N{COMBINING TILDE} would start failing, as the
9355              * isWORDCHAR_A case below would gobble the 'c' up.
9356              */
9357
9358             char *t = *s + UTF8SKIP(*s);
9359             while (isIDCONT_utf8((U8*)t))
9360                 t += UTF8SKIP(t);
9361             if (*d + (t - *s) > e)
9362                 Perl_croak(aTHX_ "%s", ident_too_long);
9363             Copy(*s, *d, t - *s, char);
9364             *d += t - *s;
9365             *s = t;
9366         }
9367         else if ( isWORDCHAR_A(**s) ) {
9368             do {
9369                 *(*d)++ = *(*s)++;
9370             } while (isWORDCHAR_A(**s) && *d < e);
9371         }
9372         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9373             *(*d)++ = ':';
9374             *(*d)++ = ':';
9375             (*s)++;
9376         }
9377         else if (allow_package && **s == ':' && (*s)[1] == ':'
9378            /* Disallow things like Foo::$bar. For the curious, this is
9379             * the code path that triggers the "Bad name after" warning
9380             * when looking for barewords.
9381             */
9382            && (*s)[2] != '$') {
9383             *(*d)++ = *(*s)++;
9384             *(*d)++ = *(*s)++;
9385         }
9386         else
9387             break;
9388     }
9389     return;
9390 }
9391
9392 /* Returns a NUL terminated string, with the length of the string written to
9393    *slp
9394    */
9395 STATIC char *
9396 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9397 {
9398     dVAR;
9399     char *d = dest;
9400     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9401     bool is_utf8 = cBOOL(UTF);
9402
9403     PERL_ARGS_ASSERT_SCAN_WORD;
9404
9405     parse_ident(&s, &d, e, allow_package, is_utf8);
9406     *d = '\0';
9407     *slp = d - dest;
9408     return s;
9409 }
9410
9411 STATIC char *
9412 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9413 {
9414     dVAR;
9415     I32 herelines = PL_parser->herelines;
9416     SSize_t bracket = -1;
9417     char funny = *s++;
9418     char *d = dest;
9419     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9420     bool is_utf8 = cBOOL(UTF);
9421     I32 orig_copline = 0, tmp_copline = 0;
9422
9423     PERL_ARGS_ASSERT_SCAN_IDENT;
9424
9425     if (isSPACE(*s))
9426         s = PEEKSPACE(s);
9427     if (isDIGIT(*s)) {
9428         while (isDIGIT(*s)) {
9429             if (d >= e)
9430                 Perl_croak(aTHX_ "%s", ident_too_long);
9431             *d++ = *s++;
9432         }
9433     }
9434     else {
9435         parse_ident(&s, &d, e, 1, is_utf8);
9436     }
9437     *d = '\0';
9438     d = dest;
9439     if (*d) {
9440         /* Either a digit variable, or parse_ident() found an identifier
9441            (anything valid as a bareword), so job done and return.  */
9442         if (PL_lex_state != LEX_NORMAL)
9443             PL_lex_state = LEX_INTERPENDMAYBE;
9444         return s;
9445     }
9446     if (*s == '$' && s[1] &&
9447       (isIDFIRST_lazy_if(s+1,is_utf8)
9448          || isDIGIT_A((U8)s[1])
9449          || s[1] == '$'
9450          || s[1] == '{'
9451          || strnEQ(s+1,"::",2)) )
9452     {
9453         /* Dereferencing a value in a scalar variable.
9454            The alternatives are different syntaxes for a scalar variable.
9455            Using ' as a leading package separator isn't allowed. :: is.   */
9456         return s;
9457     }
9458     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9459     if (*s == '{') {
9460         bracket = s - SvPVX(PL_linestr);
9461         s++;
9462         orig_copline = CopLINE(PL_curcop);
9463         if (s < PL_bufend && isSPACE(*s)) {
9464             s = PEEKSPACE(s);
9465         }
9466     }
9467
9468 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9469  * iff Unicode semantics are to be used.  The legal ones are any of:
9470  *  a) ASCII digits
9471  *  b) ASCII punctuation
9472  *  c) When not under Unicode rules, any upper Latin1 character
9473  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9474  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
9475  *     the \s ones. */
9476 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
9477                                    || isDIGIT_A((U8)(d))                    \
9478                                    || (!(u) && !isASCII((U8)(d)))           \
9479                                    || ((((U8)(d)) < 32)                     \
9480                                        && (((((U8)(d)) >= 14)               \
9481                                            || (((U8)(d)) <= 8 && (d) != 0) \
9482                                            || (((U8)(d)) == 13))))          \
9483                                    || (((U8)(d)) == toCTRL('?')))
9484     if (s < PL_bufend
9485         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9486     {
9487         if ( isCNTRL_A((U8)*s) ) {
9488             deprecate("literal control characters in variable names");
9489         }
9490
9491         if (is_utf8) {
9492             const STRLEN skip = UTF8SKIP(s);
9493             STRLEN i;
9494             d[skip] = '\0';
9495             for ( i = 0; i < skip; i++ )
9496                 d[i] = *s++;
9497         }
9498         else {
9499             *d = *s++;
9500             d[1] = '\0';
9501         }
9502     }
9503     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9504     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9505         *d = toCTRL(*s);
9506         s++;
9507     }
9508     /* Warn about ambiguous code after unary operators if {...} notation isn't
9509        used.  There's no difference in ambiguity; it's merely a heuristic
9510        about when not to warn.  */
9511     else if (ck_uni && bracket == -1)
9512         check_uni();
9513     if (bracket != -1) {
9514         /* If we were processing {...} notation then...  */
9515         if (isIDFIRST_lazy_if(d,is_utf8)) {
9516             /* if it starts as a valid identifier, assume that it is one.
9517                (the later check for } being at the expected point will trap
9518                cases where this doesn't pan out.)  */
9519         d += is_utf8 ? UTF8SKIP(d) : 1;
9520         parse_ident(&s, &d, e, 1, is_utf8);
9521             *d = '\0';
9522             tmp_copline = CopLINE(PL_curcop);
9523             if (s < PL_bufend && isSPACE(*s)) {
9524                 s = PEEKSPACE(s);
9525             }
9526             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9527                 /* ${foo[0]} and ${foo{bar}} notation.  */
9528                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9529                     const char * const brack =
9530                         (const char *)
9531                         ((*s == '[') ? "[...]" : "{...}");
9532                     orig_copline = CopLINE(PL_curcop);
9533                     CopLINE_set(PL_curcop, tmp_copline);
9534    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9535                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9536                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9537                         funny, dest, brack, funny, dest, brack);
9538                     CopLINE_set(PL_curcop, orig_copline);
9539                 }
9540                 bracket++;
9541                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9542                 PL_lex_allbrackets++;
9543                 return s;
9544             }
9545         }
9546         /* Handle extended ${^Foo} variables
9547          * 1999-02-27 mjd-perl-patch@plover.com */
9548         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9549                  && isWORDCHAR(*s))
9550         {
9551             d++;
9552             while (isWORDCHAR(*s) && d < e) {
9553                 *d++ = *s++;
9554             }
9555             if (d >= e)
9556                 Perl_croak(aTHX_ "%s", ident_too_long);
9557             *d = '\0';
9558         }
9559
9560         if ( !tmp_copline )
9561             tmp_copline = CopLINE(PL_curcop);
9562         if (s < PL_bufend && isSPACE(*s)) {
9563             s = PEEKSPACE(s);
9564         }
9565
9566         /* Expect to find a closing } after consuming any trailing whitespace.
9567          */
9568         if (*s == '}') {
9569             s++;
9570             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9571                 PL_lex_state = LEX_INTERPEND;
9572                 PL_expect = XREF;
9573             }
9574             if (PL_lex_state == LEX_NORMAL) {
9575                 if (ckWARN(WARN_AMBIGUOUS) &&
9576                     (keyword(dest, d - dest, 0)
9577                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9578                 {
9579                     SV *tmp = newSVpvn_flags( dest, d - dest,
9580                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9581                     if (funny == '#')
9582                         funny = '@';
9583                     orig_copline = CopLINE(PL_curcop);
9584                     CopLINE_set(PL_curcop, tmp_copline);
9585                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9586                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9587                         funny, tmp, funny, tmp);
9588                     CopLINE_set(PL_curcop, orig_copline);
9589                 }
9590             }
9591         }
9592         else {
9593             /* Didn't find the closing } at the point we expected, so restore
9594                state such that the next thing to process is the opening { and */
9595             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9596             CopLINE_set(PL_curcop, orig_copline);
9597             PL_parser->herelines = herelines;
9598             *dest = '\0';
9599         }
9600     }
9601     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9602         PL_lex_state = LEX_INTERPEND;
9603     return s;
9604 }
9605
9606 static bool
9607 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9608
9609     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9610      * the parse starting at 's', based on the subset that are valid in this
9611      * context input to this routine in 'valid_flags'. Advances s.  Returns
9612      * TRUE if the input should be treated as a valid flag, so the next char
9613      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9614      * first call on the current regex.  This routine will set it to any
9615      * charset modifier found.  The caller shouldn't change it.  This way,
9616      * another charset modifier encountered in the parse can be detected as an
9617      * error, as we have decided to allow only one */
9618
9619     const char c = **s;
9620     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9621
9622     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9623         if (isWORDCHAR_lazy_if(*s, UTF)) {
9624             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9625                        UTF ? SVf_UTF8 : 0);
9626             (*s) += charlen;
9627             /* Pretend that it worked, so will continue processing before
9628              * dieing */
9629             return TRUE;
9630         }
9631         return FALSE;
9632     }
9633
9634     switch (c) {
9635
9636         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9637         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9638         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9639         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9640         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9641         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9642         case LOCALE_PAT_MOD:
9643             if (*charset) {
9644                 goto multiple_charsets;
9645             }
9646             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9647             *charset = c;
9648             break;
9649         case UNICODE_PAT_MOD:
9650             if (*charset) {
9651                 goto multiple_charsets;
9652             }
9653             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9654             *charset = c;
9655             break;
9656         case ASCII_RESTRICT_PAT_MOD:
9657             if (! *charset) {
9658                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9659             }
9660             else {
9661
9662                 /* Error if previous modifier wasn't an 'a', but if it was, see
9663                  * if, and accept, a second occurrence (only) */
9664                 if (*charset != 'a'
9665                     || get_regex_charset(*pmfl)
9666                         != REGEX_ASCII_RESTRICTED_CHARSET)
9667                 {
9668                         goto multiple_charsets;
9669                 }
9670                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9671             }
9672             *charset = c;
9673             break;
9674         case DEPENDS_PAT_MOD:
9675             if (*charset) {
9676                 goto multiple_charsets;
9677             }
9678             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9679             *charset = c;
9680             break;
9681     }
9682
9683     (*s)++;
9684     return TRUE;
9685
9686     multiple_charsets:
9687         if (*charset != c) {
9688             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9689         }
9690         else if (c == 'a') {
9691   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9692             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9693         }
9694         else {
9695             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9696         }
9697
9698         /* Pretend that it worked, so will continue processing before dieing */
9699         (*s)++;
9700         return TRUE;
9701 }
9702
9703 STATIC char *
9704 S_scan_pat(pTHX_ char *start, I32 type)
9705 {
9706     dVAR;
9707     PMOP *pm;
9708     char *s;
9709     const char * const valid_flags =
9710         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9711     char charset = '\0';    /* character set modifier */
9712 #ifdef PERL_MAD
9713     char *modstart;
9714 #endif
9715
9716     PERL_ARGS_ASSERT_SCAN_PAT;
9717
9718     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9719                        TRUE /* look for escaped bracketed metas */, NULL);
9720
9721     if (!s) {
9722         const char * const delimiter = skipspace(start);
9723         Perl_croak(aTHX_
9724                    (const char *)
9725                    (*delimiter == '?'
9726                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9727                     : "Search pattern not terminated" ));
9728     }
9729
9730     pm = (PMOP*)newPMOP(type, 0);
9731     if (PL_multi_open == '?') {
9732         /* This is the only point in the code that sets PMf_ONCE:  */
9733         pm->op_pmflags |= PMf_ONCE;
9734
9735         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9736            allows us to restrict the list needed by reset to just the ??
9737            matches.  */
9738         assert(type != OP_TRANS);
9739         if (PL_curstash) {
9740             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9741             U32 elements;
9742             if (!mg) {
9743                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9744                                  0);
9745             }
9746             elements = mg->mg_len / sizeof(PMOP**);
9747             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9748             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9749             mg->mg_len = elements * sizeof(PMOP**);
9750             PmopSTASH_set(pm,PL_curstash);
9751         }
9752     }
9753 #ifdef PERL_MAD
9754     modstart = s;
9755 #endif
9756
9757     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9758      * anon CV. False positives like qr/[(?{]/ are harmless */
9759
9760     if (type == OP_QR) {
9761         STRLEN len;
9762         char *e, *p = SvPV(PL_lex_stuff, len);
9763         e = p + len;
9764         for (; p < e; p++) {
9765             if (p[0] == '(' && p[1] == '?'
9766                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9767             {
9768                 pm->op_pmflags |= PMf_HAS_CV;
9769                 break;
9770             }
9771         }
9772         pm->op_pmflags |= PMf_IS_QR;
9773     }
9774
9775     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9776 #ifdef PERL_MAD
9777     if (PL_madskills && modstart != s) {
9778         SV* tmptoken = newSVpvn(modstart, s - modstart);
9779         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9780     }
9781 #endif
9782     /* issue a warning if /c is specified,but /g is not */
9783     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9784     {
9785         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9786                        "Use of /c modifier is meaningless without /g" );
9787     }
9788
9789     PL_lex_op = (OP*)pm;
9790     pl_yylval.ival = OP_MATCH;
9791     return s;
9792 }
9793
9794 STATIC char *
9795 S_scan_subst(pTHX_ char *start)
9796 {
9797     dVAR;
9798     char *s;
9799     PMOP *pm;
9800     I32 first_start;
9801     line_t first_line;
9802     I32 es = 0;
9803     char charset = '\0';    /* character set modifier */
9804 #ifdef PERL_MAD
9805     char *modstart;
9806 #endif
9807     char *t;
9808
9809     PERL_ARGS_ASSERT_SCAN_SUBST;
9810
9811     pl_yylval.ival = OP_NULL;
9812
9813     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9814                  TRUE /* look for escaped bracketed metas */, &t);
9815
9816     if (!s)
9817         Perl_croak(aTHX_ "Substitution pattern not terminated");
9818
9819     s = t;
9820 #ifdef PERL_MAD
9821     if (PL_madskills) {
9822         CURMAD('q', PL_thisopen);
9823         CURMAD('_', PL_thiswhite);
9824         CURMAD('E', PL_thisstuff);
9825         CURMAD('Q', PL_thisclose);
9826         PL_realtokenstart = s - SvPVX(PL_linestr);
9827     }
9828 #endif
9829
9830     first_start = PL_multi_start;
9831     first_line = CopLINE(PL_curcop);
9832     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9833     if (!s) {
9834         if (PL_lex_stuff) {
9835             SvREFCNT_dec(PL_lex_stuff);
9836             PL_lex_stuff = NULL;
9837         }
9838         Perl_croak(aTHX_ "Substitution replacement not terminated");
9839     }
9840     PL_multi_start = first_start;       /* so whole substitution is taken together */
9841
9842     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9843
9844 #ifdef PERL_MAD
9845     if (PL_madskills) {
9846         CURMAD('z', PL_thisopen);
9847         CURMAD('R', PL_thisstuff);
9848         CURMAD('Z', PL_thisclose);
9849     }
9850     modstart = s;
9851 #endif
9852
9853     while (*s) {
9854         if (*s == EXEC_PAT_MOD) {
9855             s++;
9856             es++;
9857         }
9858         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9859         {
9860             break;
9861         }
9862     }
9863
9864 #ifdef PERL_MAD
9865     if (PL_madskills) {
9866         if (modstart != s)
9867             curmad('m', newSVpvn(modstart, s - modstart));
9868         append_madprops(PL_thismad, (OP*)pm, 0);
9869         PL_thismad = 0;
9870     }
9871 #endif
9872     if ((pm->op_pmflags & PMf_CONTINUE)) {
9873         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9874     }
9875
9876     if (es) {
9877         SV * const repl = newSVpvs("");
9878
9879         PL_multi_end = 0;
9880         pm->op_pmflags |= PMf_EVAL;
9881         while (es-- > 0) {
9882             if (es)
9883                 sv_catpvs(repl, "eval ");
9884             else
9885                 sv_catpvs(repl, "do ");
9886         }
9887         sv_catpvs(repl, "{");
9888         sv_catsv(repl, PL_sublex_info.repl);
9889         sv_catpvs(repl, "}");
9890         SvEVALED_on(repl);
9891         SvREFCNT_dec(PL_sublex_info.repl);
9892         PL_sublex_info.repl = repl;
9893     }
9894     if (CopLINE(PL_curcop) != first_line) {
9895         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9896         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9897             CopLINE(PL_curcop) - first_line;
9898         CopLINE_set(PL_curcop, first_line);
9899     }
9900
9901     PL_lex_op = (OP*)pm;
9902     pl_yylval.ival = OP_SUBST;
9903     return s;
9904 }
9905
9906 STATIC char *
9907 S_scan_trans(pTHX_ char *start)
9908 {
9909     dVAR;
9910     char* s;
9911     OP *o;
9912     U8 squash;
9913     U8 del;
9914     U8 complement;
9915     bool nondestruct = 0;
9916 #ifdef PERL_MAD
9917     char *modstart;
9918 #endif
9919     char *t;
9920
9921     PERL_ARGS_ASSERT_SCAN_TRANS;
9922
9923     pl_yylval.ival = OP_NULL;
9924
9925     s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9926     if (!s)
9927         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9928
9929     s = t;
9930 #ifdef PERL_MAD
9931     if (PL_madskills) {
9932         CURMAD('q', PL_thisopen);
9933         CURMAD('_', PL_thiswhite);
9934         CURMAD('E', PL_thisstuff);
9935         CURMAD('Q', PL_thisclose);
9936         PL_realtokenstart = s - SvPVX(PL_linestr);
9937     }
9938 #endif
9939
9940     s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9941     if (!s) {
9942         if (PL_lex_stuff) {
9943             SvREFCNT_dec(PL_lex_stuff);
9944             PL_lex_stuff = NULL;
9945         }
9946         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9947     }
9948     if (PL_madskills) {
9949         CURMAD('z', PL_thisopen);
9950         CURMAD('R', PL_thisstuff);
9951         CURMAD('Z', PL_thisclose);
9952     }
9953
9954     complement = del = squash = 0;
9955 #ifdef PERL_MAD
9956     modstart = s;
9957 #endif
9958     while (1) {
9959         switch (*s) {
9960         case 'c':
9961             complement = OPpTRANS_COMPLEMENT;
9962             break;
9963         case 'd':
9964             del = OPpTRANS_DELETE;
9965             break;
9966         case 's':
9967             squash = OPpTRANS_SQUASH;
9968             break;
9969         case 'r':
9970             nondestruct = 1;
9971             break;
9972         default:
9973             goto no_more;
9974         }
9975         s++;
9976     }
9977   no_more:
9978
9979     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9980     o->op_private &= ~OPpTRANS_ALL;
9981     o->op_private |= del|squash|complement|
9982       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9983       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9984
9985     PL_lex_op = o;
9986     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9987
9988 #ifdef PERL_MAD
9989     if (PL_madskills) {
9990         if (modstart != s)
9991             curmad('m', newSVpvn(modstart, s - modstart));
9992         append_madprops(PL_thismad, o, 0);
9993         PL_thismad = 0;
9994     }
9995 #endif
9996
9997     return s;
9998 }
9999
10000 /* scan_heredoc
10001    Takes a pointer to the first < in <<FOO.
10002    Returns a pointer to the byte following <<FOO.
10003
10004    This function scans a heredoc, which involves different methods
10005    depending on whether we are in a string eval, quoted construct, etc.
10006    This is because PL_linestr could containing a single line of input, or
10007    a whole string being evalled, or the contents of the current quote-
10008    like operator.
10009
10010    The two basic methods are:
10011     - Steal lines from the input stream
10012     - Scan the heredoc in PL_linestr and remove it therefrom
10013
10014    In a file scope or filtered eval, the first method is used; in a
10015    string eval, the second.
10016
10017    In a quote-like operator, we have to choose between the two,
10018    depending on where we can find a newline.  We peek into outer lex-
10019    ing scopes until we find one with a newline in it.  If we reach the
10020    outermost lexing scope and it is a file, we use the stream method.
10021    Otherwise it is treated as an eval.
10022 */
10023
10024 STATIC char *
10025 S_scan_heredoc(pTHX_ char *s)
10026 {
10027     dVAR;
10028     I32 op_type = OP_SCALAR;
10029     I32 len;
10030     SV *tmpstr;
10031     char term;
10032     char *d;
10033     char *e;
10034     char *peek;
10035     const bool infile = PL_rsfp || PL_parser->filtered;
10036     const line_t origline = CopLINE(PL_curcop);
10037     LEXSHARED *shared = PL_parser->lex_shared;
10038 #ifdef PERL_MAD
10039     I32 stuffstart = s - SvPVX(PL_linestr);
10040     char *tstart;
10041
10042     PL_realtokenstart = -1;
10043 #endif
10044
10045     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10046
10047     s += 2;
10048     d = PL_tokenbuf + 1;
10049     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10050     *PL_tokenbuf = '\n';
10051     peek = s;
10052     while (SPACE_OR_TAB(*peek))
10053         peek++;
10054     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10055         s = peek;
10056         term = *s++;
10057         s = delimcpy(d, e, s, PL_bufend, term, &len);
10058         if (s == PL_bufend)
10059             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10060         d += len;
10061         s++;
10062     }
10063     else {
10064         if (*s == '\\')
10065             /* <<\FOO is equivalent to <<'FOO' */
10066             s++, term = '\'';
10067         else
10068             term = '"';
10069         if (!isWORDCHAR_lazy_if(s,UTF))
10070             deprecate("bare << to mean <<\"\"");
10071         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10072             if (d < e)
10073                 *d++ = *s;
10074         }
10075     }
10076     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10077         Perl_croak(aTHX_ "Delimiter for here document is too long");
10078     *d++ = '\n';
10079     *d = '\0';
10080     len = d - PL_tokenbuf;
10081
10082 #ifdef PERL_MAD
10083     if (PL_madskills) {
10084         tstart = PL_tokenbuf + 1;
10085         PL_thisclose = newSVpvn(tstart, len - 1);
10086         tstart = SvPVX(PL_linestr) + stuffstart;
10087         PL_thisopen = newSVpvn(tstart, s - tstart);
10088         stuffstart = s - SvPVX(PL_linestr);
10089     }
10090 #endif
10091 #ifndef PERL_STRICT_CR
10092     d = strchr(s, '\r');
10093     if (d) {
10094         char * const olds = s;
10095         s = d;
10096         while (s < PL_bufend) {
10097             if (*s == '\r') {
10098                 *d++ = '\n';
10099                 if (*++s == '\n')
10100                     s++;
10101             }
10102             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10103                 *d++ = *s++;
10104                 s++;
10105             }
10106             else
10107                 *d++ = *s++;
10108         }
10109         *d = '\0';
10110         PL_bufend = d;
10111         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10112         s = olds;
10113     }
10114 #endif
10115 #ifdef PERL_MAD
10116     if (PL_madskills) {
10117         tstart = SvPVX(PL_linestr) + stuffstart;
10118         if (PL_thisstuff)
10119             sv_catpvn(PL_thisstuff, tstart, s - tstart);
10120         else
10121             PL_thisstuff = newSVpvn(tstart, s - tstart);
10122     }
10123
10124     stuffstart = s - SvPVX(PL_linestr);
10125 #endif
10126
10127     tmpstr = newSV_type(SVt_PVIV);
10128     SvGROW(tmpstr, 80);
10129     if (term == '\'') {
10130         op_type = OP_CONST;
10131         SvIV_set(tmpstr, -1);
10132     }
10133     else if (term == '`') {
10134         op_type = OP_BACKTICK;
10135         SvIV_set(tmpstr, '\\');
10136     }
10137
10138     PL_multi_start = origline + 1 + PL_parser->herelines;
10139     PL_multi_open = PL_multi_close = '<';
10140     /* inside a string eval or quote-like operator */
10141     if (!infile || PL_lex_inwhat) {
10142         SV *linestr;
10143         char *bufend;
10144         char * const olds = s;
10145         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10146         /* These two fields are not set until an inner lexing scope is
10147            entered.  But we need them set here. */
10148         shared->ls_bufptr  = s;
10149         shared->ls_linestr = PL_linestr;
10150         if (PL_lex_inwhat)
10151           /* Look for a newline.  If the current buffer does not have one,
10152              peek into the line buffer of the parent lexing scope, going
10153              up as many levels as necessary to find one with a newline
10154              after bufptr.
10155            */
10156           while (!(s = (char *)memchr(
10157                     (void *)shared->ls_bufptr, '\n',
10158                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10159                 ))) {
10160             shared = shared->ls_prev;
10161             /* shared is only null if we have gone beyond the outermost
10162                lexing scope.  In a file, we will have broken out of the
10163                loop in the previous iteration.  In an eval, the string buf-
10164                fer ends with "\n;", so the while condition above will have
10165                evaluated to false.  So shared can never be null. */
10166             assert(shared);
10167             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10168                most lexing scope.  In a file, shared->ls_linestr at that
10169                level is just one line, so there is no body to steal. */
10170             if (infile && !shared->ls_prev) {
10171                 s = olds;
10172                 goto streaming;
10173             }
10174           }
10175         else {  /* eval */
10176             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10177             assert(s);
10178         }
10179         linestr = shared->ls_linestr;
10180         bufend = SvEND(linestr);
10181         d = s;
10182         while (s < bufend - len + 1 &&
10183           memNE(s,PL_tokenbuf,len) ) {
10184             if (*s++ == '\n')
10185                 ++PL_parser->herelines;
10186         }
10187         if (s >= bufend - len + 1) {
10188             goto interminable;
10189         }
10190         sv_setpvn(tmpstr,d+1,s-d);
10191 #ifdef PERL_MAD
10192         if (PL_madskills) {
10193             if (PL_thisstuff)
10194                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10195             else
10196                 PL_thisstuff = newSVpvn(d + 1, s - d);
10197             stuffstart = s - SvPVX(PL_linestr);
10198         }
10199 #endif
10200         s += len - 1;
10201         /* the preceding stmt passes a newline */
10202         PL_parser->herelines++;
10203
10204         /* s now points to the newline after the heredoc terminator.
10205            d points to the newline before the body of the heredoc.
10206          */
10207
10208         /* We are going to modify linestr in place here, so set
10209            aside copies of the string if necessary for re-evals or
10210            (caller $n)[6]. */
10211         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10212            check shared->re_eval_str. */
10213         if (shared->re_eval_start || shared->re_eval_str) {
10214             /* Set aside the rest of the regexp */
10215             if (!shared->re_eval_str)
10216                 shared->re_eval_str =
10217                        newSVpvn(shared->re_eval_start,
10218                                 bufend - shared->re_eval_start);
10219             shared->re_eval_start -= s-d;
10220         }
10221         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10222             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10223             cx->blk_eval.cur_text == linestr)
10224         {
10225             cx->blk_eval.cur_text = newSVsv(linestr);
10226             SvSCREAM_on(cx->blk_eval.cur_text);
10227         }
10228         /* Copy everything from s onwards back to d. */
10229         Move(s,d,bufend-s + 1,char);
10230         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10231         /* Setting PL_bufend only applies when we have not dug deeper
10232            into other scopes, because sublex_done sets PL_bufend to
10233            SvEND(PL_linestr). */
10234         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10235         s = olds;
10236     }
10237     else
10238     {
10239       SV *linestr_save;
10240      streaming:
10241       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10242       term = PL_tokenbuf[1];
10243       len--;
10244       linestr_save = PL_linestr; /* must restore this afterwards */
10245       d = s;                     /* and this */
10246       PL_linestr = newSVpvs("");
10247       PL_bufend = SvPVX(PL_linestr);
10248       while (1) {
10249 #ifdef PERL_MAD
10250         if (PL_madskills) {
10251             tstart = SvPVX(PL_linestr) + stuffstart;
10252             if (PL_thisstuff)
10253                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10254             else
10255                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10256         }
10257 #endif
10258         PL_bufptr = PL_bufend;
10259         CopLINE_set(PL_curcop,
10260                     origline + 1 + PL_parser->herelines);
10261         if (!lex_next_chunk(LEX_NO_TERM)
10262          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10263             SvREFCNT_dec(linestr_save);
10264             goto interminable;
10265         }
10266         CopLINE_set(PL_curcop, origline);
10267         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10268             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10269             /* ^That should be enough to avoid this needing to grow:  */
10270             sv_catpvs(PL_linestr, "\n\0");
10271             assert(s == SvPVX(PL_linestr));
10272             PL_bufend = SvEND(PL_linestr);
10273         }
10274         s = PL_bufptr;
10275 #ifdef PERL_MAD
10276         stuffstart = s - SvPVX(PL_linestr);
10277 #endif
10278         PL_parser->herelines++;
10279         PL_last_lop = PL_last_uni = NULL;
10280 #ifndef PERL_STRICT_CR
10281         if (PL_bufend - PL_linestart >= 2) {
10282             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10283                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10284             {
10285                 PL_bufend[-2] = '\n';
10286                 PL_bufend--;
10287                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10288             }
10289             else if (PL_bufend[-1] == '\r')
10290                 PL_bufend[-1] = '\n';
10291         }
10292         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10293             PL_bufend[-1] = '\n';
10294 #endif
10295         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10296             SvREFCNT_dec(PL_linestr);
10297             PL_linestr = linestr_save;
10298             PL_linestart = SvPVX(linestr_save);
10299             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10300             s = d;
10301             break;
10302         }
10303         else {
10304             sv_catsv(tmpstr,PL_linestr);
10305         }
10306       }
10307     }
10308     PL_multi_end = origline + PL_parser->herelines;
10309     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10310         SvPV_shrink_to_cur(tmpstr);
10311     }
10312     if (!IN_BYTES) {
10313         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10314             SvUTF8_on(tmpstr);
10315         else if (PL_encoding)
10316             sv_recode_to_utf8(tmpstr, PL_encoding);
10317     }
10318     PL_lex_stuff = tmpstr;
10319     pl_yylval.ival = op_type;
10320     return s;
10321
10322   interminable:
10323     SvREFCNT_dec(tmpstr);
10324     CopLINE_set(PL_curcop, origline);
10325     missingterm(PL_tokenbuf + 1);
10326 }
10327
10328 /* scan_inputsymbol
10329    takes: current position in input buffer
10330    returns: new position in input buffer
10331    side-effects: pl_yylval and lex_op are set.
10332
10333    This code handles:
10334
10335    <>           read from ARGV
10336    <FH>         read from filehandle
10337    <pkg::FH>    read from package qualified filehandle
10338    <pkg'FH>     read from package qualified filehandle
10339    <$fh>        read from filehandle in $fh
10340    <*.h>        filename glob
10341
10342 */
10343
10344 STATIC char *
10345 S_scan_inputsymbol(pTHX_ char *start)
10346 {
10347     dVAR;
10348     char *s = start;            /* current position in buffer */
10349     char *end;
10350     I32 len;
10351     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10352     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10353
10354     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10355
10356     end = strchr(s, '\n');
10357     if (!end)
10358         end = PL_bufend;
10359     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10360
10361     /* die if we didn't have space for the contents of the <>,
10362        or if it didn't end, or if we see a newline
10363     */
10364
10365     if (len >= (I32)sizeof PL_tokenbuf)
10366         Perl_croak(aTHX_ "Excessively long <> operator");
10367     if (s >= end)
10368         Perl_croak(aTHX_ "Unterminated <> operator");
10369
10370     s++;
10371
10372     /* check for <$fh>
10373        Remember, only scalar variables are interpreted as filehandles by
10374        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10375        treated as a glob() call.
10376        This code makes use of the fact that except for the $ at the front,
10377        a scalar variable and a filehandle look the same.
10378     */
10379     if (*d == '$' && d[1]) d++;
10380
10381     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10382     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10383         d += UTF ? UTF8SKIP(d) : 1;
10384
10385     /* If we've tried to read what we allow filehandles to look like, and
10386        there's still text left, then it must be a glob() and not a getline.
10387        Use scan_str to pull out the stuff between the <> and treat it
10388        as nothing more than a string.
10389     */
10390
10391     if (d - PL_tokenbuf != len) {
10392         pl_yylval.ival = OP_GLOB;
10393         s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10394         if (!s)
10395            Perl_croak(aTHX_ "Glob not terminated");
10396         return s;
10397     }
10398     else {
10399         bool readline_overriden = FALSE;
10400         GV *gv_readline;
10401         /* we're in a filehandle read situation */
10402         d = PL_tokenbuf;
10403
10404         /* turn <> into <ARGV> */
10405         if (!len)
10406             Copy("ARGV",d,5,char);
10407
10408         /* Check whether readline() is overriden */
10409         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10410         if ((gv_readline = gv_override("readline",8)))
10411             readline_overriden = TRUE;
10412
10413         /* if <$fh>, create the ops to turn the variable into a
10414            filehandle
10415         */
10416         if (*d == '$') {
10417             /* try to find it in the pad for this block, otherwise find
10418                add symbol table ops
10419             */
10420             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10421             if (tmp != NOT_IN_PAD) {
10422                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10423                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10424                     HEK * const stashname = HvNAME_HEK(stash);
10425                     SV * const sym = sv_2mortal(newSVhek(stashname));
10426                     sv_catpvs(sym, "::");
10427                     sv_catpv(sym, d+1);
10428                     d = SvPVX(sym);
10429                     goto intro_sym;
10430                 }
10431                 else {
10432                     OP * const o = newOP(OP_PADSV, 0);
10433                     o->op_targ = tmp;
10434                     PL_lex_op = readline_overriden
10435                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10436                                 op_append_elem(OP_LIST, o,
10437                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10438                         : (OP*)newUNOP(OP_READLINE, 0, o);
10439                 }
10440             }
10441             else {
10442                 GV *gv;
10443                 ++d;
10444 intro_sym:
10445                 gv = gv_fetchpv(d,
10446                                 (PL_in_eval
10447                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10448                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10449                                 SVt_PV);
10450                 PL_lex_op = readline_overriden
10451                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10452                             op_append_elem(OP_LIST,
10453                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10454                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10455                     : (OP*)newUNOP(OP_READLINE, 0,
10456                             newUNOP(OP_RV2SV, 0,
10457                                 newGVOP(OP_GV, 0, gv)));
10458             }
10459             if (!readline_overriden)
10460                 PL_lex_op->op_flags |= OPf_SPECIAL;
10461             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10462             pl_yylval.ival = OP_NULL;
10463         }
10464
10465         /* If it's none of the above, it must be a literal filehandle
10466            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10467         else {
10468             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10469             PL_lex_op = readline_overriden
10470                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10471                         op_append_elem(OP_LIST,
10472                             newGVOP(OP_GV, 0, gv),
10473                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10474                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10475             pl_yylval.ival = OP_NULL;
10476         }
10477     }
10478
10479     return s;
10480 }
10481
10482
10483 /* scan_str
10484    takes:
10485         start                   position in buffer
10486         keep_quoted             preserve \ on the embedded delimiter(s)
10487         keep_delims             preserve the delimiters around the string
10488         re_reparse              compiling a run-time /(?{})/:
10489                                    collapse // to /,  and skip encoding src
10490         deprecate_escaped_meta  issue a deprecation warning for cer-
10491                                 tain paired metacharacters that appear
10492                                 escaped within it
10493         delimp                  if non-null, this is set to the position of
10494                                 the closing delimiter, or just after it if
10495                                 the closing and opening delimiters differ
10496                                 (i.e., the opening delimiter of a substitu-
10497                                 tion replacement)
10498    returns: position to continue reading from buffer
10499    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10500         updates the read buffer.
10501
10502    This subroutine pulls a string out of the input.  It is called for:
10503         q               single quotes           q(literal text)
10504         '               single quotes           'literal text'
10505         qq              double quotes           qq(interpolate $here please)
10506         "               double quotes           "interpolate $here please"
10507         qx              backticks               qx(/bin/ls -l)
10508         `               backticks               `/bin/ls -l`
10509         qw              quote words             @EXPORT_OK = qw( func() $spam )
10510         m//             regexp match            m/this/
10511         s///            regexp substitute       s/this/that/
10512         tr///           string transliterate    tr/this/that/
10513         y///            string transliterate    y/this/that/
10514         ($*@)           sub prototypes          sub foo ($)
10515         (stuff)         sub attr parameters     sub foo : attr(stuff)
10516         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10517
10518    In most of these cases (all but <>, patterns and transliterate)
10519    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10520    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10521    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10522    calls scan_str().
10523
10524    It skips whitespace before the string starts, and treats the first
10525    character as the delimiter.  If the delimiter is one of ([{< then
10526    the corresponding "close" character )]}> is used as the closing
10527    delimiter.  It allows quoting of delimiters, and if the string has
10528    balanced delimiters ([{<>}]) it allows nesting.
10529
10530    On success, the SV with the resulting string is put into lex_stuff or,
10531    if that is already non-NULL, into lex_repl. The second case occurs only
10532    when parsing the RHS of the special constructs s/// and tr/// (y///).
10533    For convenience, the terminating delimiter character is stuffed into
10534    SvIVX of the SV.
10535 */
10536
10537 STATIC char *
10538 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10539                  bool deprecate_escaped_meta, char **delimp
10540     )
10541 {
10542     dVAR;
10543     SV *sv;                     /* scalar value: string */
10544     const char *tmps;           /* temp string, used for delimiter matching */
10545     char *s = start;            /* current position in the buffer */
10546     char term;                  /* terminating character */
10547     char *to;                   /* current position in the sv's data */
10548     I32 brackets = 1;           /* bracket nesting level */
10549     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10550     I32 termcode;               /* terminating char. code */
10551     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10552     STRLEN termlen;             /* length of terminating string */
10553     int last_off = 0;           /* last position for nesting bracket */
10554     char *escaped_open = NULL;
10555     line_t herelines;
10556 #ifdef PERL_MAD
10557     int stuffstart;
10558     char *tstart;
10559 #endif
10560
10561     PERL_ARGS_ASSERT_SCAN_STR;
10562
10563     /* skip space before the delimiter */
10564     if (isSPACE(*s)) {
10565         s = PEEKSPACE(s);
10566     }
10567
10568 #ifdef PERL_MAD
10569     if (PL_realtokenstart >= 0) {
10570         stuffstart = PL_realtokenstart;
10571         PL_realtokenstart = -1;
10572     }
10573     else
10574         stuffstart = start - SvPVX(PL_linestr);
10575 #endif
10576     /* mark where we are, in case we need to report errors */
10577     CLINE;
10578
10579     /* after skipping whitespace, the next character is the terminator */
10580     term = *s;
10581     if (!UTF) {
10582         termcode = termstr[0] = term;
10583         termlen = 1;
10584     }
10585     else {
10586         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10587         Copy(s, termstr, termlen, U8);
10588         if (!UTF8_IS_INVARIANT(term))
10589             has_utf8 = TRUE;
10590     }
10591
10592     /* mark where we are */
10593     PL_multi_start = CopLINE(PL_curcop);
10594     PL_multi_open = term;
10595     herelines = PL_parser->herelines;
10596
10597     /* find corresponding closing delimiter */
10598     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10599         termcode = termstr[0] = term = tmps[5];
10600
10601     PL_multi_close = term;
10602
10603     /* A warning is raised if the input parameter requires it for escaped (by a
10604      * backslash) paired metacharacters {} [] and () when the delimiters are
10605      * those same characters, and the backslash is ineffective.  This doesn't
10606      * happen for <>, as they aren't metas. */
10607     if (deprecate_escaped_meta
10608         && (PL_multi_open == PL_multi_close
10609             || PL_multi_open == '<'
10610             || ! ckWARN_d(WARN_DEPRECATED)))
10611     {
10612         deprecate_escaped_meta = FALSE;
10613     }
10614
10615     /* create a new SV to hold the contents.  79 is the SV's initial length.
10616        What a random number. */
10617     sv = newSV_type(SVt_PVIV);
10618     SvGROW(sv, 80);
10619     SvIV_set(sv, termcode);
10620     (void)SvPOK_only(sv);               /* validate pointer */
10621
10622     /* move past delimiter and try to read a complete string */
10623     if (keep_delims)
10624         sv_catpvn(sv, s, termlen);
10625     s += termlen;
10626 #ifdef PERL_MAD
10627     tstart = SvPVX(PL_linestr) + stuffstart;
10628     if (PL_madskills && !PL_thisopen && !keep_delims) {
10629         PL_thisopen = newSVpvn(tstart, s - tstart);
10630         stuffstart = s - SvPVX(PL_linestr);
10631     }
10632 #endif
10633     for (;;) {
10634         if (PL_encoding && !UTF && !re_reparse) {
10635             bool cont = TRUE;
10636
10637             while (cont) {
10638                 int offset = s - SvPVX_const(PL_linestr);
10639                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10640                                            &offset, (char*)termstr, termlen);
10641                 const char *ns;
10642                 char *svlast;
10643
10644                 if (SvIsCOW(PL_linestr)) {
10645                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10646                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10647                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10648                     char *buf = SvPVX(PL_linestr);
10649                     bufend_pos = PL_parser->bufend - buf;
10650                     bufptr_pos = PL_parser->bufptr - buf;
10651                     oldbufptr_pos = PL_parser->oldbufptr - buf;
10652                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10653                     linestart_pos = PL_parser->linestart - buf;
10654                     last_uni_pos = PL_parser->last_uni
10655                         ? PL_parser->last_uni - buf
10656                         : 0;
10657                     last_lop_pos = PL_parser->last_lop
10658                         ? PL_parser->last_lop - buf
10659                         : 0;
10660                     re_eval_start_pos =
10661                         PL_parser->lex_shared->re_eval_start ?
10662                             PL_parser->lex_shared->re_eval_start - buf : 0;
10663                     s_pos = s - buf;
10664
10665                     sv_force_normal(PL_linestr);
10666
10667                     buf = SvPVX(PL_linestr);
10668                     PL_parser->bufend = buf + bufend_pos;
10669                     PL_parser->bufptr = buf + bufptr_pos;
10670                     PL_parser->oldbufptr = buf + oldbufptr_pos;
10671                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10672                     PL_parser->linestart = buf + linestart_pos;
10673                     if (PL_parser->last_uni)
10674                         PL_parser->last_uni = buf + last_uni_pos;
10675                     if (PL_parser->last_lop)
10676                         PL_parser->last_lop = buf + last_lop_pos;
10677                     if (PL_parser->lex_shared->re_eval_start)
10678                         PL_parser->lex_shared->re_eval_start  =
10679                             buf + re_eval_start_pos;
10680                     s = buf + s_pos;
10681                 }
10682                 ns = SvPVX_const(PL_linestr) + offset;
10683                 svlast = SvEND(sv) - 1;
10684
10685                 for (; s < ns; s++) {
10686                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10687                         COPLINE_INC_WITH_HERELINES;
10688                 }
10689                 if (!found)
10690                     goto read_more_line;
10691                 else {
10692                     /* handle quoted delimiters */
10693                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10694                         const char *t;
10695                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10696                             t--;
10697                         if ((svlast-1 - t) % 2) {
10698                             if (!keep_quoted) {
10699                                 *(svlast-1) = term;
10700                                 *svlast = '\0';
10701                                 SvCUR_set(sv, SvCUR(sv) - 1);
10702                             }
10703                             continue;
10704                         }
10705                     }
10706                     if (PL_multi_open == PL_multi_close) {
10707                         cont = FALSE;
10708                     }
10709                     else {
10710                         const char *t;
10711                         char *w;
10712                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10713                             /* At here, all closes are "was quoted" one,
10714                                so we don't check PL_multi_close. */
10715                             if (*t == '\\') {
10716                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10717                                     t++;
10718                                 else
10719                                     *w++ = *t++;
10720                             }
10721                             else if (*t == PL_multi_open)
10722                                 brackets++;
10723
10724                             *w = *t;
10725                         }
10726                         if (w < t) {
10727                             *w++ = term;
10728                             *w = '\0';
10729                             SvCUR_set(sv, w - SvPVX_const(sv));
10730                         }
10731                         last_off = w - SvPVX(sv);
10732                         if (--brackets <= 0)
10733                             cont = FALSE;
10734                     }
10735                 }
10736             }
10737             if (!keep_delims) {
10738                 SvCUR_set(sv, SvCUR(sv) - 1);
10739                 *SvEND(sv) = '\0';
10740             }
10741             break;
10742         }
10743
10744         /* extend sv if need be */
10745         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10746         /* set 'to' to the next character in the sv's string */
10747         to = SvPVX(sv)+SvCUR(sv);
10748
10749         /* if open delimiter is the close delimiter read unbridle */
10750         if (PL_multi_open == PL_multi_close) {
10751             for (; s < PL_bufend; s++,to++) {
10752                 /* embedded newlines increment the current line number */
10753                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10754                     COPLINE_INC_WITH_HERELINES;
10755                 /* handle quoted delimiters */
10756                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10757                     if (!keep_quoted
10758                         && (s[1] == term
10759                             || (re_reparse && s[1] == '\\'))
10760                     )
10761                         s++;
10762                     /* any other quotes are simply copied straight through */
10763                     else
10764                         *to++ = *s++;
10765                 }
10766                 /* terminate when run out of buffer (the for() condition), or
10767                    have found the terminator */
10768                 else if (*s == term) {
10769                     if (termlen == 1)
10770                         break;
10771                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10772                         break;
10773                 }
10774                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10775                     has_utf8 = TRUE;
10776                 *to = *s;
10777             }
10778         }
10779
10780         /* if the terminator isn't the same as the start character (e.g.,
10781            matched brackets), we have to allow more in the quoting, and
10782            be prepared for nested brackets.
10783         */
10784         else {
10785             /* read until we run out of string, or we find the terminator */
10786             for (; s < PL_bufend; s++,to++) {
10787                 /* embedded newlines increment the line count */
10788                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10789                     COPLINE_INC_WITH_HERELINES;
10790                 /* backslashes can escape the open or closing characters */
10791                 if (*s == '\\' && s+1 < PL_bufend) {
10792                     if (!keep_quoted &&
10793                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10794                     {
10795                         s++;
10796
10797                         /* Here, 'deprecate_escaped_meta' is true iff the
10798                          * delimiters are paired metacharacters, and 's' points
10799                          * to an occurrence of one of them within the string,
10800                          * which was preceded by a backslash.  If this is a
10801                          * context where the delimiter is also a metacharacter,
10802                          * the backslash is useless, and deprecated.  () and []
10803                          * are meta in any context. {} are meta only when
10804                          * appearing in a quantifier or in things like '\p{'
10805                          * (but '\\p{' isn't meta).  They also aren't meta
10806                          * unless there is a matching closed, escaped char
10807                          * later on within the string.  If 's' points to an
10808                          * open, set a flag; if to a close, test that flag, and
10809                          * raise a warning if it was set */
10810
10811                         if (deprecate_escaped_meta) {
10812                             if (*s == PL_multi_open) {
10813                                 if (*s != '{') {
10814                                     escaped_open = s;
10815                                 }
10816                                      /* Look for a closing '\}' */
10817                                 else if (regcurly(s, TRUE)) {
10818                                     escaped_open = s;
10819                                 }
10820                                      /* Look for e.g.  '\x{' */
10821                                 else if (s - start > 2
10822                                          && _generic_isCC(*(s-2),
10823                                              _CC_BACKSLASH_FOO_LBRACE_IS_META))
10824                                 { /* Exclude '\\x', '\\\\x', etc. */
10825                                     char *lookbehind = s - 4;
10826                                     bool is_meta = TRUE;
10827                                     while (lookbehind >= start
10828                                            && *lookbehind == '\\')
10829                                     {
10830                                         is_meta = ! is_meta;
10831                                         lookbehind--;
10832                                     }
10833                                     if (is_meta) {
10834                                         escaped_open = s;
10835                                     }
10836                                 }
10837                             }
10838                             else if (escaped_open) {
10839                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10840                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10841                                 escaped_open = NULL;
10842                             }
10843                         }
10844                     }
10845                     else
10846                         *to++ = *s++;
10847                 }
10848                 /* allow nested opens and closes */
10849                 else if (*s == PL_multi_close && --brackets <= 0)
10850                     break;
10851                 else if (*s == PL_multi_open)
10852                     brackets++;
10853                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10854                     has_utf8 = TRUE;
10855                 *to = *s;
10856             }
10857         }
10858         /* terminate the copied string and update the sv's end-of-string */
10859         *to = '\0';
10860         SvCUR_set(sv, to - SvPVX_const(sv));
10861
10862         /*
10863          * this next chunk reads more into the buffer if we're not done yet
10864          */
10865
10866         if (s < PL_bufend)
10867             break;              /* handle case where we are done yet :-) */
10868
10869 #ifndef PERL_STRICT_CR
10870         if (to - SvPVX_const(sv) >= 2) {
10871             if ((to[-2] == '\r' && to[-1] == '\n') ||
10872                 (to[-2] == '\n' && to[-1] == '\r'))
10873             {
10874                 to[-2] = '\n';
10875                 to--;
10876                 SvCUR_set(sv, to - SvPVX_const(sv));
10877             }
10878             else if (to[-1] == '\r')
10879                 to[-1] = '\n';
10880         }
10881         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10882             to[-1] = '\n';
10883 #endif
10884
10885      read_more_line:
10886         /* if we're out of file, or a read fails, bail and reset the current
10887            line marker so we can report where the unterminated string began
10888         */
10889 #ifdef PERL_MAD
10890         if (PL_madskills) {
10891             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10892             if (PL_thisstuff)
10893                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10894             else
10895                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10896         }
10897 #endif
10898         COPLINE_INC_WITH_HERELINES;
10899         PL_bufptr = PL_bufend;
10900         if (!lex_next_chunk(0)) {
10901             sv_free(sv);
10902             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10903             return NULL;
10904         }
10905         s = PL_bufptr;
10906 #ifdef PERL_MAD
10907         stuffstart = 0;
10908 #endif
10909     }
10910
10911     /* at this point, we have successfully read the delimited string */
10912
10913     if (!PL_encoding || UTF || re_reparse) {
10914 #ifdef PERL_MAD
10915         if (PL_madskills) {
10916             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10917             const int len = s - tstart;
10918             if (PL_thisstuff)
10919                 sv_catpvn(PL_thisstuff, tstart, len);
10920             else
10921                 PL_thisstuff = newSVpvn(tstart, len);
10922             if (!PL_thisclose && !keep_delims)
10923                 PL_thisclose = newSVpvn(s,termlen);
10924         }
10925 #endif
10926
10927         if (keep_delims)
10928             sv_catpvn(sv, s, termlen);
10929         s += termlen;
10930     }
10931 #ifdef PERL_MAD
10932     else {
10933         if (PL_madskills) {
10934             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10935             const int len = s - tstart - termlen;
10936             if (PL_thisstuff)
10937                 sv_catpvn(PL_thisstuff, tstart, len);
10938             else
10939                 PL_thisstuff = newSVpvn(tstart, len);
10940             if (!PL_thisclose && !keep_delims)
10941                 PL_thisclose = newSVpvn(s - termlen,termlen);
10942         }
10943     }
10944 #endif
10945     if (has_utf8 || (PL_encoding && !re_reparse))
10946         SvUTF8_on(sv);
10947
10948     PL_multi_end = CopLINE(PL_curcop);
10949     CopLINE_set(PL_curcop, PL_multi_start);
10950     PL_parser->herelines = herelines;
10951
10952     /* if we allocated too much space, give some back */
10953     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10954         SvLEN_set(sv, SvCUR(sv) + 1);
10955         SvPV_renew(sv, SvLEN(sv));
10956     }
10957
10958     /* decide whether this is the first or second quoted string we've read
10959        for this op
10960     */
10961
10962     if (PL_lex_stuff)
10963         PL_sublex_info.repl = sv;
10964     else
10965         PL_lex_stuff = sv;
10966     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10967     return s;
10968 }
10969
10970 /*
10971   scan_num
10972   takes: pointer to position in buffer
10973   returns: pointer to new position in buffer
10974   side-effects: builds ops for the constant in pl_yylval.op
10975
10976   Read a number in any of the formats that Perl accepts:
10977
10978   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10979   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10980   0b[01](_?[01])*
10981   0[0-7](_?[0-7])*
10982   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10983
10984   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10985   thing it reads.
10986
10987   If it reads a number without a decimal point or an exponent, it will
10988   try converting the number to an integer and see if it can do so
10989   without loss of precision.
10990 */
10991
10992 char *
10993 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10994 {
10995     dVAR;
10996     const char *s = start;      /* current position in buffer */
10997     char *d;                    /* destination in temp buffer */
10998     char *e;                    /* end of temp buffer */
10999     NV nv;                              /* number read, as a double */
11000     SV *sv = NULL;                      /* place to put the converted number */
11001     bool floatit;                       /* boolean: int or float? */
11002     const char *lastub = NULL;          /* position of last underbar */
11003     static const char* const number_too_long = "Number too long";
11004
11005     PERL_ARGS_ASSERT_SCAN_NUM;
11006
11007     /* We use the first character to decide what type of number this is */
11008
11009     switch (*s) {
11010     default:
11011         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11012
11013     /* if it starts with a 0, it could be an octal number, a decimal in
11014        0.13 disguise, or a hexadecimal number, or a binary number. */
11015     case '0':
11016         {
11017           /* variables:
11018              u          holds the "number so far"
11019              shift      the power of 2 of the base
11020                         (hex == 4, octal == 3, binary == 1)
11021              overflowed was the number more than we can hold?
11022
11023              Shift is used when we add a digit.  It also serves as an "are
11024              we in octal/hex/binary?" indicator to disallow hex characters
11025              when in octal mode.
11026            */
11027             NV n = 0.0;
11028             UV u = 0;
11029             I32 shift;
11030             bool overflowed = FALSE;
11031             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11032             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11033             static const char* const bases[5] =
11034               { "", "binary", "", "octal", "hexadecimal" };
11035             static const char* const Bases[5] =
11036               { "", "Binary", "", "Octal", "Hexadecimal" };
11037             static const char* const maxima[5] =
11038               { "",
11039                 "0b11111111111111111111111111111111",
11040                 "",
11041                 "037777777777",
11042                 "0xffffffff" };
11043             const char *base, *Base, *max;
11044
11045             /* check for hex */
11046             if (s[1] == 'x' || s[1] == 'X') {
11047                 shift = 4;
11048                 s += 2;
11049                 just_zero = FALSE;
11050             } else if (s[1] == 'b' || s[1] == 'B') {
11051                 shift = 1;
11052                 s += 2;
11053                 just_zero = FALSE;
11054             }
11055             /* check for a decimal in disguise */
11056             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11057                 goto decimal;
11058             /* so it must be octal */
11059             else {
11060                 shift = 3;
11061                 s++;
11062             }
11063
11064             if (*s == '_') {
11065                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11066                                "Misplaced _ in number");
11067                lastub = s++;
11068             }
11069
11070             base = bases[shift];
11071             Base = Bases[shift];
11072             max  = maxima[shift];
11073
11074             /* read the rest of the number */
11075             for (;;) {
11076                 /* x is used in the overflow test,
11077                    b is the digit we're adding on. */
11078                 UV x, b;
11079
11080                 switch (*s) {
11081
11082                 /* if we don't mention it, we're done */
11083                 default:
11084                     goto out;
11085
11086                 /* _ are ignored -- but warned about if consecutive */
11087                 case '_':
11088                     if (lastub && s == lastub + 1)
11089                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11090                                        "Misplaced _ in number");
11091                     lastub = s++;
11092                     break;
11093
11094                 /* 8 and 9 are not octal */
11095                 case '8': case '9':
11096                     if (shift == 3)
11097                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11098                     /* FALL THROUGH */
11099
11100                 /* octal digits */
11101                 case '2': case '3': case '4':
11102                 case '5': case '6': case '7':
11103                     if (shift == 1)
11104                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11105                     /* FALL THROUGH */
11106
11107                 case '0': case '1':
11108                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11109                     goto digit;
11110
11111                 /* hex digits */
11112                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11113                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11114                     /* make sure they said 0x */
11115                     if (shift != 4)
11116                         goto out;
11117                     b = (*s++ & 7) + 9;
11118
11119                     /* Prepare to put the digit we have onto the end
11120                        of the number so far.  We check for overflows.
11121                     */
11122
11123                   digit:
11124                     just_zero = FALSE;
11125                     if (!overflowed) {
11126                         x = u << shift; /* make room for the digit */
11127
11128                         if ((x >> shift) != u
11129                             && !(PL_hints & HINT_NEW_BINARY)) {
11130                             overflowed = TRUE;
11131                             n = (NV) u;
11132                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11133                                              "Integer overflow in %s number",
11134                                              base);
11135                         } else
11136                             u = x | b;          /* add the digit to the end */
11137                     }
11138                     if (overflowed) {
11139                         n *= nvshift[shift];
11140                         /* If an NV has not enough bits in its
11141                          * mantissa to represent an UV this summing of
11142                          * small low-order numbers is a waste of time
11143                          * (because the NV cannot preserve the
11144                          * low-order bits anyway): we could just
11145                          * remember when did we overflow and in the
11146                          * end just multiply n by the right
11147                          * amount. */
11148                         n += (NV) b;
11149                     }
11150                     break;
11151                 }
11152             }
11153
11154           /* if we get here, we had success: make a scalar value from
11155              the number.
11156           */
11157           out:
11158
11159             /* final misplaced underbar check */
11160             if (s[-1] == '_') {
11161                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11162             }
11163
11164             if (overflowed) {
11165                 if (n > 4294967295.0)
11166                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11167                                    "%s number > %s non-portable",
11168                                    Base, max);
11169                 sv = newSVnv(n);
11170             }
11171             else {
11172 #if UVSIZE > 4
11173                 if (u > 0xffffffff)
11174                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11175                                    "%s number > %s non-portable",
11176                                    Base, max);
11177 #endif
11178                 sv = newSVuv(u);
11179             }
11180             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11181                 sv = new_constant(start, s - start, "integer",
11182                                   sv, NULL, NULL, 0);
11183             else if (PL_hints & HINT_NEW_BINARY)
11184                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11185         }
11186         break;
11187
11188     /*
11189       handle decimal numbers.
11190       we're also sent here when we read a 0 as the first digit
11191     */
11192     case '1': case '2': case '3': case '4': case '5':
11193     case '6': case '7': case '8': case '9': case '.':
11194       decimal:
11195         d = PL_tokenbuf;
11196         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11197         floatit = FALSE;
11198
11199         /* read next group of digits and _ and copy into d */
11200         while (isDIGIT(*s) || *s == '_') {
11201             /* skip underscores, checking for misplaced ones
11202                if -w is on
11203             */
11204             if (*s == '_') {
11205                 if (lastub && s == lastub + 1)
11206                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11207                                    "Misplaced _ in number");
11208                 lastub = s++;
11209             }
11210             else {
11211                 /* check for end of fixed-length buffer */
11212                 if (d >= e)
11213                     Perl_croak(aTHX_ "%s", number_too_long);
11214                 /* if we're ok, copy the character */
11215                 *d++ = *s++;
11216             }
11217         }
11218
11219         /* final misplaced underbar check */
11220         if (lastub && s == lastub + 1) {
11221             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11222         }
11223
11224         /* read a decimal portion if there is one.  avoid
11225            3..5 being interpreted as the number 3. followed
11226            by .5
11227         */
11228         if (*s == '.' && s[1] != '.') {
11229             floatit = TRUE;
11230             *d++ = *s++;
11231
11232             if (*s == '_') {
11233                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11234                                "Misplaced _ in number");
11235                 lastub = s;
11236             }
11237
11238             /* copy, ignoring underbars, until we run out of digits.
11239             */
11240             for (; isDIGIT(*s) || *s == '_'; s++) {
11241                 /* fixed length buffer check */
11242                 if (d >= e)
11243                     Perl_croak(aTHX_ "%s", number_too_long);
11244                 if (*s == '_') {
11245                    if (lastub && s == lastub + 1)
11246                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11247                                       "Misplaced _ in number");
11248                    lastub = s;
11249                 }
11250                 else
11251                     *d++ = *s;
11252             }
11253             /* fractional part ending in underbar? */
11254             if (s[-1] == '_') {
11255                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11256                                "Misplaced _ in number");
11257             }
11258             if (*s == '.' && isDIGIT(s[1])) {
11259                 /* oops, it's really a v-string, but without the "v" */
11260                 s = start;
11261                 goto vstring;
11262             }
11263         }
11264
11265         /* read exponent part, if present */
11266         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11267             floatit = TRUE;
11268             s++;
11269
11270             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11271             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11272
11273             /* stray preinitial _ */
11274             if (*s == '_') {
11275                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11276                                "Misplaced _ in number");
11277                 lastub = s++;
11278             }
11279
11280             /* allow positive or negative exponent */
11281             if (*s == '+' || *s == '-')
11282                 *d++ = *s++;
11283
11284             /* stray initial _ */
11285             if (*s == '_') {
11286                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11287                                "Misplaced _ in number");
11288                 lastub = s++;
11289             }
11290
11291             /* read digits of exponent */
11292             while (isDIGIT(*s) || *s == '_') {
11293                 if (isDIGIT(*s)) {
11294                     if (d >= e)
11295                         Perl_croak(aTHX_ "%s", number_too_long);
11296                     *d++ = *s++;
11297                 }
11298                 else {
11299                    if (((lastub && s == lastub + 1) ||
11300                         (!isDIGIT(s[1]) && s[1] != '_')))
11301                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11302                                       "Misplaced _ in number");
11303                    lastub = s++;
11304                 }
11305             }
11306         }
11307
11308
11309         /*
11310            We try to do an integer conversion first if no characters
11311            indicating "float" have been found.
11312          */
11313
11314         if (!floatit) {
11315             UV uv;
11316             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11317
11318             if (flags == IS_NUMBER_IN_UV) {
11319               if (uv <= IV_MAX)
11320                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11321               else
11322                 sv = newSVuv(uv);
11323             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11324               if (uv <= (UV) IV_MIN)
11325                 sv = newSViv(-(IV)uv);
11326               else
11327                 floatit = TRUE;
11328             } else
11329               floatit = TRUE;
11330         }
11331         if (floatit) {
11332             /* terminate the string */
11333             *d = '\0';
11334             nv = Atof(PL_tokenbuf);
11335             sv = newSVnv(nv);
11336         }
11337
11338         if ( floatit
11339              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11340             const char *const key = floatit ? "float" : "integer";
11341             const STRLEN keylen = floatit ? 5 : 7;
11342             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11343                                 key, keylen, sv, NULL, NULL, 0);
11344         }
11345         break;
11346
11347     /* if it starts with a v, it could be a v-string */
11348     case 'v':
11349 vstring:
11350                 sv = newSV(5); /* preallocate storage space */
11351                 ENTER_with_name("scan_vstring");
11352                 SAVEFREESV(sv);
11353                 s = scan_vstring(s, PL_bufend, sv);
11354                 SvREFCNT_inc_simple_void_NN(sv);
11355                 LEAVE_with_name("scan_vstring");
11356         break;
11357     }
11358
11359     /* make the op for the constant and return */
11360
11361     if (sv)
11362         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11363     else
11364         lvalp->opval = NULL;
11365
11366     return (char *)s;
11367 }
11368
11369 STATIC char *
11370 S_scan_formline(pTHX_ char *s)
11371 {
11372     dVAR;
11373     char *eol;
11374     char *t;
11375     SV * const stuff = newSVpvs("");
11376     bool needargs = FALSE;
11377     bool eofmt = FALSE;
11378 #ifdef PERL_MAD
11379     char *tokenstart = s;
11380     SV* savewhite = NULL;
11381
11382     if (PL_madskills) {
11383         savewhite = PL_thiswhite;
11384         PL_thiswhite = 0;
11385     }
11386 #endif
11387
11388     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11389
11390     while (!needargs) {
11391         if (*s == '.') {
11392             t = s+1;
11393 #ifdef PERL_STRICT_CR
11394             while (SPACE_OR_TAB(*t))
11395                 t++;
11396 #else
11397             while (SPACE_OR_TAB(*t) || *t == '\r')
11398                 t++;
11399 #endif
11400             if (*t == '\n' || t == PL_bufend) {
11401                 eofmt = TRUE;
11402                 break;
11403             }
11404         }
11405         eol = (char *) memchr(s,'\n',PL_bufend-s);
11406         if (!eol++)
11407                 eol = PL_bufend;
11408         if (*s != '#') {
11409             for (t = s; t < eol; t++) {
11410                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11411                     needargs = FALSE;
11412                     goto enough;        /* ~~ must be first line in formline */
11413                 }
11414                 if (*t == '@' || *t == '^')
11415                     needargs = TRUE;
11416             }
11417             if (eol > s) {
11418                 sv_catpvn(stuff, s, eol-s);
11419 #ifndef PERL_STRICT_CR
11420                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11421                     char *end = SvPVX(stuff) + SvCUR(stuff);
11422                     end[-2] = '\n';
11423                     end[-1] = '\0';
11424                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11425                 }
11426 #endif
11427             }
11428             else
11429               break;
11430         }
11431         s = (char*)eol;
11432         if ((PL_rsfp || PL_parser->filtered)
11433          && PL_parser->form_lex_state == LEX_NORMAL) {
11434             bool got_some;
11435 #ifdef PERL_MAD
11436             if (PL_madskills) {
11437                 if (PL_thistoken)
11438                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11439                 else
11440                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11441             }
11442 #endif
11443             PL_bufptr = PL_bufend;
11444             COPLINE_INC_WITH_HERELINES;
11445             got_some = lex_next_chunk(0);
11446             CopLINE_dec(PL_curcop);
11447             s = PL_bufptr;
11448 #ifdef PERL_MAD
11449             tokenstart = PL_bufptr;
11450 #endif
11451             if (!got_some)
11452                 break;
11453         }
11454         incline(s);
11455     }
11456   enough:
11457     if (!SvCUR(stuff) || needargs)
11458         PL_lex_state = PL_parser->form_lex_state;
11459     if (SvCUR(stuff)) {
11460         PL_expect = XSTATE;
11461         if (needargs) {
11462             start_force(PL_curforce);
11463             NEXTVAL_NEXTTOKE.ival = 0;
11464             force_next(FORMLBRACK);
11465         }
11466         if (!IN_BYTES) {
11467             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11468                 SvUTF8_on(stuff);
11469             else if (PL_encoding)
11470                 sv_recode_to_utf8(stuff, PL_encoding);
11471         }
11472         start_force(PL_curforce);
11473         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11474         force_next(THING);
11475     }
11476     else {
11477         SvREFCNT_dec(stuff);
11478         if (eofmt)
11479             PL_lex_formbrack = 0;
11480     }
11481 #ifdef PERL_MAD
11482     if (PL_madskills) {
11483         if (PL_thistoken)
11484             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11485         else
11486             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11487         PL_thiswhite = savewhite;
11488     }
11489 #endif
11490     return s;
11491 }
11492
11493 I32
11494 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11495 {
11496     dVAR;
11497     const I32 oldsavestack_ix = PL_savestack_ix;
11498     CV* const outsidecv = PL_compcv;
11499
11500     SAVEI32(PL_subline);
11501     save_item(PL_subname);
11502     SAVESPTR(PL_compcv);
11503
11504     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11505     CvFLAGS(PL_compcv) |= flags;
11506
11507     PL_subline = CopLINE(PL_curcop);
11508     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11509     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11510     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11511     if (outsidecv && CvPADLIST(outsidecv))
11512         CvPADLIST(PL_compcv)->xpadl_outid =
11513             PadlistNAMES(CvPADLIST(outsidecv));
11514
11515     return oldsavestack_ix;
11516 }
11517
11518 static int
11519 S_yywarn(pTHX_ const char *const s, U32 flags)
11520 {
11521     dVAR;
11522
11523     PERL_ARGS_ASSERT_YYWARN;
11524
11525     PL_in_eval |= EVAL_WARNONLY;
11526     yyerror_pv(s, flags);
11527     PL_in_eval &= ~EVAL_WARNONLY;
11528     return 0;
11529 }
11530
11531 int
11532 Perl_yyerror(pTHX_ const char *const s)
11533 {
11534     PERL_ARGS_ASSERT_YYERROR;
11535     return yyerror_pvn(s, strlen(s), 0);
11536 }
11537
11538 int
11539 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11540 {
11541     PERL_ARGS_ASSERT_YYERROR_PV;
11542     return yyerror_pvn(s, strlen(s), flags);
11543 }
11544
11545 int
11546 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11547 {
11548     dVAR;
11549     const char *context = NULL;
11550     int contlen = -1;
11551     SV *msg;
11552     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11553     int yychar  = PL_parser->yychar;
11554
11555     PERL_ARGS_ASSERT_YYERROR_PVN;
11556
11557     if (!yychar || (yychar == ';' && !PL_rsfp))
11558         sv_catpvs(where_sv, "at EOF");
11559     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11560       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11561       PL_oldbufptr != PL_bufptr) {
11562         /*
11563                 Only for NetWare:
11564                 The code below is removed for NetWare because it abends/crashes on NetWare
11565                 when the script has error such as not having the closing quotes like:
11566                     if ($var eq "value)
11567                 Checking of white spaces is anyway done in NetWare code.
11568         */
11569 #ifndef NETWARE
11570         while (isSPACE(*PL_oldoldbufptr))
11571             PL_oldoldbufptr++;
11572 #endif
11573         context = PL_oldoldbufptr;
11574         contlen = PL_bufptr - PL_oldoldbufptr;
11575     }
11576     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11577       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11578         /*
11579                 Only for NetWare:
11580                 The code below is removed for NetWare because it abends/crashes on NetWare
11581                 when the script has error such as not having the closing quotes like:
11582                     if ($var eq "value)
11583                 Checking of white spaces is anyway done in NetWare code.
11584         */
11585 #ifndef NETWARE
11586         while (isSPACE(*PL_oldbufptr))
11587             PL_oldbufptr++;
11588 #endif
11589         context = PL_oldbufptr;
11590         contlen = PL_bufptr - PL_oldbufptr;
11591     }
11592     else if (yychar > 255)
11593         sv_catpvs(where_sv, "next token ???");
11594     else if (yychar == -2) { /* YYEMPTY */
11595         if (PL_lex_state == LEX_NORMAL ||
11596            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11597             sv_catpvs(where_sv, "at end of line");
11598         else if (PL_lex_inpat)
11599             sv_catpvs(where_sv, "within pattern");
11600         else
11601             sv_catpvs(where_sv, "within string");
11602     }
11603     else {
11604         sv_catpvs(where_sv, "next char ");
11605         if (yychar < 32)
11606             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11607         else if (isPRINT_LC(yychar)) {
11608             const char string = yychar;
11609             sv_catpvn(where_sv, &string, 1);
11610         }
11611         else
11612             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11613     }
11614     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11615     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11616         OutCopFILE(PL_curcop),
11617         (IV)(PL_parser->preambling == NOLINE
11618                ? CopLINE(PL_curcop)
11619                : PL_parser->preambling));
11620     if (context)
11621         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11622                              UTF8fARG(UTF, contlen, context));
11623     else
11624         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11625     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11626         Perl_sv_catpvf(aTHX_ msg,
11627         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11628                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11629         PL_multi_end = 0;
11630     }
11631     if (PL_in_eval & EVAL_WARNONLY) {
11632         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11633     }
11634     else
11635         qerror(msg);
11636     if (PL_error_count >= 10) {
11637         SV * errsv;
11638         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11639             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11640                        SVfARG(errsv), OutCopFILE(PL_curcop));
11641         else
11642             Perl_croak(aTHX_ "%s has too many errors.\n",
11643             OutCopFILE(PL_curcop));
11644     }
11645     PL_in_my = 0;
11646     PL_in_my_stash = NULL;
11647     return 0;
11648 }
11649
11650 STATIC char*
11651 S_swallow_bom(pTHX_ U8 *s)
11652 {
11653     dVAR;
11654     const STRLEN slen = SvCUR(PL_linestr);
11655
11656     PERL_ARGS_ASSERT_SWALLOW_BOM;
11657
11658     switch (s[0]) {
11659     case 0xFF:
11660         if (s[1] == 0xFE) {
11661             /* UTF-16 little-endian? (or UTF-32LE?) */
11662             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11663                 /* diag_listed_as: Unsupported script encoding %s */
11664                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11665 #ifndef PERL_NO_UTF16_FILTER
11666             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11667             s += 2;
11668             if (PL_bufend > (char*)s) {
11669                 s = add_utf16_textfilter(s, TRUE);
11670             }
11671 #else
11672             /* diag_listed_as: Unsupported script encoding %s */
11673             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11674 #endif
11675         }
11676         break;
11677     case 0xFE:
11678         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11679 #ifndef PERL_NO_UTF16_FILTER
11680             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11681             s += 2;
11682             if (PL_bufend > (char *)s) {
11683                 s = add_utf16_textfilter(s, FALSE);
11684             }
11685 #else
11686             /* diag_listed_as: Unsupported script encoding %s */
11687             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11688 #endif
11689         }
11690         break;
11691     case BOM_UTF8_FIRST_BYTE: {
11692         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11693         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11694             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11695             s += len + 1;                      /* UTF-8 */
11696         }
11697         break;
11698     }
11699     case 0:
11700         if (slen > 3) {
11701              if (s[1] == 0) {
11702                   if (s[2] == 0xFE && s[3] == 0xFF) {
11703                        /* UTF-32 big-endian */
11704                        /* diag_listed_as: Unsupported script encoding %s */
11705                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11706                   }
11707              }
11708              else if (s[2] == 0 && s[3] != 0) {
11709                   /* Leading bytes
11710                    * 00 xx 00 xx
11711                    * are a good indicator of UTF-16BE. */
11712 #ifndef PERL_NO_UTF16_FILTER
11713                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11714                   s = add_utf16_textfilter(s, FALSE);
11715 #else
11716                   /* diag_listed_as: Unsupported script encoding %s */
11717                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11718 #endif
11719              }
11720         }
11721
11722     default:
11723          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11724                   /* Leading bytes
11725                    * xx 00 xx 00
11726                    * are a good indicator of UTF-16LE. */
11727 #ifndef PERL_NO_UTF16_FILTER
11728               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11729               s = add_utf16_textfilter(s, TRUE);
11730 #else
11731               /* diag_listed_as: Unsupported script encoding %s */
11732               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11733 #endif
11734          }
11735     }
11736     return (char*)s;
11737 }
11738
11739
11740 #ifndef PERL_NO_UTF16_FILTER
11741 static I32
11742 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11743 {
11744     dVAR;
11745     SV *const filter = FILTER_DATA(idx);
11746     /* We re-use this each time round, throwing the contents away before we
11747        return.  */
11748     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11749     SV *const utf8_buffer = filter;
11750     IV status = IoPAGE(filter);
11751     const bool reverse = cBOOL(IoLINES(filter));
11752     I32 retval;
11753
11754     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11755
11756     /* As we're automatically added, at the lowest level, and hence only called
11757        from this file, we can be sure that we're not called in block mode. Hence
11758        don't bother writing code to deal with block mode.  */
11759     if (maxlen) {
11760         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11761     }
11762     if (status < 0) {
11763         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11764     }
11765     DEBUG_P(PerlIO_printf(Perl_debug_log,
11766                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11767                           FPTR2DPTR(void *, S_utf16_textfilter),
11768                           reverse ? 'l' : 'b', idx, maxlen, status,
11769                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11770
11771     while (1) {
11772         STRLEN chars;
11773         STRLEN have;
11774         I32 newlen;
11775         U8 *end;
11776         /* First, look in our buffer of existing UTF-8 data:  */
11777         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11778
11779         if (nl) {
11780             ++nl;
11781         } else if (status == 0) {
11782             /* EOF */
11783             IoPAGE(filter) = 0;
11784             nl = SvEND(utf8_buffer);
11785         }
11786         if (nl) {
11787             STRLEN got = nl - SvPVX(utf8_buffer);
11788             /* Did we have anything to append?  */
11789             retval = got != 0;
11790             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11791             /* Everything else in this code works just fine if SVp_POK isn't
11792                set.  This, however, needs it, and we need it to work, else
11793                we loop infinitely because the buffer is never consumed.  */
11794             sv_chop(utf8_buffer, nl);
11795             break;
11796         }
11797
11798         /* OK, not a complete line there, so need to read some more UTF-16.
11799            Read an extra octect if the buffer currently has an odd number. */
11800         while (1) {
11801             if (status <= 0)
11802                 break;
11803             if (SvCUR(utf16_buffer) >= 2) {
11804                 /* Location of the high octet of the last complete code point.
11805                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11806                    *coupled* with all the benefits of partial reads and
11807                    endianness.  */
11808                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11809                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11810
11811                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11812                     break;
11813                 }
11814
11815                 /* We have the first half of a surrogate. Read more.  */
11816                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11817             }
11818
11819             status = FILTER_READ(idx + 1, utf16_buffer,
11820                                  160 + (SvCUR(utf16_buffer) & 1));
11821             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11822             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11823             if (status < 0) {
11824                 /* Error */
11825                 IoPAGE(filter) = status;
11826                 return status;
11827             }
11828         }
11829
11830         chars = SvCUR(utf16_buffer) >> 1;
11831         have = SvCUR(utf8_buffer);
11832         SvGROW(utf8_buffer, have + chars * 3 + 1);
11833
11834         if (reverse) {
11835             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11836                                          (U8*)SvPVX_const(utf8_buffer) + have,
11837                                          chars * 2, &newlen);
11838         } else {
11839             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11840                                 (U8*)SvPVX_const(utf8_buffer) + have,
11841                                 chars * 2, &newlen);
11842         }
11843         SvCUR_set(utf8_buffer, have + newlen);
11844         *end = '\0';
11845
11846         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11847            it's private to us, and utf16_to_utf8{,reversed} take a
11848            (pointer,length) pair, rather than a NUL-terminated string.  */
11849         if(SvCUR(utf16_buffer) & 1) {
11850             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11851             SvCUR_set(utf16_buffer, 1);
11852         } else {
11853             SvCUR_set(utf16_buffer, 0);
11854         }
11855     }
11856     DEBUG_P(PerlIO_printf(Perl_debug_log,
11857                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11858                           status,
11859                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11860     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11861     return retval;
11862 }
11863
11864 static U8 *
11865 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11866 {
11867     SV *filter = filter_add(S_utf16_textfilter, NULL);
11868
11869     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11870
11871     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11872     sv_setpvs(filter, "");
11873     IoLINES(filter) = reversed;
11874     IoPAGE(filter) = 1; /* Not EOF */
11875
11876     /* Sadly, we have to return a valid pointer, come what may, so we have to
11877        ignore any error return from this.  */
11878     SvCUR_set(PL_linestr, 0);
11879     if (FILTER_READ(0, PL_linestr, 0)) {
11880         SvUTF8_on(PL_linestr);
11881     } else {
11882         SvUTF8_on(PL_linestr);
11883     }
11884     PL_bufend = SvEND(PL_linestr);
11885     return (U8*)SvPVX(PL_linestr);
11886 }
11887 #endif
11888
11889 /*
11890 Returns a pointer to the next character after the parsed
11891 vstring, as well as updating the passed in sv.
11892
11893 Function must be called like
11894
11895         sv = sv_2mortal(newSV(5));
11896         s = scan_vstring(s,e,sv);
11897
11898 where s and e are the start and end of the string.
11899 The sv should already be large enough to store the vstring
11900 passed in, for performance reasons.
11901
11902 This function may croak if fatal warnings are enabled in the
11903 calling scope, hence the sv_2mortal in the example (to prevent
11904 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11905 sv_2mortal.
11906
11907 */
11908
11909 char *
11910 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11911 {
11912     dVAR;
11913     const char *pos = s;
11914     const char *start = s;
11915
11916     PERL_ARGS_ASSERT_SCAN_VSTRING;
11917
11918     if (*pos == 'v') pos++;  /* get past 'v' */
11919     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11920         pos++;
11921     if ( *pos != '.') {
11922         /* this may not be a v-string if followed by => */
11923         const char *next = pos;
11924         while (next < e && isSPACE(*next))
11925             ++next;
11926         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11927             /* return string not v-string */
11928             sv_setpvn(sv,(char *)s,pos-s);
11929             return (char *)pos;
11930         }
11931     }
11932
11933     if (!isALPHA(*pos)) {
11934         U8 tmpbuf[UTF8_MAXBYTES+1];
11935
11936         if (*s == 'v')
11937             s++;  /* get past 'v' */
11938
11939         sv_setpvs(sv, "");
11940
11941         for (;;) {
11942             /* this is atoi() that tolerates underscores */
11943             U8 *tmpend;
11944             UV rev = 0;
11945             const char *end = pos;
11946             UV mult = 1;
11947             while (--end >= s) {
11948                 if (*end != '_') {
11949                     const UV orev = rev;
11950                     rev += (*end - '0') * mult;
11951                     mult *= 10;
11952                     if (orev > rev)
11953                         /* diag_listed_as: Integer overflow in %s number */
11954                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11955                                          "Integer overflow in decimal number");
11956                 }
11957             }
11958 #ifdef EBCDIC
11959             if (rev > 0x7FFFFFFF)
11960                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11961 #endif
11962             /* Append native character for the rev point */
11963             tmpend = uvchr_to_utf8(tmpbuf, rev);
11964             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11965             if (!UVCHR_IS_INVARIANT(rev))
11966                  SvUTF8_on(sv);
11967             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11968                  s = ++pos;
11969             else {
11970                  s = pos;
11971                  break;
11972             }
11973             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11974                  pos++;
11975         }
11976         SvPOK_on(sv);
11977         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11978         SvRMAGICAL_on(sv);
11979     }
11980     return (char *)s;
11981 }
11982
11983 int
11984 Perl_keyword_plugin_standard(pTHX_
11985         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11986 {
11987     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11988     PERL_UNUSED_CONTEXT;
11989     PERL_UNUSED_ARG(keyword_ptr);
11990     PERL_UNUSED_ARG(keyword_len);
11991     PERL_UNUSED_ARG(op_ptr);
11992     return KEYWORD_PLUGIN_DECLINE;
11993 }
11994
11995 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11996 static void
11997 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11998 {
11999     SAVEI32(PL_lex_brackets);
12000     if (PL_lex_brackets > 100)
12001         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12002     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12003     SAVEI32(PL_lex_allbrackets);
12004     PL_lex_allbrackets = 0;
12005     SAVEI8(PL_lex_fakeeof);
12006     PL_lex_fakeeof = (U8)fakeeof;
12007     if(yyparse(gramtype) && !PL_parser->error_count)
12008         qerror(Perl_mess(aTHX_ "Parse error"));
12009 }
12010
12011 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12012 static OP *
12013 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12014 {
12015     OP *o;
12016     ENTER;
12017     SAVEVPTR(PL_eval_root);
12018     PL_eval_root = NULL;
12019     parse_recdescent(gramtype, fakeeof);
12020     o = PL_eval_root;
12021     LEAVE;
12022     return o;
12023 }
12024
12025 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12026 static OP *
12027 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12028 {
12029     OP *exprop;
12030     if (flags & ~PARSE_OPTIONAL)
12031         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12032     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12033     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12034         if (!PL_parser->error_count)
12035             qerror(Perl_mess(aTHX_ "Parse error"));
12036         exprop = newOP(OP_NULL, 0);
12037     }
12038     return exprop;
12039 }
12040
12041 /*
12042 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12043
12044 Parse a Perl arithmetic expression.  This may contain operators of precedence
12045 down to the bit shift operators.  The expression must be followed (and thus
12046 terminated) either by a comparison or lower-precedence operator or by
12047 something that would normally terminate an expression such as semicolon.
12048 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12049 otherwise it is mandatory.  It is up to the caller to ensure that the
12050 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12051 the source of the code to be parsed and the lexical context for the
12052 expression.
12053
12054 The op tree representing the expression is returned.  If an optional
12055 expression is absent, a null pointer is returned, otherwise the pointer
12056 will be non-null.
12057
12058 If an error occurs in parsing or compilation, in most cases a valid op
12059 tree is returned anyway.  The error is reflected in the parser state,
12060 normally resulting in a single exception at the top level of parsing
12061 which covers all the compilation errors that occurred.  Some compilation
12062 errors, however, will throw an exception immediately.
12063
12064 =cut
12065 */
12066
12067 OP *
12068 Perl_parse_arithexpr(pTHX_ U32 flags)
12069 {
12070     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12071 }
12072
12073 /*
12074 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12075
12076 Parse a Perl term expression.  This may contain operators of precedence
12077 down to the assignment operators.  The expression must be followed (and thus
12078 terminated) either by a comma or lower-precedence operator or by
12079 something that would normally terminate an expression such as semicolon.
12080 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12081 otherwise it is mandatory.  It is up to the caller to ensure that the
12082 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12083 the source of the code to be parsed and the lexical context for the
12084 expression.
12085
12086 The op tree representing the expression is returned.  If an optional
12087 expression is absent, a null pointer is returned, otherwise the pointer
12088 will be non-null.
12089
12090 If an error occurs in parsing or compilation, in most cases a valid op
12091 tree is returned anyway.  The error is reflected in the parser state,
12092 normally resulting in a single exception at the top level of parsing
12093 which covers all the compilation errors that occurred.  Some compilation
12094 errors, however, will throw an exception immediately.
12095
12096 =cut
12097 */
12098
12099 OP *
12100 Perl_parse_termexpr(pTHX_ U32 flags)
12101 {
12102     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12103 }
12104
12105 /*
12106 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12107
12108 Parse a Perl list expression.  This may contain operators of precedence
12109 down to the comma operator.  The expression must be followed (and thus
12110 terminated) either by a low-precedence logic operator such as C<or> or by
12111 something that would normally terminate an expression such as semicolon.
12112 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12113 otherwise it is mandatory.  It is up to the caller to ensure that the
12114 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12115 the source of the code to be parsed and the lexical context for the
12116 expression.
12117
12118 The op tree representing the expression is returned.  If an optional
12119 expression is absent, a null pointer is returned, otherwise the pointer
12120 will be non-null.
12121
12122 If an error occurs in parsing or compilation, in most cases a valid op
12123 tree is returned anyway.  The error is reflected in the parser state,
12124 normally resulting in a single exception at the top level of parsing
12125 which covers all the compilation errors that occurred.  Some compilation
12126 errors, however, will throw an exception immediately.
12127
12128 =cut
12129 */
12130
12131 OP *
12132 Perl_parse_listexpr(pTHX_ U32 flags)
12133 {
12134     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12135 }
12136
12137 /*
12138 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12139
12140 Parse a single complete Perl expression.  This allows the full
12141 expression grammar, including the lowest-precedence operators such
12142 as C<or>.  The expression must be followed (and thus terminated) by a
12143 token that an expression would normally be terminated by: end-of-file,
12144 closing bracketing punctuation, semicolon, or one of the keywords that
12145 signals a postfix expression-statement modifier.  If I<flags> includes
12146 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12147 mandatory.  It is up to the caller to ensure that the dynamic parser
12148 state (L</PL_parser> et al) is correctly set to reflect the source of
12149 the code to be parsed and the lexical context for the expression.
12150
12151 The op tree representing the expression is returned.  If an optional
12152 expression is absent, a null pointer is returned, otherwise the pointer
12153 will be non-null.
12154
12155 If an error occurs in parsing or compilation, in most cases a valid op
12156 tree is returned anyway.  The error is reflected in the parser state,
12157 normally resulting in a single exception at the top level of parsing
12158 which covers all the compilation errors that occurred.  Some compilation
12159 errors, however, will throw an exception immediately.
12160
12161 =cut
12162 */
12163
12164 OP *
12165 Perl_parse_fullexpr(pTHX_ U32 flags)
12166 {
12167     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12168 }
12169
12170 /*
12171 =for apidoc Amx|OP *|parse_block|U32 flags
12172
12173 Parse a single complete Perl code block.  This consists of an opening
12174 brace, a sequence of statements, and a closing brace.  The block
12175 constitutes a lexical scope, so C<my> variables and various compile-time
12176 effects can be contained within it.  It is up to the caller to ensure
12177 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12178 reflect the source of the code to be parsed and the lexical context for
12179 the statement.
12180
12181 The op tree representing the code block is returned.  This is always a
12182 real op, never a null pointer.  It will normally be a C<lineseq> list,
12183 including C<nextstate> or equivalent ops.  No ops to construct any kind
12184 of runtime scope are included by virtue of it being a block.
12185
12186 If an error occurs in parsing or compilation, in most cases a valid op
12187 tree (most likely null) is returned anyway.  The error is reflected in
12188 the parser state, normally resulting in a single exception at the top
12189 level of parsing which covers all the compilation errors that occurred.
12190 Some compilation errors, however, will throw an exception immediately.
12191
12192 The I<flags> parameter is reserved for future use, and must always
12193 be zero.
12194
12195 =cut
12196 */
12197
12198 OP *
12199 Perl_parse_block(pTHX_ U32 flags)
12200 {
12201     if (flags)
12202         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12203     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12204 }
12205
12206 /*
12207 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12208
12209 Parse a single unadorned Perl statement.  This may be a normal imperative
12210 statement or a declaration that has compile-time effect.  It does not
12211 include any label or other affixture.  It is up to the caller to ensure
12212 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12213 reflect the source of the code to be parsed and the lexical context for
12214 the statement.
12215
12216 The op tree representing the statement is returned.  This may be a
12217 null pointer if the statement is null, for example if it was actually
12218 a subroutine definition (which has compile-time side effects).  If not
12219 null, it will be ops directly implementing the statement, suitable to
12220 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12221 equivalent op (except for those embedded in a scope contained entirely
12222 within the statement).
12223
12224 If an error occurs in parsing or compilation, in most cases a valid op
12225 tree (most likely null) is returned anyway.  The error is reflected in
12226 the parser state, normally resulting in a single exception at the top
12227 level of parsing which covers all the compilation errors that occurred.
12228 Some compilation errors, however, will throw an exception immediately.
12229
12230 The I<flags> parameter is reserved for future use, and must always
12231 be zero.
12232
12233 =cut
12234 */
12235
12236 OP *
12237 Perl_parse_barestmt(pTHX_ U32 flags)
12238 {
12239     if (flags)
12240         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12241     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12242 }
12243
12244 /*
12245 =for apidoc Amx|SV *|parse_label|U32 flags
12246
12247 Parse a single label, possibly optional, of the type that may prefix a
12248 Perl statement.  It is up to the caller to ensure that the dynamic parser
12249 state (L</PL_parser> et al) is correctly set to reflect the source of
12250 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12251 label is optional, otherwise it is mandatory.
12252
12253 The name of the label is returned in the form of a fresh scalar.  If an
12254 optional label is absent, a null pointer is returned.
12255
12256 If an error occurs in parsing, which can only occur if the label is
12257 mandatory, a valid label is returned anyway.  The error is reflected in
12258 the parser state, normally resulting in a single exception at the top
12259 level of parsing which covers all the compilation errors that occurred.
12260
12261 =cut
12262 */
12263
12264 SV *
12265 Perl_parse_label(pTHX_ U32 flags)
12266 {
12267     if (flags & ~PARSE_OPTIONAL)
12268         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12269     if (PL_lex_state == LEX_KNOWNEXT) {
12270         PL_parser->yychar = yylex();
12271         if (PL_parser->yychar == LABEL) {
12272             char * const lpv = pl_yylval.pval;
12273             STRLEN llen = strlen(lpv);
12274             PL_parser->yychar = YYEMPTY;
12275             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12276         } else {
12277             yyunlex();
12278             goto no_label;
12279         }
12280     } else {
12281         char *s, *t;
12282         STRLEN wlen, bufptr_pos;
12283         lex_read_space(0);
12284         t = s = PL_bufptr;
12285         if (!isIDFIRST_lazy_if(s, UTF))
12286             goto no_label;
12287         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12288         if (word_takes_any_delimeter(s, wlen))
12289             goto no_label;
12290         bufptr_pos = s - SvPVX(PL_linestr);
12291         PL_bufptr = t;
12292         lex_read_space(LEX_KEEP_PREVIOUS);
12293         t = PL_bufptr;
12294         s = SvPVX(PL_linestr) + bufptr_pos;
12295         if (t[0] == ':' && t[1] != ':') {
12296             PL_oldoldbufptr = PL_oldbufptr;
12297             PL_oldbufptr = s;
12298             PL_bufptr = t+1;
12299             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12300         } else {
12301             PL_bufptr = s;
12302             no_label:
12303             if (flags & PARSE_OPTIONAL) {
12304                 return NULL;
12305             } else {
12306                 qerror(Perl_mess(aTHX_ "Parse error"));
12307                 return newSVpvs("x");
12308             }
12309         }
12310     }
12311 }
12312
12313 /*
12314 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12315
12316 Parse a single complete Perl statement.  This may be a normal imperative
12317 statement or a declaration that has compile-time effect, and may include
12318 optional labels.  It is up to the caller to ensure that the dynamic
12319 parser state (L</PL_parser> et al) is correctly set to reflect the source
12320 of the code to be parsed and the lexical context for the statement.
12321
12322 The op tree representing the statement is returned.  This may be a
12323 null pointer if the statement is null, for example if it was actually
12324 a subroutine definition (which has compile-time side effects).  If not
12325 null, it will be the result of a L</newSTATEOP> call, normally including
12326 a C<nextstate> or equivalent op.
12327
12328 If an error occurs in parsing or compilation, in most cases a valid op
12329 tree (most likely null) is returned anyway.  The error is reflected in
12330 the parser state, normally resulting in a single exception at the top
12331 level of parsing which covers all the compilation errors that occurred.
12332 Some compilation errors, however, will throw an exception immediately.
12333
12334 The I<flags> parameter is reserved for future use, and must always
12335 be zero.
12336
12337 =cut
12338 */
12339
12340 OP *
12341 Perl_parse_fullstmt(pTHX_ U32 flags)
12342 {
12343     if (flags)
12344         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12345     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12346 }
12347
12348 /*
12349 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12350
12351 Parse a sequence of zero or more Perl statements.  These may be normal
12352 imperative statements, including optional labels, or declarations
12353 that have compile-time effect, or any mixture thereof.  The statement
12354 sequence ends when a closing brace or end-of-file is encountered in a
12355 place where a new statement could have validly started.  It is up to
12356 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12357 is correctly set to reflect the source of the code to be parsed and the
12358 lexical context for the statements.
12359
12360 The op tree representing the statement sequence is returned.  This may
12361 be a null pointer if the statements were all null, for example if there
12362 were no statements or if there were only subroutine definitions (which
12363 have compile-time side effects).  If not null, it will be a C<lineseq>
12364 list, normally including C<nextstate> or equivalent ops.
12365
12366 If an error occurs in parsing or compilation, in most cases a valid op
12367 tree is returned anyway.  The error is reflected in the parser state,
12368 normally resulting in a single exception at the top level of parsing
12369 which covers all the compilation errors that occurred.  Some compilation
12370 errors, however, will throw an exception immediately.
12371
12372 The I<flags> parameter is reserved for future use, and must always
12373 be zero.
12374
12375 =cut
12376 */
12377
12378 OP *
12379 Perl_parse_stmtseq(pTHX_ U32 flags)
12380 {
12381     OP *stmtseqop;
12382     I32 c;
12383     if (flags)
12384         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12385     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12386     c = lex_peek_unichar(0);
12387     if (c != -1 && c != /*{*/'}')
12388         qerror(Perl_mess(aTHX_ "Parse error"));
12389     return stmtseqop;
12390 }
12391
12392 /*
12393  * Local variables:
12394  * c-indentation-style: bsd
12395  * c-basic-offset: 4
12396  * indent-tabs-mode: nil
12397  * End:
12398  *
12399  * ex: set ts=8 sts=4 sw=4 et:
12400  */