toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #include "keywords.h"
 186
 187 /* CLINE is a macro that ensures PL_copline has a sane value */
 188
 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 190
 191 #ifdef PERL_MAD
 192 #  define SKIPSPACE0(s) skipspace0(s)
 193 #  define SKIPSPACE1(s) skipspace1(s)
 194 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 195 #  define PEEKSPACE(s) skipspace2(s,0)
 196 #else
 197 #  define SKIPSPACE0(s) skipspace(s)
 198 #  define SKIPSPACE1(s) skipspace(s)
 199 #  define SKIPSPACE2(s,tsv) skipspace(s)
 200 #  define PEEKSPACE(s) skipspace(s)
 201 #endif
 202
 203 /*
 204  * Convenience functions to return different tokens and prime the
 205  * lexer for the next token.  They all take an argument.
 206  *
 207  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 208  * OPERATOR     : generic operator
 209  * AOPERATOR    : assignment operator
 210  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 211  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 212  * PREREF       : *EXPR where EXPR is not a simple identifier
 213  * TERM         : expression term
 214  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 215  * LOOPX        : loop exiting command (goto, last, dump, etc)
 216  * FTST         : file test operator
 217  * FUN0         : zero-argument function
 218  * FUN0OP       : zero-argument function, with its op created in this file
 219  * FUN1         : not used, except for not, which isn't a UNIOP
 220  * BOop         : bitwise or or xor
 221  * BAop         : bitwise and
 222  * SHop         : shift operator
 223  * PWop         : power operator
 224  * PMop         : pattern-matching operator
 225  * Aop          : addition-level operator
 226  * Mop          : multiplication-level operator
 227  * Eop          : equality-testing operator
 228  * Rop          : relational operator <= != gt
 229  *
 230  * Also see LOP and lop() below.
 231  */
 232
 233 #ifdef DEBUGGING /* Serve -DT. */
 234 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 235 #else
 236 #   define REPORT(retval) (retval)
 237 #endif
 238
 239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 248 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 249 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 250 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 251 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 252 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 253 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 254 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 255 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 256 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 257 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 258 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 259 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 260 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 261
 262 /* This bit of chicanery makes a unary function followed by
 263  * a parenthesis into a function with one argument, highest precedence.
 264  * The UNIDOR macro is for unary functions that can be followed by the //
 265  * operator (such as C<shift // 0>).
 266  */
 267 #define UNI3(f,x,have_x) { \
 268         pl_yylval.ival = f; \
 269         if (have_x) PL_expect = x; \
 270         PL_bufptr = s; \
 271         PL_last_uni = PL_oldbufptr; \
 272         PL_last_lop_op = f; \
 273         if (*s == '(') \
 274             return REPORT( (int)FUNC1 ); \
 275         s = PEEKSPACE(s); \
 276         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 277         }
 278 #define UNI(f)    UNI3(f,XTERM,1)
 279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 280 #define UNIPROTO(f,optional) { \
 281         if (optional) PL_last_uni = PL_oldbufptr; \
 282         OPERATOR(f); \
 283         }
 284
 285 #define UNIBRACK(f) UNI3(f,0,0)
 286
 287 /* grandfather return to old style */
 288 #define OLDLOP(f) \
 289         do { \
 290             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 291                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 292             pl_yylval.ival = (f); \
 293             PL_expect = XTERM; \
 294             PL_bufptr = s; \
 295             return (int)LSTOP; \
 296         } while(0)
 297
 298 #define COPLINE_INC_WITH_HERELINES                  \
 299     STMT_START {                                     \
 300         CopLINE_inc(PL_curcop);                       \
 301         if (PL_parser->herelines)                      \
 302             CopLINE(PL_curcop) += PL_parser->herelines, \
 303             PL_parser->herelines = 0;                    \
 304     } STMT_END
 305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 306  * is no sublex_push to follow. */
 307 #define COPLINE_SET_FROM_MULTI_END            \
 308     STMT_START {                               \
 309         CopLINE_set(PL_curcop, PL_multi_end);   \
 310         if (PL_multi_end != PL_multi_start)      \
 311             PL_parser->herelines = 0;             \
 312     } STMT_END
 313
 314
 315 #ifdef DEBUGGING
 316
 317 /* how to interpret the pl_yylval associated with the token */
 318 enum token_type {
 319     TOKENTYPE_NONE,
 320     TOKENTYPE_IVAL,
 321     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 322     TOKENTYPE_PVAL,
 323     TOKENTYPE_OPVAL
 324 };
 325
 326 static struct debug_tokens {
 327     const int token;
 328     enum token_type type;
 329     const char *name;
 330 } const debug_tokens[] =
 331 {
 332     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 333     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 334     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 335     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 336     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 337     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 338     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 339     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 340     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 341     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 342     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 343     { DO,               TOKENTYPE_NONE,         "DO" },
 344     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 345     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 346     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 347     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 348     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 349     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 350     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 351     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 352     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 353     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 354     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 355     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 356     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 357     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 358     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 359     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 360     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 361     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 362     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 363     { IF,               TOKENTYPE_IVAL,         "IF" },
 364     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 365     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 366     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 367     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 368     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 369     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 370     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 371     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 372     { MY,               TOKENTYPE_IVAL,         "MY" },
 373     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 374     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 375     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 376     { OROR,             TOKENTYPE_NONE,         "OROR" },
 377     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 378     { PEG,              TOKENTYPE_NONE,         "PEG" },
 379     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 380     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 381     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 382     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 383     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 384     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 385     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 386     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 387     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 388     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 389     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 390     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 391     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 392     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 393     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 394     { SUB,              TOKENTYPE_NONE,         "SUB" },
 395     { THING,            TOKENTYPE_OPVAL,        "THING" },
 396     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 397     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 398     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 399     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 400     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 401     { USE,              TOKENTYPE_IVAL,         "USE" },
 402     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 403     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 404     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 405     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 406     { 0,                TOKENTYPE_NONE,         NULL }
 407 };
 408
 409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 410
 411 STATIC int
 412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 413 {
 414     dVAR;
 415
 416     PERL_ARGS_ASSERT_TOKEREPORT;
 417
 418     if (DEBUG_T_TEST) {
 419         const char *name = NULL;
 420         enum token_type type = TOKENTYPE_NONE;
 421         const struct debug_tokens *p;
 422         SV* const report = newSVpvs("<== ");
 423
 424         for (p = debug_tokens; p->token; p++) {
 425             if (p->token == (int)rv) {
 426                 name = p->name;
 427                 type = p->type;
 428                 break;
 429             }
 430         }
 431         if (name)
 432             Perl_sv_catpv(aTHX_ report, name);
 433         else if ((char)rv > ' ' && (char)rv <= '~')
 434         {
 435             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 436             if ((char)rv == 'p')
 437                 sv_catpvs(report, " (pending identifier)");
 438         }
 439         else if (!rv)
 440             sv_catpvs(report, "EOF");
 441         else
 442             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 443         switch (type) {
 444         case TOKENTYPE_NONE:
 445             break;
 446         case TOKENTYPE_IVAL:
 447             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 448             break;
 449         case TOKENTYPE_OPNUM:
 450             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 451                                     PL_op_name[lvalp->ival]);
 452             break;
 453         case TOKENTYPE_PVAL:
 454             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 455             break;
 456         case TOKENTYPE_OPVAL:
 457             if (lvalp->opval) {
 458                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 459                                     PL_op_name[lvalp->opval->op_type]);
 460                 if (lvalp->opval->op_type == OP_CONST) {
 461                     Perl_sv_catpvf(aTHX_ report, " %s",
 462                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 463                 }
 464
 465             }
 466             else
 467                 sv_catpvs(report, "(opval=null)");
 468             break;
 469         }
 470         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 471     };
 472     return (int)rv;
 473 }
 474
 475
 476 /* print the buffer with suitable escapes */
 477
 478 STATIC void
 479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 480 {
 481     SV* const tmp = newSVpvs("");
 482
 483     PERL_ARGS_ASSERT_PRINTBUF;
 484
 485     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 486     SvREFCNT_dec(tmp);
 487 }
 488
 489 #endif
 490
 491 static int
 492 S_deprecate_commaless_var_list(pTHX) {
 493     PL_expect = XTERM;
 494     deprecate("comma-less variable list");
 495     return REPORT(','); /* grandfather non-comma-format format */
 496 }
 497
 498 /*
 499  * S_ao
 500  *
 501  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 502  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 503  */
 504
 505 STATIC int
 506 S_ao(pTHX_ int toketype)
 507 {
 508     dVAR;
 509     if (*PL_bufptr == '=') {
 510         PL_bufptr++;
 511         if (toketype == ANDAND)
 512             pl_yylval.ival = OP_ANDASSIGN;
 513         else if (toketype == OROR)
 514             pl_yylval.ival = OP_ORASSIGN;
 515         else if (toketype == DORDOR)
 516             pl_yylval.ival = OP_DORASSIGN;
 517         toketype = ASSIGNOP;
 518     }
 519     return toketype;
 520 }
 521
 522 /*
 523  * S_no_op
 524  * When Perl expects an operator and finds something else, no_op
 525  * prints the warning.  It always prints "<something> found where
 526  * operator expected.  It prints "Missing semicolon on previous line?"
 527  * if the surprise occurs at the start of the line.  "do you need to
 528  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 529  * where the compiler doesn't know if foo is a method call or a function.
 530  * It prints "Missing operator before end of line" if there's nothing
 531  * after the missing operator, or "... before <...>" if there is something
 532  * after the missing operator.
 533  */
 534
 535 STATIC void
 536 S_no_op(pTHX_ const char *const what, char *s)
 537 {
 538     dVAR;
 539     char * const oldbp = PL_bufptr;
 540     const bool is_first = (PL_oldbufptr == PL_linestart);
 541
 542     PERL_ARGS_ASSERT_NO_OP;
 543
 544     if (!s)
 545         s = oldbp;
 546     else
 547         PL_bufptr = s;
 548     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 549     if (ckWARN_d(WARN_SYNTAX)) {
 550         if (is_first)
 551             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 552                     "\t(Missing semicolon on previous line?)\n");
 553         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 554             const char *t;
 555             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 556                                                             t += UTF ? UTF8SKIP(t) : 1)
 557                 NOOP;
 558             if (t < PL_bufptr && isSPACE(*t))
 559                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 560                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 561                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 562         }
 563         else {
 564             assert(s >= oldbp);
 565             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 566                     "\t(Missing operator before %"UTF8f"?)\n",
 567                      UTF8fARG(UTF, s - oldbp, oldbp));
 568         }
 569     }
 570     PL_bufptr = oldbp;
 571 }
 572
 573 /*
 574  * S_missingterm
 575  * Complain about missing quote/regexp/heredoc terminator.
 576  * If it's called with NULL then it cauterizes the line buffer.
 577  * If we're in a delimited string and the delimiter is a control
 578  * character, it's reformatted into a two-char sequence like ^C.
 579  * This is fatal.
 580  */
 581
 582 STATIC void
 583 S_missingterm(pTHX_ char *s)
 584 {
 585     dVAR;
 586     char tmpbuf[3];
 587     char q;
 588     if (s) {
 589         char * const nl = strrchr(s,'\n');
 590         if (nl)
 591             *nl = '\0';
 592     }
 593     else if ((U8) PL_multi_close < 32) {
 594         *tmpbuf = '^';
 595         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 596         tmpbuf[2] = '\0';
 597         s = tmpbuf;
 598     }
 599     else {
 600         *tmpbuf = (char)PL_multi_close;
 601         tmpbuf[1] = '\0';
 602         s = tmpbuf;
 603     }
 604     q = strchr(s,'"') ? '\'' : '"';
 605     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 606 }
 607
 608 #include "feature.h"
 609
 610 /*
 611  * Check whether the named feature is enabled.
 612  */
 613 bool
 614 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 615 {
 616     dVAR;
 617     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 618
 619     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 620
 621     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 622
 623     if (namelen > MAX_FEATURE_LEN)
 624         return FALSE;
 625     memcpy(&he_name[8], name, namelen);
 626
 627     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 628                                      REFCOUNTED_HE_EXISTS));
 629 }
 630
 631 /*
 632  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 633  * utf16-to-utf8-reversed.
 634  */
 635
 636 #ifdef PERL_CR_FILTER
 637 static void
 638 strip_return(SV *sv)
 639 {
 640     const char *s = SvPVX_const(sv);
 641     const char * const e = s + SvCUR(sv);
 642
 643     PERL_ARGS_ASSERT_STRIP_RETURN;
 644
 645     /* outer loop optimized to do nothing if there are no CR-LFs */
 646     while (s < e) {
 647         if (*s++ == '\r' && *s == '\n') {
 648             /* hit a CR-LF, need to copy the rest */
 649             char *d = s - 1;
 650             *d++ = *s++;
 651             while (s < e) {
 652                 if (*s == '\r' && s[1] == '\n')
 653                     s++;
 654                 *d++ = *s++;
 655             }
 656             SvCUR(sv) -= s - d;
 657             return;
 658         }
 659     }
 660 }
 661
 662 STATIC I32
 663 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 664 {
 665     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 666     if (count > 0 && !maxlen)
 667         strip_return(sv);
 668     return count;
 669 }
 670 #endif
 671
 672 /*
 673 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 674
 675 Creates and initialises a new lexer/parser state object, supplying
 676 a context in which to lex and parse from a new source of Perl code.
 677 A pointer to the new state object is placed in L</PL_parser>.  An entry
 678 is made on the save stack so that upon unwinding the new state object
 679 will be destroyed and the former value of L</PL_parser> will be restored.
 680 Nothing else need be done to clean up the parsing context.
 681
 682 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 683 non-null, provides a string (in SV form) containing code to be parsed.
 684 A copy of the string is made, so subsequent modification of I<line>
 685 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 686 from which code will be read to be parsed.  If both are non-null, the
 687 code in I<line> comes first and must consist of complete lines of input,
 688 and I<rsfp> supplies the remainder of the source.
 689
 690 The I<flags> parameter is reserved for future use.  Currently it is only
 691 used by perl internally, so extensions should always pass zero.
 692
 693 =cut
 694 */
 695
 696 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 697    can share filters with the current parser.
 698    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 699    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 700    destruction. This is used to handle the case of defaulting to reading the
 701    script from the standard input because no filename was given on the command
 702    line (without getting confused by situation where STDIN has been closed, so
 703    the script handle is opened on fd 0)  */
 704
 705 void
 706 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 707 {
 708     dVAR;
 709     const char *s = NULL;
 710     yy_parser *parser, *oparser;
 711     if (flags && flags & ~LEX_START_FLAGS)
 712         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 713
 714     /* create and initialise a parser */
 715
 716     Newxz(parser, 1, yy_parser);
 717     parser->old_parser = oparser = PL_parser;
 718     PL_parser = parser;
 719
 720     parser->stack = NULL;
 721     parser->ps = NULL;
 722     parser->stack_size = 0;
 723
 724     /* on scope exit, free this parser and restore any outer one */
 725     SAVEPARSER(parser);
 726     parser->saved_curcop = PL_curcop;
 727
 728     /* initialise lexer state */
 729
 730 #ifdef PERL_MAD
 731     parser->curforce = -1;
 732 #else
 733     parser->nexttoke = 0;
 734 #endif
 735     parser->error_count = oparser ? oparser->error_count : 0;
 736     parser->copline = parser->preambling = NOLINE;
 737     parser->lex_state = LEX_NORMAL;
 738     parser->expect = XSTATE;
 739     parser->rsfp = rsfp;
 740     parser->rsfp_filters =
 741       !(flags & LEX_START_SAME_FILTER) || !oparser
 742         ? NULL
 743         : MUTABLE_AV(SvREFCNT_inc(
 744             oparser->rsfp_filters
 745              ? oparser->rsfp_filters
 746              : (oparser->rsfp_filters = newAV())
 747           ));
 748
 749     Newx(parser->lex_brackstack, 120, char);
 750     Newx(parser->lex_casestack, 12, char);
 751     *parser->lex_casestack = '\0';
 752     Newxz(parser->lex_shared, 1, LEXSHARED);
 753
 754     if (line) {
 755         STRLEN len;
 756         s = SvPV_const(line, len);
 757         parser->linestr = flags & LEX_START_COPIED
 758                             ? SvREFCNT_inc_simple_NN(line)
 759                             : newSVpvn_flags(s, len, SvUTF8(line));
 760         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 761     } else {
 762         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 763     }
 764     parser->oldoldbufptr =
 765         parser->oldbufptr =
 766         parser->bufptr =
 767         parser->linestart = SvPVX(parser->linestr);
 768     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 769     parser->last_lop = parser->last_uni = NULL;
 770     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 771                                  |LEX_DONT_CLOSE_RSFP);
 772
 773     parser->in_pod = parser->filtered = 0;
 774 }
 775
 776
 777 /* delete a parser object */
 778
 779 void
 780 Perl_parser_free(pTHX_  const yy_parser *parser)
 781 {
 782     PERL_ARGS_ASSERT_PARSER_FREE;
 783
 784     PL_curcop = parser->saved_curcop;
 785     SvREFCNT_dec(parser->linestr);
 786
 787     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 788         PerlIO_clearerr(parser->rsfp);
 789     else if (parser->rsfp && (!parser->old_parser ||
 790                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 791         PerlIO_close(parser->rsfp);
 792     SvREFCNT_dec(parser->rsfp_filters);
 793     SvREFCNT_dec(parser->lex_stuff);
 794     SvREFCNT_dec(parser->sublex_info.repl);
 795
 796     Safefree(parser->lex_brackstack);
 797     Safefree(parser->lex_casestack);
 798     Safefree(parser->lex_shared);
 799     PL_parser = parser->old_parser;
 800     Safefree(parser);
 801 }
 802
 803 void
 804 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 805 {
 806 #ifdef PERL_MAD
 807     I32 nexttoke = parser->lasttoke;
 808 #else
 809     I32 nexttoke = parser->nexttoke;
 810 #endif
 811     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 812     while (nexttoke--) {
 813 #ifdef PERL_MAD
 814         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 815                                 & 0xffff)
 816          && parser->nexttoke[nexttoke].next_val.opval
 817          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 818          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 819                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 820                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 821         }
 822 #else
 823         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 824          && parser->nextval[nexttoke].opval
 825          && parser->nextval[nexttoke].opval->op_slabbed
 826          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 827             op_free(parser->nextval[nexttoke].opval);
 828             parser->nextval[nexttoke].opval = NULL;
 829         }
 830 #endif
 831     }
 832 }
 833
 834
 835 /*
 836 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 837
 838 Buffer scalar containing the chunk currently under consideration of the
 839 text currently being lexed.  This is always a plain string scalar (for
 840 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 841 normal scalar means; instead refer to the buffer directly by the pointer
 842 variables described below.
 843
 844 The lexer maintains various C<char*> pointers to things in the
 845 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 846 reallocated, all of these pointers must be updated.  Don't attempt to
 847 do this manually, but rather use L</lex_grow_linestr> if you need to
 848 reallocate the buffer.
 849
 850 The content of the text chunk in the buffer is commonly exactly one
 851 complete line of input, up to and including a newline terminator,
 852 but there are situations where it is otherwise.  The octets of the
 853 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 854 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 855 flag on this scalar, which may disagree with it.
 856
 857 For direct examination of the buffer, the variable
 858 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 859 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 860 of these pointers is usually preferable to examination of the scalar
 861 through normal scalar means.
 862
 863 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 864
 865 Direct pointer to the end of the chunk of text currently being lexed, the
 866 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 867 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 868 always located at the end of the buffer, and does not count as part of
 869 the buffer's contents.
 870
 871 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 872
 873 Points to the current position of lexing inside the lexer buffer.
 874 Characters around this point may be freely examined, within
 875 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 876 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 877 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 878
 879 Lexing code (whether in the Perl core or not) moves this pointer past
 880 the characters that it consumes.  It is also expected to perform some
 881 bookkeeping whenever a newline character is consumed.  This movement
 882 can be more conveniently performed by the function L</lex_read_to>,
 883 which handles newlines appropriately.
 884
 885 Interpretation of the buffer's octets can be abstracted out by
 886 using the slightly higher-level functions L</lex_peek_unichar> and
 887 L</lex_read_unichar>.
 888
 889 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 890
 891 Points to the start of the current line inside the lexer buffer.
 892 This is useful for indicating at which column an error occurred, and
 893 not much else.  This must be updated by any lexing code that consumes
 894 a newline; the function L</lex_read_to> handles this detail.
 895
 896 =cut
 897 */
 898
 899 /*
 900 =for apidoc Amx|bool|lex_bufutf8
 901
 902 Indicates whether the octets in the lexer buffer
 903 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 904 of Unicode characters.  If not, they should be interpreted as Latin-1
 905 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 906
 907 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 908 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 909 encoding.
 910
 911 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 912 is significant, but not the whole story regarding the input character
 913 encoding.  Normally, when a file is being read, the scalar contains octets
 914 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 915 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 916 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 917 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 918 is in effect.  This logic may change in the future; use this function
 919 instead of implementing the logic yourself.
 920
 921 =cut
 922 */
 923
 924 bool
 925 Perl_lex_bufutf8(pTHX)
 926 {
 927     return UTF;
 928 }
 929
 930 /*
 931 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 932
 933 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 934 at least I<len> octets (including terminating NUL).  Returns a
 935 pointer to the reallocated buffer.  This is necessary before making
 936 any direct modification of the buffer that would increase its length.
 937 L</lex_stuff_pvn> provides a more convenient way to insert text into
 938 the buffer.
 939
 940 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 941 this function updates all of the lexer's variables that point directly
 942 into the buffer.
 943
 944 =cut
 945 */
 946
 947 char *
 948 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 949 {
 950     SV *linestr;
 951     char *buf;
 952     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 953     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 954     linestr = PL_parser->linestr;
 955     buf = SvPVX(linestr);
 956     if (len <= SvLEN(linestr))
 957         return buf;
 958     bufend_pos = PL_parser->bufend - buf;
 959     bufptr_pos = PL_parser->bufptr - buf;
 960     oldbufptr_pos = PL_parser->oldbufptr - buf;
 961     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 962     linestart_pos = PL_parser->linestart - buf;
 963     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 964     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 965     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 966                             PL_parser->lex_shared->re_eval_start - buf : 0;
 967
 968     buf = sv_grow(linestr, len);
 969
 970     PL_parser->bufend = buf + bufend_pos;
 971     PL_parser->bufptr = buf + bufptr_pos;
 972     PL_parser->oldbufptr = buf + oldbufptr_pos;
 973     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 974     PL_parser->linestart = buf + linestart_pos;
 975     if (PL_parser->last_uni)
 976         PL_parser->last_uni = buf + last_uni_pos;
 977     if (PL_parser->last_lop)
 978         PL_parser->last_lop = buf + last_lop_pos;
 979     if (PL_parser->lex_shared->re_eval_start)
 980         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 981     return buf;
 982 }
 983
 984 /*
 985 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 986
 987 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 988 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 989 reallocating the buffer if necessary.  This means that lexing code that
 990 runs later will see the characters as if they had appeared in the input.
 991 It is not recommended to do this as part of normal parsing, and most
 992 uses of this facility run the risk of the inserted characters being
 993 interpreted in an unintended manner.
 994
 995 The string to be inserted is represented by I<len> octets starting
 996 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 997 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 998 The characters are recoded for the lexer buffer, according to how the
 999 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1000 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1001 function is more convenient.
1002
1003 =cut
1004 */
1005
1006 void
1007 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1008 {
1009     dVAR;
1010     char *bufptr;
1011     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1012     if (flags & ~(LEX_STUFF_UTF8))
1013         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1014     if (UTF) {
1015         if (flags & LEX_STUFF_UTF8) {
1016             goto plain_copy;
1017         } else {
1018             STRLEN highhalf = 0;    /* Count of variants */
1019             const char *p, *e = pv+len;
1020             for (p = pv; p != e; p++) {
1021                 if (! UTF8_IS_INVARIANT(*p)) {
1022                     highhalf++;
1023                 }
1024             }
1025             if (!highhalf)
1026                 goto plain_copy;
1027             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1028             bufptr = PL_parser->bufptr;
1029             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1030             SvCUR_set(PL_parser->linestr,
1031                 SvCUR(PL_parser->linestr) + len+highhalf);
1032             PL_parser->bufend += len+highhalf;
1033             for (p = pv; p != e; p++) {
1034                 U8 c = (U8)*p;
1035                 if (! UTF8_IS_INVARIANT(c)) {
1036                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1037                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1038                 } else {
1039                     *bufptr++ = (char)c;
1040                 }
1041             }
1042         }
1043     } else {
1044         if (flags & LEX_STUFF_UTF8) {
1045             STRLEN highhalf = 0;
1046             const char *p, *e = pv+len;
1047             for (p = pv; p != e; p++) {
1048                 U8 c = (U8)*p;
1049                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1050                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1051                                 "non-Latin-1 character into Latin-1 input");
1052                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1053                     p++;
1054                     highhalf++;
1055                 } else if (! UTF8_IS_INVARIANT(c)) {
1056                     /* malformed UTF-8 */
1057                     ENTER;
1058                     SAVESPTR(PL_warnhook);
1059                     PL_warnhook = PERL_WARNHOOK_FATAL;
1060                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1061                     LEAVE;
1062                 }
1063             }
1064             if (!highhalf)
1065                 goto plain_copy;
1066             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1067             bufptr = PL_parser->bufptr;
1068             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1069             SvCUR_set(PL_parser->linestr,
1070                 SvCUR(PL_parser->linestr) + len-highhalf);
1071             PL_parser->bufend += len-highhalf;
1072             p = pv;
1073             while (p < e) {
1074                 if (UTF8_IS_INVARIANT(*p)) {
1075                     *bufptr++ = *p;
1076                     p++;
1077                 }
1078                 else {
1079                     assert(p < e -1 );
1080                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1081                     p += 2;
1082                 }
1083             }
1084         } else {
1085           plain_copy:
1086             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1087             bufptr = PL_parser->bufptr;
1088             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1089             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1090             PL_parser->bufend += len;
1091             Copy(pv, bufptr, len, char);
1092         }
1093     }
1094 }
1095
1096 /*
1097 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1098
1099 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1100 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1101 reallocating the buffer if necessary.  This means that lexing code that
1102 runs later will see the characters as if they had appeared in the input.
1103 It is not recommended to do this as part of normal parsing, and most
1104 uses of this facility run the risk of the inserted characters being
1105 interpreted in an unintended manner.
1106
1107 The string to be inserted is represented by octets starting at I<pv>
1108 and continuing to the first nul.  These octets are interpreted as either
1109 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1110 in I<flags>.  The characters are recoded for the lexer buffer, according
1111 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1112 If it is not convenient to nul-terminate a string to be inserted, the
1113 L</lex_stuff_pvn> function is more appropriate.
1114
1115 =cut
1116 */
1117
1118 void
1119 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1120 {
1121     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1122     lex_stuff_pvn(pv, strlen(pv), flags);
1123 }
1124
1125 /*
1126 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1127
1128 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1129 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1130 reallocating the buffer if necessary.  This means that lexing code that
1131 runs later will see the characters as if they had appeared in the input.
1132 It is not recommended to do this as part of normal parsing, and most
1133 uses of this facility run the risk of the inserted characters being
1134 interpreted in an unintended manner.
1135
1136 The string to be inserted is the string value of I<sv>.  The characters
1137 are recoded for the lexer buffer, according to how the buffer is currently
1138 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1139 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1140 need to construct a scalar.
1141
1142 =cut
1143 */
1144
1145 void
1146 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1147 {
1148     char *pv;
1149     STRLEN len;
1150     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1151     if (flags)
1152         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1153     pv = SvPV(sv, len);
1154     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1155 }
1156
1157 /*
1158 =for apidoc Amx|void|lex_unstuff|char *ptr
1159
1160 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1161 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1162 This hides the discarded text from any lexing code that runs later,
1163 as if the text had never appeared.
1164
1165 This is not the normal way to consume lexed text.  For that, use
1166 L</lex_read_to>.
1167
1168 =cut
1169 */
1170
1171 void
1172 Perl_lex_unstuff(pTHX_ char *ptr)
1173 {
1174     char *buf, *bufend;
1175     STRLEN unstuff_len;
1176     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1177     buf = PL_parser->bufptr;
1178     if (ptr < buf)
1179         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1180     if (ptr == buf)
1181         return;
1182     bufend = PL_parser->bufend;
1183     if (ptr > bufend)
1184         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185     unstuff_len = ptr - buf;
1186     Move(ptr, buf, bufend+1-ptr, char);
1187     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1188     PL_parser->bufend = bufend - unstuff_len;
1189 }
1190
1191 /*
1192 =for apidoc Amx|void|lex_read_to|char *ptr
1193
1194 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1195 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1196 performing the correct bookkeeping whenever a newline character is passed.
1197 This is the normal way to consume lexed text.
1198
1199 Interpretation of the buffer's octets can be abstracted out by
1200 using the slightly higher-level functions L</lex_peek_unichar> and
1201 L</lex_read_unichar>.
1202
1203 =cut
1204 */
1205
1206 void
1207 Perl_lex_read_to(pTHX_ char *ptr)
1208 {
1209     char *s;
1210     PERL_ARGS_ASSERT_LEX_READ_TO;
1211     s = PL_parser->bufptr;
1212     if (ptr < s || ptr > PL_parser->bufend)
1213         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1214     for (; s != ptr; s++)
1215         if (*s == '\n') {
1216             COPLINE_INC_WITH_HERELINES;
1217             PL_parser->linestart = s+1;
1218         }
1219     PL_parser->bufptr = ptr;
1220 }
1221
1222 /*
1223 =for apidoc Amx|void|lex_discard_to|char *ptr
1224
1225 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1226 up to I<ptr>.  The remaining content of the buffer will be moved, and
1227 all pointers into the buffer updated appropriately.  I<ptr> must not
1228 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1229 it is not permitted to discard text that has yet to be lexed.
1230
1231 Normally it is not necessarily to do this directly, because it suffices to
1232 use the implicit discarding behaviour of L</lex_next_chunk> and things
1233 based on it.  However, if a token stretches across multiple lines,
1234 and the lexing code has kept multiple lines of text in the buffer for
1235 that purpose, then after completion of the token it would be wise to
1236 explicitly discard the now-unneeded earlier lines, to avoid future
1237 multi-line tokens growing the buffer without bound.
1238
1239 =cut
1240 */
1241
1242 void
1243 Perl_lex_discard_to(pTHX_ char *ptr)
1244 {
1245     char *buf;
1246     STRLEN discard_len;
1247     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1248     buf = SvPVX(PL_parser->linestr);
1249     if (ptr < buf)
1250         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1251     if (ptr == buf)
1252         return;
1253     if (ptr > PL_parser->bufptr)
1254         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1255     discard_len = ptr - buf;
1256     if (PL_parser->oldbufptr < ptr)
1257         PL_parser->oldbufptr = ptr;
1258     if (PL_parser->oldoldbufptr < ptr)
1259         PL_parser->oldoldbufptr = ptr;
1260     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1261         PL_parser->last_uni = NULL;
1262     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1263         PL_parser->last_lop = NULL;
1264     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1265     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1266     PL_parser->bufend -= discard_len;
1267     PL_parser->bufptr -= discard_len;
1268     PL_parser->oldbufptr -= discard_len;
1269     PL_parser->oldoldbufptr -= discard_len;
1270     if (PL_parser->last_uni)
1271         PL_parser->last_uni -= discard_len;
1272     if (PL_parser->last_lop)
1273         PL_parser->last_lop -= discard_len;
1274 }
1275
1276 /*
1277 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1278
1279 Reads in the next chunk of text to be lexed, appending it to
1280 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1281 looked to the end of the current chunk and wants to know more.  It is
1282 usual, but not necessary, for lexing to have consumed the entirety of
1283 the current chunk at this time.
1284
1285 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1286 chunk (i.e., the current chunk has been entirely consumed), normally the
1287 current chunk will be discarded at the same time that the new chunk is
1288 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1289 will not be discarded.  If the current chunk has not been entirely
1290 consumed, then it will not be discarded regardless of the flag.
1291
1292 Returns true if some new text was added to the buffer, or false if the
1293 buffer has reached the end of the input text.
1294
1295 =cut
1296 */
1297
1298 #define LEX_FAKE_EOF 0x80000000
1299 #define LEX_NO_TERM  0x40000000
1300
1301 bool
1302 Perl_lex_next_chunk(pTHX_ U32 flags)
1303 {
1304     SV *linestr;
1305     char *buf;
1306     STRLEN old_bufend_pos, new_bufend_pos;
1307     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1308     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1309     bool got_some_for_debugger = 0;
1310     bool got_some;
1311     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1312         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1313     linestr = PL_parser->linestr;
1314     buf = SvPVX(linestr);
1315     if (!(flags & LEX_KEEP_PREVIOUS) &&
1316             PL_parser->bufptr == PL_parser->bufend) {
1317         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1318         linestart_pos = 0;
1319         if (PL_parser->last_uni != PL_parser->bufend)
1320             PL_parser->last_uni = NULL;
1321         if (PL_parser->last_lop != PL_parser->bufend)
1322             PL_parser->last_lop = NULL;
1323         last_uni_pos = last_lop_pos = 0;
1324         *buf = 0;
1325         SvCUR(linestr) = 0;
1326     } else {
1327         old_bufend_pos = PL_parser->bufend - buf;
1328         bufptr_pos = PL_parser->bufptr - buf;
1329         oldbufptr_pos = PL_parser->oldbufptr - buf;
1330         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1331         linestart_pos = PL_parser->linestart - buf;
1332         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1333         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1334     }
1335     if (flags & LEX_FAKE_EOF) {
1336         goto eof;
1337     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1338         got_some = 0;
1339     } else if (filter_gets(linestr, old_bufend_pos)) {
1340         got_some = 1;
1341         got_some_for_debugger = 1;
1342     } else if (flags & LEX_NO_TERM) {
1343         got_some = 0;
1344     } else {
1345         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1346             sv_setpvs(linestr, "");
1347         eof:
1348         /* End of real input.  Close filehandle (unless it was STDIN),
1349          * then add implicit termination.
1350          */
1351         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1352             PerlIO_clearerr(PL_parser->rsfp);
1353         else if (PL_parser->rsfp)
1354             (void)PerlIO_close(PL_parser->rsfp);
1355         PL_parser->rsfp = NULL;
1356         PL_parser->in_pod = PL_parser->filtered = 0;
1357 #ifdef PERL_MAD
1358         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1359             PL_faketokens = 1;
1360 #endif
1361         if (!PL_in_eval && PL_minus_p) {
1362             sv_catpvs(linestr,
1363                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1364             PL_minus_n = PL_minus_p = 0;
1365         } else if (!PL_in_eval && PL_minus_n) {
1366             sv_catpvs(linestr, /*{*/";}");
1367             PL_minus_n = 0;
1368         } else
1369             sv_catpvs(linestr, ";");
1370         got_some = 1;
1371     }
1372     buf = SvPVX(linestr);
1373     new_bufend_pos = SvCUR(linestr);
1374     PL_parser->bufend = buf + new_bufend_pos;
1375     PL_parser->bufptr = buf + bufptr_pos;
1376     PL_parser->oldbufptr = buf + oldbufptr_pos;
1377     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1378     PL_parser->linestart = buf + linestart_pos;
1379     if (PL_parser->last_uni)
1380         PL_parser->last_uni = buf + last_uni_pos;
1381     if (PL_parser->last_lop)
1382         PL_parser->last_lop = buf + last_lop_pos;
1383     if (PL_parser->preambling != NOLINE) {
1384         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1385         PL_parser->preambling = NOLINE;
1386     }
1387     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1388             PL_curstash != PL_debstash) {
1389         /* debugger active and we're not compiling the debugger code,
1390          * so store the line into the debugger's array of lines
1391          */
1392         update_debugger_info(NULL, buf+old_bufend_pos,
1393             new_bufend_pos-old_bufend_pos);
1394     }
1395     return got_some;
1396 }
1397
1398 /*
1399 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1400
1401 Looks ahead one (Unicode) character in the text currently being lexed.
1402 Returns the codepoint (unsigned integer value) of the next character,
1403 or -1 if lexing has reached the end of the input text.  To consume the
1404 peeked character, use L</lex_read_unichar>.
1405
1406 If the next character is in (or extends into) the next chunk of input
1407 text, the next chunk will be read in.  Normally the current chunk will be
1408 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1409 then the current chunk will not be discarded.
1410
1411 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1412 is encountered, an exception is generated.
1413
1414 =cut
1415 */
1416
1417 I32
1418 Perl_lex_peek_unichar(pTHX_ U32 flags)
1419 {
1420     dVAR;
1421     char *s, *bufend;
1422     if (flags & ~(LEX_KEEP_PREVIOUS))
1423         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1424     s = PL_parser->bufptr;
1425     bufend = PL_parser->bufend;
1426     if (UTF) {
1427         U8 head;
1428         I32 unichar;
1429         STRLEN len, retlen;
1430         if (s == bufend) {
1431             if (!lex_next_chunk(flags))
1432                 return -1;
1433             s = PL_parser->bufptr;
1434             bufend = PL_parser->bufend;
1435         }
1436         head = (U8)*s;
1437         if (UTF8_IS_INVARIANT(head))
1438             return head;
1439         if (UTF8_IS_START(head)) {
1440             len = UTF8SKIP(&head);
1441             while ((STRLEN)(bufend-s) < len) {
1442                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1443                     break;
1444                 s = PL_parser->bufptr;
1445                 bufend = PL_parser->bufend;
1446             }
1447         }
1448         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1449         if (retlen == (STRLEN)-1) {
1450             /* malformed UTF-8 */
1451             ENTER;
1452             SAVESPTR(PL_warnhook);
1453             PL_warnhook = PERL_WARNHOOK_FATAL;
1454             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1455             LEAVE;
1456         }
1457         return unichar;
1458     } else {
1459         if (s == bufend) {
1460             if (!lex_next_chunk(flags))
1461                 return -1;
1462             s = PL_parser->bufptr;
1463         }
1464         return (U8)*s;
1465     }
1466 }
1467
1468 /*
1469 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1470
1471 Reads the next (Unicode) character in the text currently being lexed.
1472 Returns the codepoint (unsigned integer value) of the character read,
1473 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1474 if lexing has reached the end of the input text.  To non-destructively
1475 examine the next character, use L</lex_peek_unichar> instead.
1476
1477 If the next character is in (or extends into) the next chunk of input
1478 text, the next chunk will be read in.  Normally the current chunk will be
1479 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1480 then the current chunk will not be discarded.
1481
1482 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1483 is encountered, an exception is generated.
1484
1485 =cut
1486 */
1487
1488 I32
1489 Perl_lex_read_unichar(pTHX_ U32 flags)
1490 {
1491     I32 c;
1492     if (flags & ~(LEX_KEEP_PREVIOUS))
1493         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1494     c = lex_peek_unichar(flags);
1495     if (c != -1) {
1496         if (c == '\n')
1497             COPLINE_INC_WITH_HERELINES;
1498         if (UTF)
1499             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1500         else
1501             ++(PL_parser->bufptr);
1502     }
1503     return c;
1504 }
1505
1506 /*
1507 =for apidoc Amx|void|lex_read_space|U32 flags
1508
1509 Reads optional spaces, in Perl style, in the text currently being
1510 lexed.  The spaces may include ordinary whitespace characters and
1511 Perl-style comments.  C<#line> directives are processed if encountered.
1512 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1513 at a non-space character (or the end of the input text).
1514
1515 If spaces extend into the next chunk of input text, the next chunk will
1516 be read in.  Normally the current chunk will be discarded at the same
1517 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1518 chunk will not be discarded.
1519
1520 =cut
1521 */
1522
1523 #define LEX_NO_INCLINE    0x40000000
1524 #define LEX_NO_NEXT_CHUNK 0x80000000
1525
1526 void
1527 Perl_lex_read_space(pTHX_ U32 flags)
1528 {
1529     char *s, *bufend;
1530     const bool can_incline = !(flags & LEX_NO_INCLINE);
1531     bool need_incline = 0;
1532     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1533         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1534 #ifdef PERL_MAD
1535     if (PL_skipwhite) {
1536         sv_free(PL_skipwhite);
1537         PL_skipwhite = NULL;
1538     }
1539     if (PL_madskills)
1540         PL_skipwhite = newSVpvs("");
1541 #endif /* PERL_MAD */
1542     s = PL_parser->bufptr;
1543     bufend = PL_parser->bufend;
1544     while (1) {
1545         char c = *s;
1546         if (c == '#') {
1547             do {
1548                 c = *++s;
1549             } while (!(c == '\n' || (c == 0 && s == bufend)));
1550         } else if (c == '\n') {
1551             s++;
1552             if (can_incline) {
1553                 PL_parser->linestart = s;
1554                 if (s == bufend)
1555                     need_incline = 1;
1556                 else
1557                     incline(s);
1558             }
1559         } else if (isSPACE(c)) {
1560             s++;
1561         } else if (c == 0 && s == bufend) {
1562             bool got_more;
1563             line_t l;
1564 #ifdef PERL_MAD
1565             if (PL_madskills)
1566                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1567 #endif /* PERL_MAD */
1568             if (flags & LEX_NO_NEXT_CHUNK)
1569                 break;
1570             PL_parser->bufptr = s;
1571             l = CopLINE(PL_curcop);
1572             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1573             got_more = lex_next_chunk(flags);
1574             CopLINE_set(PL_curcop, l);
1575             s = PL_parser->bufptr;
1576             bufend = PL_parser->bufend;
1577             if (!got_more)
1578                 break;
1579             if (can_incline && need_incline && PL_parser->rsfp) {
1580                 incline(s);
1581                 need_incline = 0;
1582             }
1583         } else {
1584             break;
1585         }
1586     }
1587 #ifdef PERL_MAD
1588     if (PL_madskills)
1589         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1590 #endif /* PERL_MAD */
1591     PL_parser->bufptr = s;
1592 }
1593
1594 /*
1595
1596 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1597
1598 This function performs syntax checking on a prototype, C<proto>.
1599 If C<warn> is true, any illegal characters or mismatched brackets
1600 will trigger illegalproto warnings, declaring that they were
1601 detected in the prototype for C<name>.
1602
1603 The return value is C<true> if this is a valid prototype, and
1604 C<false> if it is not, regardless of whether C<warn> was C<true> or
1605 C<false>.
1606
1607 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1608
1609 =cut
1610
1611  */
1612
1613 bool
1614 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1615 {
1616     STRLEN len, origlen;
1617     char *p = proto ? SvPV(proto, len) : NULL;
1618     bool bad_proto = FALSE;
1619     bool in_brackets = FALSE;
1620     bool after_slash = FALSE;
1621     char greedy_proto = ' ';
1622     bool proto_after_greedy_proto = FALSE;
1623     bool must_be_last = FALSE;
1624     bool underscore = FALSE;
1625     bool bad_proto_after_underscore = FALSE;
1626
1627     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1628
1629     if (!proto)
1630         return TRUE;
1631
1632     origlen = len;
1633     for (; len--; p++) {
1634         if (!isSPACE(*p)) {
1635             if (must_be_last)
1636                 proto_after_greedy_proto = TRUE;
1637             if (underscore) {
1638                 if (!strchr(";@%", *p))
1639                     bad_proto_after_underscore = TRUE;
1640                 underscore = FALSE;
1641             }
1642             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1643                 bad_proto = TRUE;
1644             }
1645             else {
1646                 if (*p == '[')
1647                     in_brackets = TRUE;
1648                 else if (*p == ']')
1649                     in_brackets = FALSE;
1650                 else if ((*p == '@' || *p == '%') &&
1651                     !after_slash &&
1652                     !in_brackets ) {
1653                     must_be_last = TRUE;
1654                     greedy_proto = *p;
1655                 }
1656                 else if (*p == '_')
1657                     underscore = TRUE;
1658             }
1659             if (*p == '\\')
1660                 after_slash = TRUE;
1661             else
1662                 after_slash = FALSE;
1663         }
1664     }
1665
1666     if (warn) {
1667         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1668         p -= origlen;
1669         p = SvUTF8(proto)
1670             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1671                              origlen, UNI_DISPLAY_ISPRINT)
1672             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1673
1674         if (proto_after_greedy_proto)
1675             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1676                         "Prototype after '%c' for %"SVf" : %s",
1677                         greedy_proto, SVfARG(name), p);
1678         if (in_brackets)
1679             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1680                         "Missing ']' in prototype for %"SVf" : %s",
1681                         SVfARG(name), p);
1682         if (bad_proto)
1683             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1684                         "Illegal character in prototype for %"SVf" : %s",
1685                         SVfARG(name), p);
1686         if (bad_proto_after_underscore)
1687             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1688                         "Illegal character after '_' in prototype for %"SVf" : %s",
1689                         SVfARG(name), p);
1690     }
1691
1692     return (! (proto_after_greedy_proto || bad_proto) );
1693 }
1694
1695 /*
1696  * S_incline
1697  * This subroutine has nothing to do with tilting, whether at windmills
1698  * or pinball tables.  Its name is short for "increment line".  It
1699  * increments the current line number in CopLINE(PL_curcop) and checks
1700  * to see whether the line starts with a comment of the form
1701  *    # line 500 "foo.pm"
1702  * If so, it sets the current line number and file to the values in the comment.
1703  */
1704
1705 STATIC void
1706 S_incline(pTHX_ const char *s)
1707 {
1708     dVAR;
1709     const char *t;
1710     const char *n;
1711     const char *e;
1712     line_t line_num;
1713
1714     PERL_ARGS_ASSERT_INCLINE;
1715
1716     COPLINE_INC_WITH_HERELINES;
1717     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1718      && s+1 == PL_bufend && *s == ';') {
1719         /* fake newline in string eval */
1720         CopLINE_dec(PL_curcop);
1721         return;
1722     }
1723     if (*s++ != '#')
1724         return;
1725     while (SPACE_OR_TAB(*s))
1726         s++;
1727     if (strnEQ(s, "line", 4))
1728         s += 4;
1729     else
1730         return;
1731     if (SPACE_OR_TAB(*s))
1732         s++;
1733     else
1734         return;
1735     while (SPACE_OR_TAB(*s))
1736         s++;
1737     if (!isDIGIT(*s))
1738         return;
1739
1740     n = s;
1741     while (isDIGIT(*s))
1742         s++;
1743     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1744         return;
1745     while (SPACE_OR_TAB(*s))
1746         s++;
1747     if (*s == '"' && (t = strchr(s+1, '"'))) {
1748         s++;
1749         e = t + 1;
1750     }
1751     else {
1752         t = s;
1753         while (!isSPACE(*t))
1754             t++;
1755         e = t;
1756     }
1757     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1758         e++;
1759     if (*e != '\n' && *e != '\0')
1760         return;         /* false alarm */
1761
1762     line_num = atoi(n)-1;
1763
1764     if (t - s > 0) {
1765         const STRLEN len = t - s;
1766
1767         if (!PL_rsfp && !PL_parser->filtered) {
1768             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1769              * to *{"::_<newfilename"} */
1770             /* However, the long form of evals is only turned on by the
1771                debugger - usually they're "(eval %lu)" */
1772             GV * const cfgv = CopFILEGV(PL_curcop);
1773             if (cfgv) {
1774                 char smallbuf[128];
1775                 STRLEN tmplen2 = len;
1776                 char *tmpbuf2;
1777                 GV *gv2;
1778
1779                 if (tmplen2 + 2 <= sizeof smallbuf)
1780                     tmpbuf2 = smallbuf;
1781                 else
1782                     Newx(tmpbuf2, tmplen2 + 2, char);
1783
1784                 tmpbuf2[0] = '_';
1785                 tmpbuf2[1] = '<';
1786
1787                 memcpy(tmpbuf2 + 2, s, tmplen2);
1788                 tmplen2 += 2;
1789
1790                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1791                 if (!isGV(gv2)) {
1792                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1793                     /* adjust ${"::_<newfilename"} to store the new file name */
1794                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1795                     /* The line number may differ. If that is the case,
1796                        alias the saved lines that are in the array.
1797                        Otherwise alias the whole array. */
1798                     if (CopLINE(PL_curcop) == line_num) {
1799                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1800                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1801                     }
1802                     else if (GvAV(cfgv)) {
1803                         AV * const av = GvAV(cfgv);
1804                         const I32 start = CopLINE(PL_curcop)+1;
1805                         I32 items = AvFILLp(av) - start;
1806                         if (items > 0) {
1807                             AV * const av2 = GvAVn(gv2);
1808                             SV **svp = AvARRAY(av) + start;
1809                             I32 l = (I32)line_num+1;
1810                             while (items--)
1811                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1812                         }
1813                     }
1814                 }
1815
1816                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1817             }
1818         }
1819         CopFILE_free(PL_curcop);
1820         CopFILE_setn(PL_curcop, s, len);
1821     }
1822     CopLINE_set(PL_curcop, line_num);
1823 }
1824
1825 #define skipspace(s) skipspace_flags(s, 0)
1826
1827 #ifdef PERL_MAD
1828 /* skip space before PL_thistoken */
1829
1830 STATIC char *
1831 S_skipspace0(pTHX_ char *s)
1832 {
1833     PERL_ARGS_ASSERT_SKIPSPACE0;
1834
1835     s = skipspace(s);
1836     if (!PL_madskills)
1837         return s;
1838     if (PL_skipwhite) {
1839         if (!PL_thiswhite)
1840             PL_thiswhite = newSVpvs("");
1841         sv_catsv(PL_thiswhite, PL_skipwhite);
1842         sv_free(PL_skipwhite);
1843         PL_skipwhite = 0;
1844     }
1845     PL_realtokenstart = s - SvPVX(PL_linestr);
1846     return s;
1847 }
1848
1849 /* skip space after PL_thistoken */
1850
1851 STATIC char *
1852 S_skipspace1(pTHX_ char *s)
1853 {
1854     const char *start = s;
1855     I32 startoff = start - SvPVX(PL_linestr);
1856
1857     PERL_ARGS_ASSERT_SKIPSPACE1;
1858
1859     s = skipspace(s);
1860     if (!PL_madskills)
1861         return s;
1862     start = SvPVX(PL_linestr) + startoff;
1863     if (!PL_thistoken && PL_realtokenstart >= 0) {
1864         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1865         PL_thistoken = newSVpvn(tstart, start - tstart);
1866     }
1867     PL_realtokenstart = -1;
1868     if (PL_skipwhite) {
1869         if (!PL_nextwhite)
1870             PL_nextwhite = newSVpvs("");
1871         sv_catsv(PL_nextwhite, PL_skipwhite);
1872         sv_free(PL_skipwhite);
1873         PL_skipwhite = 0;
1874     }
1875     return s;
1876 }
1877
1878 STATIC char *
1879 S_skipspace2(pTHX_ char *s, SV **svp)
1880 {
1881     char *start;
1882     const I32 startoff = s - SvPVX(PL_linestr);
1883
1884     PERL_ARGS_ASSERT_SKIPSPACE2;
1885
1886     s = skipspace(s);
1887     if (!PL_madskills || !svp)
1888         return s;
1889     start = SvPVX(PL_linestr) + startoff;
1890     if (!PL_thistoken && PL_realtokenstart >= 0) {
1891         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1892         PL_thistoken = newSVpvn(tstart, start - tstart);
1893         PL_realtokenstart = -1;
1894     }
1895     if (PL_skipwhite) {
1896         if (!*svp)
1897             *svp = newSVpvs("");
1898         sv_setsv(*svp, PL_skipwhite);
1899         sv_free(PL_skipwhite);
1900         PL_skipwhite = 0;
1901     }
1902
1903     return s;
1904 }
1905 #endif
1906
1907 STATIC void
1908 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1909 {
1910     AV *av = CopFILEAVx(PL_curcop);
1911     if (av) {
1912         SV * sv;
1913         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1914         else {
1915             sv = *av_fetch(av, 0, 1);
1916             SvUPGRADE(sv, SVt_PVMG);
1917         }
1918         if (!SvPOK(sv)) sv_setpvs(sv,"");
1919         if (orig_sv)
1920             sv_catsv(sv, orig_sv);
1921         else
1922             sv_catpvn(sv, buf, len);
1923         if (!SvIOK(sv)) {
1924             (void)SvIOK_on(sv);
1925             SvIV_set(sv, 0);
1926         }
1927         if (PL_parser->preambling == NOLINE)
1928             av_store(av, CopLINE(PL_curcop), sv);
1929     }
1930 }
1931
1932 /*
1933  * S_skipspace
1934  * Called to gobble the appropriate amount and type of whitespace.
1935  * Skips comments as well.
1936  */
1937
1938 STATIC char *
1939 S_skipspace_flags(pTHX_ char *s, U32 flags)
1940 {
1941 #ifdef PERL_MAD
1942     char *start = s;
1943 #endif /* PERL_MAD */
1944     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1945 #ifdef PERL_MAD
1946     if (PL_skipwhite) {
1947         sv_free(PL_skipwhite);
1948         PL_skipwhite = NULL;
1949     }
1950 #endif /* PERL_MAD */
1951     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1952         while (s < PL_bufend && SPACE_OR_TAB(*s))
1953             s++;
1954     } else {
1955         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1956         PL_bufptr = s;
1957         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1958                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1959                     LEX_NO_NEXT_CHUNK : 0));
1960         s = PL_bufptr;
1961         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1962         if (PL_linestart > PL_bufptr)
1963             PL_bufptr = PL_linestart;
1964         return s;
1965     }
1966 #ifdef PERL_MAD
1967     if (PL_madskills)
1968         PL_skipwhite = newSVpvn(start, s-start);
1969 #endif /* PERL_MAD */
1970     return s;
1971 }
1972
1973 /*
1974  * S_check_uni
1975  * Check the unary operators to ensure there's no ambiguity in how they're
1976  * used.  An ambiguous piece of code would be:
1977  *     rand + 5
1978  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1979  * the +5 is its argument.
1980  */
1981
1982 STATIC void
1983 S_check_uni(pTHX)
1984 {
1985     dVAR;
1986     const char *s;
1987     const char *t;
1988
1989     if (PL_oldoldbufptr != PL_last_uni)
1990         return;
1991     while (isSPACE(*PL_last_uni))
1992         PL_last_uni++;
1993     s = PL_last_uni;
1994     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1995         s++;
1996     if ((t = strchr(s, '(')) && t < PL_bufptr)
1997         return;
1998
1999     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2000                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2001                      (int)(s - PL_last_uni), PL_last_uni);
2002 }
2003
2004 /*
2005  * LOP : macro to build a list operator.  Its behaviour has been replaced
2006  * with a subroutine, S_lop() for which LOP is just another name.
2007  */
2008
2009 #define LOP(f,x) return lop(f,x,s)
2010
2011 /*
2012  * S_lop
2013  * Build a list operator (or something that might be one).  The rules:
2014  *  - if we have a next token, then it's a list operator [why?]
2015  *  - if the next thing is an opening paren, then it's a function
2016  *  - else it's a list operator
2017  */
2018
2019 STATIC I32
2020 S_lop(pTHX_ I32 f, int x, char *s)
2021 {
2022     dVAR;
2023
2024     PERL_ARGS_ASSERT_LOP;
2025
2026     pl_yylval.ival = f;
2027     CLINE;
2028     PL_expect = x;
2029     PL_bufptr = s;
2030     PL_last_lop = PL_oldbufptr;
2031     PL_last_lop_op = (OPCODE)f;
2032 #ifdef PERL_MAD
2033     if (PL_lasttoke)
2034         goto lstop;
2035 #else
2036     if (PL_nexttoke)
2037         goto lstop;
2038 #endif
2039     if (*s == '(')
2040         return REPORT(FUNC);
2041     s = PEEKSPACE(s);
2042     if (*s == '(')
2043         return REPORT(FUNC);
2044     else {
2045         lstop:
2046         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2047             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2048         return REPORT(LSTOP);
2049     }
2050 }
2051
2052 #ifdef PERL_MAD
2053  /*
2054  * S_start_force
2055  * Sets up for an eventual force_next().  start_force(0) basically does
2056  * an unshift, while start_force(-1) does a push.  yylex removes items
2057  * on the "pop" end.
2058  */
2059
2060 STATIC void
2061 S_start_force(pTHX_ int where)
2062 {
2063     int i;
2064
2065     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2066         where = PL_lasttoke;
2067     assert(PL_curforce < 0 || PL_curforce == where);
2068     if (PL_curforce != where) {
2069         for (i = PL_lasttoke; i > where; --i) {
2070             PL_nexttoke[i] = PL_nexttoke[i-1];
2071         }
2072         PL_lasttoke++;
2073     }
2074     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2075         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2076     PL_curforce = where;
2077     if (PL_nextwhite) {
2078         if (PL_madskills)
2079             curmad('^', newSVpvs(""));
2080         CURMAD('_', PL_nextwhite);
2081     }
2082 }
2083
2084 STATIC void
2085 S_curmad(pTHX_ char slot, SV *sv)
2086 {
2087     MADPROP **where;
2088
2089     if (!sv)
2090         return;
2091     if (PL_curforce < 0)
2092         where = &PL_thismad;
2093     else
2094         where = &PL_nexttoke[PL_curforce].next_mad;
2095
2096     if (PL_faketokens)
2097         sv_setpvs(sv, "");
2098     else {
2099         if (!IN_BYTES) {
2100             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2101                 SvUTF8_on(sv);
2102             else if (PL_encoding) {
2103                 sv_recode_to_utf8(sv, PL_encoding);
2104             }
2105         }
2106     }
2107
2108     /* keep a slot open for the head of the list? */
2109     if (slot != '_' && *where && (*where)->mad_key == '^') {
2110         (*where)->mad_key = slot;
2111         sv_free(MUTABLE_SV(((*where)->mad_val)));
2112         (*where)->mad_val = (void*)sv;
2113     }
2114     else
2115         addmad(newMADsv(slot, sv), where, 0);
2116 }
2117 #else
2118 #  define start_force(where)    NOOP
2119 #  define curmad(slot, sv)      NOOP
2120 #endif
2121
2122 /*
2123  * S_force_next
2124  * When the lexer realizes it knows the next token (for instance,
2125  * it is reordering tokens for the parser) then it can call S_force_next
2126  * to know what token to return the next time the lexer is called.  Caller
2127  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2128  * and possibly PL_expect to ensure the lexer handles the token correctly.
2129  */
2130
2131 STATIC void
2132 S_force_next(pTHX_ I32 type)
2133 {
2134     dVAR;
2135 #ifdef DEBUGGING
2136     if (DEBUG_T_TEST) {
2137         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2138         tokereport(type, &NEXTVAL_NEXTTOKE);
2139     }
2140 #endif
2141 #ifdef PERL_MAD
2142     if (PL_curforce < 0)
2143         start_force(PL_lasttoke);
2144     PL_nexttoke[PL_curforce].next_type = type;
2145     if (PL_lex_state != LEX_KNOWNEXT)
2146         PL_lex_defer = PL_lex_state;
2147     PL_lex_state = LEX_KNOWNEXT;
2148     PL_lex_expect = PL_expect;
2149     PL_curforce = -1;
2150 #else
2151     PL_nexttype[PL_nexttoke] = type;
2152     PL_nexttoke++;
2153     if (PL_lex_state != LEX_KNOWNEXT) {
2154         PL_lex_defer = PL_lex_state;
2155         PL_lex_expect = PL_expect;
2156         PL_lex_state = LEX_KNOWNEXT;
2157     }
2158 #endif
2159 }
2160
2161 /*
2162  * S_postderef
2163  *
2164  * This subroutine handles postfix deref syntax after the arrow has already
2165  * been emitted.  @* $* etc. are emitted as two separate token right here.
2166  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2167  * only the first, leaving yylex to find the next.
2168  */
2169
2170 static int
2171 S_postderef(pTHX_ char const funny, char const next)
2172 {
2173     dVAR;
2174     assert(strchr("$@%&*", funny));
2175     assert(strchr("*[{", next));
2176     if (next == '*') {
2177         PL_expect = XOPERATOR;
2178         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2179             assert('@' == funny || '$' == funny);
2180             PL_lex_state = LEX_INTERPEND;
2181             start_force(PL_curforce);
2182             force_next(POSTJOIN);
2183         }
2184         start_force(PL_curforce);
2185         force_next(next);
2186         PL_bufptr+=2;
2187     }
2188     else {
2189         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2190          && !PL_lex_brackets)
2191             PL_lex_dojoin = 2;
2192         PL_expect = XOPERATOR;
2193         PL_bufptr++;
2194     }
2195     return funny;
2196 }
2197
2198 void
2199 Perl_yyunlex(pTHX)
2200 {
2201     int yyc = PL_parser->yychar;
2202     if (yyc != YYEMPTY) {
2203         if (yyc) {
2204             start_force(-1);
2205             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2206             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2207                 PL_lex_allbrackets--;
2208                 PL_lex_brackets--;
2209                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2210             } else if (yyc == '('/*)*/) {
2211                 PL_lex_allbrackets--;
2212                 yyc |= (2<<24);
2213             }
2214             force_next(yyc);
2215         }
2216         PL_parser->yychar = YYEMPTY;
2217     }
2218 }
2219
2220 STATIC SV *
2221 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2222 {
2223     dVAR;
2224     SV * const sv = newSVpvn_utf8(start, len,
2225                                   !IN_BYTES
2226                                   && UTF
2227                                   && !is_ascii_string((const U8*)start, len)
2228                                   && is_utf8_string((const U8*)start, len));
2229     return sv;
2230 }
2231
2232 /*
2233  * S_force_word
2234  * When the lexer knows the next thing is a word (for instance, it has
2235  * just seen -> and it knows that the next char is a word char, then
2236  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2237  * lookahead.
2238  *
2239  * Arguments:
2240  *   char *start : buffer position (must be within PL_linestr)
2241  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2242  *   int check_keyword : if true, Perl checks to make sure the word isn't
2243  *       a keyword (do this if the word is a label, e.g. goto FOO)
2244  *   int allow_pack : if true, : characters will also be allowed (require,
2245  *       use, etc. do this)
2246  *   int allow_initial_tick : used by the "sub" lexer only.
2247  */
2248
2249 STATIC char *
2250 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2251 {
2252     dVAR;
2253     char *s;
2254     STRLEN len;
2255
2256     PERL_ARGS_ASSERT_FORCE_WORD;
2257
2258     start = SKIPSPACE1(start);
2259     s = start;
2260     if (isIDFIRST_lazy_if(s,UTF) ||
2261         (allow_pack && *s == ':') )
2262     {
2263         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2264         if (check_keyword) {
2265           char *s2 = PL_tokenbuf;
2266           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2267             s2 += 6, len -= 6;
2268           if (keyword(s2, len, 0))
2269             return start;
2270         }
2271         start_force(PL_curforce);
2272         if (PL_madskills)
2273             curmad('X', newSVpvn(start,s-start));
2274         if (token == METHOD) {
2275             s = SKIPSPACE1(s);
2276             if (*s == '(')
2277                 PL_expect = XTERM;
2278             else {
2279                 PL_expect = XOPERATOR;
2280             }
2281         }
2282         if (PL_madskills)
2283             curmad('g', newSVpvs( "forced" ));
2284         NEXTVAL_NEXTTOKE.opval
2285             = (OP*)newSVOP(OP_CONST,0,
2286                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2287         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2288         force_next(token);
2289     }
2290     return s;
2291 }
2292
2293 /*
2294  * S_force_ident
2295  * Called when the lexer wants $foo *foo &foo etc, but the program
2296  * text only contains the "foo" portion.  The first argument is a pointer
2297  * to the "foo", and the second argument is the type symbol to prefix.
2298  * Forces the next token to be a "WORD".
2299  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2300  */
2301
2302 STATIC void
2303 S_force_ident(pTHX_ const char *s, int kind)
2304 {
2305     dVAR;
2306
2307     PERL_ARGS_ASSERT_FORCE_IDENT;
2308
2309     if (s[0]) {
2310         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2311         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2312                                                                 UTF ? SVf_UTF8 : 0));
2313         start_force(PL_curforce);
2314         NEXTVAL_NEXTTOKE.opval = o;
2315         force_next(WORD);
2316         if (kind) {
2317             o->op_private = OPpCONST_ENTERED;
2318             /* XXX see note in pp_entereval() for why we forgo typo
2319                warnings if the symbol must be introduced in an eval.
2320                GSAR 96-10-12 */
2321             gv_fetchpvn_flags(s, len,
2322                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2323                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2324                               kind == '$' ? SVt_PV :
2325                               kind == '@' ? SVt_PVAV :
2326                               kind == '%' ? SVt_PVHV :
2327                               SVt_PVGV
2328                               );
2329         }
2330     }
2331 }
2332
2333 static void
2334 S_force_ident_maybe_lex(pTHX_ char pit)
2335 {
2336     start_force(PL_curforce);
2337     NEXTVAL_NEXTTOKE.ival = pit;
2338     force_next('p');
2339 }
2340
2341 NV
2342 Perl_str_to_version(pTHX_ SV *sv)
2343 {
2344     NV retval = 0.0;
2345     NV nshift = 1.0;
2346     STRLEN len;
2347     const char *start = SvPV_const(sv,len);
2348     const char * const end = start + len;
2349     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2350
2351     PERL_ARGS_ASSERT_STR_TO_VERSION;
2352
2353     while (start < end) {
2354         STRLEN skip;
2355         UV n;
2356         if (utf)
2357             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2358         else {
2359             n = *(U8*)start;
2360             skip = 1;
2361         }
2362         retval += ((NV)n)/nshift;
2363         start += skip;
2364         nshift *= 1000;
2365     }
2366     return retval;
2367 }
2368
2369 /*
2370  * S_force_version
2371  * Forces the next token to be a version number.
2372  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2373  * and if "guessing" is TRUE, then no new token is created (and the caller
2374  * must use an alternative parsing method).
2375  */
2376
2377 STATIC char *
2378 S_force_version(pTHX_ char *s, int guessing)
2379 {
2380     dVAR;
2381     OP *version = NULL;
2382     char *d;
2383 #ifdef PERL_MAD
2384     I32 startoff = s - SvPVX(PL_linestr);
2385 #endif
2386
2387     PERL_ARGS_ASSERT_FORCE_VERSION;
2388
2389     s = SKIPSPACE1(s);
2390
2391     d = s;
2392     if (*d == 'v')
2393         d++;
2394     if (isDIGIT(*d)) {
2395         while (isDIGIT(*d) || *d == '_' || *d == '.')
2396             d++;
2397 #ifdef PERL_MAD
2398         if (PL_madskills) {
2399             start_force(PL_curforce);
2400             curmad('X', newSVpvn(s,d-s));
2401         }
2402 #endif
2403         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2404             SV *ver;
2405 #ifdef USE_LOCALE_NUMERIC
2406             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2407             setlocale(LC_NUMERIC, "C");
2408 #endif
2409             s = scan_num(s, &pl_yylval);
2410 #ifdef USE_LOCALE_NUMERIC
2411             setlocale(LC_NUMERIC, loc);
2412             Safefree(loc);
2413 #endif
2414             version = pl_yylval.opval;
2415             ver = cSVOPx(version)->op_sv;
2416             if (SvPOK(ver) && !SvNIOK(ver)) {
2417                 SvUPGRADE(ver, SVt_PVNV);
2418                 SvNV_set(ver, str_to_version(ver));
2419                 SvNOK_on(ver);          /* hint that it is a version */
2420             }
2421         }
2422         else if (guessing) {
2423 #ifdef PERL_MAD
2424             if (PL_madskills) {
2425                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2426                 PL_nextwhite = 0;
2427                 s = SvPVX(PL_linestr) + startoff;
2428             }
2429 #endif
2430             return s;
2431         }
2432     }
2433
2434 #ifdef PERL_MAD
2435     if (PL_madskills && !version) {
2436         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2437         PL_nextwhite = 0;
2438         s = SvPVX(PL_linestr) + startoff;
2439     }
2440 #endif
2441     /* NOTE: The parser sees the package name and the VERSION swapped */
2442     start_force(PL_curforce);
2443     NEXTVAL_NEXTTOKE.opval = version;
2444     force_next(WORD);
2445
2446     return s;
2447 }
2448
2449 /*
2450  * S_force_strict_version
2451  * Forces the next token to be a version number using strict syntax rules.
2452  */
2453
2454 STATIC char *
2455 S_force_strict_version(pTHX_ char *s)
2456 {
2457     dVAR;
2458     OP *version = NULL;
2459 #ifdef PERL_MAD
2460     I32 startoff = s - SvPVX(PL_linestr);
2461 #endif
2462     const char *errstr = NULL;
2463
2464     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2465
2466     while (isSPACE(*s)) /* leading whitespace */
2467         s++;
2468
2469     if (is_STRICT_VERSION(s,&errstr)) {
2470         SV *ver = newSV(0);
2471         s = (char *)scan_version(s, ver, 0);
2472         version = newSVOP(OP_CONST, 0, ver);
2473     }
2474     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2475             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2476     {
2477         PL_bufptr = s;
2478         if (errstr)
2479             yyerror(errstr); /* version required */
2480         return s;
2481     }
2482
2483 #ifdef PERL_MAD
2484     if (PL_madskills && !version) {
2485         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2486         PL_nextwhite = 0;
2487         s = SvPVX(PL_linestr) + startoff;
2488     }
2489 #endif
2490     /* NOTE: The parser sees the package name and the VERSION swapped */
2491     start_force(PL_curforce);
2492     NEXTVAL_NEXTTOKE.opval = version;
2493     force_next(WORD);
2494
2495     return s;
2496 }
2497
2498 /*
2499  * S_tokeq
2500  * Tokenize a quoted string passed in as an SV.  It finds the next
2501  * chunk, up to end of string or a backslash.  It may make a new
2502  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2503  * turns \\ into \.
2504  */
2505
2506 STATIC SV *
2507 S_tokeq(pTHX_ SV *sv)
2508 {
2509     dVAR;
2510     char *s;
2511     char *send;
2512     char *d;
2513     STRLEN len = 0;
2514     SV *pv = sv;
2515
2516     PERL_ARGS_ASSERT_TOKEQ;
2517
2518     if (!SvLEN(sv))
2519         goto finish;
2520
2521     s = SvPV_force(sv, len);
2522     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2523         goto finish;
2524     send = s + len;
2525     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2526     while (s < send && !(*s == '\\' && s[1] == '\\'))
2527         s++;
2528     if (s == send)
2529         goto finish;
2530     d = s;
2531     if ( PL_hints & HINT_NEW_STRING ) {
2532         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2533     }
2534     while (s < send) {
2535         if (*s == '\\') {
2536             if (s + 1 < send && (s[1] == '\\'))
2537                 s++;            /* all that, just for this */
2538         }
2539         *d++ = *s++;
2540     }
2541     *d = '\0';
2542     SvCUR_set(sv, d - SvPVX_const(sv));
2543   finish:
2544     if ( PL_hints & HINT_NEW_STRING )
2545        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2546     return sv;
2547 }
2548
2549 /*
2550  * Now come three functions related to double-quote context,
2551  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2552  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2553  * interact with PL_lex_state, and create fake ( ... ) argument lists
2554  * to handle functions and concatenation.
2555  * For example,
2556  *   "foo\lbar"
2557  * is tokenised as
2558  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2559  */
2560
2561 /*
2562  * S_sublex_start
2563  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2564  *
2565  * Pattern matching will set PL_lex_op to the pattern-matching op to
2566  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2567  *
2568  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2569  *
2570  * Everything else becomes a FUNC.
2571  *
2572  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2573  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2574  * call to S_sublex_push().
2575  */
2576
2577 STATIC I32
2578 S_sublex_start(pTHX)
2579 {
2580     dVAR;
2581     const I32 op_type = pl_yylval.ival;
2582
2583     if (op_type == OP_NULL) {
2584         pl_yylval.opval = PL_lex_op;
2585         PL_lex_op = NULL;
2586         return THING;
2587     }
2588     if (op_type == OP_CONST || op_type == OP_READLINE) {
2589         SV *sv = tokeq(PL_lex_stuff);
2590
2591         if (SvTYPE(sv) == SVt_PVIV) {
2592             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2593             STRLEN len;
2594             const char * const p = SvPV_const(sv, len);
2595             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2596             SvREFCNT_dec(sv);
2597             sv = nsv;
2598         }
2599         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2600         PL_lex_stuff = NULL;
2601         /* Allow <FH> // "foo" */
2602         if (op_type == OP_READLINE)
2603             PL_expect = XTERMORDORDOR;
2604         return THING;
2605     }
2606     else if (op_type == OP_BACKTICK && PL_lex_op) {
2607         /* readpipe() was overridden */
2608         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2609         pl_yylval.opval = PL_lex_op;
2610         PL_lex_op = NULL;
2611         PL_lex_stuff = NULL;
2612         return THING;
2613     }
2614
2615     PL_sublex_info.super_state = PL_lex_state;
2616     PL_sublex_info.sub_inwhat = (U16)op_type;
2617     PL_sublex_info.sub_op = PL_lex_op;
2618     PL_lex_state = LEX_INTERPPUSH;
2619
2620     PL_expect = XTERM;
2621     if (PL_lex_op) {
2622         pl_yylval.opval = PL_lex_op;
2623         PL_lex_op = NULL;
2624         return PMFUNC;
2625     }
2626     else
2627         return FUNC;
2628 }
2629
2630 /*
2631  * S_sublex_push
2632  * Create a new scope to save the lexing state.  The scope will be
2633  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2634  * to the uc, lc, etc. found before.
2635  * Sets PL_lex_state to LEX_INTERPCONCAT.
2636  */
2637
2638 STATIC I32
2639 S_sublex_push(pTHX)
2640 {
2641     dVAR;
2642     LEXSHARED *shared;
2643     const bool is_heredoc = PL_multi_close == '<';
2644     ENTER;
2645
2646     PL_lex_state = PL_sublex_info.super_state;
2647     SAVEI8(PL_lex_dojoin);
2648     SAVEI32(PL_lex_brackets);
2649     SAVEI32(PL_lex_allbrackets);
2650     SAVEI32(PL_lex_formbrack);
2651     SAVEI8(PL_lex_fakeeof);
2652     SAVEI32(PL_lex_casemods);
2653     SAVEI32(PL_lex_starts);
2654     SAVEI8(PL_lex_state);
2655     SAVESPTR(PL_lex_repl);
2656     SAVEVPTR(PL_lex_inpat);
2657     SAVEI16(PL_lex_inwhat);
2658     if (is_heredoc)
2659     {
2660         SAVECOPLINE(PL_curcop);
2661         SAVEI32(PL_multi_end);
2662         SAVEI32(PL_parser->herelines);
2663         PL_parser->herelines = 0;
2664     }
2665     SAVEI8(PL_multi_close);
2666     SAVEPPTR(PL_bufptr);
2667     SAVEPPTR(PL_bufend);
2668     SAVEPPTR(PL_oldbufptr);
2669     SAVEPPTR(PL_oldoldbufptr);
2670     SAVEPPTR(PL_last_lop);
2671     SAVEPPTR(PL_last_uni);
2672     SAVEPPTR(PL_linestart);
2673     SAVESPTR(PL_linestr);
2674     SAVEGENERICPV(PL_lex_brackstack);
2675     SAVEGENERICPV(PL_lex_casestack);
2676     SAVEGENERICPV(PL_parser->lex_shared);
2677     SAVEBOOL(PL_parser->lex_re_reparsing);
2678     SAVEI32(PL_copline);
2679
2680     /* The here-doc parser needs to be able to peek into outer lexing
2681        scopes to find the body of the here-doc.  So we put PL_linestr and
2682        PL_bufptr into lex_shared, to ‘share’ those values.
2683      */
2684     PL_parser->lex_shared->ls_linestr = PL_linestr;
2685     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2686
2687     PL_linestr = PL_lex_stuff;
2688     PL_lex_repl = PL_sublex_info.repl;
2689     PL_lex_stuff = NULL;
2690     PL_sublex_info.repl = NULL;
2691
2692     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2693         = SvPVX(PL_linestr);
2694     PL_bufend += SvCUR(PL_linestr);
2695     PL_last_lop = PL_last_uni = NULL;
2696     SAVEFREESV(PL_linestr);
2697     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2698
2699     PL_lex_dojoin = FALSE;
2700     PL_lex_brackets = PL_lex_formbrack = 0;
2701     PL_lex_allbrackets = 0;
2702     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2703     Newx(PL_lex_brackstack, 120, char);
2704     Newx(PL_lex_casestack, 12, char);
2705     PL_lex_casemods = 0;
2706     *PL_lex_casestack = '\0';
2707     PL_lex_starts = 0;
2708     PL_lex_state = LEX_INTERPCONCAT;
2709     if (is_heredoc)
2710         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2711     PL_copline = NOLINE;
2712
2713     Newxz(shared, 1, LEXSHARED);
2714     shared->ls_prev = PL_parser->lex_shared;
2715     PL_parser->lex_shared = shared;
2716
2717     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2718     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2719     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2720         PL_lex_inpat = PL_sublex_info.sub_op;
2721     else
2722         PL_lex_inpat = NULL;
2723
2724     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2725     PL_in_eval &= ~EVAL_RE_REPARSING;
2726
2727     return '(';
2728 }
2729
2730 /*
2731  * S_sublex_done
2732  * Restores lexer state after a S_sublex_push.
2733  */
2734
2735 STATIC I32
2736 S_sublex_done(pTHX)
2737 {
2738     dVAR;
2739     if (!PL_lex_starts++) {
2740         SV * const sv = newSVpvs("");
2741         if (SvUTF8(PL_linestr))
2742             SvUTF8_on(sv);
2743         PL_expect = XOPERATOR;
2744         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2745         return THING;
2746     }
2747
2748     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2749         PL_lex_state = LEX_INTERPCASEMOD;
2750         return yylex();
2751     }
2752
2753     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2754     assert(PL_lex_inwhat != OP_TRANSR);
2755     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2756         PL_linestr = PL_lex_repl;
2757         PL_lex_inpat = 0;
2758         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2759         PL_bufend += SvCUR(PL_linestr);
2760         PL_last_lop = PL_last_uni = NULL;
2761         PL_lex_dojoin = FALSE;
2762         PL_lex_brackets = 0;
2763         PL_lex_allbrackets = 0;
2764         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2765         PL_lex_casemods = 0;
2766         *PL_lex_casestack = '\0';
2767         PL_lex_starts = 0;
2768         if (SvEVALED(PL_lex_repl)) {
2769             PL_lex_state = LEX_INTERPNORMAL;
2770             PL_lex_starts++;
2771             /*  we don't clear PL_lex_repl here, so that we can check later
2772                 whether this is an evalled subst; that means we rely on the
2773                 logic to ensure sublex_done() is called again only via the
2774                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2775         }
2776         else {
2777             PL_lex_state = LEX_INTERPCONCAT;
2778             PL_lex_repl = NULL;
2779         }
2780         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2781             CopLINE(PL_curcop) +=
2782                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2783                  + PL_parser->herelines;
2784             PL_parser->herelines = 0;
2785         }
2786         return ',';
2787     }
2788     else {
2789         const line_t l = CopLINE(PL_curcop);
2790 #ifdef PERL_MAD
2791         if (PL_madskills) {
2792             if (PL_thiswhite) {
2793                 if (!PL_endwhite)
2794                     PL_endwhite = newSVpvs("");
2795                 sv_catsv(PL_endwhite, PL_thiswhite);
2796                 PL_thiswhite = 0;
2797             }
2798             if (PL_thistoken)
2799                 sv_setpvs(PL_thistoken,"");
2800             else
2801                 PL_realtokenstart = -1;
2802         }
2803 #endif
2804         LEAVE;
2805         if (PL_multi_close == '<')
2806             PL_parser->herelines += l - PL_multi_end;
2807         PL_bufend = SvPVX(PL_linestr);
2808         PL_bufend += SvCUR(PL_linestr);
2809         PL_expect = XOPERATOR;
2810         PL_sublex_info.sub_inwhat = 0;
2811         return ')';
2812     }
2813 }
2814
2815 PERL_STATIC_INLINE SV*
2816 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2817 {
2818     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2819      * interior, hence to the "}".  Finds what the name resolves to, returning
2820      * an SV* containing it; NULL if no valid one found */
2821
2822     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2823
2824     HV * table;
2825     SV **cvp;
2826     SV *cv;
2827     SV *rv;
2828     HV *stash;
2829     const U8* first_bad_char_loc;
2830     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2831
2832     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2833
2834     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2835                                      e - backslash_ptr,
2836                                      &first_bad_char_loc))
2837     {
2838         /* If warnings are on, this will print a more detailed analysis of what
2839          * is wrong than the error message below */
2840         utf8n_to_uvchr(first_bad_char_loc,
2841                        e - ((char *) first_bad_char_loc),
2842                        NULL, 0);
2843
2844         /* We deliberately don't try to print the malformed character, which
2845          * might not print very well; it also may be just the first of many
2846          * malformations, so don't print what comes after it */
2847         yyerror(Perl_form(aTHX_
2848             "Malformed UTF-8 character immediately after '%.*s'",
2849             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2850         return NULL;
2851     }
2852
2853     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2854                         /* include the <}> */
2855                         e - backslash_ptr + 1);
2856     if (! SvPOK(res)) {
2857         SvREFCNT_dec_NN(res);
2858         return NULL;
2859     }
2860
2861     /* See if the charnames handler is the Perl core's, and if so, we can skip
2862      * the validation needed for a user-supplied one, as Perl's does its own
2863      * validation. */
2864     table = GvHV(PL_hintgv);             /* ^H */
2865     cvp = hv_fetchs(table, "charnames", FALSE);
2866     if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2867         && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2868     {
2869         const char * const name = HvNAME(stash);
2870         if strEQ(name, "_charnames") {
2871            return res;
2872        }
2873     }
2874
2875     /* Here, it isn't Perl's charname handler.  We can't rely on a
2876      * user-supplied handler to validate the input name.  For non-ut8 input,
2877      * look to see that the first character is legal.  Then loop through the
2878      * rest checking that each is a continuation */
2879
2880     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2881      * the same thing */
2882
2883     if (! UTF) {
2884         if (! isALPHAU(*s)) {
2885             goto bad_charname;
2886         }
2887         s++;
2888         while (s < e) {
2889             if (! isCHARNAME_CONT(*s)) {
2890                 goto bad_charname;
2891             }
2892             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2893                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2894                            "A sequence of multiple spaces in a charnames "
2895                            "alias definition is deprecated");
2896             }
2897             s++;
2898         }
2899         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2900             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2901                         "Trailing white-space in a charnames alias "
2902                         "definition is deprecated");
2903         }
2904     }
2905     else {
2906         /* Similarly for utf8.  For invariants can check directly; for other
2907          * Latin1, can calculate their code point and check; otherwise  use a
2908          * swash */
2909         if (UTF8_IS_INVARIANT(*s)) {
2910             if (! isALPHAU(*s)) {
2911                 goto bad_charname;
2912             }
2913             s++;
2914         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2915             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2916                 goto bad_charname;
2917             }
2918             s += 2;
2919         }
2920         else {
2921             if (! PL_utf8_charname_begin) {
2922                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2923                 PL_utf8_charname_begin = _core_swash_init("utf8",
2924                                                         "_Perl_Charname_Begin",
2925                                                         &PL_sv_undef,
2926                                                         1, 0, NULL, &flags);
2927             }
2928             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2929                 goto bad_charname;
2930             }
2931             s += UTF8SKIP(s);
2932         }
2933
2934         while (s < e) {
2935             if (UTF8_IS_INVARIANT(*s)) {
2936                 if (! isCHARNAME_CONT(*s)) {
2937                     goto bad_charname;
2938                 }
2939                 if (*s == ' ' && *(s-1) == ' '
2940                  && ckWARN_d(WARN_DEPRECATED)) {
2941                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2942                                "A sequence of multiple spaces in a charnam"
2943                                "es alias definition is deprecated");
2944                 }
2945                 s++;
2946             }
2947             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2948                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2949                 {
2950                     goto bad_charname;
2951                 }
2952                 s += 2;
2953             }
2954             else {
2955                 if (! PL_utf8_charname_continue) {
2956                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2957                     PL_utf8_charname_continue = _core_swash_init("utf8",
2958                                                 "_Perl_Charname_Continue",
2959                                                 &PL_sv_undef,
2960                                                 1, 0, NULL, &flags);
2961                 }
2962                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2963                     goto bad_charname;
2964                 }
2965                 s += UTF8SKIP(s);
2966             }
2967         }
2968         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2969             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2970                        "Trailing white-space in a charnames alias "
2971                        "definition is deprecated");
2972         }
2973     }
2974
2975     if (SvUTF8(res)) { /* Don't accept malformed input */
2976         const U8* first_bad_char_loc;
2977         STRLEN len;
2978         const char* const str = SvPV_const(res, len);
2979         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2980             /* If warnings are on, this will print a more detailed analysis of
2981              * what is wrong than the error message below */
2982             utf8n_to_uvchr(first_bad_char_loc,
2983                            (char *) first_bad_char_loc - str,
2984                            NULL, 0);
2985
2986             /* We deliberately don't try to print the malformed character,
2987              * which might not print very well; it also may be just the first
2988              * of many malformations, so don't print what comes after it */
2989             yyerror_pv(
2990               Perl_form(aTHX_
2991                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2992                  (int) (e - backslash_ptr + 1), backslash_ptr,
2993                  (int) ((char *) first_bad_char_loc - str), str
2994               ),
2995               SVf_UTF8);
2996             return NULL;
2997         }
2998     }
2999
3000     return res;
3001
3002   bad_charname: {
3003         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
3004
3005         /* The final %.*s makes sure that should the trailing NUL be missing
3006          * that this print won't run off the end of the string */
3007         yyerror_pv(
3008           Perl_form(aTHX_
3009             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3010             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3011             (int)(e - s + bad_char_size), s + bad_char_size
3012           ),
3013           UTF ? SVf_UTF8 : 0);
3014         return NULL;
3015     }
3016 }
3017
3018 /*
3019   scan_const
3020
3021   Extracts the next constant part of a pattern, double-quoted string,
3022   or transliteration.  This is terrifying code.
3023
3024   For example, in parsing the double-quoted string "ab\x63$d", it would
3025   stop at the '$' and return an OP_CONST containing 'abc'.
3026
3027   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3028   processing a pattern (PL_lex_inpat is true), a transliteration
3029   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3030
3031   Returns a pointer to the character scanned up to. If this is
3032   advanced from the start pointer supplied (i.e. if anything was
3033   successfully parsed), will leave an OP_CONST for the substring scanned
3034   in pl_yylval. Caller must intuit reason for not parsing further
3035   by looking at the next characters herself.
3036
3037   In patterns:
3038     expand:
3039       \N{FOO}  => \N{U+hex_for_character_FOO}
3040       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3041
3042     pass through:
3043         all other \-char, including \N and \N{ apart from \N{ABC}
3044
3045     stops on:
3046         @ and $ where it appears to be a var, but not for $ as tail anchor
3047         \l \L \u \U \Q \E
3048         (?{  or  (??{
3049
3050
3051   In transliterations:
3052     characters are VERY literal, except for - not at the start or end
3053     of the string, which indicates a range. If the range is in bytes,
3054     scan_const expands the range to the full set of intermediate
3055     characters. If the range is in utf8, the hyphen is replaced with
3056     a certain range mark which will be handled by pmtrans() in op.c.
3057
3058   In double-quoted strings:
3059     backslashes:
3060       double-quoted style: \r and \n
3061       constants: \x31, etc.
3062       deprecated backrefs: \1 (in substitution replacements)
3063       case and quoting: \U \Q \E
3064     stops on @ and $
3065
3066   scan_const does *not* construct ops to handle interpolated strings.
3067   It stops processing as soon as it finds an embedded $ or @ variable
3068   and leaves it to the caller to work out what's going on.
3069
3070   embedded arrays (whether in pattern or not) could be:
3071       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3072
3073   $ in double-quoted strings must be the symbol of an embedded scalar.
3074
3075   $ in pattern could be $foo or could be tail anchor.  Assumption:
3076   it's a tail anchor if $ is the last thing in the string, or if it's
3077   followed by one of "()| \r\n\t"
3078
3079   \1 (backreferences) are turned into $1 in substitutions
3080
3081   The structure of the code is
3082       while (there's a character to process) {
3083           handle transliteration ranges
3084           skip regexp comments /(?#comment)/ and codes /(?{code})/
3085           skip #-initiated comments in //x patterns
3086           check for embedded arrays
3087           check for embedded scalars
3088           if (backslash) {
3089               deprecate \1 in substitution replacements
3090               handle string-changing backslashes \l \U \Q \E, etc.
3091               switch (what was escaped) {
3092                   handle \- in a transliteration (becomes a literal -)
3093                   if a pattern and not \N{, go treat as regular character
3094                   handle \132 (octal characters)
3095                   handle \x15 and \x{1234} (hex characters)
3096                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3097                   handle \cV (control characters)
3098                   handle printf-style backslashes (\f, \r, \n, etc)
3099               } (end switch)
3100               continue
3101           } (end if backslash)
3102           handle regular character
3103     } (end while character to read)
3104
3105 */
3106
3107 STATIC char *
3108 S_scan_const(pTHX_ char *start)
3109 {
3110     dVAR;
3111     char *send = PL_bufend;             /* end of the constant */
3112     SV *sv = newSV(send - start);               /* sv for the constant.  See
3113                                                    note below on sizing. */
3114     char *s = start;                    /* start of the constant */
3115     char *d = SvPVX(sv);                /* destination for copies */
3116     bool dorange = FALSE;                       /* are we in a translit range? */
3117     bool didrange = FALSE;                      /* did we just finish a range? */
3118     bool in_charclass = FALSE;                  /* within /[...]/ */
3119     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3120     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3121                                                    to be UTF8?  But, this can
3122                                                    show as true when the source
3123                                                    isn't utf8, as for example
3124                                                    when it is entirely composed
3125                                                    of hex constants */
3126     SV *res;                            /* result from charnames */
3127
3128     /* Note on sizing:  The scanned constant is placed into sv, which is
3129      * initialized by newSV() assuming one byte of output for every byte of
3130      * input.  This routine expects newSV() to allocate an extra byte for a
3131      * trailing NUL, which this routine will append if it gets to the end of
3132      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3133      * CAPITAL LETTER A}), or more output than input if the constant ends up
3134      * recoded to utf8, but each time a construct is found that might increase
3135      * the needed size, SvGROW() is called.  Its size parameter each time is
3136      * based on the best guess estimate at the time, namely the length used so
3137      * far, plus the length the current construct will occupy, plus room for
3138      * the trailing NUL, plus one byte for every input byte still unscanned */
3139
3140     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3141                        before set */
3142 #ifdef EBCDIC
3143     UV literal_endpoint = 0;
3144     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3145 #endif
3146
3147     PERL_ARGS_ASSERT_SCAN_CONST;
3148
3149     assert(PL_lex_inwhat != OP_TRANSR);
3150     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3151         /* If we are doing a trans and we know we want UTF8 set expectation */
3152         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3153         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3154     }
3155
3156     /* Protect sv from errors and fatal warnings. */
3157     ENTER_with_name("scan_const");
3158     SAVEFREESV(sv);
3159
3160     while (s < send || dorange) {
3161
3162         /* get transliterations out of the way (they're most literal) */
3163         if (PL_lex_inwhat == OP_TRANS) {
3164             /* expand a range A-Z to the full set of characters.  AIE! */
3165             if (dorange) {
3166                 I32 i;                          /* current expanded character */
3167                 I32 min;                        /* first character in range */
3168                 I32 max;                        /* last character in range */
3169
3170 #ifdef EBCDIC
3171                 UV uvmax = 0;
3172 #endif
3173
3174                 if (has_utf8
3175 #ifdef EBCDIC
3176                     && !native_range
3177 #endif
3178                 ) {
3179                     char * const c = (char*)utf8_hop((U8*)d, -1);
3180                     char *e = d++;
3181                     while (e-- > c)
3182                         *(e + 1) = *e;
3183                     *c = (char) ILLEGAL_UTF8_BYTE;
3184                     /* mark the range as done, and continue */
3185                     dorange = FALSE;
3186                     didrange = TRUE;
3187                     continue;
3188                 }
3189
3190                 i = d - SvPVX_const(sv);                /* remember current offset */
3191 #ifdef EBCDIC
3192                 SvGROW(sv,
3193                        SvLEN(sv) + (has_utf8 ?
3194                                     (512 - UTF_CONTINUATION_MARK +
3195                                      UNISKIP(0x100))
3196                                     : 256));
3197                 /* How many two-byte within 0..255: 128 in UTF-8,
3198                  * 96 in UTF-8-mod. */
3199 #else
3200                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3201 #endif
3202                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3203 #ifdef EBCDIC
3204                 if (has_utf8) {
3205                     int j;
3206                     for (j = 0; j <= 1; j++) {
3207                         char * const c = (char*)utf8_hop((U8*)d, -1);
3208                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3209                         if (j)
3210                             min = (U8)uv;
3211                         else if (uv < 256)
3212                             max = (U8)uv;
3213                         else {
3214                             max = (U8)0xff; /* only to \xff */
3215                             uvmax = uv; /* \x{100} to uvmax */
3216                         }
3217                         d = c; /* eat endpoint chars */
3218                      }
3219                 }
3220                else {
3221 #endif
3222                    d -= 2;              /* eat the first char and the - */
3223                    min = (U8)*d;        /* first char in range */
3224                    max = (U8)d[1];      /* last char in range  */
3225 #ifdef EBCDIC
3226                }
3227 #endif
3228
3229                 if (min > max) {
3230                     Perl_croak(aTHX_
3231                                "Invalid range \"%c-%c\" in transliteration operator",
3232                                (char)min, (char)max);
3233                 }
3234
3235 #ifdef EBCDIC
3236                 if (literal_endpoint == 2 &&
3237                     ((isLOWER_A(min) && isLOWER_A(max)) ||
3238                      (isUPPER_A(min) && isUPPER_A(max))))
3239                 {
3240                     for (i = min; i <= max; i++) {
3241                         if (isALPHA_A(i))
3242                             *d++ = i;
3243                     }
3244                 }
3245                 else
3246 #endif
3247                     for (i = min; i <= max; i++)
3248 #ifdef EBCDIC
3249                         if (has_utf8) {
3250                             append_utf8_from_native_byte(i, &d);
3251                         }
3252                         else
3253 #endif
3254                             *d++ = (char)i;
3255
3256 #ifdef EBCDIC
3257                 if (uvmax) {
3258                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3259                     if (uvmax > 0x101)
3260                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3261                     if (uvmax > 0x100)
3262                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3263                 }
3264 #endif
3265
3266                 /* mark the range as done, and continue */
3267                 dorange = FALSE;
3268                 didrange = TRUE;
3269 #ifdef EBCDIC
3270                 literal_endpoint = 0;
3271 #endif
3272                 continue;
3273             }
3274
3275             /* range begins (ignore - as first or last char) */
3276             else if (*s == '-' && s+1 < send  && s != start) {
3277                 if (didrange) {
3278                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3279                 }
3280                 if (has_utf8
3281 #ifdef EBCDIC
3282                     && !native_range
3283 #endif
3284                     ) {
3285                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
3286                     s++;
3287                     continue;
3288                 }
3289                 dorange = TRUE;
3290                 s++;
3291             }
3292             else {
3293                 didrange = FALSE;
3294 #ifdef EBCDIC
3295                 literal_endpoint = 0;
3296                 native_range = TRUE;
3297 #endif
3298             }
3299         }
3300
3301         /* if we get here, we're not doing a transliteration */
3302
3303         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3304             char *s1 = s-1;
3305             int esc = 0;
3306             while (s1 >= start && *s1-- == '\\')
3307                 esc = !esc;
3308             if (!esc)
3309                 in_charclass = TRUE;
3310         }
3311
3312         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3313             char *s1 = s-1;
3314             int esc = 0;
3315             while (s1 >= start && *s1-- == '\\')
3316                 esc = !esc;
3317             if (!esc)
3318                 in_charclass = FALSE;
3319         }
3320
3321         /* skip for regexp comments /(?#comment)/, except for the last
3322          * char, which will be done separately.
3323          * Stop on (?{..}) and friends */
3324
3325         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3326             if (s[2] == '#') {
3327                 while (s+1 < send && *s != ')')
3328                     *d++ = *s++;
3329             }
3330             else if (!PL_lex_casemods &&
3331                      (    s[2] == '{' /* This should match regcomp.c */
3332                       || (s[2] == '?' && s[3] == '{')))
3333             {
3334                 break;
3335             }
3336         }
3337
3338         /* likewise skip #-initiated comments in //x patterns */
3339         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3340           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3341             while (s+1 < send && *s != '\n')
3342                 *d++ = *s++;
3343         }
3344
3345         /* no further processing of single-quoted regex */
3346         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3347             goto default_action;
3348
3349         /* check for embedded arrays
3350            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3351            */
3352         else if (*s == '@' && s[1]) {
3353             if (isWORDCHAR_lazy_if(s+1,UTF))
3354                 break;
3355             if (strchr(":'{$", s[1]))
3356                 break;
3357             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3358                 break; /* in regexp, neither @+ nor @- are interpolated */
3359         }
3360
3361         /* check for embedded scalars.  only stop if we're sure it's a
3362            variable.
3363         */
3364         else if (*s == '$') {
3365             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3366                 break;
3367             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3368                 if (s[1] == '\\') {
3369                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3370                                    "Possible unintended interpolation of $\\ in regex");
3371                 }
3372                 break;          /* in regexp, $ might be tail anchor */
3373             }
3374         }
3375
3376         /* End of else if chain - OP_TRANS rejoin rest */
3377
3378         /* backslashes */
3379         if (*s == '\\' && s+1 < send) {
3380             char* e;    /* Can be used for ending '}', etc. */
3381
3382             s++;
3383
3384             /* warn on \1 - \9 in substitution replacements, but note that \11
3385              * is an octal; and \19 is \1 followed by '9' */
3386             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3387                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3388             {
3389                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3390                 *--s = '$';
3391                 break;
3392             }
3393
3394             /* string-change backslash escapes */
3395             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3396                 --s;
3397                 break;
3398             }
3399             /* In a pattern, process \N, but skip any other backslash escapes.
3400              * This is because we don't want to translate an escape sequence
3401              * into a meta symbol and have the regex compiler use the meta
3402              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3403              * in spite of this, we do have to process \N here while the proper
3404              * charnames handler is in scope.  See bugs #56444 and #62056.
3405              * There is a complication because \N in a pattern may also stand
3406              * for 'match a non-nl', and not mean a charname, in which case its
3407              * processing should be deferred to the regex compiler.  To be a
3408              * charname it must be followed immediately by a '{', and not look
3409              * like \N followed by a curly quantifier, i.e., not something like
3410              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3411              * quantifier */
3412             else if (PL_lex_inpat
3413                     && (*s != 'N'
3414                         || s[1] != '{'
3415                         || regcurly(s + 1, FALSE)))
3416             {
3417                 *d++ = '\\';
3418                 goto default_action;
3419             }
3420
3421             switch (*s) {
3422
3423             /* quoted - in transliterations */
3424             case '-':
3425                 if (PL_lex_inwhat == OP_TRANS) {
3426                     *d++ = *s++;
3427                     continue;
3428                 }
3429                 /* FALL THROUGH */
3430             default:
3431                 {
3432                     if ((isALPHANUMERIC(*s)))
3433                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3434                                        "Unrecognized escape \\%c passed through",
3435                                        *s);
3436                     /* default action is to copy the quoted character */
3437                     goto default_action;
3438                 }
3439
3440             /* eg. \132 indicates the octal constant 0132 */
3441             case '0': case '1': case '2': case '3':
3442             case '4': case '5': case '6': case '7':
3443                 {
3444                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3445                     STRLEN len = 3;
3446                     uv = grok_oct(s, &len, &flags, NULL);
3447                     s += len;
3448                     if (len < 3 && s < send && isDIGIT(*s)
3449                         && ckWARN(WARN_MISC))
3450                     {
3451                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3452                                     "%s", form_short_octal_warning(s, len));
3453                     }
3454                 }
3455                 goto NUM_ESCAPE_INSERT;
3456
3457             /* eg. \o{24} indicates the octal constant \024 */
3458             case 'o':
3459                 {
3460                     const char* error;
3461
3462                     bool valid = grok_bslash_o(&s, &uv, &error,
3463                                                TRUE, /* Output warning */
3464                                                FALSE, /* Not strict */
3465                                                TRUE, /* Output warnings for
3466                                                          non-portables */
3467                                                UTF);
3468                     if (! valid) {
3469                         yyerror(error);
3470                         continue;
3471                     }
3472                     goto NUM_ESCAPE_INSERT;
3473                 }
3474
3475             /* eg. \x24 indicates the hex constant 0x24 */
3476             case 'x':
3477                 {
3478                     const char* error;
3479
3480                     bool valid = grok_bslash_x(&s, &uv, &error,
3481                                                TRUE, /* Output warning */
3482                                                FALSE, /* Not strict */
3483                                                TRUE,  /* Output warnings for
3484                                                          non-portables */
3485                                                UTF);
3486                     if (! valid) {
3487                         yyerror(error);
3488                         continue;
3489                     }
3490                 }
3491
3492               NUM_ESCAPE_INSERT:
3493                 /* Insert oct or hex escaped character.  There will always be
3494                  * enough room in sv since such escapes will be longer than any
3495                  * UTF-8 sequence they can end up as, except if they force us
3496                  * to recode the rest of the string into utf8 */
3497
3498                 /* Here uv is the ordinal of the next character being added */
3499                 if (!UVCHR_IS_INVARIANT(uv)) {
3500                     if (!has_utf8 && uv > 255) {
3501                         /* Might need to recode whatever we have accumulated so
3502                          * far if it contains any chars variant in utf8 or
3503                          * utf-ebcdic. */
3504
3505                         SvCUR_set(sv, d - SvPVX_const(sv));
3506                         SvPOK_on(sv);
3507                         *d = '\0';
3508                         /* See Note on sizing above.  */
3509                         sv_utf8_upgrade_flags_grow(sv,
3510                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3511                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3512                         d = SvPVX(sv) + SvCUR(sv);
3513                         has_utf8 = TRUE;
3514                     }
3515
3516                     if (has_utf8) {
3517                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3518                         if (PL_lex_inwhat == OP_TRANS &&
3519                             PL_sublex_info.sub_op) {
3520                             PL_sublex_info.sub_op->op_private |=
3521                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3522                                              : OPpTRANS_TO_UTF);
3523                         }
3524 #ifdef EBCDIC
3525                         if (uv > 255 && !dorange)
3526                             native_range = FALSE;
3527 #endif
3528                     }
3529                     else {
3530                         *d++ = (char)uv;
3531                     }
3532                 }
3533                 else {
3534                     *d++ = (char) uv;
3535                 }
3536                 continue;
3537
3538             case 'N':
3539                 /* In a non-pattern \N must be a named character, like \N{LATIN
3540                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3541                  * mean to match a non-newline.  For non-patterns, named
3542                  * characters are converted to their string equivalents. In
3543                  * patterns, named characters are not converted to their
3544                  * ultimate forms for the same reasons that other escapes
3545                  * aren't.  Instead, they are converted to the \N{U+...} form
3546                  * to get the value from the charnames that is in effect right
3547                  * now, while preserving the fact that it was a named character
3548                  * so that the regex compiler knows this */
3549
3550                 /* The structure of this section of code (besides checking for
3551                  * errors and upgrading to utf8) is:
3552                  *  Further disambiguate between the two meanings of \N, and if
3553                  *      not a charname, go process it elsewhere
3554                  *  If of form \N{U+...}, pass it through if a pattern;
3555                  *      otherwise convert to utf8
3556                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3557                  *  pattern; otherwise convert to utf8 */
3558
3559                 /* Here, s points to the 'N'; the test below is guaranteed to
3560                  * succeed if we are being called on a pattern as we already
3561                  * know from a test above that the next character is a '{'.
3562                  * On a non-pattern \N must mean 'named sequence, which
3563                  * requires braces */
3564                 s++;
3565                 if (*s != '{') {
3566                     yyerror("Missing braces on \\N{}");
3567                     continue;
3568                 }
3569                 s++;
3570
3571                 /* If there is no matching '}', it is an error. */
3572                 if (! (e = strchr(s, '}'))) {
3573                     if (! PL_lex_inpat) {
3574                         yyerror("Missing right brace on \\N{}");
3575                     } else {
3576                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3577                     }
3578                     continue;
3579                 }
3580
3581                 /* Here it looks like a named character */
3582
3583                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3584                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3585                                 | PERL_SCAN_DISALLOW_PREFIX;
3586                     STRLEN len;
3587
3588                     /* For \N{U+...}, the '...' is a unicode value even on
3589                      * EBCDIC machines */
3590                     s += 2;         /* Skip to next char after the 'U+' */
3591                     len = e - s;
3592                     uv = grok_hex(s, &len, &flags, NULL);
3593                     if (len == 0 || len != (STRLEN)(e - s)) {
3594                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3595                         s = e + 1;
3596                         continue;
3597                     }
3598
3599                     if (PL_lex_inpat) {
3600
3601                         /* On non-EBCDIC platforms, pass through to the regex
3602                          * compiler unchanged.  The reason we evaluated the
3603                          * number above is to make sure there wasn't a syntax
3604                          * error.  But on EBCDIC we convert to native so
3605                          * downstream code can continue to assume it's native
3606                          */
3607                         s -= 5;     /* Include the '\N{U+' */
3608 #ifdef EBCDIC
3609                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3610                                                                and the \0 */
3611                                     "\\N{U+%X}",
3612                                     (unsigned int) UNI_TO_NATIVE(uv));
3613 #else
3614                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3615                         d += e - s + 1;
3616 #endif
3617                     }
3618                     else {  /* Not a pattern: convert the hex to string */
3619
3620                          /* If destination is not in utf8, unconditionally
3621                           * recode it to be so.  This is because \N{} implies
3622                           * Unicode semantics, and scalars have to be in utf8
3623                           * to guarantee those semantics */
3624                         if (! has_utf8) {
3625                             SvCUR_set(sv, d - SvPVX_const(sv));
3626                             SvPOK_on(sv);
3627                             *d = '\0';
3628                             /* See Note on sizing above.  */
3629                             sv_utf8_upgrade_flags_grow(
3630                                         sv,
3631                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3632                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3633                             d = SvPVX(sv) + SvCUR(sv);
3634                             has_utf8 = TRUE;
3635                         }
3636
3637                         /* Add the (Unicode) code point to the output. */
3638                         if (UNI_IS_INVARIANT(uv)) {
3639                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3640                         }
3641                         else {
3642                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3643                         }
3644                     }
3645                 }
3646                 else /* Here is \N{NAME} but not \N{U+...}. */
3647                      if ((res = get_and_check_backslash_N_name(s, e)))
3648                 {
3649                     STRLEN len;
3650                     const char *str = SvPV_const(res, len);
3651                     if (PL_lex_inpat) {
3652
3653                         if (! len) { /* The name resolved to an empty string */
3654                             Copy("\\N{}", d, 4, char);
3655                             d += 4;
3656                         }
3657                         else {
3658                             /* In order to not lose information for the regex
3659                             * compiler, pass the result in the specially made
3660                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3661                             * the code points in hex of each character
3662                             * returned by charnames */
3663
3664                             const char *str_end = str + len;
3665                             const STRLEN off = d - SvPVX_const(sv);
3666
3667                             if (! SvUTF8(res)) {
3668                                 /* For the non-UTF-8 case, we can determine the
3669                                  * exact length needed without having to parse
3670                                  * through the string.  Each character takes up
3671                                  * 2 hex digits plus either a trailing dot or
3672                                  * the "}" */
3673                                 d = off + SvGROW(sv, off
3674                                                     + 3 * len
3675                                                     + 6 /* For the "\N{U+", and
3676                                                            trailing NUL */
3677                                                     + (STRLEN)(send - e));
3678                                 Copy("\\N{U+", d, 5, char);
3679                                 d += 5;
3680                                 while (str < str_end) {
3681                                     char hex_string[4];
3682                                     my_snprintf(hex_string, sizeof(hex_string),
3683                                                 "%02X.", (U8) *str);
3684                                     Copy(hex_string, d, 3, char);
3685                                     d += 3;
3686                                     str++;
3687                                 }
3688                                 d--;    /* We will overwrite below the final
3689                                            dot with a right brace */
3690                             }
3691                             else {
3692                                 STRLEN char_length; /* cur char's byte length */
3693
3694                                 /* and the number of bytes after this is
3695                                  * translated into hex digits */
3696                                 STRLEN output_length;
3697
3698                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3699                                  * for max('U+', '.'); and 1 for NUL */
3700                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3701
3702                                 /* Get the first character of the result. */
3703                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3704                                                         len,
3705                                                         &char_length,
3706                                                         UTF8_ALLOW_ANYUV);
3707                                 /* Convert first code point to hex, including
3708                                  * the boiler plate before it. */
3709                                 output_length =
3710                                     my_snprintf(hex_string, sizeof(hex_string),
3711                                                 "\\N{U+%X",
3712                                                 (unsigned int) uv);
3713
3714                                 /* Make sure there is enough space to hold it */
3715                                 d = off + SvGROW(sv, off
3716                                                     + output_length
3717                                                     + (STRLEN)(send - e)
3718                                                     + 2);       /* '}' + NUL */
3719                                 /* And output it */
3720                                 Copy(hex_string, d, output_length, char);
3721                                 d += output_length;
3722
3723                                 /* For each subsequent character, append dot and
3724                                 * its ordinal in hex */
3725                                 while ((str += char_length) < str_end) {
3726                                     const STRLEN off = d - SvPVX_const(sv);
3727                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3728                                                             str_end - str,
3729                                                             &char_length,
3730                                                             UTF8_ALLOW_ANYUV);
3731                                     output_length =
3732                                         my_snprintf(hex_string,
3733                                                     sizeof(hex_string),
3734                                                     ".%X",
3735                                                     (unsigned int) uv);
3736
3737                                     d = off + SvGROW(sv, off
3738                                                         + output_length
3739                                                         + (STRLEN)(send - e)
3740                                                         + 2);   /* '}' +  NUL */
3741                                     Copy(hex_string, d, output_length, char);
3742                                     d += output_length;
3743                                 }
3744                             }
3745
3746                             *d++ = '}'; /* Done.  Add the trailing brace */
3747                         }
3748                     }
3749                     else { /* Here, not in a pattern.  Convert the name to a
3750                             * string. */
3751
3752                          /* If destination is not in utf8, unconditionally
3753                           * recode it to be so.  This is because \N{} implies
3754                           * Unicode semantics, and scalars have to be in utf8
3755                           * to guarantee those semantics */
3756                         if (! has_utf8) {
3757                             SvCUR_set(sv, d - SvPVX_const(sv));
3758                             SvPOK_on(sv);
3759                             *d = '\0';
3760                             /* See Note on sizing above.  */
3761                             sv_utf8_upgrade_flags_grow(sv,
3762                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3763                                                 len + (STRLEN)(send - s) + 1);
3764                             d = SvPVX(sv) + SvCUR(sv);
3765                             has_utf8 = TRUE;
3766                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3767
3768                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3769                              * set correctly here). */
3770                             const STRLEN off = d - SvPVX_const(sv);
3771                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3772                         }
3773                         Copy(str, d, len, char);
3774                         d += len;
3775                     }
3776
3777                     SvREFCNT_dec(res);
3778
3779                 } /* End \N{NAME} */
3780 #ifdef EBCDIC
3781                 if (!dorange)
3782                     native_range = FALSE; /* \N{} is defined to be Unicode */
3783 #endif
3784                 s = e + 1;  /* Point to just after the '}' */
3785                 continue;
3786
3787             /* \c is a control character */
3788             case 'c':
3789                 s++;
3790                 if (s < send) {
3791                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3792                 }
3793                 else {
3794                     yyerror("Missing control char name in \\c");
3795                 }
3796                 continue;
3797
3798             /* printf-style backslashes, formfeeds, newlines, etc */
3799             case 'b':
3800                 *d++ = '\b';
3801                 break;
3802             case 'n':
3803                 *d++ = '\n';
3804                 break;
3805             case 'r':
3806                 *d++ = '\r';
3807                 break;
3808             case 'f':
3809                 *d++ = '\f';
3810                 break;
3811             case 't':
3812                 *d++ = '\t';
3813                 break;
3814             case 'e':
3815                 *d++ = ASCII_TO_NATIVE('\033');
3816                 break;
3817             case 'a':
3818                 *d++ = '\a';
3819                 break;
3820             } /* end switch */
3821
3822             s++;
3823             continue;
3824         } /* end if (backslash) */
3825 #ifdef EBCDIC
3826         else
3827             literal_endpoint++;
3828 #endif
3829
3830     default_action:
3831         /* If we started with encoded form, or already know we want it,
3832            then encode the next character */
3833         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3834             STRLEN len  = 1;
3835
3836
3837             /* One might think that it is wasted effort in the case of the
3838              * source being utf8 (this_utf8 == TRUE) to take the next character
3839              * in the source, convert it to an unsigned value, and then convert
3840              * it back again.  But the source has not been validated here.  The
3841              * routine that does the conversion checks for errors like
3842              * malformed utf8 */
3843
3844             const UV nextuv   = (this_utf8)
3845                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3846                                 : (UV) ((U8) *s);
3847             const STRLEN need = UNISKIP(nextuv);
3848             if (!has_utf8) {
3849                 SvCUR_set(sv, d - SvPVX_const(sv));
3850                 SvPOK_on(sv);
3851                 *d = '\0';
3852                 /* See Note on sizing above.  */
3853                 sv_utf8_upgrade_flags_grow(sv,
3854                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3855                                         need + (STRLEN)(send - s) + 1);
3856                 d = SvPVX(sv) + SvCUR(sv);
3857                 has_utf8 = TRUE;
3858             } else if (need > len) {
3859                 /* encoded value larger than old, may need extra space (NOTE:
3860                  * SvCUR() is not set correctly here).   See Note on sizing
3861                  * above.  */
3862                 const STRLEN off = d - SvPVX_const(sv);
3863                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3864             }
3865             s += len;
3866
3867             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3868 #ifdef EBCDIC
3869             if (uv > 255 && !dorange)
3870                 native_range = FALSE;
3871 #endif
3872         }
3873         else {
3874             *d++ = *s++;
3875         }
3876     } /* while loop to process each character */
3877
3878     /* terminate the string and set up the sv */
3879     *d = '\0';
3880     SvCUR_set(sv, d - SvPVX_const(sv));
3881     if (SvCUR(sv) >= SvLEN(sv))
3882         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3883                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3884
3885     SvPOK_on(sv);
3886     if (PL_encoding && !has_utf8) {
3887         sv_recode_to_utf8(sv, PL_encoding);
3888         if (SvUTF8(sv))
3889             has_utf8 = TRUE;
3890     }
3891     if (has_utf8) {
3892         SvUTF8_on(sv);
3893         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3894             PL_sublex_info.sub_op->op_private |=
3895                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3896         }
3897     }
3898
3899     /* shrink the sv if we allocated more than we used */
3900     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3901         SvPV_shrink_to_cur(sv);
3902     }
3903
3904     /* return the substring (via pl_yylval) only if we parsed anything */
3905     if (s > start) {
3906         char *s2 = start;
3907         for (; s2 < s; s2++) {
3908             if (*s2 == '\n')
3909                 COPLINE_INC_WITH_HERELINES;
3910         }
3911         SvREFCNT_inc_simple_void_NN(sv);
3912         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3913             && ! PL_parser->lex_re_reparsing)
3914         {
3915             const char *const key = PL_lex_inpat ? "qr" : "q";
3916             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3917             const char *type;
3918             STRLEN typelen;
3919
3920             if (PL_lex_inwhat == OP_TRANS) {
3921                 type = "tr";
3922                 typelen = 2;
3923             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3924                 type = "s";
3925                 typelen = 1;
3926             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3927                 type = "q";
3928                 typelen = 1;
3929             } else  {
3930                 type = "qq";
3931                 typelen = 2;
3932             }
3933
3934             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3935                                 type, typelen);
3936         }
3937         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3938     }
3939     LEAVE_with_name("scan_const");
3940     return s;
3941 }
3942
3943 /* S_intuit_more
3944  * Returns TRUE if there's more to the expression (e.g., a subscript),
3945  * FALSE otherwise.
3946  *
3947  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3948  *
3949  * ->[ and ->{ return TRUE
3950  * ->$* ->@* ->@[ and ->@{ return TRUE if postfix_interpolate is enabled
3951  * { and [ outside a pattern are always subscripts, so return TRUE
3952  * if we're outside a pattern and it's not { or [, then return FALSE
3953  * if we're in a pattern and the first char is a {
3954  *   {4,5} (any digits around the comma) returns FALSE
3955  * if we're in a pattern and the first char is a [
3956  *   [] returns FALSE
3957  *   [SOMETHING] has a funky algorithm to decide whether it's a
3958  *      character class or not.  It has to deal with things like
3959  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3960  * anything else returns TRUE
3961  */
3962
3963 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3964
3965 STATIC int
3966 S_intuit_more(pTHX_ char *s)
3967 {
3968     dVAR;
3969
3970     PERL_ARGS_ASSERT_INTUIT_MORE;
3971
3972     if (PL_lex_brackets)
3973         return TRUE;
3974     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3975         return TRUE;
3976     if (*s == '-' && s[1] == '>'
3977      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3978      && ( (s[2] == '$' && s[3] == '*')
3979         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3980         return TRUE;
3981     if (*s != '{' && *s != '[')
3982         return FALSE;
3983     if (!PL_lex_inpat)
3984         return TRUE;
3985
3986     /* In a pattern, so maybe we have {n,m}. */
3987     if (*s == '{') {
3988         if (regcurly(s, FALSE)) {
3989             return FALSE;
3990         }
3991         return TRUE;
3992     }
3993
3994     /* On the other hand, maybe we have a character class */
3995
3996     s++;
3997     if (*s == ']' || *s == '^')
3998         return FALSE;
3999     else {
4000         /* this is terrifying, and it works */
4001         int weight;
4002         char seen[256];
4003         const char * const send = strchr(s,']');
4004         unsigned char un_char, last_un_char;
4005         char tmpbuf[sizeof PL_tokenbuf * 4];
4006
4007         if (!send)              /* has to be an expression */
4008             return TRUE;
4009         weight = 2;             /* let's weigh the evidence */
4010
4011         if (*s == '$')
4012             weight -= 3;
4013         else if (isDIGIT(*s)) {
4014             if (s[1] != ']') {
4015                 if (isDIGIT(s[1]) && s[2] == ']')
4016                     weight -= 10;
4017             }
4018             else
4019                 weight -= 100;
4020         }
4021         Zero(seen,256,char);
4022         un_char = 255;
4023         for (; s < send; s++) {
4024             last_un_char = un_char;
4025             un_char = (unsigned char)*s;
4026             switch (*s) {
4027             case '@':
4028             case '&':
4029             case '$':
4030                 weight -= seen[un_char] * 10;
4031                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4032                     int len;
4033                     char *tmp = PL_bufend;
4034                     PL_bufend = (char*)send;
4035                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4036                     PL_bufend = tmp;
4037                     len = (int)strlen(tmpbuf);
4038                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4039                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4040                         weight -= 100;
4041                     else
4042                         weight -= 10;
4043                 }
4044                 else if (*s == '$' && s[1] &&
4045                   strchr("[#!%*<>()-=",s[1])) {
4046                     if (/*{*/ strchr("])} =",s[2]))
4047                         weight -= 10;
4048                     else
4049                         weight -= 1;
4050                 }
4051                 break;
4052             case '\\':
4053                 un_char = 254;
4054                 if (s[1]) {
4055                     if (strchr("wds]",s[1]))
4056                         weight += 100;
4057                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4058                         weight += 1;
4059                     else if (strchr("rnftbxcav",s[1]))
4060                         weight += 40;
4061                     else if (isDIGIT(s[1])) {
4062                         weight += 40;
4063                         while (s[1] && isDIGIT(s[1]))
4064                             s++;
4065                     }
4066                 }
4067                 else
4068                     weight += 100;
4069                 break;
4070             case '-':
4071                 if (s[1] == '\\')
4072                     weight += 50;
4073                 if (strchr("aA01! ",last_un_char))
4074                     weight += 30;
4075                 if (strchr("zZ79~",s[1]))
4076                     weight += 30;
4077                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4078                     weight -= 5;        /* cope with negative subscript */
4079                 break;
4080             default:
4081                 if (!isWORDCHAR(last_un_char)
4082                     && !(last_un_char == '$' || last_un_char == '@'
4083                          || last_un_char == '&')
4084                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4085                     char *d = tmpbuf;
4086                     while (isALPHA(*s))
4087                         *d++ = *s++;
4088                     *d = '\0';
4089                     if (keyword(tmpbuf, d - tmpbuf, 0))
4090                         weight -= 150;
4091                 }
4092                 if (un_char == last_un_char + 1)
4093                     weight += 5;
4094                 weight -= seen[un_char];
4095                 break;
4096             }
4097             seen[un_char]++;
4098         }
4099         if (weight >= 0)        /* probably a character class */
4100             return FALSE;
4101     }
4102
4103     return TRUE;
4104 }
4105
4106 /*
4107  * S_intuit_method
4108  *
4109  * Does all the checking to disambiguate
4110  *   foo bar
4111  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4112  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4113  *
4114  * First argument is the stuff after the first token, e.g. "bar".
4115  *
4116  * Not a method if foo is a filehandle.
4117  * Not a method if foo is a subroutine prototyped to take a filehandle.
4118  * Not a method if it's really "Foo $bar"
4119  * Method if it's "foo $bar"
4120  * Not a method if it's really "print foo $bar"
4121  * Method if it's really "foo package::" (interpreted as package->foo)
4122  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4123  * Not a method if bar is a filehandle or package, but is quoted with
4124  *   =>
4125  */
4126
4127 STATIC int
4128 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4129 {
4130     dVAR;
4131     char *s = start + (*start == '$');
4132     char tmpbuf[sizeof PL_tokenbuf];
4133     STRLEN len;
4134     GV* indirgv;
4135 #ifdef PERL_MAD
4136     int soff;
4137 #endif
4138
4139     PERL_ARGS_ASSERT_INTUIT_METHOD;
4140
4141     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4142             return 0;
4143     if (cv && SvPOK(cv)) {
4144         const char *proto = CvPROTO(cv);
4145         if (proto) {
4146             while (*proto && (isSPACE(*proto) || *proto == ';'))
4147                 proto++;
4148             if (*proto == '*')
4149                 return 0;
4150         }
4151     }
4152
4153     if (*start == '$') {
4154         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4155                 isUPPER(*PL_tokenbuf))
4156             return 0;
4157 #ifdef PERL_MAD
4158         len = start - SvPVX(PL_linestr);
4159 #endif
4160         s = PEEKSPACE(s);
4161 #ifdef PERL_MAD
4162         start = SvPVX(PL_linestr) + len;
4163 #endif
4164         PL_bufptr = start;
4165         PL_expect = XREF;
4166         return *s == '(' ? FUNCMETH : METHOD;
4167     }
4168
4169     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4170     /* start is the beginning of the possible filehandle/object,
4171      * and s is the end of it
4172      * tmpbuf is a copy of it (but with single quotes as double colons)
4173      */
4174
4175     if (!keyword(tmpbuf, len, 0)) {
4176         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4177             len -= 2;
4178             tmpbuf[len] = '\0';
4179 #ifdef PERL_MAD
4180             soff = s - SvPVX(PL_linestr);
4181 #endif
4182             goto bare_package;
4183         }
4184         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4185         if (indirgv && GvCVu(indirgv))
4186             return 0;
4187         /* filehandle or package name makes it a method */
4188         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4189 #ifdef PERL_MAD
4190             soff = s - SvPVX(PL_linestr);
4191 #endif
4192             s = PEEKSPACE(s);
4193             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4194                 return 0;       /* no assumptions -- "=>" quotes bareword */
4195       bare_package:
4196             start_force(PL_curforce);
4197             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4198                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4199             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4200             if (PL_madskills)
4201                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4202                                                             ( UTF ? SVf_UTF8 : 0 )));
4203             PL_expect = XTERM;
4204             force_next(WORD);
4205             PL_bufptr = s;
4206 #ifdef PERL_MAD
4207             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4208 #endif
4209             return *s == '(' ? FUNCMETH : METHOD;
4210         }
4211     }
4212     return 0;
4213 }
4214
4215 /* Encoded script support. filter_add() effectively inserts a
4216  * 'pre-processing' function into the current source input stream.
4217  * Note that the filter function only applies to the current source file
4218  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4219  *
4220  * The datasv parameter (which may be NULL) can be used to pass
4221  * private data to this instance of the filter. The filter function
4222  * can recover the SV using the FILTER_DATA macro and use it to
4223  * store private buffers and state information.
4224  *
4225  * The supplied datasv parameter is upgraded to a PVIO type
4226  * and the IoDIRP/IoANY field is used to store the function pointer,
4227  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4228  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4229  * private use must be set using malloc'd pointers.
4230  */
4231
4232 SV *
4233 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4234 {
4235     dVAR;
4236     if (!funcp)
4237         return NULL;
4238
4239     if (!PL_parser)
4240         return NULL;
4241
4242     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4243         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4244
4245     if (!PL_rsfp_filters)
4246         PL_rsfp_filters = newAV();
4247     if (!datasv)
4248         datasv = newSV(0);
4249     SvUPGRADE(datasv, SVt_PVIO);
4250     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4251     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4252     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4253                           FPTR2DPTR(void *, IoANY(datasv)),
4254                           SvPV_nolen(datasv)));
4255     av_unshift(PL_rsfp_filters, 1);
4256     av_store(PL_rsfp_filters, 0, datasv) ;
4257     if (
4258         !PL_parser->filtered
4259      && PL_parser->lex_flags & LEX_EVALBYTES
4260      && PL_bufptr < PL_bufend
4261     ) {
4262         const char *s = PL_bufptr;
4263         while (s < PL_bufend) {
4264             if (*s == '\n') {
4265                 SV *linestr = PL_parser->linestr;
4266                 char *buf = SvPVX(linestr);
4267                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4268                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4269                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4270                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4271                 STRLEN const last_uni_pos =
4272                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4273                 STRLEN const last_lop_pos =
4274                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4275                 av_push(PL_rsfp_filters, linestr);
4276                 PL_parser->linestr =
4277                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4278                 buf = SvPVX(PL_parser->linestr);
4279                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4280                 PL_parser->bufptr = buf + bufptr_pos;
4281                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4282                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4283                 PL_parser->linestart = buf + linestart_pos;
4284                 if (PL_parser->last_uni)
4285                     PL_parser->last_uni = buf + last_uni_pos;
4286                 if (PL_parser->last_lop)
4287                     PL_parser->last_lop = buf + last_lop_pos;
4288                 SvLEN(linestr) = SvCUR(linestr);
4289                 SvCUR(linestr) = s-SvPVX(linestr);
4290                 PL_parser->filtered = 1;
4291                 break;
4292             }
4293             s++;
4294         }
4295     }
4296     return(datasv);
4297 }
4298
4299
4300 /* Delete most recently added instance of this filter function. */
4301 void
4302 Perl_filter_del(pTHX_ filter_t funcp)
4303 {
4304     dVAR;
4305     SV *datasv;
4306
4307     PERL_ARGS_ASSERT_FILTER_DEL;
4308
4309 #ifdef DEBUGGING
4310     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4311                           FPTR2DPTR(void*, funcp)));
4312 #endif
4313     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4314         return;
4315     /* if filter is on top of stack (usual case) just pop it off */
4316     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4317     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4318         sv_free(av_pop(PL_rsfp_filters));
4319
4320         return;
4321     }
4322     /* we need to search for the correct entry and clear it     */
4323     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4324 }
4325
4326
4327 /* Invoke the idxth filter function for the current rsfp.        */
4328 /* maxlen 0 = read one text line */
4329 I32
4330 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4331 {
4332     dVAR;
4333     filter_t funcp;
4334     SV *datasv = NULL;
4335     /* This API is bad. It should have been using unsigned int for maxlen.
4336        Not sure if we want to change the API, but if not we should sanity
4337        check the value here.  */
4338     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4339
4340     PERL_ARGS_ASSERT_FILTER_READ;
4341
4342     if (!PL_parser || !PL_rsfp_filters)
4343         return -1;
4344     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4345         /* Provide a default input filter to make life easy.    */
4346         /* Note that we append to the line. This is handy.      */
4347         DEBUG_P(PerlIO_printf(Perl_debug_log,
4348                               "filter_read %d: from rsfp\n", idx));
4349         if (correct_length) {
4350             /* Want a block */
4351             int len ;
4352             const int old_len = SvCUR(buf_sv);
4353
4354             /* ensure buf_sv is large enough */
4355             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4356             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4357                                    correct_length)) <= 0) {
4358                 if (PerlIO_error(PL_rsfp))
4359                     return -1;          /* error */
4360                 else
4361                     return 0 ;          /* end of file */
4362             }
4363             SvCUR_set(buf_sv, old_len + len) ;
4364             SvPVX(buf_sv)[old_len + len] = '\0';
4365         } else {
4366             /* Want a line */
4367             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4368                 if (PerlIO_error(PL_rsfp))
4369                     return -1;          /* error */
4370                 else
4371                     return 0 ;          /* end of file */
4372             }
4373         }
4374         return SvCUR(buf_sv);
4375     }
4376     /* Skip this filter slot if filter has been deleted */
4377     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4378         DEBUG_P(PerlIO_printf(Perl_debug_log,
4379                               "filter_read %d: skipped (filter deleted)\n",
4380                               idx));
4381         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4382     }
4383     if (SvTYPE(datasv) != SVt_PVIO) {
4384         if (correct_length) {
4385             /* Want a block */
4386             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4387             if (!remainder) return 0; /* eof */
4388             if (correct_length > remainder) correct_length = remainder;
4389             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4390             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4391         } else {
4392             /* Want a line */
4393             const char *s = SvEND(datasv);
4394             const char *send = SvPVX(datasv) + SvLEN(datasv);
4395             while (s < send) {
4396                 if (*s == '\n') {
4397                     s++;
4398                     break;
4399                 }
4400                 s++;
4401             }
4402             if (s == send) return 0; /* eof */
4403             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4404             SvCUR_set(datasv, s-SvPVX(datasv));
4405         }
4406         return SvCUR(buf_sv);
4407     }
4408     /* Get function pointer hidden within datasv        */
4409     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4410     DEBUG_P(PerlIO_printf(Perl_debug_log,
4411                           "filter_read %d: via function %p (%s)\n",
4412                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4413     /* Call function. The function is expected to       */
4414     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4415     /* Return: <0:error, =0:eof, >0:not eof             */
4416     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4417 }
4418
4419 STATIC char *
4420 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4421 {
4422     dVAR;
4423
4424     PERL_ARGS_ASSERT_FILTER_GETS;
4425
4426 #ifdef PERL_CR_FILTER
4427     if (!PL_rsfp_filters) {
4428         filter_add(S_cr_textfilter,NULL);
4429     }
4430 #endif
4431     if (PL_rsfp_filters) {
4432         if (!append)
4433             SvCUR_set(sv, 0);   /* start with empty line        */
4434         if (FILTER_READ(0, sv, 0) > 0)
4435             return ( SvPVX(sv) ) ;
4436         else
4437             return NULL ;
4438     }
4439     else
4440         return (sv_gets(sv, PL_rsfp, append));
4441 }
4442
4443 STATIC HV *
4444 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4445 {
4446     dVAR;
4447     GV *gv;
4448
4449     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4450
4451     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4452         return PL_curstash;
4453
4454     if (len > 2 &&
4455         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4456         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4457     {
4458         return GvHV(gv);                        /* Foo:: */
4459     }
4460
4461     /* use constant CLASS => 'MyClass' */
4462     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4463     if (gv && GvCV(gv)) {
4464         SV * const sv = cv_const_sv(GvCV(gv));
4465         if (sv)
4466             pkgname = SvPV_const(sv, len);
4467     }
4468
4469     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4470 }
4471
4472 /*
4473  * S_readpipe_override
4474  * Check whether readpipe() is overridden, and generates the appropriate
4475  * optree, provided sublex_start() is called afterwards.
4476  */
4477 STATIC void
4478 S_readpipe_override(pTHX)
4479 {
4480     GV **gvp;
4481     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4482     pl_yylval.ival = OP_BACKTICK;
4483     if ((gv_readpipe
4484                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4485             ||
4486             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4487              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4488              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4489     {
4490         COPLINE_SET_FROM_MULTI_END;
4491         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4492             op_append_elem(OP_LIST,
4493                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4494                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4495     }
4496 }
4497
4498 #ifdef PERL_MAD
4499  /*
4500  * Perl_madlex
4501  * The intent of this yylex wrapper is to minimize the changes to the
4502  * tokener when we aren't interested in collecting madprops.  It remains
4503  * to be seen how successful this strategy will be...
4504  */
4505
4506 int
4507 Perl_madlex(pTHX)
4508 {
4509     int optype;
4510     char *s = PL_bufptr;
4511
4512     /* make sure PL_thiswhite is initialized */
4513     PL_thiswhite = 0;
4514     PL_thismad = 0;
4515
4516     /* previous token ate up our whitespace? */
4517     if (!PL_lasttoke && PL_nextwhite) {
4518         PL_thiswhite = PL_nextwhite;
4519         PL_nextwhite = 0;
4520     }
4521
4522     /* isolate the token, and figure out where it is without whitespace */
4523     PL_realtokenstart = -1;
4524     PL_thistoken = 0;
4525     optype = yylex();
4526     s = PL_bufptr;
4527     assert(PL_curforce < 0);
4528
4529     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4530         if (!PL_thistoken) {
4531             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4532                 PL_thistoken = newSVpvs("");
4533             else {
4534                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4535                 PL_thistoken = newSVpvn(tstart, s - tstart);
4536             }
4537         }
4538         if (PL_thismad) /* install head */
4539             CURMAD('X', PL_thistoken);
4540     }
4541
4542     /* last whitespace of a sublex? */
4543     if (optype == ')' && PL_endwhite) {
4544         CURMAD('X', PL_endwhite);
4545     }
4546
4547     if (!PL_thismad) {
4548
4549         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4550         if (!PL_thiswhite && !PL_endwhite && !optype) {
4551             sv_free(PL_thistoken);
4552             PL_thistoken = 0;
4553             return 0;
4554         }
4555
4556         /* put off final whitespace till peg */
4557         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4558             PL_nextwhite = PL_thiswhite;
4559             PL_thiswhite = 0;
4560         }
4561         else if (PL_thisopen) {
4562             CURMAD('q', PL_thisopen);
4563             if (PL_thistoken)
4564                 sv_free(PL_thistoken);
4565             PL_thistoken = 0;
4566         }
4567         else {
4568             /* Store actual token text as madprop X */
4569             CURMAD('X', PL_thistoken);
4570         }
4571
4572         if (PL_thiswhite) {
4573             /* add preceding whitespace as madprop _ */
4574             CURMAD('_', PL_thiswhite);
4575         }
4576
4577         if (PL_thisstuff) {
4578             /* add quoted material as madprop = */
4579             CURMAD('=', PL_thisstuff);
4580         }
4581
4582         if (PL_thisclose) {
4583             /* add terminating quote as madprop Q */
4584             CURMAD('Q', PL_thisclose);
4585         }
4586     }
4587
4588     /* special processing based on optype */
4589
4590     switch (optype) {
4591
4592     /* opval doesn't need a TOKEN since it can already store mp */
4593     case WORD:
4594     case METHOD:
4595     case FUNCMETH:
4596     case THING:
4597     case PMFUNC:
4598     case PRIVATEREF:
4599     case FUNC0SUB:
4600     case UNIOPSUB:
4601     case LSTOPSUB:
4602         if (pl_yylval.opval)
4603             append_madprops(PL_thismad, pl_yylval.opval, 0);
4604         PL_thismad = 0;
4605         return optype;
4606
4607     /* fake EOF */
4608     case 0:
4609         optype = PEG;
4610         if (PL_endwhite) {
4611             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4612             PL_endwhite = 0;
4613         }
4614         break;
4615
4616     /* pval */
4617     case LABEL:
4618         break;
4619
4620     case ']':
4621     case '}':
4622         if (PL_faketokens)
4623             break;
4624         /* remember any fake bracket that lexer is about to discard */
4625         if (PL_lex_brackets == 1 &&
4626             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4627         {
4628             s = PL_bufptr;
4629             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4630                 s++;
4631             if (*s == '}') {
4632                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4633                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4634                 PL_thiswhite = 0;
4635                 PL_bufptr = s - 1;
4636                 break;  /* don't bother looking for trailing comment */
4637             }
4638             else
4639                 s = PL_bufptr;
4640         }
4641         if (optype == ']')
4642             break;
4643         /* FALLTHROUGH */
4644
4645     /* attach a trailing comment to its statement instead of next token */
4646     case ';':
4647         if (PL_faketokens)
4648             break;
4649         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4650             s = PL_bufptr;
4651             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4652                 s++;
4653             if (*s == '\n' || *s == '#') {
4654                 while (s < PL_bufend && *s != '\n')
4655                     s++;
4656                 if (s < PL_bufend)
4657                     s++;
4658                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4659                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4660                 PL_thiswhite = 0;
4661                 PL_bufptr = s;
4662             }
4663         }
4664         break;
4665
4666     /* ival */
4667     default:
4668         break;
4669
4670     }
4671
4672     /* Create new token struct.  Note: opvals return early above. */
4673     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4674     PL_thismad = 0;
4675     return optype;
4676 }
4677 #endif
4678
4679 STATIC char *
4680 S_tokenize_use(pTHX_ int is_use, char *s) {
4681     dVAR;
4682
4683     PERL_ARGS_ASSERT_TOKENIZE_USE;
4684
4685     if (PL_expect != XSTATE)
4686         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4687                     is_use ? "use" : "no"));
4688     PL_expect = XTERM;
4689     s = SKIPSPACE1(s);
4690     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4691         s = force_version(s, TRUE);
4692         if (*s == ';' || *s == '}'
4693                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4694             start_force(PL_curforce);
4695             NEXTVAL_NEXTTOKE.opval = NULL;
4696             force_next(WORD);
4697         }
4698         else if (*s == 'v') {
4699             s = force_word(s,WORD,FALSE,TRUE);
4700             s = force_version(s, FALSE);
4701         }
4702     }
4703     else {
4704         s = force_word(s,WORD,FALSE,TRUE);
4705         s = force_version(s, FALSE);
4706     }
4707     pl_yylval.ival = is_use;
4708     return s;
4709 }
4710 #ifdef DEBUGGING
4711     static const char* const exp_name[] =
4712         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4713           "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4714         };
4715 #endif
4716
4717 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4718 STATIC bool
4719 S_word_takes_any_delimeter(char *p, STRLEN len)
4720 {
4721     return (len == 1 && strchr("msyq", p[0])) ||
4722            (len == 2 && (
4723             (p[0] == 't' && p[1] == 'r') ||
4724             (p[0] == 'q' && strchr("qwxr", p[1]))));
4725 }
4726
4727 static void
4728 S_check_scalar_slice(pTHX_ char *s)
4729 {
4730     s++;
4731     while (*s == ' ' || *s == '\t') s++;
4732     if (*s == 'q' && s[1] == 'w'
4733      && !isWORDCHAR_lazy_if(s+2,UTF))
4734         return;
4735     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4736         s += UTF ? UTF8SKIP(s) : 1;
4737     if (*s == '}' || *s == ']')
4738         pl_yylval.ival = OPpSLICEWARNING;
4739 }
4740
4741 /*
4742   yylex
4743
4744   Works out what to call the token just pulled out of the input
4745   stream.  The yacc parser takes care of taking the ops we return and
4746   stitching them into a tree.
4747
4748   Returns:
4749     The type of the next token
4750
4751   Structure:
4752       Switch based on the current state:
4753           - if we already built the token before, use it
4754           - if we have a case modifier in a string, deal with that
4755           - handle other cases of interpolation inside a string
4756           - scan the next line if we are inside a format
4757       In the normal state switch on the next character:
4758           - default:
4759             if alphabetic, go to key lookup
4760             unrecoginized character - croak
4761           - 0/4/26: handle end-of-line or EOF
4762           - cases for whitespace
4763           - \n and #: handle comments and line numbers
4764           - various operators, brackets and sigils
4765           - numbers
4766           - quotes
4767           - 'v': vstrings (or go to key lookup)
4768           - 'x' repetition operator (or go to key lookup)
4769           - other ASCII alphanumerics (key lookup begins here):
4770               word before => ?
4771               keyword plugin
4772               scan built-in keyword (but do nothing with it yet)
4773               check for statement label
4774               check for lexical subs
4775                   goto just_a_word if there is one
4776               see whether built-in keyword is overridden
4777               switch on keyword number:
4778                   - default: just_a_word:
4779                       not a built-in keyword; handle bareword lookup
4780                       disambiguate between method and sub call
4781                       fall back to bareword
4782                   - cases for built-in keywords
4783 */
4784
4785
4786 int
4787 Perl_yylex(pTHX)
4788 {
4789     dVAR;
4790     char *s = PL_bufptr;
4791     char *d;
4792     STRLEN len;
4793     bool bof = FALSE;
4794     const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4795     U8 formbrack = 0;
4796     U32 fake_eof = 0;
4797
4798     /* orig_keyword, gvp, and gv are initialized here because
4799      * jump to the label just_a_word_zero can bypass their
4800      * initialization later. */
4801     I32 orig_keyword = 0;
4802     GV *gv = NULL;
4803     GV **gvp = NULL;
4804
4805     DEBUG_T( {
4806         SV* tmp = newSVpvs("");
4807         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4808             (IV)CopLINE(PL_curcop),
4809             lex_state_names[PL_lex_state],
4810             exp_name[PL_expect],
4811             pv_display(tmp, s, strlen(s), 0, 60));
4812         SvREFCNT_dec(tmp);
4813     } );
4814
4815     switch (PL_lex_state) {
4816     case LEX_NORMAL:
4817     case LEX_INTERPNORMAL:
4818         break;
4819
4820     /* when we've already built the next token, just pull it out of the queue */
4821     case LEX_KNOWNEXT:
4822 #ifdef PERL_MAD
4823         PL_lasttoke--;
4824         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4825         if (PL_madskills) {
4826             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4827             PL_nexttoke[PL_lasttoke].next_mad = 0;
4828             if (PL_thismad && PL_thismad->mad_key == '_') {
4829                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4830                 PL_thismad->mad_val = 0;
4831                 mad_free(PL_thismad);
4832                 PL_thismad = 0;
4833             }
4834         }
4835         if (!PL_lasttoke) {
4836             PL_lex_state = PL_lex_defer;
4837             PL_expect = PL_lex_expect;
4838             PL_lex_defer = LEX_NORMAL;
4839             if (!PL_nexttoke[PL_lasttoke].next_type)
4840                 return yylex();
4841         }
4842 #else
4843         PL_nexttoke--;
4844         pl_yylval = PL_nextval[PL_nexttoke];
4845         if (!PL_nexttoke) {
4846             PL_lex_state = PL_lex_defer;
4847             PL_expect = PL_lex_expect;
4848             PL_lex_defer = LEX_NORMAL;
4849         }
4850 #endif
4851         {
4852             I32 next_type;
4853 #ifdef PERL_MAD
4854             next_type = PL_nexttoke[PL_lasttoke].next_type;
4855 #else
4856             next_type = PL_nexttype[PL_nexttoke];
4857 #endif
4858             if (next_type & (7<<24)) {
4859                 if (next_type & (1<<24)) {
4860                     if (PL_lex_brackets > 100)
4861                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4862                     PL_lex_brackstack[PL_lex_brackets++] =
4863                         (char) ((next_type >> 16) & 0xff);
4864                 }
4865                 if (next_type & (2<<24))
4866                     PL_lex_allbrackets++;
4867                 if (next_type & (4<<24))
4868                     PL_lex_allbrackets--;
4869                 next_type &= 0xffff;
4870             }
4871             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4872         }
4873
4874     /* interpolated case modifiers like \L \U, including \Q and \E.
4875        when we get here, PL_bufptr is at the \
4876     */
4877     case LEX_INTERPCASEMOD:
4878 #ifdef DEBUGGING
4879         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4880             Perl_croak(aTHX_
4881                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4882                        PL_bufptr, PL_bufend, *PL_bufptr);
4883 #endif
4884         /* handle \E or end of string */
4885         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4886             /* if at a \E */
4887             if (PL_lex_casemods) {
4888                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4889                 PL_lex_casestack[PL_lex_casemods] = '\0';
4890
4891                 if (PL_bufptr != PL_bufend
4892                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4893                         || oldmod == 'F')) {
4894                     PL_bufptr += 2;
4895                     PL_lex_state = LEX_INTERPCONCAT;
4896 #ifdef PERL_MAD
4897                     if (PL_madskills)
4898                         PL_thistoken = newSVpvs("\\E");
4899 #endif
4900                 }
4901                 PL_lex_allbrackets--;
4902                 return REPORT(')');
4903             }
4904             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4905                /* Got an unpaired \E */
4906                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4907                         "Useless use of \\E");
4908             }
4909 #ifdef PERL_MAD
4910             while (PL_bufptr != PL_bufend &&
4911               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4912                 if (PL_madskills) {
4913                   if (!PL_thiswhite)
4914                     PL_thiswhite = newSVpvs("");
4915                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4916                 }
4917                 PL_bufptr += 2;
4918             }
4919 #else
4920             if (PL_bufptr != PL_bufend)
4921                 PL_bufptr += 2;
4922 #endif
4923             PL_lex_state = LEX_INTERPCONCAT;
4924             return yylex();
4925         }
4926         else {
4927             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4928               "### Saw case modifier\n"); });
4929             s = PL_bufptr + 1;
4930             if (s[1] == '\\' && s[2] == 'E') {
4931 #ifdef PERL_MAD
4932                 if (PL_madskills) {
4933                   if (!PL_thiswhite)
4934                     PL_thiswhite = newSVpvs("");
4935                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4936                 }
4937 #endif
4938                 PL_bufptr = s + 3;
4939                 PL_lex_state = LEX_INTERPCONCAT;
4940                 return yylex();
4941             }
4942             else {
4943                 I32 tmp;
4944                 if (!PL_madskills) /* when just compiling don't need correct */
4945                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4946                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4947                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4948                     (strchr(PL_lex_casestack, 'L')
4949                         || strchr(PL_lex_casestack, 'U')
4950                         || strchr(PL_lex_casestack, 'F'))) {
4951                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4952                     PL_lex_allbrackets--;
4953                     return REPORT(')');
4954                 }
4955                 if (PL_lex_casemods > 10)
4956                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4957                 PL_lex_casestack[PL_lex_casemods++] = *s;
4958                 PL_lex_casestack[PL_lex_casemods] = '\0';
4959                 PL_lex_state = LEX_INTERPCONCAT;
4960                 start_force(PL_curforce);
4961                 NEXTVAL_NEXTTOKE.ival = 0;
4962                 force_next((2<<24)|'(');
4963                 start_force(PL_curforce);
4964                 if (*s == 'l')
4965                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4966                 else if (*s == 'u')
4967                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4968                 else if (*s == 'L')
4969                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4970                 else if (*s == 'U')
4971                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4972                 else if (*s == 'Q')
4973                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4974                 else if (*s == 'F')
4975                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4976                 else
4977                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4978                 if (PL_madskills) {
4979                     SV* const tmpsv = newSVpvs("\\ ");
4980                     /* replace the space with the character we want to escape
4981                      */
4982                     SvPVX(tmpsv)[1] = *s;
4983                     curmad('_', tmpsv);
4984                 }
4985                 PL_bufptr = s + 1;
4986             }
4987             force_next(FUNC);
4988             if (PL_lex_starts) {
4989                 s = PL_bufptr;
4990                 PL_lex_starts = 0;
4991 #ifdef PERL_MAD
4992                 if (PL_madskills) {
4993                     if (PL_thistoken)
4994                         sv_free(PL_thistoken);
4995                     PL_thistoken = newSVpvs("");
4996                 }
4997 #endif
4998                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4999                 if (PL_lex_casemods == 1 && PL_lex_inpat)
5000                     OPERATOR(',');
5001                 else
5002                     Aop(OP_CONCAT);
5003             }
5004             else
5005                 return yylex();
5006         }
5007
5008     case LEX_INTERPPUSH:
5009         return REPORT(sublex_push());
5010
5011     case LEX_INTERPSTART:
5012         if (PL_bufptr == PL_bufend)
5013             return REPORT(sublex_done());
5014         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
5015               "### Interpolated variable\n"); });
5016         PL_expect = XTERM;
5017         /* for /@a/, we leave the joining for the regex engine to do
5018          * (unless we're within \Q etc) */
5019         PL_lex_dojoin = (*PL_bufptr == '@'
5020                             && (!PL_lex_inpat || PL_lex_casemods));
5021         PL_lex_state = LEX_INTERPNORMAL;
5022         if (PL_lex_dojoin) {
5023             start_force(PL_curforce);
5024             NEXTVAL_NEXTTOKE.ival = 0;
5025             force_next(',');
5026             start_force(PL_curforce);
5027             force_ident("\"", '$');
5028             start_force(PL_curforce);
5029             NEXTVAL_NEXTTOKE.ival = 0;
5030             force_next('$');
5031             start_force(PL_curforce);
5032             NEXTVAL_NEXTTOKE.ival = 0;
5033             force_next((2<<24)|'(');
5034             start_force(PL_curforce);
5035             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
5036             force_next(FUNC);
5037         }
5038         /* Convert (?{...}) and friends to 'do {...}' */
5039         if (PL_lex_inpat && *PL_bufptr == '(') {
5040             PL_parser->lex_shared->re_eval_start = PL_bufptr;
5041             PL_bufptr += 2;
5042             if (*PL_bufptr != '{')
5043                 PL_bufptr++;
5044             start_force(PL_curforce);
5045             /* XXX probably need a CURMAD(something) here */
5046             PL_expect = XTERMBLOCK;
5047             force_next(DO);
5048         }
5049
5050         if (PL_lex_starts++) {
5051             s = PL_bufptr;
5052 #ifdef PERL_MAD
5053             if (PL_madskills) {
5054                 if (PL_thistoken)
5055                     sv_free(PL_thistoken);
5056                 PL_thistoken = newSVpvs("");
5057             }
5058 #endif
5059             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5060             if (!PL_lex_casemods && PL_lex_inpat)
5061                 OPERATOR(',');
5062             else
5063                 Aop(OP_CONCAT);
5064         }
5065         return yylex();
5066
5067     case LEX_INTERPENDMAYBE:
5068         if (intuit_more(PL_bufptr)) {
5069             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5070             break;
5071         }
5072         /* FALL THROUGH */
5073
5074     case LEX_INTERPEND:
5075         if (PL_lex_dojoin) {
5076             const U8 dojoin_was = PL_lex_dojoin;
5077             PL_lex_dojoin = FALSE;
5078             PL_lex_state = LEX_INTERPCONCAT;
5079 #ifdef PERL_MAD
5080             if (PL_madskills) {
5081                 if (PL_thistoken)
5082                     sv_free(PL_thistoken);
5083                 PL_thistoken = newSVpvs("");
5084             }
5085 #endif
5086             PL_lex_allbrackets--;
5087             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5088         }
5089         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5090             && SvEVALED(PL_lex_repl))
5091         {
5092             if (PL_bufptr != PL_bufend)
5093                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5094             PL_lex_repl = NULL;
5095         }
5096         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5097            re_eval_str.  If the here-doc body’s length equals the previous
5098            value of re_eval_start, re_eval_start will now be null.  So
5099            check re_eval_str as well. */
5100         if (PL_parser->lex_shared->re_eval_start
5101          || PL_parser->lex_shared->re_eval_str) {
5102             SV *sv;
5103             if (*PL_bufptr != ')')
5104                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5105             PL_bufptr++;
5106             /* having compiled a (?{..}) expression, return the original
5107              * text too, as a const */
5108             if (PL_parser->lex_shared->re_eval_str) {
5109                 sv = PL_parser->lex_shared->re_eval_str;
5110                 PL_parser->lex_shared->re_eval_str = NULL;
5111                 SvCUR_set(sv,
5112                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5113                 SvPV_shrink_to_cur(sv);
5114             }
5115             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5116                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5117             start_force(PL_curforce);
5118             /* XXX probably need a CURMAD(something) here */
5119             NEXTVAL_NEXTTOKE.opval =
5120                     (OP*)newSVOP(OP_CONST, 0,
5121                                  sv);
5122             force_next(THING);
5123             PL_parser->lex_shared->re_eval_start = NULL;
5124             PL_expect = XTERM;
5125             return REPORT(',');
5126         }
5127
5128         /* FALLTHROUGH */
5129     case LEX_INTERPCONCAT:
5130 #ifdef DEBUGGING
5131         if (PL_lex_brackets)
5132             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5133                        (long) PL_lex_brackets);
5134 #endif
5135         if (PL_bufptr == PL_bufend)
5136             return REPORT(sublex_done());
5137
5138         /* m'foo' still needs to be parsed for possible (?{...}) */
5139         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5140             SV *sv = newSVsv(PL_linestr);
5141             sv = tokeq(sv);
5142             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5143             s = PL_bufend;
5144         }
5145         else {
5146             s = scan_const(PL_bufptr);
5147             if (*s == '\\')
5148                 PL_lex_state = LEX_INTERPCASEMOD;
5149             else
5150                 PL_lex_state = LEX_INTERPSTART;
5151         }
5152
5153         if (s != PL_bufptr) {
5154             start_force(PL_curforce);
5155             if (PL_madskills) {
5156                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5157             }
5158             NEXTVAL_NEXTTOKE = pl_yylval;
5159             PL_expect = XTERM;
5160             force_next(THING);
5161             if (PL_lex_starts++) {
5162 #ifdef PERL_MAD
5163                 if (PL_madskills) {
5164                     if (PL_thistoken)
5165                         sv_free(PL_thistoken);
5166                     PL_thistoken = newSVpvs("");
5167                 }
5168 #endif
5169                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5170                 if (!PL_lex_casemods && PL_lex_inpat)
5171                     OPERATOR(',');
5172                 else
5173                     Aop(OP_CONCAT);
5174             }
5175             else {
5176                 PL_bufptr = s;
5177                 return yylex();
5178             }
5179         }
5180
5181         return yylex();
5182     case LEX_FORMLINE:
5183         s = scan_formline(PL_bufptr);
5184         if (!PL_lex_formbrack)
5185         {
5186             formbrack = 1;
5187             goto rightbracket;
5188         }
5189         PL_bufptr = s;
5190         return yylex();
5191     }
5192
5193     /* We really do *not* want PL_linestr ever becoming a COW. */
5194     assert (!SvIsCOW(PL_linestr));
5195     s = PL_bufptr;
5196     PL_oldoldbufptr = PL_oldbufptr;
5197     PL_oldbufptr = s;
5198     PL_parser->saw_infix_sigil = 0;
5199
5200   retry:
5201 #ifdef PERL_MAD
5202     if (PL_thistoken) {
5203         sv_free(PL_thistoken);
5204         PL_thistoken = 0;
5205     }
5206     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5207 #endif
5208     switch (*s) {
5209     default:
5210         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5211             goto keylookup;
5212         {
5213         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5214         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5215                                                     UTF8SKIP(s),
5216                                                     SVs_TEMP | SVf_UTF8),
5217                                             10, UNI_DISPLAY_ISPRINT)
5218                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5219         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5220         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5221             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5222         } else {
5223             d = PL_linestart;
5224         }
5225         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5226                           UTF8fARG(UTF, (s - d), d),
5227                          (int) len + 1);
5228     }
5229     case 4:
5230     case 26:
5231         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5232     case 0:
5233 #ifdef PERL_MAD
5234         if (PL_madskills)
5235             PL_faketokens = 0;
5236 #endif
5237         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5238             PL_last_uni = 0;
5239             PL_last_lop = 0;
5240             if (PL_lex_brackets &&
5241                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5242                 yyerror((const char *)
5243                         (PL_lex_formbrack
5244                          ? "Format not terminated"
5245                          : "Missing right curly or square bracket"));
5246             }
5247             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5248                         "### Tokener got EOF\n");
5249             } );
5250             TOKEN(0);
5251         }
5252         if (s++ < PL_bufend)
5253             goto retry;                 /* ignore stray nulls */
5254         PL_last_uni = 0;
5255         PL_last_lop = 0;
5256         if (!PL_in_eval && !PL_preambled) {
5257             PL_preambled = TRUE;
5258 #ifdef PERL_MAD
5259             if (PL_madskills)
5260                 PL_faketokens = 1;
5261 #endif
5262             if (PL_perldb) {
5263                 /* Generate a string of Perl code to load the debugger.
5264                  * If PERL5DB is set, it will return the contents of that,
5265                  * otherwise a compile-time require of perl5db.pl.  */
5266
5267                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5268
5269                 if (pdb) {
5270                     sv_setpv(PL_linestr, pdb);
5271                     sv_catpvs(PL_linestr,";");
5272                 } else {
5273                     SETERRNO(0,SS_NORMAL);
5274                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5275                 }
5276                 PL_parser->preambling = CopLINE(PL_curcop);
5277             } else
5278                 sv_setpvs(PL_linestr,"");
5279             if (PL_preambleav) {
5280                 SV **svp = AvARRAY(PL_preambleav);
5281                 SV **const end = svp + AvFILLp(PL_preambleav);
5282                 while(svp <= end) {
5283                     sv_catsv(PL_linestr, *svp);
5284                     ++svp;
5285                     sv_catpvs(PL_linestr, ";");
5286                 }
5287                 sv_free(MUTABLE_SV(PL_preambleav));
5288                 PL_preambleav = NULL;
5289             }
5290             if (PL_minus_E)
5291                 sv_catpvs(PL_linestr,
5292                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5293             if (PL_minus_n || PL_minus_p) {
5294                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5295                 if (PL_minus_l)
5296                     sv_catpvs(PL_linestr,"chomp;");
5297                 if (PL_minus_a) {
5298                     if (PL_minus_F) {
5299                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5300                              || *PL_splitstr == '"')
5301                               && strchr(PL_splitstr + 1, *PL_splitstr))
5302                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5303                         else {
5304                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5305                                bytes can be used as quoting characters.  :-) */
5306                             const char *splits = PL_splitstr;
5307                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5308                             do {
5309                                 /* Need to \ \s  */
5310                                 if (*splits == '\\')
5311                                     sv_catpvn(PL_linestr, splits, 1);
5312                                 sv_catpvn(PL_linestr, splits, 1);
5313                             } while (*splits++);
5314                             /* This loop will embed the trailing NUL of
5315                                PL_linestr as the last thing it does before
5316                                terminating.  */
5317                             sv_catpvs(PL_linestr, ");");
5318                         }
5319                     }
5320                     else
5321                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5322                 }
5323             }
5324             sv_catpvs(PL_linestr, "\n");
5325             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5326             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5327             PL_last_lop = PL_last_uni = NULL;
5328             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5329                 update_debugger_info(PL_linestr, NULL, 0);
5330             goto retry;
5331         }
5332         do {
5333             fake_eof = 0;
5334             bof = PL_rsfp ? TRUE : FALSE;
5335             if (0) {
5336               fake_eof:
5337                 fake_eof = LEX_FAKE_EOF;
5338             }
5339             PL_bufptr = PL_bufend;
5340             COPLINE_INC_WITH_HERELINES;
5341             if (!lex_next_chunk(fake_eof)) {
5342                 CopLINE_dec(PL_curcop);
5343                 s = PL_bufptr;
5344                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5345             }
5346             CopLINE_dec(PL_curcop);
5347 #ifdef PERL_MAD
5348             if (!PL_rsfp)
5349                 PL_realtokenstart = -1;
5350 #endif
5351             s = PL_bufptr;
5352             /* If it looks like the start of a BOM or raw UTF-16,
5353              * check if it in fact is. */
5354             if (bof && PL_rsfp &&
5355                      (*s == 0 ||
5356                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5357                       *(U8*)s >= 0xFE ||
5358                       s[1] == 0)) {
5359                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5360                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5361 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5362                 /* offset may include swallowed CR */
5363                 if (!bof)
5364                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5365 #endif
5366                 if (bof) {
5367                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5368                     s = swallow_bom((U8*)s);
5369                 }
5370             }
5371             if (PL_parser->in_pod) {
5372                 /* Incest with pod. */
5373 #ifdef PERL_MAD
5374                 if (PL_madskills)
5375                     sv_catsv(PL_thiswhite, PL_linestr);
5376 #endif
5377                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5378                     sv_setpvs(PL_linestr, "");
5379                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5380                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5381                     PL_last_lop = PL_last_uni = NULL;
5382                     PL_parser->in_pod = 0;
5383                 }
5384             }
5385             if (PL_rsfp || PL_parser->filtered)
5386                 incline(s);
5387         } while (PL_parser->in_pod);
5388         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5389         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5390         PL_last_lop = PL_last_uni = NULL;
5391         if (CopLINE(PL_curcop) == 1) {
5392             while (s < PL_bufend && isSPACE(*s))
5393                 s++;
5394             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5395                 s++;
5396 #ifdef PERL_MAD
5397             if (PL_madskills)
5398                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5399 #endif
5400             d = NULL;
5401             if (!PL_in_eval) {
5402                 if (*s == '#' && *(s+1) == '!')
5403                     d = s + 2;
5404 #ifdef ALTERNATE_SHEBANG
5405                 else {
5406                     static char const as[] = ALTERNATE_SHEBANG;
5407                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5408                         d = s + (sizeof(as) - 1);
5409                 }
5410 #endif /* ALTERNATE_SHEBANG */
5411             }
5412             if (d) {
5413                 char *ipath;
5414                 char *ipathend;
5415
5416                 while (isSPACE(*d))
5417                     d++;
5418                 ipath = d;
5419                 while (*d && !isSPACE(*d))
5420                     d++;
5421                 ipathend = d;
5422
5423 #ifdef ARG_ZERO_IS_SCRIPT
5424                 if (ipathend > ipath) {
5425                     /*
5426                      * HP-UX (at least) sets argv[0] to the script name,
5427                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5428                      * at least, set argv[0] to the basename of the Perl
5429                      * interpreter. So, having found "#!", we'll set it right.
5430                      */
5431                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5432                                                     SVt_PV)); /* $^X */
5433                     assert(SvPOK(x) || SvGMAGICAL(x));
5434                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5435                         sv_setpvn(x, ipath, ipathend - ipath);
5436                         SvSETMAGIC(x);
5437                     }
5438                     else {
5439                         STRLEN blen;
5440                         STRLEN llen;
5441                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5442                         const char * const lstart = SvPV_const(x,llen);
5443                         if (llen < blen) {
5444                             bstart += blen - llen;
5445                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5446                                 sv_setpvn(x, ipath, ipathend - ipath);
5447                                 SvSETMAGIC(x);
5448                             }
5449                         }
5450                     }
5451                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5452                 }
5453 #endif /* ARG_ZERO_IS_SCRIPT */
5454
5455                 /*
5456                  * Look for options.
5457                  */
5458                 d = instr(s,"perl -");
5459                 if (!d) {
5460                     d = instr(s,"perl");
5461 #if defined(DOSISH)
5462                     /* avoid getting into infinite loops when shebang
5463                      * line contains "Perl" rather than "perl" */
5464                     if (!d) {
5465                         for (d = ipathend-4; d >= ipath; --d) {
5466                             if ((*d == 'p' || *d == 'P')
5467                                 && !ibcmp(d, "perl", 4))
5468                             {
5469                                 break;
5470                             }
5471                         }
5472                         if (d < ipath)
5473                             d = NULL;
5474                     }
5475 #endif
5476                 }
5477 #ifdef ALTERNATE_SHEBANG
5478                 /*
5479                  * If the ALTERNATE_SHEBANG on this system starts with a
5480                  * character that can be part of a Perl expression, then if
5481                  * we see it but not "perl", we're probably looking at the
5482                  * start of Perl code, not a request to hand off to some
5483                  * other interpreter.  Similarly, if "perl" is there, but
5484                  * not in the first 'word' of the line, we assume the line
5485                  * contains the start of the Perl program.
5486                  */
5487                 if (d && *s != '#') {
5488                     const char *c = ipath;
5489                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5490                         c++;
5491                     if (c < d)
5492                         d = NULL;       /* "perl" not in first word; ignore */
5493                     else
5494                         *s = '#';       /* Don't try to parse shebang line */
5495                 }
5496 #endif /* ALTERNATE_SHEBANG */
5497                 if (!d &&
5498                     *s == '#' &&
5499                     ipathend > ipath &&
5500                     !PL_minus_c &&
5501                     !instr(s,"indir") &&
5502                     instr(PL_origargv[0],"perl"))
5503                 {
5504                     dVAR;
5505                     char **newargv;
5506
5507                     *ipathend = '\0';
5508                     s = ipathend + 1;
5509                     while (s < PL_bufend && isSPACE(*s))
5510                         s++;
5511                     if (s < PL_bufend) {
5512                         Newx(newargv,PL_origargc+3,char*);
5513                         newargv[1] = s;
5514                         while (s < PL_bufend && !isSPACE(*s))
5515                             s++;
5516                         *s = '\0';
5517                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5518                     }
5519                     else
5520                         newargv = PL_origargv;
5521                     newargv[0] = ipath;
5522                     PERL_FPU_PRE_EXEC
5523                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5524                     PERL_FPU_POST_EXEC
5525                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5526                 }
5527                 if (d) {
5528                     while (*d && !isSPACE(*d))
5529                         d++;
5530                     while (SPACE_OR_TAB(*d))
5531                         d++;
5532
5533                     if (*d++ == '-') {
5534                         const bool switches_done = PL_doswitches;
5535                         const U32 oldpdb = PL_perldb;
5536                         const bool oldn = PL_minus_n;
5537                         const bool oldp = PL_minus_p;
5538                         const char *d1 = d;
5539
5540                         do {
5541                             bool baduni = FALSE;
5542                             if (*d1 == 'C') {
5543                                 const char *d2 = d1 + 1;
5544                                 if (parse_unicode_opts((const char **)&d2)
5545                                     != PL_unicode)
5546                                     baduni = TRUE;
5547                             }
5548                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5549                                 const char * const m = d1;
5550                                 while (*d1 && !isSPACE(*d1))
5551                                     d1++;
5552                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5553                                       (int)(d1 - m), m);
5554                             }
5555                             d1 = moreswitches(d1);
5556                         } while (d1);
5557                         if (PL_doswitches && !switches_done) {
5558                             int argc = PL_origargc;
5559                             char **argv = PL_origargv;
5560                             do {
5561                                 argc--,argv++;
5562                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5563                             init_argv_symbols(argc,argv);
5564                         }
5565                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5566                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5567                               /* if we have already added "LINE: while (<>) {",
5568                                  we must not do it again */
5569                         {
5570                             sv_setpvs(PL_linestr, "");
5571                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5572                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5573                             PL_last_lop = PL_last_uni = NULL;
5574                             PL_preambled = FALSE;
5575                             if (PERLDB_LINE || PERLDB_SAVESRC)
5576                                 (void)gv_fetchfile(PL_origfilename);
5577                             goto retry;
5578                         }
5579                     }
5580                 }
5581             }
5582         }
5583         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5584             PL_lex_state = LEX_FORMLINE;
5585             start_force(PL_curforce);
5586             NEXTVAL_NEXTTOKE.ival = 0;
5587             force_next(FORMRBRACK);
5588             TOKEN(';');
5589         }
5590         goto retry;
5591     case '\r':
5592 #ifdef PERL_STRICT_CR
5593         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5594         Perl_croak(aTHX_
5595       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5596 #endif
5597     case ' ': case '\t': case '\f': case 013:
5598 #ifdef PERL_MAD
5599         PL_realtokenstart = -1;
5600         if (PL_madskills) {
5601           if (!PL_thiswhite)
5602             PL_thiswhite = newSVpvs("");
5603           sv_catpvn(PL_thiswhite, s, 1);
5604         }
5605 #endif
5606         s++;
5607         goto retry;
5608     case '#':
5609     case '\n':
5610 #ifdef PERL_MAD
5611         PL_realtokenstart = -1;
5612         if (PL_madskills)
5613             PL_faketokens = 0;
5614 #endif
5615         if (PL_lex_state != LEX_NORMAL ||
5616              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5617             if (*s == '#' && s == PL_linestart && PL_in_eval
5618              && !PL_rsfp && !PL_parser->filtered) {
5619                 /* handle eval qq[#line 1 "foo"\n ...] */
5620                 CopLINE_dec(PL_curcop);
5621                 incline(s);
5622             }
5623             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5624                 s = SKIPSPACE0(s);
5625                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5626                     incline(s);
5627             }
5628             else {
5629                 const bool in_comment = *s == '#';
5630                 d = s;
5631                 while (d < PL_bufend && *d != '\n')
5632                     d++;
5633                 if (d < PL_bufend)
5634                     d++;
5635                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5636                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5637                                d, PL_bufend);
5638 #ifdef PERL_MAD
5639                 if (PL_madskills)
5640                     PL_thiswhite = newSVpvn(s, d - s);
5641 #endif
5642                 s = d;
5643                 if (in_comment && d == PL_bufend
5644                  && PL_lex_state == LEX_INTERPNORMAL
5645                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5646                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5647                 else incline(s);
5648             }
5649             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5650                 PL_lex_state = LEX_FORMLINE;
5651                 start_force(PL_curforce);
5652                 NEXTVAL_NEXTTOKE.ival = 0;
5653                 force_next(FORMRBRACK);
5654                 TOKEN(';');
5655             }
5656         }
5657         else {
5658 #ifdef PERL_MAD
5659             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5660                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5661                     PL_faketokens = 0;
5662                     s = SKIPSPACE0(s);
5663                     TOKEN(PEG); /* make sure any #! line is accessible */
5664                 }
5665                 s = SKIPSPACE0(s);
5666             }
5667             else {
5668 #endif
5669                     if (PL_madskills) d = s;
5670                     while (s < PL_bufend && *s != '\n')
5671                         s++;
5672                     if (s < PL_bufend)
5673                     {
5674                         s++;
5675                         if (s < PL_bufend)
5676                             incline(s);
5677                     }
5678                     else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5679                       Perl_croak(aTHX_ "panic: input overflow");
5680 #ifdef PERL_MAD
5681                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5682                         if (!PL_thiswhite)
5683                             PL_thiswhite = newSVpvs("");
5684                         if (CopLINE(PL_curcop) == 1) {
5685                             sv_setpvs(PL_thiswhite, "");
5686                             PL_faketokens = 0;
5687                         }
5688                         sv_catpvn(PL_thiswhite, d, s - d);
5689                     }
5690             }
5691 #endif
5692         }
5693         goto retry;
5694     case '-':
5695         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5696             I32 ftst = 0;
5697             char tmp;
5698
5699             s++;
5700             PL_bufptr = s;
5701             tmp = *s++;
5702
5703             while (s < PL_bufend && SPACE_OR_TAB(*s))
5704                 s++;
5705
5706             if (strnEQ(s,"=>",2)) {
5707                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5708                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5709                 OPERATOR('-');          /* unary minus */
5710             }
5711             PL_last_uni = PL_oldbufptr;
5712             switch (tmp) {
5713             case 'r': ftst = OP_FTEREAD;        break;
5714             case 'w': ftst = OP_FTEWRITE;       break;
5715             case 'x': ftst = OP_FTEEXEC;        break;
5716             case 'o': ftst = OP_FTEOWNED;       break;
5717             case 'R': ftst = OP_FTRREAD;        break;
5718             case 'W': ftst = OP_FTRWRITE;       break;
5719             case 'X': ftst = OP_FTREXEC;        break;
5720             case 'O': ftst = OP_FTROWNED;       break;
5721             case 'e': ftst = OP_FTIS;           break;
5722             case 'z': ftst = OP_FTZERO;         break;
5723             case 's': ftst = OP_FTSIZE;         break;
5724             case 'f': ftst = OP_FTFILE;         break;
5725             case 'd': ftst = OP_FTDIR;          break;
5726             case 'l': ftst = OP_FTLINK;         break;
5727             case 'p': ftst = OP_FTPIPE;         break;
5728             case 'S': ftst = OP_FTSOCK;         break;
5729             case 'u': ftst = OP_FTSUID;         break;
5730             case 'g': ftst = OP_FTSGID;         break;
5731             case 'k': ftst = OP_FTSVTX;         break;
5732             case 'b': ftst = OP_FTBLK;          break;
5733             case 'c': ftst = OP_FTCHR;          break;
5734             case 't': ftst = OP_FTTTY;          break;
5735             case 'T': ftst = OP_FTTEXT;         break;
5736             case 'B': ftst = OP_FTBINARY;       break;
5737             case 'M': case 'A': case 'C':
5738                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5739                 switch (tmp) {
5740                 case 'M': ftst = OP_FTMTIME;    break;
5741                 case 'A': ftst = OP_FTATIME;    break;
5742                 case 'C': ftst = OP_FTCTIME;    break;
5743                 default:                        break;
5744                 }
5745                 break;
5746             default:
5747                 break;
5748             }
5749             if (ftst) {
5750                 PL_last_lop_op = (OPCODE)ftst;
5751                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5752                         "### Saw file test %c\n", (int)tmp);
5753                 } );
5754                 FTST(ftst);
5755             }
5756             else {
5757                 /* Assume it was a minus followed by a one-letter named
5758                  * subroutine call (or a -bareword), then. */
5759                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5760                         "### '-%c' looked like a file test but was not\n",
5761                         (int) tmp);
5762                 } );
5763                 s = --PL_bufptr;
5764             }
5765         }
5766         {
5767             const char tmp = *s++;
5768             if (*s == tmp) {
5769                 s++;
5770                 if (PL_expect == XOPERATOR)
5771                     TERM(POSTDEC);
5772                 else
5773                     OPERATOR(PREDEC);
5774             }
5775             else if (*s == '>') {
5776                 s++;
5777                 s = SKIPSPACE1(s);
5778                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5779                     ((*s == '$' || *s == '&') && s[1] == '*')
5780                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5781                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5782                  ))
5783                 {
5784                     Perl_ck_warner_d(aTHX_
5785                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5786                         "Postfix dereference is experimental"
5787                     );
5788                     PL_expect = XPOSTDEREF;
5789                     TOKEN(ARROW);
5790                 }
5791                 if (isIDFIRST_lazy_if(s,UTF)) {
5792                     s = force_word(s,METHOD,FALSE,TRUE);
5793                     TOKEN(ARROW);
5794                 }
5795                 else if (*s == '$')
5796                     OPERATOR(ARROW);
5797                 else
5798                     TERM(ARROW);
5799             }
5800             if (PL_expect == XOPERATOR) {
5801                 if (*s == '=' && !PL_lex_allbrackets &&
5802                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5803                     s--;
5804                     TOKEN(0);
5805                 }
5806                 Aop(OP_SUBTRACT);
5807             }
5808             else {
5809                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5810                     check_uni();
5811                 OPERATOR('-');          /* unary minus */
5812             }
5813         }
5814
5815     case '+':
5816         {
5817             const char tmp = *s++;
5818             if (*s == tmp) {
5819                 s++;
5820                 if (PL_expect == XOPERATOR)
5821                     TERM(POSTINC);
5822                 else
5823                     OPERATOR(PREINC);
5824             }
5825             if (PL_expect == XOPERATOR) {
5826                 if (*s == '=' && !PL_lex_allbrackets &&
5827                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5828                     s--;
5829                     TOKEN(0);
5830                 }
5831                 Aop(OP_ADD);
5832             }
5833             else {
5834                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5835                     check_uni();
5836                 OPERATOR('+');
5837             }
5838         }
5839
5840     case '*':
5841         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5842         if (PL_expect != XOPERATOR) {
5843             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5844             PL_expect = XOPERATOR;
5845             force_ident(PL_tokenbuf, '*');
5846             if (!*PL_tokenbuf)
5847                 PREREF('*');
5848             TERM('*');
5849         }
5850         s++;
5851         if (*s == '*') {
5852             s++;
5853             if (*s == '=' && !PL_lex_allbrackets &&
5854                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5855                 s -= 2;
5856                 TOKEN(0);
5857             }
5858             PWop(OP_POW);
5859         }
5860         if (*s == '=' && !PL_lex_allbrackets &&
5861                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5862             s--;
5863             TOKEN(0);
5864         }
5865         PL_parser->saw_infix_sigil = 1;
5866         Mop(OP_MULTIPLY);
5867
5868     case '%':
5869     {
5870         if (PL_expect == XOPERATOR) {
5871             if (s[1] == '=' && !PL_lex_allbrackets &&
5872                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5873                 TOKEN(0);
5874             ++s;
5875             PL_parser->saw_infix_sigil = 1;
5876             Mop(OP_MODULO);
5877         }
5878         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5879         PL_tokenbuf[0] = '%';
5880         s = scan_ident(s, PL_tokenbuf + 1,
5881                 sizeof PL_tokenbuf - 1, FALSE);
5882         pl_yylval.ival = 0;
5883         if (!PL_tokenbuf[1]) {
5884             PREREF('%');
5885         }
5886         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5887             if (*s == '[')
5888                 PL_tokenbuf[0] = '@';
5889
5890             /* Warn about % where they meant $. */
5891             if (*s == '[' || *s == '{') {
5892                 if (ckWARN(WARN_SYNTAX)) {
5893                     S_check_scalar_slice(aTHX_ s);
5894                 }
5895             }
5896         }
5897         PL_expect = XOPERATOR;
5898         force_ident_maybe_lex('%');
5899         TERM('%');
5900     }
5901     case '^':
5902         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5903                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5904             TOKEN(0);
5905         s++;
5906         BOop(OP_BIT_XOR);
5907     case '[':
5908         if (PL_lex_brackets > 100)
5909             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5910         PL_lex_brackstack[PL_lex_brackets++] = 0;
5911         PL_lex_allbrackets++;
5912         {
5913             const char tmp = *s++;
5914             OPERATOR(tmp);
5915         }
5916     case '~':
5917         if (s[1] == '~'
5918             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5919         {
5920             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5921                 TOKEN(0);
5922             s += 2;
5923             Perl_ck_warner_d(aTHX_
5924                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5925                 "Smartmatch is experimental");
5926             Eop(OP_SMARTMATCH);
5927         }
5928         s++;
5929         OPERATOR('~');
5930     case ',':
5931         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5932             TOKEN(0);
5933         s++;
5934         OPERATOR(',');
5935     case ':':
5936         if (s[1] == ':') {
5937             len = 0;
5938             goto just_a_word_zero_gv;
5939         }
5940         s++;
5941         switch (PL_expect) {
5942             OP *attrs;
5943 #ifdef PERL_MAD
5944             I32 stuffstart;
5945 #endif
5946         case XOPERATOR:
5947             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5948                 break;
5949             PL_bufptr = s;      /* update in case we back off */
5950             if (*s == '=') {
5951                 Perl_croak(aTHX_
5952                            "Use of := for an empty attribute list is not allowed");
5953             }
5954             goto grabattrs;
5955         case XATTRBLOCK:
5956             PL_expect = XBLOCK;
5957             goto grabattrs;
5958         case XATTRTERM:
5959             PL_expect = XTERMBLOCK;
5960          grabattrs:
5961 #ifdef PERL_MAD
5962             stuffstart = s - SvPVX(PL_linestr) - 1;
5963 #endif
5964             s = PEEKSPACE(s);
5965             attrs = NULL;
5966             while (isIDFIRST_lazy_if(s,UTF)) {
5967                 I32 tmp;
5968                 SV *sv;
5969                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5970                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5971                     if (tmp < 0) tmp = -tmp;
5972                     switch (tmp) {
5973                     case KEY_or:
5974                     case KEY_and:
5975                     case KEY_for:
5976                     case KEY_foreach:
5977                     case KEY_unless:
5978                     case KEY_if:
5979                     case KEY_while:
5980                     case KEY_until:
5981                         goto got_attrs;
5982                     default:
5983                         break;
5984                     }
5985                 }
5986                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5987                 if (*d == '(') {
5988                     d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5989                     COPLINE_SET_FROM_MULTI_END;
5990                     if (!d) {
5991                         /* MUST advance bufptr here to avoid bogus
5992                            "at end of line" context messages from yyerror().
5993                          */
5994                         PL_bufptr = s + len;
5995                         yyerror("Unterminated attribute parameter in attribute list");
5996                         if (attrs)
5997                             op_free(attrs);
5998                         sv_free(sv);
5999                         return REPORT(0);       /* EOF indicator */
6000                     }
6001                 }
6002                 if (PL_lex_stuff) {
6003                     sv_catsv(sv, PL_lex_stuff);
6004                     attrs = op_append_elem(OP_LIST, attrs,
6005                                         newSVOP(OP_CONST, 0, sv));
6006                     SvREFCNT_dec(PL_lex_stuff);
6007                     PL_lex_stuff = NULL;
6008                 }
6009                 else {
6010                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
6011                         sv_free(sv);
6012                         if (PL_in_my == KEY_our) {
6013                             deprecate(":unique");
6014                         }
6015                         else
6016                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
6017                     }
6018
6019                     /* NOTE: any CV attrs applied here need to be part of
6020                        the CVf_BUILTIN_ATTRS define in cv.h! */
6021                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
6022                         sv_free(sv);
6023                         CvLVALUE_on(PL_compcv);
6024                     }
6025                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
6026                         sv_free(sv);
6027                         deprecate(":locked");
6028                     }
6029                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
6030                         sv_free(sv);
6031                         CvMETHOD_on(PL_compcv);
6032                     }
6033                     /* After we've set the flags, it could be argued that
6034                        we don't need to do the attributes.pm-based setting
6035                        process, and shouldn't bother appending recognized
6036                        flags.  To experiment with that, uncomment the
6037                        following "else".  (Note that's already been
6038                        uncommented.  That keeps the above-applied built-in
6039                        attributes from being intercepted (and possibly
6040                        rejected) by a package's attribute routines, but is
6041                        justified by the performance win for the common case
6042                        of applying only built-in attributes.) */
6043                     else
6044                         attrs = op_append_elem(OP_LIST, attrs,
6045                                             newSVOP(OP_CONST, 0,
6046                                                     sv));
6047                 }
6048                 s = PEEKSPACE(d);
6049                 if (*s == ':' && s[1] != ':')
6050                     s = PEEKSPACE(s+1);
6051                 else if (s == d)
6052                     break;      /* require real whitespace or :'s */
6053                 /* XXX losing whitespace on sequential attributes here */
6054             }
6055             {
6056                 const char tmp
6057                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6058                 if (*s != ';' && *s != '}' && *s != tmp
6059                     && (tmp != '=' || *s != ')')) {
6060                     const char q = ((*s == '\'') ? '"' : '\'');
6061                     /* If here for an expression, and parsed no attrs, back
6062                        off. */
6063                     if (tmp == '=' && !attrs) {
6064                         s = PL_bufptr;
6065                         break;
6066                     }
6067                     /* MUST advance bufptr here to avoid bogus "at end of line"
6068                        context messages from yyerror().
6069                     */
6070                     PL_bufptr = s;
6071                     yyerror( (const char *)
6072                              (*s
6073                               ? Perl_form(aTHX_ "Invalid separator character "
6074                                           "%c%c%c in attribute list", q, *s, q)
6075                               : "Unterminated attribute list" ) );
6076                     if (attrs)
6077                         op_free(attrs);
6078                     OPERATOR(':');
6079                 }
6080             }
6081         got_attrs:
6082             if (attrs) {
6083                 start_force(PL_curforce);
6084                 NEXTVAL_NEXTTOKE.opval = attrs;
6085                 CURMAD('_', PL_nextwhite);
6086                 force_next(THING);
6087             }
6088 #ifdef PERL_MAD
6089             if (PL_madskills) {
6090                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6091                                      (s - SvPVX(PL_linestr)) - stuffstart);
6092             }
6093 #endif
6094             TOKEN(COLONATTR);
6095         }
6096         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6097             s--;
6098             TOKEN(0);
6099         }
6100         PL_lex_allbrackets--;
6101         OPERATOR(':');
6102     case '(':
6103         s++;
6104         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6105             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6106         else
6107             PL_expect = XTERM;
6108         s = SKIPSPACE1(s);
6109         PL_lex_allbrackets++;
6110         TOKEN('(');
6111     case ';':
6112         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6113             TOKEN(0);
6114         CLINE;
6115         s++;
6116         OPERATOR(';');
6117     case ')':
6118         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6119             TOKEN(0);
6120         s++;
6121         PL_lex_allbrackets--;
6122         s = SKIPSPACE1(s);
6123         if (*s == '{')
6124             PREBLOCK(')');
6125         TERM(')');
6126     case ']':
6127         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6128             TOKEN(0);
6129         s++;
6130         if (PL_lex_brackets <= 0)
6131             yyerror("Unmatched right square bracket");
6132         else
6133             --PL_lex_brackets;
6134         PL_lex_allbrackets--;
6135         if (PL_lex_state == LEX_INTERPNORMAL) {
6136             if (PL_lex_brackets == 0) {
6137                 if (*s == '-' && s[1] == '>')
6138                     PL_lex_state = LEX_INTERPENDMAYBE;
6139                 else if (*s != '[' && *s != '{')
6140                     PL_lex_state = LEX_INTERPEND;
6141             }
6142         }
6143         TERM(']');
6144     case '{':
6145         s++;
6146       leftbracket:
6147         if (PL_lex_brackets > 100) {
6148             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6149         }
6150         switch (PL_expect) {
6151         case XTERM:
6152             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153             PL_lex_allbrackets++;
6154             OPERATOR(HASHBRACK);
6155         case XOPERATOR:
6156             while (s < PL_bufend && SPACE_OR_TAB(*s))
6157                 s++;
6158             d = s;
6159             PL_tokenbuf[0] = '\0';
6160             if (d < PL_bufend && *d == '-') {
6161                 PL_tokenbuf[0] = '-';
6162                 d++;
6163                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6164                     d++;
6165             }
6166             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6167                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6168                               FALSE, &len);
6169                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6170                     d++;
6171                 if (*d == '}') {
6172                     const char minus = (PL_tokenbuf[0] == '-');
6173                     s = force_word(s + minus, WORD, FALSE, TRUE);
6174                     if (minus)
6175                         force_next('-');
6176                 }
6177             }
6178             /* FALL THROUGH */
6179         case XATTRBLOCK:
6180         case XBLOCK:
6181             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6182             PL_lex_allbrackets++;
6183             PL_expect = XSTATE;
6184             break;
6185         case XATTRTERM:
6186         case XTERMBLOCK:
6187             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6188             PL_lex_allbrackets++;
6189             PL_expect = XSTATE;
6190             break;
6191         default: {
6192                 const char *t;
6193                 if (PL_oldoldbufptr == PL_last_lop)
6194                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6195                 else
6196                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6197                 PL_lex_allbrackets++;
6198                 s = SKIPSPACE1(s);
6199                 if (*s == '}') {
6200                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6201                         PL_expect = XTERM;
6202                         /* This hack is to get the ${} in the message. */
6203                         PL_bufptr = s+1;
6204                         yyerror("syntax error");
6205                         break;
6206                     }
6207                     OPERATOR(HASHBRACK);
6208                 }
6209                 /* This hack serves to disambiguate a pair of curlies
6210                  * as being a block or an anon hash.  Normally, expectation
6211                  * determines that, but in cases where we're not in a
6212                  * position to expect anything in particular (like inside
6213                  * eval"") we have to resolve the ambiguity.  This code
6214                  * covers the case where the first term in the curlies is a
6215                  * quoted string.  Most other cases need to be explicitly
6216                  * disambiguated by prepending a "+" before the opening
6217                  * curly in order to force resolution as an anon hash.
6218                  *
6219                  * XXX should probably propagate the outer expectation
6220                  * into eval"" to rely less on this hack, but that could
6221                  * potentially break current behavior of eval"".
6222                  * GSAR 97-07-21
6223                  */
6224                 t = s;
6225                 if (*s == '\'' || *s == '"' || *s == '`') {
6226                     /* common case: get past first string, handling escapes */
6227                     for (t++; t < PL_bufend && *t != *s;)
6228                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6229                             t++;
6230                     t++;
6231                 }
6232                 else if (*s == 'q') {
6233                     if (++t < PL_bufend
6234                         && (!isWORDCHAR(*t)
6235                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6236                                 && !isWORDCHAR(*t))))
6237                     {
6238                         /* skip q//-like construct */
6239                         const char *tmps;
6240                         char open, close, term;
6241                         I32 brackets = 1;
6242
6243                         while (t < PL_bufend && isSPACE(*t))
6244                             t++;
6245                         /* check for q => */
6246                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6247                             OPERATOR(HASHBRACK);
6248                         }
6249                         term = *t;
6250                         open = term;
6251                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6252                             term = tmps[5];
6253                         close = term;
6254                         if (open == close)
6255                             for (t++; t < PL_bufend; t++) {
6256                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6257                                     t++;
6258                                 else if (*t == open)
6259                                     break;
6260                             }
6261                         else {
6262                             for (t++; t < PL_bufend; t++) {
6263                                 if (*t == '\\' && t+1 < PL_bufend)
6264                                     t++;
6265                                 else if (*t == close && --brackets <= 0)
6266                                     break;
6267                                 else if (*t == open)
6268                                     brackets++;
6269                             }
6270                         }
6271                         t++;
6272                     }
6273                     else
6274                         /* skip plain q word */
6275                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6276                              t += UTF8SKIP(t);
6277                 }
6278                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6279                     t += UTF8SKIP(t);
6280                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6281                          t += UTF8SKIP(t);
6282                 }
6283                 while (t < PL_bufend && isSPACE(*t))
6284                     t++;
6285                 /* if comma follows first term, call it an anon hash */
6286                 /* XXX it could be a comma expression with loop modifiers */
6287                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6288                                    || (*t == '=' && t[1] == '>')))
6289                     OPERATOR(HASHBRACK);
6290                 if (PL_expect == XREF)
6291                     PL_expect = XTERM;
6292                 else {
6293                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6294                     PL_expect = XSTATE;
6295                 }
6296             }
6297             break;
6298         }
6299         pl_yylval.ival = CopLINE(PL_curcop);
6300         if (isSPACE(*s) || *s == '#')
6301             PL_copline = NOLINE;   /* invalidate current command line number */
6302         TOKEN(formbrack ? '=' : '{');
6303     case '}':
6304         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6305             TOKEN(0);
6306       rightbracket:
6307         s++;
6308         if (PL_lex_brackets <= 0)
6309             yyerror("Unmatched right curly bracket");
6310         else
6311             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6312         PL_lex_allbrackets--;
6313         if (PL_lex_state == LEX_INTERPNORMAL) {
6314             if (PL_lex_brackets == 0) {
6315                 if (PL_expect & XFAKEBRACK) {
6316                     PL_expect &= XENUMMASK;
6317                     PL_lex_state = LEX_INTERPEND;
6318                     PL_bufptr = s;
6319 #if 0
6320                     if (PL_madskills) {
6321                         if (!PL_thiswhite)
6322                             PL_thiswhite = newSVpvs("");
6323                         sv_catpvs(PL_thiswhite,"}");
6324                     }
6325 #endif
6326                     return yylex();     /* ignore fake brackets */
6327                 }
6328                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6329                  && SvEVALED(PL_lex_repl))
6330                     PL_lex_state = LEX_INTERPEND;
6331                 else if (*s == '-' && s[1] == '>')
6332                     PL_lex_state = LEX_INTERPENDMAYBE;
6333                 else if (*s != '[' && *s != '{')
6334                     PL_lex_state = LEX_INTERPEND;
6335             }
6336         }
6337         if (PL_expect & XFAKEBRACK) {
6338             PL_expect &= XENUMMASK;
6339             PL_bufptr = s;
6340             return yylex();             /* ignore fake brackets */
6341         }
6342         start_force(PL_curforce);
6343         if (PL_madskills) {
6344             curmad('X', newSVpvn(s-1,1));
6345             CURMAD('_', PL_thiswhite);
6346         }
6347         force_next(formbrack ? '.' : '}');
6348         if (formbrack) LEAVE;
6349 #ifdef PERL_MAD
6350         if (PL_madskills && !PL_thistoken)
6351             PL_thistoken = newSVpvs("");
6352 #endif
6353         if (formbrack == 2) { /* means . where arguments were expected */
6354             start_force(PL_curforce);
6355             force_next(';');
6356             TOKEN(FORMRBRACK);
6357         }
6358         TOKEN(';');
6359     case '&':
6360         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6361         s++;
6362         if (*s++ == '&') {
6363             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6364                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6365                 s -= 2;
6366                 TOKEN(0);
6367             }
6368             AOPERATOR(ANDAND);
6369         }
6370         s--;
6371         if (PL_expect == XOPERATOR) {
6372             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6373                 && isIDFIRST_lazy_if(s,UTF))
6374             {
6375                 CopLINE_dec(PL_curcop);
6376                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6377                 CopLINE_inc(PL_curcop);
6378             }
6379             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6380                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6381                 s--;
6382                 TOKEN(0);
6383             }
6384             PL_parser->saw_infix_sigil = 1;
6385             BAop(OP_BIT_AND);
6386         }
6387
6388         PL_tokenbuf[0] = '&';
6389         s = scan_ident(s - 1, PL_tokenbuf + 1,
6390                        sizeof PL_tokenbuf - 1, TRUE);
6391         if (PL_tokenbuf[1]) {
6392             PL_expect = XOPERATOR;
6393             force_ident_maybe_lex('&');
6394         }
6395         else
6396             PREREF('&');
6397         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6398         TERM('&');
6399
6400     case '|':
6401         s++;
6402         if (*s++ == '|') {
6403             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6404                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6405                 s -= 2;
6406                 TOKEN(0);
6407             }
6408             AOPERATOR(OROR);
6409         }
6410         s--;
6411         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6412                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6413             s--;
6414             TOKEN(0);
6415         }
6416         BOop(OP_BIT_OR);
6417     case '=':
6418         s++;
6419         {
6420             const char tmp = *s++;
6421             if (tmp == '=') {
6422                 if (!PL_lex_allbrackets &&
6423                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6424                     s -= 2;
6425                     TOKEN(0);
6426                 }
6427                 Eop(OP_EQ);
6428             }
6429             if (tmp == '>') {
6430                 if (!PL_lex_allbrackets &&
6431                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6432                     s -= 2;
6433                     TOKEN(0);
6434                 }
6435                 OPERATOR(',');
6436             }
6437             if (tmp == '~')
6438                 PMop(OP_MATCH);
6439             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6440                 && strchr("+-*/%.^&|<",tmp))
6441                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6442                             "Reversed %c= operator",(int)tmp);
6443             s--;
6444             if (PL_expect == XSTATE && isALPHA(tmp) &&
6445                 (s == PL_linestart+1 || s[-2] == '\n') )
6446                 {
6447                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6448                         || PL_lex_state != LEX_NORMAL) {
6449                         d = PL_bufend;
6450                         while (s < d) {
6451                             if (*s++ == '\n') {
6452                                 incline(s);
6453                                 if (strnEQ(s,"=cut",4)) {
6454                                     s = strchr(s,'\n');
6455                                     if (s)
6456                                         s++;
6457                                     else
6458                                         s = d;
6459                                     incline(s);
6460                                     goto retry;
6461                                 }
6462                             }
6463                         }
6464                         goto retry;
6465                     }
6466 #ifdef PERL_MAD
6467                     if (PL_madskills) {
6468                         if (!PL_thiswhite)
6469                             PL_thiswhite = newSVpvs("");
6470                         sv_catpvn(PL_thiswhite, PL_linestart,
6471                                   PL_bufend - PL_linestart);
6472                     }
6473 #endif
6474                     s = PL_bufend;
6475                     PL_parser->in_pod = 1;
6476                     goto retry;
6477                 }
6478         }
6479         if (PL_expect == XBLOCK) {
6480             const char *t = s;
6481 #ifdef PERL_STRICT_CR
6482             while (SPACE_OR_TAB(*t))
6483 #else
6484             while (SPACE_OR_TAB(*t) || *t == '\r')
6485 #endif
6486                 t++;
6487             if (*t == '\n' || *t == '#') {
6488                 formbrack = 1;
6489                 ENTER;
6490                 SAVEI8(PL_parser->form_lex_state);
6491                 SAVEI32(PL_lex_formbrack);
6492                 PL_parser->form_lex_state = PL_lex_state;
6493                 PL_lex_formbrack = PL_lex_brackets + 1;
6494                 goto leftbracket;
6495             }
6496         }
6497         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6498             s--;
6499             TOKEN(0);
6500         }
6501         pl_yylval.ival = 0;
6502         OPERATOR(ASSIGNOP);
6503     case '!':
6504         s++;
6505         {
6506             const char tmp = *s++;
6507             if (tmp == '=') {
6508                 /* was this !=~ where !~ was meant?
6509                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6510
6511                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6512                     const char *t = s+1;
6513
6514                     while (t < PL_bufend && isSPACE(*t))
6515                         ++t;
6516
6517                     if (*t == '/' || *t == '?' ||
6518                         ((*t == 'm' || *t == 's' || *t == 'y')
6519                          && !isWORDCHAR(t[1])) ||
6520                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6521                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6522                                     "!=~ should be !~");
6523                 }
6524                 if (!PL_lex_allbrackets &&
6525                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6526                     s -= 2;
6527                     TOKEN(0);
6528                 }
6529                 Eop(OP_NE);
6530             }
6531             if (tmp == '~')
6532                 PMop(OP_NOT);
6533         }
6534         s--;
6535         OPERATOR('!');
6536     case '<':
6537         if (PL_expect != XOPERATOR) {
6538             if (s[1] != '<' && !strchr(s,'>'))
6539                 check_uni();
6540             if (s[1] == '<')
6541                 s = scan_heredoc(s);
6542             else
6543                 s = scan_inputsymbol(s);
6544             PL_expect = XOPERATOR;
6545             TOKEN(sublex_start());
6546         }
6547         s++;
6548         {
6549             char tmp = *s++;
6550             if (tmp == '<') {
6551                 if (*s == '=' && !PL_lex_allbrackets &&
6552                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6553                     s -= 2;
6554                     TOKEN(0);
6555                 }
6556                 SHop(OP_LEFT_SHIFT);
6557             }
6558             if (tmp == '=') {
6559                 tmp = *s++;
6560                 if (tmp == '>') {
6561                     if (!PL_lex_allbrackets &&
6562                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6563                         s -= 3;
6564                         TOKEN(0);
6565                     }
6566                     Eop(OP_NCMP);
6567                 }
6568                 s--;
6569                 if (!PL_lex_allbrackets &&
6570                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6571                     s -= 2;
6572                     TOKEN(0);
6573                 }
6574                 Rop(OP_LE);
6575             }
6576         }
6577         s--;
6578         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6579             s--;
6580             TOKEN(0);
6581         }
6582         Rop(OP_LT);
6583     case '>':
6584         s++;
6585         {
6586             const char tmp = *s++;
6587             if (tmp == '>') {
6588                 if (*s == '=' && !PL_lex_allbrackets &&
6589                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6590                     s -= 2;
6591                     TOKEN(0);
6592                 }
6593                 SHop(OP_RIGHT_SHIFT);
6594             }
6595             else if (tmp == '=') {
6596                 if (!PL_lex_allbrackets &&
6597                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6598                     s -= 2;
6599                     TOKEN(0);
6600                 }
6601                 Rop(OP_GE);
6602             }
6603         }
6604         s--;
6605         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6606             s--;
6607             TOKEN(0);
6608         }
6609         Rop(OP_GT);
6610
6611     case '$':
6612         CLINE;
6613
6614         if (PL_expect == XOPERATOR) {
6615             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6616                 return deprecate_commaless_var_list();
6617             }
6618         }
6619         else if (PL_expect == XPOSTDEREF) POSTDEREF('$');
6620
6621         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6622             PL_tokenbuf[0] = '@';
6623             s = scan_ident(s + 1, PL_tokenbuf + 1,
6624                            sizeof PL_tokenbuf - 1, FALSE);
6625             if (PL_expect == XOPERATOR)
6626                 no_op("Array length", s);
6627             if (!PL_tokenbuf[1])
6628                 PREREF(DOLSHARP);
6629             PL_expect = XOPERATOR;
6630             force_ident_maybe_lex('#');
6631             TOKEN(DOLSHARP);
6632         }
6633
6634         PL_tokenbuf[0] = '$';
6635         s = scan_ident(s, PL_tokenbuf + 1,
6636                        sizeof PL_tokenbuf - 1, FALSE);
6637         if (PL_expect == XOPERATOR)
6638             no_op("Scalar", s);
6639         if (!PL_tokenbuf[1]) {
6640             if (s == PL_bufend)
6641                 yyerror("Final $ should be \\$ or $name");
6642             PREREF('$');
6643         }
6644
6645         d = s;
6646         {
6647             const char tmp = *s;
6648             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6649                 s = SKIPSPACE1(s);
6650
6651             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6652                 && intuit_more(s)) {
6653                 if (*s == '[') {
6654                     PL_tokenbuf[0] = '@';
6655                     if (ckWARN(WARN_SYNTAX)) {
6656                         char *t = s+1;
6657
6658                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6659                             t++;
6660                         if (*t++ == ',') {
6661                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6662                             while (t < PL_bufend && *t != ']')
6663                                 t++;
6664                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6665                                         "Multidimensional syntax %.*s not supported",
6666                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6667                         }
6668                     }
6669                 }
6670                 else if (*s == '{') {
6671                     char *t;
6672                     PL_tokenbuf[0] = '%';
6673                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6674                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6675                         {
6676                             char tmpbuf[sizeof PL_tokenbuf];
6677                             do {
6678                                 t++;
6679                             } while (isSPACE(*t));
6680                             if (isIDFIRST_lazy_if(t,UTF)) {
6681                                 STRLEN len;
6682                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6683                                               &len);
6684                                 while (isSPACE(*t))
6685                                     t++;
6686                                 if (*t == ';'
6687                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6688                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6689                                         "You need to quote \"%"UTF8f"\"",
6690                                          UTF8fARG(UTF, len, tmpbuf));
6691                             }
6692                         }
6693                 }
6694             }
6695
6696             PL_expect = XOPERATOR;
6697             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6698                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6699                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6700                     PL_expect = XOPERATOR;
6701                 else if (strchr("$@\"'`q", *s))
6702                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6703                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6704                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6705                 else if (isIDFIRST_lazy_if(s,UTF)) {
6706                     char tmpbuf[sizeof PL_tokenbuf];
6707                     int t2;
6708                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6709                     if ((t2 = keyword(tmpbuf, len, 0))) {
6710                         /* binary operators exclude handle interpretations */
6711                         switch (t2) {
6712                         case -KEY_x:
6713                         case -KEY_eq:
6714                         case -KEY_ne:
6715                         case -KEY_gt:
6716                         case -KEY_lt:
6717                         case -KEY_ge:
6718                         case -KEY_le:
6719                         case -KEY_cmp:
6720                             break;
6721                         default:
6722                             PL_expect = XTERM;  /* e.g. print $fh length() */
6723                             break;
6724                         }
6725                     }
6726                     else {
6727                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6728                     }
6729                 }
6730                 else if (isDIGIT(*s))
6731                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6732                 else if (*s == '.' && isDIGIT(s[1]))
6733                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6734                 else if ((*s == '?' || *s == '-' || *s == '+')
6735                          && !isSPACE(s[1]) && s[1] != '=')
6736                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6737                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6738                          && s[1] != '/')
6739                     PL_expect = XTERM;          /* e.g. print $fh /.../
6740                                                    XXX except DORDOR operator
6741                                                 */
6742                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6743                          && s[2] != '=')
6744                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6745             }
6746         }
6747         force_ident_maybe_lex('$');
6748         TOKEN('$');
6749
6750     case '@':
6751         if (PL_expect == XOPERATOR)
6752             no_op("Array", s);
6753         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6754         PL_tokenbuf[0] = '@';
6755         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6756         pl_yylval.ival = 0;
6757         if (!PL_tokenbuf[1]) {
6758             PREREF('@');
6759         }
6760         if (PL_lex_state == LEX_NORMAL)
6761             s = SKIPSPACE1(s);
6762         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6763             if (*s == '{')
6764                 PL_tokenbuf[0] = '%';
6765
6766             /* Warn about @ where they meant $. */
6767             if (*s == '[' || *s == '{') {
6768                 if (ckWARN(WARN_SYNTAX)) {
6769                     S_check_scalar_slice(aTHX_ s);
6770                 }
6771             }
6772         }
6773         PL_expect = XOPERATOR;
6774         force_ident_maybe_lex('@');
6775         TERM('@');
6776
6777      case '/':                  /* may be division, defined-or, or pattern */
6778         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6779             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6780                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6781                 TOKEN(0);
6782             s += 2;
6783             AOPERATOR(DORDOR);
6784         }
6785      case '?':                  /* may either be conditional or pattern */
6786         if (PL_expect == XOPERATOR) {
6787              char tmp = *s++;
6788              if(tmp == '?') {
6789                 if (!PL_lex_allbrackets &&
6790                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6791                     s--;
6792                     TOKEN(0);
6793                 }
6794                 PL_lex_allbrackets++;
6795                 OPERATOR('?');
6796              }
6797              else {
6798                  tmp = *s++;
6799                  if(tmp == '/') {
6800                      /* A // operator. */
6801                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6802                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6803                                             LEX_FAKEEOF_LOGIC)) {
6804                         s -= 2;
6805                         TOKEN(0);
6806                     }
6807                     AOPERATOR(DORDOR);
6808                  }
6809                  else {
6810                      s--;
6811                      if (*s == '=' && !PL_lex_allbrackets &&
6812                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6813                          s--;
6814                          TOKEN(0);
6815                      }
6816                      Mop(OP_DIVIDE);
6817                  }
6818              }
6819          }
6820          else {
6821              /* Disable warning on "study /blah/" */
6822              if (PL_oldoldbufptr == PL_last_uni
6823               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6824                   || memNE(PL_last_uni, "study", 5)
6825                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6826               ))
6827                  check_uni();
6828              if (*s == '?')
6829                  deprecate("?PATTERN? without explicit operator");
6830              s = scan_pat(s,OP_MATCH);
6831              TERM(sublex_start());
6832          }
6833
6834     case '.':
6835         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6836 #ifdef PERL_STRICT_CR
6837             && s[1] == '\n'
6838 #else
6839             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6840 #endif
6841             && (s == PL_linestart || s[-1] == '\n') )
6842         {
6843             PL_expect = XSTATE;
6844             formbrack = 2; /* dot seen where arguments expected */
6845             goto rightbracket;
6846         }
6847         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6848             s += 3;
6849             OPERATOR(YADAYADA);
6850         }
6851         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6852             char tmp = *s++;
6853             if (*s == tmp) {
6854                 if (!PL_lex_allbrackets &&
6855                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6856                     s--;
6857                     TOKEN(0);
6858                 }
6859                 s++;
6860                 if (*s == tmp) {
6861                     s++;
6862                     pl_yylval.ival = OPf_SPECIAL;
6863                 }
6864                 else
6865                     pl_yylval.ival = 0;
6866                 OPERATOR(DOTDOT);
6867             }
6868             if (*s == '=' && !PL_lex_allbrackets &&
6869                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6870                 s--;
6871                 TOKEN(0);
6872             }
6873             Aop(OP_CONCAT);
6874         }
6875         /* FALL THROUGH */
6876     case '0': case '1': case '2': case '3': case '4':
6877     case '5': case '6': case '7': case '8': case '9':
6878         s = scan_num(s, &pl_yylval);
6879         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6880         if (PL_expect == XOPERATOR)
6881             no_op("Number",s);
6882         TERM(THING);
6883
6884     case '\'':
6885         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6886         COPLINE_SET_FROM_MULTI_END;
6887         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6888         if (PL_expect == XOPERATOR) {
6889             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6890                 return deprecate_commaless_var_list();
6891             }
6892             else
6893                 no_op("String",s);
6894         }
6895         if (!s)
6896             missingterm(NULL);
6897         pl_yylval.ival = OP_CONST;
6898         TERM(sublex_start());
6899
6900     case '"':
6901         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6902         DEBUG_T( {
6903             if (s)
6904                 printbuf("### Saw string before %s\n", s);
6905             else
6906                 PerlIO_printf(Perl_debug_log,
6907                              "### Saw unterminated string\n");
6908         } );
6909         if (PL_expect == XOPERATOR) {
6910             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6911                 return deprecate_commaless_var_list();
6912             }
6913             else
6914                 no_op("String",s);
6915         }
6916         if (!s)
6917             missingterm(NULL);
6918         pl_yylval.ival = OP_CONST;
6919         /* FIXME. I think that this can be const if char *d is replaced by
6920            more localised variables.  */
6921         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6922             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6923                 pl_yylval.ival = OP_STRINGIFY;
6924                 break;
6925             }
6926         }
6927         if (pl_yylval.ival == OP_CONST)
6928             COPLINE_SET_FROM_MULTI_END;
6929         TERM(sublex_start());
6930
6931     case '`':
6932         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6933         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6934         if (PL_expect == XOPERATOR)
6935             no_op("Backticks",s);
6936         if (!s)
6937             missingterm(NULL);
6938         readpipe_override();
6939         TERM(sublex_start());
6940
6941     case '\\':
6942         s++;
6943         if (PL_lex_inwhat && isDIGIT(*s))
6944             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6945                            *s, *s);
6946         if (PL_expect == XOPERATOR)
6947             no_op("Backslash",s);
6948         OPERATOR(REFGEN);
6949
6950     case 'v':
6951         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6952             char *start = s + 2;
6953             while (isDIGIT(*start) || *start == '_')
6954                 start++;
6955             if (*start == '.' && isDIGIT(start[1])) {
6956                 s = scan_num(s, &pl_yylval);
6957                 TERM(THING);
6958             }
6959             else if ((*start == ':' && start[1] == ':')
6960                   || (PL_expect == XSTATE && *start == ':'))
6961                 goto keylookup;
6962             else if (PL_expect == XSTATE) {
6963                 d = start;
6964                 while (d < PL_bufend && isSPACE(*d)) d++;
6965                 if (*d == ':') goto keylookup;
6966             }
6967             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6968             if (!isALPHA(*start) && (PL_expect == XTERM
6969                         || PL_expect == XREF || PL_expect == XSTATE
6970                         || PL_expect == XTERMORDORDOR)) {
6971                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6972                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6973                 if (!gv) {
6974                     s = scan_num(s, &pl_yylval);
6975                     TERM(THING);
6976                 }
6977             }
6978         }
6979         goto keylookup;
6980     case 'x':
6981         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6982             s++;
6983             Mop(OP_REPEAT);
6984         }
6985         goto keylookup;
6986
6987     case '_':
6988     case 'a': case 'A':
6989     case 'b': case 'B':
6990     case 'c': case 'C':
6991     case 'd': case 'D':
6992     case 'e': case 'E':
6993     case 'f': case 'F':
6994     case 'g': case 'G':
6995     case 'h': case 'H':
6996     case 'i': case 'I':
6997     case 'j': case 'J':
6998     case 'k': case 'K':
6999     case 'l': case 'L':
7000     case 'm': case 'M':
7001     case 'n': case 'N':
7002     case 'o': case 'O':
7003     case 'p': case 'P':
7004     case 'q': case 'Q':
7005     case 'r': case 'R':
7006     case 's': case 'S':
7007     case 't': case 'T':
7008     case 'u': case 'U':
7009               case 'V':
7010     case 'w': case 'W':
7011               case 'X':
7012     case 'y': case 'Y':
7013     case 'z': case 'Z':
7014
7015       keylookup: {
7016         bool anydelim;
7017         bool lex;
7018         I32 tmp;
7019         SV *sv;
7020         CV *cv;
7021         PADOFFSET off;
7022         OP *rv2cv_op;
7023
7024         lex = FALSE;
7025         orig_keyword = 0;
7026         off = 0;
7027         sv = NULL;
7028         cv = NULL;
7029         gv = NULL;
7030         gvp = NULL;
7031         rv2cv_op = NULL;
7032
7033         PL_bufptr = s;
7034         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7035
7036         /* Some keywords can be followed by any delimiter, including ':' */
7037         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7038
7039         /* x::* is just a word, unless x is "CORE" */
7040         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
7041             goto just_a_word;
7042
7043         d = s;
7044         while (d < PL_bufend && isSPACE(*d))
7045                 d++;    /* no comments skipped here, or s### is misparsed */
7046
7047         /* Is this a word before a => operator? */
7048         if (*d == '=' && d[1] == '>') {
7049           fat_arrow:
7050             CLINE;
7051             pl_yylval.opval
7052                 = (OP*)newSVOP(OP_CONST, 0,
7053                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7054             pl_yylval.opval->op_private = OPpCONST_BARE;
7055             TERM(WORD);
7056         }
7057
7058         /* Check for plugged-in keyword */
7059         {
7060             OP *o;
7061             int result;
7062             char *saved_bufptr = PL_bufptr;
7063             PL_bufptr = s;
7064             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7065             s = PL_bufptr;
7066             if (result == KEYWORD_PLUGIN_DECLINE) {
7067                 /* not a plugged-in keyword */
7068                 PL_bufptr = saved_bufptr;
7069             } else if (result == KEYWORD_PLUGIN_STMT) {
7070                 pl_yylval.opval = o;
7071                 CLINE;
7072                 PL_expect = XSTATE;
7073                 return REPORT(PLUGSTMT);
7074             } else if (result == KEYWORD_PLUGIN_EXPR) {
7075                 pl_yylval.opval = o;
7076                 CLINE;
7077                 PL_expect = XOPERATOR;
7078                 return REPORT(PLUGEXPR);
7079             } else {
7080                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7081                                         PL_tokenbuf);
7082             }
7083         }
7084
7085         /* Check for built-in keyword */
7086         tmp = keyword(PL_tokenbuf, len, 0);
7087
7088         /* Is this a label? */
7089         if (!anydelim && PL_expect == XSTATE
7090               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7091             s = d + 1;
7092             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7093             pl_yylval.pval[len] = '\0';
7094             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7095             CLINE;
7096             TOKEN(LABEL);
7097         }
7098
7099         /* Check for lexical sub */
7100         if (PL_expect != XOPERATOR) {
7101             char tmpbuf[sizeof PL_tokenbuf + 1];
7102             *tmpbuf = '&';
7103             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7104             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7105             if (off != NOT_IN_PAD) {
7106                 assert(off); /* we assume this is boolean-true below */
7107                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7108                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7109                     HEK * const stashname = HvNAME_HEK(stash);
7110                     sv = newSVhek(stashname);
7111                     sv_catpvs(sv, "::");
7112                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7113                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7114                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7115                                     SVt_PVCV);
7116                     off = 0;
7117                     if (!gv) {
7118                         sv_free(sv);
7119                         sv = NULL;
7120                         goto just_a_word;
7121                     }
7122                 }
7123                 else {
7124                     rv2cv_op = newOP(OP_PADANY, 0);
7125                     rv2cv_op->op_targ = off;
7126                     cv = find_lexical_cv(off);
7127                 }
7128                 lex = TRUE;
7129                 goto just_a_word;
7130             }
7131             off = 0;
7132         }
7133
7134         if (tmp < 0) {                  /* second-class keyword? */
7135             GV *ogv = NULL;     /* override (winner) */
7136             GV *hgv = NULL;     /* hidden (loser) */
7137             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7138                 CV *cv;
7139                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7140                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7141                     (cv = GvCVu(gv)))
7142                 {
7143                     if (GvIMPORTED_CV(gv))
7144                         ogv = gv;
7145                     else if (! CvMETHOD(cv))
7146                         hgv = gv;
7147                 }
7148                 if (!ogv &&
7149                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7150                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
7151                     (gv = *gvp) && isGV_with_GP(gv) &&
7152                     GvCVu(gv) && GvIMPORTED_CV(gv))
7153                 {
7154                     ogv = gv;
7155                 }
7156             }
7157             if (ogv) {
7158                 orig_keyword = tmp;
7159                 tmp = 0;                /* overridden by import or by GLOBAL */
7160             }
7161             else if (gv && !gvp
7162                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7163                      && GvCVu(gv))
7164             {
7165                 tmp = 0;                /* any sub overrides "weak" keyword */
7166             }
7167             else {                      /* no override */
7168                 tmp = -tmp;
7169                 if (tmp == KEY_dump) {
7170                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7171                                    "dump() better written as CORE::dump()");
7172                 }
7173                 gv = NULL;
7174                 gvp = 0;
7175                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
7176                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7177                                    "Ambiguous call resolved as CORE::%s(), "
7178                                    "qualify as such or use &",
7179                                    GvENAME(hgv));
7180             }
7181         }
7182
7183         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7184          && (!anydelim || *s != '#')) {
7185             /* no override, and not s### either; skipspace is safe here
7186              * check for => on following line */
7187             bool arrow;
7188             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7189             STRLEN   soff = s         - SvPVX(PL_linestr);
7190             s = skipspace_flags(s, LEX_NO_INCLINE);
7191             arrow = *s == '=' && s[1] == '>';
7192             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7193             s         = SvPVX(PL_linestr) +   soff;
7194             if (arrow)
7195                 goto fat_arrow;
7196         }
7197
7198       reserved_word:
7199         switch (tmp) {
7200
7201         default:                        /* not a keyword */
7202             /* Trade off - by using this evil construction we can pull the
7203                variable gv into the block labelled keylookup. If not, then
7204                we have to give it function scope so that the goto from the
7205                earlier ':' case doesn't bypass the initialisation.  */
7206             if (0) {
7207             just_a_word_zero_gv:
7208                 sv = NULL;
7209                 cv = NULL;
7210                 gv = NULL;
7211                 gvp = NULL;
7212                 rv2cv_op = NULL;
7213                 orig_keyword = 0;
7214                 lex = 0;
7215                 off = 0;
7216             }
7217           just_a_word: {
7218                 int pkgname = 0;
7219                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7220                 const char penultchar =
7221                     lastchar && PL_bufptr - 2 >= PL_linestart
7222                          ? PL_bufptr[-2]
7223                          : 0;
7224 #ifdef PERL_MAD
7225                 SV *nextPL_nextwhite = 0;
7226 #endif
7227
7228
7229                 /* Get the rest if it looks like a package qualifier */
7230
7231                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7232                     STRLEN morelen;
7233                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7234                                   TRUE, &morelen);
7235                     if (!morelen)
7236                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7237                                 UTF8fARG(UTF, len, PL_tokenbuf),
7238                                 *s == '\'' ? "'" : "::");
7239                     len += morelen;
7240                     pkgname = 1;
7241                 }
7242
7243                 if (PL_expect == XOPERATOR) {
7244                     if (PL_bufptr == PL_linestart) {
7245                         CopLINE_dec(PL_curcop);
7246                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7247                         CopLINE_inc(PL_curcop);
7248                     }
7249                     else
7250                         no_op("Bareword",s);
7251                 }
7252
7253                 /* Look for a subroutine with this name in current package,
7254                    unless this is a lexical sub, or name is "Foo::",
7255                    in which case Foo is a bareword
7256                    (and a package name). */
7257
7258                 if (len > 2 && !PL_madskills &&
7259                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7260                 {
7261                     if (ckWARN(WARN_BAREWORD)
7262                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7263                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7264                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7265                            UTF8fARG(UTF, len, PL_tokenbuf));
7266                     len -= 2;
7267                     PL_tokenbuf[len] = '\0';
7268                     gv = NULL;
7269                     gvp = 0;
7270                 }
7271                 else {
7272                     if (!lex && !gv) {
7273                         /* Mustn't actually add anything to a symbol table.
7274                            But also don't want to "initialise" any placeholder
7275                            constants that might already be there into full
7276                            blown PVGVs with attached PVCV.  */
7277                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7278                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7279                                                SVt_PVCV);
7280                     }
7281                     len = 0;
7282                 }
7283
7284                 /* if we saw a global override before, get the right name */
7285
7286                 if (!sv)
7287                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7288                     len ? len : strlen(PL_tokenbuf));
7289                 if (gvp) {
7290                     SV * const tmp_sv = sv;
7291                     sv = newSVpvs("CORE::GLOBAL::");
7292                     sv_catsv(sv, tmp_sv);
7293                     SvREFCNT_dec(tmp_sv);
7294                 }
7295
7296 #ifdef PERL_MAD
7297                 if (PL_madskills && !PL_thistoken) {
7298                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7299                     PL_thistoken = newSVpvn(start,s - start);
7300                     PL_realtokenstart = s - SvPVX(PL_linestr);
7301                 }
7302 #endif
7303
7304                 /* Presume this is going to be a bareword of some sort. */
7305                 CLINE;
7306                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7307                 pl_yylval.opval->op_private = OPpCONST_BARE;
7308
7309                 /* And if "Foo::", then that's what it certainly is. */
7310                 if (len)
7311                     goto safe_bareword;
7312
7313                 if (!off)
7314                 {
7315                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7316                     const_op->op_private = OPpCONST_BARE;
7317                     rv2cv_op = newCVREF(0, const_op);
7318                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7319                 }
7320
7321                 /* See if it's the indirect object for a list operator. */
7322
7323                 if (PL_oldoldbufptr &&
7324                     PL_oldoldbufptr < PL_bufptr &&
7325                     (PL_oldoldbufptr == PL_last_lop
7326                      || PL_oldoldbufptr == PL_last_uni) &&
7327                     /* NO SKIPSPACE BEFORE HERE! */
7328                     (PL_expect == XREF ||
7329                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7330                 {
7331                     bool immediate_paren = *s == '(';
7332
7333                     /* (Now we can afford to cross potential line boundary.) */
7334                     s = SKIPSPACE2(s,nextPL_nextwhite);
7335 #ifdef PERL_MAD
7336                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7337 #endif
7338
7339                     /* Two barewords in a row may indicate method call. */
7340
7341                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7342                         (tmp = intuit_method(s, gv, cv))) {
7343                         op_free(rv2cv_op);
7344                         if (tmp == METHOD && !PL_lex_allbrackets &&
7345                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7346                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7347                         return REPORT(tmp);
7348                     }
7349
7350                     /* If not a declared subroutine, it's an indirect object. */
7351                     /* (But it's an indir obj regardless for sort.) */
7352                     /* Also, if "_" follows a filetest operator, it's a bareword */
7353
7354                     if (
7355                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7356                          (!cv &&
7357                         (PL_last_lop_op != OP_MAPSTART &&
7358                          PL_last_lop_op != OP_GREPSTART))))
7359                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7360                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7361                        )
7362                     {
7363                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7364                         goto bareword;
7365                     }
7366                 }
7367
7368                 PL_expect = XOPERATOR;
7369 #ifdef PERL_MAD
7370                 if (isSPACE(*s))
7371                     s = SKIPSPACE2(s,nextPL_nextwhite);
7372                 PL_nextwhite = nextPL_nextwhite;
7373 #else
7374                 s = skipspace(s);
7375 #endif
7376
7377                 /* Is this a word before a => operator? */
7378                 if (*s == '=' && s[1] == '>' && !pkgname) {
7379                     op_free(rv2cv_op);
7380                     CLINE;
7381                     /* This is our own scalar, created a few lines above,
7382                        so this is safe. */
7383                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7384                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7385                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7386                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7387                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7388                     TERM(WORD);
7389                 }
7390
7391                 /* If followed by a paren, it's certainly a subroutine. */
7392                 if (*s == '(') {
7393                     CLINE;
7394                     if (cv) {
7395                         d = s + 1;
7396                         while (SPACE_OR_TAB(*d))
7397                             d++;
7398                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7399                             s = d + 1;
7400                             goto its_constant;
7401                         }
7402                     }
7403 #ifdef PERL_MAD
7404                     if (PL_madskills) {
7405                         PL_nextwhite = PL_thiswhite;
7406                         PL_thiswhite = 0;
7407                     }
7408                     start_force(PL_curforce);
7409 #endif
7410                     NEXTVAL_NEXTTOKE.opval =
7411                         off ? rv2cv_op : pl_yylval.opval;
7412                     PL_expect = XOPERATOR;
7413 #ifdef PERL_MAD
7414                     if (PL_madskills) {
7415                         PL_nextwhite = nextPL_nextwhite;
7416                         curmad('X', PL_thistoken);
7417                         PL_thistoken = newSVpvs("");
7418                     }
7419 #endif
7420                     if (off)
7421                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7422                     else op_free(rv2cv_op),        force_next(WORD);
7423                     pl_yylval.ival = 0;
7424                     TOKEN('&');
7425                 }
7426
7427                 /* If followed by var or block, call it a method (unless sub) */
7428
7429                 if ((*s == '$' || *s == '{') && !cv) {
7430                     op_free(rv2cv_op);
7431                     PL_last_lop = PL_oldbufptr;
7432                     PL_last_lop_op = OP_METHOD;
7433                     if (!PL_lex_allbrackets &&
7434                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7435                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7436                     PREBLOCK(METHOD);
7437                 }
7438
7439                 /* If followed by a bareword, see if it looks like indir obj. */
7440
7441                 if (!orig_keyword
7442                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7443                         && (tmp = intuit_method(s, gv, cv))) {
7444                     op_free(rv2cv_op);
7445                     if (tmp == METHOD && !PL_lex_allbrackets &&
7446                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7447                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7448                     return REPORT(tmp);
7449                 }
7450
7451                 /* Not a method, so call it a subroutine (if defined) */
7452
7453                 if (cv) {
7454                     if (lastchar == '-' && penultchar != '-') {
7455                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7456                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7457                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7458                              UTF8fARG(UTF, l, PL_tokenbuf),
7459                              UTF8fARG(UTF, l, PL_tokenbuf));
7460                     }
7461                     /* Check for a constant sub */
7462                     if ((sv = cv_const_sv_or_av(cv))) {
7463                   its_constant:
7464                         op_free(rv2cv_op);
7465                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7466                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7467                         if (SvTYPE(sv) == SVt_PVAV)
7468                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7469                                                       pl_yylval.opval);
7470                         else {
7471                             pl_yylval.opval->op_private = 0;
7472                             pl_yylval.opval->op_folded = 1;
7473                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7474                         }
7475                         TOKEN(WORD);
7476                     }
7477
7478                     op_free(pl_yylval.opval);
7479                     pl_yylval.opval =
7480                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7481                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7482                     PL_last_lop = PL_oldbufptr;
7483                     PL_last_lop_op = OP_ENTERSUB;
7484                     /* Is there a prototype? */
7485                     if (
7486 #ifdef PERL_MAD
7487                         cv &&
7488 #endif
7489                         SvPOK(cv))
7490                     {
7491                         STRLEN protolen = CvPROTOLEN(cv);
7492                         const char *proto = CvPROTO(cv);
7493                         bool optional;
7494                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7495                         if (!protolen)
7496                             TERM(FUNC0SUB);
7497                         if ((optional = *proto == ';'))
7498                           do
7499                             proto++;
7500                           while (*proto == ';');
7501                         if (
7502                             (
7503                                 (
7504                                     *proto == '$' || *proto == '_'
7505                                  || *proto == '*' || *proto == '+'
7506                                 )
7507                              && proto[1] == '\0'
7508                             )
7509                          || (
7510                              *proto == '\\' && proto[1] && proto[2] == '\0'
7511                             )
7512                         )
7513                             UNIPROTO(UNIOPSUB,optional);
7514                         if (*proto == '\\' && proto[1] == '[') {
7515                             const char *p = proto + 2;
7516                             while(*p && *p != ']')
7517                                 ++p;
7518                             if(*p == ']' && !p[1])
7519                                 UNIPROTO(UNIOPSUB,optional);
7520                         }
7521                         if (*proto == '&' && *s == '{') {
7522                             if (PL_curstash)
7523                                 sv_setpvs(PL_subname, "__ANON__");
7524                             else
7525                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7526                             if (!PL_lex_allbrackets &&
7527                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7528                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7529                             PREBLOCK(LSTOPSUB);
7530                         }
7531                     }
7532 #ifdef PERL_MAD
7533                     {
7534                         if (PL_madskills) {
7535                             PL_nextwhite = PL_thiswhite;
7536                             PL_thiswhite = 0;
7537                         }
7538                         start_force(PL_curforce);
7539                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7540                         PL_expect = XTERM;
7541                         if (PL_madskills) {
7542                             PL_nextwhite = nextPL_nextwhite;
7543                             curmad('X', PL_thistoken);
7544                             PL_thistoken = newSVpvs("");
7545                         }
7546                         force_next(off ? PRIVATEREF : WORD);
7547                         if (!PL_lex_allbrackets &&
7548                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7549                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7550                         TOKEN(NOAMP);
7551                     }
7552                 }
7553
7554                 /* Guess harder when madskills require "best effort". */
7555                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7556                     int probable_sub = 0;
7557                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7558                         probable_sub = 1;
7559                     else if (isALPHA(*s)) {
7560                         char tmpbuf[1024];
7561                         STRLEN tmplen;
7562                         d = s;
7563                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7564                         if (!keyword(tmpbuf, tmplen, 0))
7565                             probable_sub = 1;
7566                         else {
7567                             while (d < PL_bufend && isSPACE(*d))
7568                                 d++;
7569                             if (*d == '=' && d[1] == '>')
7570                                 probable_sub = 1;
7571                         }
7572                     }
7573                     if (probable_sub) {
7574                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7575                                         SVt_PVCV);
7576                         op_free(pl_yylval.opval);
7577                         pl_yylval.opval =
7578                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7579                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7580                         PL_last_lop = PL_oldbufptr;
7581                         PL_last_lop_op = OP_ENTERSUB;
7582                         PL_nextwhite = PL_thiswhite;
7583                         PL_thiswhite = 0;
7584                         start_force(PL_curforce);
7585                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7586                         PL_expect = XTERM;
7587                         PL_nextwhite = nextPL_nextwhite;
7588                         curmad('X', PL_thistoken);
7589                         PL_thistoken = newSVpvs("");
7590                         force_next(off ? PRIVATEREF : WORD);
7591                         if (!PL_lex_allbrackets &&
7592                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7593                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7594                         TOKEN(NOAMP);
7595                     }
7596 #else
7597                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7598                     PL_expect = XTERM;
7599                     force_next(off ? PRIVATEREF : WORD);
7600                     if (!PL_lex_allbrackets &&
7601                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7602                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7603                     TOKEN(NOAMP);
7604 #endif
7605                 }
7606
7607                 /* Call it a bare word */
7608
7609                 if (PL_hints & HINT_STRICT_SUBS)
7610                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7611                 else {
7612                 bareword:
7613                     /* after "print" and similar functions (corresponding to
7614                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7615                      * a filehandle should be subject to "strict subs".
7616                      * Likewise for the optional indirect-object argument to system
7617                      * or exec, which can't be a bareword */
7618                     if ((PL_last_lop_op == OP_PRINT
7619                             || PL_last_lop_op == OP_PRTF
7620                             || PL_last_lop_op == OP_SAY
7621                             || PL_last_lop_op == OP_SYSTEM
7622                             || PL_last_lop_op == OP_EXEC)
7623                             && (PL_hints & HINT_STRICT_SUBS))
7624                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7625                     if (lastchar != '-') {
7626                         if (ckWARN(WARN_RESERVED)) {
7627                             d = PL_tokenbuf;
7628                             while (isLOWER(*d))
7629                                 d++;
7630                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7631                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7632                                        PL_tokenbuf);
7633                         }
7634                     }
7635                 }
7636                 op_free(rv2cv_op);
7637
7638             safe_bareword:
7639                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7640                  && saw_infix_sigil) {
7641                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7642                                      "Operator or semicolon missing before %c%"UTF8f,
7643                                      lastchar,
7644                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7645                                               PL_tokenbuf));
7646                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7647                                      "Ambiguous use of %c resolved as operator %c",
7648                                      lastchar, lastchar);
7649                 }
7650                 TOKEN(WORD);
7651             }
7652
7653         case KEY___FILE__:
7654             FUN0OP(
7655                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7656             );
7657
7658         case KEY___LINE__:
7659             FUN0OP(
7660                 (OP*)newSVOP(OP_CONST, 0,
7661                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7662             );
7663
7664         case KEY___PACKAGE__:
7665             FUN0OP(
7666                 (OP*)newSVOP(OP_CONST, 0,
7667                                         (PL_curstash
7668                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7669                                          : &PL_sv_undef))
7670             );
7671
7672         case KEY___DATA__:
7673         case KEY___END__: {
7674             GV *gv;
7675             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7676                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7677                                         ? PL_curstash
7678                                         : PL_defstash;
7679                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7680                 if (!isGV(gv))
7681                     gv_init(gv,stash,"DATA",4,0);
7682                 GvMULTI_on(gv);
7683                 if (!GvIO(gv))
7684                     GvIOp(gv) = newIO();
7685                 IoIFP(GvIOp(gv)) = PL_rsfp;
7686 #if defined(HAS_FCNTL) && defined(F_SETFD)
7687                 {
7688                     const int fd = PerlIO_fileno(PL_rsfp);
7689                     fcntl(fd,F_SETFD,fd >= 3);
7690                 }
7691 #endif
7692                 /* Mark this internal pseudo-handle as clean */
7693                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7694                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7695                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7696                 else
7697                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7698 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7699                 /* if the script was opened in binmode, we need to revert
7700                  * it to text mode for compatibility; but only iff it has CRs
7701                  * XXX this is a questionable hack at best. */
7702                 if (PL_bufend-PL_bufptr > 2
7703                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7704                 {
7705                     Off_t loc = 0;
7706                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7707                         loc = PerlIO_tell(PL_rsfp);
7708                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7709                     }
7710 #ifdef NETWARE
7711                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7712 #else
7713                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7714 #endif  /* NETWARE */
7715                         if (loc > 0)
7716                             PerlIO_seek(PL_rsfp, loc, 0);
7717                     }
7718                 }
7719 #endif
7720 #ifdef PERLIO_LAYERS
7721                 if (!IN_BYTES) {
7722                     if (UTF)
7723                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7724                     else if (PL_encoding) {
7725                         SV *name;
7726                         dSP;
7727                         ENTER;
7728                         SAVETMPS;
7729                         PUSHMARK(sp);
7730                         EXTEND(SP, 1);
7731                         XPUSHs(PL_encoding);
7732                         PUTBACK;
7733                         call_method("name", G_SCALAR);
7734                         SPAGAIN;
7735                         name = POPs;
7736                         PUTBACK;
7737                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7738                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7739                                                       SVfARG(name)));
7740                         FREETMPS;
7741                         LEAVE;
7742                     }
7743                 }
7744 #endif
7745 #ifdef PERL_MAD
7746                 if (PL_madskills) {
7747                     if (PL_realtokenstart >= 0) {
7748                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7749                         if (!PL_endwhite)
7750                             PL_endwhite = newSVpvs("");
7751                         sv_catsv(PL_endwhite, PL_thiswhite);
7752                         PL_thiswhite = 0;
7753                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7754                         PL_realtokenstart = -1;
7755                     }
7756                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7757                            != NULL) ;
7758                 }
7759 #endif
7760                 PL_rsfp = NULL;
7761             }
7762             goto fake_eof;
7763         }
7764
7765         case KEY___SUB__:
7766             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7767
7768         case KEY_AUTOLOAD:
7769         case KEY_DESTROY:
7770         case KEY_BEGIN:
7771         case KEY_UNITCHECK:
7772         case KEY_CHECK:
7773         case KEY_INIT:
7774         case KEY_END:
7775             if (PL_expect == XSTATE) {
7776                 s = PL_bufptr;
7777                 goto really_sub;
7778             }
7779             goto just_a_word;
7780
7781         case KEY_CORE:
7782             if (*s == ':' && s[1] == ':') {
7783                 STRLEN olen = len;
7784                 d = s;
7785                 s += 2;
7786                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7787                 if ((*s == ':' && s[1] == ':')
7788                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7789                 {
7790                     s = d;
7791                     len = olen;
7792                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7793                     goto just_a_word;
7794                 }
7795                 if (!tmp)
7796                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7797                                       UTF8fARG(UTF, len, PL_tokenbuf));
7798                 if (tmp < 0)
7799                     tmp = -tmp;
7800                 else if (tmp == KEY_require || tmp == KEY_do
7801                       || tmp == KEY_glob)
7802                     /* that's a way to remember we saw "CORE::" */
7803                     orig_keyword = tmp;
7804                 goto reserved_word;
7805             }
7806             goto just_a_word;
7807
7808         case KEY_abs:
7809             UNI(OP_ABS);
7810
7811         case KEY_alarm:
7812             UNI(OP_ALARM);
7813
7814         case KEY_accept:
7815             LOP(OP_ACCEPT,XTERM);
7816
7817         case KEY_and:
7818             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7819                 return REPORT(0);
7820             OPERATOR(ANDOP);
7821
7822         case KEY_atan2:
7823             LOP(OP_ATAN2,XTERM);
7824
7825         case KEY_bind:
7826             LOP(OP_BIND,XTERM);
7827
7828         case KEY_binmode:
7829             LOP(OP_BINMODE,XTERM);
7830
7831         case KEY_bless:
7832             LOP(OP_BLESS,XTERM);
7833
7834         case KEY_break:
7835             FUN0(OP_BREAK);
7836
7837         case KEY_chop:
7838             UNI(OP_CHOP);
7839
7840         case KEY_continue:
7841                     /* We have to disambiguate the two senses of
7842                       "continue". If the next token is a '{' then
7843                       treat it as the start of a continue block;
7844                       otherwise treat it as a control operator.
7845                      */
7846                     s = skipspace(s);
7847                     if (*s == '{')
7848             PREBLOCK(CONTINUE);
7849                     else
7850                         FUN0(OP_CONTINUE);
7851
7852         case KEY_chdir:
7853             /* may use HOME */
7854             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7855             UNI(OP_CHDIR);
7856
7857         case KEY_close:
7858             UNI(OP_CLOSE);
7859
7860         case KEY_closedir:
7861             UNI(OP_CLOSEDIR);
7862
7863         case KEY_cmp:
7864             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7865                 return REPORT(0);
7866             Eop(OP_SCMP);
7867
7868         case KEY_caller:
7869             UNI(OP_CALLER);
7870
7871         case KEY_crypt:
7872 #ifdef FCRYPT
7873             if (!PL_cryptseen) {
7874                 PL_cryptseen = TRUE;
7875                 init_des();
7876             }
7877 #endif
7878             LOP(OP_CRYPT,XTERM);
7879
7880         case KEY_chmod:
7881             LOP(OP_CHMOD,XTERM);
7882
7883         case KEY_chown:
7884             LOP(OP_CHOWN,XTERM);
7885
7886         case KEY_connect:
7887             LOP(OP_CONNECT,XTERM);
7888
7889         case KEY_chr:
7890             UNI(OP_CHR);
7891
7892         case KEY_cos:
7893             UNI(OP_COS);
7894
7895         case KEY_chroot:
7896             UNI(OP_CHROOT);
7897
7898         case KEY_default:
7899             PREBLOCK(DEFAULT);
7900
7901         case KEY_do:
7902             s = SKIPSPACE1(s);
7903             if (*s == '{')
7904                 PRETERMBLOCK(DO);
7905             if (*s != '\'') {
7906                 *PL_tokenbuf = '&';
7907                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7908                               1, &len);
7909                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7910                     d = SKIPSPACE1(d);
7911                     if (*d == '(') {
7912                         force_ident_maybe_lex('&');
7913                         s = d;
7914                     }
7915                 }
7916             }
7917             if (orig_keyword == KEY_do) {
7918                 orig_keyword = 0;
7919                 pl_yylval.ival = 1;
7920             }
7921             else
7922                 pl_yylval.ival = 0;
7923             OPERATOR(DO);
7924
7925         case KEY_die:
7926             PL_hints |= HINT_BLOCK_SCOPE;
7927             LOP(OP_DIE,XTERM);
7928
7929         case KEY_defined:
7930             UNI(OP_DEFINED);
7931
7932         case KEY_delete:
7933             UNI(OP_DELETE);
7934
7935         case KEY_dbmopen:
7936             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7937                               STR_WITH_LEN("NDBM_File::"),
7938                               STR_WITH_LEN("DB_File::"),
7939                               STR_WITH_LEN("GDBM_File::"),
7940                               STR_WITH_LEN("SDBM_File::"),
7941                               STR_WITH_LEN("ODBM_File::"),
7942                               NULL);
7943             LOP(OP_DBMOPEN,XTERM);
7944
7945         case KEY_dbmclose:
7946             UNI(OP_DBMCLOSE);
7947
7948         case KEY_dump:
7949             PL_expect = XOPERATOR;
7950             s = force_word(s,WORD,TRUE,FALSE);
7951             LOOPX(OP_DUMP);
7952
7953         case KEY_else:
7954             PREBLOCK(ELSE);
7955
7956         case KEY_elsif:
7957             pl_yylval.ival = CopLINE(PL_curcop);
7958             OPERATOR(ELSIF);
7959
7960         case KEY_eq:
7961             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7962                 return REPORT(0);
7963             Eop(OP_SEQ);
7964
7965         case KEY_exists:
7966             UNI(OP_EXISTS);
7967
7968         case KEY_exit:
7969             if (PL_madskills)
7970                 UNI(OP_INT);
7971             UNI(OP_EXIT);
7972
7973         case KEY_eval:
7974             s = SKIPSPACE1(s);
7975             if (*s == '{') { /* block eval */
7976                 PL_expect = XTERMBLOCK;
7977                 UNIBRACK(OP_ENTERTRY);
7978             }
7979             else { /* string eval */
7980                 PL_expect = XTERM;
7981                 UNIBRACK(OP_ENTEREVAL);
7982             }
7983
7984         case KEY_evalbytes:
7985             PL_expect = XTERM;
7986             UNIBRACK(-OP_ENTEREVAL);
7987
7988         case KEY_eof:
7989             UNI(OP_EOF);
7990
7991         case KEY_exp:
7992             UNI(OP_EXP);
7993
7994         case KEY_each:
7995             UNI(OP_EACH);
7996
7997         case KEY_exec:
7998             LOP(OP_EXEC,XREF);
7999
8000         case KEY_endhostent:
8001             FUN0(OP_EHOSTENT);
8002
8003         case KEY_endnetent:
8004             FUN0(OP_ENETENT);
8005
8006         case KEY_endservent:
8007             FUN0(OP_ESERVENT);
8008
8009         case KEY_endprotoent:
8010             FUN0(OP_EPROTOENT);
8011
8012         case KEY_endpwent:
8013             FUN0(OP_EPWENT);
8014
8015         case KEY_endgrent:
8016             FUN0(OP_EGRENT);
8017
8018         case KEY_for:
8019         case KEY_foreach:
8020             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8021                 return REPORT(0);
8022             pl_yylval.ival = CopLINE(PL_curcop);
8023             s = SKIPSPACE1(s);
8024             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8025                 char *p = s;
8026 #ifdef PERL_MAD
8027                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8028 #endif
8029
8030                 if ((PL_bufend - p) >= 3 &&
8031                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8032                     p += 2;
8033                 else if ((PL_bufend - p) >= 4 &&
8034                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8035                     p += 3;
8036                 p = PEEKSPACE(p);
8037                 if (isIDFIRST_lazy_if(p,UTF)) {
8038                     p = scan_ident(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
8039                     p = PEEKSPACE(p);
8040                 }
8041                 if (*p != '$')
8042                     Perl_croak(aTHX_ "Missing $ on loop variable");
8043 #ifdef PERL_MAD
8044                 s = SvPVX(PL_linestr) + soff;
8045 #endif
8046             }
8047             OPERATOR(FOR);
8048
8049         case KEY_formline:
8050             LOP(OP_FORMLINE,XTERM);
8051
8052         case KEY_fork:
8053             FUN0(OP_FORK);
8054
8055         case KEY_fc:
8056             UNI(OP_FC);
8057
8058         case KEY_fcntl:
8059             LOP(OP_FCNTL,XTERM);
8060
8061         case KEY_fileno:
8062             UNI(OP_FILENO);
8063
8064         case KEY_flock:
8065             LOP(OP_FLOCK,XTERM);
8066
8067         case KEY_gt:
8068             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8069                 return REPORT(0);
8070             Rop(OP_SGT);
8071
8072         case KEY_ge:
8073             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8074                 return REPORT(0);
8075             Rop(OP_SGE);
8076
8077         case KEY_grep:
8078             LOP(OP_GREPSTART, XREF);
8079
8080         case KEY_goto:
8081             PL_expect = XOPERATOR;
8082             s = force_word(s,WORD,TRUE,FALSE);
8083             LOOPX(OP_GOTO);
8084
8085         case KEY_gmtime:
8086             UNI(OP_GMTIME);
8087
8088         case KEY_getc:
8089             UNIDOR(OP_GETC);
8090
8091         case KEY_getppid:
8092             FUN0(OP_GETPPID);
8093
8094         case KEY_getpgrp:
8095             UNI(OP_GETPGRP);
8096
8097         case KEY_getpriority:
8098             LOP(OP_GETPRIORITY,XTERM);
8099
8100         case KEY_getprotobyname:
8101             UNI(OP_GPBYNAME);
8102
8103         case KEY_getprotobynumber:
8104             LOP(OP_GPBYNUMBER,XTERM);
8105
8106         case KEY_getprotoent:
8107             FUN0(OP_GPROTOENT);
8108
8109         case KEY_getpwent:
8110             FUN0(OP_GPWENT);
8111
8112         case KEY_getpwnam:
8113             UNI(OP_GPWNAM);
8114
8115         case KEY_getpwuid:
8116             UNI(OP_GPWUID);
8117
8118         case KEY_getpeername:
8119             UNI(OP_GETPEERNAME);
8120
8121         case KEY_gethostbyname:
8122             UNI(OP_GHBYNAME);
8123
8124         case KEY_gethostbyaddr:
8125             LOP(OP_GHBYADDR,XTERM);
8126
8127         case KEY_gethostent:
8128             FUN0(OP_GHOSTENT);
8129
8130         case KEY_getnetbyname:
8131             UNI(OP_GNBYNAME);
8132
8133         case KEY_getnetbyaddr:
8134             LOP(OP_GNBYADDR,XTERM);
8135
8136         case KEY_getnetent:
8137             FUN0(OP_GNETENT);
8138
8139         case KEY_getservbyname:
8140             LOP(OP_GSBYNAME,XTERM);
8141
8142         case KEY_getservbyport:
8143             LOP(OP_GSBYPORT,XTERM);
8144
8145         case KEY_getservent:
8146             FUN0(OP_GSERVENT);
8147
8148         case KEY_getsockname:
8149             UNI(OP_GETSOCKNAME);
8150
8151         case KEY_getsockopt:
8152             LOP(OP_GSOCKOPT,XTERM);
8153
8154         case KEY_getgrent:
8155             FUN0(OP_GGRENT);
8156
8157         case KEY_getgrnam:
8158             UNI(OP_GGRNAM);
8159
8160         case KEY_getgrgid:
8161             UNI(OP_GGRGID);
8162
8163         case KEY_getlogin:
8164             FUN0(OP_GETLOGIN);
8165
8166         case KEY_given:
8167             pl_yylval.ival = CopLINE(PL_curcop);
8168             Perl_ck_warner_d(aTHX_
8169                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8170                 "given is experimental");
8171             OPERATOR(GIVEN);
8172
8173         case KEY_glob:
8174             LOP(
8175              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8176              XTERM
8177             );
8178
8179         case KEY_hex:
8180             UNI(OP_HEX);
8181
8182         case KEY_if:
8183             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8184                 return REPORT(0);
8185             pl_yylval.ival = CopLINE(PL_curcop);
8186             OPERATOR(IF);
8187
8188         case KEY_index:
8189             LOP(OP_INDEX,XTERM);
8190
8191         case KEY_int:
8192             UNI(OP_INT);
8193
8194         case KEY_ioctl:
8195             LOP(OP_IOCTL,XTERM);
8196
8197         case KEY_join:
8198             LOP(OP_JOIN,XTERM);
8199
8200         case KEY_keys:
8201             UNI(OP_KEYS);
8202
8203         case KEY_kill:
8204             LOP(OP_KILL,XTERM);
8205
8206         case KEY_last:
8207             PL_expect = XOPERATOR;
8208             s = force_word(s,WORD,TRUE,FALSE);
8209             LOOPX(OP_LAST);
8210
8211         case KEY_lc:
8212             UNI(OP_LC);
8213
8214         case KEY_lcfirst:
8215             UNI(OP_LCFIRST);
8216
8217         case KEY_local:
8218             pl_yylval.ival = 0;
8219             OPERATOR(LOCAL);
8220
8221         case KEY_length:
8222             UNI(OP_LENGTH);
8223
8224         case KEY_lt:
8225             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8226                 return REPORT(0);
8227             Rop(OP_SLT);
8228
8229         case KEY_le:
8230             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8231                 return REPORT(0);
8232             Rop(OP_SLE);
8233
8234         case KEY_localtime:
8235             UNI(OP_LOCALTIME);
8236
8237         case KEY_log:
8238             UNI(OP_LOG);
8239
8240         case KEY_link:
8241             LOP(OP_LINK,XTERM);
8242
8243         case KEY_listen:
8244             LOP(OP_LISTEN,XTERM);
8245
8246         case KEY_lock:
8247             UNI(OP_LOCK);
8248
8249         case KEY_lstat:
8250             UNI(OP_LSTAT);
8251
8252         case KEY_m:
8253             s = scan_pat(s,OP_MATCH);
8254             TERM(sublex_start());
8255
8256         case KEY_map:
8257             LOP(OP_MAPSTART, XREF);
8258
8259         case KEY_mkdir:
8260             LOP(OP_MKDIR,XTERM);
8261
8262         case KEY_msgctl:
8263             LOP(OP_MSGCTL,XTERM);
8264
8265         case KEY_msgget:
8266             LOP(OP_MSGGET,XTERM);
8267
8268         case KEY_msgrcv:
8269             LOP(OP_MSGRCV,XTERM);
8270
8271         case KEY_msgsnd:
8272             LOP(OP_MSGSND,XTERM);
8273
8274         case KEY_our:
8275         case KEY_my:
8276         case KEY_state:
8277             PL_in_my = (U16)tmp;
8278             s = SKIPSPACE1(s);
8279             if (isIDFIRST_lazy_if(s,UTF)) {
8280 #ifdef PERL_MAD
8281                 char* start = s;
8282 #endif
8283                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8284                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8285                 {
8286                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8287                         Perl_croak(aTHX_
8288                                   "Experimental \"%s\" subs not enabled",
8289                                    tmp == KEY_my    ? "my"    :
8290                                    tmp == KEY_state ? "state" : "our");
8291                     Perl_ck_warner_d(aTHX_
8292                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8293                         "The lexical_subs feature is experimental");
8294                     goto really_sub;
8295                 }
8296                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8297                 if (!PL_in_my_stash) {
8298                     char tmpbuf[1024];
8299                     PL_bufptr = s;
8300                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8301                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8302                 }
8303 #ifdef PERL_MAD
8304                 if (PL_madskills) {     /* just add type to declarator token */
8305                     sv_catsv(PL_thistoken, PL_nextwhite);
8306                     PL_nextwhite = 0;
8307                     sv_catpvn(PL_thistoken, start, s - start);
8308                 }
8309 #endif
8310             }
8311             pl_yylval.ival = 1;
8312             OPERATOR(MY);
8313
8314         case KEY_next:
8315             PL_expect = XOPERATOR;
8316             s = force_word(s,WORD,TRUE,FALSE);
8317             LOOPX(OP_NEXT);
8318
8319         case KEY_ne:
8320             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8321                 return REPORT(0);
8322             Eop(OP_SNE);
8323
8324         case KEY_no:
8325             s = tokenize_use(0, s);
8326             TERM(USE);
8327
8328         case KEY_not:
8329             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8330                 FUN1(OP_NOT);
8331             else {
8332                 if (!PL_lex_allbrackets &&
8333                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8334                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8335                 OPERATOR(NOTOP);
8336             }
8337
8338         case KEY_open:
8339             s = SKIPSPACE1(s);
8340             if (isIDFIRST_lazy_if(s,UTF)) {
8341           const char *t;
8342           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8343               &len);
8344                 for (t=d; isSPACE(*t);)
8345                     t++;
8346                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8347                     /* [perl #16184] */
8348                     && !(t[0] == '=' && t[1] == '>')
8349                     && !(t[0] == ':' && t[1] == ':')
8350                     && !keyword(s, d-s, 0)
8351                 ) {
8352                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8353                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8354                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8355                 }
8356             }
8357             LOP(OP_OPEN,XTERM);
8358
8359         case KEY_or:
8360             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8361                 return REPORT(0);
8362             pl_yylval.ival = OP_OR;
8363             OPERATOR(OROP);
8364
8365         case KEY_ord:
8366             UNI(OP_ORD);
8367
8368         case KEY_oct:
8369             UNI(OP_OCT);
8370
8371         case KEY_opendir:
8372             LOP(OP_OPEN_DIR,XTERM);
8373
8374         case KEY_print:
8375             checkcomma(s,PL_tokenbuf,"filehandle");
8376             LOP(OP_PRINT,XREF);
8377
8378         case KEY_printf:
8379             checkcomma(s,PL_tokenbuf,"filehandle");
8380             LOP(OP_PRTF,XREF);
8381
8382         case KEY_prototype:
8383             UNI(OP_PROTOTYPE);
8384
8385         case KEY_push:
8386             LOP(OP_PUSH,XTERM);
8387
8388         case KEY_pop:
8389             UNIDOR(OP_POP);
8390
8391         case KEY_pos:
8392             UNIDOR(OP_POS);
8393
8394         case KEY_pack:
8395             LOP(OP_PACK,XTERM);
8396
8397         case KEY_package:
8398             s = force_word(s,WORD,FALSE,TRUE);
8399             s = SKIPSPACE1(s);
8400             s = force_strict_version(s);
8401             PL_lex_expect = XBLOCK;
8402             OPERATOR(PACKAGE);
8403
8404         case KEY_pipe:
8405             LOP(OP_PIPE_OP,XTERM);
8406
8407         case KEY_q:
8408             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8409             COPLINE_SET_FROM_MULTI_END;
8410             if (!s)
8411                 missingterm(NULL);
8412             pl_yylval.ival = OP_CONST;
8413             TERM(sublex_start());
8414
8415         case KEY_quotemeta:
8416             UNI(OP_QUOTEMETA);
8417
8418         case KEY_qw: {
8419             OP *words = NULL;
8420             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8421             COPLINE_SET_FROM_MULTI_END;
8422             if (!s)
8423                 missingterm(NULL);
8424             PL_expect = XOPERATOR;
8425             if (SvCUR(PL_lex_stuff)) {
8426                 int warned_comma = !ckWARN(WARN_QW);
8427                 int warned_comment = warned_comma;
8428                 d = SvPV_force(PL_lex_stuff, len);
8429                 while (len) {
8430                     for (; isSPACE(*d) && len; --len, ++d)
8431                         /**/;
8432                     if (len) {
8433                         SV *sv;
8434                         const char *b = d;
8435                         if (!warned_comma || !warned_comment) {
8436                             for (; !isSPACE(*d) && len; --len, ++d) {
8437                                 if (!warned_comma && *d == ',') {
8438                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8439                                         "Possible attempt to separate words with commas");
8440                                     ++warned_comma;
8441                                 }
8442                                 else if (!warned_comment && *d == '#') {
8443                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8444                                         "Possible attempt to put comments in qw() list");
8445                                     ++warned_comment;
8446                                 }
8447                             }
8448                         }
8449                         else {
8450                             for (; !isSPACE(*d) && len; --len, ++d)
8451                                 /**/;
8452                         }
8453                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8454                         words = op_append_elem(OP_LIST, words,
8455                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8456                     }
8457                 }
8458             }
8459             if (!words)
8460                 words = newNULLLIST();
8461             if (PL_lex_stuff) {
8462                 SvREFCNT_dec(PL_lex_stuff);
8463                 PL_lex_stuff = NULL;
8464             }
8465             PL_expect = XOPERATOR;
8466             pl_yylval.opval = sawparens(words);
8467             TOKEN(QWLIST);
8468         }
8469
8470         case KEY_qq:
8471             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8472             if (!s)
8473                 missingterm(NULL);
8474             pl_yylval.ival = OP_STRINGIFY;
8475             if (SvIVX(PL_lex_stuff) == '\'')
8476                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8477             TERM(sublex_start());
8478
8479         case KEY_qr:
8480             s = scan_pat(s,OP_QR);
8481             TERM(sublex_start());
8482
8483         case KEY_qx:
8484             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8485             if (!s)
8486                 missingterm(NULL);
8487             readpipe_override();
8488             TERM(sublex_start());
8489
8490         case KEY_return:
8491             OLDLOP(OP_RETURN);
8492
8493         case KEY_require:
8494             s = SKIPSPACE1(s);
8495             PL_expect = XOPERATOR;
8496             if (isDIGIT(*s)) {
8497                 s = force_version(s, FALSE);
8498             }
8499             else if (*s != 'v' || !isDIGIT(s[1])
8500                     || (s = force_version(s, TRUE), *s == 'v'))
8501             {
8502                 *PL_tokenbuf = '\0';
8503                 s = force_word(s,WORD,TRUE,TRUE);
8504                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8505                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8506                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8507                 else if (*s == '<')
8508                     yyerror("<> should be quotes");
8509             }
8510             if (orig_keyword == KEY_require) {
8511                 orig_keyword = 0;
8512                 pl_yylval.ival = 1;
8513             }
8514             else
8515                 pl_yylval.ival = 0;
8516             PL_expect = XTERM;
8517             PL_bufptr = s;
8518             PL_last_uni = PL_oldbufptr;
8519             PL_last_lop_op = OP_REQUIRE;
8520             s = skipspace(s);
8521             return REPORT( (int)REQUIRE );
8522
8523         case KEY_reset:
8524             UNI(OP_RESET);
8525
8526         case KEY_redo:
8527             PL_expect = XOPERATOR;
8528             s = force_word(s,WORD,TRUE,FALSE);
8529             LOOPX(OP_REDO);
8530
8531         case KEY_rename:
8532             LOP(OP_RENAME,XTERM);
8533
8534         case KEY_rand:
8535             UNI(OP_RAND);
8536
8537         case KEY_rmdir:
8538             UNI(OP_RMDIR);
8539
8540         case KEY_rindex:
8541             LOP(OP_RINDEX,XTERM);
8542
8543         case KEY_read:
8544             LOP(OP_READ,XTERM);
8545
8546         case KEY_readdir:
8547             UNI(OP_READDIR);
8548
8549         case KEY_readline:
8550             UNIDOR(OP_READLINE);
8551
8552         case KEY_readpipe:
8553             UNIDOR(OP_BACKTICK);
8554
8555         case KEY_rewinddir:
8556             UNI(OP_REWINDDIR);
8557
8558         case KEY_recv:
8559             LOP(OP_RECV,XTERM);
8560
8561         case KEY_reverse:
8562             LOP(OP_REVERSE,XTERM);
8563
8564         case KEY_readlink:
8565             UNIDOR(OP_READLINK);
8566
8567         case KEY_ref:
8568             UNI(OP_REF);
8569
8570         case KEY_s:
8571             s = scan_subst(s);
8572             if (pl_yylval.opval)
8573                 TERM(sublex_start());
8574             else
8575                 TOKEN(1);       /* force error */
8576
8577         case KEY_say:
8578             checkcomma(s,PL_tokenbuf,"filehandle");
8579             LOP(OP_SAY,XREF);
8580
8581         case KEY_chomp:
8582             UNI(OP_CHOMP);
8583
8584         case KEY_scalar:
8585             UNI(OP_SCALAR);
8586
8587         case KEY_select:
8588             LOP(OP_SELECT,XTERM);
8589
8590         case KEY_seek:
8591             LOP(OP_SEEK,XTERM);
8592
8593         case KEY_semctl:
8594             LOP(OP_SEMCTL,XTERM);
8595
8596         case KEY_semget:
8597             LOP(OP_SEMGET,XTERM);
8598
8599         case KEY_semop:
8600             LOP(OP_SEMOP,XTERM);
8601
8602         case KEY_send:
8603             LOP(OP_SEND,XTERM);
8604
8605         case KEY_setpgrp:
8606             LOP(OP_SETPGRP,XTERM);
8607
8608         case KEY_setpriority:
8609             LOP(OP_SETPRIORITY,XTERM);
8610
8611         case KEY_sethostent:
8612             UNI(OP_SHOSTENT);
8613
8614         case KEY_setnetent:
8615             UNI(OP_SNETENT);
8616
8617         case KEY_setservent:
8618             UNI(OP_SSERVENT);
8619
8620         case KEY_setprotoent:
8621             UNI(OP_SPROTOENT);
8622
8623         case KEY_setpwent:
8624             FUN0(OP_SPWENT);
8625
8626         case KEY_setgrent:
8627             FUN0(OP_SGRENT);
8628
8629         case KEY_seekdir:
8630             LOP(OP_SEEKDIR,XTERM);
8631
8632         case KEY_setsockopt:
8633             LOP(OP_SSOCKOPT,XTERM);
8634
8635         case KEY_shift:
8636             UNIDOR(OP_SHIFT);
8637
8638         case KEY_shmctl:
8639             LOP(OP_SHMCTL,XTERM);
8640
8641         case KEY_shmget:
8642             LOP(OP_SHMGET,XTERM);
8643
8644         case KEY_shmread:
8645             LOP(OP_SHMREAD,XTERM);
8646
8647         case KEY_shmwrite:
8648             LOP(OP_SHMWRITE,XTERM);
8649
8650         case KEY_shutdown:
8651             LOP(OP_SHUTDOWN,XTERM);
8652
8653         case KEY_sin:
8654             UNI(OP_SIN);
8655
8656         case KEY_sleep:
8657             UNI(OP_SLEEP);
8658
8659         case KEY_socket:
8660             LOP(OP_SOCKET,XTERM);
8661
8662         case KEY_socketpair:
8663             LOP(OP_SOCKPAIR,XTERM);
8664
8665         case KEY_sort:
8666             checkcomma(s,PL_tokenbuf,"subroutine name");
8667             s = SKIPSPACE1(s);
8668             PL_expect = XTERM;
8669             s = force_word(s,WORD,TRUE,TRUE);
8670             LOP(OP_SORT,XREF);
8671
8672         case KEY_split:
8673             LOP(OP_SPLIT,XTERM);
8674
8675         case KEY_sprintf:
8676             LOP(OP_SPRINTF,XTERM);
8677
8678         case KEY_splice:
8679             LOP(OP_SPLICE,XTERM);
8680
8681         case KEY_sqrt:
8682             UNI(OP_SQRT);
8683
8684         case KEY_srand:
8685             UNI(OP_SRAND);
8686
8687         case KEY_stat:
8688             UNI(OP_STAT);
8689
8690         case KEY_study:
8691             UNI(OP_STUDY);
8692
8693         case KEY_substr:
8694             LOP(OP_SUBSTR,XTERM);
8695
8696         case KEY_format:
8697         case KEY_sub:
8698           really_sub:
8699             {
8700                 char * const tmpbuf = PL_tokenbuf + 1;
8701                 expectation attrful;
8702                 bool have_name, have_proto;
8703                 const int key = tmp;
8704 #ifndef PERL_MAD
8705                 SV *format_name = NULL;
8706 #endif
8707
8708 #ifdef PERL_MAD
8709                 SV *tmpwhite = 0;
8710
8711                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8712                 SV *subtoken = PL_madskills
8713                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8714                    : NULL;
8715                 PL_thistoken = 0;
8716
8717                 d = s;
8718                 s = SKIPSPACE2(s,tmpwhite);
8719 #else
8720                 d = s;
8721                 s = skipspace(s);
8722 #endif
8723
8724                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8725                     (*s == ':' && s[1] == ':'))
8726                 {
8727 #ifdef PERL_MAD
8728                     SV *nametoke = NULL;
8729 #endif
8730
8731                     PL_expect = XBLOCK;
8732                     attrful = XATTRBLOCK;
8733                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8734                                   &len);
8735 #ifdef PERL_MAD
8736                     if (PL_madskills)
8737                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8738 #else
8739                     if (key == KEY_format)
8740                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8741 #endif
8742                     *PL_tokenbuf = '&';
8743                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8744                      || pad_findmy_pvn(
8745                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8746                         ) != NOT_IN_PAD)
8747                         sv_setpvn(PL_subname, tmpbuf, len);
8748                     else {
8749                         sv_setsv(PL_subname,PL_curstname);
8750                         sv_catpvs(PL_subname,"::");
8751                         sv_catpvn(PL_subname,tmpbuf,len);
8752                     }
8753                     if (SvUTF8(PL_linestr))
8754                         SvUTF8_on(PL_subname);
8755                     have_name = TRUE;
8756
8757
8758 #ifdef PERL_MAD
8759                     start_force(0);
8760                     CURMAD('X', nametoke);
8761                     CURMAD('_', tmpwhite);
8762                     force_ident_maybe_lex('&');
8763
8764                     s = SKIPSPACE2(d,tmpwhite);
8765 #else
8766                     s = skipspace(d);
8767 #endif
8768                 }
8769                 else {
8770                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8771                     {
8772                         *d = '\0';
8773                         /* diag_listed_as: Missing name in "%s sub" */
8774                         Perl_croak(aTHX_
8775                                   "Missing name in \"%s\"", PL_bufptr);
8776                     }
8777                     PL_expect = XTERMBLOCK;
8778                     attrful = XATTRTERM;
8779                     sv_setpvs(PL_subname,"?");
8780                     have_name = FALSE;
8781                 }
8782
8783                 if (key == KEY_format) {
8784 #ifdef PERL_MAD
8785                     PL_thistoken = subtoken;
8786                     s = d;
8787 #else
8788                     if (format_name) {
8789                         start_force(PL_curforce);
8790                         NEXTVAL_NEXTTOKE.opval
8791                             = (OP*)newSVOP(OP_CONST,0, format_name);
8792                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8793                         force_next(WORD);
8794                     }
8795 #endif
8796                     PREBLOCK(FORMAT);
8797                 }
8798
8799                 /* Look for a prototype */
8800                 if (*s == '(') {
8801                     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8802                     COPLINE_SET_FROM_MULTI_END;
8803                     if (!s)
8804                         Perl_croak(aTHX_ "Prototype not terminated");
8805                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8806                     have_proto = TRUE;
8807
8808 #ifdef PERL_MAD
8809                     start_force(0);
8810                     CURMAD('q', PL_thisopen);
8811                     CURMAD('_', tmpwhite);
8812                     CURMAD('=', PL_thisstuff);
8813                     CURMAD('Q', PL_thisclose);
8814                     NEXTVAL_NEXTTOKE.opval =
8815                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8816                     PL_lex_stuff = NULL;
8817                     force_next(THING);
8818
8819                     s = SKIPSPACE2(s,tmpwhite);
8820 #else
8821                     s = skipspace(s);
8822 #endif
8823                 }
8824                 else
8825                     have_proto = FALSE;
8826
8827                 if (*s == ':' && s[1] != ':')
8828                     PL_expect = attrful;
8829                 else if (*s != '{' && key == KEY_sub) {
8830                     if (!have_name)
8831                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8832                     else if (*s != ';' && *s != '}')
8833                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8834                 }
8835
8836 #ifdef PERL_MAD
8837                 start_force(0);
8838                 if (tmpwhite) {
8839                     if (PL_madskills)
8840                         curmad('^', newSVpvs(""));
8841                     CURMAD('_', tmpwhite);
8842                 }
8843                 force_next(0);
8844
8845                 PL_thistoken = subtoken;
8846                 PERL_UNUSED_VAR(have_proto);
8847 #else
8848                 if (have_proto) {
8849                     NEXTVAL_NEXTTOKE.opval =
8850                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8851                     PL_lex_stuff = NULL;
8852                     force_next(THING);
8853                 }
8854 #endif
8855                 if (!have_name) {
8856                     if (PL_curstash)
8857                         sv_setpvs(PL_subname, "__ANON__");
8858                     else
8859                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8860                     TOKEN(ANONSUB);
8861                 }
8862 #ifndef PERL_MAD
8863                 force_ident_maybe_lex('&');
8864 #endif
8865                 TOKEN(SUB);
8866             }
8867
8868         case KEY_system:
8869             LOP(OP_SYSTEM,XREF);
8870
8871         case KEY_symlink:
8872             LOP(OP_SYMLINK,XTERM);
8873
8874         case KEY_syscall:
8875             LOP(OP_SYSCALL,XTERM);
8876
8877         case KEY_sysopen:
8878             LOP(OP_SYSOPEN,XTERM);
8879
8880         case KEY_sysseek:
8881             LOP(OP_SYSSEEK,XTERM);
8882
8883         case KEY_sysread:
8884             LOP(OP_SYSREAD,XTERM);
8885
8886         case KEY_syswrite:
8887             LOP(OP_SYSWRITE,XTERM);
8888
8889         case KEY_tr:
8890         case KEY_y:
8891             s = scan_trans(s);
8892             TERM(sublex_start());
8893
8894         case KEY_tell:
8895             UNI(OP_TELL);
8896
8897         case KEY_telldir:
8898             UNI(OP_TELLDIR);
8899
8900         case KEY_tie:
8901             LOP(OP_TIE,XTERM);
8902
8903         case KEY_tied:
8904             UNI(OP_TIED);
8905
8906         case KEY_time:
8907             FUN0(OP_TIME);
8908
8909         case KEY_times:
8910             FUN0(OP_TMS);
8911
8912         case KEY_truncate:
8913             LOP(OP_TRUNCATE,XTERM);
8914
8915         case KEY_uc:
8916             UNI(OP_UC);
8917
8918         case KEY_ucfirst:
8919             UNI(OP_UCFIRST);
8920
8921         case KEY_untie:
8922             UNI(OP_UNTIE);
8923
8924         case KEY_until:
8925             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8926                 return REPORT(0);
8927             pl_yylval.ival = CopLINE(PL_curcop);
8928             OPERATOR(UNTIL);
8929
8930         case KEY_unless:
8931             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8932                 return REPORT(0);
8933             pl_yylval.ival = CopLINE(PL_curcop);
8934             OPERATOR(UNLESS);
8935
8936         case KEY_unlink:
8937             LOP(OP_UNLINK,XTERM);
8938
8939         case KEY_undef:
8940             UNIDOR(OP_UNDEF);
8941
8942         case KEY_unpack:
8943             LOP(OP_UNPACK,XTERM);
8944
8945         case KEY_utime:
8946             LOP(OP_UTIME,XTERM);
8947
8948         case KEY_umask:
8949             UNIDOR(OP_UMASK);
8950
8951         case KEY_unshift:
8952             LOP(OP_UNSHIFT,XTERM);
8953
8954         case KEY_use:
8955             s = tokenize_use(1, s);
8956             OPERATOR(USE);
8957
8958         case KEY_values:
8959             UNI(OP_VALUES);
8960
8961         case KEY_vec:
8962             LOP(OP_VEC,XTERM);
8963
8964         case KEY_when:
8965             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8966                 return REPORT(0);
8967             pl_yylval.ival = CopLINE(PL_curcop);
8968             Perl_ck_warner_d(aTHX_
8969                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8970                 "when is experimental");
8971             OPERATOR(WHEN);
8972
8973         case KEY_while:
8974             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8975                 return REPORT(0);
8976             pl_yylval.ival = CopLINE(PL_curcop);
8977             OPERATOR(WHILE);
8978
8979         case KEY_warn:
8980             PL_hints |= HINT_BLOCK_SCOPE;
8981             LOP(OP_WARN,XTERM);
8982
8983         case KEY_wait:
8984             FUN0(OP_WAIT);
8985
8986         case KEY_waitpid:
8987             LOP(OP_WAITPID,XTERM);
8988
8989         case KEY_wantarray:
8990             FUN0(OP_WANTARRAY);
8991
8992         case KEY_write:
8993             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8994              * we use the same number on EBCDIC */
8995             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8996             UNI(OP_ENTERWRITE);
8997
8998         case KEY_x:
8999             if (PL_expect == XOPERATOR) {
9000                 if (*s == '=' && !PL_lex_allbrackets &&
9001                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9002                     return REPORT(0);
9003                 Mop(OP_REPEAT);
9004             }
9005             check_uni();
9006             goto just_a_word;
9007
9008         case KEY_xor:
9009             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
9010                 return REPORT(0);
9011             pl_yylval.ival = OP_XOR;
9012             OPERATOR(OROP);
9013         }
9014     }}
9015 }
9016
9017 /*
9018   S_pending_ident
9019
9020   Looks up an identifier in the pad or in a package
9021
9022   Returns:
9023     PRIVATEREF if this is a lexical name.
9024     WORD       if this belongs to a package.
9025
9026   Structure:
9027       if we're in a my declaration
9028           croak if they tried to say my($foo::bar)
9029           build the ops for a my() declaration
9030       if it's an access to a my() variable
9031           build ops for access to a my() variable
9032       if in a dq string, and they've said @foo and we can't find @foo
9033           warn
9034       build ops for a bareword
9035 */
9036
9037 static int
9038 S_pending_ident(pTHX)
9039 {
9040     dVAR;
9041     PADOFFSET tmp = 0;
9042     const char pit = (char)pl_yylval.ival;
9043     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9044     /* All routes through this function want to know if there is a colon.  */
9045     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9046
9047     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9048           "### Pending identifier '%s'\n", PL_tokenbuf); });
9049
9050     /* if we're in a my(), we can't allow dynamics here.
9051        $foo'bar has already been turned into $foo::bar, so
9052        just check for colons.
9053
9054        if it's a legal name, the OP is a PADANY.
9055     */
9056     if (PL_in_my) {
9057         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9058             if (has_colon)
9059                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9060                                   "variable %s in \"our\"",
9061                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9062             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9063         }
9064         else {
9065             if (has_colon)
9066                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9067                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9068                             UTF ? SVf_UTF8 : 0);
9069
9070             pl_yylval.opval = newOP(OP_PADANY, 0);
9071             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9072                                                         UTF ? SVf_UTF8 : 0);
9073             return PRIVATEREF;
9074         }
9075     }
9076
9077     /*
9078        build the ops for accesses to a my() variable.
9079     */
9080
9081     if (!has_colon) {
9082         if (!PL_in_my)
9083             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9084                                     UTF ? SVf_UTF8 : 0);
9085         if (tmp != NOT_IN_PAD) {
9086             /* might be an "our" variable" */
9087             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9088                 /* build ops for a bareword */
9089                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9090                 HEK * const stashname = HvNAME_HEK(stash);
9091                 SV *  const sym = newSVhek(stashname);
9092                 sv_catpvs(sym, "::");
9093                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9094                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9095                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9096                 if (pit != '&')
9097                   gv_fetchsv(sym,
9098                     (PL_in_eval
9099                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9100                         : GV_ADDMULTI
9101                     ),
9102                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9103                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9104                      : SVt_PVHV));
9105                 return WORD;
9106             }
9107
9108             pl_yylval.opval = newOP(OP_PADANY, 0);
9109             pl_yylval.opval->op_targ = tmp;
9110             return PRIVATEREF;
9111         }
9112     }
9113
9114     /*
9115        Whine if they've said @foo in a doublequoted string,
9116        and @foo isn't a variable we can find in the symbol
9117        table.
9118     */
9119     if (ckWARN(WARN_AMBIGUOUS) &&
9120         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9121         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9122                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9123         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9124                 /* DO NOT warn for @- and @+ */
9125                 && !( PL_tokenbuf[2] == '\0' &&
9126                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9127            )
9128         {
9129             /* Downgraded from fatal to warning 20000522 mjd */
9130             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9131                         "Possible unintended interpolation of %"UTF8f
9132                         " in string",
9133                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9134         }
9135     }
9136
9137     /* build ops for a bareword */
9138     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9139                                    newSVpvn_flags(PL_tokenbuf + 1,
9140                                                       tokenbuf_len - 1,
9141                                                       UTF ? SVf_UTF8 : 0 ));
9142     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9143     if (pit != '&')
9144         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9145                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9146                      | ( UTF ? SVf_UTF8 : 0 ),
9147                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9148                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9149                       : SVt_PVHV));
9150     return WORD;
9151 }
9152
9153 STATIC void
9154 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9155 {
9156     dVAR;
9157
9158     PERL_ARGS_ASSERT_CHECKCOMMA;
9159
9160     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9161         if (ckWARN(WARN_SYNTAX)) {
9162             int level = 1;
9163             const char *w;
9164             for (w = s+2; *w && level; w++) {
9165                 if (*w == '(')
9166                     ++level;
9167                 else if (*w == ')')
9168                     --level;
9169             }
9170             while (isSPACE(*w))
9171                 ++w;
9172             /* the list of chars below is for end of statements or
9173              * block / parens, boolean operators (&&, ||, //) and branch
9174              * constructs (or, and, if, until, unless, while, err, for).
9175              * Not a very solid hack... */
9176             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9177                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9178                             "%s (...) interpreted as function",name);
9179         }
9180     }
9181     while (s < PL_bufend && isSPACE(*s))
9182         s++;
9183     if (*s == '(')
9184         s++;
9185     while (s < PL_bufend && isSPACE(*s))
9186         s++;
9187     if (isIDFIRST_lazy_if(s,UTF)) {
9188         const char * const w = s;
9189         s += UTF ? UTF8SKIP(s) : 1;
9190         while (isWORDCHAR_lazy_if(s,UTF))
9191             s += UTF ? UTF8SKIP(s) : 1;
9192         while (s < PL_bufend && isSPACE(*s))
9193             s++;
9194         if (*s == ',') {
9195             GV* gv;
9196             if (keyword(w, s - w, 0))
9197                 return;
9198
9199             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9200             if (gv && GvCVu(gv))
9201                 return;
9202             Perl_croak(aTHX_ "No comma allowed after %s", what);
9203         }
9204     }
9205 }
9206
9207 /* S_new_constant(): do any overload::constant lookup.
9208
9209    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9210    Best used as sv=new_constant(..., sv, ...).
9211    If s, pv are NULL, calls subroutine with one argument,
9212    and <type> is used with error messages only.
9213    <type> is assumed to be well formed UTF-8 */
9214
9215 STATIC SV *
9216 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9217                SV *sv, SV *pv, const char *type, STRLEN typelen)
9218 {
9219     dVAR; dSP;
9220     HV * table = GvHV(PL_hintgv);                /* ^H */
9221     SV *res;
9222     SV *errsv = NULL;
9223     SV **cvp;
9224     SV *cv, *typesv;
9225     const char *why1 = "", *why2 = "", *why3 = "";
9226
9227     PERL_ARGS_ASSERT_NEW_CONSTANT;
9228     /* We assume that this is true: */
9229     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9230     assert(type || s);
9231
9232     /* charnames doesn't work well if there have been errors found */
9233     if (PL_error_count > 0 && *key == 'c')
9234     {
9235         SvREFCNT_dec_NN(sv);
9236         return &PL_sv_undef;
9237     }
9238
9239     sv_2mortal(sv);                     /* Parent created it permanently */
9240     if (!table
9241         || ! (PL_hints & HINT_LOCALIZE_HH)
9242         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9243         || ! SvOK(*cvp))
9244     {
9245         char *msg;
9246
9247         /* Here haven't found what we're looking for.  If it is charnames,
9248          * perhaps it needs to be loaded.  Try doing that before giving up */
9249         if (*key == 'c') {
9250             Perl_load_module(aTHX_
9251                             0,
9252                             newSVpvs("_charnames"),
9253                              /* version parameter; no need to specify it, as if
9254                               * we get too early a version, will fail anyway,
9255                               * not being able to find '_charnames' */
9256                             NULL,
9257                             newSVpvs(":full"),
9258                             newSVpvs(":short"),
9259                             NULL);
9260             SPAGAIN;
9261             table = GvHV(PL_hintgv);
9262             if (table
9263                 && (PL_hints & HINT_LOCALIZE_HH)
9264                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9265                 && SvOK(*cvp))
9266             {
9267                 goto now_ok;
9268             }
9269         }
9270         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9271             msg = Perl_form(aTHX_
9272                                "Constant(%.*s) unknown",
9273                                 (int)(type ? typelen : len),
9274                                 (type ? type: s));
9275         }
9276         else {
9277             why1 = "$^H{";
9278             why2 = key;
9279             why3 = "} is not defined";
9280         report:
9281             if (*key == 'c') {
9282                 msg = Perl_form(aTHX_
9283                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9284                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9285                       );
9286             }
9287             else {
9288                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9289                                     (int)(type ? typelen : len),
9290                                     (type ? type: s), why1, why2, why3);
9291             }
9292         }
9293         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9294         return SvREFCNT_inc_simple_NN(sv);
9295     }
9296 now_ok:
9297     cv = *cvp;
9298     if (!pv && s)
9299         pv = newSVpvn_flags(s, len, SVs_TEMP);
9300     if (type && pv)
9301         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9302     else
9303         typesv = &PL_sv_undef;
9304
9305     PUSHSTACKi(PERLSI_OVERLOAD);
9306     ENTER ;
9307     SAVETMPS;
9308
9309     PUSHMARK(SP) ;
9310     EXTEND(sp, 3);
9311     if (pv)
9312         PUSHs(pv);
9313     PUSHs(sv);
9314     if (pv)
9315         PUSHs(typesv);
9316     PUTBACK;
9317     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9318
9319     SPAGAIN ;
9320
9321     /* Check the eval first */
9322     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9323         STRLEN errlen;
9324         const char * errstr;
9325         sv_catpvs(errsv, "Propagated");
9326         errstr = SvPV_const(errsv, errlen);
9327         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9328         (void)POPs;
9329         res = SvREFCNT_inc_simple_NN(sv);
9330     }
9331     else {
9332         res = POPs;
9333         SvREFCNT_inc_simple_void_NN(res);
9334     }
9335
9336     PUTBACK ;
9337     FREETMPS ;
9338     LEAVE ;
9339     POPSTACK;
9340
9341     if (!SvOK(res)) {
9342         why1 = "Call to &{$^H{";
9343         why2 = key;
9344         why3 = "}} did not return a defined value";
9345         sv = res;
9346         (void)sv_2mortal(sv);
9347         goto report;
9348     }
9349
9350     return res;
9351 }
9352
9353 PERL_STATIC_INLINE void
9354 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9355     dVAR;
9356     PERL_ARGS_ASSERT_PARSE_IDENT;
9357
9358     for (;;) {
9359         if (*d >= e)
9360             Perl_croak(aTHX_ "%s", ident_too_long);
9361         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9362              /* The UTF-8 case must come first, otherwise things
9363              * like c\N{COMBINING TILDE} would start failing, as the
9364              * isWORDCHAR_A case below would gobble the 'c' up.
9365              */
9366
9367             char *t = *s + UTF8SKIP(*s);
9368             while (isIDCONT_utf8((U8*)t))
9369                 t += UTF8SKIP(t);
9370             if (*d + (t - *s) > e)
9371                 Perl_croak(aTHX_ "%s", ident_too_long);
9372             Copy(*s, *d, t - *s, char);
9373             *d += t - *s;
9374             *s = t;
9375         }
9376         else if ( isWORDCHAR_A(**s) ) {
9377             do {
9378                 *(*d)++ = *(*s)++;
9379             } while (isWORDCHAR_A(**s) && *d < e);
9380         }
9381         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9382             *(*d)++ = ':';
9383             *(*d)++ = ':';
9384             (*s)++;
9385         }
9386         else if (allow_package && **s == ':' && (*s)[1] == ':'
9387            /* Disallow things like Foo::$bar. For the curious, this is
9388             * the code path that triggers the "Bad name after" warning
9389             * when looking for barewords.
9390             */
9391            && (*s)[2] != '$') {
9392             *(*d)++ = *(*s)++;
9393             *(*d)++ = *(*s)++;
9394         }
9395         else
9396             break;
9397     }
9398     return;
9399 }
9400
9401 /* Returns a NUL terminated string, with the length of the string written to
9402    *slp
9403    */
9404 STATIC char *
9405 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9406 {
9407     dVAR;
9408     char *d = dest;
9409     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9410     bool is_utf8 = cBOOL(UTF);
9411
9412     PERL_ARGS_ASSERT_SCAN_WORD;
9413
9414     parse_ident(&s, &d, e, allow_package, is_utf8);
9415     *d = '\0';
9416     *slp = d - dest;
9417     return s;
9418 }
9419
9420 STATIC char *
9421 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9422 {
9423     dVAR;
9424     I32 herelines = PL_parser->herelines;
9425     SSize_t bracket = -1;
9426     char funny = *s++;
9427     char *d = dest;
9428     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9429     bool is_utf8 = cBOOL(UTF);
9430     I32 orig_copline, tmp_copline = 0;
9431
9432     PERL_ARGS_ASSERT_SCAN_IDENT;
9433
9434     if (isSPACE(*s))
9435         s = PEEKSPACE(s);
9436     if (isDIGIT(*s)) {
9437         while (isDIGIT(*s)) {
9438             if (d >= e)
9439                 Perl_croak(aTHX_ "%s", ident_too_long);
9440             *d++ = *s++;
9441         }
9442     }
9443     else {
9444         parse_ident(&s, &d, e, 1, is_utf8);
9445     }
9446     *d = '\0';
9447     d = dest;
9448     if (*d) {
9449         /* Either a digit variable, or parse_ident() found an identifier
9450            (anything valid as a bareword), so job done and return.  */
9451         if (PL_lex_state != LEX_NORMAL)
9452             PL_lex_state = LEX_INTERPENDMAYBE;
9453         return s;
9454     }
9455     if (*s == '$' && s[1] &&
9456       (isIDFIRST_lazy_if(s+1,is_utf8)
9457          || isDIGIT_A((U8)s[1])
9458          || s[1] == '$'
9459          || s[1] == '{'
9460          || strnEQ(s+1,"::",2)) )
9461     {
9462         /* Dereferencing a value in a scalar variable.
9463            The alternatives are different syntaxes for a scalar variable.
9464            Using ' as a leading package separator isn't allowed. :: is.   */
9465         return s;
9466     }
9467     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9468     if (*s == '{') {
9469         bracket = s - SvPVX(PL_linestr);
9470         s++;
9471         orig_copline = CopLINE(PL_curcop);
9472         if (s < PL_bufend && isSPACE(*s)) {
9473             s = PEEKSPACE(s);
9474         }
9475     }
9476
9477 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9478  * iff Unicode semantics are to be used.  The legal ones are any of:
9479  *  a) ASCII digits
9480  *  b) ASCII punctuation
9481  *  c) When not under Unicode rules, any upper Latin1 character
9482  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9483  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
9484  *     the \s ones. */
9485 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
9486                                    || isDIGIT_A((U8)(d))                    \
9487                                    || (!(u) && !isASCII((U8)(d)))           \
9488                                    || ((((U8)(d)) < 32)                     \
9489                                        && (((((U8)(d)) >= 14)               \
9490                                            || (((U8)(d)) <= 8 && (d) != 0) \
9491                                            || (((U8)(d)) == 13))))          \
9492                                    || (((U8)(d)) == toCTRL('?')))
9493     if (s < PL_bufend
9494         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9495     {
9496         if ( isCNTRL_A((U8)*s) ) {
9497             deprecate("literal control characters in variable names");
9498         }
9499
9500         if (is_utf8) {
9501             const STRLEN skip = UTF8SKIP(s);
9502             STRLEN i;
9503             d[skip] = '\0';
9504             for ( i = 0; i < skip; i++ )
9505                 d[i] = *s++;
9506         }
9507         else {
9508             *d = *s++;
9509             d[1] = '\0';
9510         }
9511     }
9512     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9513     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9514         *d = toCTRL(*s);
9515         s++;
9516     }
9517     /* Warn about ambiguous code after unary operators if {...} notation isn't
9518        used.  There's no difference in ambiguity; it's merely a heuristic
9519        about when not to warn.  */
9520     else if (ck_uni && bracket == -1)
9521         check_uni();
9522     if (bracket != -1) {
9523         /* If we were processing {...} notation then...  */
9524         if (isIDFIRST_lazy_if(d,is_utf8)) {
9525             /* if it starts as a valid identifier, assume that it is one.
9526                (the later check for } being at the expected point will trap
9527                cases where this doesn't pan out.)  */
9528         d += is_utf8 ? UTF8SKIP(d) : 1;
9529         parse_ident(&s, &d, e, 1, is_utf8);
9530             *d = '\0';
9531             tmp_copline = CopLINE(PL_curcop);
9532             if (s < PL_bufend && isSPACE(*s)) {
9533                 s = PEEKSPACE(s);
9534             }
9535             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9536                 /* ${foo[0]} and ${foo{bar}} notation.  */
9537                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9538                     const char * const brack =
9539                         (const char *)
9540                         ((*s == '[') ? "[...]" : "{...}");
9541                     orig_copline = CopLINE(PL_curcop);
9542                     CopLINE_set(PL_curcop, tmp_copline);
9543    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9544                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9545                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9546                         funny, dest, brack, funny, dest, brack);
9547                     CopLINE_set(PL_curcop, orig_copline);
9548                 }
9549                 bracket++;
9550                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9551                 PL_lex_allbrackets++;
9552                 return s;
9553             }
9554         }
9555         /* Handle extended ${^Foo} variables
9556          * 1999-02-27 mjd-perl-patch@plover.com */
9557         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9558                  && isWORDCHAR(*s))
9559         {
9560             d++;
9561             while (isWORDCHAR(*s) && d < e) {
9562                 *d++ = *s++;
9563             }
9564             if (d >= e)
9565                 Perl_croak(aTHX_ "%s", ident_too_long);
9566             *d = '\0';
9567         }
9568
9569         if ( !tmp_copline )
9570             tmp_copline = CopLINE(PL_curcop);
9571         if (s < PL_bufend && isSPACE(*s)) {
9572             s = PEEKSPACE(s);
9573         }
9574
9575         /* Expect to find a closing } after consuming any trailing whitespace.
9576          */
9577         if (*s == '}') {
9578             s++;
9579             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9580                 PL_lex_state = LEX_INTERPEND;
9581                 PL_expect = XREF;
9582             }
9583             if (PL_lex_state == LEX_NORMAL) {
9584                 if (ckWARN(WARN_AMBIGUOUS) &&
9585                     (keyword(dest, d - dest, 0)
9586                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9587                 {
9588                     SV *tmp = newSVpvn_flags( dest, d - dest,
9589                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9590                     if (funny == '#')
9591                         funny = '@';
9592                     orig_copline = CopLINE(PL_curcop);
9593                     CopLINE_set(PL_curcop, tmp_copline);
9594                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9595                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9596                         funny, tmp, funny, tmp);
9597                     CopLINE_set(PL_curcop, orig_copline);
9598                 }
9599             }
9600         }
9601         else {
9602             /* Didn't find the closing } at the point we expected, so restore
9603                state such that the next thing to process is the opening { and */
9604             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9605             CopLINE_set(PL_curcop, orig_copline);
9606             PL_parser->herelines = herelines;
9607             *dest = '\0';
9608         }
9609     }
9610     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9611         PL_lex_state = LEX_INTERPEND;
9612     return s;
9613 }
9614
9615 static bool
9616 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9617
9618     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9619      * the parse starting at 's', based on the subset that are valid in this
9620      * context input to this routine in 'valid_flags'. Advances s.  Returns
9621      * TRUE if the input should be treated as a valid flag, so the next char
9622      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9623      * first call on the current regex.  This routine will set it to any
9624      * charset modifier found.  The caller shouldn't change it.  This way,
9625      * another charset modifier encountered in the parse can be detected as an
9626      * error, as we have decided to allow only one */
9627
9628     const char c = **s;
9629     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9630
9631     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9632         if (isWORDCHAR_lazy_if(*s, UTF)) {
9633             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9634                        UTF ? SVf_UTF8 : 0);
9635             (*s) += charlen;
9636             /* Pretend that it worked, so will continue processing before
9637              * dieing */
9638             return TRUE;
9639         }
9640         return FALSE;
9641     }
9642
9643     switch (c) {
9644
9645         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9646         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9647         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9648         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9649         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9650         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9651         case LOCALE_PAT_MOD:
9652             if (*charset) {
9653                 goto multiple_charsets;
9654             }
9655             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9656             *charset = c;
9657             break;
9658         case UNICODE_PAT_MOD:
9659             if (*charset) {
9660                 goto multiple_charsets;
9661             }
9662             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9663             *charset = c;
9664             break;
9665         case ASCII_RESTRICT_PAT_MOD:
9666             if (! *charset) {
9667                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9668             }
9669             else {
9670
9671                 /* Error if previous modifier wasn't an 'a', but if it was, see
9672                  * if, and accept, a second occurrence (only) */
9673                 if (*charset != 'a'
9674                     || get_regex_charset(*pmfl)
9675                         != REGEX_ASCII_RESTRICTED_CHARSET)
9676                 {
9677                         goto multiple_charsets;
9678                 }
9679                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9680             }
9681             *charset = c;
9682             break;
9683         case DEPENDS_PAT_MOD:
9684             if (*charset) {
9685                 goto multiple_charsets;
9686             }
9687             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9688             *charset = c;
9689             break;
9690     }
9691
9692     (*s)++;
9693     return TRUE;
9694
9695     multiple_charsets:
9696         if (*charset != c) {
9697             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9698         }
9699         else if (c == 'a') {
9700             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9701         }
9702         else {
9703             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9704         }
9705
9706         /* Pretend that it worked, so will continue processing before dieing */
9707         (*s)++;
9708         return TRUE;
9709 }
9710
9711 STATIC char *
9712 S_scan_pat(pTHX_ char *start, I32 type)
9713 {
9714     dVAR;
9715     PMOP *pm;
9716     char *s;
9717     const char * const valid_flags =
9718         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9719     char charset = '\0';    /* character set modifier */
9720 #ifdef PERL_MAD
9721     char *modstart;
9722 #endif
9723
9724     PERL_ARGS_ASSERT_SCAN_PAT;
9725
9726     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9727                        TRUE /* look for escaped bracketed metas */ );
9728
9729     if (!s) {
9730         const char * const delimiter = skipspace(start);
9731         Perl_croak(aTHX_
9732                    (const char *)
9733                    (*delimiter == '?'
9734                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9735                     : "Search pattern not terminated" ));
9736     }
9737
9738     pm = (PMOP*)newPMOP(type, 0);
9739     if (PL_multi_open == '?') {
9740         /* This is the only point in the code that sets PMf_ONCE:  */
9741         pm->op_pmflags |= PMf_ONCE;
9742
9743         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9744            allows us to restrict the list needed by reset to just the ??
9745            matches.  */
9746         assert(type != OP_TRANS);
9747         if (PL_curstash) {
9748             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9749             U32 elements;
9750             if (!mg) {
9751                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9752                                  0);
9753             }
9754             elements = mg->mg_len / sizeof(PMOP**);
9755             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9756             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9757             mg->mg_len = elements * sizeof(PMOP**);
9758             PmopSTASH_set(pm,PL_curstash);
9759         }
9760     }
9761 #ifdef PERL_MAD
9762     modstart = s;
9763 #endif
9764
9765     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9766      * anon CV. False positives like qr/[(?{]/ are harmless */
9767
9768     if (type == OP_QR) {
9769         STRLEN len;
9770         char *e, *p = SvPV(PL_lex_stuff, len);
9771         e = p + len;
9772         for (; p < e; p++) {
9773             if (p[0] == '(' && p[1] == '?'
9774                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9775             {
9776                 pm->op_pmflags |= PMf_HAS_CV;
9777                 break;
9778             }
9779         }
9780         pm->op_pmflags |= PMf_IS_QR;
9781     }
9782
9783     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9784 #ifdef PERL_MAD
9785     if (PL_madskills && modstart != s) {
9786         SV* tmptoken = newSVpvn(modstart, s - modstart);
9787         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9788     }
9789 #endif
9790     /* issue a warning if /c is specified,but /g is not */
9791     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9792     {
9793         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9794                        "Use of /c modifier is meaningless without /g" );
9795     }
9796
9797     PL_lex_op = (OP*)pm;
9798     pl_yylval.ival = OP_MATCH;
9799     return s;
9800 }
9801
9802 STATIC char *
9803 S_scan_subst(pTHX_ char *start)
9804 {
9805     dVAR;
9806     char *s;
9807     PMOP *pm;
9808     I32 first_start;
9809     line_t first_line;
9810     I32 es = 0;
9811     char charset = '\0';    /* character set modifier */
9812 #ifdef PERL_MAD
9813     char *modstart;
9814 #endif
9815
9816     PERL_ARGS_ASSERT_SCAN_SUBST;
9817
9818     pl_yylval.ival = OP_NULL;
9819
9820     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9821                  TRUE /* look for escaped bracketed metas */ );
9822
9823     if (!s)
9824         Perl_croak(aTHX_ "Substitution pattern not terminated");
9825
9826     if (s[-1] == PL_multi_open)
9827         s--;
9828 #ifdef PERL_MAD
9829     if (PL_madskills) {
9830         CURMAD('q', PL_thisopen);
9831         CURMAD('_', PL_thiswhite);
9832         CURMAD('E', PL_thisstuff);
9833         CURMAD('Q', PL_thisclose);
9834         PL_realtokenstart = s - SvPVX(PL_linestr);
9835     }
9836 #endif
9837
9838     first_start = PL_multi_start;
9839     first_line = CopLINE(PL_curcop);
9840     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9841     if (!s) {
9842         if (PL_lex_stuff) {
9843             SvREFCNT_dec(PL_lex_stuff);
9844             PL_lex_stuff = NULL;
9845         }
9846         Perl_croak(aTHX_ "Substitution replacement not terminated");
9847     }
9848     PL_multi_start = first_start;       /* so whole substitution is taken together */
9849
9850     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9851
9852 #ifdef PERL_MAD
9853     if (PL_madskills) {
9854         CURMAD('z', PL_thisopen);
9855         CURMAD('R', PL_thisstuff);
9856         CURMAD('Z', PL_thisclose);
9857     }
9858     modstart = s;
9859 #endif
9860
9861     while (*s) {
9862         if (*s == EXEC_PAT_MOD) {
9863             s++;
9864             es++;
9865         }
9866         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9867         {
9868             break;
9869         }
9870     }
9871
9872 #ifdef PERL_MAD
9873     if (PL_madskills) {
9874         if (modstart != s)
9875             curmad('m', newSVpvn(modstart, s - modstart));
9876         append_madprops(PL_thismad, (OP*)pm, 0);
9877         PL_thismad = 0;
9878     }
9879 #endif
9880     if ((pm->op_pmflags & PMf_CONTINUE)) {
9881         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9882     }
9883
9884     if (es) {
9885         SV * const repl = newSVpvs("");
9886
9887         PL_multi_end = 0;
9888         pm->op_pmflags |= PMf_EVAL;
9889         while (es-- > 0) {
9890             if (es)
9891                 sv_catpvs(repl, "eval ");
9892             else
9893                 sv_catpvs(repl, "do ");
9894         }
9895         sv_catpvs(repl, "{");
9896         sv_catsv(repl, PL_sublex_info.repl);
9897         sv_catpvs(repl, "}");
9898         SvEVALED_on(repl);
9899         SvREFCNT_dec(PL_sublex_info.repl);
9900         PL_sublex_info.repl = repl;
9901     }
9902     if (CopLINE(PL_curcop) != first_line) {
9903         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9904         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9905             CopLINE(PL_curcop) - first_line;
9906         CopLINE_set(PL_curcop, first_line);
9907     }
9908
9909     PL_lex_op = (OP*)pm;
9910     pl_yylval.ival = OP_SUBST;
9911     return s;
9912 }
9913
9914 STATIC char *
9915 S_scan_trans(pTHX_ char *start)
9916 {
9917     dVAR;
9918     char* s;
9919     OP *o;
9920     U8 squash;
9921     U8 del;
9922     U8 complement;
9923     bool nondestruct = 0;
9924 #ifdef PERL_MAD
9925     char *modstart;
9926 #endif
9927
9928     PERL_ARGS_ASSERT_SCAN_TRANS;
9929
9930     pl_yylval.ival = OP_NULL;
9931
9932     s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9933     if (!s)
9934         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9935
9936     if (s[-1] == PL_multi_open)
9937         s--;
9938 #ifdef PERL_MAD
9939     if (PL_madskills) {
9940         CURMAD('q', PL_thisopen);
9941         CURMAD('_', PL_thiswhite);
9942         CURMAD('E', PL_thisstuff);
9943         CURMAD('Q', PL_thisclose);
9944         PL_realtokenstart = s - SvPVX(PL_linestr);
9945     }
9946 #endif
9947
9948     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9949     if (!s) {
9950         if (PL_lex_stuff) {
9951             SvREFCNT_dec(PL_lex_stuff);
9952             PL_lex_stuff = NULL;
9953         }
9954         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9955     }
9956     if (PL_madskills) {
9957         CURMAD('z', PL_thisopen);
9958         CURMAD('R', PL_thisstuff);
9959         CURMAD('Z', PL_thisclose);
9960     }
9961
9962     complement = del = squash = 0;
9963 #ifdef PERL_MAD
9964     modstart = s;
9965 #endif
9966     while (1) {
9967         switch (*s) {
9968         case 'c':
9969             complement = OPpTRANS_COMPLEMENT;
9970             break;
9971         case 'd':
9972             del = OPpTRANS_DELETE;
9973             break;
9974         case 's':
9975             squash = OPpTRANS_SQUASH;
9976             break;
9977         case 'r':
9978             nondestruct = 1;
9979             break;
9980         default:
9981             goto no_more;
9982         }
9983         s++;
9984     }
9985   no_more:
9986
9987     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9988     o->op_private &= ~OPpTRANS_ALL;
9989     o->op_private |= del|squash|complement|
9990       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9991       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9992
9993     PL_lex_op = o;
9994     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9995
9996 #ifdef PERL_MAD
9997     if (PL_madskills) {
9998         if (modstart != s)
9999             curmad('m', newSVpvn(modstart, s - modstart));
10000         append_madprops(PL_thismad, o, 0);
10001         PL_thismad = 0;
10002     }
10003 #endif
10004
10005     return s;
10006 }
10007
10008 /* scan_heredoc
10009    Takes a pointer to the first < in <<FOO.
10010    Returns a pointer to the byte following <<FOO.
10011
10012    This function scans a heredoc, which involves different methods
10013    depending on whether we are in a string eval, quoted construct, etc.
10014    This is because PL_linestr could containing a single line of input, or
10015    a whole string being evalled, or the contents of the current quote-
10016    like operator.
10017
10018    The two basic methods are:
10019     - Steal lines from the input stream
10020     - Scan the heredoc in PL_linestr and remove it therefrom
10021
10022    In a file scope or filtered eval, the first method is used; in a
10023    string eval, the second.
10024
10025    In a quote-like operator, we have to choose between the two,
10026    depending on where we can find a newline.  We peek into outer lex-
10027    ing scopes until we find one with a newline in it.  If we reach the
10028    outermost lexing scope and it is a file, we use the stream method.
10029    Otherwise it is treated as an eval.
10030 */
10031
10032 STATIC char *
10033 S_scan_heredoc(pTHX_ char *s)
10034 {
10035     dVAR;
10036     I32 op_type = OP_SCALAR;
10037     I32 len;
10038     SV *tmpstr;
10039     char term;
10040     char *d;
10041     char *e;
10042     char *peek;
10043     const bool infile = PL_rsfp || PL_parser->filtered;
10044     const line_t origline = CopLINE(PL_curcop);
10045     LEXSHARED *shared = PL_parser->lex_shared;
10046 #ifdef PERL_MAD
10047     I32 stuffstart = s - SvPVX(PL_linestr);
10048     char *tstart;
10049
10050     PL_realtokenstart = -1;
10051 #endif
10052
10053     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10054
10055     s += 2;
10056     d = PL_tokenbuf + 1;
10057     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10058     *PL_tokenbuf = '\n';
10059     peek = s;
10060     while (SPACE_OR_TAB(*peek))
10061         peek++;
10062     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10063         s = peek;
10064         term = *s++;
10065         s = delimcpy(d, e, s, PL_bufend, term, &len);
10066         if (s == PL_bufend)
10067             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10068         d += len;
10069         s++;
10070     }
10071     else {
10072         if (*s == '\\')
10073             /* <<\FOO is equivalent to <<'FOO' */
10074             s++, term = '\'';
10075         else
10076             term = '"';
10077         if (!isWORDCHAR_lazy_if(s,UTF))
10078             deprecate("bare << to mean <<\"\"");
10079         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10080             if (d < e)
10081                 *d++ = *s;
10082         }
10083     }
10084     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10085         Perl_croak(aTHX_ "Delimiter for here document is too long");
10086     *d++ = '\n';
10087     *d = '\0';
10088     len = d - PL_tokenbuf;
10089
10090 #ifdef PERL_MAD
10091     if (PL_madskills) {
10092         tstart = PL_tokenbuf + 1;
10093         PL_thisclose = newSVpvn(tstart, len - 1);
10094         tstart = SvPVX(PL_linestr) + stuffstart;
10095         PL_thisopen = newSVpvn(tstart, s - tstart);
10096         stuffstart = s - SvPVX(PL_linestr);
10097     }
10098 #endif
10099 #ifndef PERL_STRICT_CR
10100     d = strchr(s, '\r');
10101     if (d) {
10102         char * const olds = s;
10103         s = d;
10104         while (s < PL_bufend) {
10105             if (*s == '\r') {
10106                 *d++ = '\n';
10107                 if (*++s == '\n')
10108                     s++;
10109             }
10110             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10111                 *d++ = *s++;
10112                 s++;
10113             }
10114             else
10115                 *d++ = *s++;
10116         }
10117         *d = '\0';
10118         PL_bufend = d;
10119         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10120         s = olds;
10121     }
10122 #endif
10123 #ifdef PERL_MAD
10124     if (PL_madskills) {
10125         tstart = SvPVX(PL_linestr) + stuffstart;
10126         if (PL_thisstuff)
10127             sv_catpvn(PL_thisstuff, tstart, s - tstart);
10128         else
10129             PL_thisstuff = newSVpvn(tstart, s - tstart);
10130     }
10131
10132     stuffstart = s - SvPVX(PL_linestr);
10133 #endif
10134
10135     tmpstr = newSV_type(SVt_PVIV);
10136     SvGROW(tmpstr, 80);
10137     if (term == '\'') {
10138         op_type = OP_CONST;
10139         SvIV_set(tmpstr, -1);
10140     }
10141     else if (term == '`') {
10142         op_type = OP_BACKTICK;
10143         SvIV_set(tmpstr, '\\');
10144     }
10145
10146     PL_multi_start = origline + 1 + PL_parser->herelines;
10147     PL_multi_open = PL_multi_close = '<';
10148     /* inside a string eval or quote-like operator */
10149     if (!infile || PL_lex_inwhat) {
10150         SV *linestr;
10151         char *bufend;
10152         char * const olds = s;
10153         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10154         /* These two fields are not set until an inner lexing scope is
10155            entered.  But we need them set here. */
10156         shared->ls_bufptr  = s;
10157         shared->ls_linestr = PL_linestr;
10158         if (PL_lex_inwhat)
10159           /* Look for a newline.  If the current buffer does not have one,
10160              peek into the line buffer of the parent lexing scope, going
10161              up as many levels as necessary to find one with a newline
10162              after bufptr.
10163            */
10164           while (!(s = (char *)memchr(
10165                     (void *)shared->ls_bufptr, '\n',
10166                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10167                 ))) {
10168             shared = shared->ls_prev;
10169             /* shared is only null if we have gone beyond the outermost
10170                lexing scope.  In a file, we will have broken out of the
10171                loop in the previous iteration.  In an eval, the string buf-
10172                fer ends with "\n;", so the while condition above will have
10173                evaluated to false.  So shared can never be null. */
10174             assert(shared);
10175             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10176                most lexing scope.  In a file, shared->ls_linestr at that
10177                level is just one line, so there is no body to steal. */
10178             if (infile && !shared->ls_prev) {
10179                 s = olds;
10180                 goto streaming;
10181             }
10182           }
10183         else {  /* eval */
10184             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10185             assert(s);
10186         }
10187         linestr = shared->ls_linestr;
10188         bufend = SvEND(linestr);
10189         d = s;
10190         while (s < bufend - len + 1 &&
10191           memNE(s,PL_tokenbuf,len) ) {
10192             if (*s++ == '\n')
10193                 ++PL_parser->herelines;
10194         }
10195         if (s >= bufend - len + 1) {
10196             goto interminable;
10197         }
10198         sv_setpvn(tmpstr,d+1,s-d);
10199 #ifdef PERL_MAD
10200         if (PL_madskills) {
10201             if (PL_thisstuff)
10202                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10203             else
10204                 PL_thisstuff = newSVpvn(d + 1, s - d);
10205             stuffstart = s - SvPVX(PL_linestr);
10206         }
10207 #endif
10208         s += len - 1;
10209         /* the preceding stmt passes a newline */
10210         PL_parser->herelines++;
10211
10212         /* s now points to the newline after the heredoc terminator.
10213            d points to the newline before the body of the heredoc.
10214          */
10215
10216         /* We are going to modify linestr in place here, so set
10217            aside copies of the string if necessary for re-evals or
10218            (caller $n)[6]. */
10219         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10220            check shared->re_eval_str. */
10221         if (shared->re_eval_start || shared->re_eval_str) {
10222             /* Set aside the rest of the regexp */
10223             if (!shared->re_eval_str)
10224                 shared->re_eval_str =
10225                        newSVpvn(shared->re_eval_start,
10226                                 bufend - shared->re_eval_start);
10227             shared->re_eval_start -= s-d;
10228         }
10229         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10230             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10231             cx->blk_eval.cur_text == linestr)
10232         {
10233             cx->blk_eval.cur_text = newSVsv(linestr);
10234             SvSCREAM_on(cx->blk_eval.cur_text);
10235         }
10236         /* Copy everything from s onwards back to d. */
10237         Move(s,d,bufend-s + 1,char);
10238         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10239         /* Setting PL_bufend only applies when we have not dug deeper
10240            into other scopes, because sublex_done sets PL_bufend to
10241            SvEND(PL_linestr). */
10242         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10243         s = olds;
10244     }
10245     else
10246     {
10247       SV *linestr_save;
10248      streaming:
10249       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10250       term = PL_tokenbuf[1];
10251       len--;
10252       linestr_save = PL_linestr; /* must restore this afterwards */
10253       d = s;                     /* and this */
10254       PL_linestr = newSVpvs("");
10255       PL_bufend = SvPVX(PL_linestr);
10256       while (1) {
10257 #ifdef PERL_MAD
10258         if (PL_madskills) {
10259             tstart = SvPVX(PL_linestr) + stuffstart;
10260             if (PL_thisstuff)
10261                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10262             else
10263                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10264         }
10265 #endif
10266         PL_bufptr = PL_bufend;
10267         CopLINE_set(PL_curcop,
10268                     origline + 1 + PL_parser->herelines);
10269         if (!lex_next_chunk(LEX_NO_TERM)
10270          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10271             SvREFCNT_dec(linestr_save);
10272             goto interminable;
10273         }
10274         CopLINE_set(PL_curcop, origline);
10275         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10276             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10277             /* ^That should be enough to avoid this needing to grow:  */
10278             sv_catpvs(PL_linestr, "\n\0");
10279             assert(s == SvPVX(PL_linestr));
10280             PL_bufend = SvEND(PL_linestr);
10281         }
10282         s = PL_bufptr;
10283 #ifdef PERL_MAD
10284         stuffstart = s - SvPVX(PL_linestr);
10285 #endif
10286         PL_parser->herelines++;
10287         PL_last_lop = PL_last_uni = NULL;
10288 #ifndef PERL_STRICT_CR
10289         if (PL_bufend - PL_linestart >= 2) {
10290             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10291                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10292             {
10293                 PL_bufend[-2] = '\n';
10294                 PL_bufend--;
10295                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10296             }
10297             else if (PL_bufend[-1] == '\r')
10298                 PL_bufend[-1] = '\n';
10299         }
10300         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10301             PL_bufend[-1] = '\n';
10302 #endif
10303         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10304             SvREFCNT_dec(PL_linestr);
10305             PL_linestr = linestr_save;
10306             PL_linestart = SvPVX(linestr_save);
10307             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10308             s = d;
10309             break;
10310         }
10311         else {
10312             sv_catsv(tmpstr,PL_linestr);
10313         }
10314       }
10315     }
10316     PL_multi_end = origline + PL_parser->herelines;
10317     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10318         SvPV_shrink_to_cur(tmpstr);
10319     }
10320     if (!IN_BYTES) {
10321         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10322             SvUTF8_on(tmpstr);
10323         else if (PL_encoding)
10324             sv_recode_to_utf8(tmpstr, PL_encoding);
10325     }
10326     PL_lex_stuff = tmpstr;
10327     pl_yylval.ival = op_type;
10328     return s;
10329
10330   interminable:
10331     SvREFCNT_dec(tmpstr);
10332     CopLINE_set(PL_curcop, origline);
10333     missingterm(PL_tokenbuf + 1);
10334 }
10335
10336 /* scan_inputsymbol
10337    takes: current position in input buffer
10338    returns: new position in input buffer
10339    side-effects: pl_yylval and lex_op are set.
10340
10341    This code handles:
10342
10343    <>           read from ARGV
10344    <FH>         read from filehandle
10345    <pkg::FH>    read from package qualified filehandle
10346    <pkg'FH>     read from package qualified filehandle
10347    <$fh>        read from filehandle in $fh
10348    <*.h>        filename glob
10349
10350 */
10351
10352 STATIC char *
10353 S_scan_inputsymbol(pTHX_ char *start)
10354 {
10355     dVAR;
10356     char *s = start;            /* current position in buffer */
10357     char *end;
10358     I32 len;
10359     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10360     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10361
10362     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10363
10364     end = strchr(s, '\n');
10365     if (!end)
10366         end = PL_bufend;
10367     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10368
10369     /* die if we didn't have space for the contents of the <>,
10370        or if it didn't end, or if we see a newline
10371     */
10372
10373     if (len >= (I32)sizeof PL_tokenbuf)
10374         Perl_croak(aTHX_ "Excessively long <> operator");
10375     if (s >= end)
10376         Perl_croak(aTHX_ "Unterminated <> operator");
10377
10378     s++;
10379
10380     /* check for <$fh>
10381        Remember, only scalar variables are interpreted as filehandles by
10382        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10383        treated as a glob() call.
10384        This code makes use of the fact that except for the $ at the front,
10385        a scalar variable and a filehandle look the same.
10386     */
10387     if (*d == '$' && d[1]) d++;
10388
10389     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10390     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10391         d += UTF ? UTF8SKIP(d) : 1;
10392
10393     /* If we've tried to read what we allow filehandles to look like, and
10394        there's still text left, then it must be a glob() and not a getline.
10395        Use scan_str to pull out the stuff between the <> and treat it
10396        as nothing more than a string.
10397     */
10398
10399     if (d - PL_tokenbuf != len) {
10400         pl_yylval.ival = OP_GLOB;
10401         s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10402         if (!s)
10403            Perl_croak(aTHX_ "Glob not terminated");
10404         return s;
10405     }
10406     else {
10407         bool readline_overriden = FALSE;
10408         GV *gv_readline;
10409         GV **gvp;
10410         /* we're in a filehandle read situation */
10411         d = PL_tokenbuf;
10412
10413         /* turn <> into <ARGV> */
10414         if (!len)
10415             Copy("ARGV",d,5,char);
10416
10417         /* Check whether readline() is overriden */
10418         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10419         if ((gv_readline
10420                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10421                 ||
10422                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10423                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10424                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10425             readline_overriden = TRUE;
10426
10427         /* if <$fh>, create the ops to turn the variable into a
10428            filehandle
10429         */
10430         if (*d == '$') {
10431             /* try to find it in the pad for this block, otherwise find
10432                add symbol table ops
10433             */
10434             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10435             if (tmp != NOT_IN_PAD) {
10436                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10437                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10438                     HEK * const stashname = HvNAME_HEK(stash);
10439                     SV * const sym = sv_2mortal(newSVhek(stashname));
10440                     sv_catpvs(sym, "::");
10441                     sv_catpv(sym, d+1);
10442                     d = SvPVX(sym);
10443                     goto intro_sym;
10444                 }
10445                 else {
10446                     OP * const o = newOP(OP_PADSV, 0);
10447                     o->op_targ = tmp;
10448                     PL_lex_op = readline_overriden
10449                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10450                                 op_append_elem(OP_LIST, o,
10451                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10452                         : (OP*)newUNOP(OP_READLINE, 0, o);
10453                 }
10454             }
10455             else {
10456                 GV *gv;
10457                 ++d;
10458 intro_sym:
10459                 gv = gv_fetchpv(d,
10460                                 (PL_in_eval
10461                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10462                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10463                                 SVt_PV);
10464                 PL_lex_op = readline_overriden
10465                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10466                             op_append_elem(OP_LIST,
10467                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10468                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10469                     : (OP*)newUNOP(OP_READLINE, 0,
10470                             newUNOP(OP_RV2SV, 0,
10471                                 newGVOP(OP_GV, 0, gv)));
10472             }
10473             if (!readline_overriden)
10474                 PL_lex_op->op_flags |= OPf_SPECIAL;
10475             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10476             pl_yylval.ival = OP_NULL;
10477         }
10478
10479         /* If it's none of the above, it must be a literal filehandle
10480            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10481         else {
10482             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10483             PL_lex_op = readline_overriden
10484                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10485                         op_append_elem(OP_LIST,
10486                             newGVOP(OP_GV, 0, gv),
10487                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10488                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10489             pl_yylval.ival = OP_NULL;
10490         }
10491     }
10492
10493     return s;
10494 }
10495
10496
10497 /* scan_str
10498    takes:
10499         start                   position in buffer
10500         keep_quoted             preserve \ on the embedded delimiter(s)
10501         keep_delims             preserve the delimiters around the string
10502         re_reparse              compiling a run-time /(?{})/:
10503                                    collapse // to /,  and skip encoding src
10504         deprecate_escaped_meta  issue a deprecation warning for cer-
10505                                 tain paired metacharacters that appear
10506                                 escaped within it
10507    returns: position to continue reading from buffer
10508    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10509         updates the read buffer.
10510
10511    This subroutine pulls a string out of the input.  It is called for:
10512         q               single quotes           q(literal text)
10513         '               single quotes           'literal text'
10514         qq              double quotes           qq(interpolate $here please)
10515         "               double quotes           "interpolate $here please"
10516         qx              backticks               qx(/bin/ls -l)
10517         `               backticks               `/bin/ls -l`
10518         qw              quote words             @EXPORT_OK = qw( func() $spam )
10519         m//             regexp match            m/this/
10520         s///            regexp substitute       s/this/that/
10521         tr///           string transliterate    tr/this/that/
10522         y///            string transliterate    y/this/that/
10523         ($*@)           sub prototypes          sub foo ($)
10524         (stuff)         sub attr parameters     sub foo : attr(stuff)
10525         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10526
10527    In most of these cases (all but <>, patterns and transliterate)
10528    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10529    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10530    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10531    calls scan_str().
10532
10533    It skips whitespace before the string starts, and treats the first
10534    character as the delimiter.  If the delimiter is one of ([{< then
10535    the corresponding "close" character )]}> is used as the closing
10536    delimiter.  It allows quoting of delimiters, and if the string has
10537    balanced delimiters ([{<>}]) it allows nesting.
10538
10539    On success, the SV with the resulting string is put into lex_stuff or,
10540    if that is already non-NULL, into lex_repl. The second case occurs only
10541    when parsing the RHS of the special constructs s/// and tr/// (y///).
10542    For convenience, the terminating delimiter character is stuffed into
10543    SvIVX of the SV.
10544 */
10545
10546 STATIC char *
10547 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10548                  bool deprecate_escaped_meta
10549     )
10550 {
10551     dVAR;
10552     SV *sv;                     /* scalar value: string */
10553     const char *tmps;           /* temp string, used for delimiter matching */
10554     char *s = start;            /* current position in the buffer */
10555     char term;                  /* terminating character */
10556     char *to;                   /* current position in the sv's data */
10557     I32 brackets = 1;           /* bracket nesting level */
10558     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10559     I32 termcode;               /* terminating char. code */
10560     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10561     STRLEN termlen;             /* length of terminating string */
10562     int last_off = 0;           /* last position for nesting bracket */
10563     char *escaped_open = NULL;
10564     line_t herelines;
10565 #ifdef PERL_MAD
10566     int stuffstart;
10567     char *tstart;
10568 #endif
10569
10570     PERL_ARGS_ASSERT_SCAN_STR;
10571
10572     /* skip space before the delimiter */
10573     if (isSPACE(*s)) {
10574         s = PEEKSPACE(s);
10575     }
10576
10577 #ifdef PERL_MAD
10578     if (PL_realtokenstart >= 0) {
10579         stuffstart = PL_realtokenstart;
10580         PL_realtokenstart = -1;
10581     }
10582     else
10583         stuffstart = start - SvPVX(PL_linestr);
10584 #endif
10585     /* mark where we are, in case we need to report errors */
10586     CLINE;
10587
10588     /* after skipping whitespace, the next character is the terminator */
10589     term = *s;
10590     if (!UTF) {
10591         termcode = termstr[0] = term;
10592         termlen = 1;
10593     }
10594     else {
10595         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10596         Copy(s, termstr, termlen, U8);
10597         if (!UTF8_IS_INVARIANT(term))
10598             has_utf8 = TRUE;
10599     }
10600
10601     /* mark where we are */
10602     PL_multi_start = CopLINE(PL_curcop);
10603     PL_multi_open = term;
10604     herelines = PL_parser->herelines;
10605
10606     /* find corresponding closing delimiter */
10607     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10608         termcode = termstr[0] = term = tmps[5];
10609
10610     PL_multi_close = term;
10611
10612     /* A warning is raised if the input parameter requires it for escaped (by a
10613      * backslash) paired metacharacters {} [] and () when the delimiters are
10614      * those same characters, and the backslash is ineffective.  This doesn't
10615      * happen for <>, as they aren't metas. */
10616     if (deprecate_escaped_meta
10617         && (PL_multi_open == PL_multi_close
10618             || PL_multi_open == '<'
10619             || ! ckWARN_d(WARN_DEPRECATED)))
10620     {
10621         deprecate_escaped_meta = FALSE;
10622     }
10623
10624     /* create a new SV to hold the contents.  79 is the SV's initial length.
10625        What a random number. */
10626     sv = newSV_type(SVt_PVIV);
10627     SvGROW(sv, 80);
10628     SvIV_set(sv, termcode);
10629     (void)SvPOK_only(sv);               /* validate pointer */
10630
10631     /* move past delimiter and try to read a complete string */
10632     if (keep_delims)
10633         sv_catpvn(sv, s, termlen);
10634     s += termlen;
10635 #ifdef PERL_MAD
10636     tstart = SvPVX(PL_linestr) + stuffstart;
10637     if (PL_madskills && !PL_thisopen && !keep_delims) {
10638         PL_thisopen = newSVpvn(tstart, s - tstart);
10639         stuffstart = s - SvPVX(PL_linestr);
10640     }
10641 #endif
10642     for (;;) {
10643         if (PL_encoding && !UTF && !re_reparse) {
10644             bool cont = TRUE;
10645
10646             while (cont) {
10647                 int offset = s - SvPVX_const(PL_linestr);
10648                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10649                                            &offset, (char*)termstr, termlen);
10650                 const char *ns;
10651                 char *svlast;
10652
10653                 if (SvIsCOW(PL_linestr)) {
10654                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10655                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10656                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10657                     char *buf = SvPVX(PL_linestr);
10658                     bufend_pos = PL_parser->bufend - buf;
10659                     bufptr_pos = PL_parser->bufptr - buf;
10660                     oldbufptr_pos = PL_parser->oldbufptr - buf;
10661                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10662                     linestart_pos = PL_parser->linestart - buf;
10663                     last_uni_pos = PL_parser->last_uni
10664                         ? PL_parser->last_uni - buf
10665                         : 0;
10666                     last_lop_pos = PL_parser->last_lop
10667                         ? PL_parser->last_lop - buf
10668                         : 0;
10669                     re_eval_start_pos =
10670                         PL_parser->lex_shared->re_eval_start ?
10671                             PL_parser->lex_shared->re_eval_start - buf : 0;
10672                     s_pos = s - buf;
10673
10674                     sv_force_normal(PL_linestr);
10675
10676                     buf = SvPVX(PL_linestr);
10677                     PL_parser->bufend = buf + bufend_pos;
10678                     PL_parser->bufptr = buf + bufptr_pos;
10679                     PL_parser->oldbufptr = buf + oldbufptr_pos;
10680                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10681                     PL_parser->linestart = buf + linestart_pos;
10682                     if (PL_parser->last_uni)
10683                         PL_parser->last_uni = buf + last_uni_pos;
10684                     if (PL_parser->last_lop)
10685                         PL_parser->last_lop = buf + last_lop_pos;
10686                     if (PL_parser->lex_shared->re_eval_start)
10687                         PL_parser->lex_shared->re_eval_start  =
10688                             buf + re_eval_start_pos;
10689                     s = buf + s_pos;
10690                 }
10691                 ns = SvPVX_const(PL_linestr) + offset;
10692                 svlast = SvEND(sv) - 1;
10693
10694                 for (; s < ns; s++) {
10695                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10696                         COPLINE_INC_WITH_HERELINES;
10697                 }
10698                 if (!found)
10699                     goto read_more_line;
10700                 else {
10701                     /* handle quoted delimiters */
10702                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10703                         const char *t;
10704                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10705                             t--;
10706                         if ((svlast-1 - t) % 2) {
10707                             if (!keep_quoted) {
10708                                 *(svlast-1) = term;
10709                                 *svlast = '\0';
10710                                 SvCUR_set(sv, SvCUR(sv) - 1);
10711                             }
10712                             continue;
10713                         }
10714                     }
10715                     if (PL_multi_open == PL_multi_close) {
10716                         cont = FALSE;
10717                     }
10718                     else {
10719                         const char *t;
10720                         char *w;
10721                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10722                             /* At here, all closes are "was quoted" one,
10723                                so we don't check PL_multi_close. */
10724                             if (*t == '\\') {
10725                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10726                                     t++;
10727                                 else
10728                                     *w++ = *t++;
10729                             }
10730                             else if (*t == PL_multi_open)
10731                                 brackets++;
10732
10733                             *w = *t;
10734                         }
10735                         if (w < t) {
10736                             *w++ = term;
10737                             *w = '\0';
10738                             SvCUR_set(sv, w - SvPVX_const(sv));
10739                         }
10740                         last_off = w - SvPVX(sv);
10741                         if (--brackets <= 0)
10742                             cont = FALSE;
10743                     }
10744                 }
10745             }
10746             if (!keep_delims) {
10747                 SvCUR_set(sv, SvCUR(sv) - 1);
10748                 *SvEND(sv) = '\0';
10749             }
10750             break;
10751         }
10752
10753         /* extend sv if need be */
10754         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10755         /* set 'to' to the next character in the sv's string */
10756         to = SvPVX(sv)+SvCUR(sv);
10757
10758         /* if open delimiter is the close delimiter read unbridle */
10759         if (PL_multi_open == PL_multi_close) {
10760             for (; s < PL_bufend; s++,to++) {
10761                 /* embedded newlines increment the current line number */
10762                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10763                     COPLINE_INC_WITH_HERELINES;
10764                 /* handle quoted delimiters */
10765                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10766                     if (!keep_quoted
10767                         && (s[1] == term
10768                             || (re_reparse && s[1] == '\\'))
10769                     )
10770                         s++;
10771                     /* any other quotes are simply copied straight through */
10772                     else
10773                         *to++ = *s++;
10774                 }
10775                 /* terminate when run out of buffer (the for() condition), or
10776                    have found the terminator */
10777                 else if (*s == term) {
10778                     if (termlen == 1)
10779                         break;
10780                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10781                         break;
10782                 }
10783                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10784                     has_utf8 = TRUE;
10785                 *to = *s;
10786             }
10787         }
10788
10789         /* if the terminator isn't the same as the start character (e.g.,
10790            matched brackets), we have to allow more in the quoting, and
10791            be prepared for nested brackets.
10792         */
10793         else {
10794             /* read until we run out of string, or we find the terminator */
10795             for (; s < PL_bufend; s++,to++) {
10796                 /* embedded newlines increment the line count */
10797                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10798                     COPLINE_INC_WITH_HERELINES;
10799                 /* backslashes can escape the open or closing characters */
10800                 if (*s == '\\' && s+1 < PL_bufend) {
10801                     if (!keep_quoted &&
10802                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10803                     {
10804                         s++;
10805
10806                         /* Here, 'deprecate_escaped_meta' is true iff the
10807                          * delimiters are paired metacharacters, and 's' points
10808                          * to an occurrence of one of them within the string,
10809                          * which was preceded by a backslash.  If this is a
10810                          * context where the delimiter is also a metacharacter,
10811                          * the backslash is useless, and deprecated.  () and []
10812                          * are meta in any context. {} are meta only when
10813                          * appearing in a quantifier or in things like '\p{'
10814                          * (but '\\p{' isn't meta).  They also aren't meta
10815                          * unless there is a matching closed, escaped char
10816                          * later on within the string.  If 's' points to an
10817                          * open, set a flag; if to a close, test that flag, and
10818                          * raise a warning if it was set */
10819
10820                         if (deprecate_escaped_meta) {
10821                             if (*s == PL_multi_open) {
10822                                 if (*s != '{') {
10823                                     escaped_open = s;
10824                                 }
10825                                      /* Look for a closing '\}' */
10826                                 else if (regcurly(s, TRUE)) {
10827                                     escaped_open = s;
10828                                 }
10829                                      /* Look for e.g.  '\x{' */
10830                                 else if (s - start > 2
10831                                          && _generic_isCC(*(s-2),
10832                                              _CC_BACKSLASH_FOO_LBRACE_IS_META))
10833                                 { /* Exclude '\\x', '\\\\x', etc. */
10834                                     char *lookbehind = s - 4;
10835                                     bool is_meta = TRUE;
10836                                     while (lookbehind >= start
10837                                            && *lookbehind == '\\')
10838                                     {
10839                                         is_meta = ! is_meta;
10840                                         lookbehind--;
10841                                     }
10842                                     if (is_meta) {
10843                                         escaped_open = s;
10844                                     }
10845                                 }
10846                             }
10847                             else if (escaped_open) {
10848                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10849                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10850                                 escaped_open = NULL;
10851                             }
10852                         }
10853                     }
10854                     else
10855                         *to++ = *s++;
10856                 }
10857                 /* allow nested opens and closes */
10858                 else if (*s == PL_multi_close && --brackets <= 0)
10859                     break;
10860                 else if (*s == PL_multi_open)
10861                     brackets++;
10862                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10863                     has_utf8 = TRUE;
10864                 *to = *s;
10865             }
10866         }
10867         /* terminate the copied string and update the sv's end-of-string */
10868         *to = '\0';
10869         SvCUR_set(sv, to - SvPVX_const(sv));
10870
10871         /*
10872          * this next chunk reads more into the buffer if we're not done yet
10873          */
10874
10875         if (s < PL_bufend)
10876             break;              /* handle case where we are done yet :-) */
10877
10878 #ifndef PERL_STRICT_CR
10879         if (to - SvPVX_const(sv) >= 2) {
10880             if ((to[-2] == '\r' && to[-1] == '\n') ||
10881                 (to[-2] == '\n' && to[-1] == '\r'))
10882             {
10883                 to[-2] = '\n';
10884                 to--;
10885                 SvCUR_set(sv, to - SvPVX_const(sv));
10886             }
10887             else if (to[-1] == '\r')
10888                 to[-1] = '\n';
10889         }
10890         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10891             to[-1] = '\n';
10892 #endif
10893
10894      read_more_line:
10895         /* if we're out of file, or a read fails, bail and reset the current
10896            line marker so we can report where the unterminated string began
10897         */
10898 #ifdef PERL_MAD
10899         if (PL_madskills) {
10900             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10901             if (PL_thisstuff)
10902                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10903             else
10904                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10905         }
10906 #endif
10907         COPLINE_INC_WITH_HERELINES;
10908         PL_bufptr = PL_bufend;
10909         if (!lex_next_chunk(0)) {
10910             sv_free(sv);
10911             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10912             return NULL;
10913         }
10914         s = PL_bufptr;
10915 #ifdef PERL_MAD
10916         stuffstart = 0;
10917 #endif
10918     }
10919
10920     /* at this point, we have successfully read the delimited string */
10921
10922     if (!PL_encoding || UTF || re_reparse) {
10923 #ifdef PERL_MAD
10924         if (PL_madskills) {
10925             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10926             const int len = s - tstart;
10927             if (PL_thisstuff)
10928                 sv_catpvn(PL_thisstuff, tstart, len);
10929             else
10930                 PL_thisstuff = newSVpvn(tstart, len);
10931             if (!PL_thisclose && !keep_delims)
10932                 PL_thisclose = newSVpvn(s,termlen);
10933         }
10934 #endif
10935
10936         if (keep_delims)
10937             sv_catpvn(sv, s, termlen);
10938         s += termlen;
10939     }
10940 #ifdef PERL_MAD
10941     else {
10942         if (PL_madskills) {
10943             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10944             const int len = s - tstart - termlen;
10945             if (PL_thisstuff)
10946                 sv_catpvn(PL_thisstuff, tstart, len);
10947             else
10948                 PL_thisstuff = newSVpvn(tstart, len);
10949             if (!PL_thisclose && !keep_delims)
10950                 PL_thisclose = newSVpvn(s - termlen,termlen);
10951         }
10952     }
10953 #endif
10954     if (has_utf8 || (PL_encoding && !re_reparse))
10955         SvUTF8_on(sv);
10956
10957     PL_multi_end = CopLINE(PL_curcop);
10958     CopLINE_set(PL_curcop, PL_multi_start);
10959     PL_parser->herelines = herelines;
10960
10961     /* if we allocated too much space, give some back */
10962     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10963         SvLEN_set(sv, SvCUR(sv) + 1);
10964         SvPV_renew(sv, SvLEN(sv));
10965     }
10966
10967     /* decide whether this is the first or second quoted string we've read
10968        for this op
10969     */
10970
10971     if (PL_lex_stuff)
10972         PL_sublex_info.repl = sv;
10973     else
10974         PL_lex_stuff = sv;
10975     return s;
10976 }
10977
10978 /*
10979   scan_num
10980   takes: pointer to position in buffer
10981   returns: pointer to new position in buffer
10982   side-effects: builds ops for the constant in pl_yylval.op
10983
10984   Read a number in any of the formats that Perl accepts:
10985
10986   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10987   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10988   0b[01](_?[01])*
10989   0[0-7](_?[0-7])*
10990   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10991
10992   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10993   thing it reads.
10994
10995   If it reads a number without a decimal point or an exponent, it will
10996   try converting the number to an integer and see if it can do so
10997   without loss of precision.
10998 */
10999
11000 char *
11001 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11002 {
11003     dVAR;
11004     const char *s = start;      /* current position in buffer */
11005     char *d;                    /* destination in temp buffer */
11006     char *e;                    /* end of temp buffer */
11007     NV nv;                              /* number read, as a double */
11008     SV *sv = NULL;                      /* place to put the converted number */
11009     bool floatit;                       /* boolean: int or float? */
11010     const char *lastub = NULL;          /* position of last underbar */
11011     static const char* const number_too_long = "Number too long";
11012
11013     PERL_ARGS_ASSERT_SCAN_NUM;
11014
11015     /* We use the first character to decide what type of number this is */
11016
11017     switch (*s) {
11018     default:
11019         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11020
11021     /* if it starts with a 0, it could be an octal number, a decimal in
11022        0.13 disguise, or a hexadecimal number, or a binary number. */
11023     case '0':
11024         {
11025           /* variables:
11026              u          holds the "number so far"
11027              shift      the power of 2 of the base
11028                         (hex == 4, octal == 3, binary == 1)
11029              overflowed was the number more than we can hold?
11030
11031              Shift is used when we add a digit.  It also serves as an "are
11032              we in octal/hex/binary?" indicator to disallow hex characters
11033              when in octal mode.
11034            */
11035             NV n = 0.0;
11036             UV u = 0;
11037             I32 shift;
11038             bool overflowed = FALSE;
11039             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11040             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11041             static const char* const bases[5] =
11042               { "", "binary", "", "octal", "hexadecimal" };
11043             static const char* const Bases[5] =
11044               { "", "Binary", "", "Octal", "Hexadecimal" };
11045             static const char* const maxima[5] =
11046               { "",
11047                 "0b11111111111111111111111111111111",
11048                 "",
11049                 "037777777777",
11050                 "0xffffffff" };
11051             const char *base, *Base, *max;
11052
11053             /* check for hex */
11054             if (s[1] == 'x' || s[1] == 'X') {
11055                 shift = 4;
11056                 s += 2;
11057                 just_zero = FALSE;
11058             } else if (s[1] == 'b' || s[1] == 'B') {
11059                 shift = 1;
11060                 s += 2;
11061                 just_zero = FALSE;
11062             }
11063             /* check for a decimal in disguise */
11064             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11065                 goto decimal;
11066             /* so it must be octal */
11067             else {
11068                 shift = 3;
11069                 s++;
11070             }
11071
11072             if (*s == '_') {
11073                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11074                                "Misplaced _ in number");
11075                lastub = s++;
11076             }
11077
11078             base = bases[shift];
11079             Base = Bases[shift];
11080             max  = maxima[shift];
11081
11082             /* read the rest of the number */
11083             for (;;) {
11084                 /* x is used in the overflow test,
11085                    b is the digit we're adding on. */
11086                 UV x, b;
11087
11088                 switch (*s) {
11089
11090                 /* if we don't mention it, we're done */
11091                 default:
11092                     goto out;
11093
11094                 /* _ are ignored -- but warned about if consecutive */
11095                 case '_':
11096                     if (lastub && s == lastub + 1)
11097                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11098                                        "Misplaced _ in number");
11099                     lastub = s++;
11100                     break;
11101
11102                 /* 8 and 9 are not octal */
11103                 case '8': case '9':
11104                     if (shift == 3)
11105                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11106                     /* FALL THROUGH */
11107
11108                 /* octal digits */
11109                 case '2': case '3': case '4':
11110                 case '5': case '6': case '7':
11111                     if (shift == 1)
11112                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11113                     /* FALL THROUGH */
11114
11115                 case '0': case '1':
11116                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11117                     goto digit;
11118
11119                 /* hex digits */
11120                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11121                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11122                     /* make sure they said 0x */
11123                     if (shift != 4)
11124                         goto out;
11125                     b = (*s++ & 7) + 9;
11126
11127                     /* Prepare to put the digit we have onto the end
11128                        of the number so far.  We check for overflows.
11129                     */
11130
11131                   digit:
11132                     just_zero = FALSE;
11133                     if (!overflowed) {
11134                         x = u << shift; /* make room for the digit */
11135
11136                         if ((x >> shift) != u
11137                             && !(PL_hints & HINT_NEW_BINARY)) {
11138                             overflowed = TRUE;
11139                             n = (NV) u;
11140                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11141                                              "Integer overflow in %s number",
11142                                              base);
11143                         } else
11144                             u = x | b;          /* add the digit to the end */
11145                     }
11146                     if (overflowed) {
11147                         n *= nvshift[shift];
11148                         /* If an NV has not enough bits in its
11149                          * mantissa to represent an UV this summing of
11150                          * small low-order numbers is a waste of time
11151                          * (because the NV cannot preserve the
11152                          * low-order bits anyway): we could just
11153                          * remember when did we overflow and in the
11154                          * end just multiply n by the right
11155                          * amount. */
11156                         n += (NV) b;
11157                     }
11158                     break;
11159                 }
11160             }
11161
11162           /* if we get here, we had success: make a scalar value from
11163              the number.
11164           */
11165           out:
11166
11167             /* final misplaced underbar check */
11168             if (s[-1] == '_') {
11169                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11170             }
11171
11172             if (overflowed) {
11173                 if (n > 4294967295.0)
11174                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11175                                    "%s number > %s non-portable",
11176                                    Base, max);
11177                 sv = newSVnv(n);
11178             }
11179             else {
11180 #if UVSIZE > 4
11181                 if (u > 0xffffffff)
11182                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11183                                    "%s number > %s non-portable",
11184                                    Base, max);
11185 #endif
11186                 sv = newSVuv(u);
11187             }
11188             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11189                 sv = new_constant(start, s - start, "integer",
11190                                   sv, NULL, NULL, 0);
11191             else if (PL_hints & HINT_NEW_BINARY)
11192                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11193         }
11194         break;
11195
11196     /*
11197       handle decimal numbers.
11198       we're also sent here when we read a 0 as the first digit
11199     */
11200     case '1': case '2': case '3': case '4': case '5':
11201     case '6': case '7': case '8': case '9': case '.':
11202       decimal:
11203         d = PL_tokenbuf;
11204         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11205         floatit = FALSE;
11206
11207         /* read next group of digits and _ and copy into d */
11208         while (isDIGIT(*s) || *s == '_') {
11209             /* skip underscores, checking for misplaced ones
11210                if -w is on
11211             */
11212             if (*s == '_') {
11213                 if (lastub && s == lastub + 1)
11214                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11215                                    "Misplaced _ in number");
11216                 lastub = s++;
11217             }
11218             else {
11219                 /* check for end of fixed-length buffer */
11220                 if (d >= e)
11221                     Perl_croak(aTHX_ "%s", number_too_long);
11222                 /* if we're ok, copy the character */
11223                 *d++ = *s++;
11224             }
11225         }
11226
11227         /* final misplaced underbar check */
11228         if (lastub && s == lastub + 1) {
11229             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11230         }
11231
11232         /* read a decimal portion if there is one.  avoid
11233            3..5 being interpreted as the number 3. followed
11234            by .5
11235         */
11236         if (*s == '.' && s[1] != '.') {
11237             floatit = TRUE;
11238             *d++ = *s++;
11239
11240             if (*s == '_') {
11241                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11242                                "Misplaced _ in number");
11243                 lastub = s;
11244             }
11245
11246             /* copy, ignoring underbars, until we run out of digits.
11247             */
11248             for (; isDIGIT(*s) || *s == '_'; s++) {
11249                 /* fixed length buffer check */
11250                 if (d >= e)
11251                     Perl_croak(aTHX_ "%s", number_too_long);
11252                 if (*s == '_') {
11253                    if (lastub && s == lastub + 1)
11254                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11255                                       "Misplaced _ in number");
11256                    lastub = s;
11257                 }
11258                 else
11259                     *d++ = *s;
11260             }
11261             /* fractional part ending in underbar? */
11262             if (s[-1] == '_') {
11263                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11264                                "Misplaced _ in number");
11265             }
11266             if (*s == '.' && isDIGIT(s[1])) {
11267                 /* oops, it's really a v-string, but without the "v" */
11268                 s = start;
11269                 goto vstring;
11270             }
11271         }
11272
11273         /* read exponent part, if present */
11274         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11275             floatit = TRUE;
11276             s++;
11277
11278             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11279             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11280
11281             /* stray preinitial _ */
11282             if (*s == '_') {
11283                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11284                                "Misplaced _ in number");
11285                 lastub = s++;
11286             }
11287
11288             /* allow positive or negative exponent */
11289             if (*s == '+' || *s == '-')
11290                 *d++ = *s++;
11291
11292             /* stray initial _ */
11293             if (*s == '_') {
11294                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11295                                "Misplaced _ in number");
11296                 lastub = s++;
11297             }
11298
11299             /* read digits of exponent */
11300             while (isDIGIT(*s) || *s == '_') {
11301                 if (isDIGIT(*s)) {
11302                     if (d >= e)
11303                         Perl_croak(aTHX_ "%s", number_too_long);
11304                     *d++ = *s++;
11305                 }
11306                 else {
11307                    if (((lastub && s == lastub + 1) ||
11308                         (!isDIGIT(s[1]) && s[1] != '_')))
11309                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11310                                       "Misplaced _ in number");
11311                    lastub = s++;
11312                 }
11313             }
11314         }
11315
11316
11317         /*
11318            We try to do an integer conversion first if no characters
11319            indicating "float" have been found.
11320          */
11321
11322         if (!floatit) {
11323             UV uv;
11324             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11325
11326             if (flags == IS_NUMBER_IN_UV) {
11327               if (uv <= IV_MAX)
11328                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11329               else
11330                 sv = newSVuv(uv);
11331             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11332               if (uv <= (UV) IV_MIN)
11333                 sv = newSViv(-(IV)uv);
11334               else
11335                 floatit = TRUE;
11336             } else
11337               floatit = TRUE;
11338         }
11339         if (floatit) {
11340             /* terminate the string */
11341             *d = '\0';
11342             nv = Atof(PL_tokenbuf);
11343             sv = newSVnv(nv);
11344         }
11345
11346         if ( floatit
11347              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11348             const char *const key = floatit ? "float" : "integer";
11349             const STRLEN keylen = floatit ? 5 : 7;
11350             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11351                                 key, keylen, sv, NULL, NULL, 0);
11352         }
11353         break;
11354
11355     /* if it starts with a v, it could be a v-string */
11356     case 'v':
11357 vstring:
11358                 sv = newSV(5); /* preallocate storage space */
11359                 ENTER_with_name("scan_vstring");
11360                 SAVEFREESV(sv);
11361                 s = scan_vstring(s, PL_bufend, sv);
11362                 SvREFCNT_inc_simple_void_NN(sv);
11363                 LEAVE_with_name("scan_vstring");
11364         break;
11365     }
11366
11367     /* make the op for the constant and return */
11368
11369     if (sv)
11370         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11371     else
11372         lvalp->opval = NULL;
11373
11374     return (char *)s;
11375 }
11376
11377 STATIC char *
11378 S_scan_formline(pTHX_ char *s)
11379 {
11380     dVAR;
11381     char *eol;
11382     char *t;
11383     SV * const stuff = newSVpvs("");
11384     bool needargs = FALSE;
11385     bool eofmt = FALSE;
11386 #ifdef PERL_MAD
11387     char *tokenstart = s;
11388     SV* savewhite = NULL;
11389
11390     if (PL_madskills) {
11391         savewhite = PL_thiswhite;
11392         PL_thiswhite = 0;
11393     }
11394 #endif
11395
11396     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11397
11398     while (!needargs) {
11399         if (*s == '.') {
11400             t = s+1;
11401 #ifdef PERL_STRICT_CR
11402             while (SPACE_OR_TAB(*t))
11403                 t++;
11404 #else
11405             while (SPACE_OR_TAB(*t) || *t == '\r')
11406                 t++;
11407 #endif
11408             if (*t == '\n' || t == PL_bufend) {
11409                 eofmt = TRUE;
11410                 break;
11411             }
11412         }
11413         eol = (char *) memchr(s,'\n',PL_bufend-s);
11414         if (!eol++)
11415                 eol = PL_bufend;
11416         if (*s != '#') {
11417             for (t = s; t < eol; t++) {
11418                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11419                     needargs = FALSE;
11420                     goto enough;        /* ~~ must be first line in formline */
11421                 }
11422                 if (*t == '@' || *t == '^')
11423                     needargs = TRUE;
11424             }
11425             if (eol > s) {
11426                 sv_catpvn(stuff, s, eol-s);
11427 #ifndef PERL_STRICT_CR
11428                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11429                     char *end = SvPVX(stuff) + SvCUR(stuff);
11430                     end[-2] = '\n';
11431                     end[-1] = '\0';
11432                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11433                 }
11434 #endif
11435             }
11436             else
11437               break;
11438         }
11439         s = (char*)eol;
11440         if ((PL_rsfp || PL_parser->filtered)
11441          && PL_parser->form_lex_state == LEX_NORMAL) {
11442             bool got_some;
11443 #ifdef PERL_MAD
11444             if (PL_madskills) {
11445                 if (PL_thistoken)
11446                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11447                 else
11448                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11449             }
11450 #endif
11451             PL_bufptr = PL_bufend;
11452             COPLINE_INC_WITH_HERELINES;
11453             got_some = lex_next_chunk(0);
11454             CopLINE_dec(PL_curcop);
11455             s = PL_bufptr;
11456 #ifdef PERL_MAD
11457             tokenstart = PL_bufptr;
11458 #endif
11459             if (!got_some)
11460                 break;
11461         }
11462         incline(s);
11463     }
11464   enough:
11465     if (!SvCUR(stuff) || needargs)
11466         PL_lex_state = PL_parser->form_lex_state;
11467     if (SvCUR(stuff)) {
11468         PL_expect = XSTATE;
11469         if (needargs) {
11470             start_force(PL_curforce);
11471             NEXTVAL_NEXTTOKE.ival = 0;
11472             force_next(FORMLBRACK);
11473         }
11474         if (!IN_BYTES) {
11475             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11476                 SvUTF8_on(stuff);
11477             else if (PL_encoding)
11478                 sv_recode_to_utf8(stuff, PL_encoding);
11479         }
11480         start_force(PL_curforce);
11481         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11482         force_next(THING);
11483     }
11484     else {
11485         SvREFCNT_dec(stuff);
11486         if (eofmt)
11487             PL_lex_formbrack = 0;
11488     }
11489 #ifdef PERL_MAD
11490     if (PL_madskills) {
11491         if (PL_thistoken)
11492             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11493         else
11494             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11495         PL_thiswhite = savewhite;
11496     }
11497 #endif
11498     return s;
11499 }
11500
11501 I32
11502 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11503 {
11504     dVAR;
11505     const I32 oldsavestack_ix = PL_savestack_ix;
11506     CV* const outsidecv = PL_compcv;
11507
11508     SAVEI32(PL_subline);
11509     save_item(PL_subname);
11510     SAVESPTR(PL_compcv);
11511
11512     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11513     CvFLAGS(PL_compcv) |= flags;
11514
11515     PL_subline = CopLINE(PL_curcop);
11516     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11517     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11518     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11519     if (outsidecv && CvPADLIST(outsidecv))
11520         CvPADLIST(PL_compcv)->xpadl_outid =
11521             PadlistNAMES(CvPADLIST(outsidecv));
11522
11523     return oldsavestack_ix;
11524 }
11525
11526 static int
11527 S_yywarn(pTHX_ const char *const s, U32 flags)
11528 {
11529     dVAR;
11530
11531     PERL_ARGS_ASSERT_YYWARN;
11532
11533     PL_in_eval |= EVAL_WARNONLY;
11534     yyerror_pv(s, flags);
11535     PL_in_eval &= ~EVAL_WARNONLY;
11536     return 0;
11537 }
11538
11539 int
11540 Perl_yyerror(pTHX_ const char *const s)
11541 {
11542     PERL_ARGS_ASSERT_YYERROR;
11543     return yyerror_pvn(s, strlen(s), 0);
11544 }
11545
11546 int
11547 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11548 {
11549     PERL_ARGS_ASSERT_YYERROR_PV;
11550     return yyerror_pvn(s, strlen(s), flags);
11551 }
11552
11553 int
11554 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11555 {
11556     dVAR;
11557     const char *context = NULL;
11558     int contlen = -1;
11559     SV *msg;
11560     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11561     int yychar  = PL_parser->yychar;
11562
11563     PERL_ARGS_ASSERT_YYERROR_PVN;
11564
11565     if (!yychar || (yychar == ';' && !PL_rsfp))
11566         sv_catpvs(where_sv, "at EOF");
11567     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11568       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11569       PL_oldbufptr != PL_bufptr) {
11570         /*
11571                 Only for NetWare:
11572                 The code below is removed for NetWare because it abends/crashes on NetWare
11573                 when the script has error such as not having the closing quotes like:
11574                     if ($var eq "value)
11575                 Checking of white spaces is anyway done in NetWare code.
11576         */
11577 #ifndef NETWARE
11578         while (isSPACE(*PL_oldoldbufptr))
11579             PL_oldoldbufptr++;
11580 #endif
11581         context = PL_oldoldbufptr;
11582         contlen = PL_bufptr - PL_oldoldbufptr;
11583     }
11584     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11585       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11586         /*
11587                 Only for NetWare:
11588                 The code below is removed for NetWare because it abends/crashes on NetWare
11589                 when the script has error such as not having the closing quotes like:
11590                     if ($var eq "value)
11591                 Checking of white spaces is anyway done in NetWare code.
11592         */
11593 #ifndef NETWARE
11594         while (isSPACE(*PL_oldbufptr))
11595             PL_oldbufptr++;
11596 #endif
11597         context = PL_oldbufptr;
11598         contlen = PL_bufptr - PL_oldbufptr;
11599     }
11600     else if (yychar > 255)
11601         sv_catpvs(where_sv, "next token ???");
11602     else if (yychar == -2) { /* YYEMPTY */
11603         if (PL_lex_state == LEX_NORMAL ||
11604            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11605             sv_catpvs(where_sv, "at end of line");
11606         else if (PL_lex_inpat)
11607             sv_catpvs(where_sv, "within pattern");
11608         else
11609             sv_catpvs(where_sv, "within string");
11610     }
11611     else {
11612         sv_catpvs(where_sv, "next char ");
11613         if (yychar < 32)
11614             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11615         else if (isPRINT_LC(yychar)) {
11616             const char string = yychar;
11617             sv_catpvn(where_sv, &string, 1);
11618         }
11619         else
11620             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11621     }
11622     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11623     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11624         OutCopFILE(PL_curcop),
11625         (IV)(PL_parser->preambling == NOLINE
11626                ? CopLINE(PL_curcop)
11627                : PL_parser->preambling));
11628     if (context)
11629         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11630                              UTF8fARG(UTF, contlen, context));
11631     else
11632         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11633     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11634         Perl_sv_catpvf(aTHX_ msg,
11635         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11636                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11637         PL_multi_end = 0;
11638     }
11639     if (PL_in_eval & EVAL_WARNONLY) {
11640         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11641     }
11642     else
11643         qerror(msg);
11644     if (PL_error_count >= 10) {
11645         SV * errsv;
11646         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11647             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11648                        SVfARG(errsv), OutCopFILE(PL_curcop));
11649         else
11650             Perl_croak(aTHX_ "%s has too many errors.\n",
11651             OutCopFILE(PL_curcop));
11652     }
11653     PL_in_my = 0;
11654     PL_in_my_stash = NULL;
11655     return 0;
11656 }
11657
11658 STATIC char*
11659 S_swallow_bom(pTHX_ U8 *s)
11660 {
11661     dVAR;
11662     const STRLEN slen = SvCUR(PL_linestr);
11663
11664     PERL_ARGS_ASSERT_SWALLOW_BOM;
11665
11666     switch (s[0]) {
11667     case 0xFF:
11668         if (s[1] == 0xFE) {
11669             /* UTF-16 little-endian? (or UTF-32LE?) */
11670             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11671                 /* diag_listed_as: Unsupported script encoding %s */
11672                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11673 #ifndef PERL_NO_UTF16_FILTER
11674             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11675             s += 2;
11676             if (PL_bufend > (char*)s) {
11677                 s = add_utf16_textfilter(s, TRUE);
11678             }
11679 #else
11680             /* diag_listed_as: Unsupported script encoding %s */
11681             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11682 #endif
11683         }
11684         break;
11685     case 0xFE:
11686         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11687 #ifndef PERL_NO_UTF16_FILTER
11688             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11689             s += 2;
11690             if (PL_bufend > (char *)s) {
11691                 s = add_utf16_textfilter(s, FALSE);
11692             }
11693 #else
11694             /* diag_listed_as: Unsupported script encoding %s */
11695             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11696 #endif
11697         }
11698         break;
11699     case BOM_UTF8_FIRST_BYTE: {
11700         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11701         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11702             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11703             s += len + 1;                      /* UTF-8 */
11704         }
11705         break;
11706     }
11707     case 0:
11708         if (slen > 3) {
11709              if (s[1] == 0) {
11710                   if (s[2] == 0xFE && s[3] == 0xFF) {
11711                        /* UTF-32 big-endian */
11712                        /* diag_listed_as: Unsupported script encoding %s */
11713                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11714                   }
11715              }
11716              else if (s[2] == 0 && s[3] != 0) {
11717                   /* Leading bytes
11718                    * 00 xx 00 xx
11719                    * are a good indicator of UTF-16BE. */
11720 #ifndef PERL_NO_UTF16_FILTER
11721                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11722                   s = add_utf16_textfilter(s, FALSE);
11723 #else
11724                   /* diag_listed_as: Unsupported script encoding %s */
11725                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11726 #endif
11727              }
11728         }
11729
11730     default:
11731          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11732                   /* Leading bytes
11733                    * xx 00 xx 00
11734                    * are a good indicator of UTF-16LE. */
11735 #ifndef PERL_NO_UTF16_FILTER
11736               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11737               s = add_utf16_textfilter(s, TRUE);
11738 #else
11739               /* diag_listed_as: Unsupported script encoding %s */
11740               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11741 #endif
11742          }
11743     }
11744     return (char*)s;
11745 }
11746
11747
11748 #ifndef PERL_NO_UTF16_FILTER
11749 static I32
11750 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11751 {
11752     dVAR;
11753     SV *const filter = FILTER_DATA(idx);
11754     /* We re-use this each time round, throwing the contents away before we
11755        return.  */
11756     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11757     SV *const utf8_buffer = filter;
11758     IV status = IoPAGE(filter);
11759     const bool reverse = cBOOL(IoLINES(filter));
11760     I32 retval;
11761
11762     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11763
11764     /* As we're automatically added, at the lowest level, and hence only called
11765        from this file, we can be sure that we're not called in block mode. Hence
11766        don't bother writing code to deal with block mode.  */
11767     if (maxlen) {
11768         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11769     }
11770     if (status < 0) {
11771         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11772     }
11773     DEBUG_P(PerlIO_printf(Perl_debug_log,
11774                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11775                           FPTR2DPTR(void *, S_utf16_textfilter),
11776                           reverse ? 'l' : 'b', idx, maxlen, status,
11777                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11778
11779     while (1) {
11780         STRLEN chars;
11781         STRLEN have;
11782         I32 newlen;
11783         U8 *end;
11784         /* First, look in our buffer of existing UTF-8 data:  */
11785         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11786
11787         if (nl) {
11788             ++nl;
11789         } else if (status == 0) {
11790             /* EOF */
11791             IoPAGE(filter) = 0;
11792             nl = SvEND(utf8_buffer);
11793         }
11794         if (nl) {
11795             STRLEN got = nl - SvPVX(utf8_buffer);
11796             /* Did we have anything to append?  */
11797             retval = got != 0;
11798             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11799             /* Everything else in this code works just fine if SVp_POK isn't
11800                set.  This, however, needs it, and we need it to work, else
11801                we loop infinitely because the buffer is never consumed.  */
11802             sv_chop(utf8_buffer, nl);
11803             break;
11804         }
11805
11806         /* OK, not a complete line there, so need to read some more UTF-16.
11807            Read an extra octect if the buffer currently has an odd number. */
11808         while (1) {
11809             if (status <= 0)
11810                 break;
11811             if (SvCUR(utf16_buffer) >= 2) {
11812                 /* Location of the high octet of the last complete code point.
11813                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11814                    *coupled* with all the benefits of partial reads and
11815                    endianness.  */
11816                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11817                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11818
11819                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11820                     break;
11821                 }
11822
11823                 /* We have the first half of a surrogate. Read more.  */
11824                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11825             }
11826
11827             status = FILTER_READ(idx + 1, utf16_buffer,
11828                                  160 + (SvCUR(utf16_buffer) & 1));
11829             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11830             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11831             if (status < 0) {
11832                 /* Error */
11833                 IoPAGE(filter) = status;
11834                 return status;
11835             }
11836         }
11837
11838         chars = SvCUR(utf16_buffer) >> 1;
11839         have = SvCUR(utf8_buffer);
11840         SvGROW(utf8_buffer, have + chars * 3 + 1);
11841
11842         if (reverse) {
11843             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11844                                          (U8*)SvPVX_const(utf8_buffer) + have,
11845                                          chars * 2, &newlen);
11846         } else {
11847             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11848                                 (U8*)SvPVX_const(utf8_buffer) + have,
11849                                 chars * 2, &newlen);
11850         }
11851         SvCUR_set(utf8_buffer, have + newlen);
11852         *end = '\0';
11853
11854         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11855            it's private to us, and utf16_to_utf8{,reversed} take a
11856            (pointer,length) pair, rather than a NUL-terminated string.  */
11857         if(SvCUR(utf16_buffer) & 1) {
11858             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11859             SvCUR_set(utf16_buffer, 1);
11860         } else {
11861             SvCUR_set(utf16_buffer, 0);
11862         }
11863     }
11864     DEBUG_P(PerlIO_printf(Perl_debug_log,
11865                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11866                           status,
11867                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11868     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11869     return retval;
11870 }
11871
11872 static U8 *
11873 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11874 {
11875     SV *filter = filter_add(S_utf16_textfilter, NULL);
11876
11877     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11878
11879     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11880     sv_setpvs(filter, "");
11881     IoLINES(filter) = reversed;
11882     IoPAGE(filter) = 1; /* Not EOF */
11883
11884     /* Sadly, we have to return a valid pointer, come what may, so we have to
11885        ignore any error return from this.  */
11886     SvCUR_set(PL_linestr, 0);
11887     if (FILTER_READ(0, PL_linestr, 0)) {
11888         SvUTF8_on(PL_linestr);
11889     } else {
11890         SvUTF8_on(PL_linestr);
11891     }
11892     PL_bufend = SvEND(PL_linestr);
11893     return (U8*)SvPVX(PL_linestr);
11894 }
11895 #endif
11896
11897 /*
11898 Returns a pointer to the next character after the parsed
11899 vstring, as well as updating the passed in sv.
11900
11901 Function must be called like
11902
11903         sv = sv_2mortal(newSV(5));
11904         s = scan_vstring(s,e,sv);
11905
11906 where s and e are the start and end of the string.
11907 The sv should already be large enough to store the vstring
11908 passed in, for performance reasons.
11909
11910 This function may croak if fatal warnings are enabled in the
11911 calling scope, hence the sv_2mortal in the example (to prevent
11912 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11913 sv_2mortal.
11914
11915 */
11916
11917 char *
11918 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11919 {
11920     dVAR;
11921     const char *pos = s;
11922     const char *start = s;
11923
11924     PERL_ARGS_ASSERT_SCAN_VSTRING;
11925
11926     if (*pos == 'v') pos++;  /* get past 'v' */
11927     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11928         pos++;
11929     if ( *pos != '.') {
11930         /* this may not be a v-string if followed by => */
11931         const char *next = pos;
11932         while (next < e && isSPACE(*next))
11933             ++next;
11934         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11935             /* return string not v-string */
11936             sv_setpvn(sv,(char *)s,pos-s);
11937             return (char *)pos;
11938         }
11939     }
11940
11941     if (!isALPHA(*pos)) {
11942         U8 tmpbuf[UTF8_MAXBYTES+1];
11943
11944         if (*s == 'v')
11945             s++;  /* get past 'v' */
11946
11947         sv_setpvs(sv, "");
11948
11949         for (;;) {
11950             /* this is atoi() that tolerates underscores */
11951             U8 *tmpend;
11952             UV rev = 0;
11953             const char *end = pos;
11954             UV mult = 1;
11955             while (--end >= s) {
11956                 if (*end != '_') {
11957                     const UV orev = rev;
11958                     rev += (*end - '0') * mult;
11959                     mult *= 10;
11960                     if (orev > rev)
11961                         /* diag_listed_as: Integer overflow in %s number */
11962                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11963                                          "Integer overflow in decimal number");
11964                 }
11965             }
11966 #ifdef EBCDIC
11967             if (rev > 0x7FFFFFFF)
11968                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11969 #endif
11970             /* Append native character for the rev point */
11971             tmpend = uvchr_to_utf8(tmpbuf, rev);
11972             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11973             if (!UVCHR_IS_INVARIANT(rev))
11974                  SvUTF8_on(sv);
11975             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11976                  s = ++pos;
11977             else {
11978                  s = pos;
11979                  break;
11980             }
11981             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11982                  pos++;
11983         }
11984         SvPOK_on(sv);
11985         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11986         SvRMAGICAL_on(sv);
11987     }
11988     return (char *)s;
11989 }
11990
11991 int
11992 Perl_keyword_plugin_standard(pTHX_
11993         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11994 {
11995     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11996     PERL_UNUSED_CONTEXT;
11997     PERL_UNUSED_ARG(keyword_ptr);
11998     PERL_UNUSED_ARG(keyword_len);
11999     PERL_UNUSED_ARG(op_ptr);
12000     return KEYWORD_PLUGIN_DECLINE;
12001 }
12002
12003 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12004 static void
12005 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12006 {
12007     SAVEI32(PL_lex_brackets);
12008     if (PL_lex_brackets > 100)
12009         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12010     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12011     SAVEI32(PL_lex_allbrackets);
12012     PL_lex_allbrackets = 0;
12013     SAVEI8(PL_lex_fakeeof);
12014     PL_lex_fakeeof = (U8)fakeeof;
12015     if(yyparse(gramtype) && !PL_parser->error_count)
12016         qerror(Perl_mess(aTHX_ "Parse error"));
12017 }
12018
12019 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12020 static OP *
12021 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12022 {
12023     OP *o;
12024     ENTER;
12025     SAVEVPTR(PL_eval_root);
12026     PL_eval_root = NULL;
12027     parse_recdescent(gramtype, fakeeof);
12028     o = PL_eval_root;
12029     LEAVE;
12030     return o;
12031 }
12032
12033 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12034 static OP *
12035 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12036 {
12037     OP *exprop;
12038     if (flags & ~PARSE_OPTIONAL)
12039         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12040     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12041     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12042         if (!PL_parser->error_count)
12043             qerror(Perl_mess(aTHX_ "Parse error"));
12044         exprop = newOP(OP_NULL, 0);
12045     }
12046     return exprop;
12047 }
12048
12049 /*
12050 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12051
12052 Parse a Perl arithmetic expression.  This may contain operators of precedence
12053 down to the bit shift operators.  The expression must be followed (and thus
12054 terminated) either by a comparison or lower-precedence operator or by
12055 something that would normally terminate an expression such as semicolon.
12056 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12057 otherwise it is mandatory.  It is up to the caller to ensure that the
12058 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12059 the source of the code to be parsed and the lexical context for the
12060 expression.
12061
12062 The op tree representing the expression is returned.  If an optional
12063 expression is absent, a null pointer is returned, otherwise the pointer
12064 will be non-null.
12065
12066 If an error occurs in parsing or compilation, in most cases a valid op
12067 tree is returned anyway.  The error is reflected in the parser state,
12068 normally resulting in a single exception at the top level of parsing
12069 which covers all the compilation errors that occurred.  Some compilation
12070 errors, however, will throw an exception immediately.
12071
12072 =cut
12073 */
12074
12075 OP *
12076 Perl_parse_arithexpr(pTHX_ U32 flags)
12077 {
12078     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12079 }
12080
12081 /*
12082 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12083
12084 Parse a Perl term expression.  This may contain operators of precedence
12085 down to the assignment operators.  The expression must be followed (and thus
12086 terminated) either by a comma or lower-precedence operator or by
12087 something that would normally terminate an expression such as semicolon.
12088 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12089 otherwise it is mandatory.  It is up to the caller to ensure that the
12090 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12091 the source of the code to be parsed and the lexical context for the
12092 expression.
12093
12094 The op tree representing the expression is returned.  If an optional
12095 expression is absent, a null pointer is returned, otherwise the pointer
12096 will be non-null.
12097
12098 If an error occurs in parsing or compilation, in most cases a valid op
12099 tree is returned anyway.  The error is reflected in the parser state,
12100 normally resulting in a single exception at the top level of parsing
12101 which covers all the compilation errors that occurred.  Some compilation
12102 errors, however, will throw an exception immediately.
12103
12104 =cut
12105 */
12106
12107 OP *
12108 Perl_parse_termexpr(pTHX_ U32 flags)
12109 {
12110     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12111 }
12112
12113 /*
12114 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12115
12116 Parse a Perl list expression.  This may contain operators of precedence
12117 down to the comma operator.  The expression must be followed (and thus
12118 terminated) either by a low-precedence logic operator such as C<or> or by
12119 something that would normally terminate an expression such as semicolon.
12120 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12121 otherwise it is mandatory.  It is up to the caller to ensure that the
12122 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12123 the source of the code to be parsed and the lexical context for the
12124 expression.
12125
12126 The op tree representing the expression is returned.  If an optional
12127 expression is absent, a null pointer is returned, otherwise the pointer
12128 will be non-null.
12129
12130 If an error occurs in parsing or compilation, in most cases a valid op
12131 tree is returned anyway.  The error is reflected in the parser state,
12132 normally resulting in a single exception at the top level of parsing
12133 which covers all the compilation errors that occurred.  Some compilation
12134 errors, however, will throw an exception immediately.
12135
12136 =cut
12137 */
12138
12139 OP *
12140 Perl_parse_listexpr(pTHX_ U32 flags)
12141 {
12142     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12143 }
12144
12145 /*
12146 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12147
12148 Parse a single complete Perl expression.  This allows the full
12149 expression grammar, including the lowest-precedence operators such
12150 as C<or>.  The expression must be followed (and thus terminated) by a
12151 token that an expression would normally be terminated by: end-of-file,
12152 closing bracketing punctuation, semicolon, or one of the keywords that
12153 signals a postfix expression-statement modifier.  If I<flags> includes
12154 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12155 mandatory.  It is up to the caller to ensure that the dynamic parser
12156 state (L</PL_parser> et al) is correctly set to reflect the source of
12157 the code to be parsed and the lexical context for the expression.
12158
12159 The op tree representing the expression is returned.  If an optional
12160 expression is absent, a null pointer is returned, otherwise the pointer
12161 will be non-null.
12162
12163 If an error occurs in parsing or compilation, in most cases a valid op
12164 tree is returned anyway.  The error is reflected in the parser state,
12165 normally resulting in a single exception at the top level of parsing
12166 which covers all the compilation errors that occurred.  Some compilation
12167 errors, however, will throw an exception immediately.
12168
12169 =cut
12170 */
12171
12172 OP *
12173 Perl_parse_fullexpr(pTHX_ U32 flags)
12174 {
12175     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12176 }
12177
12178 /*
12179 =for apidoc Amx|OP *|parse_block|U32 flags
12180
12181 Parse a single complete Perl code block.  This consists of an opening
12182 brace, a sequence of statements, and a closing brace.  The block
12183 constitutes a lexical scope, so C<my> variables and various compile-time
12184 effects can be contained within it.  It is up to the caller to ensure
12185 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12186 reflect the source of the code to be parsed and the lexical context for
12187 the statement.
12188
12189 The op tree representing the code block is returned.  This is always a
12190 real op, never a null pointer.  It will normally be a C<lineseq> list,
12191 including C<nextstate> or equivalent ops.  No ops to construct any kind
12192 of runtime scope are included by virtue of it being a block.
12193
12194 If an error occurs in parsing or compilation, in most cases a valid op
12195 tree (most likely null) is returned anyway.  The error is reflected in
12196 the parser state, normally resulting in a single exception at the top
12197 level of parsing which covers all the compilation errors that occurred.
12198 Some compilation errors, however, will throw an exception immediately.
12199
12200 The I<flags> parameter is reserved for future use, and must always
12201 be zero.
12202
12203 =cut
12204 */
12205
12206 OP *
12207 Perl_parse_block(pTHX_ U32 flags)
12208 {
12209     if (flags)
12210         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12211     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12212 }
12213
12214 /*
12215 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12216
12217 Parse a single unadorned Perl statement.  This may be a normal imperative
12218 statement or a declaration that has compile-time effect.  It does not
12219 include any label or other affixture.  It is up to the caller to ensure
12220 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12221 reflect the source of the code to be parsed and the lexical context for
12222 the statement.
12223
12224 The op tree representing the statement is returned.  This may be a
12225 null pointer if the statement is null, for example if it was actually
12226 a subroutine definition (which has compile-time side effects).  If not
12227 null, it will be ops directly implementing the statement, suitable to
12228 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12229 equivalent op (except for those embedded in a scope contained entirely
12230 within the statement).
12231
12232 If an error occurs in parsing or compilation, in most cases a valid op
12233 tree (most likely null) is returned anyway.  The error is reflected in
12234 the parser state, normally resulting in a single exception at the top
12235 level of parsing which covers all the compilation errors that occurred.
12236 Some compilation errors, however, will throw an exception immediately.
12237
12238 The I<flags> parameter is reserved for future use, and must always
12239 be zero.
12240
12241 =cut
12242 */
12243
12244 OP *
12245 Perl_parse_barestmt(pTHX_ U32 flags)
12246 {
12247     if (flags)
12248         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12249     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12250 }
12251
12252 /*
12253 =for apidoc Amx|SV *|parse_label|U32 flags
12254
12255 Parse a single label, possibly optional, of the type that may prefix a
12256 Perl statement.  It is up to the caller to ensure that the dynamic parser
12257 state (L</PL_parser> et al) is correctly set to reflect the source of
12258 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12259 label is optional, otherwise it is mandatory.
12260
12261 The name of the label is returned in the form of a fresh scalar.  If an
12262 optional label is absent, a null pointer is returned.
12263
12264 If an error occurs in parsing, which can only occur if the label is
12265 mandatory, a valid label is returned anyway.  The error is reflected in
12266 the parser state, normally resulting in a single exception at the top
12267 level of parsing which covers all the compilation errors that occurred.
12268
12269 =cut
12270 */
12271
12272 SV *
12273 Perl_parse_label(pTHX_ U32 flags)
12274 {
12275     if (flags & ~PARSE_OPTIONAL)
12276         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12277     if (PL_lex_state == LEX_KNOWNEXT) {
12278         PL_parser->yychar = yylex();
12279         if (PL_parser->yychar == LABEL) {
12280             char * const lpv = pl_yylval.pval;
12281             STRLEN llen = strlen(lpv);
12282             PL_parser->yychar = YYEMPTY;
12283             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12284         } else {
12285             yyunlex();
12286             goto no_label;
12287         }
12288     } else {
12289         char *s, *t;
12290         STRLEN wlen, bufptr_pos;
12291         lex_read_space(0);
12292         t = s = PL_bufptr;
12293         if (!isIDFIRST_lazy_if(s, UTF))
12294             goto no_label;
12295         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12296         if (word_takes_any_delimeter(s, wlen))
12297             goto no_label;
12298         bufptr_pos = s - SvPVX(PL_linestr);
12299         PL_bufptr = t;
12300         lex_read_space(LEX_KEEP_PREVIOUS);
12301         t = PL_bufptr;
12302         s = SvPVX(PL_linestr) + bufptr_pos;
12303         if (t[0] == ':' && t[1] != ':') {
12304             PL_oldoldbufptr = PL_oldbufptr;
12305             PL_oldbufptr = s;
12306             PL_bufptr = t+1;
12307             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12308         } else {
12309             PL_bufptr = s;
12310             no_label:
12311             if (flags & PARSE_OPTIONAL) {
12312                 return NULL;
12313             } else {
12314                 qerror(Perl_mess(aTHX_ "Parse error"));
12315                 return newSVpvs("x");
12316             }
12317         }
12318     }
12319 }
12320
12321 /*
12322 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12323
12324 Parse a single complete Perl statement.  This may be a normal imperative
12325 statement or a declaration that has compile-time effect, and may include
12326 optional labels.  It is up to the caller to ensure that the dynamic
12327 parser state (L</PL_parser> et al) is correctly set to reflect the source
12328 of the code to be parsed and the lexical context for the statement.
12329
12330 The op tree representing the statement is returned.  This may be a
12331 null pointer if the statement is null, for example if it was actually
12332 a subroutine definition (which has compile-time side effects).  If not
12333 null, it will be the result of a L</newSTATEOP> call, normally including
12334 a C<nextstate> or equivalent op.
12335
12336 If an error occurs in parsing or compilation, in most cases a valid op
12337 tree (most likely null) is returned anyway.  The error is reflected in
12338 the parser state, normally resulting in a single exception at the top
12339 level of parsing which covers all the compilation errors that occurred.
12340 Some compilation errors, however, will throw an exception immediately.
12341
12342 The I<flags> parameter is reserved for future use, and must always
12343 be zero.
12344
12345 =cut
12346 */
12347
12348 OP *
12349 Perl_parse_fullstmt(pTHX_ U32 flags)
12350 {
12351     if (flags)
12352         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12353     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12354 }
12355
12356 /*
12357 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12358
12359 Parse a sequence of zero or more Perl statements.  These may be normal
12360 imperative statements, including optional labels, or declarations
12361 that have compile-time effect, or any mixture thereof.  The statement
12362 sequence ends when a closing brace or end-of-file is encountered in a
12363 place where a new statement could have validly started.  It is up to
12364 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12365 is correctly set to reflect the source of the code to be parsed and the
12366 lexical context for the statements.
12367
12368 The op tree representing the statement sequence is returned.  This may
12369 be a null pointer if the statements were all null, for example if there
12370 were no statements or if there were only subroutine definitions (which
12371 have compile-time side effects).  If not null, it will be a C<lineseq>
12372 list, normally including C<nextstate> or equivalent ops.
12373
12374 If an error occurs in parsing or compilation, in most cases a valid op
12375 tree is returned anyway.  The error is reflected in the parser state,
12376 normally resulting in a single exception at the top level of parsing
12377 which covers all the compilation errors that occurred.  Some compilation
12378 errors, however, will throw an exception immediately.
12379
12380 The I<flags> parameter is reserved for future use, and must always
12381 be zero.
12382
12383 =cut
12384 */
12385
12386 OP *
12387 Perl_parse_stmtseq(pTHX_ U32 flags)
12388 {
12389     OP *stmtseqop;
12390     I32 c;
12391     if (flags)
12392         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12393     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12394     c = lex_peek_unichar(0);
12395     if (c != -1 && c != /*{*/'}')
12396         qerror(Perl_mess(aTHX_ "Parse error"));
12397     return stmtseqop;
12398 }
12399
12400 /*
12401  * Local variables:
12402  * c-indentation-style: bsd
12403  * c-basic-offset: 4
12404  * indent-tabs-mode: nil
12405  * End:
12406  *
12407  * ex: set ts=8 sts=4 sw=4 et:
12408  */