toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_static.c"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_defer            (PL_parser->lex_defer)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_expect           (PL_parser->lex_expect)
  58 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  59 #define PL_lex_inpat            (PL_parser->lex_inpat)
  60 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  61 #define PL_lex_op               (PL_parser->lex_op)
  62 #define PL_lex_repl             (PL_parser->lex_repl)
  63 #define PL_lex_starts           (PL_parser->lex_starts)
  64 #define PL_lex_stuff            (PL_parser->lex_stuff)
  65 #define PL_multi_start          (PL_parser->multi_start)
  66 #define PL_multi_open           (PL_parser->multi_open)
  67 #define PL_multi_close          (PL_parser->multi_close)
  68 #define PL_preambled            (PL_parser->preambled)
  69 #define PL_sublex_info          (PL_parser->sublex_info)
  70 #define PL_linestr              (PL_parser->linestr)
  71 #define PL_expect               (PL_parser->expect)
  72 #define PL_copline              (PL_parser->copline)
  73 #define PL_bufptr               (PL_parser->bufptr)
  74 #define PL_oldbufptr            (PL_parser->oldbufptr)
  75 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  76 #define PL_linestart            (PL_parser->linestart)
  77 #define PL_bufend               (PL_parser->bufend)
  78 #define PL_last_uni             (PL_parser->last_uni)
  79 #define PL_last_lop             (PL_parser->last_lop)
  80 #define PL_last_lop_op          (PL_parser->last_lop_op)
  81 #define PL_lex_state            (PL_parser->lex_state)
  82 #define PL_rsfp                 (PL_parser->rsfp)
  83 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  84 #define PL_in_my                (PL_parser->in_my)
  85 #define PL_in_my_stash          (PL_parser->in_my_stash)
  86 #define PL_tokenbuf             (PL_parser->tokenbuf)
  87 #define PL_multi_end            (PL_parser->multi_end)
  88 #define PL_error_count          (PL_parser->error_count)
  89
  90 #  define PL_nexttoke           (PL_parser->nexttoke)
  91 #  define PL_nexttype           (PL_parser->nexttype)
  92 #  define PL_nextval            (PL_parser->nextval)
  93
  94 static const char* const ident_too_long = "Identifier too long";
  95
  96 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  97
  98 #define XENUMMASK  0x3f
  99 #define XFAKEEOF   0x40
 100 #define XFAKEBRACK 0x80
 101
 102 #ifdef USE_UTF8_SCRIPTS
 103 #   define UTF (!IN_BYTES)
 104 #else
 105 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 106 #endif
 107
 108 /* The maximum number of characters preceding the unrecognized one to display */
 109 #define UNRECOGNIZED_PRECEDE_COUNT 10
 110
 111 /* In variables named $^X, these are the legal values for X.
 112  * 1999-02-27 mjd-perl-patch@plover.com */
 113 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 114
 115 #define SPACE_OR_TAB(c) isBLANK_A(c)
 116
 117 /* LEX_* are values for PL_lex_state, the state of the lexer.
 118  * They are arranged oddly so that the guard on the switch statement
 119  * can get by with a single comparison (if the compiler is smart enough).
 120  *
 121  * These values refer to the various states within a sublex parse,
 122  * i.e. within a double quotish string
 123  */
 124
 125 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 126
 127 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 128 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 129 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 130 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 131 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 132
 133                                    /* at end of code, eg "$x" followed by:  */
 134 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 135 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 136
 137 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 138                                         string or after \E, $foo, etc       */
 139 #define LEX_INTERPCONST          2 /* NOT USED */
 140 #define LEX_FORMLINE             1 /* expecting a format line               */
 141 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 142
 143
 144 #ifdef DEBUGGING
 145 static const char* const lex_state_names[] = {
 146     "KNOWNEXT",
 147     "FORMLINE",
 148     "INTERPCONST",
 149     "INTERPCONCAT",
 150     "INTERPENDMAYBE",
 151     "INTERPEND",
 152     "INTERPSTART",
 153     "INTERPPUSH",
 154     "INTERPCASEMOD",
 155     "INTERPNORMAL",
 156     "NORMAL"
 157 };
 158 #endif
 159
 160 #include "keywords.h"
 161
 162 /* CLINE is a macro that ensures PL_copline has a sane value */
 163
 164 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 165
 166 #  define SKIPSPACE0(s) skipspace(s)
 167 #  define SKIPSPACE1(s) skipspace(s)
 168 #  define SKIPSPACE2(s,tsv) skipspace(s)
 169 #  define PEEKSPACE(s) skipspace(s)
 170
 171 /*
 172  * Convenience functions to return different tokens and prime the
 173  * lexer for the next token.  They all take an argument.
 174  *
 175  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 176  * OPERATOR     : generic operator
 177  * AOPERATOR    : assignment operator
 178  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 179  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 180  * PREREF       : *EXPR where EXPR is not a simple identifier
 181  * TERM         : expression term
 182  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 183  * LOOPX        : loop exiting command (goto, last, dump, etc)
 184  * FTST         : file test operator
 185  * FUN0         : zero-argument function
 186  * FUN0OP       : zero-argument function, with its op created in this file
 187  * FUN1         : not used, except for not, which isn't a UNIOP
 188  * BOop         : bitwise or or xor
 189  * BAop         : bitwise and
 190  * SHop         : shift operator
 191  * PWop         : power operator
 192  * PMop         : pattern-matching operator
 193  * Aop          : addition-level operator
 194  * Mop          : multiplication-level operator
 195  * Eop          : equality-testing operator
 196  * Rop          : relational operator <= != gt
 197  *
 198  * Also see LOP and lop() below.
 199  */
 200
 201 #ifdef DEBUGGING /* Serve -DT. */
 202 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 203 #else
 204 #   define REPORT(retval) (retval)
 205 #endif
 206
 207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 214 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 215 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 216 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 217 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 218 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 219 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 220 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 221 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 222 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 223 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 224 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 225 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 226 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 227 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 228 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 229
 230 /* This bit of chicanery makes a unary function followed by
 231  * a parenthesis into a function with one argument, highest precedence.
 232  * The UNIDOR macro is for unary functions that can be followed by the //
 233  * operator (such as C<shift // 0>).
 234  */
 235 #define UNI3(f,x,have_x) { \
 236         pl_yylval.ival = f; \
 237         if (have_x) PL_expect = x; \
 238         PL_bufptr = s; \
 239         PL_last_uni = PL_oldbufptr; \
 240         PL_last_lop_op = f; \
 241         if (*s == '(') \
 242             return REPORT( (int)FUNC1 ); \
 243         s = PEEKSPACE(s); \
 244         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 245         }
 246 #define UNI(f)    UNI3(f,XTERM,1)
 247 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 248 #define UNIPROTO(f,optional) { \
 249         if (optional) PL_last_uni = PL_oldbufptr; \
 250         OPERATOR(f); \
 251         }
 252
 253 #define UNIBRACK(f) UNI3(f,0,0)
 254
 255 /* grandfather return to old style */
 256 #define OLDLOP(f) \
 257         do { \
 258             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 259                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 260             pl_yylval.ival = (f); \
 261             PL_expect = XTERM; \
 262             PL_bufptr = s; \
 263             return (int)LSTOP; \
 264         } while(0)
 265
 266 #define COPLINE_INC_WITH_HERELINES                  \
 267     STMT_START {                                     \
 268         CopLINE_inc(PL_curcop);                       \
 269         if (PL_parser->herelines)                      \
 270             CopLINE(PL_curcop) += PL_parser->herelines, \
 271             PL_parser->herelines = 0;                    \
 272     } STMT_END
 273 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 274  * is no sublex_push to follow. */
 275 #define COPLINE_SET_FROM_MULTI_END            \
 276     STMT_START {                               \
 277         CopLINE_set(PL_curcop, PL_multi_end);   \
 278         if (PL_multi_end != PL_multi_start)      \
 279             PL_parser->herelines = 0;             \
 280     } STMT_END
 281
 282
 283 #ifdef DEBUGGING
 284
 285 /* how to interpret the pl_yylval associated with the token */
 286 enum token_type {
 287     TOKENTYPE_NONE,
 288     TOKENTYPE_IVAL,
 289     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 290     TOKENTYPE_PVAL,
 291     TOKENTYPE_OPVAL
 292 };
 293
 294 static struct debug_tokens {
 295     const int token;
 296     enum token_type type;
 297     const char *name;
 298 } const debug_tokens[] =
 299 {
 300     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 301     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 302     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 303     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 304     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 305     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 306     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 307     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 308     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 309     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 310     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 311     { DO,               TOKENTYPE_NONE,         "DO" },
 312     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 313     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 314     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 315     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 316     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 317     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 318     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 319     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 320     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 321     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 322     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 323     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 324     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 325     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 326     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 327     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 328     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 329     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 330     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 331     { IF,               TOKENTYPE_IVAL,         "IF" },
 332     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 333     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 334     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 335     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 336     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 337     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 338     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 339     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 340     { MY,               TOKENTYPE_IVAL,         "MY" },
 341     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 342     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 343     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 344     { OROR,             TOKENTYPE_NONE,         "OROR" },
 345     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 346     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 347     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 348     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 349     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 350     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 351     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 352     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 353     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 354     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 355     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 356     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 357     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 358     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 359     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 360     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 361     { SUB,              TOKENTYPE_NONE,         "SUB" },
 362     { THING,            TOKENTYPE_OPVAL,        "THING" },
 363     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 364     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 365     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 366     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 367     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 368     { USE,              TOKENTYPE_IVAL,         "USE" },
 369     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 370     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 371     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 372     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 373     { 0,                TOKENTYPE_NONE,         NULL }
 374 };
 375
 376 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 377
 378 STATIC int
 379 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 380 {
 381     dVAR;
 382
 383     PERL_ARGS_ASSERT_TOKEREPORT;
 384
 385     if (DEBUG_T_TEST) {
 386         const char *name = NULL;
 387         enum token_type type = TOKENTYPE_NONE;
 388         const struct debug_tokens *p;
 389         SV* const report = newSVpvs("<== ");
 390
 391         for (p = debug_tokens; p->token; p++) {
 392             if (p->token == (int)rv) {
 393                 name = p->name;
 394                 type = p->type;
 395                 break;
 396             }
 397         }
 398         if (name)
 399             Perl_sv_catpv(aTHX_ report, name);
 400         else if ((char)rv > ' ' && (char)rv <= '~')
 401         {
 402             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 403             if ((char)rv == 'p')
 404                 sv_catpvs(report, " (pending identifier)");
 405         }
 406         else if (!rv)
 407             sv_catpvs(report, "EOF");
 408         else
 409             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 410         switch (type) {
 411         case TOKENTYPE_NONE:
 412             break;
 413         case TOKENTYPE_IVAL:
 414             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 415             break;
 416         case TOKENTYPE_OPNUM:
 417             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 418                                     PL_op_name[lvalp->ival]);
 419             break;
 420         case TOKENTYPE_PVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 422             break;
 423         case TOKENTYPE_OPVAL:
 424             if (lvalp->opval) {
 425                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 426                                     PL_op_name[lvalp->opval->op_type]);
 427                 if (lvalp->opval->op_type == OP_CONST) {
 428                     Perl_sv_catpvf(aTHX_ report, " %s",
 429                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 430                 }
 431
 432             }
 433             else
 434                 sv_catpvs(report, "(opval=null)");
 435             break;
 436         }
 437         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 438     };
 439     return (int)rv;
 440 }
 441
 442
 443 /* print the buffer with suitable escapes */
 444
 445 STATIC void
 446 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 447 {
 448     SV* const tmp = newSVpvs("");
 449
 450     PERL_ARGS_ASSERT_PRINTBUF;
 451
 452     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 453     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 454     GCC_DIAG_RESTORE;
 455     SvREFCNT_dec(tmp);
 456 }
 457
 458 #endif
 459
 460 static int
 461 S_deprecate_commaless_var_list(pTHX) {
 462     PL_expect = XTERM;
 463     deprecate("comma-less variable list");
 464     return REPORT(','); /* grandfather non-comma-format format */
 465 }
 466
 467 /*
 468  * S_ao
 469  *
 470  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 471  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 472  */
 473
 474 STATIC int
 475 S_ao(pTHX_ int toketype)
 476 {
 477     if (*PL_bufptr == '=') {
 478         PL_bufptr++;
 479         if (toketype == ANDAND)
 480             pl_yylval.ival = OP_ANDASSIGN;
 481         else if (toketype == OROR)
 482             pl_yylval.ival = OP_ORASSIGN;
 483         else if (toketype == DORDOR)
 484             pl_yylval.ival = OP_DORASSIGN;
 485         toketype = ASSIGNOP;
 486     }
 487     return toketype;
 488 }
 489
 490 /*
 491  * S_no_op
 492  * When Perl expects an operator and finds something else, no_op
 493  * prints the warning.  It always prints "<something> found where
 494  * operator expected.  It prints "Missing semicolon on previous line?"
 495  * if the surprise occurs at the start of the line.  "do you need to
 496  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 497  * where the compiler doesn't know if foo is a method call or a function.
 498  * It prints "Missing operator before end of line" if there's nothing
 499  * after the missing operator, or "... before <...>" if there is something
 500  * after the missing operator.
 501  */
 502
 503 STATIC void
 504 S_no_op(pTHX_ const char *const what, char *s)
 505 {
 506     char * const oldbp = PL_bufptr;
 507     const bool is_first = (PL_oldbufptr == PL_linestart);
 508
 509     PERL_ARGS_ASSERT_NO_OP;
 510
 511     if (!s)
 512         s = oldbp;
 513     else
 514         PL_bufptr = s;
 515     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 516     if (ckWARN_d(WARN_SYNTAX)) {
 517         if (is_first)
 518             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 519                     "\t(Missing semicolon on previous line?)\n");
 520         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 521             const char *t;
 522             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 523                                                             t += UTF ? UTF8SKIP(t) : 1)
 524                 NOOP;
 525             if (t < PL_bufptr && isSPACE(*t))
 526                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 527                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 528                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 529         }
 530         else {
 531             assert(s >= oldbp);
 532             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 533                     "\t(Missing operator before %"UTF8f"?)\n",
 534                      UTF8fARG(UTF, s - oldbp, oldbp));
 535         }
 536     }
 537     PL_bufptr = oldbp;
 538 }
 539
 540 /*
 541  * S_missingterm
 542  * Complain about missing quote/regexp/heredoc terminator.
 543  * If it's called with NULL then it cauterizes the line buffer.
 544  * If we're in a delimited string and the delimiter is a control
 545  * character, it's reformatted into a two-char sequence like ^C.
 546  * This is fatal.
 547  */
 548
 549 STATIC void
 550 S_missingterm(pTHX_ char *s)
 551 {
 552     char tmpbuf[3];
 553     char q;
 554     if (s) {
 555         char * const nl = strrchr(s,'\n');
 556         if (nl)
 557             *nl = '\0';
 558     }
 559     else if ((U8) PL_multi_close < 32) {
 560         *tmpbuf = '^';
 561         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 562         tmpbuf[2] = '\0';
 563         s = tmpbuf;
 564     }
 565     else {
 566         *tmpbuf = (char)PL_multi_close;
 567         tmpbuf[1] = '\0';
 568         s = tmpbuf;
 569     }
 570     q = strchr(s,'"') ? '\'' : '"';
 571     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 572 }
 573
 574 #include "feature.h"
 575
 576 /*
 577  * Check whether the named feature is enabled.
 578  */
 579 bool
 580 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 581 {
 582     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 583
 584     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 585
 586     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 587
 588     if (namelen > MAX_FEATURE_LEN)
 589         return FALSE;
 590     memcpy(&he_name[8], name, namelen);
 591
 592     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 593                                      REFCOUNTED_HE_EXISTS));
 594 }
 595
 596 /*
 597  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 598  * utf16-to-utf8-reversed.
 599  */
 600
 601 #ifdef PERL_CR_FILTER
 602 static void
 603 strip_return(SV *sv)
 604 {
 605     const char *s = SvPVX_const(sv);
 606     const char * const e = s + SvCUR(sv);
 607
 608     PERL_ARGS_ASSERT_STRIP_RETURN;
 609
 610     /* outer loop optimized to do nothing if there are no CR-LFs */
 611     while (s < e) {
 612         if (*s++ == '\r' && *s == '\n') {
 613             /* hit a CR-LF, need to copy the rest */
 614             char *d = s - 1;
 615             *d++ = *s++;
 616             while (s < e) {
 617                 if (*s == '\r' && s[1] == '\n')
 618                     s++;
 619                 *d++ = *s++;
 620             }
 621             SvCUR(sv) -= s - d;
 622             return;
 623         }
 624     }
 625 }
 626
 627 STATIC I32
 628 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 629 {
 630     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 631     if (count > 0 && !maxlen)
 632         strip_return(sv);
 633     return count;
 634 }
 635 #endif
 636
 637 /*
 638 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 639
 640 Creates and initialises a new lexer/parser state object, supplying
 641 a context in which to lex and parse from a new source of Perl code.
 642 A pointer to the new state object is placed in L</PL_parser>.  An entry
 643 is made on the save stack so that upon unwinding the new state object
 644 will be destroyed and the former value of L</PL_parser> will be restored.
 645 Nothing else need be done to clean up the parsing context.
 646
 647 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 648 non-null, provides a string (in SV form) containing code to be parsed.
 649 A copy of the string is made, so subsequent modification of I<line>
 650 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 651 from which code will be read to be parsed.  If both are non-null, the
 652 code in I<line> comes first and must consist of complete lines of input,
 653 and I<rsfp> supplies the remainder of the source.
 654
 655 The I<flags> parameter is reserved for future use.  Currently it is only
 656 used by perl internally, so extensions should always pass zero.
 657
 658 =cut
 659 */
 660
 661 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 662    can share filters with the current parser.
 663    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 664    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 665    destruction. This is used to handle the case of defaulting to reading the
 666    script from the standard input because no filename was given on the command
 667    line (without getting confused by situation where STDIN has been closed, so
 668    the script handle is opened on fd 0)  */
 669
 670 void
 671 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 672 {
 673     const char *s = NULL;
 674     yy_parser *parser, *oparser;
 675     if (flags && flags & ~LEX_START_FLAGS)
 676         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 677
 678     /* create and initialise a parser */
 679
 680     Newxz(parser, 1, yy_parser);
 681     parser->old_parser = oparser = PL_parser;
 682     PL_parser = parser;
 683
 684     parser->stack = NULL;
 685     parser->ps = NULL;
 686     parser->stack_size = 0;
 687
 688     /* on scope exit, free this parser and restore any outer one */
 689     SAVEPARSER(parser);
 690     parser->saved_curcop = PL_curcop;
 691
 692     /* initialise lexer state */
 693
 694     parser->nexttoke = 0;
 695     parser->error_count = oparser ? oparser->error_count : 0;
 696     parser->copline = parser->preambling = NOLINE;
 697     parser->lex_state = LEX_NORMAL;
 698     parser->expect = XSTATE;
 699     parser->rsfp = rsfp;
 700     parser->rsfp_filters =
 701       !(flags & LEX_START_SAME_FILTER) || !oparser
 702         ? NULL
 703         : MUTABLE_AV(SvREFCNT_inc(
 704             oparser->rsfp_filters
 705              ? oparser->rsfp_filters
 706              : (oparser->rsfp_filters = newAV())
 707           ));
 708
 709     Newx(parser->lex_brackstack, 120, char);
 710     Newx(parser->lex_casestack, 12, char);
 711     *parser->lex_casestack = '\0';
 712     Newxz(parser->lex_shared, 1, LEXSHARED);
 713
 714     if (line) {
 715         STRLEN len;
 716         s = SvPV_const(line, len);
 717         parser->linestr = flags & LEX_START_COPIED
 718                             ? SvREFCNT_inc_simple_NN(line)
 719                             : newSVpvn_flags(s, len, SvUTF8(line));
 720         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 721     } else {
 722         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 723     }
 724     parser->oldoldbufptr =
 725         parser->oldbufptr =
 726         parser->bufptr =
 727         parser->linestart = SvPVX(parser->linestr);
 728     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 729     parser->last_lop = parser->last_uni = NULL;
 730
 731     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 732                                                         |LEX_DONT_CLOSE_RSFP));
 733     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 734                                                         |LEX_DONT_CLOSE_RSFP));
 735
 736     parser->in_pod = parser->filtered = 0;
 737 }
 738
 739
 740 /* delete a parser object */
 741
 742 void
 743 Perl_parser_free(pTHX_  const yy_parser *parser)
 744 {
 745     PERL_ARGS_ASSERT_PARSER_FREE;
 746
 747     PL_curcop = parser->saved_curcop;
 748     SvREFCNT_dec(parser->linestr);
 749
 750     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 751         PerlIO_clearerr(parser->rsfp);
 752     else if (parser->rsfp && (!parser->old_parser ||
 753                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 754         PerlIO_close(parser->rsfp);
 755     SvREFCNT_dec(parser->rsfp_filters);
 756     SvREFCNT_dec(parser->lex_stuff);
 757     SvREFCNT_dec(parser->sublex_info.repl);
 758
 759     Safefree(parser->lex_brackstack);
 760     Safefree(parser->lex_casestack);
 761     Safefree(parser->lex_shared);
 762     PL_parser = parser->old_parser;
 763     Safefree(parser);
 764 }
 765
 766 void
 767 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 768 {
 769     I32 nexttoke = parser->nexttoke;
 770     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 771     while (nexttoke--) {
 772         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 773          && parser->nextval[nexttoke].opval
 774          && parser->nextval[nexttoke].opval->op_slabbed
 775          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 776             op_free(parser->nextval[nexttoke].opval);
 777             parser->nextval[nexttoke].opval = NULL;
 778         }
 779     }
 780 }
 781
 782
 783 /*
 784 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 785
 786 Buffer scalar containing the chunk currently under consideration of the
 787 text currently being lexed.  This is always a plain string scalar (for
 788 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 789 normal scalar means; instead refer to the buffer directly by the pointer
 790 variables described below.
 791
 792 The lexer maintains various C<char*> pointers to things in the
 793 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 794 reallocated, all of these pointers must be updated.  Don't attempt to
 795 do this manually, but rather use L</lex_grow_linestr> if you need to
 796 reallocate the buffer.
 797
 798 The content of the text chunk in the buffer is commonly exactly one
 799 complete line of input, up to and including a newline terminator,
 800 but there are situations where it is otherwise.  The octets of the
 801 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 802 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 803 flag on this scalar, which may disagree with it.
 804
 805 For direct examination of the buffer, the variable
 806 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 807 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 808 of these pointers is usually preferable to examination of the scalar
 809 through normal scalar means.
 810
 811 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 812
 813 Direct pointer to the end of the chunk of text currently being lexed, the
 814 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 815 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 816 always located at the end of the buffer, and does not count as part of
 817 the buffer's contents.
 818
 819 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 820
 821 Points to the current position of lexing inside the lexer buffer.
 822 Characters around this point may be freely examined, within
 823 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 824 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 825 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 826
 827 Lexing code (whether in the Perl core or not) moves this pointer past
 828 the characters that it consumes.  It is also expected to perform some
 829 bookkeeping whenever a newline character is consumed.  This movement
 830 can be more conveniently performed by the function L</lex_read_to>,
 831 which handles newlines appropriately.
 832
 833 Interpretation of the buffer's octets can be abstracted out by
 834 using the slightly higher-level functions L</lex_peek_unichar> and
 835 L</lex_read_unichar>.
 836
 837 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 838
 839 Points to the start of the current line inside the lexer buffer.
 840 This is useful for indicating at which column an error occurred, and
 841 not much else.  This must be updated by any lexing code that consumes
 842 a newline; the function L</lex_read_to> handles this detail.
 843
 844 =cut
 845 */
 846
 847 /*
 848 =for apidoc Amx|bool|lex_bufutf8
 849
 850 Indicates whether the octets in the lexer buffer
 851 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 852 of Unicode characters.  If not, they should be interpreted as Latin-1
 853 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 854
 855 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 856 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 857 encoding.
 858
 859 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 860 is significant, but not the whole story regarding the input character
 861 encoding.  Normally, when a file is being read, the scalar contains octets
 862 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 863 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 864 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 865 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 866 is in effect.  This logic may change in the future; use this function
 867 instead of implementing the logic yourself.
 868
 869 =cut
 870 */
 871
 872 bool
 873 Perl_lex_bufutf8(pTHX)
 874 {
 875     return UTF;
 876 }
 877
 878 /*
 879 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 880
 881 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 882 at least I<len> octets (including terminating C<NUL>).  Returns a
 883 pointer to the reallocated buffer.  This is necessary before making
 884 any direct modification of the buffer that would increase its length.
 885 L</lex_stuff_pvn> provides a more convenient way to insert text into
 886 the buffer.
 887
 888 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 889 this function updates all of the lexer's variables that point directly
 890 into the buffer.
 891
 892 =cut
 893 */
 894
 895 char *
 896 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 897 {
 898     SV *linestr;
 899     char *buf;
 900     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 901     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 902     linestr = PL_parser->linestr;
 903     buf = SvPVX(linestr);
 904     if (len <= SvLEN(linestr))
 905         return buf;
 906     bufend_pos = PL_parser->bufend - buf;
 907     bufptr_pos = PL_parser->bufptr - buf;
 908     oldbufptr_pos = PL_parser->oldbufptr - buf;
 909     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 910     linestart_pos = PL_parser->linestart - buf;
 911     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 912     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 913     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 914                             PL_parser->lex_shared->re_eval_start - buf : 0;
 915
 916     buf = sv_grow(linestr, len);
 917
 918     PL_parser->bufend = buf + bufend_pos;
 919     PL_parser->bufptr = buf + bufptr_pos;
 920     PL_parser->oldbufptr = buf + oldbufptr_pos;
 921     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 922     PL_parser->linestart = buf + linestart_pos;
 923     if (PL_parser->last_uni)
 924         PL_parser->last_uni = buf + last_uni_pos;
 925     if (PL_parser->last_lop)
 926         PL_parser->last_lop = buf + last_lop_pos;
 927     if (PL_parser->lex_shared->re_eval_start)
 928         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 929     return buf;
 930 }
 931
 932 /*
 933 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 934
 935 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 936 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 937 reallocating the buffer if necessary.  This means that lexing code that
 938 runs later will see the characters as if they had appeared in the input.
 939 It is not recommended to do this as part of normal parsing, and most
 940 uses of this facility run the risk of the inserted characters being
 941 interpreted in an unintended manner.
 942
 943 The string to be inserted is represented by I<len> octets starting
 944 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 945 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 946 The characters are recoded for the lexer buffer, according to how the
 947 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 948 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 949 function is more convenient.
 950
 951 =cut
 952 */
 953
 954 void
 955 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 956 {
 957     dVAR;
 958     char *bufptr;
 959     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 960     if (flags & ~(LEX_STUFF_UTF8))
 961         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 962     if (UTF) {
 963         if (flags & LEX_STUFF_UTF8) {
 964             goto plain_copy;
 965         } else {
 966             STRLEN highhalf = 0;    /* Count of variants */
 967             const char *p, *e = pv+len;
 968             for (p = pv; p != e; p++) {
 969                 if (! UTF8_IS_INVARIANT(*p)) {
 970                     highhalf++;
 971                 }
 972             }
 973             if (!highhalf)
 974                 goto plain_copy;
 975             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 976             bufptr = PL_parser->bufptr;
 977             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 978             SvCUR_set(PL_parser->linestr,
 979                 SvCUR(PL_parser->linestr) + len+highhalf);
 980             PL_parser->bufend += len+highhalf;
 981             for (p = pv; p != e; p++) {
 982                 U8 c = (U8)*p;
 983                 if (! UTF8_IS_INVARIANT(c)) {
 984                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
 985                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
 986                 } else {
 987                     *bufptr++ = (char)c;
 988                 }
 989             }
 990         }
 991     } else {
 992         if (flags & LEX_STUFF_UTF8) {
 993             STRLEN highhalf = 0;
 994             const char *p, *e = pv+len;
 995             for (p = pv; p != e; p++) {
 996                 U8 c = (U8)*p;
 997                 if (UTF8_IS_ABOVE_LATIN1(c)) {
 998                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 999                                 "non-Latin-1 character into Latin-1 input");
1000                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1001                     p++;
1002                     highhalf++;
1003                 } else if (! UTF8_IS_INVARIANT(c)) {
1004                     /* malformed UTF-8 */
1005                     ENTER;
1006                     SAVESPTR(PL_warnhook);
1007                     PL_warnhook = PERL_WARNHOOK_FATAL;
1008                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1009                     LEAVE;
1010                 }
1011             }
1012             if (!highhalf)
1013                 goto plain_copy;
1014             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1015             bufptr = PL_parser->bufptr;
1016             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1017             SvCUR_set(PL_parser->linestr,
1018                 SvCUR(PL_parser->linestr) + len-highhalf);
1019             PL_parser->bufend += len-highhalf;
1020             p = pv;
1021             while (p < e) {
1022                 if (UTF8_IS_INVARIANT(*p)) {
1023                     *bufptr++ = *p;
1024                     p++;
1025                 }
1026                 else {
1027                     assert(p < e -1 );
1028                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1029                     p += 2;
1030                 }
1031             }
1032         } else {
1033           plain_copy:
1034             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1035             bufptr = PL_parser->bufptr;
1036             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1037             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1038             PL_parser->bufend += len;
1039             Copy(pv, bufptr, len, char);
1040         }
1041     }
1042 }
1043
1044 /*
1045 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1046
1047 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049 reallocating the buffer if necessary.  This means that lexing code that
1050 runs later will see the characters as if they had appeared in the input.
1051 It is not recommended to do this as part of normal parsing, and most
1052 uses of this facility run the risk of the inserted characters being
1053 interpreted in an unintended manner.
1054
1055 The string to be inserted is represented by octets starting at I<pv>
1056 and continuing to the first nul.  These octets are interpreted as either
1057 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1058 in I<flags>.  The characters are recoded for the lexer buffer, according
1059 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1060 If it is not convenient to nul-terminate a string to be inserted, the
1061 L</lex_stuff_pvn> function is more appropriate.
1062
1063 =cut
1064 */
1065
1066 void
1067 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1068 {
1069     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1070     lex_stuff_pvn(pv, strlen(pv), flags);
1071 }
1072
1073 /*
1074 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1075
1076 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1077 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1078 reallocating the buffer if necessary.  This means that lexing code that
1079 runs later will see the characters as if they had appeared in the input.
1080 It is not recommended to do this as part of normal parsing, and most
1081 uses of this facility run the risk of the inserted characters being
1082 interpreted in an unintended manner.
1083
1084 The string to be inserted is the string value of I<sv>.  The characters
1085 are recoded for the lexer buffer, according to how the buffer is currently
1086 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1087 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1088 need to construct a scalar.
1089
1090 =cut
1091 */
1092
1093 void
1094 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1095 {
1096     char *pv;
1097     STRLEN len;
1098     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1099     if (flags)
1100         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1101     pv = SvPV(sv, len);
1102     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1103 }
1104
1105 /*
1106 =for apidoc Amx|void|lex_unstuff|char *ptr
1107
1108 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1109 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1110 This hides the discarded text from any lexing code that runs later,
1111 as if the text had never appeared.
1112
1113 This is not the normal way to consume lexed text.  For that, use
1114 L</lex_read_to>.
1115
1116 =cut
1117 */
1118
1119 void
1120 Perl_lex_unstuff(pTHX_ char *ptr)
1121 {
1122     char *buf, *bufend;
1123     STRLEN unstuff_len;
1124     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1125     buf = PL_parser->bufptr;
1126     if (ptr < buf)
1127         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1128     if (ptr == buf)
1129         return;
1130     bufend = PL_parser->bufend;
1131     if (ptr > bufend)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1133     unstuff_len = ptr - buf;
1134     Move(ptr, buf, bufend+1-ptr, char);
1135     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1136     PL_parser->bufend = bufend - unstuff_len;
1137 }
1138
1139 /*
1140 =for apidoc Amx|void|lex_read_to|char *ptr
1141
1142 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1143 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1144 performing the correct bookkeeping whenever a newline character is passed.
1145 This is the normal way to consume lexed text.
1146
1147 Interpretation of the buffer's octets can be abstracted out by
1148 using the slightly higher-level functions L</lex_peek_unichar> and
1149 L</lex_read_unichar>.
1150
1151 =cut
1152 */
1153
1154 void
1155 Perl_lex_read_to(pTHX_ char *ptr)
1156 {
1157     char *s;
1158     PERL_ARGS_ASSERT_LEX_READ_TO;
1159     s = PL_parser->bufptr;
1160     if (ptr < s || ptr > PL_parser->bufend)
1161         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1162     for (; s != ptr; s++)
1163         if (*s == '\n') {
1164             COPLINE_INC_WITH_HERELINES;
1165             PL_parser->linestart = s+1;
1166         }
1167     PL_parser->bufptr = ptr;
1168 }
1169
1170 /*
1171 =for apidoc Amx|void|lex_discard_to|char *ptr
1172
1173 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1174 up to I<ptr>.  The remaining content of the buffer will be moved, and
1175 all pointers into the buffer updated appropriately.  I<ptr> must not
1176 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1177 it is not permitted to discard text that has yet to be lexed.
1178
1179 Normally it is not necessarily to do this directly, because it suffices to
1180 use the implicit discarding behaviour of L</lex_next_chunk> and things
1181 based on it.  However, if a token stretches across multiple lines,
1182 and the lexing code has kept multiple lines of text in the buffer for
1183 that purpose, then after completion of the token it would be wise to
1184 explicitly discard the now-unneeded earlier lines, to avoid future
1185 multi-line tokens growing the buffer without bound.
1186
1187 =cut
1188 */
1189
1190 void
1191 Perl_lex_discard_to(pTHX_ char *ptr)
1192 {
1193     char *buf;
1194     STRLEN discard_len;
1195     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1196     buf = SvPVX(PL_parser->linestr);
1197     if (ptr < buf)
1198         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1199     if (ptr == buf)
1200         return;
1201     if (ptr > PL_parser->bufptr)
1202         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1203     discard_len = ptr - buf;
1204     if (PL_parser->oldbufptr < ptr)
1205         PL_parser->oldbufptr = ptr;
1206     if (PL_parser->oldoldbufptr < ptr)
1207         PL_parser->oldoldbufptr = ptr;
1208     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1209         PL_parser->last_uni = NULL;
1210     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1211         PL_parser->last_lop = NULL;
1212     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1213     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1214     PL_parser->bufend -= discard_len;
1215     PL_parser->bufptr -= discard_len;
1216     PL_parser->oldbufptr -= discard_len;
1217     PL_parser->oldoldbufptr -= discard_len;
1218     if (PL_parser->last_uni)
1219         PL_parser->last_uni -= discard_len;
1220     if (PL_parser->last_lop)
1221         PL_parser->last_lop -= discard_len;
1222 }
1223
1224 /*
1225 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1226
1227 Reads in the next chunk of text to be lexed, appending it to
1228 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1229 looked to the end of the current chunk and wants to know more.  It is
1230 usual, but not necessary, for lexing to have consumed the entirety of
1231 the current chunk at this time.
1232
1233 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1234 chunk (i.e., the current chunk has been entirely consumed), normally the
1235 current chunk will be discarded at the same time that the new chunk is
1236 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1237 will not be discarded.  If the current chunk has not been entirely
1238 consumed, then it will not be discarded regardless of the flag.
1239
1240 Returns true if some new text was added to the buffer, or false if the
1241 buffer has reached the end of the input text.
1242
1243 =cut
1244 */
1245
1246 #define LEX_FAKE_EOF 0x80000000
1247 #define LEX_NO_TERM  0x40000000
1248
1249 bool
1250 Perl_lex_next_chunk(pTHX_ U32 flags)
1251 {
1252     SV *linestr;
1253     char *buf;
1254     STRLEN old_bufend_pos, new_bufend_pos;
1255     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1256     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1257     bool got_some_for_debugger = 0;
1258     bool got_some;
1259     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1260         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1261     linestr = PL_parser->linestr;
1262     buf = SvPVX(linestr);
1263     if (!(flags & LEX_KEEP_PREVIOUS) &&
1264             PL_parser->bufptr == PL_parser->bufend) {
1265         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1266         linestart_pos = 0;
1267         if (PL_parser->last_uni != PL_parser->bufend)
1268             PL_parser->last_uni = NULL;
1269         if (PL_parser->last_lop != PL_parser->bufend)
1270             PL_parser->last_lop = NULL;
1271         last_uni_pos = last_lop_pos = 0;
1272         *buf = 0;
1273         SvCUR(linestr) = 0;
1274     } else {
1275         old_bufend_pos = PL_parser->bufend - buf;
1276         bufptr_pos = PL_parser->bufptr - buf;
1277         oldbufptr_pos = PL_parser->oldbufptr - buf;
1278         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1279         linestart_pos = PL_parser->linestart - buf;
1280         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1281         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1282     }
1283     if (flags & LEX_FAKE_EOF) {
1284         goto eof;
1285     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1286         got_some = 0;
1287     } else if (filter_gets(linestr, old_bufend_pos)) {
1288         got_some = 1;
1289         got_some_for_debugger = 1;
1290     } else if (flags & LEX_NO_TERM) {
1291         got_some = 0;
1292     } else {
1293         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1294             sv_setpvs(linestr, "");
1295         eof:
1296         /* End of real input.  Close filehandle (unless it was STDIN),
1297          * then add implicit termination.
1298          */
1299         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1300             PerlIO_clearerr(PL_parser->rsfp);
1301         else if (PL_parser->rsfp)
1302             (void)PerlIO_close(PL_parser->rsfp);
1303         PL_parser->rsfp = NULL;
1304         PL_parser->in_pod = PL_parser->filtered = 0;
1305         if (!PL_in_eval && PL_minus_p) {
1306             sv_catpvs(linestr,
1307                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1308             PL_minus_n = PL_minus_p = 0;
1309         } else if (!PL_in_eval && PL_minus_n) {
1310             sv_catpvs(linestr, /*{*/";}");
1311             PL_minus_n = 0;
1312         } else
1313             sv_catpvs(linestr, ";");
1314         got_some = 1;
1315     }
1316     buf = SvPVX(linestr);
1317     new_bufend_pos = SvCUR(linestr);
1318     PL_parser->bufend = buf + new_bufend_pos;
1319     PL_parser->bufptr = buf + bufptr_pos;
1320     PL_parser->oldbufptr = buf + oldbufptr_pos;
1321     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1322     PL_parser->linestart = buf + linestart_pos;
1323     if (PL_parser->last_uni)
1324         PL_parser->last_uni = buf + last_uni_pos;
1325     if (PL_parser->last_lop)
1326         PL_parser->last_lop = buf + last_lop_pos;
1327     if (PL_parser->preambling != NOLINE) {
1328         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1329         PL_parser->preambling = NOLINE;
1330     }
1331     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1332             PL_curstash != PL_debstash) {
1333         /* debugger active and we're not compiling the debugger code,
1334          * so store the line into the debugger's array of lines
1335          */
1336         update_debugger_info(NULL, buf+old_bufend_pos,
1337             new_bufend_pos-old_bufend_pos);
1338     }
1339     return got_some;
1340 }
1341
1342 /*
1343 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1344
1345 Looks ahead one (Unicode) character in the text currently being lexed.
1346 Returns the codepoint (unsigned integer value) of the next character,
1347 or -1 if lexing has reached the end of the input text.  To consume the
1348 peeked character, use L</lex_read_unichar>.
1349
1350 If the next character is in (or extends into) the next chunk of input
1351 text, the next chunk will be read in.  Normally the current chunk will be
1352 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1353 then the current chunk will not be discarded.
1354
1355 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1356 is encountered, an exception is generated.
1357
1358 =cut
1359 */
1360
1361 I32
1362 Perl_lex_peek_unichar(pTHX_ U32 flags)
1363 {
1364     dVAR;
1365     char *s, *bufend;
1366     if (flags & ~(LEX_KEEP_PREVIOUS))
1367         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1368     s = PL_parser->bufptr;
1369     bufend = PL_parser->bufend;
1370     if (UTF) {
1371         U8 head;
1372         I32 unichar;
1373         STRLEN len, retlen;
1374         if (s == bufend) {
1375             if (!lex_next_chunk(flags))
1376                 return -1;
1377             s = PL_parser->bufptr;
1378             bufend = PL_parser->bufend;
1379         }
1380         head = (U8)*s;
1381         if (UTF8_IS_INVARIANT(head))
1382             return head;
1383         if (UTF8_IS_START(head)) {
1384             len = UTF8SKIP(&head);
1385             while ((STRLEN)(bufend-s) < len) {
1386                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1387                     break;
1388                 s = PL_parser->bufptr;
1389                 bufend = PL_parser->bufend;
1390             }
1391         }
1392         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1393         if (retlen == (STRLEN)-1) {
1394             /* malformed UTF-8 */
1395             ENTER;
1396             SAVESPTR(PL_warnhook);
1397             PL_warnhook = PERL_WARNHOOK_FATAL;
1398             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1399             LEAVE;
1400         }
1401         return unichar;
1402     } else {
1403         if (s == bufend) {
1404             if (!lex_next_chunk(flags))
1405                 return -1;
1406             s = PL_parser->bufptr;
1407         }
1408         return (U8)*s;
1409     }
1410 }
1411
1412 /*
1413 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1414
1415 Reads the next (Unicode) character in the text currently being lexed.
1416 Returns the codepoint (unsigned integer value) of the character read,
1417 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1418 if lexing has reached the end of the input text.  To non-destructively
1419 examine the next character, use L</lex_peek_unichar> instead.
1420
1421 If the next character is in (or extends into) the next chunk of input
1422 text, the next chunk will be read in.  Normally the current chunk will be
1423 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1424 then the current chunk will not be discarded.
1425
1426 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1427 is encountered, an exception is generated.
1428
1429 =cut
1430 */
1431
1432 I32
1433 Perl_lex_read_unichar(pTHX_ U32 flags)
1434 {
1435     I32 c;
1436     if (flags & ~(LEX_KEEP_PREVIOUS))
1437         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1438     c = lex_peek_unichar(flags);
1439     if (c != -1) {
1440         if (c == '\n')
1441             COPLINE_INC_WITH_HERELINES;
1442         if (UTF)
1443             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1444         else
1445             ++(PL_parser->bufptr);
1446     }
1447     return c;
1448 }
1449
1450 /*
1451 =for apidoc Amx|void|lex_read_space|U32 flags
1452
1453 Reads optional spaces, in Perl style, in the text currently being
1454 lexed.  The spaces may include ordinary whitespace characters and
1455 Perl-style comments.  C<#line> directives are processed if encountered.
1456 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1457 at a non-space character (or the end of the input text).
1458
1459 If spaces extend into the next chunk of input text, the next chunk will
1460 be read in.  Normally the current chunk will be discarded at the same
1461 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1462 chunk will not be discarded.
1463
1464 =cut
1465 */
1466
1467 #define LEX_NO_INCLINE    0x40000000
1468 #define LEX_NO_NEXT_CHUNK 0x80000000
1469
1470 void
1471 Perl_lex_read_space(pTHX_ U32 flags)
1472 {
1473     char *s, *bufend;
1474     const bool can_incline = !(flags & LEX_NO_INCLINE);
1475     bool need_incline = 0;
1476     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1477         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1478     s = PL_parser->bufptr;
1479     bufend = PL_parser->bufend;
1480     while (1) {
1481         char c = *s;
1482         if (c == '#') {
1483             do {
1484                 c = *++s;
1485             } while (!(c == '\n' || (c == 0 && s == bufend)));
1486         } else if (c == '\n') {
1487             s++;
1488             if (can_incline) {
1489                 PL_parser->linestart = s;
1490                 if (s == bufend)
1491                     need_incline = 1;
1492                 else
1493                     incline(s);
1494             }
1495         } else if (isSPACE(c)) {
1496             s++;
1497         } else if (c == 0 && s == bufend) {
1498             bool got_more;
1499             line_t l;
1500             if (flags & LEX_NO_NEXT_CHUNK)
1501                 break;
1502             PL_parser->bufptr = s;
1503             l = CopLINE(PL_curcop);
1504             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1505             got_more = lex_next_chunk(flags);
1506             CopLINE_set(PL_curcop, l);
1507             s = PL_parser->bufptr;
1508             bufend = PL_parser->bufend;
1509             if (!got_more)
1510                 break;
1511             if (can_incline && need_incline && PL_parser->rsfp) {
1512                 incline(s);
1513                 need_incline = 0;
1514             }
1515         } else {
1516             break;
1517         }
1518     }
1519     PL_parser->bufptr = s;
1520 }
1521
1522 /*
1523
1524 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1525
1526 This function performs syntax checking on a prototype, C<proto>.
1527 If C<warn> is true, any illegal characters or mismatched brackets
1528 will trigger illegalproto warnings, declaring that they were
1529 detected in the prototype for C<name>.
1530
1531 The return value is C<true> if this is a valid prototype, and
1532 C<false> if it is not, regardless of whether C<warn> was C<true> or
1533 C<false>.
1534
1535 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1536
1537 =cut
1538
1539  */
1540
1541 bool
1542 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1543 {
1544     STRLEN len, origlen;
1545     char *p = proto ? SvPV(proto, len) : NULL;
1546     bool bad_proto = FALSE;
1547     bool in_brackets = FALSE;
1548     bool after_slash = FALSE;
1549     char greedy_proto = ' ';
1550     bool proto_after_greedy_proto = FALSE;
1551     bool must_be_last = FALSE;
1552     bool underscore = FALSE;
1553     bool bad_proto_after_underscore = FALSE;
1554
1555     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1556
1557     if (!proto)
1558         return TRUE;
1559
1560     origlen = len;
1561     for (; len--; p++) {
1562         if (!isSPACE(*p)) {
1563             if (must_be_last)
1564                 proto_after_greedy_proto = TRUE;
1565             if (underscore) {
1566                 if (!strchr(";@%", *p))
1567                     bad_proto_after_underscore = TRUE;
1568                 underscore = FALSE;
1569             }
1570             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1571                 bad_proto = TRUE;
1572             }
1573             else {
1574                 if (*p == '[')
1575                     in_brackets = TRUE;
1576                 else if (*p == ']')
1577                     in_brackets = FALSE;
1578                 else if ((*p == '@' || *p == '%') &&
1579                     !after_slash &&
1580                     !in_brackets ) {
1581                     must_be_last = TRUE;
1582                     greedy_proto = *p;
1583                 }
1584                 else if (*p == '_')
1585                     underscore = TRUE;
1586             }
1587             if (*p == '\\')
1588                 after_slash = TRUE;
1589             else
1590                 after_slash = FALSE;
1591         }
1592     }
1593
1594     if (warn) {
1595         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1596         p -= origlen;
1597         p = SvUTF8(proto)
1598             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1599                              origlen, UNI_DISPLAY_ISPRINT)
1600             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1601
1602         if (proto_after_greedy_proto)
1603             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1604                         "Prototype after '%c' for %"SVf" : %s",
1605                         greedy_proto, SVfARG(name), p);
1606         if (in_brackets)
1607             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1608                         "Missing ']' in prototype for %"SVf" : %s",
1609                         SVfARG(name), p);
1610         if (bad_proto)
1611             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1612                         "Illegal character in prototype for %"SVf" : %s",
1613                         SVfARG(name), p);
1614         if (bad_proto_after_underscore)
1615             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1616                         "Illegal character after '_' in prototype for %"SVf" : %s",
1617                         SVfARG(name), p);
1618     }
1619
1620     return (! (proto_after_greedy_proto || bad_proto) );
1621 }
1622
1623 /*
1624  * S_incline
1625  * This subroutine has nothing to do with tilting, whether at windmills
1626  * or pinball tables.  Its name is short for "increment line".  It
1627  * increments the current line number in CopLINE(PL_curcop) and checks
1628  * to see whether the line starts with a comment of the form
1629  *    # line 500 "foo.pm"
1630  * If so, it sets the current line number and file to the values in the comment.
1631  */
1632
1633 STATIC void
1634 S_incline(pTHX_ const char *s)
1635 {
1636     const char *t;
1637     const char *n;
1638     const char *e;
1639     line_t line_num;
1640
1641     PERL_ARGS_ASSERT_INCLINE;
1642
1643     COPLINE_INC_WITH_HERELINES;
1644     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1645      && s+1 == PL_bufend && *s == ';') {
1646         /* fake newline in string eval */
1647         CopLINE_dec(PL_curcop);
1648         return;
1649     }
1650     if (*s++ != '#')
1651         return;
1652     while (SPACE_OR_TAB(*s))
1653         s++;
1654     if (strnEQ(s, "line", 4))
1655         s += 4;
1656     else
1657         return;
1658     if (SPACE_OR_TAB(*s))
1659         s++;
1660     else
1661         return;
1662     while (SPACE_OR_TAB(*s))
1663         s++;
1664     if (!isDIGIT(*s))
1665         return;
1666
1667     n = s;
1668     while (isDIGIT(*s))
1669         s++;
1670     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1671         return;
1672     while (SPACE_OR_TAB(*s))
1673         s++;
1674     if (*s == '"' && (t = strchr(s+1, '"'))) {
1675         s++;
1676         e = t + 1;
1677     }
1678     else {
1679         t = s;
1680         while (!isSPACE(*t))
1681             t++;
1682         e = t;
1683     }
1684     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1685         e++;
1686     if (*e != '\n' && *e != '\0')
1687         return;         /* false alarm */
1688
1689     line_num = atoi(n)-1;
1690
1691     if (t - s > 0) {
1692         const STRLEN len = t - s;
1693
1694         if (!PL_rsfp && !PL_parser->filtered) {
1695             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1696              * to *{"::_<newfilename"} */
1697             /* However, the long form of evals is only turned on by the
1698                debugger - usually they're "(eval %lu)" */
1699             GV * const cfgv = CopFILEGV(PL_curcop);
1700             if (cfgv) {
1701                 char smallbuf[128];
1702                 STRLEN tmplen2 = len;
1703                 char *tmpbuf2;
1704                 GV *gv2;
1705
1706                 if (tmplen2 + 2 <= sizeof smallbuf)
1707                     tmpbuf2 = smallbuf;
1708                 else
1709                     Newx(tmpbuf2, tmplen2 + 2, char);
1710
1711                 tmpbuf2[0] = '_';
1712                 tmpbuf2[1] = '<';
1713
1714                 memcpy(tmpbuf2 + 2, s, tmplen2);
1715                 tmplen2 += 2;
1716
1717                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1718                 if (!isGV(gv2)) {
1719                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1720                     /* adjust ${"::_<newfilename"} to store the new file name */
1721                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1722                     /* The line number may differ. If that is the case,
1723                        alias the saved lines that are in the array.
1724                        Otherwise alias the whole array. */
1725                     if (CopLINE(PL_curcop) == line_num) {
1726                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1727                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1728                     }
1729                     else if (GvAV(cfgv)) {
1730                         AV * const av = GvAV(cfgv);
1731                         const I32 start = CopLINE(PL_curcop)+1;
1732                         I32 items = AvFILLp(av) - start;
1733                         if (items > 0) {
1734                             AV * const av2 = GvAVn(gv2);
1735                             SV **svp = AvARRAY(av) + start;
1736                             I32 l = (I32)line_num+1;
1737                             while (items--)
1738                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1739                         }
1740                     }
1741                 }
1742
1743                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1744             }
1745         }
1746         CopFILE_free(PL_curcop);
1747         CopFILE_setn(PL_curcop, s, len);
1748     }
1749     CopLINE_set(PL_curcop, line_num);
1750 }
1751
1752 #define skipspace(s) skipspace_flags(s, 0)
1753
1754
1755 STATIC void
1756 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1757 {
1758     AV *av = CopFILEAVx(PL_curcop);
1759     if (av) {
1760         SV * sv;
1761         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1762         else {
1763             sv = *av_fetch(av, 0, 1);
1764             SvUPGRADE(sv, SVt_PVMG);
1765         }
1766         if (!SvPOK(sv)) sv_setpvs(sv,"");
1767         if (orig_sv)
1768             sv_catsv(sv, orig_sv);
1769         else
1770             sv_catpvn(sv, buf, len);
1771         if (!SvIOK(sv)) {
1772             (void)SvIOK_on(sv);
1773             SvIV_set(sv, 0);
1774         }
1775         if (PL_parser->preambling == NOLINE)
1776             av_store(av, CopLINE(PL_curcop), sv);
1777     }
1778 }
1779
1780 /*
1781  * S_skipspace
1782  * Called to gobble the appropriate amount and type of whitespace.
1783  * Skips comments as well.
1784  */
1785
1786 STATIC char *
1787 S_skipspace_flags(pTHX_ char *s, U32 flags)
1788 {
1789     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1790     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1791         while (s < PL_bufend && SPACE_OR_TAB(*s))
1792             s++;
1793     } else {
1794         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1795         PL_bufptr = s;
1796         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1797                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1798                     LEX_NO_NEXT_CHUNK : 0));
1799         s = PL_bufptr;
1800         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1801         if (PL_linestart > PL_bufptr)
1802             PL_bufptr = PL_linestart;
1803         return s;
1804     }
1805     return s;
1806 }
1807
1808 /*
1809  * S_check_uni
1810  * Check the unary operators to ensure there's no ambiguity in how they're
1811  * used.  An ambiguous piece of code would be:
1812  *     rand + 5
1813  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1814  * the +5 is its argument.
1815  */
1816
1817 STATIC void
1818 S_check_uni(pTHX)
1819 {
1820     const char *s;
1821     const char *t;
1822
1823     if (PL_oldoldbufptr != PL_last_uni)
1824         return;
1825     while (isSPACE(*PL_last_uni))
1826         PL_last_uni++;
1827     s = PL_last_uni;
1828     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1829         s++;
1830     if ((t = strchr(s, '(')) && t < PL_bufptr)
1831         return;
1832
1833     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1834                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1835                      (int)(s - PL_last_uni), PL_last_uni);
1836 }
1837
1838 /*
1839  * LOP : macro to build a list operator.  Its behaviour has been replaced
1840  * with a subroutine, S_lop() for which LOP is just another name.
1841  */
1842
1843 #define LOP(f,x) return lop(f,x,s)
1844
1845 /*
1846  * S_lop
1847  * Build a list operator (or something that might be one).  The rules:
1848  *  - if we have a next token, then it's a list operator [why?]
1849  *  - if the next thing is an opening paren, then it's a function
1850  *  - else it's a list operator
1851  */
1852
1853 STATIC I32
1854 S_lop(pTHX_ I32 f, int x, char *s)
1855 {
1856     PERL_ARGS_ASSERT_LOP;
1857
1858     pl_yylval.ival = f;
1859     CLINE;
1860     PL_expect = x;
1861     PL_bufptr = s;
1862     PL_last_lop = PL_oldbufptr;
1863     PL_last_lop_op = (OPCODE)f;
1864     if (PL_nexttoke)
1865         goto lstop;
1866     if (*s == '(')
1867         return REPORT(FUNC);
1868     s = PEEKSPACE(s);
1869     if (*s == '(')
1870         return REPORT(FUNC);
1871     else {
1872         lstop:
1873         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1874             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1875         return REPORT(LSTOP);
1876     }
1877 }
1878
1879 /*
1880  * S_force_next
1881  * When the lexer realizes it knows the next token (for instance,
1882  * it is reordering tokens for the parser) then it can call S_force_next
1883  * to know what token to return the next time the lexer is called.  Caller
1884  * will need to set PL_nextval[] and possibly PL_expect to ensure
1885  * the lexer handles the token correctly.
1886  */
1887
1888 STATIC void
1889 S_force_next(pTHX_ I32 type)
1890 {
1891 #ifdef DEBUGGING
1892     if (DEBUG_T_TEST) {
1893         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1894         tokereport(type, &NEXTVAL_NEXTTOKE);
1895     }
1896 #endif
1897     PL_nexttype[PL_nexttoke] = type;
1898     PL_nexttoke++;
1899     if (PL_lex_state != LEX_KNOWNEXT) {
1900         PL_lex_defer = PL_lex_state;
1901         PL_lex_expect = PL_expect;
1902         PL_lex_state = LEX_KNOWNEXT;
1903     }
1904 }
1905
1906 /*
1907  * S_postderef
1908  *
1909  * This subroutine handles postfix deref syntax after the arrow has already
1910  * been emitted.  @* $* etc. are emitted as two separate token right here.
1911  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1912  * only the first, leaving yylex to find the next.
1913  */
1914
1915 static int
1916 S_postderef(pTHX_ int const funny, char const next)
1917 {
1918     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1919     assert(strchr("*[{", next));
1920     if (next == '*') {
1921         PL_expect = XOPERATOR;
1922         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1923             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1924             PL_lex_state = LEX_INTERPEND;
1925             force_next(POSTJOIN);
1926         }
1927         force_next(next);
1928         PL_bufptr+=2;
1929     }
1930     else {
1931         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1932          && !PL_lex_brackets)
1933             PL_lex_dojoin = 2;
1934         PL_expect = XOPERATOR;
1935         PL_bufptr++;
1936     }
1937     return funny;
1938 }
1939
1940 void
1941 Perl_yyunlex(pTHX)
1942 {
1943     int yyc = PL_parser->yychar;
1944     if (yyc != YYEMPTY) {
1945         if (yyc) {
1946             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1947             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1948                 PL_lex_allbrackets--;
1949                 PL_lex_brackets--;
1950                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1951             } else if (yyc == '('/*)*/) {
1952                 PL_lex_allbrackets--;
1953                 yyc |= (2<<24);
1954             }
1955             force_next(yyc);
1956         }
1957         PL_parser->yychar = YYEMPTY;
1958     }
1959 }
1960
1961 STATIC SV *
1962 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1963 {
1964     SV * const sv = newSVpvn_utf8(start, len,
1965                                   !IN_BYTES
1966                                   && UTF
1967                                   && !is_ascii_string((const U8*)start, len)
1968                                   && is_utf8_string((const U8*)start, len));
1969     return sv;
1970 }
1971
1972 /*
1973  * S_force_word
1974  * When the lexer knows the next thing is a word (for instance, it has
1975  * just seen -> and it knows that the next char is a word char, then
1976  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1977  * lookahead.
1978  *
1979  * Arguments:
1980  *   char *start : buffer position (must be within PL_linestr)
1981  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1982  *   int check_keyword : if true, Perl checks to make sure the word isn't
1983  *       a keyword (do this if the word is a label, e.g. goto FOO)
1984  *   int allow_pack : if true, : characters will also be allowed (require,
1985  *       use, etc. do this)
1986  *   int allow_initial_tick : used by the "sub" lexer only.
1987  */
1988
1989 STATIC char *
1990 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
1991 {
1992     char *s;
1993     STRLEN len;
1994
1995     PERL_ARGS_ASSERT_FORCE_WORD;
1996
1997     start = SKIPSPACE1(start);
1998     s = start;
1999     if (isIDFIRST_lazy_if(s,UTF) ||
2000         (allow_pack && *s == ':') )
2001     {
2002         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2003         if (check_keyword) {
2004           char *s2 = PL_tokenbuf;
2005           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2006             s2 += 6, len -= 6;
2007           if (keyword(s2, len, 0))
2008             return start;
2009         }
2010         if (token == METHOD) {
2011             s = SKIPSPACE1(s);
2012             if (*s == '(')
2013                 PL_expect = XTERM;
2014             else {
2015                 PL_expect = XOPERATOR;
2016             }
2017         }
2018         NEXTVAL_NEXTTOKE.opval
2019             = (OP*)newSVOP(OP_CONST,0,
2020                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2021         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2022         force_next(token);
2023     }
2024     return s;
2025 }
2026
2027 /*
2028  * S_force_ident
2029  * Called when the lexer wants $foo *foo &foo etc, but the program
2030  * text only contains the "foo" portion.  The first argument is a pointer
2031  * to the "foo", and the second argument is the type symbol to prefix.
2032  * Forces the next token to be a "WORD".
2033  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2034  */
2035
2036 STATIC void
2037 S_force_ident(pTHX_ const char *s, int kind)
2038 {
2039     PERL_ARGS_ASSERT_FORCE_IDENT;
2040
2041     if (s[0]) {
2042         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2043         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2044                                                                 UTF ? SVf_UTF8 : 0));
2045         NEXTVAL_NEXTTOKE.opval = o;
2046         force_next(WORD);
2047         if (kind) {
2048             o->op_private = OPpCONST_ENTERED;
2049             /* XXX see note in pp_entereval() for why we forgo typo
2050                warnings if the symbol must be introduced in an eval.
2051                GSAR 96-10-12 */
2052             gv_fetchpvn_flags(s, len,
2053                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2054                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2055                               kind == '$' ? SVt_PV :
2056                               kind == '@' ? SVt_PVAV :
2057                               kind == '%' ? SVt_PVHV :
2058                               SVt_PVGV
2059                               );
2060         }
2061     }
2062 }
2063
2064 static void
2065 S_force_ident_maybe_lex(pTHX_ char pit)
2066 {
2067     NEXTVAL_NEXTTOKE.ival = pit;
2068     force_next('p');
2069 }
2070
2071 NV
2072 Perl_str_to_version(pTHX_ SV *sv)
2073 {
2074     NV retval = 0.0;
2075     NV nshift = 1.0;
2076     STRLEN len;
2077     const char *start = SvPV_const(sv,len);
2078     const char * const end = start + len;
2079     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2080
2081     PERL_ARGS_ASSERT_STR_TO_VERSION;
2082
2083     while (start < end) {
2084         STRLEN skip;
2085         UV n;
2086         if (utf)
2087             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2088         else {
2089             n = *(U8*)start;
2090             skip = 1;
2091         }
2092         retval += ((NV)n)/nshift;
2093         start += skip;
2094         nshift *= 1000;
2095     }
2096     return retval;
2097 }
2098
2099 /*
2100  * S_force_version
2101  * Forces the next token to be a version number.
2102  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2103  * and if "guessing" is TRUE, then no new token is created (and the caller
2104  * must use an alternative parsing method).
2105  */
2106
2107 STATIC char *
2108 S_force_version(pTHX_ char *s, int guessing)
2109 {
2110     OP *version = NULL;
2111     char *d;
2112
2113     PERL_ARGS_ASSERT_FORCE_VERSION;
2114
2115     s = SKIPSPACE1(s);
2116
2117     d = s;
2118     if (*d == 'v')
2119         d++;
2120     if (isDIGIT(*d)) {
2121         while (isDIGIT(*d) || *d == '_' || *d == '.')
2122             d++;
2123         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2124             SV *ver;
2125             s = scan_num(s, &pl_yylval);
2126             version = pl_yylval.opval;
2127             ver = cSVOPx(version)->op_sv;
2128             if (SvPOK(ver) && !SvNIOK(ver)) {
2129                 SvUPGRADE(ver, SVt_PVNV);
2130                 SvNV_set(ver, str_to_version(ver));
2131                 SvNOK_on(ver);          /* hint that it is a version */
2132             }
2133         }
2134         else if (guessing) {
2135             return s;
2136         }
2137     }
2138
2139     /* NOTE: The parser sees the package name and the VERSION swapped */
2140     NEXTVAL_NEXTTOKE.opval = version;
2141     force_next(WORD);
2142
2143     return s;
2144 }
2145
2146 /*
2147  * S_force_strict_version
2148  * Forces the next token to be a version number using strict syntax rules.
2149  */
2150
2151 STATIC char *
2152 S_force_strict_version(pTHX_ char *s)
2153 {
2154     OP *version = NULL;
2155     const char *errstr = NULL;
2156
2157     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2158
2159     while (isSPACE(*s)) /* leading whitespace */
2160         s++;
2161
2162     if (is_STRICT_VERSION(s,&errstr)) {
2163         SV *ver = newSV(0);
2164         s = (char *)scan_version(s, ver, 0);
2165         version = newSVOP(OP_CONST, 0, ver);
2166     }
2167     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2168             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2169     {
2170         PL_bufptr = s;
2171         if (errstr)
2172             yyerror(errstr); /* version required */
2173         return s;
2174     }
2175
2176     /* NOTE: The parser sees the package name and the VERSION swapped */
2177     NEXTVAL_NEXTTOKE.opval = version;
2178     force_next(WORD);
2179
2180     return s;
2181 }
2182
2183 /*
2184  * S_tokeq
2185  * Tokenize a quoted string passed in as an SV.  It finds the next
2186  * chunk, up to end of string or a backslash.  It may make a new
2187  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2188  * turns \\ into \.
2189  */
2190
2191 STATIC SV *
2192 S_tokeq(pTHX_ SV *sv)
2193 {
2194     char *s;
2195     char *send;
2196     char *d;
2197     SV *pv = sv;
2198
2199     PERL_ARGS_ASSERT_TOKEQ;
2200
2201     assert (SvPOK(sv));
2202     assert (SvLEN(sv));
2203     assert (!SvIsCOW(sv));
2204     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2205         goto finish;
2206     s = SvPVX(sv);
2207     send = SvEND(sv);
2208     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2209     while (s < send && !(*s == '\\' && s[1] == '\\'))
2210         s++;
2211     if (s == send)
2212         goto finish;
2213     d = s;
2214     if ( PL_hints & HINT_NEW_STRING ) {
2215         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2216                             SVs_TEMP | SvUTF8(sv));
2217     }
2218     while (s < send) {
2219         if (*s == '\\') {
2220             if (s + 1 < send && (s[1] == '\\'))
2221                 s++;            /* all that, just for this */
2222         }
2223         *d++ = *s++;
2224     }
2225     *d = '\0';
2226     SvCUR_set(sv, d - SvPVX_const(sv));
2227   finish:
2228     if ( PL_hints & HINT_NEW_STRING )
2229        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2230     return sv;
2231 }
2232
2233 /*
2234  * Now come three functions related to double-quote context,
2235  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2236  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2237  * interact with PL_lex_state, and create fake ( ... ) argument lists
2238  * to handle functions and concatenation.
2239  * For example,
2240  *   "foo\lbar"
2241  * is tokenised as
2242  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2243  */
2244
2245 /*
2246  * S_sublex_start
2247  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2248  *
2249  * Pattern matching will set PL_lex_op to the pattern-matching op to
2250  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2251  *
2252  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2253  *
2254  * Everything else becomes a FUNC.
2255  *
2256  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2257  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2258  * call to S_sublex_push().
2259  */
2260
2261 STATIC I32
2262 S_sublex_start(pTHX)
2263 {
2264     const I32 op_type = pl_yylval.ival;
2265
2266     if (op_type == OP_NULL) {
2267         pl_yylval.opval = PL_lex_op;
2268         PL_lex_op = NULL;
2269         return THING;
2270     }
2271     if (op_type == OP_CONST) {
2272         SV *sv = tokeq(PL_lex_stuff);
2273
2274         if (SvTYPE(sv) == SVt_PVIV) {
2275             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2276             STRLEN len;
2277             const char * const p = SvPV_const(sv, len);
2278             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2279             SvREFCNT_dec(sv);
2280             sv = nsv;
2281         }
2282         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2283         PL_lex_stuff = NULL;
2284         return THING;
2285     }
2286
2287     PL_sublex_info.super_state = PL_lex_state;
2288     PL_sublex_info.sub_inwhat = (U16)op_type;
2289     PL_sublex_info.sub_op = PL_lex_op;
2290     PL_lex_state = LEX_INTERPPUSH;
2291
2292     PL_expect = XTERM;
2293     if (PL_lex_op) {
2294         pl_yylval.opval = PL_lex_op;
2295         PL_lex_op = NULL;
2296         return PMFUNC;
2297     }
2298     else
2299         return FUNC;
2300 }
2301
2302 /*
2303  * S_sublex_push
2304  * Create a new scope to save the lexing state.  The scope will be
2305  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2306  * to the uc, lc, etc. found before.
2307  * Sets PL_lex_state to LEX_INTERPCONCAT.
2308  */
2309
2310 STATIC I32
2311 S_sublex_push(pTHX)
2312 {
2313     LEXSHARED *shared;
2314     const bool is_heredoc = PL_multi_close == '<';
2315     ENTER;
2316
2317     PL_lex_state = PL_sublex_info.super_state;
2318     SAVEI8(PL_lex_dojoin);
2319     SAVEI32(PL_lex_brackets);
2320     SAVEI32(PL_lex_allbrackets);
2321     SAVEI32(PL_lex_formbrack);
2322     SAVEI8(PL_lex_fakeeof);
2323     SAVEI32(PL_lex_casemods);
2324     SAVEI32(PL_lex_starts);
2325     SAVEI8(PL_lex_state);
2326     SAVESPTR(PL_lex_repl);
2327     SAVEVPTR(PL_lex_inpat);
2328     SAVEI16(PL_lex_inwhat);
2329     if (is_heredoc)
2330     {
2331         SAVECOPLINE(PL_curcop);
2332         SAVEI32(PL_multi_end);
2333         SAVEI32(PL_parser->herelines);
2334         PL_parser->herelines = 0;
2335     }
2336     SAVEI8(PL_multi_close);
2337     SAVEPPTR(PL_bufptr);
2338     SAVEPPTR(PL_bufend);
2339     SAVEPPTR(PL_oldbufptr);
2340     SAVEPPTR(PL_oldoldbufptr);
2341     SAVEPPTR(PL_last_lop);
2342     SAVEPPTR(PL_last_uni);
2343     SAVEPPTR(PL_linestart);
2344     SAVESPTR(PL_linestr);
2345     SAVEGENERICPV(PL_lex_brackstack);
2346     SAVEGENERICPV(PL_lex_casestack);
2347     SAVEGENERICPV(PL_parser->lex_shared);
2348     SAVEBOOL(PL_parser->lex_re_reparsing);
2349     SAVEI32(PL_copline);
2350
2351     /* The here-doc parser needs to be able to peek into outer lexing
2352        scopes to find the body of the here-doc.  So we put PL_linestr and
2353        PL_bufptr into lex_shared, to ‘share’ those values.
2354      */
2355     PL_parser->lex_shared->ls_linestr = PL_linestr;
2356     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2357
2358     PL_linestr = PL_lex_stuff;
2359     PL_lex_repl = PL_sublex_info.repl;
2360     PL_lex_stuff = NULL;
2361     PL_sublex_info.repl = NULL;
2362
2363     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2364         = SvPVX(PL_linestr);
2365     PL_bufend += SvCUR(PL_linestr);
2366     PL_last_lop = PL_last_uni = NULL;
2367     SAVEFREESV(PL_linestr);
2368     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2369
2370     PL_lex_dojoin = FALSE;
2371     PL_lex_brackets = PL_lex_formbrack = 0;
2372     PL_lex_allbrackets = 0;
2373     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2374     Newx(PL_lex_brackstack, 120, char);
2375     Newx(PL_lex_casestack, 12, char);
2376     PL_lex_casemods = 0;
2377     *PL_lex_casestack = '\0';
2378     PL_lex_starts = 0;
2379     PL_lex_state = LEX_INTERPCONCAT;
2380     if (is_heredoc)
2381         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2382     PL_copline = NOLINE;
2383
2384     Newxz(shared, 1, LEXSHARED);
2385     shared->ls_prev = PL_parser->lex_shared;
2386     PL_parser->lex_shared = shared;
2387
2388     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2389     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2390     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2391         PL_lex_inpat = PL_sublex_info.sub_op;
2392     else
2393         PL_lex_inpat = NULL;
2394
2395     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2396     PL_in_eval &= ~EVAL_RE_REPARSING;
2397
2398     return '(';
2399 }
2400
2401 /*
2402  * S_sublex_done
2403  * Restores lexer state after a S_sublex_push.
2404  */
2405
2406 STATIC I32
2407 S_sublex_done(pTHX)
2408 {
2409     if (!PL_lex_starts++) {
2410         SV * const sv = newSVpvs("");
2411         if (SvUTF8(PL_linestr))
2412             SvUTF8_on(sv);
2413         PL_expect = XOPERATOR;
2414         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2415         return THING;
2416     }
2417
2418     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2419         PL_lex_state = LEX_INTERPCASEMOD;
2420         return yylex();
2421     }
2422
2423     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2424     assert(PL_lex_inwhat != OP_TRANSR);
2425     if (PL_lex_repl) {
2426         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2427         PL_linestr = PL_lex_repl;
2428         PL_lex_inpat = 0;
2429         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2430         PL_bufend += SvCUR(PL_linestr);
2431         PL_last_lop = PL_last_uni = NULL;
2432         PL_lex_dojoin = FALSE;
2433         PL_lex_brackets = 0;
2434         PL_lex_allbrackets = 0;
2435         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2436         PL_lex_casemods = 0;
2437         *PL_lex_casestack = '\0';
2438         PL_lex_starts = 0;
2439         if (SvEVALED(PL_lex_repl)) {
2440             PL_lex_state = LEX_INTERPNORMAL;
2441             PL_lex_starts++;
2442             /*  we don't clear PL_lex_repl here, so that we can check later
2443                 whether this is an evalled subst; that means we rely on the
2444                 logic to ensure sublex_done() is called again only via the
2445                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2446         }
2447         else {
2448             PL_lex_state = LEX_INTERPCONCAT;
2449             PL_lex_repl = NULL;
2450         }
2451         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2452             CopLINE(PL_curcop) +=
2453                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2454                  + PL_parser->herelines;
2455             PL_parser->herelines = 0;
2456         }
2457         return ',';
2458     }
2459     else {
2460         const line_t l = CopLINE(PL_curcop);
2461         LEAVE;
2462         if (PL_multi_close == '<')
2463             PL_parser->herelines += l - PL_multi_end;
2464         PL_bufend = SvPVX(PL_linestr);
2465         PL_bufend += SvCUR(PL_linestr);
2466         PL_expect = XOPERATOR;
2467         PL_sublex_info.sub_inwhat = 0;
2468         return ')';
2469     }
2470 }
2471
2472 PERL_STATIC_INLINE SV*
2473 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2474 {
2475     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2476      * interior, hence to the "}".  Finds what the name resolves to, returning
2477      * an SV* containing it; NULL if no valid one found */
2478
2479     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2480
2481     HV * table;
2482     SV **cvp;
2483     SV *cv;
2484     SV *rv;
2485     HV *stash;
2486     const U8* first_bad_char_loc;
2487     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2488
2489     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2490
2491     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2492                                      e - backslash_ptr,
2493                                      &first_bad_char_loc))
2494     {
2495         /* If warnings are on, this will print a more detailed analysis of what
2496          * is wrong than the error message below */
2497         utf8n_to_uvchr(first_bad_char_loc,
2498                        e - ((char *) first_bad_char_loc),
2499                        NULL, 0);
2500
2501         /* We deliberately don't try to print the malformed character, which
2502          * might not print very well; it also may be just the first of many
2503          * malformations, so don't print what comes after it */
2504         yyerror(Perl_form(aTHX_
2505             "Malformed UTF-8 character immediately after '%.*s'",
2506             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2507         return NULL;
2508     }
2509
2510     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2511                         /* include the <}> */
2512                         e - backslash_ptr + 1);
2513     if (! SvPOK(res)) {
2514         SvREFCNT_dec_NN(res);
2515         return NULL;
2516     }
2517
2518     /* See if the charnames handler is the Perl core's, and if so, we can skip
2519      * the validation needed for a user-supplied one, as Perl's does its own
2520      * validation. */
2521     table = GvHV(PL_hintgv);             /* ^H */
2522     cvp = hv_fetchs(table, "charnames", FALSE);
2523     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2524         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2525     {
2526         const char * const name = HvNAME(stash);
2527         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2528          && strEQ(name, "_charnames")) {
2529            return res;
2530        }
2531     }
2532
2533     /* Here, it isn't Perl's charname handler.  We can't rely on a
2534      * user-supplied handler to validate the input name.  For non-ut8 input,
2535      * look to see that the first character is legal.  Then loop through the
2536      * rest checking that each is a continuation */
2537
2538     /* This code makes the reasonable assumption that the only Latin1-range
2539      * characters that begin a character name alias are alphabetic, otherwise
2540      * would have to create a isCHARNAME_BEGIN macro */
2541
2542     if (! UTF) {
2543         if (! isALPHAU(*s)) {
2544             goto bad_charname;
2545         }
2546         s++;
2547         while (s < e) {
2548             if (! isCHARNAME_CONT(*s)) {
2549                 goto bad_charname;
2550             }
2551             if (*s == ' ' && *(s-1) == ' ') {
2552                 goto multi_spaces;
2553             }
2554             if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2555                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2556                            "NO-BREAK SPACE in a charnames "
2557                            "alias definition is deprecated");
2558             }
2559             s++;
2560         }
2561     }
2562     else {
2563         /* Similarly for utf8.  For invariants can check directly; for other
2564          * Latin1, can calculate their code point and check; otherwise  use a
2565          * swash */
2566         if (UTF8_IS_INVARIANT(*s)) {
2567             if (! isALPHAU(*s)) {
2568                 goto bad_charname;
2569             }
2570             s++;
2571         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2572             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2573                 goto bad_charname;
2574             }
2575             s += 2;
2576         }
2577         else {
2578             if (! PL_utf8_charname_begin) {
2579                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2580                 PL_utf8_charname_begin = _core_swash_init("utf8",
2581                                                         "_Perl_Charname_Begin",
2582                                                         &PL_sv_undef,
2583                                                         1, 0, NULL, &flags);
2584             }
2585             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2586                 goto bad_charname;
2587             }
2588             s += UTF8SKIP(s);
2589         }
2590
2591         while (s < e) {
2592             if (UTF8_IS_INVARIANT(*s)) {
2593                 if (! isCHARNAME_CONT(*s)) {
2594                     goto bad_charname;
2595                 }
2596                 if (*s == ' ' && *(s-1) == ' ') {
2597                     goto multi_spaces;
2598                 }
2599                 s++;
2600             }
2601             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2602                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2603                 {
2604                     goto bad_charname;
2605                 }
2606                 if (*s == *NBSP_UTF8
2607                     && *(s+1) == *(NBSP_UTF8+1)
2608                     && ckWARN_d(WARN_DEPRECATED))
2609                 {
2610                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2611                                 "NO-BREAK SPACE in a charnames "
2612                                 "alias definition is deprecated");
2613                 }
2614                 s += 2;
2615             }
2616             else {
2617                 if (! PL_utf8_charname_continue) {
2618                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2619                     PL_utf8_charname_continue = _core_swash_init("utf8",
2620                                                 "_Perl_Charname_Continue",
2621                                                 &PL_sv_undef,
2622                                                 1, 0, NULL, &flags);
2623                 }
2624                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2625                     goto bad_charname;
2626                 }
2627                 s += UTF8SKIP(s);
2628             }
2629         }
2630     }
2631     if (*(s-1) == ' ') {
2632         yyerror_pv(
2633             Perl_form(aTHX_
2634             "charnames alias definitions may not contain trailing "
2635             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2636             (int)(s - backslash_ptr + 1), backslash_ptr,
2637             (int)(e - s + 1), s + 1
2638             ),
2639         UTF ? SVf_UTF8 : 0);
2640         return NULL;
2641     }
2642
2643     if (SvUTF8(res)) { /* Don't accept malformed input */
2644         const U8* first_bad_char_loc;
2645         STRLEN len;
2646         const char* const str = SvPV_const(res, len);
2647         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2648             /* If warnings are on, this will print a more detailed analysis of
2649              * what is wrong than the error message below */
2650             utf8n_to_uvchr(first_bad_char_loc,
2651                            (char *) first_bad_char_loc - str,
2652                            NULL, 0);
2653
2654             /* We deliberately don't try to print the malformed character,
2655              * which might not print very well; it also may be just the first
2656              * of many malformations, so don't print what comes after it */
2657             yyerror_pv(
2658               Perl_form(aTHX_
2659                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2660                  (int) (e - backslash_ptr + 1), backslash_ptr,
2661                  (int) ((char *) first_bad_char_loc - str), str
2662               ),
2663               SVf_UTF8);
2664             return NULL;
2665         }
2666     }
2667
2668     return res;
2669
2670   bad_charname: {
2671
2672         /* The final %.*s makes sure that should the trailing NUL be missing
2673          * that this print won't run off the end of the string */
2674         yyerror_pv(
2675           Perl_form(aTHX_
2676             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2677             (int)(s - backslash_ptr + 1), backslash_ptr,
2678             (int)(e - s + 1), s + 1
2679           ),
2680           UTF ? SVf_UTF8 : 0);
2681         return NULL;
2682     }
2683
2684   multi_spaces:
2685         yyerror_pv(
2686           Perl_form(aTHX_
2687             "charnames alias definitions may not contain a sequence of "
2688             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2689             (int)(s - backslash_ptr + 1), backslash_ptr,
2690             (int)(e - s + 1), s + 1
2691           ),
2692           UTF ? SVf_UTF8 : 0);
2693         return NULL;
2694 }
2695
2696 /*
2697   scan_const
2698
2699   Extracts the next constant part of a pattern, double-quoted string,
2700   or transliteration.  This is terrifying code.
2701
2702   For example, in parsing the double-quoted string "ab\x63$d", it would
2703   stop at the '$' and return an OP_CONST containing 'abc'.
2704
2705   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2706   processing a pattern (PL_lex_inpat is true), a transliteration
2707   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2708
2709   Returns a pointer to the character scanned up to. If this is
2710   advanced from the start pointer supplied (i.e. if anything was
2711   successfully parsed), will leave an OP_CONST for the substring scanned
2712   in pl_yylval. Caller must intuit reason for not parsing further
2713   by looking at the next characters herself.
2714
2715   In patterns:
2716     expand:
2717       \N{FOO}  => \N{U+hex_for_character_FOO}
2718       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2719
2720     pass through:
2721         all other \-char, including \N and \N{ apart from \N{ABC}
2722
2723     stops on:
2724         @ and $ where it appears to be a var, but not for $ as tail anchor
2725         \l \L \u \U \Q \E
2726         (?{  or  (??{
2727
2728
2729   In transliterations:
2730     characters are VERY literal, except for - not at the start or end
2731     of the string, which indicates a range. If the range is in bytes,
2732     scan_const expands the range to the full set of intermediate
2733     characters. If the range is in utf8, the hyphen is replaced with
2734     a certain range mark which will be handled by pmtrans() in op.c.
2735
2736   In double-quoted strings:
2737     backslashes:
2738       double-quoted style: \r and \n
2739       constants: \x31, etc.
2740       deprecated backrefs: \1 (in substitution replacements)
2741       case and quoting: \U \Q \E
2742     stops on @ and $
2743
2744   scan_const does *not* construct ops to handle interpolated strings.
2745   It stops processing as soon as it finds an embedded $ or @ variable
2746   and leaves it to the caller to work out what's going on.
2747
2748   embedded arrays (whether in pattern or not) could be:
2749       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2750
2751   $ in double-quoted strings must be the symbol of an embedded scalar.
2752
2753   $ in pattern could be $foo or could be tail anchor.  Assumption:
2754   it's a tail anchor if $ is the last thing in the string, or if it's
2755   followed by one of "()| \r\n\t"
2756
2757   \1 (backreferences) are turned into $1 in substitutions
2758
2759   The structure of the code is
2760       while (there's a character to process) {
2761           handle transliteration ranges
2762           skip regexp comments /(?#comment)/ and codes /(?{code})/
2763           skip #-initiated comments in //x patterns
2764           check for embedded arrays
2765           check for embedded scalars
2766           if (backslash) {
2767               deprecate \1 in substitution replacements
2768               handle string-changing backslashes \l \U \Q \E, etc.
2769               switch (what was escaped) {
2770                   handle \- in a transliteration (becomes a literal -)
2771                   if a pattern and not \N{, go treat as regular character
2772                   handle \132 (octal characters)
2773                   handle \x15 and \x{1234} (hex characters)
2774                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2775                   handle \cV (control characters)
2776                   handle printf-style backslashes (\f, \r, \n, etc)
2777               } (end switch)
2778               continue
2779           } (end if backslash)
2780           handle regular character
2781     } (end while character to read)
2782
2783 */
2784
2785 STATIC char *
2786 S_scan_const(pTHX_ char *start)
2787 {
2788     char *send = PL_bufend;             /* end of the constant */
2789     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2790                                            on sizing. */
2791     char *s = start;                    /* start of the constant */
2792     char *d = SvPVX(sv);                /* destination for copies */
2793     bool dorange = FALSE;               /* are we in a translit range? */
2794     bool didrange = FALSE;              /* did we just finish a range? */
2795     bool in_charclass = FALSE;          /* within /[...]/ */
2796     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2797     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2798                                            UTF8?  But, this can show as true
2799                                            when the source isn't utf8, as for
2800                                            example when it is entirely composed
2801                                            of hex constants */
2802     SV *res;                            /* result from charnames */
2803
2804     /* Note on sizing:  The scanned constant is placed into sv, which is
2805      * initialized by newSV() assuming one byte of output for every byte of
2806      * input.  This routine expects newSV() to allocate an extra byte for a
2807      * trailing NUL, which this routine will append if it gets to the end of
2808      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2809      * CAPITAL LETTER A}), or more output than input if the constant ends up
2810      * recoded to utf8, but each time a construct is found that might increase
2811      * the needed size, SvGROW() is called.  Its size parameter each time is
2812      * based on the best guess estimate at the time, namely the length used so
2813      * far, plus the length the current construct will occupy, plus room for
2814      * the trailing NUL, plus one byte for every input byte still unscanned */
2815
2816     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2817                        before set */
2818 #ifdef EBCDIC
2819     UV literal_endpoint = 0;
2820     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2821 #endif
2822
2823     PERL_ARGS_ASSERT_SCAN_CONST;
2824
2825     assert(PL_lex_inwhat != OP_TRANSR);
2826     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2827         /* If we are doing a trans and we know we want UTF8 set expectation */
2828         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2829         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2830     }
2831
2832     /* Protect sv from errors and fatal warnings. */
2833     ENTER_with_name("scan_const");
2834     SAVEFREESV(sv);
2835
2836     while (s < send || dorange) {
2837
2838         /* get transliterations out of the way (they're most literal) */
2839         if (PL_lex_inwhat == OP_TRANS) {
2840             /* expand a range A-Z to the full set of characters.  AIE! */
2841             if (dorange) {
2842                 I32 i;                          /* current expanded character */
2843                 I32 min;                        /* first character in range */
2844                 I32 max;                        /* last character in range */
2845
2846 #ifdef EBCDIC
2847                 UV uvmax = 0;
2848 #endif
2849
2850                 if (has_utf8
2851 #ifdef EBCDIC
2852                     && !native_range
2853 #endif
2854                 ) {
2855                     char * const c = (char*)utf8_hop((U8*)d, -1);
2856                     char *e = d++;
2857                     while (e-- > c)
2858                         *(e + 1) = *e;
2859                     *c = (char) ILLEGAL_UTF8_BYTE;
2860                     /* mark the range as done, and continue */
2861                     dorange = FALSE;
2862                     didrange = TRUE;
2863                     continue;
2864                 }
2865
2866                 i = d - SvPVX_const(sv);                /* remember current offset */
2867 #ifdef EBCDIC
2868                 SvGROW(sv,
2869                        SvLEN(sv) + ((has_utf8)
2870                                     ?  (512 - UTF_CONTINUATION_MARK
2871                                         + UNISKIP(0x100))
2872                                     : 256));
2873                 /* How many two-byte within 0..255: 128 in UTF-8,
2874                  * 96 in UTF-8-mod. */
2875 #else
2876                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2877 #endif
2878                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2879 #ifdef EBCDIC
2880                 if (has_utf8) {
2881                     int j;
2882                     for (j = 0; j <= 1; j++) {
2883                         char * const c = (char*)utf8_hop((U8*)d, -1);
2884                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2885                         if (j)
2886                             min = (U8)uv;
2887                         else if (uv < 256)
2888                             max = (U8)uv;
2889                         else {
2890                             max = (U8)0xff; /* only to \xff */
2891                             uvmax = uv; /* \x{100} to uvmax */
2892                         }
2893                         d = c; /* eat endpoint chars */
2894                      }
2895                 }
2896                else {
2897 #endif
2898                    d -= 2;              /* eat the first char and the - */
2899                    min = (U8)*d;        /* first char in range */
2900                    max = (U8)d[1];      /* last char in range  */
2901 #ifdef EBCDIC
2902                }
2903 #endif
2904
2905                 if (min > max) {
2906                     Perl_croak(aTHX_
2907                                "Invalid range \"%c-%c\" in transliteration operator",
2908                                (char)min, (char)max);
2909                 }
2910
2911 #ifdef EBCDIC
2912                 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2913                  * any subsets of these ranges into individual characters */
2914                 if (literal_endpoint == 2 &&
2915                     ((isLOWER_A(min) && isLOWER_A(max)) ||
2916                      (isUPPER_A(min) && isUPPER_A(max))))
2917                 {
2918                     for (i = min; i <= max; i++) {
2919                         if (isALPHA_A(i))
2920                             *d++ = i;
2921                     }
2922                 }
2923                 else
2924 #endif
2925                     for (i = min; i <= max; i++)
2926 #ifdef EBCDIC
2927                         if (has_utf8) {
2928                             append_utf8_from_native_byte(i, &d);
2929                         }
2930                         else
2931 #endif
2932                             *d++ = (char)i;
2933
2934 #ifdef EBCDIC
2935                 if (uvmax) {
2936                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2937                     if (uvmax > 0x101)
2938                         *d++ = (char) ILLEGAL_UTF8_BYTE;
2939                     if (uvmax > 0x100)
2940                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2941                 }
2942 #endif
2943
2944                 /* mark the range as done, and continue */
2945                 dorange = FALSE;
2946                 didrange = TRUE;
2947 #ifdef EBCDIC
2948                 literal_endpoint = 0;
2949 #endif
2950                 continue;
2951             }
2952
2953             /* range begins (ignore - as first or last char) */
2954             else if (*s == '-' && s+1 < send  && s != start) {
2955                 if (didrange) {
2956                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2957                 }
2958                 if (has_utf8
2959 #ifdef EBCDIC
2960                     && !native_range
2961 #endif
2962                     ) {
2963                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
2964                     s++;
2965                     continue;
2966                 }
2967                 dorange = TRUE;
2968                 s++;
2969             }
2970             else {
2971                 didrange = FALSE;
2972 #ifdef EBCDIC
2973                 literal_endpoint = 0;
2974                 native_range = TRUE;
2975 #endif
2976             }
2977         }
2978
2979         /* if we get here, we're not doing a transliteration */
2980
2981         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2982             char *s1 = s-1;
2983             int esc = 0;
2984             while (s1 >= start && *s1-- == '\\')
2985                 esc = !esc;
2986             if (!esc)
2987                 in_charclass = TRUE;
2988         }
2989
2990         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2991             char *s1 = s-1;
2992             int esc = 0;
2993             while (s1 >= start && *s1-- == '\\')
2994                 esc = !esc;
2995             if (!esc)
2996                 in_charclass = FALSE;
2997         }
2998
2999         /* skip for regexp comments /(?#comment)/, except for the last
3000          * char, which will be done separately.
3001          * Stop on (?{..}) and friends */
3002
3003         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3004             if (s[2] == '#') {
3005                 while (s+1 < send && *s != ')')
3006                     *d++ = *s++;
3007             }
3008             else if (!PL_lex_casemods &&
3009                      (    s[2] == '{' /* This should match regcomp.c */
3010                       || (s[2] == '?' && s[3] == '{')))
3011             {
3012                 break;
3013             }
3014         }
3015
3016         /* likewise skip #-initiated comments in //x patterns */
3017         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3018           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3019             while (s+1 < send && *s != '\n')
3020                 *d++ = *s++;
3021         }
3022
3023         /* no further processing of single-quoted regex */
3024         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3025             goto default_action;
3026
3027         /* check for embedded arrays
3028            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3029            */
3030         else if (*s == '@' && s[1]) {
3031             if (isWORDCHAR_lazy_if(s+1,UTF))
3032                 break;
3033             if (strchr(":'{$", s[1]))
3034                 break;
3035             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3036                 break; /* in regexp, neither @+ nor @- are interpolated */
3037         }
3038
3039         /* check for embedded scalars.  only stop if we're sure it's a
3040            variable.
3041         */
3042         else if (*s == '$') {
3043             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3044                 break;
3045             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3046                 if (s[1] == '\\') {
3047                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3048                                    "Possible unintended interpolation of $\\ in regex");
3049                 }
3050                 break;          /* in regexp, $ might be tail anchor */
3051             }
3052         }
3053
3054         /* End of else if chain - OP_TRANS rejoin rest */
3055
3056         /* backslashes */
3057         if (*s == '\\' && s+1 < send) {
3058             char* e;    /* Can be used for ending '}', etc. */
3059
3060             s++;
3061
3062             /* warn on \1 - \9 in substitution replacements, but note that \11
3063              * is an octal; and \19 is \1 followed by '9' */
3064             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3065                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3066             {
3067                 /* diag_listed_as: \%d better written as $%d */
3068                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3069                 *--s = '$';
3070                 break;
3071             }
3072
3073             /* string-change backslash escapes */
3074             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3075                 --s;
3076                 break;
3077             }
3078             /* In a pattern, process \N, but skip any other backslash escapes.
3079              * This is because we don't want to translate an escape sequence
3080              * into a meta symbol and have the regex compiler use the meta
3081              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3082              * in spite of this, we do have to process \N here while the proper
3083              * charnames handler is in scope.  See bugs #56444 and #62056.
3084              * There is a complication because \N in a pattern may also stand
3085              * for 'match a non-nl', and not mean a charname, in which case its
3086              * processing should be deferred to the regex compiler.  To be a
3087              * charname it must be followed immediately by a '{', and not look
3088              * like \N followed by a curly quantifier, i.e., not something like
3089              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3090              * quantifier */
3091             else if (PL_lex_inpat
3092                     && (*s != 'N'
3093                         || s[1] != '{'
3094                         || regcurly(s + 1)))
3095             {
3096                 *d++ = '\\';
3097                 goto default_action;
3098             }
3099
3100             switch (*s) {
3101
3102             /* quoted - in transliterations */
3103             case '-':
3104                 if (PL_lex_inwhat == OP_TRANS) {
3105                     *d++ = *s++;
3106                     continue;
3107                 }
3108                 /* FALLTHROUGH */
3109             default:
3110                 {
3111                     if ((isALPHANUMERIC(*s)))
3112                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3113                                        "Unrecognized escape \\%c passed through",
3114                                        *s);
3115                     /* default action is to copy the quoted character */
3116                     goto default_action;
3117                 }
3118
3119             /* eg. \132 indicates the octal constant 0132 */
3120             case '0': case '1': case '2': case '3':
3121             case '4': case '5': case '6': case '7':
3122                 {
3123                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3124                     STRLEN len = 3;
3125                     uv = grok_oct(s, &len, &flags, NULL);
3126                     s += len;
3127                     if (len < 3 && s < send && isDIGIT(*s)
3128                         && ckWARN(WARN_MISC))
3129                     {
3130                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3131                                     "%s", form_short_octal_warning(s, len));
3132                     }
3133                 }
3134                 goto NUM_ESCAPE_INSERT;
3135
3136             /* eg. \o{24} indicates the octal constant \024 */
3137             case 'o':
3138                 {
3139                     const char* error;
3140
3141                     bool valid = grok_bslash_o(&s, &uv, &error,
3142                                                TRUE, /* Output warning */
3143                                                FALSE, /* Not strict */
3144                                                TRUE, /* Output warnings for
3145                                                          non-portables */
3146                                                UTF);
3147                     if (! valid) {
3148                         yyerror(error);
3149                         continue;
3150                     }
3151                     goto NUM_ESCAPE_INSERT;
3152                 }
3153
3154             /* eg. \x24 indicates the hex constant 0x24 */
3155             case 'x':
3156                 {
3157                     const char* error;
3158
3159                     bool valid = grok_bslash_x(&s, &uv, &error,
3160                                                TRUE, /* Output warning */
3161                                                FALSE, /* Not strict */
3162                                                TRUE,  /* Output warnings for
3163                                                          non-portables */
3164                                                UTF);
3165                     if (! valid) {
3166                         yyerror(error);
3167                         continue;
3168                     }
3169                 }
3170
3171               NUM_ESCAPE_INSERT:
3172                 /* Insert oct or hex escaped character.  There will always be
3173                  * enough room in sv since such escapes will be longer than any
3174                  * UTF-8 sequence they can end up as, except if they force us
3175                  * to recode the rest of the string into utf8 */
3176
3177                 /* Here uv is the ordinal of the next character being added */
3178                 if (!UVCHR_IS_INVARIANT(uv)) {
3179                     if (!has_utf8 && uv > 255) {
3180                         /* Might need to recode whatever we have accumulated so
3181                          * far if it contains any chars variant in utf8 or
3182                          * utf-ebcdic. */
3183
3184                         SvCUR_set(sv, d - SvPVX_const(sv));
3185                         SvPOK_on(sv);
3186                         *d = '\0';
3187                         /* See Note on sizing above.  */
3188                         sv_utf8_upgrade_flags_grow(sv,
3189                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3190                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3191                         d = SvPVX(sv) + SvCUR(sv);
3192                         has_utf8 = TRUE;
3193                     }
3194
3195                     if (has_utf8) {
3196                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3197                         if (PL_lex_inwhat == OP_TRANS &&
3198                             PL_sublex_info.sub_op) {
3199                             PL_sublex_info.sub_op->op_private |=
3200                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3201                                              : OPpTRANS_TO_UTF);
3202                         }
3203 #ifdef EBCDIC
3204                         if (uv > 255 && !dorange)
3205                             native_range = FALSE;
3206 #endif
3207                     }
3208                     else {
3209                         *d++ = (char)uv;
3210                     }
3211                 }
3212                 else {
3213                     *d++ = (char) uv;
3214                 }
3215                 continue;
3216
3217             case 'N':
3218                 /* In a non-pattern \N must be a named character, like \N{LATIN
3219                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3220                  * mean to match a non-newline.  For non-patterns, named
3221                  * characters are converted to their string equivalents. In
3222                  * patterns, named characters are not converted to their
3223                  * ultimate forms for the same reasons that other escapes
3224                  * aren't.  Instead, they are converted to the \N{U+...} form
3225                  * to get the value from the charnames that is in effect right
3226                  * now, while preserving the fact that it was a named character
3227                  * so that the regex compiler knows this */
3228
3229                 /* The structure of this section of code (besides checking for
3230                  * errors and upgrading to utf8) is:
3231                  *  Further disambiguate between the two meanings of \N, and if
3232                  *      not a charname, go process it elsewhere
3233                  *  If of form \N{U+...}, pass it through if a pattern;
3234                  *      otherwise convert to utf8
3235                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3236                  *  pattern; otherwise convert to utf8 */
3237
3238                 /* Here, s points to the 'N'; the test below is guaranteed to
3239                  * succeed if we are being called on a pattern as we already
3240                  * know from a test above that the next character is a '{'.
3241                  * On a non-pattern \N must mean 'named sequence, which
3242                  * requires braces */
3243                 s++;
3244                 if (*s != '{') {
3245                     yyerror("Missing braces on \\N{}");
3246                     continue;
3247                 }
3248                 s++;
3249
3250                 /* If there is no matching '}', it is an error. */
3251                 if (! (e = strchr(s, '}'))) {
3252                     if (! PL_lex_inpat) {
3253                         yyerror("Missing right brace on \\N{}");
3254                     } else {
3255                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3256                     }
3257                     continue;
3258                 }
3259
3260                 /* Here it looks like a named character */
3261
3262                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3263                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3264                                 | PERL_SCAN_DISALLOW_PREFIX;
3265                     STRLEN len;
3266
3267                     /* For \N{U+...}, the '...' is a unicode value even on
3268                      * EBCDIC machines */
3269                     s += 2;         /* Skip to next char after the 'U+' */
3270                     len = e - s;
3271                     uv = grok_hex(s, &len, &flags, NULL);
3272                     if (len == 0 || len != (STRLEN)(e - s)) {
3273                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3274                         s = e + 1;
3275                         continue;
3276                     }
3277
3278                     if (PL_lex_inpat) {
3279
3280                         /* On non-EBCDIC platforms, pass through to the regex
3281                          * compiler unchanged.  The reason we evaluated the
3282                          * number above is to make sure there wasn't a syntax
3283                          * error.  But on EBCDIC we convert to native so
3284                          * downstream code can continue to assume it's native
3285                          */
3286                         s -= 5;     /* Include the '\N{U+' */
3287 #ifdef EBCDIC
3288                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3289                                                                and the \0 */
3290                                     "\\N{U+%X}",
3291                                     (unsigned int) UNI_TO_NATIVE(uv));
3292 #else
3293                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3294                         d += e - s + 1;
3295 #endif
3296                     }
3297                     else {  /* Not a pattern: convert the hex to string */
3298
3299                          /* If destination is not in utf8, unconditionally
3300                           * recode it to be so.  This is because \N{} implies
3301                           * Unicode semantics, and scalars have to be in utf8
3302                           * to guarantee those semantics */
3303                         if (! has_utf8) {
3304                             SvCUR_set(sv, d - SvPVX_const(sv));
3305                             SvPOK_on(sv);
3306                             *d = '\0';
3307                             /* See Note on sizing above.  */
3308                             sv_utf8_upgrade_flags_grow(
3309                                         sv,
3310                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3311                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3312                             d = SvPVX(sv) + SvCUR(sv);
3313                             has_utf8 = TRUE;
3314                         }
3315
3316                         /* Add the (Unicode) code point to the output. */
3317                         if (UNI_IS_INVARIANT(uv)) {
3318                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3319                         }
3320                         else {
3321                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3322                         }
3323                     }
3324                 }
3325                 else /* Here is \N{NAME} but not \N{U+...}. */
3326                      if ((res = get_and_check_backslash_N_name(s, e)))
3327                 {
3328                     STRLEN len;
3329                     const char *str = SvPV_const(res, len);
3330                     if (PL_lex_inpat) {
3331
3332                         if (! len) { /* The name resolved to an empty string */
3333                             Copy("\\N{}", d, 4, char);
3334                             d += 4;
3335                         }
3336                         else {
3337                             /* In order to not lose information for the regex
3338                             * compiler, pass the result in the specially made
3339                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3340                             * the code points in hex of each character
3341                             * returned by charnames */
3342
3343                             const char *str_end = str + len;
3344                             const STRLEN off = d - SvPVX_const(sv);
3345
3346                             if (! SvUTF8(res)) {
3347                                 /* For the non-UTF-8 case, we can determine the
3348                                  * exact length needed without having to parse
3349                                  * through the string.  Each character takes up
3350                                  * 2 hex digits plus either a trailing dot or
3351                                  * the "}" */
3352                                 d = off + SvGROW(sv, off
3353                                                     + 3 * len
3354                                                     + 6 /* For the "\N{U+", and
3355                                                            trailing NUL */
3356                                                     + (STRLEN)(send - e));
3357                                 Copy("\\N{U+", d, 5, char);
3358                                 d += 5;
3359                                 while (str < str_end) {
3360                                     char hex_string[4];
3361                                     int len =
3362                                         my_snprintf(hex_string,
3363                                                     sizeof(hex_string),
3364                                                     "%02X.", (U8) *str);
3365                                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3366                                     Copy(hex_string, d, 3, char);
3367                                     d += 3;
3368                                     str++;
3369                                 }
3370                                 d--;    /* We will overwrite below the final
3371                                            dot with a right brace */
3372                             }
3373                             else {
3374                                 STRLEN char_length; /* cur char's byte length */
3375
3376                                 /* and the number of bytes after this is
3377                                  * translated into hex digits */
3378                                 STRLEN output_length;
3379
3380                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3381                                  * for max('U+', '.'); and 1 for NUL */
3382                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3383
3384                                 /* Get the first character of the result. */
3385                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3386                                                         len,
3387                                                         &char_length,
3388                                                         UTF8_ALLOW_ANYUV);
3389                                 /* Convert first code point to hex, including
3390                                  * the boiler plate before it. */
3391                                 output_length =
3392                                     my_snprintf(hex_string, sizeof(hex_string),
3393                                                 "\\N{U+%X",
3394                                                 (unsigned int) uv);
3395
3396                                 /* Make sure there is enough space to hold it */
3397                                 d = off + SvGROW(sv, off
3398                                                     + output_length
3399                                                     + (STRLEN)(send - e)
3400                                                     + 2);       /* '}' + NUL */
3401                                 /* And output it */
3402                                 Copy(hex_string, d, output_length, char);
3403                                 d += output_length;
3404
3405                                 /* For each subsequent character, append dot and
3406                                 * its ordinal in hex */
3407                                 while ((str += char_length) < str_end) {
3408                                     const STRLEN off = d - SvPVX_const(sv);
3409                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3410                                                             str_end - str,
3411                                                             &char_length,
3412                                                             UTF8_ALLOW_ANYUV);
3413                                     output_length =
3414                                         my_snprintf(hex_string,
3415                                                     sizeof(hex_string),
3416                                                     ".%X",
3417                                                     (unsigned int) uv);
3418
3419                                     d = off + SvGROW(sv, off
3420                                                         + output_length
3421                                                         + (STRLEN)(send - e)
3422                                                         + 2);   /* '}' +  NUL */
3423                                     Copy(hex_string, d, output_length, char);
3424                                     d += output_length;
3425                                 }
3426                             }
3427
3428                             *d++ = '}'; /* Done.  Add the trailing brace */
3429                         }
3430                     }
3431                     else { /* Here, not in a pattern.  Convert the name to a
3432                             * string. */
3433
3434                          /* If destination is not in utf8, unconditionally
3435                           * recode it to be so.  This is because \N{} implies
3436                           * Unicode semantics, and scalars have to be in utf8
3437                           * to guarantee those semantics */
3438                         if (! has_utf8) {
3439                             SvCUR_set(sv, d - SvPVX_const(sv));
3440                             SvPOK_on(sv);
3441                             *d = '\0';
3442                             /* See Note on sizing above.  */
3443                             sv_utf8_upgrade_flags_grow(sv,
3444                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3445                                                 len + (STRLEN)(send - s) + 1);
3446                             d = SvPVX(sv) + SvCUR(sv);
3447                             has_utf8 = TRUE;
3448                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3449
3450                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3451                              * set correctly here). */
3452                             const STRLEN off = d - SvPVX_const(sv);
3453                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3454                         }
3455                         if (! SvUTF8(res)) {    /* Make sure is \N{} return is UTF-8 */
3456                             sv_utf8_upgrade(res);
3457                             str = SvPV_const(res, len);
3458                         }
3459                         Copy(str, d, len, char);
3460                         d += len;
3461                     }
3462
3463                     SvREFCNT_dec(res);
3464
3465                 } /* End \N{NAME} */
3466 #ifdef EBCDIC
3467                 if (!dorange)
3468                     native_range = FALSE; /* \N{} is defined to be Unicode */
3469 #endif
3470                 s = e + 1;  /* Point to just after the '}' */
3471                 continue;
3472
3473             /* \c is a control character */
3474             case 'c':
3475                 s++;
3476                 if (s < send) {
3477                     *d++ = grok_bslash_c(*s++, 1);
3478                 }
3479                 else {
3480                     yyerror("Missing control char name in \\c");
3481                 }
3482                 continue;
3483
3484             /* printf-style backslashes, formfeeds, newlines, etc */
3485             case 'b':
3486                 *d++ = '\b';
3487                 break;
3488             case 'n':
3489                 *d++ = '\n';
3490                 break;
3491             case 'r':
3492                 *d++ = '\r';
3493                 break;
3494             case 'f':
3495                 *d++ = '\f';
3496                 break;
3497             case 't':
3498                 *d++ = '\t';
3499                 break;
3500             case 'e':
3501                 *d++ = ASCII_TO_NATIVE('\033');
3502                 break;
3503             case 'a':
3504                 *d++ = '\a';
3505                 break;
3506             } /* end switch */
3507
3508             s++;
3509             continue;
3510         } /* end if (backslash) */
3511 #ifdef EBCDIC
3512         else
3513             literal_endpoint++;
3514 #endif
3515
3516     default_action:
3517         /* If we started with encoded form, or already know we want it,
3518            then encode the next character */
3519         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3520             STRLEN len  = 1;
3521
3522
3523             /* One might think that it is wasted effort in the case of the
3524              * source being utf8 (this_utf8 == TRUE) to take the next character
3525              * in the source, convert it to an unsigned value, and then convert
3526              * it back again.  But the source has not been validated here.  The
3527              * routine that does the conversion checks for errors like
3528              * malformed utf8 */
3529
3530             const UV nextuv   = (this_utf8)
3531                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3532                                 : (UV) ((U8) *s);
3533             const STRLEN need = UNISKIP(nextuv);
3534             if (!has_utf8) {
3535                 SvCUR_set(sv, d - SvPVX_const(sv));
3536                 SvPOK_on(sv);
3537                 *d = '\0';
3538                 /* See Note on sizing above.  */
3539                 sv_utf8_upgrade_flags_grow(sv,
3540                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3541                                         need + (STRLEN)(send - s) + 1);
3542                 d = SvPVX(sv) + SvCUR(sv);
3543                 has_utf8 = TRUE;
3544             } else if (need > len) {
3545                 /* encoded value larger than old, may need extra space (NOTE:
3546                  * SvCUR() is not set correctly here).   See Note on sizing
3547                  * above.  */
3548                 const STRLEN off = d - SvPVX_const(sv);
3549                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3550             }
3551             s += len;
3552
3553             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3554 #ifdef EBCDIC
3555             if (uv > 255 && !dorange)
3556                 native_range = FALSE;
3557 #endif
3558         }
3559         else {
3560             *d++ = *s++;
3561         }
3562     } /* while loop to process each character */
3563
3564     /* terminate the string and set up the sv */
3565     *d = '\0';
3566     SvCUR_set(sv, d - SvPVX_const(sv));
3567     if (SvCUR(sv) >= SvLEN(sv))
3568         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3569                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3570
3571     SvPOK_on(sv);
3572     if (PL_encoding && !has_utf8) {
3573         sv_recode_to_utf8(sv, PL_encoding);
3574         if (SvUTF8(sv))
3575             has_utf8 = TRUE;
3576     }
3577     if (has_utf8) {
3578         SvUTF8_on(sv);
3579         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3580             PL_sublex_info.sub_op->op_private |=
3581                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3582         }
3583     }
3584
3585     /* shrink the sv if we allocated more than we used */
3586     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3587         SvPV_shrink_to_cur(sv);
3588     }
3589
3590     /* return the substring (via pl_yylval) only if we parsed anything */
3591     if (s > start) {
3592         char *s2 = start;
3593         for (; s2 < s; s2++) {
3594             if (*s2 == '\n')
3595                 COPLINE_INC_WITH_HERELINES;
3596         }
3597         SvREFCNT_inc_simple_void_NN(sv);
3598         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3599             && ! PL_parser->lex_re_reparsing)
3600         {
3601             const char *const key = PL_lex_inpat ? "qr" : "q";
3602             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3603             const char *type;
3604             STRLEN typelen;
3605
3606             if (PL_lex_inwhat == OP_TRANS) {
3607                 type = "tr";
3608                 typelen = 2;
3609             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3610                 type = "s";
3611                 typelen = 1;
3612             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3613                 type = "q";
3614                 typelen = 1;
3615             } else  {
3616                 type = "qq";
3617                 typelen = 2;
3618             }
3619
3620             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3621                                 type, typelen);
3622         }
3623         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3624     }
3625     LEAVE_with_name("scan_const");
3626     return s;
3627 }
3628
3629 /* S_intuit_more
3630  * Returns TRUE if there's more to the expression (e.g., a subscript),
3631  * FALSE otherwise.
3632  *
3633  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3634  *
3635  * ->[ and ->{ return TRUE
3636  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3637  * { and [ outside a pattern are always subscripts, so return TRUE
3638  * if we're outside a pattern and it's not { or [, then return FALSE
3639  * if we're in a pattern and the first char is a {
3640  *   {4,5} (any digits around the comma) returns FALSE
3641  * if we're in a pattern and the first char is a [
3642  *   [] returns FALSE
3643  *   [SOMETHING] has a funky algorithm to decide whether it's a
3644  *      character class or not.  It has to deal with things like
3645  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3646  * anything else returns TRUE
3647  */
3648
3649 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3650
3651 STATIC int
3652 S_intuit_more(pTHX_ char *s)
3653 {
3654     PERL_ARGS_ASSERT_INTUIT_MORE;
3655
3656     if (PL_lex_brackets)
3657         return TRUE;
3658     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3659         return TRUE;
3660     if (*s == '-' && s[1] == '>'
3661      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3662      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3663         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3664         return TRUE;
3665     if (*s != '{' && *s != '[')
3666         return FALSE;
3667     if (!PL_lex_inpat)
3668         return TRUE;
3669
3670     /* In a pattern, so maybe we have {n,m}. */
3671     if (*s == '{') {
3672         if (regcurly(s)) {
3673             return FALSE;
3674         }
3675         return TRUE;
3676     }
3677
3678     /* On the other hand, maybe we have a character class */
3679
3680     s++;
3681     if (*s == ']' || *s == '^')
3682         return FALSE;
3683     else {
3684         /* this is terrifying, and it works */
3685         int weight;
3686         char seen[256];
3687         const char * const send = strchr(s,']');
3688         unsigned char un_char, last_un_char;
3689         char tmpbuf[sizeof PL_tokenbuf * 4];
3690
3691         if (!send)              /* has to be an expression */
3692             return TRUE;
3693         weight = 2;             /* let's weigh the evidence */
3694
3695         if (*s == '$')
3696             weight -= 3;
3697         else if (isDIGIT(*s)) {
3698             if (s[1] != ']') {
3699                 if (isDIGIT(s[1]) && s[2] == ']')
3700                     weight -= 10;
3701             }
3702             else
3703                 weight -= 100;
3704         }
3705         Zero(seen,256,char);
3706         un_char = 255;
3707         for (; s < send; s++) {
3708             last_un_char = un_char;
3709             un_char = (unsigned char)*s;
3710             switch (*s) {
3711             case '@':
3712             case '&':
3713             case '$':
3714                 weight -= seen[un_char] * 10;
3715                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3716                     int len;
3717                     char *tmp = PL_bufend;
3718                     PL_bufend = (char*)send;
3719                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3720                     PL_bufend = tmp;
3721                     len = (int)strlen(tmpbuf);
3722                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3723                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3724                         weight -= 100;
3725                     else
3726                         weight -= 10;
3727                 }
3728                 else if (*s == '$' && s[1] &&
3729                   strchr("[#!%*<>()-=",s[1])) {
3730                     if (/*{*/ strchr("])} =",s[2]))
3731                         weight -= 10;
3732                     else
3733                         weight -= 1;
3734                 }
3735                 break;
3736             case '\\':
3737                 un_char = 254;
3738                 if (s[1]) {
3739                     if (strchr("wds]",s[1]))
3740                         weight += 100;
3741                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3742                         weight += 1;
3743                     else if (strchr("rnftbxcav",s[1]))
3744                         weight += 40;
3745                     else if (isDIGIT(s[1])) {
3746                         weight += 40;
3747                         while (s[1] && isDIGIT(s[1]))
3748                             s++;
3749                     }
3750                 }
3751                 else
3752                     weight += 100;
3753                 break;
3754             case '-':
3755                 if (s[1] == '\\')
3756                     weight += 50;
3757                 if (strchr("aA01! ",last_un_char))
3758                     weight += 30;
3759                 if (strchr("zZ79~",s[1]))
3760                     weight += 30;
3761                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3762                     weight -= 5;        /* cope with negative subscript */
3763                 break;
3764             default:
3765                 if (!isWORDCHAR(last_un_char)
3766                     && !(last_un_char == '$' || last_un_char == '@'
3767                          || last_un_char == '&')
3768                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3769                     char *d = tmpbuf;
3770                     while (isALPHA(*s))
3771                         *d++ = *s++;
3772                     *d = '\0';
3773                     if (keyword(tmpbuf, d - tmpbuf, 0))
3774                         weight -= 150;
3775                 }
3776                 if (un_char == last_un_char + 1)
3777                     weight += 5;
3778                 weight -= seen[un_char];
3779                 break;
3780             }
3781             seen[un_char]++;
3782         }
3783         if (weight >= 0)        /* probably a character class */
3784             return FALSE;
3785     }
3786
3787     return TRUE;
3788 }
3789
3790 /*
3791  * S_intuit_method
3792  *
3793  * Does all the checking to disambiguate
3794  *   foo bar
3795  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3796  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3797  *
3798  * First argument is the stuff after the first token, e.g. "bar".
3799  *
3800  * Not a method if foo is a filehandle.
3801  * Not a method if foo is a subroutine prototyped to take a filehandle.
3802  * Not a method if it's really "Foo $bar"
3803  * Method if it's "foo $bar"
3804  * Not a method if it's really "print foo $bar"
3805  * Method if it's really "foo package::" (interpreted as package->foo)
3806  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3807  * Not a method if bar is a filehandle or package, but is quoted with
3808  *   =>
3809  */
3810
3811 STATIC int
3812 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3813 {
3814     char *s = start + (*start == '$');
3815     char tmpbuf[sizeof PL_tokenbuf];
3816     STRLEN len;
3817     GV* indirgv;
3818
3819     PERL_ARGS_ASSERT_INTUIT_METHOD;
3820
3821     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3822             return 0;
3823     if (cv && SvPOK(cv)) {
3824         const char *proto = CvPROTO(cv);
3825         if (proto) {
3826             while (*proto && (isSPACE(*proto) || *proto == ';'))
3827                 proto++;
3828             if (*proto == '*')
3829                 return 0;
3830         }
3831     }
3832
3833     if (*start == '$') {
3834         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3835                 isUPPER(*PL_tokenbuf))
3836             return 0;
3837         s = PEEKSPACE(s);
3838         PL_bufptr = start;
3839         PL_expect = XREF;
3840         return *s == '(' ? FUNCMETH : METHOD;
3841     }
3842
3843     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3844     /* start is the beginning of the possible filehandle/object,
3845      * and s is the end of it
3846      * tmpbuf is a copy of it (but with single quotes as double colons)
3847      */
3848
3849     if (!keyword(tmpbuf, len, 0)) {
3850         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3851             len -= 2;
3852             tmpbuf[len] = '\0';
3853             goto bare_package;
3854         }
3855         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3856         if (indirgv && GvCVu(indirgv))
3857             return 0;
3858         /* filehandle or package name makes it a method */
3859         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3860             s = PEEKSPACE(s);
3861             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3862                 return 0;       /* no assumptions -- "=>" quotes bareword */
3863       bare_package:
3864             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3865                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3866             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3867             PL_expect = XTERM;
3868             force_next(WORD);
3869             PL_bufptr = s;
3870             return *s == '(' ? FUNCMETH : METHOD;
3871         }
3872     }
3873     return 0;
3874 }
3875
3876 /* Encoded script support. filter_add() effectively inserts a
3877  * 'pre-processing' function into the current source input stream.
3878  * Note that the filter function only applies to the current source file
3879  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3880  *
3881  * The datasv parameter (which may be NULL) can be used to pass
3882  * private data to this instance of the filter. The filter function
3883  * can recover the SV using the FILTER_DATA macro and use it to
3884  * store private buffers and state information.
3885  *
3886  * The supplied datasv parameter is upgraded to a PVIO type
3887  * and the IoDIRP/IoANY field is used to store the function pointer,
3888  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3889  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3890  * private use must be set using malloc'd pointers.
3891  */
3892
3893 SV *
3894 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3895 {
3896     if (!funcp)
3897         return NULL;
3898
3899     if (!PL_parser)
3900         return NULL;
3901
3902     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3903         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3904
3905     if (!PL_rsfp_filters)
3906         PL_rsfp_filters = newAV();
3907     if (!datasv)
3908         datasv = newSV(0);
3909     SvUPGRADE(datasv, SVt_PVIO);
3910     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3911     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3912     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3913                           FPTR2DPTR(void *, IoANY(datasv)),
3914                           SvPV_nolen(datasv)));
3915     av_unshift(PL_rsfp_filters, 1);
3916     av_store(PL_rsfp_filters, 0, datasv) ;
3917     if (
3918         !PL_parser->filtered
3919      && PL_parser->lex_flags & LEX_EVALBYTES
3920      && PL_bufptr < PL_bufend
3921     ) {
3922         const char *s = PL_bufptr;
3923         while (s < PL_bufend) {
3924             if (*s == '\n') {
3925                 SV *linestr = PL_parser->linestr;
3926                 char *buf = SvPVX(linestr);
3927                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3928                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3929                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3930                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3931                 STRLEN const last_uni_pos =
3932                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3933                 STRLEN const last_lop_pos =
3934                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3935                 av_push(PL_rsfp_filters, linestr);
3936                 PL_parser->linestr =
3937                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3938                 buf = SvPVX(PL_parser->linestr);
3939                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3940                 PL_parser->bufptr = buf + bufptr_pos;
3941                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3942                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3943                 PL_parser->linestart = buf + linestart_pos;
3944                 if (PL_parser->last_uni)
3945                     PL_parser->last_uni = buf + last_uni_pos;
3946                 if (PL_parser->last_lop)
3947                     PL_parser->last_lop = buf + last_lop_pos;
3948                 SvLEN(linestr) = SvCUR(linestr);
3949                 SvCUR(linestr) = s-SvPVX(linestr);
3950                 PL_parser->filtered = 1;
3951                 break;
3952             }
3953             s++;
3954         }
3955     }
3956     return(datasv);
3957 }
3958
3959
3960 /* Delete most recently added instance of this filter function. */
3961 void
3962 Perl_filter_del(pTHX_ filter_t funcp)
3963 {
3964     SV *datasv;
3965
3966     PERL_ARGS_ASSERT_FILTER_DEL;
3967
3968 #ifdef DEBUGGING
3969     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3970                           FPTR2DPTR(void*, funcp)));
3971 #endif
3972     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3973         return;
3974     /* if filter is on top of stack (usual case) just pop it off */
3975     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3976     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3977         sv_free(av_pop(PL_rsfp_filters));
3978
3979         return;
3980     }
3981     /* we need to search for the correct entry and clear it     */
3982     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3983 }
3984
3985
3986 /* Invoke the idxth filter function for the current rsfp.        */
3987 /* maxlen 0 = read one text line */
3988 I32
3989 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3990 {
3991     filter_t funcp;
3992     SV *datasv = NULL;
3993     /* This API is bad. It should have been using unsigned int for maxlen.
3994        Not sure if we want to change the API, but if not we should sanity
3995        check the value here.  */
3996     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
3997
3998     PERL_ARGS_ASSERT_FILTER_READ;
3999
4000     if (!PL_parser || !PL_rsfp_filters)
4001         return -1;
4002     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4003         /* Provide a default input filter to make life easy.    */
4004         /* Note that we append to the line. This is handy.      */
4005         DEBUG_P(PerlIO_printf(Perl_debug_log,
4006                               "filter_read %d: from rsfp\n", idx));
4007         if (correct_length) {
4008             /* Want a block */
4009             int len ;
4010             const int old_len = SvCUR(buf_sv);
4011
4012             /* ensure buf_sv is large enough */
4013             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4014             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4015                                    correct_length)) <= 0) {
4016                 if (PerlIO_error(PL_rsfp))
4017                     return -1;          /* error */
4018                 else
4019                     return 0 ;          /* end of file */
4020             }
4021             SvCUR_set(buf_sv, old_len + len) ;
4022             SvPVX(buf_sv)[old_len + len] = '\0';
4023         } else {
4024             /* Want a line */
4025             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4026                 if (PerlIO_error(PL_rsfp))
4027                     return -1;          /* error */
4028                 else
4029                     return 0 ;          /* end of file */
4030             }
4031         }
4032         return SvCUR(buf_sv);
4033     }
4034     /* Skip this filter slot if filter has been deleted */
4035     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4036         DEBUG_P(PerlIO_printf(Perl_debug_log,
4037                               "filter_read %d: skipped (filter deleted)\n",
4038                               idx));
4039         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4040     }
4041     if (SvTYPE(datasv) != SVt_PVIO) {
4042         if (correct_length) {
4043             /* Want a block */
4044             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4045             if (!remainder) return 0; /* eof */
4046             if (correct_length > remainder) correct_length = remainder;
4047             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4048             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4049         } else {
4050             /* Want a line */
4051             const char *s = SvEND(datasv);
4052             const char *send = SvPVX(datasv) + SvLEN(datasv);
4053             while (s < send) {
4054                 if (*s == '\n') {
4055                     s++;
4056                     break;
4057                 }
4058                 s++;
4059             }
4060             if (s == send) return 0; /* eof */
4061             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4062             SvCUR_set(datasv, s-SvPVX(datasv));
4063         }
4064         return SvCUR(buf_sv);
4065     }
4066     /* Get function pointer hidden within datasv        */
4067     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4068     DEBUG_P(PerlIO_printf(Perl_debug_log,
4069                           "filter_read %d: via function %p (%s)\n",
4070                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4071     /* Call function. The function is expected to       */
4072     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4073     /* Return: <0:error, =0:eof, >0:not eof             */
4074     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4075 }
4076
4077 STATIC char *
4078 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4079 {
4080     PERL_ARGS_ASSERT_FILTER_GETS;
4081
4082 #ifdef PERL_CR_FILTER
4083     if (!PL_rsfp_filters) {
4084         filter_add(S_cr_textfilter,NULL);
4085     }
4086 #endif
4087     if (PL_rsfp_filters) {
4088         if (!append)
4089             SvCUR_set(sv, 0);   /* start with empty line        */
4090         if (FILTER_READ(0, sv, 0) > 0)
4091             return ( SvPVX(sv) ) ;
4092         else
4093             return NULL ;
4094     }
4095     else
4096         return (sv_gets(sv, PL_rsfp, append));
4097 }
4098
4099 STATIC HV *
4100 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4101 {
4102     GV *gv;
4103
4104     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4105
4106     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4107         return PL_curstash;
4108
4109     if (len > 2 &&
4110         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4111         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4112     {
4113         return GvHV(gv);                        /* Foo:: */
4114     }
4115
4116     /* use constant CLASS => 'MyClass' */
4117     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4118     if (gv && GvCV(gv)) {
4119         SV * const sv = cv_const_sv(GvCV(gv));
4120         if (sv)
4121             pkgname = SvPV_const(sv, len);
4122     }
4123
4124     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4125 }
4126
4127
4128 STATIC char *
4129 S_tokenize_use(pTHX_ int is_use, char *s) {
4130     PERL_ARGS_ASSERT_TOKENIZE_USE;
4131
4132     if (PL_expect != XSTATE)
4133         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4134                     is_use ? "use" : "no"));
4135     PL_expect = XTERM;
4136     s = SKIPSPACE1(s);
4137     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4138         s = force_version(s, TRUE);
4139         if (*s == ';' || *s == '}'
4140                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4141             NEXTVAL_NEXTTOKE.opval = NULL;
4142             force_next(WORD);
4143         }
4144         else if (*s == 'v') {
4145             s = force_word(s,WORD,FALSE,TRUE);
4146             s = force_version(s, FALSE);
4147         }
4148     }
4149     else {
4150         s = force_word(s,WORD,FALSE,TRUE);
4151         s = force_version(s, FALSE);
4152     }
4153     pl_yylval.ival = is_use;
4154     return s;
4155 }
4156 #ifdef DEBUGGING
4157     static const char* const exp_name[] =
4158         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4159           "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4160         };
4161 #endif
4162
4163 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4164 STATIC bool
4165 S_word_takes_any_delimeter(char *p, STRLEN len)
4166 {
4167     return (len == 1 && strchr("msyq", p[0])) ||
4168            (len == 2 && (
4169             (p[0] == 't' && p[1] == 'r') ||
4170             (p[0] == 'q' && strchr("qwxr", p[1]))));
4171 }
4172
4173 static void
4174 S_check_scalar_slice(pTHX_ char *s)
4175 {
4176     s++;
4177     while (*s == ' ' || *s == '\t') s++;
4178     if (*s == 'q' && s[1] == 'w'
4179      && !isWORDCHAR_lazy_if(s+2,UTF))
4180         return;
4181     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4182         s += UTF ? UTF8SKIP(s) : 1;
4183     if (*s == '}' || *s == ']')
4184         pl_yylval.ival = OPpSLICEWARNING;
4185 }
4186
4187 /*
4188   yylex
4189
4190   Works out what to call the token just pulled out of the input
4191   stream.  The yacc parser takes care of taking the ops we return and
4192   stitching them into a tree.
4193
4194   Returns:
4195     The type of the next token
4196
4197   Structure:
4198       Switch based on the current state:
4199           - if we already built the token before, use it
4200           - if we have a case modifier in a string, deal with that
4201           - handle other cases of interpolation inside a string
4202           - scan the next line if we are inside a format
4203       In the normal state switch on the next character:
4204           - default:
4205             if alphabetic, go to key lookup
4206             unrecoginized character - croak
4207           - 0/4/26: handle end-of-line or EOF
4208           - cases for whitespace
4209           - \n and #: handle comments and line numbers
4210           - various operators, brackets and sigils
4211           - numbers
4212           - quotes
4213           - 'v': vstrings (or go to key lookup)
4214           - 'x' repetition operator (or go to key lookup)
4215           - other ASCII alphanumerics (key lookup begins here):
4216               word before => ?
4217               keyword plugin
4218               scan built-in keyword (but do nothing with it yet)
4219               check for statement label
4220               check for lexical subs
4221                   goto just_a_word if there is one
4222               see whether built-in keyword is overridden
4223               switch on keyword number:
4224                   - default: just_a_word:
4225                       not a built-in keyword; handle bareword lookup
4226                       disambiguate between method and sub call
4227                       fall back to bareword
4228                   - cases for built-in keywords
4229 */
4230
4231
4232 int
4233 Perl_yylex(pTHX)
4234 {
4235     dVAR;
4236     char *s = PL_bufptr;
4237     char *d;
4238     STRLEN len;
4239     bool bof = FALSE;
4240     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4241     U8 formbrack = 0;
4242     U32 fake_eof = 0;
4243
4244     /* orig_keyword, gvp, and gv are initialized here because
4245      * jump to the label just_a_word_zero can bypass their
4246      * initialization later. */
4247     I32 orig_keyword = 0;
4248     GV *gv = NULL;
4249     GV **gvp = NULL;
4250
4251     DEBUG_T( {
4252         SV* tmp = newSVpvs("");
4253         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4254             (IV)CopLINE(PL_curcop),
4255             lex_state_names[PL_lex_state],
4256             exp_name[PL_expect],
4257             pv_display(tmp, s, strlen(s), 0, 60));
4258         SvREFCNT_dec(tmp);
4259     } );
4260
4261     switch (PL_lex_state) {
4262     case LEX_NORMAL:
4263     case LEX_INTERPNORMAL:
4264         break;
4265
4266     /* when we've already built the next token, just pull it out of the queue */
4267     case LEX_KNOWNEXT:
4268         PL_nexttoke--;
4269         pl_yylval = PL_nextval[PL_nexttoke];
4270         if (!PL_nexttoke) {
4271             PL_lex_state = PL_lex_defer;
4272             PL_expect = PL_lex_expect;
4273             PL_lex_defer = LEX_NORMAL;
4274         }
4275         {
4276             I32 next_type;
4277             next_type = PL_nexttype[PL_nexttoke];
4278             if (next_type & (7<<24)) {
4279                 if (next_type & (1<<24)) {
4280                     if (PL_lex_brackets > 100)
4281                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4282                     PL_lex_brackstack[PL_lex_brackets++] =
4283                         (char) ((next_type >> 16) & 0xff);
4284                 }
4285                 if (next_type & (2<<24))
4286                     PL_lex_allbrackets++;
4287                 if (next_type & (4<<24))
4288                     PL_lex_allbrackets--;
4289                 next_type &= 0xffff;
4290             }
4291             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4292         }
4293
4294     /* interpolated case modifiers like \L \U, including \Q and \E.
4295        when we get here, PL_bufptr is at the \
4296     */
4297     case LEX_INTERPCASEMOD:
4298 #ifdef DEBUGGING
4299         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4300             Perl_croak(aTHX_
4301                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4302                        PL_bufptr, PL_bufend, *PL_bufptr);
4303 #endif
4304         /* handle \E or end of string */
4305         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4306             /* if at a \E */
4307             if (PL_lex_casemods) {
4308                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4309                 PL_lex_casestack[PL_lex_casemods] = '\0';
4310
4311                 if (PL_bufptr != PL_bufend
4312                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4313                         || oldmod == 'F')) {
4314                     PL_bufptr += 2;
4315                     PL_lex_state = LEX_INTERPCONCAT;
4316                 }
4317                 PL_lex_allbrackets--;
4318                 return REPORT(')');
4319             }
4320             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4321                /* Got an unpaired \E */
4322                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4323                         "Useless use of \\E");
4324             }
4325             if (PL_bufptr != PL_bufend)
4326                 PL_bufptr += 2;
4327             PL_lex_state = LEX_INTERPCONCAT;
4328             return yylex();
4329         }
4330         else {
4331             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4332               "### Saw case modifier\n"); });
4333             s = PL_bufptr + 1;
4334             if (s[1] == '\\' && s[2] == 'E') {
4335                 PL_bufptr = s + 3;
4336                 PL_lex_state = LEX_INTERPCONCAT;
4337                 return yylex();
4338             }
4339             else {
4340                 I32 tmp;
4341                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4342                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4343                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4344                     (strchr(PL_lex_casestack, 'L')
4345                         || strchr(PL_lex_casestack, 'U')
4346                         || strchr(PL_lex_casestack, 'F'))) {
4347                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4348                     PL_lex_allbrackets--;
4349                     return REPORT(')');
4350                 }
4351                 if (PL_lex_casemods > 10)
4352                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4353                 PL_lex_casestack[PL_lex_casemods++] = *s;
4354                 PL_lex_casestack[PL_lex_casemods] = '\0';
4355                 PL_lex_state = LEX_INTERPCONCAT;
4356                 NEXTVAL_NEXTTOKE.ival = 0;
4357                 force_next((2<<24)|'(');
4358                 if (*s == 'l')
4359                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4360                 else if (*s == 'u')
4361                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4362                 else if (*s == 'L')
4363                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4364                 else if (*s == 'U')
4365                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4366                 else if (*s == 'Q')
4367                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4368                 else if (*s == 'F')
4369                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4370                 else
4371                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4372                 PL_bufptr = s + 1;
4373             }
4374             force_next(FUNC);
4375             if (PL_lex_starts) {
4376                 s = PL_bufptr;
4377                 PL_lex_starts = 0;
4378                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4379                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4380                     OPERATOR(',');
4381                 else
4382                     Aop(OP_CONCAT);
4383             }
4384             else
4385                 return yylex();
4386         }
4387
4388     case LEX_INTERPPUSH:
4389         return REPORT(sublex_push());
4390
4391     case LEX_INTERPSTART:
4392         if (PL_bufptr == PL_bufend)
4393             return REPORT(sublex_done());
4394         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4395               "### Interpolated variable\n"); });
4396         PL_expect = XTERM;
4397         /* for /@a/, we leave the joining for the regex engine to do
4398          * (unless we're within \Q etc) */
4399         PL_lex_dojoin = (*PL_bufptr == '@'
4400                             && (!PL_lex_inpat || PL_lex_casemods));
4401         PL_lex_state = LEX_INTERPNORMAL;
4402         if (PL_lex_dojoin) {
4403             NEXTVAL_NEXTTOKE.ival = 0;
4404             force_next(',');
4405             force_ident("\"", '$');
4406             NEXTVAL_NEXTTOKE.ival = 0;
4407             force_next('$');
4408             NEXTVAL_NEXTTOKE.ival = 0;
4409             force_next((2<<24)|'(');
4410             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4411             force_next(FUNC);
4412         }
4413         /* Convert (?{...}) and friends to 'do {...}' */
4414         if (PL_lex_inpat && *PL_bufptr == '(') {
4415             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4416             PL_bufptr += 2;
4417             if (*PL_bufptr != '{')
4418                 PL_bufptr++;
4419             PL_expect = XTERMBLOCK;
4420             force_next(DO);
4421         }
4422
4423         if (PL_lex_starts++) {
4424             s = PL_bufptr;
4425             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4426             if (!PL_lex_casemods && PL_lex_inpat)
4427                 OPERATOR(',');
4428             else
4429                 Aop(OP_CONCAT);
4430         }
4431         return yylex();
4432
4433     case LEX_INTERPENDMAYBE:
4434         if (intuit_more(PL_bufptr)) {
4435             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4436             break;
4437         }
4438         /* FALLTHROUGH */
4439
4440     case LEX_INTERPEND:
4441         if (PL_lex_dojoin) {
4442             const U8 dojoin_was = PL_lex_dojoin;
4443             PL_lex_dojoin = FALSE;
4444             PL_lex_state = LEX_INTERPCONCAT;
4445             PL_lex_allbrackets--;
4446             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4447         }
4448         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4449             && SvEVALED(PL_lex_repl))
4450         {
4451             if (PL_bufptr != PL_bufend)
4452                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4453             PL_lex_repl = NULL;
4454         }
4455         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4456            re_eval_str.  If the here-doc body’s length equals the previous
4457            value of re_eval_start, re_eval_start will now be null.  So
4458            check re_eval_str as well. */
4459         if (PL_parser->lex_shared->re_eval_start
4460          || PL_parser->lex_shared->re_eval_str) {
4461             SV *sv;
4462             if (*PL_bufptr != ')')
4463                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4464             PL_bufptr++;
4465             /* having compiled a (?{..}) expression, return the original
4466              * text too, as a const */
4467             if (PL_parser->lex_shared->re_eval_str) {
4468                 sv = PL_parser->lex_shared->re_eval_str;
4469                 PL_parser->lex_shared->re_eval_str = NULL;
4470                 SvCUR_set(sv,
4471                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4472                 SvPV_shrink_to_cur(sv);
4473             }
4474             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4475                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4476             NEXTVAL_NEXTTOKE.opval =
4477                     (OP*)newSVOP(OP_CONST, 0,
4478                                  sv);
4479             force_next(THING);
4480             PL_parser->lex_shared->re_eval_start = NULL;
4481             PL_expect = XTERM;
4482             return REPORT(',');
4483         }
4484
4485         /* FALLTHROUGH */
4486     case LEX_INTERPCONCAT:
4487 #ifdef DEBUGGING
4488         if (PL_lex_brackets)
4489             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4490                        (long) PL_lex_brackets);
4491 #endif
4492         if (PL_bufptr == PL_bufend)
4493             return REPORT(sublex_done());
4494
4495         /* m'foo' still needs to be parsed for possible (?{...}) */
4496         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4497             SV *sv = newSVsv(PL_linestr);
4498             sv = tokeq(sv);
4499             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4500             s = PL_bufend;
4501         }
4502         else {
4503             s = scan_const(PL_bufptr);
4504             if (*s == '\\')
4505                 PL_lex_state = LEX_INTERPCASEMOD;
4506             else
4507                 PL_lex_state = LEX_INTERPSTART;
4508         }
4509
4510         if (s != PL_bufptr) {
4511             NEXTVAL_NEXTTOKE = pl_yylval;
4512             PL_expect = XTERM;
4513             force_next(THING);
4514             if (PL_lex_starts++) {
4515                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4516                 if (!PL_lex_casemods && PL_lex_inpat)
4517                     OPERATOR(',');
4518                 else
4519                     Aop(OP_CONCAT);
4520             }
4521             else {
4522                 PL_bufptr = s;
4523                 return yylex();
4524             }
4525         }
4526
4527         return yylex();
4528     case LEX_FORMLINE:
4529         s = scan_formline(PL_bufptr);
4530         if (!PL_lex_formbrack)
4531         {
4532             formbrack = 1;
4533             goto rightbracket;
4534         }
4535         PL_bufptr = s;
4536         return yylex();
4537     }
4538
4539     /* We really do *not* want PL_linestr ever becoming a COW. */
4540     assert (!SvIsCOW(PL_linestr));
4541     s = PL_bufptr;
4542     PL_oldoldbufptr = PL_oldbufptr;
4543     PL_oldbufptr = s;
4544     PL_parser->saw_infix_sigil = 0;
4545
4546   retry:
4547     switch (*s) {
4548     default:
4549         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4550             goto keylookup;
4551         {
4552         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4553         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4554                                                     UTF8SKIP(s),
4555                                                     SVs_TEMP | SVf_UTF8),
4556                                             10, UNI_DISPLAY_ISPRINT)
4557                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4558         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4559         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4560             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4561         } else {
4562             d = PL_linestart;
4563         }
4564         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4565                           UTF8fARG(UTF, (s - d), d),
4566                          (int) len + 1);
4567     }
4568     case 4:
4569     case 26:
4570         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4571     case 0:
4572         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4573             PL_last_uni = 0;
4574             PL_last_lop = 0;
4575             if (PL_lex_brackets &&
4576                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4577                 yyerror((const char *)
4578                         (PL_lex_formbrack
4579                          ? "Format not terminated"
4580                          : "Missing right curly or square bracket"));
4581             }
4582             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4583                         "### Tokener got EOF\n");
4584             } );
4585             TOKEN(0);
4586         }
4587         if (s++ < PL_bufend)
4588             goto retry;                 /* ignore stray nulls */
4589         PL_last_uni = 0;
4590         PL_last_lop = 0;
4591         if (!PL_in_eval && !PL_preambled) {
4592             PL_preambled = TRUE;
4593             if (PL_perldb) {
4594                 /* Generate a string of Perl code to load the debugger.
4595                  * If PERL5DB is set, it will return the contents of that,
4596                  * otherwise a compile-time require of perl5db.pl.  */
4597
4598                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4599
4600                 if (pdb) {
4601                     sv_setpv(PL_linestr, pdb);
4602                     sv_catpvs(PL_linestr,";");
4603                 } else {
4604                     SETERRNO(0,SS_NORMAL);
4605                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4606                 }
4607                 PL_parser->preambling = CopLINE(PL_curcop);
4608             } else
4609                 sv_setpvs(PL_linestr,"");
4610             if (PL_preambleav) {
4611                 SV **svp = AvARRAY(PL_preambleav);
4612                 SV **const end = svp + AvFILLp(PL_preambleav);
4613                 while(svp <= end) {
4614                     sv_catsv(PL_linestr, *svp);
4615                     ++svp;
4616                     sv_catpvs(PL_linestr, ";");
4617                 }
4618                 sv_free(MUTABLE_SV(PL_preambleav));
4619                 PL_preambleav = NULL;
4620             }
4621             if (PL_minus_E)
4622                 sv_catpvs(PL_linestr,
4623                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4624             if (PL_minus_n || PL_minus_p) {
4625                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4626                 if (PL_minus_l)
4627                     sv_catpvs(PL_linestr,"chomp;");
4628                 if (PL_minus_a) {
4629                     if (PL_minus_F) {
4630                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4631                              || *PL_splitstr == '"')
4632                               && strchr(PL_splitstr + 1, *PL_splitstr))
4633                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4634                         else {
4635                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4636                                bytes can be used as quoting characters.  :-) */
4637                             const char *splits = PL_splitstr;
4638                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4639                             do {
4640                                 /* Need to \ \s  */
4641                                 if (*splits == '\\')
4642                                     sv_catpvn(PL_linestr, splits, 1);
4643                                 sv_catpvn(PL_linestr, splits, 1);
4644                             } while (*splits++);
4645                             /* This loop will embed the trailing NUL of
4646                                PL_linestr as the last thing it does before
4647                                terminating.  */
4648                             sv_catpvs(PL_linestr, ");");
4649                         }
4650                     }
4651                     else
4652                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4653                 }
4654             }
4655             sv_catpvs(PL_linestr, "\n");
4656             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4657             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4658             PL_last_lop = PL_last_uni = NULL;
4659             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4660                 update_debugger_info(PL_linestr, NULL, 0);
4661             goto retry;
4662         }
4663         do {
4664             fake_eof = 0;
4665             bof = PL_rsfp ? TRUE : FALSE;
4666             if (0) {
4667               fake_eof:
4668                 fake_eof = LEX_FAKE_EOF;
4669             }
4670             PL_bufptr = PL_bufend;
4671             COPLINE_INC_WITH_HERELINES;
4672             if (!lex_next_chunk(fake_eof)) {
4673                 CopLINE_dec(PL_curcop);
4674                 s = PL_bufptr;
4675                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4676             }
4677             CopLINE_dec(PL_curcop);
4678             s = PL_bufptr;
4679             /* If it looks like the start of a BOM or raw UTF-16,
4680              * check if it in fact is. */
4681             if (bof && PL_rsfp &&
4682                      (*s == 0 ||
4683                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4684                       *(U8*)s >= 0xFE ||
4685                       s[1] == 0)) {
4686                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4687                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4688 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4689                 /* offset may include swallowed CR */
4690                 if (!bof)
4691                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4692 #endif
4693                 if (bof) {
4694                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4695                     s = swallow_bom((U8*)s);
4696                 }
4697             }
4698             if (PL_parser->in_pod) {
4699                 /* Incest with pod. */
4700                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4701                     sv_setpvs(PL_linestr, "");
4702                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4703                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4704                     PL_last_lop = PL_last_uni = NULL;
4705                     PL_parser->in_pod = 0;
4706                 }
4707             }
4708             if (PL_rsfp || PL_parser->filtered)
4709                 incline(s);
4710         } while (PL_parser->in_pod);
4711         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4712         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4713         PL_last_lop = PL_last_uni = NULL;
4714         if (CopLINE(PL_curcop) == 1) {
4715             while (s < PL_bufend && isSPACE(*s))
4716                 s++;
4717             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4718                 s++;
4719             d = NULL;
4720             if (!PL_in_eval) {
4721                 if (*s == '#' && *(s+1) == '!')
4722                     d = s + 2;
4723 #ifdef ALTERNATE_SHEBANG
4724                 else {
4725                     static char const as[] = ALTERNATE_SHEBANG;
4726                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4727                         d = s + (sizeof(as) - 1);
4728                 }
4729 #endif /* ALTERNATE_SHEBANG */
4730             }
4731             if (d) {
4732                 char *ipath;
4733                 char *ipathend;
4734
4735                 while (isSPACE(*d))
4736                     d++;
4737                 ipath = d;
4738                 while (*d && !isSPACE(*d))
4739                     d++;
4740                 ipathend = d;
4741
4742 #ifdef ARG_ZERO_IS_SCRIPT
4743                 if (ipathend > ipath) {
4744                     /*
4745                      * HP-UX (at least) sets argv[0] to the script name,
4746                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4747                      * at least, set argv[0] to the basename of the Perl
4748                      * interpreter. So, having found "#!", we'll set it right.
4749                      */
4750                     SV* copfilesv = CopFILESV(PL_curcop);
4751                     if (copfilesv) {
4752                         SV * const x =
4753                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4754                                              SVt_PV)); /* $^X */
4755                         assert(SvPOK(x) || SvGMAGICAL(x));
4756                         if (sv_eq(x, copfilesv)) {
4757                             sv_setpvn(x, ipath, ipathend - ipath);
4758                             SvSETMAGIC(x);
4759                         }
4760                         else {
4761                             STRLEN blen;
4762                             STRLEN llen;
4763                             const char *bstart = SvPV_const(copfilesv, blen);
4764                             const char * const lstart = SvPV_const(x, llen);
4765                             if (llen < blen) {
4766                                 bstart += blen - llen;
4767                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
4768                                     sv_setpvn(x, ipath, ipathend - ipath);
4769                                     SvSETMAGIC(x);
4770                                 }
4771                             }
4772                         }
4773                     }
4774                     else {
4775                         /* Anything to do if no copfilesv? */
4776                     }
4777                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4778                 }
4779 #endif /* ARG_ZERO_IS_SCRIPT */
4780
4781                 /*
4782                  * Look for options.
4783                  */
4784                 d = instr(s,"perl -");
4785                 if (!d) {
4786                     d = instr(s,"perl");
4787 #if defined(DOSISH)
4788                     /* avoid getting into infinite loops when shebang
4789                      * line contains "Perl" rather than "perl" */
4790                     if (!d) {
4791                         for (d = ipathend-4; d >= ipath; --d) {
4792                             if ((*d == 'p' || *d == 'P')
4793                                 && !ibcmp(d, "perl", 4))
4794                             {
4795                                 break;
4796                             }
4797                         }
4798                         if (d < ipath)
4799                             d = NULL;
4800                     }
4801 #endif
4802                 }
4803 #ifdef ALTERNATE_SHEBANG
4804                 /*
4805                  * If the ALTERNATE_SHEBANG on this system starts with a
4806                  * character that can be part of a Perl expression, then if
4807                  * we see it but not "perl", we're probably looking at the
4808                  * start of Perl code, not a request to hand off to some
4809                  * other interpreter.  Similarly, if "perl" is there, but
4810                  * not in the first 'word' of the line, we assume the line
4811                  * contains the start of the Perl program.
4812                  */
4813                 if (d && *s != '#') {
4814                     const char *c = ipath;
4815                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4816                         c++;
4817                     if (c < d)
4818                         d = NULL;       /* "perl" not in first word; ignore */
4819                     else
4820                         *s = '#';       /* Don't try to parse shebang line */
4821                 }
4822 #endif /* ALTERNATE_SHEBANG */
4823                 if (!d &&
4824                     *s == '#' &&
4825                     ipathend > ipath &&
4826                     !PL_minus_c &&
4827                     !instr(s,"indir") &&
4828                     instr(PL_origargv[0],"perl"))
4829                 {
4830                     dVAR;
4831                     char **newargv;
4832
4833                     *ipathend = '\0';
4834                     s = ipathend + 1;
4835                     while (s < PL_bufend && isSPACE(*s))
4836                         s++;
4837                     if (s < PL_bufend) {
4838                         Newx(newargv,PL_origargc+3,char*);
4839                         newargv[1] = s;
4840                         while (s < PL_bufend && !isSPACE(*s))
4841                             s++;
4842                         *s = '\0';
4843                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4844                     }
4845                     else
4846                         newargv = PL_origargv;
4847                     newargv[0] = ipath;
4848                     PERL_FPU_PRE_EXEC
4849                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4850                     PERL_FPU_POST_EXEC
4851                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4852                 }
4853                 if (d) {
4854                     while (*d && !isSPACE(*d))
4855                         d++;
4856                     while (SPACE_OR_TAB(*d))
4857                         d++;
4858
4859                     if (*d++ == '-') {
4860                         const bool switches_done = PL_doswitches;
4861                         const U32 oldpdb = PL_perldb;
4862                         const bool oldn = PL_minus_n;
4863                         const bool oldp = PL_minus_p;
4864                         const char *d1 = d;
4865
4866                         do {
4867                             bool baduni = FALSE;
4868                             if (*d1 == 'C') {
4869                                 const char *d2 = d1 + 1;
4870                                 if (parse_unicode_opts((const char **)&d2)
4871                                     != PL_unicode)
4872                                     baduni = TRUE;
4873                             }
4874                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4875                                 const char * const m = d1;
4876                                 while (*d1 && !isSPACE(*d1))
4877                                     d1++;
4878                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4879                                       (int)(d1 - m), m);
4880                             }
4881                             d1 = moreswitches(d1);
4882                         } while (d1);
4883                         if (PL_doswitches && !switches_done) {
4884                             int argc = PL_origargc;
4885                             char **argv = PL_origargv;
4886                             do {
4887                                 argc--,argv++;
4888                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4889                             init_argv_symbols(argc,argv);
4890                         }
4891                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4892                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4893                               /* if we have already added "LINE: while (<>) {",
4894                                  we must not do it again */
4895                         {
4896                             sv_setpvs(PL_linestr, "");
4897                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4898                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4899                             PL_last_lop = PL_last_uni = NULL;
4900                             PL_preambled = FALSE;
4901                             if (PERLDB_LINE || PERLDB_SAVESRC)
4902                                 (void)gv_fetchfile(PL_origfilename);
4903                             goto retry;
4904                         }
4905                     }
4906                 }
4907             }
4908         }
4909         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4910             PL_lex_state = LEX_FORMLINE;
4911             NEXTVAL_NEXTTOKE.ival = 0;
4912             force_next(FORMRBRACK);
4913             TOKEN(';');
4914         }
4915         goto retry;
4916     case '\r':
4917 #ifdef PERL_STRICT_CR
4918         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4919         Perl_croak(aTHX_
4920       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4921 #endif
4922     case ' ': case '\t': case '\f': case 013:
4923         s++;
4924         goto retry;
4925     case '#':
4926     case '\n':
4927         if (PL_lex_state != LEX_NORMAL ||
4928              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4929             const bool in_comment = *s == '#';
4930             if (*s == '#' && s == PL_linestart && PL_in_eval
4931              && !PL_rsfp && !PL_parser->filtered) {
4932                 /* handle eval qq[#line 1 "foo"\n ...] */
4933                 CopLINE_dec(PL_curcop);
4934                 incline(s);
4935             }
4936             d = s;
4937             while (d < PL_bufend && *d != '\n')
4938                 d++;
4939             if (d < PL_bufend)
4940                 d++;
4941             else if (d > PL_bufend)
4942                 /* Found by Ilya: feed random input to Perl. */
4943                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
4944                            d, PL_bufend);
4945             s = d;
4946             if (in_comment && d == PL_bufend
4947                 && PL_lex_state == LEX_INTERPNORMAL
4948                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
4949                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
4950             else
4951                 incline(s);
4952             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4953                 PL_lex_state = LEX_FORMLINE;
4954                 NEXTVAL_NEXTTOKE.ival = 0;
4955                 force_next(FORMRBRACK);
4956                 TOKEN(';');
4957             }
4958         }
4959         else {
4960             while (s < PL_bufend && *s != '\n')
4961                 s++;
4962             if (s < PL_bufend)
4963                 {
4964                     s++;
4965                     if (s < PL_bufend)
4966                         incline(s);
4967                 }
4968             else if (s > PL_bufend)
4969                 /* Found by Ilya: feed random input to Perl. */
4970                 Perl_croak(aTHX_ "panic: input overflow");
4971         }
4972         goto retry;
4973     case '-':
4974         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
4975             I32 ftst = 0;
4976             char tmp;
4977
4978             s++;
4979             PL_bufptr = s;
4980             tmp = *s++;
4981
4982             while (s < PL_bufend && SPACE_OR_TAB(*s))
4983                 s++;
4984
4985             if (strnEQ(s,"=>",2)) {
4986                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
4987                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4988                 OPERATOR('-');          /* unary minus */
4989             }
4990             switch (tmp) {
4991             case 'r': ftst = OP_FTEREAD;        break;
4992             case 'w': ftst = OP_FTEWRITE;       break;
4993             case 'x': ftst = OP_FTEEXEC;        break;
4994             case 'o': ftst = OP_FTEOWNED;       break;
4995             case 'R': ftst = OP_FTRREAD;        break;
4996             case 'W': ftst = OP_FTRWRITE;       break;
4997             case 'X': ftst = OP_FTREXEC;        break;
4998             case 'O': ftst = OP_FTROWNED;       break;
4999             case 'e': ftst = OP_FTIS;           break;
5000             case 'z': ftst = OP_FTZERO;         break;
5001             case 's': ftst = OP_FTSIZE;         break;
5002             case 'f': ftst = OP_FTFILE;         break;
5003             case 'd': ftst = OP_FTDIR;          break;
5004             case 'l': ftst = OP_FTLINK;         break;
5005             case 'p': ftst = OP_FTPIPE;         break;
5006             case 'S': ftst = OP_FTSOCK;         break;
5007             case 'u': ftst = OP_FTSUID;         break;
5008             case 'g': ftst = OP_FTSGID;         break;
5009             case 'k': ftst = OP_FTSVTX;         break;
5010             case 'b': ftst = OP_FTBLK;          break;
5011             case 'c': ftst = OP_FTCHR;          break;
5012             case 't': ftst = OP_FTTTY;          break;
5013             case 'T': ftst = OP_FTTEXT;         break;
5014             case 'B': ftst = OP_FTBINARY;       break;
5015             case 'M': case 'A': case 'C':
5016                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5017                 switch (tmp) {
5018                 case 'M': ftst = OP_FTMTIME;    break;
5019                 case 'A': ftst = OP_FTATIME;    break;
5020                 case 'C': ftst = OP_FTCTIME;    break;
5021                 default:                        break;
5022                 }
5023                 break;
5024             default:
5025                 break;
5026             }
5027             if (ftst) {
5028                 PL_last_uni = PL_oldbufptr;
5029                 PL_last_lop_op = (OPCODE)ftst;
5030                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5031                         "### Saw file test %c\n", (int)tmp);
5032                 } );
5033                 FTST(ftst);
5034             }
5035             else {
5036                 /* Assume it was a minus followed by a one-letter named
5037                  * subroutine call (or a -bareword), then. */
5038                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5039                         "### '-%c' looked like a file test but was not\n",
5040                         (int) tmp);
5041                 } );
5042                 s = --PL_bufptr;
5043             }
5044         }
5045         {
5046             const char tmp = *s++;
5047             if (*s == tmp) {
5048                 s++;
5049                 if (PL_expect == XOPERATOR)
5050                     TERM(POSTDEC);
5051                 else
5052                     OPERATOR(PREDEC);
5053             }
5054             else if (*s == '>') {
5055                 s++;
5056                 s = SKIPSPACE1(s);
5057                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5058                     ((*s == '$' || *s == '&') && s[1] == '*')
5059                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5060                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5061                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5062                  ))
5063                 {
5064                     Perl_ck_warner_d(aTHX_
5065                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5066                         "Postfix dereference is experimental"
5067                     );
5068                     PL_expect = XPOSTDEREF;
5069                     TOKEN(ARROW);
5070                 }
5071                 if (isIDFIRST_lazy_if(s,UTF)) {
5072                     s = force_word(s,METHOD,FALSE,TRUE);
5073                     TOKEN(ARROW);
5074                 }
5075                 else if (*s == '$')
5076                     OPERATOR(ARROW);
5077                 else
5078                     TERM(ARROW);
5079             }
5080             if (PL_expect == XOPERATOR) {
5081                 if (*s == '=' && !PL_lex_allbrackets &&
5082                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5083                     s--;
5084                     TOKEN(0);
5085                 }
5086                 Aop(OP_SUBTRACT);
5087             }
5088             else {
5089                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5090                     check_uni();
5091                 OPERATOR('-');          /* unary minus */
5092             }
5093         }
5094
5095     case '+':
5096         {
5097             const char tmp = *s++;
5098             if (*s == tmp) {
5099                 s++;
5100                 if (PL_expect == XOPERATOR)
5101                     TERM(POSTINC);
5102                 else
5103                     OPERATOR(PREINC);
5104             }
5105             if (PL_expect == XOPERATOR) {
5106                 if (*s == '=' && !PL_lex_allbrackets &&
5107                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5108                     s--;
5109                     TOKEN(0);
5110                 }
5111                 Aop(OP_ADD);
5112             }
5113             else {
5114                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5115                     check_uni();
5116                 OPERATOR('+');
5117             }
5118         }
5119
5120     case '*':
5121         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5122         if (PL_expect != XOPERATOR) {
5123             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5124             PL_expect = XOPERATOR;
5125             force_ident(PL_tokenbuf, '*');
5126             if (!*PL_tokenbuf)
5127                 PREREF('*');
5128             TERM('*');
5129         }
5130         s++;
5131         if (*s == '*') {
5132             s++;
5133             if (*s == '=' && !PL_lex_allbrackets &&
5134                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5135                 s -= 2;
5136                 TOKEN(0);
5137             }
5138             PWop(OP_POW);
5139         }
5140         if (*s == '=' && !PL_lex_allbrackets &&
5141                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5142             s--;
5143             TOKEN(0);
5144         }
5145         PL_parser->saw_infix_sigil = 1;
5146         Mop(OP_MULTIPLY);
5147
5148     case '%':
5149     {
5150         if (PL_expect == XOPERATOR) {
5151             if (s[1] == '=' && !PL_lex_allbrackets &&
5152                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5153                 TOKEN(0);
5154             ++s;
5155             PL_parser->saw_infix_sigil = 1;
5156             Mop(OP_MODULO);
5157         }
5158         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5159         PL_tokenbuf[0] = '%';
5160         s = scan_ident(s, PL_tokenbuf + 1,
5161                 sizeof PL_tokenbuf - 1, FALSE);
5162         pl_yylval.ival = 0;
5163         if (!PL_tokenbuf[1]) {
5164             PREREF('%');
5165         }
5166         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5167             if (*s == '[')
5168                 PL_tokenbuf[0] = '@';
5169         }
5170         PL_expect = XOPERATOR;
5171         force_ident_maybe_lex('%');
5172         TERM('%');
5173     }
5174     case '^':
5175         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5176                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5177             TOKEN(0);
5178         s++;
5179         BOop(OP_BIT_XOR);
5180     case '[':
5181         if (PL_lex_brackets > 100)
5182             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5183         PL_lex_brackstack[PL_lex_brackets++] = 0;
5184         PL_lex_allbrackets++;
5185         {
5186             const char tmp = *s++;
5187             OPERATOR(tmp);
5188         }
5189     case '~':
5190         if (s[1] == '~'
5191             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5192         {
5193             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5194                 TOKEN(0);
5195             s += 2;
5196             Perl_ck_warner_d(aTHX_
5197                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5198                 "Smartmatch is experimental");
5199             Eop(OP_SMARTMATCH);
5200         }
5201         s++;
5202         OPERATOR('~');
5203     case ',':
5204         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5205             TOKEN(0);
5206         s++;
5207         OPERATOR(',');
5208     case ':':
5209         if (s[1] == ':') {
5210             len = 0;
5211             goto just_a_word_zero_gv;
5212         }
5213         s++;
5214         {
5215         OP *attrs;
5216
5217         switch (PL_expect) {
5218         case XOPERATOR:
5219             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5220                 break;
5221             PL_bufptr = s;      /* update in case we back off */
5222             if (*s == '=') {
5223                 Perl_croak(aTHX_
5224                            "Use of := for an empty attribute list is not allowed");
5225             }
5226             goto grabattrs;
5227         case XATTRBLOCK:
5228             PL_expect = XBLOCK;
5229             goto grabattrs;
5230         case XATTRTERM:
5231             PL_expect = XTERMBLOCK;
5232          grabattrs:
5233             s = PEEKSPACE(s);
5234             attrs = NULL;
5235             while (isIDFIRST_lazy_if(s,UTF)) {
5236                 I32 tmp;
5237                 SV *sv;
5238                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5239                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5240                     if (tmp < 0) tmp = -tmp;
5241                     switch (tmp) {
5242                     case KEY_or:
5243                     case KEY_and:
5244                     case KEY_for:
5245                     case KEY_foreach:
5246                     case KEY_unless:
5247                     case KEY_if:
5248                     case KEY_while:
5249                     case KEY_until:
5250                         goto got_attrs;
5251                     default:
5252                         break;
5253                     }
5254                 }
5255                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5256                 if (*d == '(') {
5257                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5258                     COPLINE_SET_FROM_MULTI_END;
5259                     if (!d) {
5260                         /* MUST advance bufptr here to avoid bogus
5261                            "at end of line" context messages from yyerror().
5262                          */
5263                         PL_bufptr = s + len;
5264                         yyerror("Unterminated attribute parameter in attribute list");
5265                         if (attrs)
5266                             op_free(attrs);
5267                         sv_free(sv);
5268                         return REPORT(0);       /* EOF indicator */
5269                     }
5270                 }
5271                 if (PL_lex_stuff) {
5272                     sv_catsv(sv, PL_lex_stuff);
5273                     attrs = op_append_elem(OP_LIST, attrs,
5274                                         newSVOP(OP_CONST, 0, sv));
5275                     SvREFCNT_dec(PL_lex_stuff);
5276                     PL_lex_stuff = NULL;
5277                 }
5278                 else {
5279                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5280                         sv_free(sv);
5281                         if (PL_in_my == KEY_our) {
5282                             deprecate(":unique");
5283                         }
5284                         else
5285                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5286                     }
5287
5288                     /* NOTE: any CV attrs applied here need to be part of
5289                        the CVf_BUILTIN_ATTRS define in cv.h! */
5290                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5291                         sv_free(sv);
5292                         CvLVALUE_on(PL_compcv);
5293                     }
5294                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5295                         sv_free(sv);
5296                         deprecate(":locked");
5297                     }
5298                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5299                         sv_free(sv);
5300                         CvMETHOD_on(PL_compcv);
5301                     }
5302                     /* After we've set the flags, it could be argued that
5303                        we don't need to do the attributes.pm-based setting
5304                        process, and shouldn't bother appending recognized
5305                        flags.  To experiment with that, uncomment the
5306                        following "else".  (Note that's already been
5307                        uncommented.  That keeps the above-applied built-in
5308                        attributes from being intercepted (and possibly
5309                        rejected) by a package's attribute routines, but is
5310                        justified by the performance win for the common case
5311                        of applying only built-in attributes.) */
5312                     else
5313                         attrs = op_append_elem(OP_LIST, attrs,
5314                                             newSVOP(OP_CONST, 0,
5315                                                     sv));
5316                 }
5317                 s = PEEKSPACE(d);
5318                 if (*s == ':' && s[1] != ':')
5319                     s = PEEKSPACE(s+1);
5320                 else if (s == d)
5321                     break;      /* require real whitespace or :'s */
5322                 /* XXX losing whitespace on sequential attributes here */
5323             }
5324             {
5325                 if (*s != ';' && *s != '}' &&
5326                     !(PL_expect == XOPERATOR
5327                         ? (*s == '=' ||  *s == ')')
5328                         : (*s == '{' ||  *s == '('))) {
5329                     const char q = ((*s == '\'') ? '"' : '\'');
5330                     /* If here for an expression, and parsed no attrs, back
5331                        off. */
5332                     if (PL_expect == XOPERATOR && !attrs) {
5333                         s = PL_bufptr;
5334                         break;
5335                     }
5336                     /* MUST advance bufptr here to avoid bogus "at end of line"
5337                        context messages from yyerror().
5338                     */
5339                     PL_bufptr = s;
5340                     yyerror( (const char *)
5341                              (*s
5342                               ? Perl_form(aTHX_ "Invalid separator character "
5343                                           "%c%c%c in attribute list", q, *s, q)
5344                               : "Unterminated attribute list" ) );
5345                     if (attrs)
5346                         op_free(attrs);
5347                     OPERATOR(':');
5348                 }
5349             }
5350         got_attrs:
5351             if (attrs) {
5352                 NEXTVAL_NEXTTOKE.opval = attrs;
5353                 force_next(THING);
5354             }
5355             TOKEN(COLONATTR);
5356         }
5357         }
5358         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5359             s--;
5360             TOKEN(0);
5361         }
5362         PL_lex_allbrackets--;
5363         OPERATOR(':');
5364     case '(':
5365         s++;
5366         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5367             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5368         else
5369             PL_expect = XTERM;
5370         s = SKIPSPACE1(s);
5371         PL_lex_allbrackets++;
5372         TOKEN('(');
5373     case ';':
5374         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5375             TOKEN(0);
5376         CLINE;
5377         s++;
5378         OPERATOR(';');
5379     case ')':
5380         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5381             TOKEN(0);
5382         s++;
5383         PL_lex_allbrackets--;
5384         s = SKIPSPACE1(s);
5385         if (*s == '{')
5386             PREBLOCK(')');
5387         TERM(')');
5388     case ']':
5389         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5390             TOKEN(0);
5391         s++;
5392         if (PL_lex_brackets <= 0)
5393             /* diag_listed_as: Unmatched right %s bracket */
5394             yyerror("Unmatched right square bracket");
5395         else
5396             --PL_lex_brackets;
5397         PL_lex_allbrackets--;
5398         if (PL_lex_state == LEX_INTERPNORMAL) {
5399             if (PL_lex_brackets == 0) {
5400                 if (*s == '-' && s[1] == '>')
5401                     PL_lex_state = LEX_INTERPENDMAYBE;
5402                 else if (*s != '[' && *s != '{')
5403                     PL_lex_state = LEX_INTERPEND;
5404             }
5405         }
5406         TERM(']');
5407     case '{':
5408         s++;
5409       leftbracket:
5410         if (PL_lex_brackets > 100) {
5411             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5412         }
5413         switch (PL_expect) {
5414         case XTERM:
5415             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5416             PL_lex_allbrackets++;
5417             OPERATOR(HASHBRACK);
5418         case XOPERATOR:
5419             while (s < PL_bufend && SPACE_OR_TAB(*s))
5420                 s++;
5421             d = s;
5422             PL_tokenbuf[0] = '\0';
5423             if (d < PL_bufend && *d == '-') {
5424                 PL_tokenbuf[0] = '-';
5425                 d++;
5426                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5427                     d++;
5428             }
5429             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5430                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5431                               FALSE, &len);
5432                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5433                     d++;
5434                 if (*d == '}') {
5435                     const char minus = (PL_tokenbuf[0] == '-');
5436                     s = force_word(s + minus, WORD, FALSE, TRUE);
5437                     if (minus)
5438                         force_next('-');
5439                 }
5440             }
5441             /* FALLTHROUGH */
5442         case XATTRBLOCK:
5443         case XBLOCK:
5444             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5445             PL_lex_allbrackets++;
5446             PL_expect = XSTATE;
5447             break;
5448         case XATTRTERM:
5449         case XTERMBLOCK:
5450             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5451             PL_lex_allbrackets++;
5452             PL_expect = XSTATE;
5453             break;
5454         default: {
5455                 const char *t;
5456                 if (PL_oldoldbufptr == PL_last_lop)
5457                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5458                 else
5459                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5460                 PL_lex_allbrackets++;
5461                 s = SKIPSPACE1(s);
5462                 if (*s == '}') {
5463                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5464                         PL_expect = XTERM;
5465                         /* This hack is to get the ${} in the message. */
5466                         PL_bufptr = s+1;
5467                         yyerror("syntax error");
5468                         break;
5469                     }
5470                     OPERATOR(HASHBRACK);
5471                 }
5472                 /* This hack serves to disambiguate a pair of curlies
5473                  * as being a block or an anon hash.  Normally, expectation
5474                  * determines that, but in cases where we're not in a
5475                  * position to expect anything in particular (like inside
5476                  * eval"") we have to resolve the ambiguity.  This code
5477                  * covers the case where the first term in the curlies is a
5478                  * quoted string.  Most other cases need to be explicitly
5479                  * disambiguated by prepending a "+" before the opening
5480                  * curly in order to force resolution as an anon hash.
5481                  *
5482                  * XXX should probably propagate the outer expectation
5483                  * into eval"" to rely less on this hack, but that could
5484                  * potentially break current behavior of eval"".
5485                  * GSAR 97-07-21
5486                  */
5487                 t = s;
5488                 if (*s == '\'' || *s == '"' || *s == '`') {
5489                     /* common case: get past first string, handling escapes */
5490                     for (t++; t < PL_bufend && *t != *s;)
5491                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5492                             t++;
5493                     t++;
5494                 }
5495                 else if (*s == 'q') {
5496                     if (++t < PL_bufend
5497                         && (!isWORDCHAR(*t)
5498                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5499                                 && !isWORDCHAR(*t))))
5500                     {
5501                         /* skip q//-like construct */
5502                         const char *tmps;
5503                         char open, close, term;
5504                         I32 brackets = 1;
5505
5506                         while (t < PL_bufend && isSPACE(*t))
5507                             t++;
5508                         /* check for q => */
5509                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5510                             OPERATOR(HASHBRACK);
5511                         }
5512                         term = *t;
5513                         open = term;
5514                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5515                             term = tmps[5];
5516                         close = term;
5517                         if (open == close)
5518                             for (t++; t < PL_bufend; t++) {
5519                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5520                                     t++;
5521                                 else if (*t == open)
5522                                     break;
5523                             }
5524                         else {
5525                             for (t++; t < PL_bufend; t++) {
5526                                 if (*t == '\\' && t+1 < PL_bufend)
5527                                     t++;
5528                                 else if (*t == close && --brackets <= 0)
5529                                     break;
5530                                 else if (*t == open)
5531                                     brackets++;
5532                             }
5533                         }
5534                         t++;
5535                     }
5536                     else
5537                         /* skip plain q word */
5538                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5539                              t += UTF8SKIP(t);
5540                 }
5541                 else if (isWORDCHAR_lazy_if(t,UTF)) {
5542                     t += UTF8SKIP(t);
5543                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5544                          t += UTF8SKIP(t);
5545                 }
5546                 while (t < PL_bufend && isSPACE(*t))
5547                     t++;
5548                 /* if comma follows first term, call it an anon hash */
5549                 /* XXX it could be a comma expression with loop modifiers */
5550                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5551                                    || (*t == '=' && t[1] == '>')))
5552                     OPERATOR(HASHBRACK);
5553                 if (PL_expect == XREF)
5554                     PL_expect = XTERM;
5555                 else {
5556                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5557                     PL_expect = XSTATE;
5558                 }
5559             }
5560             break;
5561         }
5562         pl_yylval.ival = CopLINE(PL_curcop);
5563         if (isSPACE(*s) || *s == '#')
5564             PL_copline = NOLINE;   /* invalidate current command line number */
5565         TOKEN(formbrack ? '=' : '{');
5566     case '}':
5567         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5568             TOKEN(0);
5569       rightbracket:
5570         s++;
5571         if (PL_lex_brackets <= 0)
5572             /* diag_listed_as: Unmatched right %s bracket */
5573             yyerror("Unmatched right curly bracket");
5574         else
5575             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5576         PL_lex_allbrackets--;
5577         if (PL_lex_state == LEX_INTERPNORMAL) {
5578             if (PL_lex_brackets == 0) {
5579                 if (PL_expect & XFAKEBRACK) {
5580                     PL_expect &= XENUMMASK;
5581                     PL_lex_state = LEX_INTERPEND;
5582                     PL_bufptr = s;
5583                     return yylex();     /* ignore fake brackets */
5584                 }
5585                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5586                  && SvEVALED(PL_lex_repl))
5587                     PL_lex_state = LEX_INTERPEND;
5588                 else if (*s == '-' && s[1] == '>')
5589                     PL_lex_state = LEX_INTERPENDMAYBE;
5590                 else if (*s != '[' && *s != '{')
5591                     PL_lex_state = LEX_INTERPEND;
5592             }
5593         }
5594         if (PL_expect & XFAKEBRACK) {
5595             PL_expect &= XENUMMASK;
5596             PL_bufptr = s;
5597             return yylex();             /* ignore fake brackets */
5598         }
5599         force_next(formbrack ? '.' : '}');
5600         if (formbrack) LEAVE;
5601         if (formbrack == 2) { /* means . where arguments were expected */
5602             force_next(';');
5603             TOKEN(FORMRBRACK);
5604         }
5605         TOKEN(';');
5606     case '&':
5607         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5608         s++;
5609         if (*s++ == '&') {
5610             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5611                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5612                 s -= 2;
5613                 TOKEN(0);
5614             }
5615             AOPERATOR(ANDAND);
5616         }
5617         s--;
5618         if (PL_expect == XOPERATOR) {
5619             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5620                 && isIDFIRST_lazy_if(s,UTF))
5621             {
5622                 CopLINE_dec(PL_curcop);
5623                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5624                 CopLINE_inc(PL_curcop);
5625             }
5626             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5627                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5628                 s--;
5629                 TOKEN(0);
5630             }
5631             PL_parser->saw_infix_sigil = 1;
5632             BAop(OP_BIT_AND);
5633         }
5634
5635         PL_tokenbuf[0] = '&';
5636         s = scan_ident(s - 1, PL_tokenbuf + 1,
5637                        sizeof PL_tokenbuf - 1, TRUE);
5638         if (PL_tokenbuf[1]) {
5639             PL_expect = XOPERATOR;
5640             force_ident_maybe_lex('&');
5641         }
5642         else
5643             PREREF('&');
5644         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5645         TERM('&');
5646
5647     case '|':
5648         s++;
5649         if (*s++ == '|') {
5650             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5651                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5652                 s -= 2;
5653                 TOKEN(0);
5654             }
5655             AOPERATOR(OROR);
5656         }
5657         s--;
5658         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5659                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5660             s--;
5661             TOKEN(0);
5662         }
5663         BOop(OP_BIT_OR);
5664     case '=':
5665         s++;
5666         {
5667             const char tmp = *s++;
5668             if (tmp == '=') {
5669                 if (!PL_lex_allbrackets &&
5670                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5671                     s -= 2;
5672                     TOKEN(0);
5673                 }
5674                 Eop(OP_EQ);
5675             }
5676             if (tmp == '>') {
5677                 if (!PL_lex_allbrackets &&
5678                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5679                     s -= 2;
5680                     TOKEN(0);
5681                 }
5682                 OPERATOR(',');
5683             }
5684             if (tmp == '~')
5685                 PMop(OP_MATCH);
5686             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5687                 && strchr("+-*/%.^&|<",tmp))
5688                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5689                             "Reversed %c= operator",(int)tmp);
5690             s--;
5691             if (PL_expect == XSTATE && isALPHA(tmp) &&
5692                 (s == PL_linestart+1 || s[-2] == '\n') )
5693                 {
5694                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5695                         || PL_lex_state != LEX_NORMAL) {
5696                         d = PL_bufend;
5697                         while (s < d) {
5698                             if (*s++ == '\n') {
5699                                 incline(s);
5700                                 if (strnEQ(s,"=cut",4)) {
5701                                     s = strchr(s,'\n');
5702                                     if (s)
5703                                         s++;
5704                                     else
5705                                         s = d;
5706                                     incline(s);
5707                                     goto retry;
5708                                 }
5709                             }
5710                         }
5711                         goto retry;
5712                     }
5713                     s = PL_bufend;
5714                     PL_parser->in_pod = 1;
5715                     goto retry;
5716                 }
5717         }
5718         if (PL_expect == XBLOCK) {
5719             const char *t = s;
5720 #ifdef PERL_STRICT_CR
5721             while (SPACE_OR_TAB(*t))
5722 #else
5723             while (SPACE_OR_TAB(*t) || *t == '\r')
5724 #endif
5725                 t++;
5726             if (*t == '\n' || *t == '#') {
5727                 formbrack = 1;
5728                 ENTER;
5729                 SAVEI8(PL_parser->form_lex_state);
5730                 SAVEI32(PL_lex_formbrack);
5731                 PL_parser->form_lex_state = PL_lex_state;
5732                 PL_lex_formbrack = PL_lex_brackets + 1;
5733                 goto leftbracket;
5734             }
5735         }
5736         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5737             s--;
5738             TOKEN(0);
5739         }
5740         pl_yylval.ival = 0;
5741         OPERATOR(ASSIGNOP);
5742     case '!':
5743         s++;
5744         {
5745             const char tmp = *s++;
5746             if (tmp == '=') {
5747                 /* was this !=~ where !~ was meant?
5748                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5749
5750                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5751                     const char *t = s+1;
5752
5753                     while (t < PL_bufend && isSPACE(*t))
5754                         ++t;
5755
5756                     if (*t == '/' || *t == '?' ||
5757                         ((*t == 'm' || *t == 's' || *t == 'y')
5758                          && !isWORDCHAR(t[1])) ||
5759                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5760                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5761                                     "!=~ should be !~");
5762                 }
5763                 if (!PL_lex_allbrackets &&
5764                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5765                     s -= 2;
5766                     TOKEN(0);
5767                 }
5768                 Eop(OP_NE);
5769             }
5770             if (tmp == '~')
5771                 PMop(OP_NOT);
5772         }
5773         s--;
5774         OPERATOR('!');
5775     case '<':
5776         if (PL_expect != XOPERATOR) {
5777             if (s[1] != '<' && !strchr(s,'>'))
5778                 check_uni();
5779             if (s[1] == '<')
5780                 s = scan_heredoc(s);
5781             else
5782                 s = scan_inputsymbol(s);
5783             PL_expect = XOPERATOR;
5784             TOKEN(sublex_start());
5785         }
5786         s++;
5787         {
5788             char tmp = *s++;
5789             if (tmp == '<') {
5790                 if (*s == '=' && !PL_lex_allbrackets &&
5791                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5792                     s -= 2;
5793                     TOKEN(0);
5794                 }
5795                 SHop(OP_LEFT_SHIFT);
5796             }
5797             if (tmp == '=') {
5798                 tmp = *s++;
5799                 if (tmp == '>') {
5800                     if (!PL_lex_allbrackets &&
5801                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5802                         s -= 3;
5803                         TOKEN(0);
5804                     }
5805                     Eop(OP_NCMP);
5806                 }
5807                 s--;
5808                 if (!PL_lex_allbrackets &&
5809                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5810                     s -= 2;
5811                     TOKEN(0);
5812                 }
5813                 Rop(OP_LE);
5814             }
5815         }
5816         s--;
5817         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5818             s--;
5819             TOKEN(0);
5820         }
5821         Rop(OP_LT);
5822     case '>':
5823         s++;
5824         {
5825             const char tmp = *s++;
5826             if (tmp == '>') {
5827                 if (*s == '=' && !PL_lex_allbrackets &&
5828                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5829                     s -= 2;
5830                     TOKEN(0);
5831                 }
5832                 SHop(OP_RIGHT_SHIFT);
5833             }
5834             else if (tmp == '=') {
5835                 if (!PL_lex_allbrackets &&
5836                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5837                     s -= 2;
5838                     TOKEN(0);
5839                 }
5840                 Rop(OP_GE);
5841             }
5842         }
5843         s--;
5844         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5845             s--;
5846             TOKEN(0);
5847         }
5848         Rop(OP_GT);
5849
5850     case '$':
5851         CLINE;
5852
5853         if (PL_expect == XOPERATOR) {
5854             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5855                 return deprecate_commaless_var_list();
5856             }
5857         }
5858         else if (PL_expect == XPOSTDEREF) {
5859             if (s[1] == '#') {
5860                 s++;
5861                 POSTDEREF(DOLSHARP);
5862             }
5863             POSTDEREF('$');
5864         }
5865
5866         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5867             PL_tokenbuf[0] = '@';
5868             s = scan_ident(s + 1, PL_tokenbuf + 1,
5869                            sizeof PL_tokenbuf - 1, FALSE);
5870             if (PL_expect == XOPERATOR)
5871                 no_op("Array length", s);
5872             if (!PL_tokenbuf[1])
5873                 PREREF(DOLSHARP);
5874             PL_expect = XOPERATOR;
5875             force_ident_maybe_lex('#');
5876             TOKEN(DOLSHARP);
5877         }
5878
5879         PL_tokenbuf[0] = '$';
5880         s = scan_ident(s, PL_tokenbuf + 1,
5881                        sizeof PL_tokenbuf - 1, FALSE);
5882         if (PL_expect == XOPERATOR)
5883             no_op("Scalar", s);
5884         if (!PL_tokenbuf[1]) {
5885             if (s == PL_bufend)
5886                 yyerror("Final $ should be \\$ or $name");
5887             PREREF('$');
5888         }
5889
5890         d = s;
5891         {
5892             const char tmp = *s;
5893             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5894                 s = SKIPSPACE1(s);
5895
5896             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5897                 && intuit_more(s)) {
5898                 if (*s == '[') {
5899                     PL_tokenbuf[0] = '@';
5900                     if (ckWARN(WARN_SYNTAX)) {
5901                         char *t = s+1;
5902
5903                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
5904                             t++;
5905                         if (*t++ == ',') {
5906                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5907                             while (t < PL_bufend && *t != ']')
5908                                 t++;
5909                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5910                                         "Multidimensional syntax %.*s not supported",
5911                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5912                         }
5913                     }
5914                 }
5915                 else if (*s == '{') {
5916                     char *t;
5917                     PL_tokenbuf[0] = '%';
5918                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5919                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5920                         {
5921                             char tmpbuf[sizeof PL_tokenbuf];
5922                             do {
5923                                 t++;
5924                             } while (isSPACE(*t));
5925                             if (isIDFIRST_lazy_if(t,UTF)) {
5926                                 STRLEN len;
5927                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5928                                               &len);
5929                                 while (isSPACE(*t))
5930                                     t++;
5931                                 if (*t == ';'
5932                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
5933                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5934                                         "You need to quote \"%"UTF8f"\"",
5935                                          UTF8fARG(UTF, len, tmpbuf));
5936                             }
5937                         }
5938                 }
5939             }
5940
5941             PL_expect = XOPERATOR;
5942             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5943                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5944                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5945                     PL_expect = XOPERATOR;
5946                 else if (strchr("$@\"'`q", *s))
5947                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5948                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5949                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5950                 else if (isIDFIRST_lazy_if(s,UTF)) {
5951                     char tmpbuf[sizeof PL_tokenbuf];
5952                     int t2;
5953                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5954                     if ((t2 = keyword(tmpbuf, len, 0))) {
5955                         /* binary operators exclude handle interpretations */
5956                         switch (t2) {
5957                         case -KEY_x:
5958                         case -KEY_eq:
5959                         case -KEY_ne:
5960                         case -KEY_gt:
5961                         case -KEY_lt:
5962                         case -KEY_ge:
5963                         case -KEY_le:
5964                         case -KEY_cmp:
5965                             break;
5966                         default:
5967                             PL_expect = XTERM;  /* e.g. print $fh length() */
5968                             break;
5969                         }
5970                     }
5971                     else {
5972                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5973                     }
5974                 }
5975                 else if (isDIGIT(*s))
5976                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5977                 else if (*s == '.' && isDIGIT(s[1]))
5978                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5979                 else if ((*s == '?' || *s == '-' || *s == '+')
5980                          && !isSPACE(s[1]) && s[1] != '=')
5981                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5982                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5983                          && s[1] != '/')
5984                     PL_expect = XTERM;          /* e.g. print $fh /.../
5985                                                    XXX except DORDOR operator
5986                                                 */
5987                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5988                          && s[2] != '=')
5989                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5990             }
5991         }
5992         force_ident_maybe_lex('$');
5993         TOKEN('$');
5994
5995     case '@':
5996         if (PL_expect == XOPERATOR)
5997             no_op("Array", s);
5998         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
5999         PL_tokenbuf[0] = '@';
6000         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6001         pl_yylval.ival = 0;
6002         if (!PL_tokenbuf[1]) {
6003             PREREF('@');
6004         }
6005         if (PL_lex_state == LEX_NORMAL)
6006             s = SKIPSPACE1(s);
6007         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6008             if (*s == '{')
6009                 PL_tokenbuf[0] = '%';
6010
6011             /* Warn about @ where they meant $. */
6012             if (*s == '[' || *s == '{') {
6013                 if (ckWARN(WARN_SYNTAX)) {
6014                     S_check_scalar_slice(aTHX_ s);
6015                 }
6016             }
6017         }
6018         PL_expect = XOPERATOR;
6019         force_ident_maybe_lex('@');
6020         TERM('@');
6021
6022      case '/':                  /* may be division, defined-or, or pattern */
6023         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6024             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6025                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6026                 TOKEN(0);
6027             s += 2;
6028             AOPERATOR(DORDOR);
6029         }
6030         else if (PL_expect == XOPERATOR) {
6031             s++;
6032             if (*s == '=' && !PL_lex_allbrackets &&
6033                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6034                 s--;
6035                 TOKEN(0);
6036             }
6037             Mop(OP_DIVIDE);
6038         }
6039         else {
6040             /* Disable warning on "study /blah/" */
6041             if (PL_oldoldbufptr == PL_last_uni
6042              && (*PL_last_uni != 's' || s - PL_last_uni < 5
6043                  || memNE(PL_last_uni, "study", 5)
6044                  || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6045              ))
6046                 check_uni();
6047             s = scan_pat(s,OP_MATCH);
6048             TERM(sublex_start());
6049         }
6050
6051      case '?':                  /* conditional */
6052         s++;
6053         if (!PL_lex_allbrackets &&
6054             PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6055             s--;
6056             TOKEN(0);
6057         }
6058         PL_lex_allbrackets++;
6059         OPERATOR('?');
6060
6061     case '.':
6062         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6063 #ifdef PERL_STRICT_CR
6064             && s[1] == '\n'
6065 #else
6066             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6067 #endif
6068             && (s == PL_linestart || s[-1] == '\n') )
6069         {
6070             PL_expect = XSTATE;
6071             formbrack = 2; /* dot seen where arguments expected */
6072             goto rightbracket;
6073         }
6074         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6075             s += 3;
6076             OPERATOR(YADAYADA);
6077         }
6078         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6079             char tmp = *s++;
6080             if (*s == tmp) {
6081                 if (!PL_lex_allbrackets &&
6082                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6083                     s--;
6084                     TOKEN(0);
6085                 }
6086                 s++;
6087                 if (*s == tmp) {
6088                     s++;
6089                     pl_yylval.ival = OPf_SPECIAL;
6090                 }
6091                 else
6092                     pl_yylval.ival = 0;
6093                 OPERATOR(DOTDOT);
6094             }
6095             if (*s == '=' && !PL_lex_allbrackets &&
6096                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6097                 s--;
6098                 TOKEN(0);
6099             }
6100             Aop(OP_CONCAT);
6101         }
6102         /* FALLTHROUGH */
6103     case '0': case '1': case '2': case '3': case '4':
6104     case '5': case '6': case '7': case '8': case '9':
6105         s = scan_num(s, &pl_yylval);
6106         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6107         if (PL_expect == XOPERATOR)
6108             no_op("Number",s);
6109         TERM(THING);
6110
6111     case '\'':
6112         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6113         if (!s)
6114             missingterm(NULL);
6115         COPLINE_SET_FROM_MULTI_END;
6116         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6117         if (PL_expect == XOPERATOR) {
6118             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6119                 return deprecate_commaless_var_list();
6120             }
6121             else
6122                 no_op("String",s);
6123         }
6124         pl_yylval.ival = OP_CONST;
6125         TERM(sublex_start());
6126
6127     case '"':
6128         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6129         DEBUG_T( {
6130             if (s)
6131                 printbuf("### Saw string before %s\n", s);
6132             else
6133                 PerlIO_printf(Perl_debug_log,
6134                              "### Saw unterminated string\n");
6135         } );
6136         if (PL_expect == XOPERATOR) {
6137             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6138                 return deprecate_commaless_var_list();
6139             }
6140             else
6141                 no_op("String",s);
6142         }
6143         if (!s)
6144             missingterm(NULL);
6145         pl_yylval.ival = OP_CONST;
6146         /* FIXME. I think that this can be const if char *d is replaced by
6147            more localised variables.  */
6148         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6149             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6150                 pl_yylval.ival = OP_STRINGIFY;
6151                 break;
6152             }
6153         }
6154         if (pl_yylval.ival == OP_CONST)
6155             COPLINE_SET_FROM_MULTI_END;
6156         TERM(sublex_start());
6157
6158     case '`':
6159         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6160         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6161         if (PL_expect == XOPERATOR)
6162             no_op("Backticks",s);
6163         if (!s)
6164             missingterm(NULL);
6165         pl_yylval.ival = OP_BACKTICK;
6166         TERM(sublex_start());
6167
6168     case '\\':
6169         s++;
6170         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6171          && isDIGIT(*s))
6172             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6173                            *s, *s);
6174         if (PL_expect == XOPERATOR)
6175             no_op("Backslash",s);
6176         OPERATOR(REFGEN);
6177
6178     case 'v':
6179         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6180             char *start = s + 2;
6181             while (isDIGIT(*start) || *start == '_')
6182                 start++;
6183             if (*start == '.' && isDIGIT(start[1])) {
6184                 s = scan_num(s, &pl_yylval);
6185                 TERM(THING);
6186             }
6187             else if ((*start == ':' && start[1] == ':')
6188                   || (PL_expect == XSTATE && *start == ':'))
6189                 goto keylookup;
6190             else if (PL_expect == XSTATE) {
6191                 d = start;
6192                 while (d < PL_bufend && isSPACE(*d)) d++;
6193                 if (*d == ':') goto keylookup;
6194             }
6195             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6196             if (!isALPHA(*start) && (PL_expect == XTERM
6197                         || PL_expect == XREF || PL_expect == XSTATE
6198                         || PL_expect == XTERMORDORDOR)) {
6199                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6200                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6201                 if (!gv) {
6202                     s = scan_num(s, &pl_yylval);
6203                     TERM(THING);
6204                 }
6205             }
6206         }
6207         goto keylookup;
6208     case 'x':
6209         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6210             s++;
6211             Mop(OP_REPEAT);
6212         }
6213         goto keylookup;
6214
6215     case '_':
6216     case 'a': case 'A':
6217     case 'b': case 'B':
6218     case 'c': case 'C':
6219     case 'd': case 'D':
6220     case 'e': case 'E':
6221     case 'f': case 'F':
6222     case 'g': case 'G':
6223     case 'h': case 'H':
6224     case 'i': case 'I':
6225     case 'j': case 'J':
6226     case 'k': case 'K':
6227     case 'l': case 'L':
6228     case 'm': case 'M':
6229     case 'n': case 'N':
6230     case 'o': case 'O':
6231     case 'p': case 'P':
6232     case 'q': case 'Q':
6233     case 'r': case 'R':
6234     case 's': case 'S':
6235     case 't': case 'T':
6236     case 'u': case 'U':
6237               case 'V':
6238     case 'w': case 'W':
6239               case 'X':
6240     case 'y': case 'Y':
6241     case 'z': case 'Z':
6242
6243       keylookup: {
6244         bool anydelim;
6245         bool lex;
6246         I32 tmp;
6247         SV *sv;
6248         CV *cv;
6249         PADOFFSET off;
6250         OP *rv2cv_op;
6251
6252         lex = FALSE;
6253         orig_keyword = 0;
6254         off = 0;
6255         sv = NULL;
6256         cv = NULL;
6257         gv = NULL;
6258         gvp = NULL;
6259         rv2cv_op = NULL;
6260
6261         PL_bufptr = s;
6262         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6263
6264         /* Some keywords can be followed by any delimiter, including ':' */
6265         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6266
6267         /* x::* is just a word, unless x is "CORE" */
6268         if (!anydelim && *s == ':' && s[1] == ':') {
6269             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6270             goto just_a_word;
6271         }
6272
6273         d = s;
6274         while (d < PL_bufend && isSPACE(*d))
6275                 d++;    /* no comments skipped here, or s### is misparsed */
6276
6277         /* Is this a word before a => operator? */
6278         if (*d == '=' && d[1] == '>') {
6279           fat_arrow:
6280             CLINE;
6281             pl_yylval.opval
6282                 = (OP*)newSVOP(OP_CONST, 0,
6283                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6284             pl_yylval.opval->op_private = OPpCONST_BARE;
6285             TERM(WORD);
6286         }
6287
6288         /* Check for plugged-in keyword */
6289         {
6290             OP *o;
6291             int result;
6292             char *saved_bufptr = PL_bufptr;
6293             PL_bufptr = s;
6294             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6295             s = PL_bufptr;
6296             if (result == KEYWORD_PLUGIN_DECLINE) {
6297                 /* not a plugged-in keyword */
6298                 PL_bufptr = saved_bufptr;
6299             } else if (result == KEYWORD_PLUGIN_STMT) {
6300                 pl_yylval.opval = o;
6301                 CLINE;
6302                 PL_expect = XSTATE;
6303                 return REPORT(PLUGSTMT);
6304             } else if (result == KEYWORD_PLUGIN_EXPR) {
6305                 pl_yylval.opval = o;
6306                 CLINE;
6307                 PL_expect = XOPERATOR;
6308                 return REPORT(PLUGEXPR);
6309             } else {
6310                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6311                                         PL_tokenbuf);
6312             }
6313         }
6314
6315         /* Check for built-in keyword */
6316         tmp = keyword(PL_tokenbuf, len, 0);
6317
6318         /* Is this a label? */
6319         if (!anydelim && PL_expect == XSTATE
6320               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6321             s = d + 1;
6322             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6323             pl_yylval.pval[len] = '\0';
6324             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6325             CLINE;
6326             TOKEN(LABEL);
6327         }
6328
6329         /* Check for lexical sub */
6330         if (PL_expect != XOPERATOR) {
6331             char tmpbuf[sizeof PL_tokenbuf + 1];
6332             *tmpbuf = '&';
6333             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6334             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6335             if (off != NOT_IN_PAD) {
6336                 assert(off); /* we assume this is boolean-true below */
6337                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6338                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6339                     HEK * const stashname = HvNAME_HEK(stash);
6340                     sv = newSVhek(stashname);
6341                     sv_catpvs(sv, "::");
6342                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6343                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6344                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6345                                     SVt_PVCV);
6346                     off = 0;
6347                     if (!gv) {
6348                         sv_free(sv);
6349                         sv = NULL;
6350                         goto just_a_word;
6351                     }
6352                 }
6353                 else {
6354                     rv2cv_op = newOP(OP_PADANY, 0);
6355                     rv2cv_op->op_targ = off;
6356                     cv = find_lexical_cv(off);
6357                 }
6358                 lex = TRUE;
6359                 goto just_a_word;
6360             }
6361             off = 0;
6362         }
6363
6364         if (tmp < 0) {                  /* second-class keyword? */
6365             GV *ogv = NULL;     /* override (winner) */
6366             GV *hgv = NULL;     /* hidden (loser) */
6367             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6368                 CV *cv;
6369                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6370                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6371                                             SVt_PVCV)) &&
6372                     (cv = GvCVu(gv)))
6373                 {
6374                     if (GvIMPORTED_CV(gv))
6375                         ogv = gv;
6376                     else if (! CvMETHOD(cv))
6377                         hgv = gv;
6378                 }
6379                 if (!ogv &&
6380                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6381                                           len, FALSE)) &&
6382                     (gv = *gvp) && (
6383                         isGV_with_GP(gv)
6384                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
6385                             :   SvPCS_IMPORTED(gv)
6386                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6387                                          len, 0), 1)
6388                    ))
6389                 {
6390                     ogv = gv;
6391                 }
6392             }
6393             if (ogv) {
6394                 orig_keyword = tmp;
6395                 tmp = 0;                /* overridden by import or by GLOBAL */
6396             }
6397             else if (gv && !gvp
6398                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6399                      && GvCVu(gv))
6400             {
6401                 tmp = 0;                /* any sub overrides "weak" keyword */
6402             }
6403             else {                      /* no override */
6404                 tmp = -tmp;
6405                 if (tmp == KEY_dump) {
6406                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6407                                    "dump() better written as CORE::dump()");
6408                 }
6409                 gv = NULL;
6410                 gvp = 0;
6411                 if (hgv && tmp != KEY_x)        /* never ambiguous */
6412                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6413                                    "Ambiguous call resolved as CORE::%s(), "
6414                                    "qualify as such or use &",
6415                                    GvENAME(hgv));
6416             }
6417         }
6418
6419         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6420          && (!anydelim || *s != '#')) {
6421             /* no override, and not s### either; skipspace is safe here
6422              * check for => on following line */
6423             bool arrow;
6424             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6425             STRLEN   soff = s         - SvPVX(PL_linestr);
6426             s = skipspace_flags(s, LEX_NO_INCLINE);
6427             arrow = *s == '=' && s[1] == '>';
6428             PL_bufptr = SvPVX(PL_linestr) + bufoff;
6429             s         = SvPVX(PL_linestr) +   soff;
6430             if (arrow)
6431                 goto fat_arrow;
6432         }
6433
6434       reserved_word:
6435         switch (tmp) {
6436
6437         default:                        /* not a keyword */
6438             /* Trade off - by using this evil construction we can pull the
6439                variable gv into the block labelled keylookup. If not, then
6440                we have to give it function scope so that the goto from the
6441                earlier ':' case doesn't bypass the initialisation.  */
6442             if (0) {
6443             just_a_word_zero_gv:
6444                 sv = NULL;
6445                 cv = NULL;
6446                 gv = NULL;
6447                 gvp = NULL;
6448                 rv2cv_op = NULL;
6449                 orig_keyword = 0;
6450                 lex = 0;
6451                 off = 0;
6452             }
6453           just_a_word: {
6454                 int pkgname = 0;
6455                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6456                 const char penultchar =
6457                     lastchar && PL_bufptr - 2 >= PL_linestart
6458                          ? PL_bufptr[-2]
6459                          : 0;
6460
6461
6462                 /* Get the rest if it looks like a package qualifier */
6463
6464                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6465                     STRLEN morelen;
6466                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6467                                   TRUE, &morelen);
6468                     if (!morelen)
6469                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6470                                 UTF8fARG(UTF, len, PL_tokenbuf),
6471                                 *s == '\'' ? "'" : "::");
6472                     len += morelen;
6473                     pkgname = 1;
6474                 }
6475
6476                 if (PL_expect == XOPERATOR) {
6477                     if (PL_bufptr == PL_linestart) {
6478                         CopLINE_dec(PL_curcop);
6479                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6480                         CopLINE_inc(PL_curcop);
6481                     }
6482                     else
6483                         no_op("Bareword",s);
6484                 }
6485
6486                 /* Look for a subroutine with this name in current package,
6487                    unless this is a lexical sub, or name is "Foo::",
6488                    in which case Foo is a bareword
6489                    (and a package name). */
6490
6491                 if (len > 2 &&
6492                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6493                 {
6494                     if (ckWARN(WARN_BAREWORD)
6495                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6496                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6497                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
6498                            UTF8fARG(UTF, len, PL_tokenbuf));
6499                     len -= 2;
6500                     PL_tokenbuf[len] = '\0';
6501                     gv = NULL;
6502                     gvp = 0;
6503                 }
6504                 else {
6505                     if (!lex && !gv) {
6506                         /* Mustn't actually add anything to a symbol table.
6507                            But also don't want to "initialise" any placeholder
6508                            constants that might already be there into full
6509                            blown PVGVs with attached PVCV.  */
6510                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6511                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6512                                                SVt_PVCV);
6513                     }
6514                     len = 0;
6515                 }
6516
6517                 /* if we saw a global override before, get the right name */
6518
6519                 if (!sv)
6520                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6521                     len ? len : strlen(PL_tokenbuf));
6522                 if (gvp) {
6523                     SV * const tmp_sv = sv;
6524                     sv = newSVpvs("CORE::GLOBAL::");
6525                     sv_catsv(sv, tmp_sv);
6526                     SvREFCNT_dec(tmp_sv);
6527                 }
6528
6529
6530                 /* Presume this is going to be a bareword of some sort. */
6531                 CLINE;
6532                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6533                 pl_yylval.opval->op_private = OPpCONST_BARE;
6534
6535                 /* And if "Foo::", then that's what it certainly is. */
6536                 if (len)
6537                     goto safe_bareword;
6538
6539                 if (!off)
6540                 {
6541                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6542                     const_op->op_private = OPpCONST_BARE;
6543                     rv2cv_op = newCVREF(0, const_op);
6544                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6545                 }
6546
6547                 /* See if it's the indirect object for a list operator. */
6548
6549                 if (PL_oldoldbufptr &&
6550                     PL_oldoldbufptr < PL_bufptr &&
6551                     (PL_oldoldbufptr == PL_last_lop
6552                      || PL_oldoldbufptr == PL_last_uni) &&
6553                     /* NO SKIPSPACE BEFORE HERE! */
6554                     (PL_expect == XREF ||
6555                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6556                 {
6557                     bool immediate_paren = *s == '(';
6558
6559                     /* (Now we can afford to cross potential line boundary.) */
6560                     s = SKIPSPACE2(s,nextPL_nextwhite);
6561
6562                     /* Two barewords in a row may indicate method call. */
6563
6564                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6565                         (tmp = intuit_method(s, gv, cv))) {
6566                         op_free(rv2cv_op);
6567                         if (tmp == METHOD && !PL_lex_allbrackets &&
6568                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6569                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6570                         return REPORT(tmp);
6571                     }
6572
6573                     /* If not a declared subroutine, it's an indirect object. */
6574                     /* (But it's an indir obj regardless for sort.) */
6575                     /* Also, if "_" follows a filetest operator, it's a bareword */
6576
6577                     if (
6578                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6579                          (!cv &&
6580                         (PL_last_lop_op != OP_MAPSTART &&
6581                          PL_last_lop_op != OP_GREPSTART))))
6582                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6583                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6584                        )
6585                     {
6586                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6587                         goto bareword;
6588                     }
6589                 }
6590
6591                 PL_expect = XOPERATOR;
6592                 s = skipspace(s);
6593
6594                 /* Is this a word before a => operator? */
6595                 if (*s == '=' && s[1] == '>' && !pkgname) {
6596                     op_free(rv2cv_op);
6597                     CLINE;
6598                     /* This is our own scalar, created a few lines above,
6599                        so this is safe. */
6600                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
6601                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6602                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6603                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6604                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
6605                     TERM(WORD);
6606                 }
6607
6608                 /* If followed by a paren, it's certainly a subroutine. */
6609                 if (*s == '(') {
6610                     CLINE;
6611                     if (cv) {
6612                         d = s + 1;
6613                         while (SPACE_OR_TAB(*d))
6614                             d++;
6615                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6616                             s = d + 1;
6617                             goto its_constant;
6618                         }
6619                     }
6620                     NEXTVAL_NEXTTOKE.opval =
6621                         off ? rv2cv_op : pl_yylval.opval;
6622                     PL_expect = XOPERATOR;
6623                     if (off)
6624                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
6625                     else op_free(rv2cv_op),        force_next(WORD);
6626                     pl_yylval.ival = 0;
6627                     TOKEN('&');
6628                 }
6629
6630                 /* If followed by var or block, call it a method (unless sub) */
6631
6632                 if ((*s == '$' || *s == '{') && !cv) {
6633                     op_free(rv2cv_op);
6634                     PL_last_lop = PL_oldbufptr;
6635                     PL_last_lop_op = OP_METHOD;
6636                     if (!PL_lex_allbrackets &&
6637                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6638                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6639                     PREBLOCK(METHOD);
6640                 }
6641
6642                 /* If followed by a bareword, see if it looks like indir obj. */
6643
6644                 if (!orig_keyword
6645                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6646                         && (tmp = intuit_method(s, gv, cv))) {
6647                     op_free(rv2cv_op);
6648                     if (tmp == METHOD && !PL_lex_allbrackets &&
6649                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6650                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6651                     return REPORT(tmp);
6652                 }
6653
6654                 /* Not a method, so call it a subroutine (if defined) */
6655
6656                 if (cv) {
6657                     if (lastchar == '-' && penultchar != '-') {
6658                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
6659                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6660                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
6661                              UTF8fARG(UTF, l, PL_tokenbuf),
6662                              UTF8fARG(UTF, l, PL_tokenbuf));
6663                     }
6664                     /* Check for a constant sub */
6665                     if ((sv = cv_const_sv_or_av(cv))) {
6666                   its_constant:
6667                         op_free(rv2cv_op);
6668                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6669                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6670                         if (SvTYPE(sv) == SVt_PVAV)
6671                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6672                                                       pl_yylval.opval);
6673                         else {
6674                             pl_yylval.opval->op_private = 0;
6675                             pl_yylval.opval->op_folded = 1;
6676                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
6677                         }
6678                         TOKEN(WORD);
6679                     }
6680
6681                     op_free(pl_yylval.opval);
6682                     pl_yylval.opval =
6683                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6684                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6685                     PL_last_lop = PL_oldbufptr;
6686                     PL_last_lop_op = OP_ENTERSUB;
6687                     /* Is there a prototype? */
6688                     if (
6689                         SvPOK(cv))
6690                     {
6691                         STRLEN protolen = CvPROTOLEN(cv);
6692                         const char *proto = CvPROTO(cv);
6693                         bool optional;
6694                         proto = S_strip_spaces(aTHX_ proto, &protolen);
6695                         if (!protolen)
6696                             TERM(FUNC0SUB);
6697                         if ((optional = *proto == ';'))
6698                           do
6699                             proto++;
6700                           while (*proto == ';');
6701                         if (
6702                             (
6703                                 (
6704                                     *proto == '$' || *proto == '_'
6705                                  || *proto == '*' || *proto == '+'
6706                                 )
6707                              && proto[1] == '\0'
6708                             )
6709                          || (
6710                              *proto == '\\' && proto[1] && proto[2] == '\0'
6711                             )
6712                         )
6713                             UNIPROTO(UNIOPSUB,optional);
6714                         if (*proto == '\\' && proto[1] == '[') {
6715                             const char *p = proto + 2;
6716                             while(*p && *p != ']')
6717                                 ++p;
6718                             if(*p == ']' && !p[1])
6719                                 UNIPROTO(UNIOPSUB,optional);
6720                         }
6721                         if (*proto == '&' && *s == '{') {
6722                             if (PL_curstash)
6723                                 sv_setpvs(PL_subname, "__ANON__");
6724                             else
6725                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6726                             if (!PL_lex_allbrackets &&
6727                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6728                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6729                             PREBLOCK(LSTOPSUB);
6730                         }
6731                     }
6732                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6733                     PL_expect = XTERM;
6734                     force_next(off ? PRIVATEREF : WORD);
6735                     if (!PL_lex_allbrackets &&
6736                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6737                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6738                     TOKEN(NOAMP);
6739                 }
6740
6741                 /* Call it a bare word */
6742
6743                 if (PL_hints & HINT_STRICT_SUBS)
6744                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6745                 else {
6746                 bareword:
6747                     /* after "print" and similar functions (corresponding to
6748                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6749                      * a filehandle should be subject to "strict subs".
6750                      * Likewise for the optional indirect-object argument to system
6751                      * or exec, which can't be a bareword */
6752                     if ((PL_last_lop_op == OP_PRINT
6753                             || PL_last_lop_op == OP_PRTF
6754                             || PL_last_lop_op == OP_SAY
6755                             || PL_last_lop_op == OP_SYSTEM
6756                             || PL_last_lop_op == OP_EXEC)
6757                             && (PL_hints & HINT_STRICT_SUBS))
6758                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6759                     if (lastchar != '-') {
6760                         if (ckWARN(WARN_RESERVED)) {
6761                             d = PL_tokenbuf;
6762                             while (isLOWER(*d))
6763                                 d++;
6764                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6765                             {
6766                                 /* PL_warn_reserved is constant */
6767                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6768                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6769                                        PL_tokenbuf);
6770                                 GCC_DIAG_RESTORE;
6771                             }
6772                         }
6773                     }
6774                 }
6775                 op_free(rv2cv_op);
6776
6777             safe_bareword:
6778                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6779                  && saw_infix_sigil) {
6780                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6781                                      "Operator or semicolon missing before %c%"UTF8f,
6782                                      lastchar,
6783                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
6784                                               PL_tokenbuf));
6785                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6786                                      "Ambiguous use of %c resolved as operator %c",
6787                                      lastchar, lastchar);
6788                 }
6789                 TOKEN(WORD);
6790             }
6791
6792         case KEY___FILE__:
6793             FUN0OP(
6794                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6795             );
6796
6797         case KEY___LINE__:
6798             FUN0OP(
6799                 (OP*)newSVOP(OP_CONST, 0,
6800                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6801             );
6802
6803         case KEY___PACKAGE__:
6804             FUN0OP(
6805                 (OP*)newSVOP(OP_CONST, 0,
6806                                         (PL_curstash
6807                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6808                                          : &PL_sv_undef))
6809             );
6810
6811         case KEY___DATA__:
6812         case KEY___END__: {
6813             GV *gv;
6814             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6815                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6816                                         ? PL_curstash
6817                                         : PL_defstash;
6818                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6819                 if (!isGV(gv))
6820                     gv_init(gv,stash,"DATA",4,0);
6821                 GvMULTI_on(gv);
6822                 if (!GvIO(gv))
6823                     GvIOp(gv) = newIO();
6824                 IoIFP(GvIOp(gv)) = PL_rsfp;
6825 #if defined(HAS_FCNTL) && defined(F_SETFD)
6826                 {
6827                     const int fd = PerlIO_fileno(PL_rsfp);
6828                     fcntl(fd,F_SETFD,fd >= 3);
6829                 }
6830 #endif
6831                 /* Mark this internal pseudo-handle as clean */
6832                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6833                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6834                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6835                 else
6836                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6837 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6838                 /* if the script was opened in binmode, we need to revert
6839                  * it to text mode for compatibility; but only iff it has CRs
6840                  * XXX this is a questionable hack at best. */
6841                 if (PL_bufend-PL_bufptr > 2
6842                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6843                 {
6844                     Off_t loc = 0;
6845                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6846                         loc = PerlIO_tell(PL_rsfp);
6847                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6848                     }
6849 #ifdef NETWARE
6850                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6851 #else
6852                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6853 #endif  /* NETWARE */
6854                         if (loc > 0)
6855                             PerlIO_seek(PL_rsfp, loc, 0);
6856                     }
6857                 }
6858 #endif
6859 #ifdef PERLIO_LAYERS
6860                 if (!IN_BYTES) {
6861                     if (UTF)
6862                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6863                     else if (PL_encoding) {
6864                         SV *name;
6865                         dSP;
6866                         ENTER;
6867                         SAVETMPS;
6868                         PUSHMARK(sp);
6869                         XPUSHs(PL_encoding);
6870                         PUTBACK;
6871                         call_method("name", G_SCALAR);
6872                         SPAGAIN;
6873                         name = POPs;
6874                         PUTBACK;
6875                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6876                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6877                                                       SVfARG(name)));
6878                         FREETMPS;
6879                         LEAVE;
6880                     }
6881                 }
6882 #endif
6883                 PL_rsfp = NULL;
6884             }
6885             goto fake_eof;
6886         }
6887
6888         case KEY___SUB__:
6889             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
6890
6891         case KEY_AUTOLOAD:
6892         case KEY_DESTROY:
6893         case KEY_BEGIN:
6894         case KEY_UNITCHECK:
6895         case KEY_CHECK:
6896         case KEY_INIT:
6897         case KEY_END:
6898             if (PL_expect == XSTATE) {
6899                 s = PL_bufptr;
6900                 goto really_sub;
6901             }
6902             goto just_a_word;
6903
6904         case_KEY_CORE:
6905             {
6906                 STRLEN olen = len;
6907                 d = s;
6908                 s += 2;
6909                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6910                 if ((*s == ':' && s[1] == ':')
6911                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
6912                 {
6913                     s = d;
6914                     len = olen;
6915                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
6916                     goto just_a_word;
6917                 }
6918                 if (!tmp)
6919                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
6920                                       UTF8fARG(UTF, len, PL_tokenbuf));
6921                 if (tmp < 0)
6922                     tmp = -tmp;
6923                 else if (tmp == KEY_require || tmp == KEY_do
6924                       || tmp == KEY_glob)
6925                     /* that's a way to remember we saw "CORE::" */
6926                     orig_keyword = tmp;
6927                 goto reserved_word;
6928             }
6929
6930         case KEY_abs:
6931             UNI(OP_ABS);
6932
6933         case KEY_alarm:
6934             UNI(OP_ALARM);
6935
6936         case KEY_accept:
6937             LOP(OP_ACCEPT,XTERM);
6938
6939         case KEY_and:
6940             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
6941                 return REPORT(0);
6942             OPERATOR(ANDOP);
6943
6944         case KEY_atan2:
6945             LOP(OP_ATAN2,XTERM);
6946
6947         case KEY_bind:
6948             LOP(OP_BIND,XTERM);
6949
6950         case KEY_binmode:
6951             LOP(OP_BINMODE,XTERM);
6952
6953         case KEY_bless:
6954             LOP(OP_BLESS,XTERM);
6955
6956         case KEY_break:
6957             FUN0(OP_BREAK);
6958
6959         case KEY_chop:
6960             UNI(OP_CHOP);
6961
6962         case KEY_continue:
6963                     /* We have to disambiguate the two senses of
6964                       "continue". If the next token is a '{' then
6965                       treat it as the start of a continue block;
6966                       otherwise treat it as a control operator.
6967                      */
6968                     s = skipspace(s);
6969                     if (*s == '{')
6970             PREBLOCK(CONTINUE);
6971                     else
6972                         FUN0(OP_CONTINUE);
6973
6974         case KEY_chdir:
6975             /* may use HOME */
6976             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6977             UNI(OP_CHDIR);
6978
6979         case KEY_close:
6980             UNI(OP_CLOSE);
6981
6982         case KEY_closedir:
6983             UNI(OP_CLOSEDIR);
6984
6985         case KEY_cmp:
6986             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6987                 return REPORT(0);
6988             Eop(OP_SCMP);
6989
6990         case KEY_caller:
6991             UNI(OP_CALLER);
6992
6993         case KEY_crypt:
6994 #ifdef FCRYPT
6995             if (!PL_cryptseen) {
6996                 PL_cryptseen = TRUE;
6997                 init_des();
6998             }
6999 #endif
7000             LOP(OP_CRYPT,XTERM);
7001
7002         case KEY_chmod:
7003             LOP(OP_CHMOD,XTERM);
7004
7005         case KEY_chown:
7006             LOP(OP_CHOWN,XTERM);
7007
7008         case KEY_connect:
7009             LOP(OP_CONNECT,XTERM);
7010
7011         case KEY_chr:
7012             UNI(OP_CHR);
7013
7014         case KEY_cos:
7015             UNI(OP_COS);
7016
7017         case KEY_chroot:
7018             UNI(OP_CHROOT);
7019
7020         case KEY_default:
7021             PREBLOCK(DEFAULT);
7022
7023         case KEY_do:
7024             s = SKIPSPACE1(s);
7025             if (*s == '{')
7026                 PRETERMBLOCK(DO);
7027             if (*s != '\'') {
7028                 *PL_tokenbuf = '&';
7029                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7030                               1, &len);
7031                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7032                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7033                     d = SKIPSPACE1(d);
7034                     if (*d == '(') {
7035                         force_ident_maybe_lex('&');
7036                         s = d;
7037                     }
7038                 }
7039             }
7040             if (orig_keyword == KEY_do) {
7041                 orig_keyword = 0;
7042                 pl_yylval.ival = 1;
7043             }
7044             else
7045                 pl_yylval.ival = 0;
7046             OPERATOR(DO);
7047
7048         case KEY_die:
7049             PL_hints |= HINT_BLOCK_SCOPE;
7050             LOP(OP_DIE,XTERM);
7051
7052         case KEY_defined:
7053             UNI(OP_DEFINED);
7054
7055         case KEY_delete:
7056             UNI(OP_DELETE);
7057
7058         case KEY_dbmopen:
7059             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7060                               STR_WITH_LEN("NDBM_File::"),
7061                               STR_WITH_LEN("DB_File::"),
7062                               STR_WITH_LEN("GDBM_File::"),
7063                               STR_WITH_LEN("SDBM_File::"),
7064                               STR_WITH_LEN("ODBM_File::"),
7065                               NULL);
7066             LOP(OP_DBMOPEN,XTERM);
7067
7068         case KEY_dbmclose:
7069             UNI(OP_DBMCLOSE);
7070
7071         case KEY_dump:
7072             PL_expect = XOPERATOR;
7073             s = force_word(s,WORD,TRUE,FALSE);
7074             LOOPX(OP_DUMP);
7075
7076         case KEY_else:
7077             PREBLOCK(ELSE);
7078
7079         case KEY_elsif:
7080             pl_yylval.ival = CopLINE(PL_curcop);
7081             OPERATOR(ELSIF);
7082
7083         case KEY_eq:
7084             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7085                 return REPORT(0);
7086             Eop(OP_SEQ);
7087
7088         case KEY_exists:
7089             UNI(OP_EXISTS);
7090
7091         case KEY_exit:
7092             UNI(OP_EXIT);
7093
7094         case KEY_eval:
7095             s = SKIPSPACE1(s);
7096             if (*s == '{') { /* block eval */
7097                 PL_expect = XTERMBLOCK;
7098                 UNIBRACK(OP_ENTERTRY);
7099             }
7100             else { /* string eval */
7101                 PL_expect = XTERM;
7102                 UNIBRACK(OP_ENTEREVAL);
7103             }
7104
7105         case KEY_evalbytes:
7106             PL_expect = XTERM;
7107             UNIBRACK(-OP_ENTEREVAL);
7108
7109         case KEY_eof:
7110             UNI(OP_EOF);
7111
7112         case KEY_exp:
7113             UNI(OP_EXP);
7114
7115         case KEY_each:
7116             UNI(OP_EACH);
7117
7118         case KEY_exec:
7119             LOP(OP_EXEC,XREF);
7120
7121         case KEY_endhostent:
7122             FUN0(OP_EHOSTENT);
7123
7124         case KEY_endnetent:
7125             FUN0(OP_ENETENT);
7126
7127         case KEY_endservent:
7128             FUN0(OP_ESERVENT);
7129
7130         case KEY_endprotoent:
7131             FUN0(OP_EPROTOENT);
7132
7133         case KEY_endpwent:
7134             FUN0(OP_EPWENT);
7135
7136         case KEY_endgrent:
7137             FUN0(OP_EGRENT);
7138
7139         case KEY_for:
7140         case KEY_foreach:
7141             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7142                 return REPORT(0);
7143             pl_yylval.ival = CopLINE(PL_curcop);
7144             s = SKIPSPACE1(s);
7145             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7146                 char *p = s;
7147
7148                 if ((PL_bufend - p) >= 3 &&
7149                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7150                     p += 2;
7151                 else if ((PL_bufend - p) >= 4 &&
7152                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7153                     p += 3;
7154                 p = PEEKSPACE(p);
7155                 /* skip optional package name, as in "for my abc $x (..)" */
7156                 if (isIDFIRST_lazy_if(p,UTF)) {
7157                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7158                     p = PEEKSPACE(p);
7159                 }
7160                 if (*p != '$')
7161                     Perl_croak(aTHX_ "Missing $ on loop variable");
7162             }
7163             OPERATOR(FOR);
7164
7165         case KEY_formline:
7166             LOP(OP_FORMLINE,XTERM);
7167
7168         case KEY_fork:
7169             FUN0(OP_FORK);
7170
7171         case KEY_fc:
7172             UNI(OP_FC);
7173
7174         case KEY_fcntl:
7175             LOP(OP_FCNTL,XTERM);
7176
7177         case KEY_fileno:
7178             UNI(OP_FILENO);
7179
7180         case KEY_flock:
7181             LOP(OP_FLOCK,XTERM);
7182
7183         case KEY_gt:
7184             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7185                 return REPORT(0);
7186             Rop(OP_SGT);
7187
7188         case KEY_ge:
7189             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7190                 return REPORT(0);
7191             Rop(OP_SGE);
7192
7193         case KEY_grep:
7194             LOP(OP_GREPSTART, XREF);
7195
7196         case KEY_goto:
7197             PL_expect = XOPERATOR;
7198             s = force_word(s,WORD,TRUE,FALSE);
7199             LOOPX(OP_GOTO);
7200
7201         case KEY_gmtime:
7202             UNI(OP_GMTIME);
7203
7204         case KEY_getc:
7205             UNIDOR(OP_GETC);
7206
7207         case KEY_getppid:
7208             FUN0(OP_GETPPID);
7209
7210         case KEY_getpgrp:
7211             UNI(OP_GETPGRP);
7212
7213         case KEY_getpriority:
7214             LOP(OP_GETPRIORITY,XTERM);
7215
7216         case KEY_getprotobyname:
7217             UNI(OP_GPBYNAME);
7218
7219         case KEY_getprotobynumber:
7220             LOP(OP_GPBYNUMBER,XTERM);
7221
7222         case KEY_getprotoent:
7223             FUN0(OP_GPROTOENT);
7224
7225         case KEY_getpwent:
7226             FUN0(OP_GPWENT);
7227
7228         case KEY_getpwnam:
7229             UNI(OP_GPWNAM);
7230
7231         case KEY_getpwuid:
7232             UNI(OP_GPWUID);
7233
7234         case KEY_getpeername:
7235             UNI(OP_GETPEERNAME);
7236
7237         case KEY_gethostbyname:
7238             UNI(OP_GHBYNAME);
7239
7240         case KEY_gethostbyaddr:
7241             LOP(OP_GHBYADDR,XTERM);
7242
7243         case KEY_gethostent:
7244             FUN0(OP_GHOSTENT);
7245
7246         case KEY_getnetbyname:
7247             UNI(OP_GNBYNAME);
7248
7249         case KEY_getnetbyaddr:
7250             LOP(OP_GNBYADDR,XTERM);
7251
7252         case KEY_getnetent:
7253             FUN0(OP_GNETENT);
7254
7255         case KEY_getservbyname:
7256             LOP(OP_GSBYNAME,XTERM);
7257
7258         case KEY_getservbyport:
7259             LOP(OP_GSBYPORT,XTERM);
7260
7261         case KEY_getservent:
7262             FUN0(OP_GSERVENT);
7263
7264         case KEY_getsockname:
7265             UNI(OP_GETSOCKNAME);
7266
7267         case KEY_getsockopt:
7268             LOP(OP_GSOCKOPT,XTERM);
7269
7270         case KEY_getgrent:
7271             FUN0(OP_GGRENT);
7272
7273         case KEY_getgrnam:
7274             UNI(OP_GGRNAM);
7275
7276         case KEY_getgrgid:
7277             UNI(OP_GGRGID);
7278
7279         case KEY_getlogin:
7280             FUN0(OP_GETLOGIN);
7281
7282         case KEY_given:
7283             pl_yylval.ival = CopLINE(PL_curcop);
7284             Perl_ck_warner_d(aTHX_
7285                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7286                 "given is experimental");
7287             OPERATOR(GIVEN);
7288
7289         case KEY_glob:
7290             LOP(
7291              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7292              XTERM
7293             );
7294
7295         case KEY_hex:
7296             UNI(OP_HEX);
7297
7298         case KEY_if:
7299             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7300                 return REPORT(0);
7301             pl_yylval.ival = CopLINE(PL_curcop);
7302             OPERATOR(IF);
7303
7304         case KEY_index:
7305             LOP(OP_INDEX,XTERM);
7306
7307         case KEY_int:
7308             UNI(OP_INT);
7309
7310         case KEY_ioctl:
7311             LOP(OP_IOCTL,XTERM);
7312
7313         case KEY_join:
7314             LOP(OP_JOIN,XTERM);
7315
7316         case KEY_keys:
7317             UNI(OP_KEYS);
7318
7319         case KEY_kill:
7320             LOP(OP_KILL,XTERM);
7321
7322         case KEY_last:
7323             PL_expect = XOPERATOR;
7324             s = force_word(s,WORD,TRUE,FALSE);
7325             LOOPX(OP_LAST);
7326
7327         case KEY_lc:
7328             UNI(OP_LC);
7329
7330         case KEY_lcfirst:
7331             UNI(OP_LCFIRST);
7332
7333         case KEY_local:
7334             pl_yylval.ival = 0;
7335             OPERATOR(LOCAL);
7336
7337         case KEY_length:
7338             UNI(OP_LENGTH);
7339
7340         case KEY_lt:
7341             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7342                 return REPORT(0);
7343             Rop(OP_SLT);
7344
7345         case KEY_le:
7346             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7347                 return REPORT(0);
7348             Rop(OP_SLE);
7349
7350         case KEY_localtime:
7351             UNI(OP_LOCALTIME);
7352
7353         case KEY_log:
7354             UNI(OP_LOG);
7355
7356         case KEY_link:
7357             LOP(OP_LINK,XTERM);
7358
7359         case KEY_listen:
7360             LOP(OP_LISTEN,XTERM);
7361
7362         case KEY_lock:
7363             UNI(OP_LOCK);
7364
7365         case KEY_lstat:
7366             UNI(OP_LSTAT);
7367
7368         case KEY_m:
7369             s = scan_pat(s,OP_MATCH);
7370             TERM(sublex_start());
7371
7372         case KEY_map:
7373             LOP(OP_MAPSTART, XREF);
7374
7375         case KEY_mkdir:
7376             LOP(OP_MKDIR,XTERM);
7377
7378         case KEY_msgctl:
7379             LOP(OP_MSGCTL,XTERM);
7380
7381         case KEY_msgget:
7382             LOP(OP_MSGGET,XTERM);
7383
7384         case KEY_msgrcv:
7385             LOP(OP_MSGRCV,XTERM);
7386
7387         case KEY_msgsnd:
7388             LOP(OP_MSGSND,XTERM);
7389
7390         case KEY_our:
7391         case KEY_my:
7392         case KEY_state:
7393             PL_in_my = (U16)tmp;
7394             s = SKIPSPACE1(s);
7395             if (isIDFIRST_lazy_if(s,UTF)) {
7396                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7397                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7398                 {
7399                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7400                         Perl_croak(aTHX_
7401                                   "Experimental \"%s\" subs not enabled",
7402                                    tmp == KEY_my    ? "my"    :
7403                                    tmp == KEY_state ? "state" : "our");
7404                     Perl_ck_warner_d(aTHX_
7405                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7406                         "The lexical_subs feature is experimental");
7407                     goto really_sub;
7408                 }
7409                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7410                 if (!PL_in_my_stash) {
7411                     char tmpbuf[1024];
7412                     int len;
7413                     PL_bufptr = s;
7414                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7415                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7416                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7417                 }
7418             }
7419             pl_yylval.ival = 1;
7420             OPERATOR(MY);
7421
7422         case KEY_next:
7423             PL_expect = XOPERATOR;
7424             s = force_word(s,WORD,TRUE,FALSE);
7425             LOOPX(OP_NEXT);
7426
7427         case KEY_ne:
7428             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7429                 return REPORT(0);
7430             Eop(OP_SNE);
7431
7432         case KEY_no:
7433             s = tokenize_use(0, s);
7434             TERM(USE);
7435
7436         case KEY_not:
7437             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7438                 FUN1(OP_NOT);
7439             else {
7440                 if (!PL_lex_allbrackets &&
7441                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7442                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7443                 OPERATOR(NOTOP);
7444             }
7445
7446         case KEY_open:
7447             s = SKIPSPACE1(s);
7448             if (isIDFIRST_lazy_if(s,UTF)) {
7449           const char *t;
7450           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7451               &len);
7452                 for (t=d; isSPACE(*t);)
7453                     t++;
7454                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7455                     /* [perl #16184] */
7456                     && !(t[0] == '=' && t[1] == '>')
7457                     && !(t[0] == ':' && t[1] == ':')
7458                     && !keyword(s, d-s, 0)
7459                 ) {
7460                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7461                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7462                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7463                 }
7464             }
7465             LOP(OP_OPEN,XTERM);
7466
7467         case KEY_or:
7468             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7469                 return REPORT(0);
7470             pl_yylval.ival = OP_OR;
7471             OPERATOR(OROP);
7472
7473         case KEY_ord:
7474             UNI(OP_ORD);
7475
7476         case KEY_oct:
7477             UNI(OP_OCT);
7478
7479         case KEY_opendir:
7480             LOP(OP_OPEN_DIR,XTERM);
7481
7482         case KEY_print:
7483             checkcomma(s,PL_tokenbuf,"filehandle");
7484             LOP(OP_PRINT,XREF);
7485
7486         case KEY_printf:
7487             checkcomma(s,PL_tokenbuf,"filehandle");
7488             LOP(OP_PRTF,XREF);
7489
7490         case KEY_prototype:
7491             UNI(OP_PROTOTYPE);
7492
7493         case KEY_push:
7494             LOP(OP_PUSH,XTERM);
7495
7496         case KEY_pop:
7497             UNIDOR(OP_POP);
7498
7499         case KEY_pos:
7500             UNIDOR(OP_POS);
7501
7502         case KEY_pack:
7503             LOP(OP_PACK,XTERM);
7504
7505         case KEY_package:
7506             s = force_word(s,WORD,FALSE,TRUE);
7507             s = SKIPSPACE1(s);
7508             s = force_strict_version(s);
7509             PL_lex_expect = XBLOCK;
7510             OPERATOR(PACKAGE);
7511
7512         case KEY_pipe:
7513             LOP(OP_PIPE_OP,XTERM);
7514
7515         case KEY_q:
7516             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7517             if (!s)
7518                 missingterm(NULL);
7519             COPLINE_SET_FROM_MULTI_END;
7520             pl_yylval.ival = OP_CONST;
7521             TERM(sublex_start());
7522
7523         case KEY_quotemeta:
7524             UNI(OP_QUOTEMETA);
7525
7526         case KEY_qw: {
7527             OP *words = NULL;
7528             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7529             if (!s)
7530                 missingterm(NULL);
7531             COPLINE_SET_FROM_MULTI_END;
7532             PL_expect = XOPERATOR;
7533             if (SvCUR(PL_lex_stuff)) {
7534                 int warned_comma = !ckWARN(WARN_QW);
7535                 int warned_comment = warned_comma;
7536                 d = SvPV_force(PL_lex_stuff, len);
7537                 while (len) {
7538                     for (; isSPACE(*d) && len; --len, ++d)
7539                         /**/;
7540                     if (len) {
7541                         SV *sv;
7542                         const char *b = d;
7543                         if (!warned_comma || !warned_comment) {
7544                             for (; !isSPACE(*d) && len; --len, ++d) {
7545                                 if (!warned_comma && *d == ',') {
7546                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7547                                         "Possible attempt to separate words with commas");
7548                                     ++warned_comma;
7549                                 }
7550                                 else if (!warned_comment && *d == '#') {
7551                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7552                                         "Possible attempt to put comments in qw() list");
7553                                     ++warned_comment;
7554                                 }
7555                             }
7556                         }
7557                         else {
7558                             for (; !isSPACE(*d) && len; --len, ++d)
7559                                 /**/;
7560                         }
7561                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7562                         words = op_append_elem(OP_LIST, words,
7563                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7564                     }
7565                 }
7566             }
7567             if (!words)
7568                 words = newNULLLIST();
7569             if (PL_lex_stuff) {
7570                 SvREFCNT_dec(PL_lex_stuff);
7571                 PL_lex_stuff = NULL;
7572             }
7573             PL_expect = XOPERATOR;
7574             pl_yylval.opval = sawparens(words);
7575             TOKEN(QWLIST);
7576         }
7577
7578         case KEY_qq:
7579             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7580             if (!s)
7581                 missingterm(NULL);
7582             pl_yylval.ival = OP_STRINGIFY;
7583             if (SvIVX(PL_lex_stuff) == '\'')
7584                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7585             TERM(sublex_start());
7586
7587         case KEY_qr:
7588             s = scan_pat(s,OP_QR);
7589             TERM(sublex_start());
7590
7591         case KEY_qx:
7592             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7593             if (!s)
7594                 missingterm(NULL);
7595             pl_yylval.ival = OP_BACKTICK;
7596             TERM(sublex_start());
7597
7598         case KEY_return:
7599             OLDLOP(OP_RETURN);
7600
7601         case KEY_require:
7602             s = SKIPSPACE1(s);
7603             PL_expect = XOPERATOR;
7604             if (isDIGIT(*s)) {
7605                 s = force_version(s, FALSE);
7606             }
7607             else if (*s != 'v' || !isDIGIT(s[1])
7608                     || (s = force_version(s, TRUE), *s == 'v'))
7609             {
7610                 *PL_tokenbuf = '\0';
7611                 s = force_word(s,WORD,TRUE,TRUE);
7612                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7613                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7614                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7615                 else if (*s == '<')
7616                     yyerror("<> at require-statement should be quotes");
7617             }
7618             if (orig_keyword == KEY_require) {
7619                 orig_keyword = 0;
7620                 pl_yylval.ival = 1;
7621             }
7622             else
7623                 pl_yylval.ival = 0;
7624             PL_expect = XTERM;
7625             PL_bufptr = s;
7626             PL_last_uni = PL_oldbufptr;
7627             PL_last_lop_op = OP_REQUIRE;
7628             s = skipspace(s);
7629             return REPORT( (int)REQUIRE );
7630
7631         case KEY_reset:
7632             UNI(OP_RESET);
7633
7634         case KEY_redo:
7635             PL_expect = XOPERATOR;
7636             s = force_word(s,WORD,TRUE,FALSE);
7637             LOOPX(OP_REDO);
7638
7639         case KEY_rename:
7640             LOP(OP_RENAME,XTERM);
7641
7642         case KEY_rand:
7643             UNI(OP_RAND);
7644
7645         case KEY_rmdir:
7646             UNI(OP_RMDIR);
7647
7648         case KEY_rindex:
7649             LOP(OP_RINDEX,XTERM);
7650
7651         case KEY_read:
7652             LOP(OP_READ,XTERM);
7653
7654         case KEY_readdir:
7655             UNI(OP_READDIR);
7656
7657         case KEY_readline:
7658             UNIDOR(OP_READLINE);
7659
7660         case KEY_readpipe:
7661             UNIDOR(OP_BACKTICK);
7662
7663         case KEY_rewinddir:
7664             UNI(OP_REWINDDIR);
7665
7666         case KEY_recv:
7667             LOP(OP_RECV,XTERM);
7668
7669         case KEY_reverse:
7670             LOP(OP_REVERSE,XTERM);
7671
7672         case KEY_readlink:
7673             UNIDOR(OP_READLINK);
7674
7675         case KEY_ref:
7676             UNI(OP_REF);
7677
7678         case KEY_s:
7679             s = scan_subst(s);
7680             if (pl_yylval.opval)
7681                 TERM(sublex_start());
7682             else
7683                 TOKEN(1);       /* force error */
7684
7685         case KEY_say:
7686             checkcomma(s,PL_tokenbuf,"filehandle");
7687             LOP(OP_SAY,XREF);
7688
7689         case KEY_chomp:
7690             UNI(OP_CHOMP);
7691
7692         case KEY_scalar:
7693             UNI(OP_SCALAR);
7694
7695         case KEY_select:
7696             LOP(OP_SELECT,XTERM);
7697
7698         case KEY_seek:
7699             LOP(OP_SEEK,XTERM);
7700
7701         case KEY_semctl:
7702             LOP(OP_SEMCTL,XTERM);
7703
7704         case KEY_semget:
7705             LOP(OP_SEMGET,XTERM);
7706
7707         case KEY_semop:
7708             LOP(OP_SEMOP,XTERM);
7709
7710         case KEY_send:
7711             LOP(OP_SEND,XTERM);
7712
7713         case KEY_setpgrp:
7714             LOP(OP_SETPGRP,XTERM);
7715
7716         case KEY_setpriority:
7717             LOP(OP_SETPRIORITY,XTERM);
7718
7719         case KEY_sethostent:
7720             UNI(OP_SHOSTENT);
7721
7722         case KEY_setnetent:
7723             UNI(OP_SNETENT);
7724
7725         case KEY_setservent:
7726             UNI(OP_SSERVENT);
7727
7728         case KEY_setprotoent:
7729             UNI(OP_SPROTOENT);
7730
7731         case KEY_setpwent:
7732             FUN0(OP_SPWENT);
7733
7734         case KEY_setgrent:
7735             FUN0(OP_SGRENT);
7736
7737         case KEY_seekdir:
7738             LOP(OP_SEEKDIR,XTERM);
7739
7740         case KEY_setsockopt:
7741             LOP(OP_SSOCKOPT,XTERM);
7742
7743         case KEY_shift:
7744             UNIDOR(OP_SHIFT);
7745
7746         case KEY_shmctl:
7747             LOP(OP_SHMCTL,XTERM);
7748
7749         case KEY_shmget:
7750             LOP(OP_SHMGET,XTERM);
7751
7752         case KEY_shmread:
7753             LOP(OP_SHMREAD,XTERM);
7754
7755         case KEY_shmwrite:
7756             LOP(OP_SHMWRITE,XTERM);
7757
7758         case KEY_shutdown:
7759             LOP(OP_SHUTDOWN,XTERM);
7760
7761         case KEY_sin:
7762             UNI(OP_SIN);
7763
7764         case KEY_sleep:
7765             UNI(OP_SLEEP);
7766
7767         case KEY_socket:
7768             LOP(OP_SOCKET,XTERM);
7769
7770         case KEY_socketpair:
7771             LOP(OP_SOCKPAIR,XTERM);
7772
7773         case KEY_sort:
7774             checkcomma(s,PL_tokenbuf,"subroutine name");
7775             s = SKIPSPACE1(s);
7776             PL_expect = XTERM;
7777             s = force_word(s,WORD,TRUE,TRUE);
7778             LOP(OP_SORT,XREF);
7779
7780         case KEY_split:
7781             LOP(OP_SPLIT,XTERM);
7782
7783         case KEY_sprintf:
7784             LOP(OP_SPRINTF,XTERM);
7785
7786         case KEY_splice:
7787             LOP(OP_SPLICE,XTERM);
7788
7789         case KEY_sqrt:
7790             UNI(OP_SQRT);
7791
7792         case KEY_srand:
7793             UNI(OP_SRAND);
7794
7795         case KEY_stat:
7796             UNI(OP_STAT);
7797
7798         case KEY_study:
7799             UNI(OP_STUDY);
7800
7801         case KEY_substr:
7802             LOP(OP_SUBSTR,XTERM);
7803
7804         case KEY_format:
7805         case KEY_sub:
7806           really_sub:
7807             {
7808                 char * const tmpbuf = PL_tokenbuf + 1;
7809                 expectation attrful;
7810                 bool have_name, have_proto;
7811                 const int key = tmp;
7812                 SV *format_name = NULL;
7813
7814                 d = s;
7815                 s = skipspace(s);
7816
7817                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7818                     (*s == ':' && s[1] == ':'))
7819                 {
7820
7821                     PL_expect = XBLOCK;
7822                     attrful = XATTRBLOCK;
7823                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7824                                   &len);
7825                     if (key == KEY_format)
7826                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7827                     *PL_tokenbuf = '&';
7828                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
7829                      || pad_findmy_pvn(
7830                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
7831                         ) != NOT_IN_PAD)
7832                         sv_setpvn(PL_subname, tmpbuf, len);
7833                     else {
7834                         sv_setsv(PL_subname,PL_curstname);
7835                         sv_catpvs(PL_subname,"::");
7836                         sv_catpvn(PL_subname,tmpbuf,len);
7837                     }
7838                     if (SvUTF8(PL_linestr))
7839                         SvUTF8_on(PL_subname);
7840                     have_name = TRUE;
7841
7842
7843                     s = skipspace(d);
7844                 }
7845                 else {
7846                     if (key == KEY_my || key == KEY_our || key==KEY_state)
7847                     {
7848                         *d = '\0';
7849                         /* diag_listed_as: Missing name in "%s sub" */
7850                         Perl_croak(aTHX_
7851                                   "Missing name in \"%s\"", PL_bufptr);
7852                     }
7853                     PL_expect = XTERMBLOCK;
7854                     attrful = XATTRTERM;
7855                     sv_setpvs(PL_subname,"?");
7856                     have_name = FALSE;
7857                 }
7858
7859                 if (key == KEY_format) {
7860                     if (format_name) {
7861                         NEXTVAL_NEXTTOKE.opval
7862                             = (OP*)newSVOP(OP_CONST,0, format_name);
7863                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7864                         force_next(WORD);
7865                     }
7866                     PREBLOCK(FORMAT);
7867                 }
7868
7869                 /* Look for a prototype */
7870                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
7871                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7872                     COPLINE_SET_FROM_MULTI_END;
7873                     if (!s)
7874                         Perl_croak(aTHX_ "Prototype not terminated");
7875                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
7876                     have_proto = TRUE;
7877
7878                     s = skipspace(s);
7879                 }
7880                 else
7881                     have_proto = FALSE;
7882
7883                 if (*s == ':' && s[1] != ':')
7884                     PL_expect = attrful;
7885                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
7886                     if (!have_name)
7887                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7888                     else if (*s != ';' && *s != '}')
7889                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7890                 }
7891
7892                 if (have_proto) {
7893                     NEXTVAL_NEXTTOKE.opval =
7894                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7895                     PL_lex_stuff = NULL;
7896                     force_next(THING);
7897                 }
7898                 if (!have_name) {
7899                     if (PL_curstash)
7900                         sv_setpvs(PL_subname, "__ANON__");
7901                     else
7902                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7903                     TOKEN(ANONSUB);
7904                 }
7905                 force_ident_maybe_lex('&');
7906                 TOKEN(SUB);
7907             }
7908
7909         case KEY_system:
7910             LOP(OP_SYSTEM,XREF);
7911
7912         case KEY_symlink:
7913             LOP(OP_SYMLINK,XTERM);
7914
7915         case KEY_syscall:
7916             LOP(OP_SYSCALL,XTERM);
7917
7918         case KEY_sysopen:
7919             LOP(OP_SYSOPEN,XTERM);
7920
7921         case KEY_sysseek:
7922             LOP(OP_SYSSEEK,XTERM);
7923
7924         case KEY_sysread:
7925             LOP(OP_SYSREAD,XTERM);
7926
7927         case KEY_syswrite:
7928             LOP(OP_SYSWRITE,XTERM);
7929
7930         case KEY_tr:
7931         case KEY_y:
7932             s = scan_trans(s);
7933             TERM(sublex_start());
7934
7935         case KEY_tell:
7936             UNI(OP_TELL);
7937
7938         case KEY_telldir:
7939             UNI(OP_TELLDIR);
7940
7941         case KEY_tie:
7942             LOP(OP_TIE,XTERM);
7943
7944         case KEY_tied:
7945             UNI(OP_TIED);
7946
7947         case KEY_time:
7948             FUN0(OP_TIME);
7949
7950         case KEY_times:
7951             FUN0(OP_TMS);
7952
7953         case KEY_truncate:
7954             LOP(OP_TRUNCATE,XTERM);
7955
7956         case KEY_uc:
7957             UNI(OP_UC);
7958
7959         case KEY_ucfirst:
7960             UNI(OP_UCFIRST);
7961
7962         case KEY_untie:
7963             UNI(OP_UNTIE);
7964
7965         case KEY_until:
7966             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7967                 return REPORT(0);
7968             pl_yylval.ival = CopLINE(PL_curcop);
7969             OPERATOR(UNTIL);
7970
7971         case KEY_unless:
7972             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7973                 return REPORT(0);
7974             pl_yylval.ival = CopLINE(PL_curcop);
7975             OPERATOR(UNLESS);
7976
7977         case KEY_unlink:
7978             LOP(OP_UNLINK,XTERM);
7979
7980         case KEY_undef:
7981             UNIDOR(OP_UNDEF);
7982
7983         case KEY_unpack:
7984             LOP(OP_UNPACK,XTERM);
7985
7986         case KEY_utime:
7987             LOP(OP_UTIME,XTERM);
7988
7989         case KEY_umask:
7990             UNIDOR(OP_UMASK);
7991
7992         case KEY_unshift:
7993             LOP(OP_UNSHIFT,XTERM);
7994
7995         case KEY_use:
7996             s = tokenize_use(1, s);
7997             OPERATOR(USE);
7998
7999         case KEY_values:
8000             UNI(OP_VALUES);
8001
8002         case KEY_vec:
8003             LOP(OP_VEC,XTERM);
8004
8005         case KEY_when:
8006             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8007                 return REPORT(0);
8008             pl_yylval.ival = CopLINE(PL_curcop);
8009             Perl_ck_warner_d(aTHX_
8010                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8011                 "when is experimental");
8012             OPERATOR(WHEN);
8013
8014         case KEY_while:
8015             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8016                 return REPORT(0);
8017             pl_yylval.ival = CopLINE(PL_curcop);
8018             OPERATOR(WHILE);
8019
8020         case KEY_warn:
8021             PL_hints |= HINT_BLOCK_SCOPE;
8022             LOP(OP_WARN,XTERM);
8023
8024         case KEY_wait:
8025             FUN0(OP_WAIT);
8026
8027         case KEY_waitpid:
8028             LOP(OP_WAITPID,XTERM);
8029
8030         case KEY_wantarray:
8031             FUN0(OP_WANTARRAY);
8032
8033         case KEY_write:
8034             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8035              * we use the same number on EBCDIC */
8036             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8037             UNI(OP_ENTERWRITE);
8038
8039         case KEY_x:
8040             if (PL_expect == XOPERATOR) {
8041                 if (*s == '=' && !PL_lex_allbrackets &&
8042                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8043                     return REPORT(0);
8044                 Mop(OP_REPEAT);
8045             }
8046             check_uni();
8047             goto just_a_word;
8048
8049         case KEY_xor:
8050             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8051                 return REPORT(0);
8052             pl_yylval.ival = OP_XOR;
8053             OPERATOR(OROP);
8054         }
8055     }}
8056 }
8057
8058 /*
8059   S_pending_ident
8060
8061   Looks up an identifier in the pad or in a package
8062
8063   Returns:
8064     PRIVATEREF if this is a lexical name.
8065     WORD       if this belongs to a package.
8066
8067   Structure:
8068       if we're in a my declaration
8069           croak if they tried to say my($foo::bar)
8070           build the ops for a my() declaration
8071       if it's an access to a my() variable
8072           build ops for access to a my() variable
8073       if in a dq string, and they've said @foo and we can't find @foo
8074           warn
8075       build ops for a bareword
8076 */
8077
8078 static int
8079 S_pending_ident(pTHX)
8080 {
8081     PADOFFSET tmp = 0;
8082     const char pit = (char)pl_yylval.ival;
8083     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8084     /* All routes through this function want to know if there is a colon.  */
8085     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8086
8087     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8088           "### Pending identifier '%s'\n", PL_tokenbuf); });
8089
8090     /* if we're in a my(), we can't allow dynamics here.
8091        $foo'bar has already been turned into $foo::bar, so
8092        just check for colons.
8093
8094        if it's a legal name, the OP is a PADANY.
8095     */
8096     if (PL_in_my) {
8097         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8098             if (has_colon)
8099                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8100                                   "variable %s in \"our\"",
8101                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8102             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8103         }
8104         else {
8105             if (has_colon) {
8106                 /* PL_no_myglob is constant */
8107                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8108                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8109                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8110                             UTF ? SVf_UTF8 : 0);
8111                 GCC_DIAG_RESTORE;
8112             }
8113
8114             pl_yylval.opval = newOP(OP_PADANY, 0);
8115             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8116                                                         UTF ? SVf_UTF8 : 0);
8117             return PRIVATEREF;
8118         }
8119     }
8120
8121     /*
8122        build the ops for accesses to a my() variable.
8123     */
8124
8125     if (!has_colon) {
8126         if (!PL_in_my)
8127             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8128                                     UTF ? SVf_UTF8 : 0);
8129         if (tmp != NOT_IN_PAD) {
8130             /* might be an "our" variable" */
8131             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8132                 /* build ops for a bareword */
8133                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8134                 HEK * const stashname = HvNAME_HEK(stash);
8135                 SV *  const sym = newSVhek(stashname);
8136                 sv_catpvs(sym, "::");
8137                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8138                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8139                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8140                 if (pit != '&')
8141                   gv_fetchsv(sym,
8142                     (PL_in_eval
8143                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8144                         : GV_ADDMULTI
8145                     ),
8146                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8147                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8148                      : SVt_PVHV));
8149                 return WORD;
8150             }
8151
8152             pl_yylval.opval = newOP(OP_PADANY, 0);
8153             pl_yylval.opval->op_targ = tmp;
8154             return PRIVATEREF;
8155         }
8156     }
8157
8158     /*
8159        Whine if they've said @foo in a doublequoted string,
8160        and @foo isn't a variable we can find in the symbol
8161        table.
8162     */
8163     if (ckWARN(WARN_AMBIGUOUS) &&
8164         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8165         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8166                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8167         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8168                 /* DO NOT warn for @- and @+ */
8169                 && !( PL_tokenbuf[2] == '\0' &&
8170                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8171            )
8172         {
8173             /* Downgraded from fatal to warning 20000522 mjd */
8174             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8175                         "Possible unintended interpolation of %"UTF8f
8176                         " in string",
8177                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8178         }
8179     }
8180
8181     /* build ops for a bareword */
8182     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8183                                    newSVpvn_flags(PL_tokenbuf + 1,
8184                                                       tokenbuf_len - 1,
8185                                                       UTF ? SVf_UTF8 : 0 ));
8186     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8187     if (pit != '&')
8188         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8189                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8190                      | ( UTF ? SVf_UTF8 : 0 ),
8191                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8192                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8193                       : SVt_PVHV));
8194     return WORD;
8195 }
8196
8197 STATIC void
8198 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8199 {
8200     PERL_ARGS_ASSERT_CHECKCOMMA;
8201
8202     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8203         if (ckWARN(WARN_SYNTAX)) {
8204             int level = 1;
8205             const char *w;
8206             for (w = s+2; *w && level; w++) {
8207                 if (*w == '(')
8208                     ++level;
8209                 else if (*w == ')')
8210                     --level;
8211             }
8212             while (isSPACE(*w))
8213                 ++w;
8214             /* the list of chars below is for end of statements or
8215              * block / parens, boolean operators (&&, ||, //) and branch
8216              * constructs (or, and, if, until, unless, while, err, for).
8217              * Not a very solid hack... */
8218             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8219                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8220                             "%s (...) interpreted as function",name);
8221         }
8222     }
8223     while (s < PL_bufend && isSPACE(*s))
8224         s++;
8225     if (*s == '(')
8226         s++;
8227     while (s < PL_bufend && isSPACE(*s))
8228         s++;
8229     if (isIDFIRST_lazy_if(s,UTF)) {
8230         const char * const w = s;
8231         s += UTF ? UTF8SKIP(s) : 1;
8232         while (isWORDCHAR_lazy_if(s,UTF))
8233             s += UTF ? UTF8SKIP(s) : 1;
8234         while (s < PL_bufend && isSPACE(*s))
8235             s++;
8236         if (*s == ',') {
8237             GV* gv;
8238             if (keyword(w, s - w, 0))
8239                 return;
8240
8241             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8242             if (gv && GvCVu(gv))
8243                 return;
8244             Perl_croak(aTHX_ "No comma allowed after %s", what);
8245         }
8246     }
8247 }
8248
8249 /* S_new_constant(): do any overload::constant lookup.
8250
8251    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8252    Best used as sv=new_constant(..., sv, ...).
8253    If s, pv are NULL, calls subroutine with one argument,
8254    and <type> is used with error messages only.
8255    <type> is assumed to be well formed UTF-8 */
8256
8257 STATIC SV *
8258 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8259                SV *sv, SV *pv, const char *type, STRLEN typelen)
8260 {
8261     dSP;
8262     HV * table = GvHV(PL_hintgv);                /* ^H */
8263     SV *res;
8264     SV *errsv = NULL;
8265     SV **cvp;
8266     SV *cv, *typesv;
8267     const char *why1 = "", *why2 = "", *why3 = "";
8268
8269     PERL_ARGS_ASSERT_NEW_CONSTANT;
8270     /* We assume that this is true: */
8271     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8272     assert(type || s);
8273
8274     /* charnames doesn't work well if there have been errors found */
8275     if (PL_error_count > 0 && *key == 'c')
8276     {
8277         SvREFCNT_dec_NN(sv);
8278         return &PL_sv_undef;
8279     }
8280
8281     sv_2mortal(sv);                     /* Parent created it permanently */
8282     if (!table
8283         || ! (PL_hints & HINT_LOCALIZE_HH)
8284         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8285         || ! SvOK(*cvp))
8286     {
8287         char *msg;
8288
8289         /* Here haven't found what we're looking for.  If it is charnames,
8290          * perhaps it needs to be loaded.  Try doing that before giving up */
8291         if (*key == 'c') {
8292             Perl_load_module(aTHX_
8293                             0,
8294                             newSVpvs("_charnames"),
8295                              /* version parameter; no need to specify it, as if
8296                               * we get too early a version, will fail anyway,
8297                               * not being able to find '_charnames' */
8298                             NULL,
8299                             newSVpvs(":full"),
8300                             newSVpvs(":short"),
8301                             NULL);
8302             assert(sp == PL_stack_sp);
8303             table = GvHV(PL_hintgv);
8304             if (table
8305                 && (PL_hints & HINT_LOCALIZE_HH)
8306                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8307                 && SvOK(*cvp))
8308             {
8309                 goto now_ok;
8310             }
8311         }
8312         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8313             msg = Perl_form(aTHX_
8314                                "Constant(%.*s) unknown",
8315                                 (int)(type ? typelen : len),
8316                                 (type ? type: s));
8317         }
8318         else {
8319             why1 = "$^H{";
8320             why2 = key;
8321             why3 = "} is not defined";
8322         report:
8323             if (*key == 'c') {
8324                 msg = Perl_form(aTHX_
8325                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8326                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8327                       );
8328             }
8329             else {
8330                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8331                                     (int)(type ? typelen : len),
8332                                     (type ? type: s), why1, why2, why3);
8333             }
8334         }
8335         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8336         return SvREFCNT_inc_simple_NN(sv);
8337     }
8338 now_ok:
8339     cv = *cvp;
8340     if (!pv && s)
8341         pv = newSVpvn_flags(s, len, SVs_TEMP);
8342     if (type && pv)
8343         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8344     else
8345         typesv = &PL_sv_undef;
8346
8347     PUSHSTACKi(PERLSI_OVERLOAD);
8348     ENTER ;
8349     SAVETMPS;
8350
8351     PUSHMARK(SP) ;
8352     EXTEND(sp, 3);
8353     if (pv)
8354         PUSHs(pv);
8355     PUSHs(sv);
8356     if (pv)
8357         PUSHs(typesv);
8358     PUTBACK;
8359     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8360
8361     SPAGAIN ;
8362
8363     /* Check the eval first */
8364     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8365         STRLEN errlen;
8366         const char * errstr;
8367         sv_catpvs(errsv, "Propagated");
8368         errstr = SvPV_const(errsv, errlen);
8369         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8370         (void)POPs;
8371         res = SvREFCNT_inc_simple_NN(sv);
8372     }
8373     else {
8374         res = POPs;
8375         SvREFCNT_inc_simple_void_NN(res);
8376     }
8377
8378     PUTBACK ;
8379     FREETMPS ;
8380     LEAVE ;
8381     POPSTACK;
8382
8383     if (!SvOK(res)) {
8384         why1 = "Call to &{$^H{";
8385         why2 = key;
8386         why3 = "}} did not return a defined value";
8387         sv = res;
8388         (void)sv_2mortal(sv);
8389         goto report;
8390     }
8391
8392     return res;
8393 }
8394
8395 PERL_STATIC_INLINE void
8396 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8397     PERL_ARGS_ASSERT_PARSE_IDENT;
8398
8399     for (;;) {
8400         if (*d >= e)
8401             Perl_croak(aTHX_ "%s", ident_too_long);
8402         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8403              /* The UTF-8 case must come first, otherwise things
8404              * like c\N{COMBINING TILDE} would start failing, as the
8405              * isWORDCHAR_A case below would gobble the 'c' up.
8406              */
8407
8408             char *t = *s + UTF8SKIP(*s);
8409             while (isIDCONT_utf8((U8*)t))
8410                 t += UTF8SKIP(t);
8411             if (*d + (t - *s) > e)
8412                 Perl_croak(aTHX_ "%s", ident_too_long);
8413             Copy(*s, *d, t - *s, char);
8414             *d += t - *s;
8415             *s = t;
8416         }
8417         else if ( isWORDCHAR_A(**s) ) {
8418             do {
8419                 *(*d)++ = *(*s)++;
8420             } while (isWORDCHAR_A(**s) && *d < e);
8421         }
8422         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8423             *(*d)++ = ':';
8424             *(*d)++ = ':';
8425             (*s)++;
8426         }
8427         else if (allow_package && **s == ':' && (*s)[1] == ':'
8428            /* Disallow things like Foo::$bar. For the curious, this is
8429             * the code path that triggers the "Bad name after" warning
8430             * when looking for barewords.
8431             */
8432            && (*s)[2] != '$') {
8433             *(*d)++ = *(*s)++;
8434             *(*d)++ = *(*s)++;
8435         }
8436         else
8437             break;
8438     }
8439     return;
8440 }
8441
8442 /* Returns a NUL terminated string, with the length of the string written to
8443    *slp
8444    */
8445 STATIC char *
8446 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8447 {
8448     char *d = dest;
8449     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8450     bool is_utf8 = cBOOL(UTF);
8451
8452     PERL_ARGS_ASSERT_SCAN_WORD;
8453
8454     parse_ident(&s, &d, e, allow_package, is_utf8);
8455     *d = '\0';
8456     *slp = d - dest;
8457     return s;
8458 }
8459
8460 STATIC char *
8461 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8462 {
8463     I32 herelines = PL_parser->herelines;
8464     SSize_t bracket = -1;
8465     char funny = *s++;
8466     char *d = dest;
8467     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8468     bool is_utf8 = cBOOL(UTF);
8469     I32 orig_copline = 0, tmp_copline = 0;
8470
8471     PERL_ARGS_ASSERT_SCAN_IDENT;
8472
8473     if (isSPACE(*s))
8474         s = PEEKSPACE(s);
8475     if (isDIGIT(*s)) {
8476         while (isDIGIT(*s)) {
8477             if (d >= e)
8478                 Perl_croak(aTHX_ "%s", ident_too_long);
8479             *d++ = *s++;
8480         }
8481     }
8482     else {
8483         parse_ident(&s, &d, e, 1, is_utf8);
8484     }
8485     *d = '\0';
8486     d = dest;
8487     if (*d) {
8488         /* Either a digit variable, or parse_ident() found an identifier
8489            (anything valid as a bareword), so job done and return.  */
8490         if (PL_lex_state != LEX_NORMAL)
8491             PL_lex_state = LEX_INTERPENDMAYBE;
8492         return s;
8493     }
8494     if (*s == '$' && s[1] &&
8495       (isIDFIRST_lazy_if(s+1,is_utf8)
8496          || isDIGIT_A((U8)s[1])
8497          || s[1] == '$'
8498          || s[1] == '{'
8499          || strnEQ(s+1,"::",2)) )
8500     {
8501         /* Dereferencing a value in a scalar variable.
8502            The alternatives are different syntaxes for a scalar variable.
8503            Using ' as a leading package separator isn't allowed. :: is.   */
8504         return s;
8505     }
8506     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8507     if (*s == '{') {
8508         bracket = s - SvPVX(PL_linestr);
8509         s++;
8510         orig_copline = CopLINE(PL_curcop);
8511         if (s < PL_bufend && isSPACE(*s)) {
8512             s = PEEKSPACE(s);
8513         }
8514     }
8515
8516 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8517  * iff Unicode semantics are to be used.  The legal ones are any of:
8518  *  a) ASCII digits
8519  *  b) ASCII punctuation
8520  *  c) When not under Unicode rules, any upper Latin1 character
8521  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
8522  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
8523  *     the \s ones. */
8524 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
8525                                    || isDIGIT_A((U8)(d))                    \
8526                                    || (!(u) && !isASCII((U8)(d)))           \
8527                                    || ((((U8)(d)) < 32)                     \
8528                                        && (((((U8)(d)) >= 14)               \
8529                                            || (((U8)(d)) <= 8 && (d) != 0) \
8530                                            || (((U8)(d)) == 13))))          \
8531                                    || (((U8)(d)) == toCTRL('?')))
8532     if (s < PL_bufend
8533         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
8534     {
8535         if ( isCNTRL_A((U8)*s) ) {
8536             deprecate("literal control characters in variable names");
8537         }
8538
8539         if (is_utf8) {
8540             const STRLEN skip = UTF8SKIP(s);
8541             STRLEN i;
8542             d[skip] = '\0';
8543             for ( i = 0; i < skip; i++ )
8544                 d[i] = *s++;
8545         }
8546         else {
8547             *d = *s++;
8548             d[1] = '\0';
8549         }
8550     }
8551     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8552     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8553         *d = toCTRL(*s);
8554         s++;
8555     }
8556     /* Warn about ambiguous code after unary operators if {...} notation isn't
8557        used.  There's no difference in ambiguity; it's merely a heuristic
8558        about when not to warn.  */
8559     else if (ck_uni && bracket == -1)
8560         check_uni();
8561     if (bracket != -1) {
8562         /* If we were processing {...} notation then...  */
8563         if (isIDFIRST_lazy_if(d,is_utf8)) {
8564             /* if it starts as a valid identifier, assume that it is one.
8565                (the later check for } being at the expected point will trap
8566                cases where this doesn't pan out.)  */
8567         d += is_utf8 ? UTF8SKIP(d) : 1;
8568         parse_ident(&s, &d, e, 1, is_utf8);
8569             *d = '\0';
8570             tmp_copline = CopLINE(PL_curcop);
8571             if (s < PL_bufend && isSPACE(*s)) {
8572                 s = PEEKSPACE(s);
8573             }
8574             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8575                 /* ${foo[0]} and ${foo{bar}} notation.  */
8576                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8577                     const char * const brack =
8578                         (const char *)
8579                         ((*s == '[') ? "[...]" : "{...}");
8580                     orig_copline = CopLINE(PL_curcop);
8581                     CopLINE_set(PL_curcop, tmp_copline);
8582    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8583                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8584                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8585                         funny, dest, brack, funny, dest, brack);
8586                     CopLINE_set(PL_curcop, orig_copline);
8587                 }
8588                 bracket++;
8589                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8590                 PL_lex_allbrackets++;
8591                 return s;
8592             }
8593         }
8594         /* Handle extended ${^Foo} variables
8595          * 1999-02-27 mjd-perl-patch@plover.com */
8596         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8597                  && isWORDCHAR(*s))
8598         {
8599             d++;
8600             while (isWORDCHAR(*s) && d < e) {
8601                 *d++ = *s++;
8602             }
8603             if (d >= e)
8604                 Perl_croak(aTHX_ "%s", ident_too_long);
8605             *d = '\0';
8606         }
8607
8608         if ( !tmp_copline )
8609             tmp_copline = CopLINE(PL_curcop);
8610         if (s < PL_bufend && isSPACE(*s)) {
8611             s = PEEKSPACE(s);
8612         }
8613
8614         /* Expect to find a closing } after consuming any trailing whitespace.
8615          */
8616         if (*s == '}') {
8617             s++;
8618             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8619                 PL_lex_state = LEX_INTERPEND;
8620                 PL_expect = XREF;
8621             }
8622             if (PL_lex_state == LEX_NORMAL) {
8623                 if (ckWARN(WARN_AMBIGUOUS) &&
8624                     (keyword(dest, d - dest, 0)
8625                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8626                 {
8627                     SV *tmp = newSVpvn_flags( dest, d - dest,
8628                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8629                     if (funny == '#')
8630                         funny = '@';
8631                     orig_copline = CopLINE(PL_curcop);
8632                     CopLINE_set(PL_curcop, tmp_copline);
8633                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8634                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8635                         funny, SVfARG(tmp), funny, SVfARG(tmp));
8636                     CopLINE_set(PL_curcop, orig_copline);
8637                 }
8638             }
8639         }
8640         else {
8641             /* Didn't find the closing } at the point we expected, so restore
8642                state such that the next thing to process is the opening { and */
8643             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8644             CopLINE_set(PL_curcop, orig_copline);
8645             PL_parser->herelines = herelines;
8646             *dest = '\0';
8647         }
8648     }
8649     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8650         PL_lex_state = LEX_INTERPEND;
8651     return s;
8652 }
8653
8654 static bool
8655 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8656
8657     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8658      * the parse starting at 's', based on the subset that are valid in this
8659      * context input to this routine in 'valid_flags'. Advances s.  Returns
8660      * TRUE if the input should be treated as a valid flag, so the next char
8661      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
8662      * first call on the current regex.  This routine will set it to any
8663      * charset modifier found.  The caller shouldn't change it.  This way,
8664      * another charset modifier encountered in the parse can be detected as an
8665      * error, as we have decided to allow only one */
8666
8667     const char c = **s;
8668     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8669
8670     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8671         if (isWORDCHAR_lazy_if(*s, UTF)) {
8672             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8673                        UTF ? SVf_UTF8 : 0);
8674             (*s) += charlen;
8675             /* Pretend that it worked, so will continue processing before
8676              * dieing */
8677             return TRUE;
8678         }
8679         return FALSE;
8680     }
8681
8682     switch (c) {
8683
8684         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8685         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8686         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8687         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8688         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8689         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8690         case LOCALE_PAT_MOD:
8691             if (*charset) {
8692                 goto multiple_charsets;
8693             }
8694             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8695             *charset = c;
8696             break;
8697         case UNICODE_PAT_MOD:
8698             if (*charset) {
8699                 goto multiple_charsets;
8700             }
8701             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8702             *charset = c;
8703             break;
8704         case ASCII_RESTRICT_PAT_MOD:
8705             if (! *charset) {
8706                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8707             }
8708             else {
8709
8710                 /* Error if previous modifier wasn't an 'a', but if it was, see
8711                  * if, and accept, a second occurrence (only) */
8712                 if (*charset != 'a'
8713                     || get_regex_charset(*pmfl)
8714                         != REGEX_ASCII_RESTRICTED_CHARSET)
8715                 {
8716                         goto multiple_charsets;
8717                 }
8718                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8719             }
8720             *charset = c;
8721             break;
8722         case DEPENDS_PAT_MOD:
8723             if (*charset) {
8724                 goto multiple_charsets;
8725             }
8726             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8727             *charset = c;
8728             break;
8729     }
8730
8731     (*s)++;
8732     return TRUE;
8733
8734     multiple_charsets:
8735         if (*charset != c) {
8736             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8737         }
8738         else if (c == 'a') {
8739   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8740             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8741         }
8742         else {
8743             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8744         }
8745
8746         /* Pretend that it worked, so will continue processing before dieing */
8747         (*s)++;
8748         return TRUE;
8749 }
8750
8751 STATIC char *
8752 S_scan_pat(pTHX_ char *start, I32 type)
8753 {
8754     PMOP *pm;
8755     char *s;
8756     const char * const valid_flags =
8757         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8758     char charset = '\0';    /* character set modifier */
8759
8760     PERL_ARGS_ASSERT_SCAN_PAT;
8761
8762     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8763     if (!s)
8764         Perl_croak(aTHX_ "Search pattern not terminated");
8765
8766     pm = (PMOP*)newPMOP(type, 0);
8767     if (PL_multi_open == '?') {
8768         /* This is the only point in the code that sets PMf_ONCE:  */
8769         pm->op_pmflags |= PMf_ONCE;
8770
8771         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8772            allows us to restrict the list needed by reset to just the ??
8773            matches.  */
8774         assert(type != OP_TRANS);
8775         if (PL_curstash) {
8776             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8777             U32 elements;
8778             if (!mg) {
8779                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8780                                  0);
8781             }
8782             elements = mg->mg_len / sizeof(PMOP**);
8783             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8784             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8785             mg->mg_len = elements * sizeof(PMOP**);
8786             PmopSTASH_set(pm,PL_curstash);
8787         }
8788     }
8789
8790     /* if qr/...(?{..}).../, then need to parse the pattern within a new
8791      * anon CV. False positives like qr/[(?{]/ are harmless */
8792
8793     if (type == OP_QR) {
8794         STRLEN len;
8795         char *e, *p = SvPV(PL_lex_stuff, len);
8796         e = p + len;
8797         for (; p < e; p++) {
8798             if (p[0] == '(' && p[1] == '?'
8799                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8800             {
8801                 pm->op_pmflags |= PMf_HAS_CV;
8802                 break;
8803             }
8804         }
8805         pm->op_pmflags |= PMf_IS_QR;
8806     }
8807
8808     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8809     /* issue a warning if /c is specified,but /g is not */
8810     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8811     {
8812         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8813                        "Use of /c modifier is meaningless without /g" );
8814     }
8815
8816     PL_lex_op = (OP*)pm;
8817     pl_yylval.ival = OP_MATCH;
8818     return s;
8819 }
8820
8821 STATIC char *
8822 S_scan_subst(pTHX_ char *start)
8823 {
8824     char *s;
8825     PMOP *pm;
8826     I32 first_start;
8827     line_t first_line;
8828     I32 es = 0;
8829     char charset = '\0';    /* character set modifier */
8830     char *t;
8831
8832     PERL_ARGS_ASSERT_SCAN_SUBST;
8833
8834     pl_yylval.ival = OP_NULL;
8835
8836     s = scan_str(start, TRUE, FALSE, FALSE, &t);
8837
8838     if (!s)
8839         Perl_croak(aTHX_ "Substitution pattern not terminated");
8840
8841     s = t;
8842
8843     first_start = PL_multi_start;
8844     first_line = CopLINE(PL_curcop);
8845     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8846     if (!s) {
8847         if (PL_lex_stuff) {
8848             SvREFCNT_dec(PL_lex_stuff);
8849             PL_lex_stuff = NULL;
8850         }
8851         Perl_croak(aTHX_ "Substitution replacement not terminated");
8852     }
8853     PL_multi_start = first_start;       /* so whole substitution is taken together */
8854
8855     pm = (PMOP*)newPMOP(OP_SUBST, 0);
8856
8857
8858     while (*s) {
8859         if (*s == EXEC_PAT_MOD) {
8860             s++;
8861             es++;
8862         }
8863         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
8864         {
8865             break;
8866         }
8867     }
8868
8869     if ((pm->op_pmflags & PMf_CONTINUE)) {
8870         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
8871     }
8872
8873     if (es) {
8874         SV * const repl = newSVpvs("");
8875
8876         PL_multi_end = 0;
8877         pm->op_pmflags |= PMf_EVAL;
8878         while (es-- > 0) {
8879             if (es)
8880                 sv_catpvs(repl, "eval ");
8881             else
8882                 sv_catpvs(repl, "do ");
8883         }
8884         sv_catpvs(repl, "{");
8885         sv_catsv(repl, PL_sublex_info.repl);
8886         sv_catpvs(repl, "}");
8887         SvEVALED_on(repl);
8888         SvREFCNT_dec(PL_sublex_info.repl);
8889         PL_sublex_info.repl = repl;
8890     }
8891     if (CopLINE(PL_curcop) != first_line) {
8892         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
8893         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
8894             CopLINE(PL_curcop) - first_line;
8895         CopLINE_set(PL_curcop, first_line);
8896     }
8897
8898     PL_lex_op = (OP*)pm;
8899     pl_yylval.ival = OP_SUBST;
8900     return s;
8901 }
8902
8903 STATIC char *
8904 S_scan_trans(pTHX_ char *start)
8905 {
8906     char* s;
8907     OP *o;
8908     U8 squash;
8909     U8 del;
8910     U8 complement;
8911     bool nondestruct = 0;
8912     char *t;
8913
8914     PERL_ARGS_ASSERT_SCAN_TRANS;
8915
8916     pl_yylval.ival = OP_NULL;
8917
8918     s = scan_str(start,FALSE,FALSE,FALSE,&t);
8919     if (!s)
8920         Perl_croak(aTHX_ "Transliteration pattern not terminated");
8921
8922     s = t;
8923
8924     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8925     if (!s) {
8926         if (PL_lex_stuff) {
8927             SvREFCNT_dec(PL_lex_stuff);
8928             PL_lex_stuff = NULL;
8929         }
8930         Perl_croak(aTHX_ "Transliteration replacement not terminated");
8931     }
8932
8933     complement = del = squash = 0;
8934     while (1) {
8935         switch (*s) {
8936         case 'c':
8937             complement = OPpTRANS_COMPLEMENT;
8938             break;
8939         case 'd':
8940             del = OPpTRANS_DELETE;
8941             break;
8942         case 's':
8943             squash = OPpTRANS_SQUASH;
8944             break;
8945         case 'r':
8946             nondestruct = 1;
8947             break;
8948         default:
8949             goto no_more;
8950         }
8951         s++;
8952     }
8953   no_more:
8954
8955     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
8956     o->op_private &= ~OPpTRANS_ALL;
8957     o->op_private |= del|squash|complement|
8958       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
8959       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
8960
8961     PL_lex_op = o;
8962     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
8963
8964
8965     return s;
8966 }
8967
8968 /* scan_heredoc
8969    Takes a pointer to the first < in <<FOO.
8970    Returns a pointer to the byte following <<FOO.
8971
8972    This function scans a heredoc, which involves different methods
8973    depending on whether we are in a string eval, quoted construct, etc.
8974    This is because PL_linestr could containing a single line of input, or
8975    a whole string being evalled, or the contents of the current quote-
8976    like operator.
8977
8978    The two basic methods are:
8979     - Steal lines from the input stream
8980     - Scan the heredoc in PL_linestr and remove it therefrom
8981
8982    In a file scope or filtered eval, the first method is used; in a
8983    string eval, the second.
8984
8985    In a quote-like operator, we have to choose between the two,
8986    depending on where we can find a newline.  We peek into outer lex-
8987    ing scopes until we find one with a newline in it.  If we reach the
8988    outermost lexing scope and it is a file, we use the stream method.
8989    Otherwise it is treated as an eval.
8990 */
8991
8992 STATIC char *
8993 S_scan_heredoc(pTHX_ char *s)
8994 {
8995     I32 op_type = OP_SCALAR;
8996     I32 len;
8997     SV *tmpstr;
8998     char term;
8999     char *d;
9000     char *e;
9001     char *peek;
9002     const bool infile = PL_rsfp || PL_parser->filtered;
9003     const line_t origline = CopLINE(PL_curcop);
9004     LEXSHARED *shared = PL_parser->lex_shared;
9005
9006     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9007
9008     s += 2;
9009     d = PL_tokenbuf + 1;
9010     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9011     *PL_tokenbuf = '\n';
9012     peek = s;
9013     while (SPACE_OR_TAB(*peek))
9014         peek++;
9015     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9016         s = peek;
9017         term = *s++;
9018         s = delimcpy(d, e, s, PL_bufend, term, &len);
9019         if (s == PL_bufend)
9020             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9021         d += len;
9022         s++;
9023     }
9024     else {
9025         if (*s == '\\')
9026             /* <<\FOO is equivalent to <<'FOO' */
9027             s++, term = '\'';
9028         else
9029             term = '"';
9030         if (!isWORDCHAR_lazy_if(s,UTF))
9031             deprecate("bare << to mean <<\"\"");
9032         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9033             if (d < e)
9034                 *d++ = *s;
9035         }
9036     }
9037     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9038         Perl_croak(aTHX_ "Delimiter for here document is too long");
9039     *d++ = '\n';
9040     *d = '\0';
9041     len = d - PL_tokenbuf;
9042
9043 #ifndef PERL_STRICT_CR
9044     d = strchr(s, '\r');
9045     if (d) {
9046         char * const olds = s;
9047         s = d;
9048         while (s < PL_bufend) {
9049             if (*s == '\r') {
9050                 *d++ = '\n';
9051                 if (*++s == '\n')
9052                     s++;
9053             }
9054             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9055                 *d++ = *s++;
9056                 s++;
9057             }
9058             else
9059                 *d++ = *s++;
9060         }
9061         *d = '\0';
9062         PL_bufend = d;
9063         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9064         s = olds;
9065     }
9066 #endif
9067
9068     tmpstr = newSV_type(SVt_PVIV);
9069     SvGROW(tmpstr, 80);
9070     if (term == '\'') {
9071         op_type = OP_CONST;
9072         SvIV_set(tmpstr, -1);
9073     }
9074     else if (term == '`') {
9075         op_type = OP_BACKTICK;
9076         SvIV_set(tmpstr, '\\');
9077     }
9078
9079     PL_multi_start = origline + 1 + PL_parser->herelines;
9080     PL_multi_open = PL_multi_close = '<';
9081     /* inside a string eval or quote-like operator */
9082     if (!infile || PL_lex_inwhat) {
9083         SV *linestr;
9084         char *bufend;
9085         char * const olds = s;
9086         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9087         /* These two fields are not set until an inner lexing scope is
9088            entered.  But we need them set here. */
9089         shared->ls_bufptr  = s;
9090         shared->ls_linestr = PL_linestr;
9091         if (PL_lex_inwhat)
9092           /* Look for a newline.  If the current buffer does not have one,
9093              peek into the line buffer of the parent lexing scope, going
9094              up as many levels as necessary to find one with a newline
9095              after bufptr.
9096            */
9097           while (!(s = (char *)memchr(
9098                     (void *)shared->ls_bufptr, '\n',
9099                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9100                 ))) {
9101             shared = shared->ls_prev;
9102             /* shared is only null if we have gone beyond the outermost
9103                lexing scope.  In a file, we will have broken out of the
9104                loop in the previous iteration.  In an eval, the string buf-
9105                fer ends with "\n;", so the while condition above will have
9106                evaluated to false.  So shared can never be null. */
9107             assert(shared);
9108             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9109                most lexing scope.  In a file, shared->ls_linestr at that
9110                level is just one line, so there is no body to steal. */
9111             if (infile && !shared->ls_prev) {
9112                 s = olds;
9113                 goto streaming;
9114             }
9115           }
9116         else {  /* eval */
9117             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9118             assert(s);
9119         }
9120         linestr = shared->ls_linestr;
9121         bufend = SvEND(linestr);
9122         d = s;
9123         while (s < bufend - len + 1 &&
9124           memNE(s,PL_tokenbuf,len) ) {
9125             if (*s++ == '\n')
9126                 ++PL_parser->herelines;
9127         }
9128         if (s >= bufend - len + 1) {
9129             goto interminable;
9130         }
9131         sv_setpvn(tmpstr,d+1,s-d);
9132         s += len - 1;
9133         /* the preceding stmt passes a newline */
9134         PL_parser->herelines++;
9135
9136         /* s now points to the newline after the heredoc terminator.
9137            d points to the newline before the body of the heredoc.
9138          */
9139
9140         /* We are going to modify linestr in place here, so set
9141            aside copies of the string if necessary for re-evals or
9142            (caller $n)[6]. */
9143         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9144            check shared->re_eval_str. */
9145         if (shared->re_eval_start || shared->re_eval_str) {
9146             /* Set aside the rest of the regexp */
9147             if (!shared->re_eval_str)
9148                 shared->re_eval_str =
9149                        newSVpvn(shared->re_eval_start,
9150                                 bufend - shared->re_eval_start);
9151             shared->re_eval_start -= s-d;
9152         }
9153         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9154             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9155             cx->blk_eval.cur_text == linestr)
9156         {
9157             cx->blk_eval.cur_text = newSVsv(linestr);
9158             SvSCREAM_on(cx->blk_eval.cur_text);
9159         }
9160         /* Copy everything from s onwards back to d. */
9161         Move(s,d,bufend-s + 1,char);
9162         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9163         /* Setting PL_bufend only applies when we have not dug deeper
9164            into other scopes, because sublex_done sets PL_bufend to
9165            SvEND(PL_linestr). */
9166         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9167         s = olds;
9168     }
9169     else
9170     {
9171       SV *linestr_save;
9172      streaming:
9173       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9174       term = PL_tokenbuf[1];
9175       len--;
9176       linestr_save = PL_linestr; /* must restore this afterwards */
9177       d = s;                     /* and this */
9178       PL_linestr = newSVpvs("");
9179       PL_bufend = SvPVX(PL_linestr);
9180       while (1) {
9181         PL_bufptr = PL_bufend;
9182         CopLINE_set(PL_curcop,
9183                     origline + 1 + PL_parser->herelines);
9184         if (!lex_next_chunk(LEX_NO_TERM)
9185          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9186             SvREFCNT_dec(linestr_save);
9187             goto interminable;
9188         }
9189         CopLINE_set(PL_curcop, origline);
9190         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9191             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9192             /* ^That should be enough to avoid this needing to grow:  */
9193             sv_catpvs(PL_linestr, "\n\0");
9194             assert(s == SvPVX(PL_linestr));
9195             PL_bufend = SvEND(PL_linestr);
9196         }
9197         s = PL_bufptr;
9198         PL_parser->herelines++;
9199         PL_last_lop = PL_last_uni = NULL;
9200 #ifndef PERL_STRICT_CR
9201         if (PL_bufend - PL_linestart >= 2) {
9202             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9203                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9204             {
9205                 PL_bufend[-2] = '\n';
9206                 PL_bufend--;
9207                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9208             }
9209             else if (PL_bufend[-1] == '\r')
9210                 PL_bufend[-1] = '\n';
9211         }
9212         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9213             PL_bufend[-1] = '\n';
9214 #endif
9215         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9216             SvREFCNT_dec(PL_linestr);
9217             PL_linestr = linestr_save;
9218             PL_linestart = SvPVX(linestr_save);
9219             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9220             s = d;
9221             break;
9222         }
9223         else {
9224             sv_catsv(tmpstr,PL_linestr);
9225         }
9226       }
9227     }
9228     PL_multi_end = origline + PL_parser->herelines;
9229     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9230         SvPV_shrink_to_cur(tmpstr);
9231     }
9232     if (!IN_BYTES) {
9233         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9234             SvUTF8_on(tmpstr);
9235         else if (PL_encoding)
9236             sv_recode_to_utf8(tmpstr, PL_encoding);
9237     }
9238     PL_lex_stuff = tmpstr;
9239     pl_yylval.ival = op_type;
9240     return s;
9241
9242   interminable:
9243     SvREFCNT_dec(tmpstr);
9244     CopLINE_set(PL_curcop, origline);
9245     missingterm(PL_tokenbuf + 1);
9246 }
9247
9248 /* scan_inputsymbol
9249    takes: current position in input buffer
9250    returns: new position in input buffer
9251    side-effects: pl_yylval and lex_op are set.
9252
9253    This code handles:
9254
9255    <>           read from ARGV
9256    <FH>         read from filehandle
9257    <pkg::FH>    read from package qualified filehandle
9258    <pkg'FH>     read from package qualified filehandle
9259    <$fh>        read from filehandle in $fh
9260    <*.h>        filename glob
9261
9262 */
9263
9264 STATIC char *
9265 S_scan_inputsymbol(pTHX_ char *start)
9266 {
9267     char *s = start;            /* current position in buffer */
9268     char *end;
9269     I32 len;
9270     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9271     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9272
9273     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9274
9275     end = strchr(s, '\n');
9276     if (!end)
9277         end = PL_bufend;
9278     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9279
9280     /* die if we didn't have space for the contents of the <>,
9281        or if it didn't end, or if we see a newline
9282     */
9283
9284     if (len >= (I32)sizeof PL_tokenbuf)
9285         Perl_croak(aTHX_ "Excessively long <> operator");
9286     if (s >= end)
9287         Perl_croak(aTHX_ "Unterminated <> operator");
9288
9289     s++;
9290
9291     /* check for <$fh>
9292        Remember, only scalar variables are interpreted as filehandles by
9293        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9294        treated as a glob() call.
9295        This code makes use of the fact that except for the $ at the front,
9296        a scalar variable and a filehandle look the same.
9297     */
9298     if (*d == '$' && d[1]) d++;
9299
9300     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9301     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9302         d += UTF ? UTF8SKIP(d) : 1;
9303
9304     /* If we've tried to read what we allow filehandles to look like, and
9305        there's still text left, then it must be a glob() and not a getline.
9306        Use scan_str to pull out the stuff between the <> and treat it
9307        as nothing more than a string.
9308     */
9309
9310     if (d - PL_tokenbuf != len) {
9311         pl_yylval.ival = OP_GLOB;
9312         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9313         if (!s)
9314            Perl_croak(aTHX_ "Glob not terminated");
9315         return s;
9316     }
9317     else {
9318         bool readline_overriden = FALSE;
9319         GV *gv_readline;
9320         /* we're in a filehandle read situation */
9321         d = PL_tokenbuf;
9322
9323         /* turn <> into <ARGV> */
9324         if (!len)
9325             Copy("ARGV",d,5,char);
9326
9327         /* Check whether readline() is overriden */
9328         if ((gv_readline = gv_override("readline",8)))
9329             readline_overriden = TRUE;
9330
9331         /* if <$fh>, create the ops to turn the variable into a
9332            filehandle
9333         */
9334         if (*d == '$') {
9335             /* try to find it in the pad for this block, otherwise find
9336                add symbol table ops
9337             */
9338             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9339             if (tmp != NOT_IN_PAD) {
9340                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9341                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9342                     HEK * const stashname = HvNAME_HEK(stash);
9343                     SV * const sym = sv_2mortal(newSVhek(stashname));
9344                     sv_catpvs(sym, "::");
9345                     sv_catpv(sym, d+1);
9346                     d = SvPVX(sym);
9347                     goto intro_sym;
9348                 }
9349                 else {
9350                     OP * const o = newOP(OP_PADSV, 0);
9351                     o->op_targ = tmp;
9352                     PL_lex_op = readline_overriden
9353                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9354                                 op_append_elem(OP_LIST, o,
9355                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9356                         : (OP*)newUNOP(OP_READLINE, 0, o);
9357                 }
9358             }
9359             else {
9360                 GV *gv;
9361                 ++d;
9362 intro_sym:
9363                 gv = gv_fetchpv(d,
9364                                 (PL_in_eval
9365                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9366                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9367                                 SVt_PV);
9368                 PL_lex_op = readline_overriden
9369                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9370                             op_append_elem(OP_LIST,
9371                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9372                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9373                     : (OP*)newUNOP(OP_READLINE, 0,
9374                             newUNOP(OP_RV2SV, 0,
9375                                 newGVOP(OP_GV, 0, gv)));
9376             }
9377             if (!readline_overriden)
9378                 PL_lex_op->op_flags |= OPf_SPECIAL;
9379             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9380             pl_yylval.ival = OP_NULL;
9381         }
9382
9383         /* If it's none of the above, it must be a literal filehandle
9384            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9385         else {
9386             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9387             PL_lex_op = readline_overriden
9388                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9389                         op_append_elem(OP_LIST,
9390                             newGVOP(OP_GV, 0, gv),
9391                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9392                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9393             pl_yylval.ival = OP_NULL;
9394         }
9395     }
9396
9397     return s;
9398 }
9399
9400
9401 /* scan_str
9402    takes:
9403         start                   position in buffer
9404         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9405                                 only if they are of the open/close form
9406         keep_delims             preserve the delimiters around the string
9407         re_reparse              compiling a run-time /(?{})/:
9408                                    collapse // to /,  and skip encoding src
9409         delimp                  if non-null, this is set to the position of
9410                                 the closing delimiter, or just after it if
9411                                 the closing and opening delimiters differ
9412                                 (i.e., the opening delimiter of a substitu-
9413                                 tion replacement)
9414    returns: position to continue reading from buffer
9415    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9416         updates the read buffer.
9417
9418    This subroutine pulls a string out of the input.  It is called for:
9419         q               single quotes           q(literal text)
9420         '               single quotes           'literal text'
9421         qq              double quotes           qq(interpolate $here please)
9422         "               double quotes           "interpolate $here please"
9423         qx              backticks               qx(/bin/ls -l)
9424         `               backticks               `/bin/ls -l`
9425         qw              quote words             @EXPORT_OK = qw( func() $spam )
9426         m//             regexp match            m/this/
9427         s///            regexp substitute       s/this/that/
9428         tr///           string transliterate    tr/this/that/
9429         y///            string transliterate    y/this/that/
9430         ($*@)           sub prototypes          sub foo ($)
9431         (stuff)         sub attr parameters     sub foo : attr(stuff)
9432         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9433
9434    In most of these cases (all but <>, patterns and transliterate)
9435    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9436    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9437    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9438    calls scan_str().
9439
9440    It skips whitespace before the string starts, and treats the first
9441    character as the delimiter.  If the delimiter is one of ([{< then
9442    the corresponding "close" character )]}> is used as the closing
9443    delimiter.  It allows quoting of delimiters, and if the string has
9444    balanced delimiters ([{<>}]) it allows nesting.
9445
9446    On success, the SV with the resulting string is put into lex_stuff or,
9447    if that is already non-NULL, into lex_repl. The second case occurs only
9448    when parsing the RHS of the special constructs s/// and tr/// (y///).
9449    For convenience, the terminating delimiter character is stuffed into
9450    SvIVX of the SV.
9451 */
9452
9453 STATIC char *
9454 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9455                  char **delimp
9456     )
9457 {
9458     SV *sv;                     /* scalar value: string */
9459     const char *tmps;           /* temp string, used for delimiter matching */
9460     char *s = start;            /* current position in the buffer */
9461     char term;                  /* terminating character */
9462     char *to;                   /* current position in the sv's data */
9463     I32 brackets = 1;           /* bracket nesting level */
9464     bool has_utf8 = FALSE;      /* is there any utf8 content? */
9465     I32 termcode;               /* terminating char. code */
9466     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
9467     STRLEN termlen;             /* length of terminating string */
9468     int last_off = 0;           /* last position for nesting bracket */
9469     line_t herelines;
9470
9471     PERL_ARGS_ASSERT_SCAN_STR;
9472
9473     /* skip space before the delimiter */
9474     if (isSPACE(*s)) {
9475         s = PEEKSPACE(s);
9476     }
9477
9478     /* mark where we are, in case we need to report errors */
9479     CLINE;
9480
9481     /* after skipping whitespace, the next character is the terminator */
9482     term = *s;
9483     if (!UTF) {
9484         termcode = termstr[0] = term;
9485         termlen = 1;
9486     }
9487     else {
9488         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9489         Copy(s, termstr, termlen, U8);
9490         if (!UTF8_IS_INVARIANT(term))
9491             has_utf8 = TRUE;
9492     }
9493
9494     /* mark where we are */
9495     PL_multi_start = CopLINE(PL_curcop);
9496     PL_multi_open = term;
9497     herelines = PL_parser->herelines;
9498
9499     /* find corresponding closing delimiter */
9500     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9501         termcode = termstr[0] = term = tmps[5];
9502
9503     PL_multi_close = term;
9504
9505     if (PL_multi_open == PL_multi_close) {
9506         keep_bracketed_quoted = FALSE;
9507     }
9508
9509     /* create a new SV to hold the contents.  79 is the SV's initial length.
9510        What a random number. */
9511     sv = newSV_type(SVt_PVIV);
9512     SvGROW(sv, 80);
9513     SvIV_set(sv, termcode);
9514     (void)SvPOK_only(sv);               /* validate pointer */
9515
9516     /* move past delimiter and try to read a complete string */
9517     if (keep_delims)
9518         sv_catpvn(sv, s, termlen);
9519     s += termlen;
9520     for (;;) {
9521         if (PL_encoding && !UTF && !re_reparse) {
9522             bool cont = TRUE;
9523
9524             while (cont) {
9525                 int offset = s - SvPVX_const(PL_linestr);
9526                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9527                                            &offset, (char*)termstr, termlen);
9528                 const char *ns;
9529                 char *svlast;
9530
9531                 if (SvIsCOW(PL_linestr)) {
9532                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9533                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9534                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9535                     char *buf = SvPVX(PL_linestr);
9536                     bufend_pos = PL_parser->bufend - buf;
9537                     bufptr_pos = PL_parser->bufptr - buf;
9538                     oldbufptr_pos = PL_parser->oldbufptr - buf;
9539                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9540                     linestart_pos = PL_parser->linestart - buf;
9541                     last_uni_pos = PL_parser->last_uni
9542                         ? PL_parser->last_uni - buf
9543                         : 0;
9544                     last_lop_pos = PL_parser->last_lop
9545                         ? PL_parser->last_lop - buf
9546                         : 0;
9547                     re_eval_start_pos =
9548                         PL_parser->lex_shared->re_eval_start ?
9549                             PL_parser->lex_shared->re_eval_start - buf : 0;
9550                     s_pos = s - buf;
9551
9552                     sv_force_normal(PL_linestr);
9553
9554                     buf = SvPVX(PL_linestr);
9555                     PL_parser->bufend = buf + bufend_pos;
9556                     PL_parser->bufptr = buf + bufptr_pos;
9557                     PL_parser->oldbufptr = buf + oldbufptr_pos;
9558                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9559                     PL_parser->linestart = buf + linestart_pos;
9560                     if (PL_parser->last_uni)
9561                         PL_parser->last_uni = buf + last_uni_pos;
9562                     if (PL_parser->last_lop)
9563                         PL_parser->last_lop = buf + last_lop_pos;
9564                     if (PL_parser->lex_shared->re_eval_start)
9565                         PL_parser->lex_shared->re_eval_start  =
9566                             buf + re_eval_start_pos;
9567                     s = buf + s_pos;
9568                 }
9569                 ns = SvPVX_const(PL_linestr) + offset;
9570                 svlast = SvEND(sv) - 1;
9571
9572                 for (; s < ns; s++) {
9573                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9574                         COPLINE_INC_WITH_HERELINES;
9575                 }
9576                 if (!found)
9577                     goto read_more_line;
9578                 else {
9579                     /* handle quoted delimiters */
9580                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9581                         const char *t;
9582                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9583                             t--;
9584                         if ((svlast-1 - t) % 2) {
9585                             if (!keep_bracketed_quoted) {
9586                                 *(svlast-1) = term;
9587                                 *svlast = '\0';
9588                                 SvCUR_set(sv, SvCUR(sv) - 1);
9589                             }
9590                             continue;
9591                         }
9592                     }
9593                     if (PL_multi_open == PL_multi_close) {
9594                         cont = FALSE;
9595                     }
9596                     else {
9597                         const char *t;
9598                         char *w;
9599                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9600                             /* At here, all closes are "was quoted" one,
9601                                so we don't check PL_multi_close. */
9602                             if (*t == '\\') {
9603                                 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9604                                     t++;
9605                                 else
9606                                     *w++ = *t++;
9607                             }
9608                             else if (*t == PL_multi_open)
9609                                 brackets++;
9610
9611                             *w = *t;
9612                         }
9613                         if (w < t) {
9614                             *w++ = term;
9615                             *w = '\0';
9616                             SvCUR_set(sv, w - SvPVX_const(sv));
9617                         }
9618                         last_off = w - SvPVX(sv);
9619                         if (--brackets <= 0)
9620                             cont = FALSE;
9621                     }
9622                 }
9623             }
9624             if (!keep_delims) {
9625                 SvCUR_set(sv, SvCUR(sv) - 1);
9626                 *SvEND(sv) = '\0';
9627             }
9628             break;
9629         }
9630
9631         /* extend sv if need be */
9632         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9633         /* set 'to' to the next character in the sv's string */
9634         to = SvPVX(sv)+SvCUR(sv);
9635
9636         /* if open delimiter is the close delimiter read unbridle */
9637         if (PL_multi_open == PL_multi_close) {
9638             for (; s < PL_bufend; s++,to++) {
9639                 /* embedded newlines increment the current line number */
9640                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9641                     COPLINE_INC_WITH_HERELINES;
9642                 /* handle quoted delimiters */
9643                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9644                     if (!keep_bracketed_quoted
9645                         && (s[1] == term
9646                             || (re_reparse && s[1] == '\\'))
9647                     )
9648                         s++;
9649                     else /* any other quotes are simply copied straight through */
9650                         *to++ = *s++;
9651                 }
9652                 /* terminate when run out of buffer (the for() condition), or
9653                    have found the terminator */
9654                 else if (*s == term) {
9655                     if (termlen == 1)
9656                         break;
9657                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9658                         break;
9659                 }
9660                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9661                     has_utf8 = TRUE;
9662                 *to = *s;
9663             }
9664         }
9665
9666         /* if the terminator isn't the same as the start character (e.g.,
9667            matched brackets), we have to allow more in the quoting, and
9668            be prepared for nested brackets.
9669         */
9670         else {
9671             /* read until we run out of string, or we find the terminator */
9672             for (; s < PL_bufend; s++,to++) {
9673                 /* embedded newlines increment the line count */
9674                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9675                     COPLINE_INC_WITH_HERELINES;
9676                 /* backslashes can escape the open or closing characters */
9677                 if (*s == '\\' && s+1 < PL_bufend) {
9678                     if (!keep_bracketed_quoted &&
9679                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9680                     {
9681                         s++;
9682                     }
9683                     else
9684                         *to++ = *s++;
9685                 }
9686                 /* allow nested opens and closes */
9687                 else if (*s == PL_multi_close && --brackets <= 0)
9688                     break;
9689                 else if (*s == PL_multi_open)
9690                     brackets++;
9691                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9692                     has_utf8 = TRUE;
9693                 *to = *s;
9694             }
9695         }
9696         /* terminate the copied string and update the sv's end-of-string */
9697         *to = '\0';
9698         SvCUR_set(sv, to - SvPVX_const(sv));
9699
9700         /*
9701          * this next chunk reads more into the buffer if we're not done yet
9702          */
9703
9704         if (s < PL_bufend)
9705             break;              /* handle case where we are done yet :-) */
9706
9707 #ifndef PERL_STRICT_CR
9708         if (to - SvPVX_const(sv) >= 2) {
9709             if ((to[-2] == '\r' && to[-1] == '\n') ||
9710                 (to[-2] == '\n' && to[-1] == '\r'))
9711             {
9712                 to[-2] = '\n';
9713                 to--;
9714                 SvCUR_set(sv, to - SvPVX_const(sv));
9715             }
9716             else if (to[-1] == '\r')
9717                 to[-1] = '\n';
9718         }
9719         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9720             to[-1] = '\n';
9721 #endif
9722
9723      read_more_line:
9724         /* if we're out of file, or a read fails, bail and reset the current
9725            line marker so we can report where the unterminated string began
9726         */
9727         COPLINE_INC_WITH_HERELINES;
9728         PL_bufptr = PL_bufend;
9729         if (!lex_next_chunk(0)) {
9730             sv_free(sv);
9731             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9732             return NULL;
9733         }
9734         s = PL_bufptr;
9735     }
9736
9737     /* at this point, we have successfully read the delimited string */
9738
9739     if (!PL_encoding || UTF || re_reparse) {
9740
9741         if (keep_delims)
9742             sv_catpvn(sv, s, termlen);
9743         s += termlen;
9744     }
9745     if (has_utf8 || (PL_encoding && !re_reparse))
9746         SvUTF8_on(sv);
9747
9748     PL_multi_end = CopLINE(PL_curcop);
9749     CopLINE_set(PL_curcop, PL_multi_start);
9750     PL_parser->herelines = herelines;
9751
9752     /* if we allocated too much space, give some back */
9753     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9754         SvLEN_set(sv, SvCUR(sv) + 1);
9755         SvPV_renew(sv, SvLEN(sv));
9756     }
9757
9758     /* decide whether this is the first or second quoted string we've read
9759        for this op
9760     */
9761
9762     if (PL_lex_stuff)
9763         PL_sublex_info.repl = sv;
9764     else
9765         PL_lex_stuff = sv;
9766     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9767     return s;
9768 }
9769
9770 /*
9771   scan_num
9772   takes: pointer to position in buffer
9773   returns: pointer to new position in buffer
9774   side-effects: builds ops for the constant in pl_yylval.op
9775
9776   Read a number in any of the formats that Perl accepts:
9777
9778   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9779   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9780   0b[01](_?[01])*
9781   0[0-7](_?[0-7])*
9782   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9783
9784   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9785   thing it reads.
9786
9787   If it reads a number without a decimal point or an exponent, it will
9788   try converting the number to an integer and see if it can do so
9789   without loss of precision.
9790 */
9791
9792 char *
9793 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9794 {
9795     const char *s = start;      /* current position in buffer */
9796     char *d;                    /* destination in temp buffer */
9797     char *e;                    /* end of temp buffer */
9798     NV nv;                              /* number read, as a double */
9799     SV *sv = NULL;                      /* place to put the converted number */
9800     bool floatit;                       /* boolean: int or float? */
9801     const char *lastub = NULL;          /* position of last underbar */
9802     static const char* const number_too_long = "Number too long";
9803
9804     PERL_ARGS_ASSERT_SCAN_NUM;
9805
9806     /* We use the first character to decide what type of number this is */
9807
9808     switch (*s) {
9809     default:
9810         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
9811
9812     /* if it starts with a 0, it could be an octal number, a decimal in
9813        0.13 disguise, or a hexadecimal number, or a binary number. */
9814     case '0':
9815         {
9816           /* variables:
9817              u          holds the "number so far"
9818              shift      the power of 2 of the base
9819                         (hex == 4, octal == 3, binary == 1)
9820              overflowed was the number more than we can hold?
9821
9822              Shift is used when we add a digit.  It also serves as an "are
9823              we in octal/hex/binary?" indicator to disallow hex characters
9824              when in octal mode.
9825            */
9826             NV n = 0.0;
9827             UV u = 0;
9828             I32 shift;
9829             bool overflowed = FALSE;
9830             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
9831             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
9832             static const char* const bases[5] =
9833               { "", "binary", "", "octal", "hexadecimal" };
9834             static const char* const Bases[5] =
9835               { "", "Binary", "", "Octal", "Hexadecimal" };
9836             static const char* const maxima[5] =
9837               { "",
9838                 "0b11111111111111111111111111111111",
9839                 "",
9840                 "037777777777",
9841                 "0xffffffff" };
9842             const char *base, *Base, *max;
9843
9844             /* check for hex */
9845             if (s[1] == 'x' || s[1] == 'X') {
9846                 shift = 4;
9847                 s += 2;
9848                 just_zero = FALSE;
9849             } else if (s[1] == 'b' || s[1] == 'B') {
9850                 shift = 1;
9851                 s += 2;
9852                 just_zero = FALSE;
9853             }
9854             /* check for a decimal in disguise */
9855             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
9856                 goto decimal;
9857             /* so it must be octal */
9858             else {
9859                 shift = 3;
9860                 s++;
9861             }
9862
9863             if (*s == '_') {
9864                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9865                                "Misplaced _ in number");
9866                lastub = s++;
9867             }
9868
9869             base = bases[shift];
9870             Base = Bases[shift];
9871             max  = maxima[shift];
9872
9873             /* read the rest of the number */
9874             for (;;) {
9875                 /* x is used in the overflow test,
9876                    b is the digit we're adding on. */
9877                 UV x, b;
9878
9879                 switch (*s) {
9880
9881                 /* if we don't mention it, we're done */
9882                 default:
9883                     goto out;
9884
9885                 /* _ are ignored -- but warned about if consecutive */
9886                 case '_':
9887                     if (lastub && s == lastub + 1)
9888                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9889                                        "Misplaced _ in number");
9890                     lastub = s++;
9891                     break;
9892
9893                 /* 8 and 9 are not octal */
9894                 case '8': case '9':
9895                     if (shift == 3)
9896                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
9897                     /* FALLTHROUGH */
9898
9899                 /* octal digits */
9900                 case '2': case '3': case '4':
9901                 case '5': case '6': case '7':
9902                     if (shift == 1)
9903                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
9904                     /* FALLTHROUGH */
9905
9906                 case '0': case '1':
9907                     b = *s++ & 15;              /* ASCII digit -> value of digit */
9908                     goto digit;
9909
9910                 /* hex digits */
9911                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
9912                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
9913                     /* make sure they said 0x */
9914                     if (shift != 4)
9915                         goto out;
9916                     b = (*s++ & 7) + 9;
9917
9918                     /* Prepare to put the digit we have onto the end
9919                        of the number so far.  We check for overflows.
9920                     */
9921
9922                   digit:
9923                     just_zero = FALSE;
9924                     if (!overflowed) {
9925                         x = u << shift; /* make room for the digit */
9926
9927                         if ((x >> shift) != u
9928                             && !(PL_hints & HINT_NEW_BINARY)) {
9929                             overflowed = TRUE;
9930                             n = (NV) u;
9931                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
9932                                              "Integer overflow in %s number",
9933                                              base);
9934                         } else
9935                             u = x | b;          /* add the digit to the end */
9936                     }
9937                     if (overflowed) {
9938                         n *= nvshift[shift];
9939                         /* If an NV has not enough bits in its
9940                          * mantissa to represent an UV this summing of
9941                          * small low-order numbers is a waste of time
9942                          * (because the NV cannot preserve the
9943                          * low-order bits anyway): we could just
9944                          * remember when did we overflow and in the
9945                          * end just multiply n by the right
9946                          * amount. */
9947                         n += (NV) b;
9948                     }
9949                     break;
9950                 }
9951             }
9952
9953           /* if we get here, we had success: make a scalar value from
9954              the number.
9955           */
9956           out:
9957
9958             /* final misplaced underbar check */
9959             if (s[-1] == '_') {
9960                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
9961             }
9962
9963             if (overflowed) {
9964                 if (n > 4294967295.0)
9965                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
9966                                    "%s number > %s non-portable",
9967                                    Base, max);
9968                 sv = newSVnv(n);
9969             }
9970             else {
9971 #if UVSIZE > 4
9972                 if (u > 0xffffffff)
9973                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
9974                                    "%s number > %s non-portable",
9975                                    Base, max);
9976 #endif
9977                 sv = newSVuv(u);
9978             }
9979             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
9980                 sv = new_constant(start, s - start, "integer",
9981                                   sv, NULL, NULL, 0);
9982             else if (PL_hints & HINT_NEW_BINARY)
9983                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
9984         }
9985         break;
9986
9987     /*
9988       handle decimal numbers.
9989       we're also sent here when we read a 0 as the first digit
9990     */
9991     case '1': case '2': case '3': case '4': case '5':
9992     case '6': case '7': case '8': case '9': case '.':
9993       decimal:
9994         d = PL_tokenbuf;
9995         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
9996         floatit = FALSE;
9997
9998         /* read next group of digits and _ and copy into d */
9999         while (isDIGIT(*s) || *s == '_') {
10000             /* skip underscores, checking for misplaced ones
10001                if -w is on
10002             */
10003             if (*s == '_') {
10004                 if (lastub && s == lastub + 1)
10005                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10006                                    "Misplaced _ in number");
10007                 lastub = s++;
10008             }
10009             else {
10010                 /* check for end of fixed-length buffer */
10011                 if (d >= e)
10012                     Perl_croak(aTHX_ "%s", number_too_long);
10013                 /* if we're ok, copy the character */
10014                 *d++ = *s++;
10015             }
10016         }
10017
10018         /* final misplaced underbar check */
10019         if (lastub && s == lastub + 1) {
10020             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10021         }
10022
10023         /* read a decimal portion if there is one.  avoid
10024            3..5 being interpreted as the number 3. followed
10025            by .5
10026         */
10027         if (*s == '.' && s[1] != '.') {
10028             floatit = TRUE;
10029             *d++ = *s++;
10030
10031             if (*s == '_') {
10032                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10033                                "Misplaced _ in number");
10034                 lastub = s;
10035             }
10036
10037             /* copy, ignoring underbars, until we run out of digits.
10038             */
10039             for (; isDIGIT(*s) || *s == '_'; s++) {
10040                 /* fixed length buffer check */
10041                 if (d >= e)
10042                     Perl_croak(aTHX_ "%s", number_too_long);
10043                 if (*s == '_') {
10044                    if (lastub && s == lastub + 1)
10045                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10046                                       "Misplaced _ in number");
10047                    lastub = s;
10048                 }
10049                 else
10050                     *d++ = *s;
10051             }
10052             /* fractional part ending in underbar? */
10053             if (s[-1] == '_') {
10054                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10055                                "Misplaced _ in number");
10056             }
10057             if (*s == '.' && isDIGIT(s[1])) {
10058                 /* oops, it's really a v-string, but without the "v" */
10059                 s = start;
10060                 goto vstring;
10061             }
10062         }
10063
10064         /* read exponent part, if present */
10065         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10066             floatit = TRUE;
10067             s++;
10068
10069             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10070             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10071
10072             /* stray preinitial _ */
10073             if (*s == '_') {
10074                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10075                                "Misplaced _ in number");
10076                 lastub = s++;
10077             }
10078
10079             /* allow positive or negative exponent */
10080             if (*s == '+' || *s == '-')
10081                 *d++ = *s++;
10082
10083             /* stray initial _ */
10084             if (*s == '_') {
10085                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10086                                "Misplaced _ in number");
10087                 lastub = s++;
10088             }
10089
10090             /* read digits of exponent */
10091             while (isDIGIT(*s) || *s == '_') {
10092                 if (isDIGIT(*s)) {
10093                     if (d >= e)
10094                         Perl_croak(aTHX_ "%s", number_too_long);
10095                     *d++ = *s++;
10096                 }
10097                 else {
10098                    if (((lastub && s == lastub + 1) ||
10099                         (!isDIGIT(s[1]) && s[1] != '_')))
10100                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10101                                       "Misplaced _ in number");
10102                    lastub = s++;
10103                 }
10104             }
10105         }
10106
10107
10108         /*
10109            We try to do an integer conversion first if no characters
10110            indicating "float" have been found.
10111          */
10112
10113         if (!floatit) {
10114             UV uv;
10115             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10116
10117             if (flags == IS_NUMBER_IN_UV) {
10118               if (uv <= IV_MAX)
10119                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10120               else
10121                 sv = newSVuv(uv);
10122             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10123               if (uv <= (UV) IV_MIN)
10124                 sv = newSViv(-(IV)uv);
10125               else
10126                 floatit = TRUE;
10127             } else
10128               floatit = TRUE;
10129         }
10130         if (floatit) {
10131             STORE_NUMERIC_LOCAL_SET_STANDARD();
10132             /* terminate the string */
10133             *d = '\0';
10134             nv = Atof(PL_tokenbuf);
10135             RESTORE_NUMERIC_LOCAL();
10136             sv = newSVnv(nv);
10137         }
10138
10139         if ( floatit
10140              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10141             const char *const key = floatit ? "float" : "integer";
10142             const STRLEN keylen = floatit ? 5 : 7;
10143             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10144                                 key, keylen, sv, NULL, NULL, 0);
10145         }
10146         break;
10147
10148     /* if it starts with a v, it could be a v-string */
10149     case 'v':
10150 vstring:
10151                 sv = newSV(5); /* preallocate storage space */
10152                 ENTER_with_name("scan_vstring");
10153                 SAVEFREESV(sv);
10154                 s = scan_vstring(s, PL_bufend, sv);
10155                 SvREFCNT_inc_simple_void_NN(sv);
10156                 LEAVE_with_name("scan_vstring");
10157         break;
10158     }
10159
10160     /* make the op for the constant and return */
10161
10162     if (sv)
10163         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10164     else
10165         lvalp->opval = NULL;
10166
10167     return (char *)s;
10168 }
10169
10170 STATIC char *
10171 S_scan_formline(pTHX_ char *s)
10172 {
10173     char *eol;
10174     char *t;
10175     SV * const stuff = newSVpvs("");
10176     bool needargs = FALSE;
10177     bool eofmt = FALSE;
10178
10179     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10180
10181     while (!needargs) {
10182         if (*s == '.') {
10183             t = s+1;
10184 #ifdef PERL_STRICT_CR
10185             while (SPACE_OR_TAB(*t))
10186                 t++;
10187 #else
10188             while (SPACE_OR_TAB(*t) || *t == '\r')
10189                 t++;
10190 #endif
10191             if (*t == '\n' || t == PL_bufend) {
10192                 eofmt = TRUE;
10193                 break;
10194             }
10195         }
10196         eol = (char *) memchr(s,'\n',PL_bufend-s);
10197         if (!eol++)
10198                 eol = PL_bufend;
10199         if (*s != '#') {
10200             for (t = s; t < eol; t++) {
10201                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10202                     needargs = FALSE;
10203                     goto enough;        /* ~~ must be first line in formline */
10204                 }
10205                 if (*t == '@' || *t == '^')
10206                     needargs = TRUE;
10207             }
10208             if (eol > s) {
10209                 sv_catpvn(stuff, s, eol-s);
10210 #ifndef PERL_STRICT_CR
10211                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10212                     char *end = SvPVX(stuff) + SvCUR(stuff);
10213                     end[-2] = '\n';
10214                     end[-1] = '\0';
10215                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10216                 }
10217 #endif
10218             }
10219             else
10220               break;
10221         }
10222         s = (char*)eol;
10223         if ((PL_rsfp || PL_parser->filtered)
10224          && PL_parser->form_lex_state == LEX_NORMAL) {
10225             bool got_some;
10226             PL_bufptr = PL_bufend;
10227             COPLINE_INC_WITH_HERELINES;
10228             got_some = lex_next_chunk(0);
10229             CopLINE_dec(PL_curcop);
10230             s = PL_bufptr;
10231             if (!got_some)
10232                 break;
10233         }
10234         incline(s);
10235     }
10236   enough:
10237     if (!SvCUR(stuff) || needargs)
10238         PL_lex_state = PL_parser->form_lex_state;
10239     if (SvCUR(stuff)) {
10240         PL_expect = XSTATE;
10241         if (needargs) {
10242             const char *s2 = s;
10243             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10244                 || *s2 == 013)
10245                 s2++;
10246             if (*s2 == '{') {
10247                 PL_expect = XTERMBLOCK;
10248                 NEXTVAL_NEXTTOKE.ival = 0;
10249                 force_next(DO);
10250             }
10251             NEXTVAL_NEXTTOKE.ival = 0;
10252             force_next(FORMLBRACK);
10253         }
10254         if (!IN_BYTES) {
10255             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10256                 SvUTF8_on(stuff);
10257             else if (PL_encoding)
10258                 sv_recode_to_utf8(stuff, PL_encoding);
10259         }
10260         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10261         force_next(THING);
10262     }
10263     else {
10264         SvREFCNT_dec(stuff);
10265         if (eofmt)
10266             PL_lex_formbrack = 0;
10267     }
10268     return s;
10269 }
10270
10271 I32
10272 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10273 {
10274     const I32 oldsavestack_ix = PL_savestack_ix;
10275     CV* const outsidecv = PL_compcv;
10276
10277     SAVEI32(PL_subline);
10278     save_item(PL_subname);
10279     SAVESPTR(PL_compcv);
10280
10281     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10282     CvFLAGS(PL_compcv) |= flags;
10283
10284     PL_subline = CopLINE(PL_curcop);
10285     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10286     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10287     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10288     if (outsidecv && CvPADLIST(outsidecv))
10289         CvPADLIST(PL_compcv)->xpadl_outid =
10290             PadlistNAMES(CvPADLIST(outsidecv));
10291
10292     return oldsavestack_ix;
10293 }
10294
10295 static int
10296 S_yywarn(pTHX_ const char *const s, U32 flags)
10297 {
10298     PERL_ARGS_ASSERT_YYWARN;
10299
10300     PL_in_eval |= EVAL_WARNONLY;
10301     yyerror_pv(s, flags);
10302     PL_in_eval &= ~EVAL_WARNONLY;
10303     return 0;
10304 }
10305
10306 int
10307 Perl_yyerror(pTHX_ const char *const s)
10308 {
10309     PERL_ARGS_ASSERT_YYERROR;
10310     return yyerror_pvn(s, strlen(s), 0);
10311 }
10312
10313 int
10314 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10315 {
10316     PERL_ARGS_ASSERT_YYERROR_PV;
10317     return yyerror_pvn(s, strlen(s), flags);
10318 }
10319
10320 int
10321 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10322 {
10323     const char *context = NULL;
10324     int contlen = -1;
10325     SV *msg;
10326     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10327     int yychar  = PL_parser->yychar;
10328
10329     PERL_ARGS_ASSERT_YYERROR_PVN;
10330
10331     if (!yychar || (yychar == ';' && !PL_rsfp))
10332         sv_catpvs(where_sv, "at EOF");
10333     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10334       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10335       PL_oldbufptr != PL_bufptr) {
10336         /*
10337                 Only for NetWare:
10338                 The code below is removed for NetWare because it abends/crashes on NetWare
10339                 when the script has error such as not having the closing quotes like:
10340                     if ($var eq "value)
10341                 Checking of white spaces is anyway done in NetWare code.
10342         */
10343 #ifndef NETWARE
10344         while (isSPACE(*PL_oldoldbufptr))
10345             PL_oldoldbufptr++;
10346 #endif
10347         context = PL_oldoldbufptr;
10348         contlen = PL_bufptr - PL_oldoldbufptr;
10349     }
10350     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10351       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10352         /*
10353                 Only for NetWare:
10354                 The code below is removed for NetWare because it abends/crashes on NetWare
10355                 when the script has error such as not having the closing quotes like:
10356                     if ($var eq "value)
10357                 Checking of white spaces is anyway done in NetWare code.
10358         */
10359 #ifndef NETWARE
10360         while (isSPACE(*PL_oldbufptr))
10361             PL_oldbufptr++;
10362 #endif
10363         context = PL_oldbufptr;
10364         contlen = PL_bufptr - PL_oldbufptr;
10365     }
10366     else if (yychar > 255)
10367         sv_catpvs(where_sv, "next token ???");
10368     else if (yychar == -2) { /* YYEMPTY */
10369         if (PL_lex_state == LEX_NORMAL ||
10370            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10371             sv_catpvs(where_sv, "at end of line");
10372         else if (PL_lex_inpat)
10373             sv_catpvs(where_sv, "within pattern");
10374         else
10375             sv_catpvs(where_sv, "within string");
10376     }
10377     else {
10378         sv_catpvs(where_sv, "next char ");
10379         if (yychar < 32)
10380             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10381         else if (isPRINT_LC(yychar)) {
10382             const char string = yychar;
10383             sv_catpvn(where_sv, &string, 1);
10384         }
10385         else
10386             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10387     }
10388     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10389     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10390         OutCopFILE(PL_curcop),
10391         (IV)(PL_parser->preambling == NOLINE
10392                ? CopLINE(PL_curcop)
10393                : PL_parser->preambling));
10394     if (context)
10395         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10396                              UTF8fARG(UTF, contlen, context));
10397     else
10398         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10399     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10400         Perl_sv_catpvf(aTHX_ msg,
10401         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10402                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10403         PL_multi_end = 0;
10404     }
10405     if (PL_in_eval & EVAL_WARNONLY) {
10406         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10407     }
10408     else
10409         qerror(msg);
10410     if (PL_error_count >= 10) {
10411         SV * errsv;
10412         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10413             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10414                        SVfARG(errsv), OutCopFILE(PL_curcop));
10415         else
10416             Perl_croak(aTHX_ "%s has too many errors.\n",
10417             OutCopFILE(PL_curcop));
10418     }
10419     PL_in_my = 0;
10420     PL_in_my_stash = NULL;
10421     return 0;
10422 }
10423
10424 STATIC char*
10425 S_swallow_bom(pTHX_ U8 *s)
10426 {
10427     const STRLEN slen = SvCUR(PL_linestr);
10428
10429     PERL_ARGS_ASSERT_SWALLOW_BOM;
10430
10431     switch (s[0]) {
10432     case 0xFF:
10433         if (s[1] == 0xFE) {
10434             /* UTF-16 little-endian? (or UTF-32LE?) */
10435             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10436                 /* diag_listed_as: Unsupported script encoding %s */
10437                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10438 #ifndef PERL_NO_UTF16_FILTER
10439             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10440             s += 2;
10441             if (PL_bufend > (char*)s) {
10442                 s = add_utf16_textfilter(s, TRUE);
10443             }
10444 #else
10445             /* diag_listed_as: Unsupported script encoding %s */
10446             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10447 #endif
10448         }
10449         break;
10450     case 0xFE:
10451         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10452 #ifndef PERL_NO_UTF16_FILTER
10453             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10454             s += 2;
10455             if (PL_bufend > (char *)s) {
10456                 s = add_utf16_textfilter(s, FALSE);
10457             }
10458 #else
10459             /* diag_listed_as: Unsupported script encoding %s */
10460             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10461 #endif
10462         }
10463         break;
10464     case BOM_UTF8_FIRST_BYTE: {
10465         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10466         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10467             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10468             s += len + 1;                      /* UTF-8 */
10469         }
10470         break;
10471     }
10472     case 0:
10473         if (slen > 3) {
10474              if (s[1] == 0) {
10475                   if (s[2] == 0xFE && s[3] == 0xFF) {
10476                        /* UTF-32 big-endian */
10477                        /* diag_listed_as: Unsupported script encoding %s */
10478                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10479                   }
10480              }
10481              else if (s[2] == 0 && s[3] != 0) {
10482                   /* Leading bytes
10483                    * 00 xx 00 xx
10484                    * are a good indicator of UTF-16BE. */
10485 #ifndef PERL_NO_UTF16_FILTER
10486                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10487                   s = add_utf16_textfilter(s, FALSE);
10488 #else
10489                   /* diag_listed_as: Unsupported script encoding %s */
10490                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10491 #endif
10492              }
10493         }
10494         break;
10495
10496     default:
10497          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10498                   /* Leading bytes
10499                    * xx 00 xx 00
10500                    * are a good indicator of UTF-16LE. */
10501 #ifndef PERL_NO_UTF16_FILTER
10502               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10503               s = add_utf16_textfilter(s, TRUE);
10504 #else
10505               /* diag_listed_as: Unsupported script encoding %s */
10506               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10507 #endif
10508          }
10509     }
10510     return (char*)s;
10511 }
10512
10513
10514 #ifndef PERL_NO_UTF16_FILTER
10515 static I32
10516 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10517 {
10518     SV *const filter = FILTER_DATA(idx);
10519     /* We re-use this each time round, throwing the contents away before we
10520        return.  */
10521     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10522     SV *const utf8_buffer = filter;
10523     IV status = IoPAGE(filter);
10524     const bool reverse = cBOOL(IoLINES(filter));
10525     I32 retval;
10526
10527     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10528
10529     /* As we're automatically added, at the lowest level, and hence only called
10530        from this file, we can be sure that we're not called in block mode. Hence
10531        don't bother writing code to deal with block mode.  */
10532     if (maxlen) {
10533         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10534     }
10535     if (status < 0) {
10536         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10537     }
10538     DEBUG_P(PerlIO_printf(Perl_debug_log,
10539                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10540                           FPTR2DPTR(void *, S_utf16_textfilter),
10541                           reverse ? 'l' : 'b', idx, maxlen, status,
10542                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10543
10544     while (1) {
10545         STRLEN chars;
10546         STRLEN have;
10547         I32 newlen;
10548         U8 *end;
10549         /* First, look in our buffer of existing UTF-8 data:  */
10550         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10551
10552         if (nl) {
10553             ++nl;
10554         } else if (status == 0) {
10555             /* EOF */
10556             IoPAGE(filter) = 0;
10557             nl = SvEND(utf8_buffer);
10558         }
10559         if (nl) {
10560             STRLEN got = nl - SvPVX(utf8_buffer);
10561             /* Did we have anything to append?  */
10562             retval = got != 0;
10563             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10564             /* Everything else in this code works just fine if SVp_POK isn't
10565                set.  This, however, needs it, and we need it to work, else
10566                we loop infinitely because the buffer is never consumed.  */
10567             sv_chop(utf8_buffer, nl);
10568             break;
10569         }
10570
10571         /* OK, not a complete line there, so need to read some more UTF-16.
10572            Read an extra octect if the buffer currently has an odd number. */
10573         while (1) {
10574             if (status <= 0)
10575                 break;
10576             if (SvCUR(utf16_buffer) >= 2) {
10577                 /* Location of the high octet of the last complete code point.
10578                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10579                    *coupled* with all the benefits of partial reads and
10580                    endianness.  */
10581                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10582                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10583
10584                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10585                     break;
10586                 }
10587
10588                 /* We have the first half of a surrogate. Read more.  */
10589                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10590             }
10591
10592             status = FILTER_READ(idx + 1, utf16_buffer,
10593                                  160 + (SvCUR(utf16_buffer) & 1));
10594             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10595             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10596             if (status < 0) {
10597                 /* Error */
10598                 IoPAGE(filter) = status;
10599                 return status;
10600             }
10601         }
10602
10603         chars = SvCUR(utf16_buffer) >> 1;
10604         have = SvCUR(utf8_buffer);
10605         SvGROW(utf8_buffer, have + chars * 3 + 1);
10606
10607         if (reverse) {
10608             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10609                                          (U8*)SvPVX_const(utf8_buffer) + have,
10610                                          chars * 2, &newlen);
10611         } else {
10612             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10613                                 (U8*)SvPVX_const(utf8_buffer) + have,
10614                                 chars * 2, &newlen);
10615         }
10616         SvCUR_set(utf8_buffer, have + newlen);
10617         *end = '\0';
10618
10619         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10620            it's private to us, and utf16_to_utf8{,reversed} take a
10621            (pointer,length) pair, rather than a NUL-terminated string.  */
10622         if(SvCUR(utf16_buffer) & 1) {
10623             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10624             SvCUR_set(utf16_buffer, 1);
10625         } else {
10626             SvCUR_set(utf16_buffer, 0);
10627         }
10628     }
10629     DEBUG_P(PerlIO_printf(Perl_debug_log,
10630                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10631                           status,
10632                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10633     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10634     return retval;
10635 }
10636
10637 static U8 *
10638 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10639 {
10640     SV *filter = filter_add(S_utf16_textfilter, NULL);
10641
10642     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10643
10644     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10645     sv_setpvs(filter, "");
10646     IoLINES(filter) = reversed;
10647     IoPAGE(filter) = 1; /* Not EOF */
10648
10649     /* Sadly, we have to return a valid pointer, come what may, so we have to
10650        ignore any error return from this.  */
10651     SvCUR_set(PL_linestr, 0);
10652     if (FILTER_READ(0, PL_linestr, 0)) {
10653         SvUTF8_on(PL_linestr);
10654     } else {
10655         SvUTF8_on(PL_linestr);
10656     }
10657     PL_bufend = SvEND(PL_linestr);
10658     return (U8*)SvPVX(PL_linestr);
10659 }
10660 #endif
10661
10662 /*
10663 Returns a pointer to the next character after the parsed
10664 vstring, as well as updating the passed in sv.
10665
10666 Function must be called like
10667
10668         sv = sv_2mortal(newSV(5));
10669         s = scan_vstring(s,e,sv);
10670
10671 where s and e are the start and end of the string.
10672 The sv should already be large enough to store the vstring
10673 passed in, for performance reasons.
10674
10675 This function may croak if fatal warnings are enabled in the
10676 calling scope, hence the sv_2mortal in the example (to prevent
10677 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
10678 sv_2mortal.
10679
10680 */
10681
10682 char *
10683 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10684 {
10685     const char *pos = s;
10686     const char *start = s;
10687
10688     PERL_ARGS_ASSERT_SCAN_VSTRING;
10689
10690     if (*pos == 'v') pos++;  /* get past 'v' */
10691     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10692         pos++;
10693     if ( *pos != '.') {
10694         /* this may not be a v-string if followed by => */
10695         const char *next = pos;
10696         while (next < e && isSPACE(*next))
10697             ++next;
10698         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10699             /* return string not v-string */
10700             sv_setpvn(sv,(char *)s,pos-s);
10701             return (char *)pos;
10702         }
10703     }
10704
10705     if (!isALPHA(*pos)) {
10706         U8 tmpbuf[UTF8_MAXBYTES+1];
10707
10708         if (*s == 'v')
10709             s++;  /* get past 'v' */
10710
10711         sv_setpvs(sv, "");
10712
10713         for (;;) {
10714             /* this is atoi() that tolerates underscores */
10715             U8 *tmpend;
10716             UV rev = 0;
10717             const char *end = pos;
10718             UV mult = 1;
10719             while (--end >= s) {
10720                 if (*end != '_') {
10721                     const UV orev = rev;
10722                     rev += (*end - '0') * mult;
10723                     mult *= 10;
10724                     if (orev > rev)
10725                         /* diag_listed_as: Integer overflow in %s number */
10726                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10727                                          "Integer overflow in decimal number");
10728                 }
10729             }
10730 #ifdef EBCDIC
10731             if (rev > 0x7FFFFFFF)
10732                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10733 #endif
10734             /* Append native character for the rev point */
10735             tmpend = uvchr_to_utf8(tmpbuf, rev);
10736             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10737             if (!UVCHR_IS_INVARIANT(rev))
10738                  SvUTF8_on(sv);
10739             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10740                  s = ++pos;
10741             else {
10742                  s = pos;
10743                  break;
10744             }
10745             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10746                  pos++;
10747         }
10748         SvPOK_on(sv);
10749         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10750         SvRMAGICAL_on(sv);
10751     }
10752     return (char *)s;
10753 }
10754
10755 int
10756 Perl_keyword_plugin_standard(pTHX_
10757         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10758 {
10759     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10760     PERL_UNUSED_CONTEXT;
10761     PERL_UNUSED_ARG(keyword_ptr);
10762     PERL_UNUSED_ARG(keyword_len);
10763     PERL_UNUSED_ARG(op_ptr);
10764     return KEYWORD_PLUGIN_DECLINE;
10765 }
10766
10767 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10768 static void
10769 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
10770 {
10771     SAVEI32(PL_lex_brackets);
10772     if (PL_lex_brackets > 100)
10773         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10774     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
10775     SAVEI32(PL_lex_allbrackets);
10776     PL_lex_allbrackets = 0;
10777     SAVEI8(PL_lex_fakeeof);
10778     PL_lex_fakeeof = (U8)fakeeof;
10779     if(yyparse(gramtype) && !PL_parser->error_count)
10780         qerror(Perl_mess(aTHX_ "Parse error"));
10781 }
10782
10783 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
10784 static OP *
10785 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
10786 {
10787     OP *o;
10788     ENTER;
10789     SAVEVPTR(PL_eval_root);
10790     PL_eval_root = NULL;
10791     parse_recdescent(gramtype, fakeeof);
10792     o = PL_eval_root;
10793     LEAVE;
10794     return o;
10795 }
10796
10797 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
10798 static OP *
10799 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
10800 {
10801     OP *exprop;
10802     if (flags & ~PARSE_OPTIONAL)
10803         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
10804     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
10805     if (!exprop && !(flags & PARSE_OPTIONAL)) {
10806         if (!PL_parser->error_count)
10807             qerror(Perl_mess(aTHX_ "Parse error"));
10808         exprop = newOP(OP_NULL, 0);
10809     }
10810     return exprop;
10811 }
10812
10813 /*
10814 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
10815
10816 Parse a Perl arithmetic expression.  This may contain operators of precedence
10817 down to the bit shift operators.  The expression must be followed (and thus
10818 terminated) either by a comparison or lower-precedence operator or by
10819 something that would normally terminate an expression such as semicolon.
10820 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10821 otherwise it is mandatory.  It is up to the caller to ensure that the
10822 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10823 the source of the code to be parsed and the lexical context for the
10824 expression.
10825
10826 The op tree representing the expression is returned.  If an optional
10827 expression is absent, a null pointer is returned, otherwise the pointer
10828 will be non-null.
10829
10830 If an error occurs in parsing or compilation, in most cases a valid op
10831 tree is returned anyway.  The error is reflected in the parser state,
10832 normally resulting in a single exception at the top level of parsing
10833 which covers all the compilation errors that occurred.  Some compilation
10834 errors, however, will throw an exception immediately.
10835
10836 =cut
10837 */
10838
10839 OP *
10840 Perl_parse_arithexpr(pTHX_ U32 flags)
10841 {
10842     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
10843 }
10844
10845 /*
10846 =for apidoc Amx|OP *|parse_termexpr|U32 flags
10847
10848 Parse a Perl term expression.  This may contain operators of precedence
10849 down to the assignment operators.  The expression must be followed (and thus
10850 terminated) either by a comma or lower-precedence operator or by
10851 something that would normally terminate an expression such as semicolon.
10852 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10853 otherwise it is mandatory.  It is up to the caller to ensure that the
10854 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10855 the source of the code to be parsed and the lexical context for the
10856 expression.
10857
10858 The op tree representing the expression is returned.  If an optional
10859 expression is absent, a null pointer is returned, otherwise the pointer
10860 will be non-null.
10861
10862 If an error occurs in parsing or compilation, in most cases a valid op
10863 tree is returned anyway.  The error is reflected in the parser state,
10864 normally resulting in a single exception at the top level of parsing
10865 which covers all the compilation errors that occurred.  Some compilation
10866 errors, however, will throw an exception immediately.
10867
10868 =cut
10869 */
10870
10871 OP *
10872 Perl_parse_termexpr(pTHX_ U32 flags)
10873 {
10874     return parse_expr(LEX_FAKEEOF_COMMA, flags);
10875 }
10876
10877 /*
10878 =for apidoc Amx|OP *|parse_listexpr|U32 flags
10879
10880 Parse a Perl list expression.  This may contain operators of precedence
10881 down to the comma operator.  The expression must be followed (and thus
10882 terminated) either by a low-precedence logic operator such as C<or> or by
10883 something that would normally terminate an expression such as semicolon.
10884 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10885 otherwise it is mandatory.  It is up to the caller to ensure that the
10886 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10887 the source of the code to be parsed and the lexical context for the
10888 expression.
10889
10890 The op tree representing the expression is returned.  If an optional
10891 expression is absent, a null pointer is returned, otherwise the pointer
10892 will be non-null.
10893
10894 If an error occurs in parsing or compilation, in most cases a valid op
10895 tree is returned anyway.  The error is reflected in the parser state,
10896 normally resulting in a single exception at the top level of parsing
10897 which covers all the compilation errors that occurred.  Some compilation
10898 errors, however, will throw an exception immediately.
10899
10900 =cut
10901 */
10902
10903 OP *
10904 Perl_parse_listexpr(pTHX_ U32 flags)
10905 {
10906     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
10907 }
10908
10909 /*
10910 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
10911
10912 Parse a single complete Perl expression.  This allows the full
10913 expression grammar, including the lowest-precedence operators such
10914 as C<or>.  The expression must be followed (and thus terminated) by a
10915 token that an expression would normally be terminated by: end-of-file,
10916 closing bracketing punctuation, semicolon, or one of the keywords that
10917 signals a postfix expression-statement modifier.  If I<flags> includes
10918 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
10919 mandatory.  It is up to the caller to ensure that the dynamic parser
10920 state (L</PL_parser> et al) is correctly set to reflect the source of
10921 the code to be parsed and the lexical context for the expression.
10922
10923 The op tree representing the expression is returned.  If an optional
10924 expression is absent, a null pointer is returned, otherwise the pointer
10925 will be non-null.
10926
10927 If an error occurs in parsing or compilation, in most cases a valid op
10928 tree is returned anyway.  The error is reflected in the parser state,
10929 normally resulting in a single exception at the top level of parsing
10930 which covers all the compilation errors that occurred.  Some compilation
10931 errors, however, will throw an exception immediately.
10932
10933 =cut
10934 */
10935
10936 OP *
10937 Perl_parse_fullexpr(pTHX_ U32 flags)
10938 {
10939     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
10940 }
10941
10942 /*
10943 =for apidoc Amx|OP *|parse_block|U32 flags
10944
10945 Parse a single complete Perl code block.  This consists of an opening
10946 brace, a sequence of statements, and a closing brace.  The block
10947 constitutes a lexical scope, so C<my> variables and various compile-time
10948 effects can be contained within it.  It is up to the caller to ensure
10949 that the dynamic parser state (L</PL_parser> et al) is correctly set to
10950 reflect the source of the code to be parsed and the lexical context for
10951 the statement.
10952
10953 The op tree representing the code block is returned.  This is always a
10954 real op, never a null pointer.  It will normally be a C<lineseq> list,
10955 including C<nextstate> or equivalent ops.  No ops to construct any kind
10956 of runtime scope are included by virtue of it being a block.
10957
10958 If an error occurs in parsing or compilation, in most cases a valid op
10959 tree (most likely null) is returned anyway.  The error is reflected in
10960 the parser state, normally resulting in a single exception at the top
10961 level of parsing which covers all the compilation errors that occurred.
10962 Some compilation errors, however, will throw an exception immediately.
10963
10964 The I<flags> parameter is reserved for future use, and must always
10965 be zero.
10966
10967 =cut
10968 */
10969
10970 OP *
10971 Perl_parse_block(pTHX_ U32 flags)
10972 {
10973     if (flags)
10974         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
10975     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
10976 }
10977
10978 /*
10979 =for apidoc Amx|OP *|parse_barestmt|U32 flags
10980
10981 Parse a single unadorned Perl statement.  This may be a normal imperative
10982 statement or a declaration that has compile-time effect.  It does not
10983 include any label or other affixture.  It is up to the caller to ensure
10984 that the dynamic parser state (L</PL_parser> et al) is correctly set to
10985 reflect the source of the code to be parsed and the lexical context for
10986 the statement.
10987
10988 The op tree representing the statement is returned.  This may be a
10989 null pointer if the statement is null, for example if it was actually
10990 a subroutine definition (which has compile-time side effects).  If not
10991 null, it will be ops directly implementing the statement, suitable to
10992 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
10993 equivalent op (except for those embedded in a scope contained entirely
10994 within the statement).
10995
10996 If an error occurs in parsing or compilation, in most cases a valid op
10997 tree (most likely null) is returned anyway.  The error is reflected in
10998 the parser state, normally resulting in a single exception at the top
10999 level of parsing which covers all the compilation errors that occurred.
11000 Some compilation errors, however, will throw an exception immediately.
11001
11002 The I<flags> parameter is reserved for future use, and must always
11003 be zero.
11004
11005 =cut
11006 */
11007
11008 OP *
11009 Perl_parse_barestmt(pTHX_ U32 flags)
11010 {
11011     if (flags)
11012         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11013     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11014 }
11015
11016 /*
11017 =for apidoc Amx|SV *|parse_label|U32 flags
11018
11019 Parse a single label, possibly optional, of the type that may prefix a
11020 Perl statement.  It is up to the caller to ensure that the dynamic parser
11021 state (L</PL_parser> et al) is correctly set to reflect the source of
11022 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11023 label is optional, otherwise it is mandatory.
11024
11025 The name of the label is returned in the form of a fresh scalar.  If an
11026 optional label is absent, a null pointer is returned.
11027
11028 If an error occurs in parsing, which can only occur if the label is
11029 mandatory, a valid label is returned anyway.  The error is reflected in
11030 the parser state, normally resulting in a single exception at the top
11031 level of parsing which covers all the compilation errors that occurred.
11032
11033 =cut
11034 */
11035
11036 SV *
11037 Perl_parse_label(pTHX_ U32 flags)
11038 {
11039     if (flags & ~PARSE_OPTIONAL)
11040         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11041     if (PL_lex_state == LEX_KNOWNEXT) {
11042         PL_parser->yychar = yylex();
11043         if (PL_parser->yychar == LABEL) {
11044             char * const lpv = pl_yylval.pval;
11045             STRLEN llen = strlen(lpv);
11046             PL_parser->yychar = YYEMPTY;
11047             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11048         } else {
11049             yyunlex();
11050             goto no_label;
11051         }
11052     } else {
11053         char *s, *t;
11054         STRLEN wlen, bufptr_pos;
11055         lex_read_space(0);
11056         t = s = PL_bufptr;
11057         if (!isIDFIRST_lazy_if(s, UTF))
11058             goto no_label;
11059         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11060         if (word_takes_any_delimeter(s, wlen))
11061             goto no_label;
11062         bufptr_pos = s - SvPVX(PL_linestr);
11063         PL_bufptr = t;
11064         lex_read_space(LEX_KEEP_PREVIOUS);
11065         t = PL_bufptr;
11066         s = SvPVX(PL_linestr) + bufptr_pos;
11067         if (t[0] == ':' && t[1] != ':') {
11068             PL_oldoldbufptr = PL_oldbufptr;
11069             PL_oldbufptr = s;
11070             PL_bufptr = t+1;
11071             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11072         } else {
11073             PL_bufptr = s;
11074             no_label:
11075             if (flags & PARSE_OPTIONAL) {
11076                 return NULL;
11077             } else {
11078                 qerror(Perl_mess(aTHX_ "Parse error"));
11079                 return newSVpvs("x");
11080             }
11081         }
11082     }
11083 }
11084
11085 /*
11086 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11087
11088 Parse a single complete Perl statement.  This may be a normal imperative
11089 statement or a declaration that has compile-time effect, and may include
11090 optional labels.  It is up to the caller to ensure that the dynamic
11091 parser state (L</PL_parser> et al) is correctly set to reflect the source
11092 of the code to be parsed and the lexical context for the statement.
11093
11094 The op tree representing the statement is returned.  This may be a
11095 null pointer if the statement is null, for example if it was actually
11096 a subroutine definition (which has compile-time side effects).  If not
11097 null, it will be the result of a L</newSTATEOP> call, normally including
11098 a C<nextstate> or equivalent op.
11099
11100 If an error occurs in parsing or compilation, in most cases a valid op
11101 tree (most likely null) is returned anyway.  The error is reflected in
11102 the parser state, normally resulting in a single exception at the top
11103 level of parsing which covers all the compilation errors that occurred.
11104 Some compilation errors, however, will throw an exception immediately.
11105
11106 The I<flags> parameter is reserved for future use, and must always
11107 be zero.
11108
11109 =cut
11110 */
11111
11112 OP *
11113 Perl_parse_fullstmt(pTHX_ U32 flags)
11114 {
11115     if (flags)
11116         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11117     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11118 }
11119
11120 /*
11121 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11122
11123 Parse a sequence of zero or more Perl statements.  These may be normal
11124 imperative statements, including optional labels, or declarations
11125 that have compile-time effect, or any mixture thereof.  The statement
11126 sequence ends when a closing brace or end-of-file is encountered in a
11127 place where a new statement could have validly started.  It is up to
11128 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11129 is correctly set to reflect the source of the code to be parsed and the
11130 lexical context for the statements.
11131
11132 The op tree representing the statement sequence is returned.  This may
11133 be a null pointer if the statements were all null, for example if there
11134 were no statements or if there were only subroutine definitions (which
11135 have compile-time side effects).  If not null, it will be a C<lineseq>
11136 list, normally including C<nextstate> or equivalent ops.
11137
11138 If an error occurs in parsing or compilation, in most cases a valid op
11139 tree is returned anyway.  The error is reflected in the parser state,
11140 normally resulting in a single exception at the top level of parsing
11141 which covers all the compilation errors that occurred.  Some compilation
11142 errors, however, will throw an exception immediately.
11143
11144 The I<flags> parameter is reserved for future use, and must always
11145 be zero.
11146
11147 =cut
11148 */
11149
11150 OP *
11151 Perl_parse_stmtseq(pTHX_ U32 flags)
11152 {
11153     OP *stmtseqop;
11154     I32 c;
11155     if (flags)
11156         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11157     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11158     c = lex_peek_unichar(0);
11159     if (c != -1 && c != /*{*/'}')
11160         qerror(Perl_mess(aTHX_ "Parse error"));
11161     return stmtseqop;
11162 }
11163
11164 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11165 static void
11166 S_lex_token_boundary(pTHX)
11167 {
11168     PL_oldoldbufptr = PL_oldbufptr;
11169     PL_oldbufptr = PL_bufptr;
11170 }
11171
11172 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11173 static OP *
11174 S_parse_opt_lexvar(pTHX)
11175 {
11176     I32 sigil, c;
11177     char *s, *d;
11178     OP *var;
11179     lex_token_boundary();
11180     sigil = lex_read_unichar(0);
11181     if (lex_peek_unichar(0) == '#') {
11182         qerror(Perl_mess(aTHX_ "Parse error"));
11183         return NULL;
11184     }
11185     lex_read_space(0);
11186     c = lex_peek_unichar(0);
11187     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11188         return NULL;
11189     s = PL_bufptr;
11190     d = PL_tokenbuf + 1;
11191     PL_tokenbuf[0] = (char)sigil;
11192     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11193     PL_bufptr = s;
11194     if (d == PL_tokenbuf+1)
11195         return NULL;
11196     *d = 0;
11197     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11198                 OPf_MOD | (OPpLVAL_INTRO<<8));
11199     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11200     return var;
11201 }
11202
11203 OP *
11204 Perl_parse_subsignature(pTHX)
11205 {
11206     I32 c;
11207     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11208     OP *initops = NULL;
11209     lex_read_space(0);
11210     c = lex_peek_unichar(0);
11211     while (c != /*(*/')') {
11212         switch (c) {
11213             case '$': {
11214                 OP *var, *expr;
11215                 if (prev_type == 2)
11216                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11217                 var = parse_opt_lexvar();
11218                 expr = var ?
11219                     newBINOP(OP_AELEM, 0,
11220                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11221                             OP_RV2AV),
11222                         newSVOP(OP_CONST, 0, newSViv(pos))) :
11223                     NULL;
11224                 lex_read_space(0);
11225                 c = lex_peek_unichar(0);
11226                 if (c == '=') {
11227                     lex_token_boundary();
11228                     lex_read_unichar(0);
11229                     lex_read_space(0);
11230                     c = lex_peek_unichar(0);
11231                     if (c == ',' || c == /*(*/')') {
11232                         if (var)
11233                             qerror(Perl_mess(aTHX_ "Optional parameter "
11234                                     "lacks default expression"));
11235                     } else {
11236                         OP *defexpr = parse_termexpr(0);
11237                         if (defexpr->op_type == OP_UNDEF &&
11238                                 !(defexpr->op_flags & OPf_KIDS)) {
11239                             op_free(defexpr);
11240                         } else {
11241                             OP *ifop =
11242                                 newBINOP(OP_GE, 0,
11243                                     scalar(newUNOP(OP_RV2AV, 0,
11244                                             newGVOP(OP_GV, 0, PL_defgv))),
11245                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
11246                             expr = var ?
11247                                 newCONDOP(0, ifop, expr, defexpr) :
11248                                 newLOGOP(OP_OR, 0, ifop, defexpr);
11249                         }
11250                     }
11251                     prev_type = 1;
11252                 } else {
11253                     if (prev_type == 1)
11254                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
11255                                 "follows optional parameter"));
11256                     prev_type = 0;
11257                     min_arity = pos + 1;
11258                 }
11259                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11260                 if (expr)
11261                     initops = op_append_list(OP_LINESEQ, initops,
11262                                 newSTATEOP(0, NULL, expr));
11263                 max_arity = ++pos;
11264             } break;
11265             case '@':
11266             case '%': {
11267                 OP *var;
11268                 if (prev_type == 2)
11269                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11270                 var = parse_opt_lexvar();
11271                 if (c == '%') {
11272                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11273                             newBINOP(OP_BIT_AND, 0,
11274                                 scalar(newUNOP(OP_RV2AV, 0,
11275                                     newGVOP(OP_GV, 0, PL_defgv))),
11276                                 newSVOP(OP_CONST, 0, newSViv(1))),
11277                             newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11278                                 newSVOP(OP_CONST, 0,
11279                                     newSVpvs("Odd name/value argument "
11280                                         "for subroutine"))));
11281                     if (pos != min_arity)
11282                         chkop = newLOGOP(OP_AND, 0,
11283                                     newBINOP(OP_GT, 0,
11284                                         scalar(newUNOP(OP_RV2AV, 0,
11285                                             newGVOP(OP_GV, 0, PL_defgv))),
11286                                         newSVOP(OP_CONST, 0, newSViv(pos))),
11287                                     chkop);
11288                     initops = op_append_list(OP_LINESEQ,
11289                                 newSTATEOP(0, NULL, chkop),
11290                                 initops);
11291                 }
11292                 if (var) {
11293                     OP *slice = pos ?
11294                         op_prepend_elem(OP_ASLICE,
11295                             newOP(OP_PUSHMARK, 0),
11296                             newLISTOP(OP_ASLICE, 0,
11297                                 list(newRANGE(0,
11298                                     newSVOP(OP_CONST, 0, newSViv(pos)),
11299                                     newUNOP(OP_AV2ARYLEN, 0,
11300                                         ref(newUNOP(OP_RV2AV, 0,
11301                                                 newGVOP(OP_GV, 0, PL_defgv)),
11302                                             OP_AV2ARYLEN)))),
11303                                 ref(newUNOP(OP_RV2AV, 0,
11304                                         newGVOP(OP_GV, 0, PL_defgv)),
11305                                     OP_ASLICE))) :
11306                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11307                     initops = op_append_list(OP_LINESEQ, initops,
11308                         newSTATEOP(0, NULL,
11309                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
11310                 }
11311                 prev_type = 2;
11312                 max_arity = -1;
11313             } break;
11314             default:
11315                 parse_error:
11316                 qerror(Perl_mess(aTHX_ "Parse error"));
11317                 return NULL;
11318         }
11319         lex_read_space(0);
11320         c = lex_peek_unichar(0);
11321         switch (c) {
11322             case /*(*/')': break;
11323             case ',':
11324                 do {
11325                     lex_token_boundary();
11326                     lex_read_unichar(0);
11327                     lex_read_space(0);
11328                     c = lex_peek_unichar(0);
11329                 } while (c == ',');
11330                 break;
11331             default:
11332                 goto parse_error;
11333         }
11334     }
11335     if (min_arity != 0) {
11336         initops = op_append_list(OP_LINESEQ,
11337             newSTATEOP(0, NULL,
11338                 newLOGOP(OP_OR, 0,
11339                     newBINOP(OP_GE, 0,
11340                         scalar(newUNOP(OP_RV2AV, 0,
11341                             newGVOP(OP_GV, 0, PL_defgv))),
11342                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
11343                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11344                         newSVOP(OP_CONST, 0,
11345                             newSVpvs("Too few arguments for subroutine"))))),
11346             initops);
11347     }
11348     if (max_arity != -1) {
11349         initops = op_append_list(OP_LINESEQ,
11350             newSTATEOP(0, NULL,
11351                 newLOGOP(OP_OR, 0,
11352                     newBINOP(OP_LE, 0,
11353                         scalar(newUNOP(OP_RV2AV, 0,
11354                             newGVOP(OP_GV, 0, PL_defgv))),
11355                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
11356                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11357                         newSVOP(OP_CONST, 0,
11358                             newSVpvs("Too many arguments for subroutine"))))),
11359             initops);
11360     }
11361     return initops;
11362 }
11363
11364 /*
11365  * Local variables:
11366  * c-indentation-style: bsd
11367  * c-basic-offset: 4
11368  * indent-tabs-mode: nil
11369  * End:
11370  *
11371  * ex: set ts=8 sts=4 sw=4 et:
11372  */