toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_static.c"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_defer            (PL_parser->lex_defer)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_expect           (PL_parser->lex_expect)
  58 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  59 #define PL_lex_inpat            (PL_parser->lex_inpat)
  60 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  61 #define PL_lex_op               (PL_parser->lex_op)
  62 #define PL_lex_repl             (PL_parser->lex_repl)
  63 #define PL_lex_starts           (PL_parser->lex_starts)
  64 #define PL_lex_stuff            (PL_parser->lex_stuff)
  65 #define PL_multi_start          (PL_parser->multi_start)
  66 #define PL_multi_open           (PL_parser->multi_open)
  67 #define PL_multi_close          (PL_parser->multi_close)
  68 #define PL_preambled            (PL_parser->preambled)
  69 #define PL_sublex_info          (PL_parser->sublex_info)
  70 #define PL_linestr              (PL_parser->linestr)
  71 #define PL_expect               (PL_parser->expect)
  72 #define PL_copline              (PL_parser->copline)
  73 #define PL_bufptr               (PL_parser->bufptr)
  74 #define PL_oldbufptr            (PL_parser->oldbufptr)
  75 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  76 #define PL_linestart            (PL_parser->linestart)
  77 #define PL_bufend               (PL_parser->bufend)
  78 #define PL_last_uni             (PL_parser->last_uni)
  79 #define PL_last_lop             (PL_parser->last_lop)
  80 #define PL_last_lop_op          (PL_parser->last_lop_op)
  81 #define PL_lex_state            (PL_parser->lex_state)
  82 #define PL_rsfp                 (PL_parser->rsfp)
  83 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  84 #define PL_in_my                (PL_parser->in_my)
  85 #define PL_in_my_stash          (PL_parser->in_my_stash)
  86 #define PL_tokenbuf             (PL_parser->tokenbuf)
  87 #define PL_multi_end            (PL_parser->multi_end)
  88 #define PL_error_count          (PL_parser->error_count)
  89
  90 #  define PL_nexttoke           (PL_parser->nexttoke)
  91 #  define PL_nexttype           (PL_parser->nexttype)
  92 #  define PL_nextval            (PL_parser->nextval)
  93
  94 static const char* const ident_too_long = "Identifier too long";
  95
  96 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  97
  98 #define XENUMMASK  0x3f
  99 #define XFAKEEOF   0x40
 100 #define XFAKEBRACK 0x80
 101
 102 #ifdef USE_UTF8_SCRIPTS
 103 #   define UTF (!IN_BYTES)
 104 #else
 105 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 106 #endif
 107
 108 /* The maximum number of characters preceding the unrecognized one to display */
 109 #define UNRECOGNIZED_PRECEDE_COUNT 10
 110
 111 /* In variables named $^X, these are the legal values for X.
 112  * 1999-02-27 mjd-perl-patch@plover.com */
 113 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 114
 115 #define SPACE_OR_TAB(c) isBLANK_A(c)
 116
 117 /* LEX_* are values for PL_lex_state, the state of the lexer.
 118  * They are arranged oddly so that the guard on the switch statement
 119  * can get by with a single comparison (if the compiler is smart enough).
 120  *
 121  * These values refer to the various states within a sublex parse,
 122  * i.e. within a double quotish string
 123  */
 124
 125 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 126
 127 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 128 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 129 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 130 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 131 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 132
 133                                    /* at end of code, eg "$x" followed by:  */
 134 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 135 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 136
 137 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 138                                         string or after \E, $foo, etc       */
 139 #define LEX_INTERPCONST          2 /* NOT USED */
 140 #define LEX_FORMLINE             1 /* expecting a format line               */
 141 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 142
 143
 144 #ifdef DEBUGGING
 145 static const char* const lex_state_names[] = {
 146     "KNOWNEXT",
 147     "FORMLINE",
 148     "INTERPCONST",
 149     "INTERPCONCAT",
 150     "INTERPENDMAYBE",
 151     "INTERPEND",
 152     "INTERPSTART",
 153     "INTERPPUSH",
 154     "INTERPCASEMOD",
 155     "INTERPNORMAL",
 156     "NORMAL"
 157 };
 158 #endif
 159
 160 #include "keywords.h"
 161
 162 /* CLINE is a macro that ensures PL_copline has a sane value */
 163
 164 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 165
 166 #  define SKIPSPACE0(s) skipspace(s)
 167 #  define SKIPSPACE1(s) skipspace(s)
 168 #  define SKIPSPACE2(s,tsv) skipspace(s)
 169 #  define PEEKSPACE(s) skipspace(s)
 170
 171 /*
 172  * Convenience functions to return different tokens and prime the
 173  * lexer for the next token.  They all take an argument.
 174  *
 175  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 176  * OPERATOR     : generic operator
 177  * AOPERATOR    : assignment operator
 178  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 179  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 180  * PREREF       : *EXPR where EXPR is not a simple identifier
 181  * TERM         : expression term
 182  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 183  * LOOPX        : loop exiting command (goto, last, dump, etc)
 184  * FTST         : file test operator
 185  * FUN0         : zero-argument function
 186  * FUN0OP       : zero-argument function, with its op created in this file
 187  * FUN1         : not used, except for not, which isn't a UNIOP
 188  * BOop         : bitwise or or xor
 189  * BAop         : bitwise and
 190  * SHop         : shift operator
 191  * PWop         : power operator
 192  * PMop         : pattern-matching operator
 193  * Aop          : addition-level operator
 194  * Mop          : multiplication-level operator
 195  * Eop          : equality-testing operator
 196  * Rop          : relational operator <= != gt
 197  *
 198  * Also see LOP and lop() below.
 199  */
 200
 201 #ifdef DEBUGGING /* Serve -DT. */
 202 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 203 #else
 204 #   define REPORT(retval) (retval)
 205 #endif
 206
 207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 214 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 215 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 216 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 217 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 218 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 219 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 220 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 221 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 222 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 223 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 224 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 225 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 226 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 227 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 228 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 229
 230 /* This bit of chicanery makes a unary function followed by
 231  * a parenthesis into a function with one argument, highest precedence.
 232  * The UNIDOR macro is for unary functions that can be followed by the //
 233  * operator (such as C<shift // 0>).
 234  */
 235 #define UNI3(f,x,have_x) { \
 236         pl_yylval.ival = f; \
 237         if (have_x) PL_expect = x; \
 238         PL_bufptr = s; \
 239         PL_last_uni = PL_oldbufptr; \
 240         PL_last_lop_op = f; \
 241         if (*s == '(') \
 242             return REPORT( (int)FUNC1 ); \
 243         s = PEEKSPACE(s); \
 244         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 245         }
 246 #define UNI(f)    UNI3(f,XTERM,1)
 247 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 248 #define UNIPROTO(f,optional) { \
 249         if (optional) PL_last_uni = PL_oldbufptr; \
 250         OPERATOR(f); \
 251         }
 252
 253 #define UNIBRACK(f) UNI3(f,0,0)
 254
 255 /* grandfather return to old style */
 256 #define OLDLOP(f) \
 257         do { \
 258             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 259                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 260             pl_yylval.ival = (f); \
 261             PL_expect = XTERM; \
 262             PL_bufptr = s; \
 263             return (int)LSTOP; \
 264         } while(0)
 265
 266 #define COPLINE_INC_WITH_HERELINES                  \
 267     STMT_START {                                     \
 268         CopLINE_inc(PL_curcop);                       \
 269         if (PL_parser->herelines)                      \
 270             CopLINE(PL_curcop) += PL_parser->herelines, \
 271             PL_parser->herelines = 0;                    \
 272     } STMT_END
 273 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 274  * is no sublex_push to follow. */
 275 #define COPLINE_SET_FROM_MULTI_END            \
 276     STMT_START {                               \
 277         CopLINE_set(PL_curcop, PL_multi_end);   \
 278         if (PL_multi_end != PL_multi_start)      \
 279             PL_parser->herelines = 0;             \
 280     } STMT_END
 281
 282
 283 #ifdef DEBUGGING
 284
 285 /* how to interpret the pl_yylval associated with the token */
 286 enum token_type {
 287     TOKENTYPE_NONE,
 288     TOKENTYPE_IVAL,
 289     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 290     TOKENTYPE_PVAL,
 291     TOKENTYPE_OPVAL
 292 };
 293
 294 static struct debug_tokens {
 295     const int token;
 296     enum token_type type;
 297     const char *name;
 298 } const debug_tokens[] =
 299 {
 300     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 301     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 302     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 303     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 304     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 305     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 306     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 307     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 308     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 309     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 310     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 311     { DO,               TOKENTYPE_NONE,         "DO" },
 312     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 313     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 314     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 315     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 316     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 317     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 318     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 319     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 320     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 321     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 322     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 323     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 324     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 325     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 326     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 327     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 328     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 329     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 330     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 331     { IF,               TOKENTYPE_IVAL,         "IF" },
 332     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 333     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 334     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 335     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 336     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 337     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 338     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 339     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 340     { MY,               TOKENTYPE_IVAL,         "MY" },
 341     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 342     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 343     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 344     { OROR,             TOKENTYPE_NONE,         "OROR" },
 345     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 346     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 347     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 348     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 349     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 350     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 351     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 352     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 353     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 354     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 355     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 356     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 357     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 358     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 359     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 360     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 361     { SUB,              TOKENTYPE_NONE,         "SUB" },
 362     { THING,            TOKENTYPE_OPVAL,        "THING" },
 363     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 364     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 365     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 366     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 367     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 368     { USE,              TOKENTYPE_IVAL,         "USE" },
 369     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 370     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 371     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 372     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 373     { 0,                TOKENTYPE_NONE,         NULL }
 374 };
 375
 376 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 377
 378 STATIC int
 379 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 380 {
 381     dVAR;
 382
 383     PERL_ARGS_ASSERT_TOKEREPORT;
 384
 385     if (DEBUG_T_TEST) {
 386         const char *name = NULL;
 387         enum token_type type = TOKENTYPE_NONE;
 388         const struct debug_tokens *p;
 389         SV* const report = newSVpvs("<== ");
 390
 391         for (p = debug_tokens; p->token; p++) {
 392             if (p->token == (int)rv) {
 393                 name = p->name;
 394                 type = p->type;
 395                 break;
 396             }
 397         }
 398         if (name)
 399             Perl_sv_catpv(aTHX_ report, name);
 400         else if ((char)rv > ' ' && (char)rv <= '~')
 401         {
 402             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 403             if ((char)rv == 'p')
 404                 sv_catpvs(report, " (pending identifier)");
 405         }
 406         else if (!rv)
 407             sv_catpvs(report, "EOF");
 408         else
 409             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 410         switch (type) {
 411         case TOKENTYPE_NONE:
 412             break;
 413         case TOKENTYPE_IVAL:
 414             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 415             break;
 416         case TOKENTYPE_OPNUM:
 417             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 418                                     PL_op_name[lvalp->ival]);
 419             break;
 420         case TOKENTYPE_PVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 422             break;
 423         case TOKENTYPE_OPVAL:
 424             if (lvalp->opval) {
 425                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 426                                     PL_op_name[lvalp->opval->op_type]);
 427                 if (lvalp->opval->op_type == OP_CONST) {
 428                     Perl_sv_catpvf(aTHX_ report, " %s",
 429                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 430                 }
 431
 432             }
 433             else
 434                 sv_catpvs(report, "(opval=null)");
 435             break;
 436         }
 437         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 438     };
 439     return (int)rv;
 440 }
 441
 442
 443 /* print the buffer with suitable escapes */
 444
 445 STATIC void
 446 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 447 {
 448     SV* const tmp = newSVpvs("");
 449
 450     PERL_ARGS_ASSERT_PRINTBUF;
 451
 452     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 453     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 454     GCC_DIAG_RESTORE;
 455     SvREFCNT_dec(tmp);
 456 }
 457
 458 #endif
 459
 460 static int
 461 S_deprecate_commaless_var_list(pTHX) {
 462     PL_expect = XTERM;
 463     deprecate("comma-less variable list");
 464     return REPORT(','); /* grandfather non-comma-format format */
 465 }
 466
 467 /*
 468  * S_ao
 469  *
 470  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 471  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 472  */
 473
 474 STATIC int
 475 S_ao(pTHX_ int toketype)
 476 {
 477     dVAR;
 478     if (*PL_bufptr == '=') {
 479         PL_bufptr++;
 480         if (toketype == ANDAND)
 481             pl_yylval.ival = OP_ANDASSIGN;
 482         else if (toketype == OROR)
 483             pl_yylval.ival = OP_ORASSIGN;
 484         else if (toketype == DORDOR)
 485             pl_yylval.ival = OP_DORASSIGN;
 486         toketype = ASSIGNOP;
 487     }
 488     return toketype;
 489 }
 490
 491 /*
 492  * S_no_op
 493  * When Perl expects an operator and finds something else, no_op
 494  * prints the warning.  It always prints "<something> found where
 495  * operator expected.  It prints "Missing semicolon on previous line?"
 496  * if the surprise occurs at the start of the line.  "do you need to
 497  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 498  * where the compiler doesn't know if foo is a method call or a function.
 499  * It prints "Missing operator before end of line" if there's nothing
 500  * after the missing operator, or "... before <...>" if there is something
 501  * after the missing operator.
 502  */
 503
 504 STATIC void
 505 S_no_op(pTHX_ const char *const what, char *s)
 506 {
 507     dVAR;
 508     char * const oldbp = PL_bufptr;
 509     const bool is_first = (PL_oldbufptr == PL_linestart);
 510
 511     PERL_ARGS_ASSERT_NO_OP;
 512
 513     if (!s)
 514         s = oldbp;
 515     else
 516         PL_bufptr = s;
 517     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 518     if (ckWARN_d(WARN_SYNTAX)) {
 519         if (is_first)
 520             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 521                     "\t(Missing semicolon on previous line?)\n");
 522         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 523             const char *t;
 524             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 525                                                             t += UTF ? UTF8SKIP(t) : 1)
 526                 NOOP;
 527             if (t < PL_bufptr && isSPACE(*t))
 528                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 529                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 530                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 531         }
 532         else {
 533             assert(s >= oldbp);
 534             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                     "\t(Missing operator before %"UTF8f"?)\n",
 536                      UTF8fARG(UTF, s - oldbp, oldbp));
 537         }
 538     }
 539     PL_bufptr = oldbp;
 540 }
 541
 542 /*
 543  * S_missingterm
 544  * Complain about missing quote/regexp/heredoc terminator.
 545  * If it's called with NULL then it cauterizes the line buffer.
 546  * If we're in a delimited string and the delimiter is a control
 547  * character, it's reformatted into a two-char sequence like ^C.
 548  * This is fatal.
 549  */
 550
 551 STATIC void
 552 S_missingterm(pTHX_ char *s)
 553 {
 554     dVAR;
 555     char tmpbuf[3];
 556     char q;
 557     if (s) {
 558         char * const nl = strrchr(s,'\n');
 559         if (nl)
 560             *nl = '\0';
 561     }
 562     else if ((U8) PL_multi_close < 32) {
 563         *tmpbuf = '^';
 564         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 565         tmpbuf[2] = '\0';
 566         s = tmpbuf;
 567     }
 568     else {
 569         *tmpbuf = (char)PL_multi_close;
 570         tmpbuf[1] = '\0';
 571         s = tmpbuf;
 572     }
 573     q = strchr(s,'"') ? '\'' : '"';
 574     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 575 }
 576
 577 #include "feature.h"
 578
 579 /*
 580  * Check whether the named feature is enabled.
 581  */
 582 bool
 583 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 584 {
 585     dVAR;
 586     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 587
 588     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 589
 590     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 591
 592     if (namelen > MAX_FEATURE_LEN)
 593         return FALSE;
 594     memcpy(&he_name[8], name, namelen);
 595
 596     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 597                                      REFCOUNTED_HE_EXISTS));
 598 }
 599
 600 /*
 601  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 602  * utf16-to-utf8-reversed.
 603  */
 604
 605 #ifdef PERL_CR_FILTER
 606 static void
 607 strip_return(SV *sv)
 608 {
 609     const char *s = SvPVX_const(sv);
 610     const char * const e = s + SvCUR(sv);
 611
 612     PERL_ARGS_ASSERT_STRIP_RETURN;
 613
 614     /* outer loop optimized to do nothing if there are no CR-LFs */
 615     while (s < e) {
 616         if (*s++ == '\r' && *s == '\n') {
 617             /* hit a CR-LF, need to copy the rest */
 618             char *d = s - 1;
 619             *d++ = *s++;
 620             while (s < e) {
 621                 if (*s == '\r' && s[1] == '\n')
 622                     s++;
 623                 *d++ = *s++;
 624             }
 625             SvCUR(sv) -= s - d;
 626             return;
 627         }
 628     }
 629 }
 630
 631 STATIC I32
 632 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 633 {
 634     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 635     if (count > 0 && !maxlen)
 636         strip_return(sv);
 637     return count;
 638 }
 639 #endif
 640
 641 /*
 642 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 643
 644 Creates and initialises a new lexer/parser state object, supplying
 645 a context in which to lex and parse from a new source of Perl code.
 646 A pointer to the new state object is placed in L</PL_parser>.  An entry
 647 is made on the save stack so that upon unwinding the new state object
 648 will be destroyed and the former value of L</PL_parser> will be restored.
 649 Nothing else need be done to clean up the parsing context.
 650
 651 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 652 non-null, provides a string (in SV form) containing code to be parsed.
 653 A copy of the string is made, so subsequent modification of I<line>
 654 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 655 from which code will be read to be parsed.  If both are non-null, the
 656 code in I<line> comes first and must consist of complete lines of input,
 657 and I<rsfp> supplies the remainder of the source.
 658
 659 The I<flags> parameter is reserved for future use.  Currently it is only
 660 used by perl internally, so extensions should always pass zero.
 661
 662 =cut
 663 */
 664
 665 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 666    can share filters with the current parser.
 667    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 668    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 669    destruction. This is used to handle the case of defaulting to reading the
 670    script from the standard input because no filename was given on the command
 671    line (without getting confused by situation where STDIN has been closed, so
 672    the script handle is opened on fd 0)  */
 673
 674 void
 675 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 676 {
 677     dVAR;
 678     const char *s = NULL;
 679     yy_parser *parser, *oparser;
 680     if (flags && flags & ~LEX_START_FLAGS)
 681         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 682
 683     /* create and initialise a parser */
 684
 685     Newxz(parser, 1, yy_parser);
 686     parser->old_parser = oparser = PL_parser;
 687     PL_parser = parser;
 688
 689     parser->stack = NULL;
 690     parser->ps = NULL;
 691     parser->stack_size = 0;
 692
 693     /* on scope exit, free this parser and restore any outer one */
 694     SAVEPARSER(parser);
 695     parser->saved_curcop = PL_curcop;
 696
 697     /* initialise lexer state */
 698
 699     parser->nexttoke = 0;
 700     parser->error_count = oparser ? oparser->error_count : 0;
 701     parser->copline = parser->preambling = NOLINE;
 702     parser->lex_state = LEX_NORMAL;
 703     parser->expect = XSTATE;
 704     parser->rsfp = rsfp;
 705     parser->rsfp_filters =
 706       !(flags & LEX_START_SAME_FILTER) || !oparser
 707         ? NULL
 708         : MUTABLE_AV(SvREFCNT_inc(
 709             oparser->rsfp_filters
 710              ? oparser->rsfp_filters
 711              : (oparser->rsfp_filters = newAV())
 712           ));
 713
 714     Newx(parser->lex_brackstack, 120, char);
 715     Newx(parser->lex_casestack, 12, char);
 716     *parser->lex_casestack = '\0';
 717     Newxz(parser->lex_shared, 1, LEXSHARED);
 718
 719     if (line) {
 720         STRLEN len;
 721         s = SvPV_const(line, len);
 722         parser->linestr = flags & LEX_START_COPIED
 723                             ? SvREFCNT_inc_simple_NN(line)
 724                             : newSVpvn_flags(s, len, SvUTF8(line));
 725         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 726     } else {
 727         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 728     }
 729     parser->oldoldbufptr =
 730         parser->oldbufptr =
 731         parser->bufptr =
 732         parser->linestart = SvPVX(parser->linestr);
 733     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 734     parser->last_lop = parser->last_uni = NULL;
 735
 736     assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 737                                                         |LEX_DONT_CLOSE_RSFP));
 738     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 739                                                         |LEX_DONT_CLOSE_RSFP));
 740
 741     parser->in_pod = parser->filtered = 0;
 742 }
 743
 744
 745 /* delete a parser object */
 746
 747 void
 748 Perl_parser_free(pTHX_  const yy_parser *parser)
 749 {
 750     PERL_ARGS_ASSERT_PARSER_FREE;
 751
 752     PL_curcop = parser->saved_curcop;
 753     SvREFCNT_dec(parser->linestr);
 754
 755     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 756         PerlIO_clearerr(parser->rsfp);
 757     else if (parser->rsfp && (!parser->old_parser ||
 758                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 759         PerlIO_close(parser->rsfp);
 760     SvREFCNT_dec(parser->rsfp_filters);
 761     SvREFCNT_dec(parser->lex_stuff);
 762     SvREFCNT_dec(parser->sublex_info.repl);
 763
 764     Safefree(parser->lex_brackstack);
 765     Safefree(parser->lex_casestack);
 766     Safefree(parser->lex_shared);
 767     PL_parser = parser->old_parser;
 768     Safefree(parser);
 769 }
 770
 771 void
 772 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 773 {
 774     I32 nexttoke = parser->nexttoke;
 775     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 776     while (nexttoke--) {
 777         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 778          && parser->nextval[nexttoke].opval
 779          && parser->nextval[nexttoke].opval->op_slabbed
 780          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 781             op_free(parser->nextval[nexttoke].opval);
 782             parser->nextval[nexttoke].opval = NULL;
 783         }
 784     }
 785 }
 786
 787
 788 /*
 789 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 790
 791 Buffer scalar containing the chunk currently under consideration of the
 792 text currently being lexed.  This is always a plain string scalar (for
 793 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 794 normal scalar means; instead refer to the buffer directly by the pointer
 795 variables described below.
 796
 797 The lexer maintains various C<char*> pointers to things in the
 798 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 799 reallocated, all of these pointers must be updated.  Don't attempt to
 800 do this manually, but rather use L</lex_grow_linestr> if you need to
 801 reallocate the buffer.
 802
 803 The content of the text chunk in the buffer is commonly exactly one
 804 complete line of input, up to and including a newline terminator,
 805 but there are situations where it is otherwise.  The octets of the
 806 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 807 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 808 flag on this scalar, which may disagree with it.
 809
 810 For direct examination of the buffer, the variable
 811 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 812 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 813 of these pointers is usually preferable to examination of the scalar
 814 through normal scalar means.
 815
 816 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 817
 818 Direct pointer to the end of the chunk of text currently being lexed, the
 819 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 820 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 821 always located at the end of the buffer, and does not count as part of
 822 the buffer's contents.
 823
 824 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 825
 826 Points to the current position of lexing inside the lexer buffer.
 827 Characters around this point may be freely examined, within
 828 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 829 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 830 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 831
 832 Lexing code (whether in the Perl core or not) moves this pointer past
 833 the characters that it consumes.  It is also expected to perform some
 834 bookkeeping whenever a newline character is consumed.  This movement
 835 can be more conveniently performed by the function L</lex_read_to>,
 836 which handles newlines appropriately.
 837
 838 Interpretation of the buffer's octets can be abstracted out by
 839 using the slightly higher-level functions L</lex_peek_unichar> and
 840 L</lex_read_unichar>.
 841
 842 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 843
 844 Points to the start of the current line inside the lexer buffer.
 845 This is useful for indicating at which column an error occurred, and
 846 not much else.  This must be updated by any lexing code that consumes
 847 a newline; the function L</lex_read_to> handles this detail.
 848
 849 =cut
 850 */
 851
 852 /*
 853 =for apidoc Amx|bool|lex_bufutf8
 854
 855 Indicates whether the octets in the lexer buffer
 856 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 857 of Unicode characters.  If not, they should be interpreted as Latin-1
 858 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 859
 860 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 861 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 862 encoding.
 863
 864 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 865 is significant, but not the whole story regarding the input character
 866 encoding.  Normally, when a file is being read, the scalar contains octets
 867 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 868 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 869 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 870 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 871 is in effect.  This logic may change in the future; use this function
 872 instead of implementing the logic yourself.
 873
 874 =cut
 875 */
 876
 877 bool
 878 Perl_lex_bufutf8(pTHX)
 879 {
 880     return UTF;
 881 }
 882
 883 /*
 884 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 885
 886 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 887 at least I<len> octets (including terminating C<NUL>).  Returns a
 888 pointer to the reallocated buffer.  This is necessary before making
 889 any direct modification of the buffer that would increase its length.
 890 L</lex_stuff_pvn> provides a more convenient way to insert text into
 891 the buffer.
 892
 893 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 894 this function updates all of the lexer's variables that point directly
 895 into the buffer.
 896
 897 =cut
 898 */
 899
 900 char *
 901 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 902 {
 903     SV *linestr;
 904     char *buf;
 905     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 906     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 907     linestr = PL_parser->linestr;
 908     buf = SvPVX(linestr);
 909     if (len <= SvLEN(linestr))
 910         return buf;
 911     bufend_pos = PL_parser->bufend - buf;
 912     bufptr_pos = PL_parser->bufptr - buf;
 913     oldbufptr_pos = PL_parser->oldbufptr - buf;
 914     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 915     linestart_pos = PL_parser->linestart - buf;
 916     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 917     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 918     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 919                             PL_parser->lex_shared->re_eval_start - buf : 0;
 920
 921     buf = sv_grow(linestr, len);
 922
 923     PL_parser->bufend = buf + bufend_pos;
 924     PL_parser->bufptr = buf + bufptr_pos;
 925     PL_parser->oldbufptr = buf + oldbufptr_pos;
 926     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 927     PL_parser->linestart = buf + linestart_pos;
 928     if (PL_parser->last_uni)
 929         PL_parser->last_uni = buf + last_uni_pos;
 930     if (PL_parser->last_lop)
 931         PL_parser->last_lop = buf + last_lop_pos;
 932     if (PL_parser->lex_shared->re_eval_start)
 933         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 934     return buf;
 935 }
 936
 937 /*
 938 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 939
 940 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 941 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 942 reallocating the buffer if necessary.  This means that lexing code that
 943 runs later will see the characters as if they had appeared in the input.
 944 It is not recommended to do this as part of normal parsing, and most
 945 uses of this facility run the risk of the inserted characters being
 946 interpreted in an unintended manner.
 947
 948 The string to be inserted is represented by I<len> octets starting
 949 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 950 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 951 The characters are recoded for the lexer buffer, according to how the
 952 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 953 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 954 function is more convenient.
 955
 956 =cut
 957 */
 958
 959 void
 960 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 961 {
 962     dVAR;
 963     char *bufptr;
 964     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 965     if (flags & ~(LEX_STUFF_UTF8))
 966         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 967     if (UTF) {
 968         if (flags & LEX_STUFF_UTF8) {
 969             goto plain_copy;
 970         } else {
 971             STRLEN highhalf = 0;    /* Count of variants */
 972             const char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 if (! UTF8_IS_INVARIANT(*p)) {
 975                     highhalf++;
 976                 }
 977             }
 978             if (!highhalf)
 979                 goto plain_copy;
 980             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 981             bufptr = PL_parser->bufptr;
 982             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 983             SvCUR_set(PL_parser->linestr,
 984                 SvCUR(PL_parser->linestr) + len+highhalf);
 985             PL_parser->bufend += len+highhalf;
 986             for (p = pv; p != e; p++) {
 987                 U8 c = (U8)*p;
 988                 if (! UTF8_IS_INVARIANT(c)) {
 989                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
 990                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
 991                 } else {
 992                     *bufptr++ = (char)c;
 993                 }
 994             }
 995         }
 996     } else {
 997         if (flags & LEX_STUFF_UTF8) {
 998             STRLEN highhalf = 0;
 999             const char *p, *e = pv+len;
1000             for (p = pv; p != e; p++) {
1001                 U8 c = (U8)*p;
1002                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1003                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1004                                 "non-Latin-1 character into Latin-1 input");
1005                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1006                     p++;
1007                     highhalf++;
1008                 } else if (! UTF8_IS_INVARIANT(c)) {
1009                     /* malformed UTF-8 */
1010                     ENTER;
1011                     SAVESPTR(PL_warnhook);
1012                     PL_warnhook = PERL_WARNHOOK_FATAL;
1013                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1014                     LEAVE;
1015                 }
1016             }
1017             if (!highhalf)
1018                 goto plain_copy;
1019             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1020             bufptr = PL_parser->bufptr;
1021             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1022             SvCUR_set(PL_parser->linestr,
1023                 SvCUR(PL_parser->linestr) + len-highhalf);
1024             PL_parser->bufend += len-highhalf;
1025             p = pv;
1026             while (p < e) {
1027                 if (UTF8_IS_INVARIANT(*p)) {
1028                     *bufptr++ = *p;
1029                     p++;
1030                 }
1031                 else {
1032                     assert(p < e -1 );
1033                     *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1034                     p += 2;
1035                 }
1036             }
1037         } else {
1038           plain_copy:
1039             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1040             bufptr = PL_parser->bufptr;
1041             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1042             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1043             PL_parser->bufend += len;
1044             Copy(pv, bufptr, len, char);
1045         }
1046     }
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1051
1052 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1053 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1054 reallocating the buffer if necessary.  This means that lexing code that
1055 runs later will see the characters as if they had appeared in the input.
1056 It is not recommended to do this as part of normal parsing, and most
1057 uses of this facility run the risk of the inserted characters being
1058 interpreted in an unintended manner.
1059
1060 The string to be inserted is represented by octets starting at I<pv>
1061 and continuing to the first nul.  These octets are interpreted as either
1062 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1063 in I<flags>.  The characters are recoded for the lexer buffer, according
1064 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1065 If it is not convenient to nul-terminate a string to be inserted, the
1066 L</lex_stuff_pvn> function is more appropriate.
1067
1068 =cut
1069 */
1070
1071 void
1072 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1073 {
1074     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1075     lex_stuff_pvn(pv, strlen(pv), flags);
1076 }
1077
1078 /*
1079 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1080
1081 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1082 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1083 reallocating the buffer if necessary.  This means that lexing code that
1084 runs later will see the characters as if they had appeared in the input.
1085 It is not recommended to do this as part of normal parsing, and most
1086 uses of this facility run the risk of the inserted characters being
1087 interpreted in an unintended manner.
1088
1089 The string to be inserted is the string value of I<sv>.  The characters
1090 are recoded for the lexer buffer, according to how the buffer is currently
1091 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1092 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1093 need to construct a scalar.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1100 {
1101     char *pv;
1102     STRLEN len;
1103     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1104     if (flags)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1106     pv = SvPV(sv, len);
1107     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_unstuff|char *ptr
1112
1113 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1114 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1115 This hides the discarded text from any lexing code that runs later,
1116 as if the text had never appeared.
1117
1118 This is not the normal way to consume lexed text.  For that, use
1119 L</lex_read_to>.
1120
1121 =cut
1122 */
1123
1124 void
1125 Perl_lex_unstuff(pTHX_ char *ptr)
1126 {
1127     char *buf, *bufend;
1128     STRLEN unstuff_len;
1129     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1130     buf = PL_parser->bufptr;
1131     if (ptr < buf)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1133     if (ptr == buf)
1134         return;
1135     bufend = PL_parser->bufend;
1136     if (ptr > bufend)
1137         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138     unstuff_len = ptr - buf;
1139     Move(ptr, buf, bufend+1-ptr, char);
1140     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1141     PL_parser->bufend = bufend - unstuff_len;
1142 }
1143
1144 /*
1145 =for apidoc Amx|void|lex_read_to|char *ptr
1146
1147 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1148 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1149 performing the correct bookkeeping whenever a newline character is passed.
1150 This is the normal way to consume lexed text.
1151
1152 Interpretation of the buffer's octets can be abstracted out by
1153 using the slightly higher-level functions L</lex_peek_unichar> and
1154 L</lex_read_unichar>.
1155
1156 =cut
1157 */
1158
1159 void
1160 Perl_lex_read_to(pTHX_ char *ptr)
1161 {
1162     char *s;
1163     PERL_ARGS_ASSERT_LEX_READ_TO;
1164     s = PL_parser->bufptr;
1165     if (ptr < s || ptr > PL_parser->bufend)
1166         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1167     for (; s != ptr; s++)
1168         if (*s == '\n') {
1169             COPLINE_INC_WITH_HERELINES;
1170             PL_parser->linestart = s+1;
1171         }
1172     PL_parser->bufptr = ptr;
1173 }
1174
1175 /*
1176 =for apidoc Amx|void|lex_discard_to|char *ptr
1177
1178 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1179 up to I<ptr>.  The remaining content of the buffer will be moved, and
1180 all pointers into the buffer updated appropriately.  I<ptr> must not
1181 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1182 it is not permitted to discard text that has yet to be lexed.
1183
1184 Normally it is not necessarily to do this directly, because it suffices to
1185 use the implicit discarding behaviour of L</lex_next_chunk> and things
1186 based on it.  However, if a token stretches across multiple lines,
1187 and the lexing code has kept multiple lines of text in the buffer for
1188 that purpose, then after completion of the token it would be wise to
1189 explicitly discard the now-unneeded earlier lines, to avoid future
1190 multi-line tokens growing the buffer without bound.
1191
1192 =cut
1193 */
1194
1195 void
1196 Perl_lex_discard_to(pTHX_ char *ptr)
1197 {
1198     char *buf;
1199     STRLEN discard_len;
1200     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1201     buf = SvPVX(PL_parser->linestr);
1202     if (ptr < buf)
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1204     if (ptr == buf)
1205         return;
1206     if (ptr > PL_parser->bufptr)
1207         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208     discard_len = ptr - buf;
1209     if (PL_parser->oldbufptr < ptr)
1210         PL_parser->oldbufptr = ptr;
1211     if (PL_parser->oldoldbufptr < ptr)
1212         PL_parser->oldoldbufptr = ptr;
1213     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1214         PL_parser->last_uni = NULL;
1215     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1216         PL_parser->last_lop = NULL;
1217     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1218     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1219     PL_parser->bufend -= discard_len;
1220     PL_parser->bufptr -= discard_len;
1221     PL_parser->oldbufptr -= discard_len;
1222     PL_parser->oldoldbufptr -= discard_len;
1223     if (PL_parser->last_uni)
1224         PL_parser->last_uni -= discard_len;
1225     if (PL_parser->last_lop)
1226         PL_parser->last_lop -= discard_len;
1227 }
1228
1229 /*
1230 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1231
1232 Reads in the next chunk of text to be lexed, appending it to
1233 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1234 looked to the end of the current chunk and wants to know more.  It is
1235 usual, but not necessary, for lexing to have consumed the entirety of
1236 the current chunk at this time.
1237
1238 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1239 chunk (i.e., the current chunk has been entirely consumed), normally the
1240 current chunk will be discarded at the same time that the new chunk is
1241 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1242 will not be discarded.  If the current chunk has not been entirely
1243 consumed, then it will not be discarded regardless of the flag.
1244
1245 Returns true if some new text was added to the buffer, or false if the
1246 buffer has reached the end of the input text.
1247
1248 =cut
1249 */
1250
1251 #define LEX_FAKE_EOF 0x80000000
1252 #define LEX_NO_TERM  0x40000000
1253
1254 bool
1255 Perl_lex_next_chunk(pTHX_ U32 flags)
1256 {
1257     SV *linestr;
1258     char *buf;
1259     STRLEN old_bufend_pos, new_bufend_pos;
1260     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1261     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1262     bool got_some_for_debugger = 0;
1263     bool got_some;
1264     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1265         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1266     linestr = PL_parser->linestr;
1267     buf = SvPVX(linestr);
1268     if (!(flags & LEX_KEEP_PREVIOUS) &&
1269             PL_parser->bufptr == PL_parser->bufend) {
1270         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1271         linestart_pos = 0;
1272         if (PL_parser->last_uni != PL_parser->bufend)
1273             PL_parser->last_uni = NULL;
1274         if (PL_parser->last_lop != PL_parser->bufend)
1275             PL_parser->last_lop = NULL;
1276         last_uni_pos = last_lop_pos = 0;
1277         *buf = 0;
1278         SvCUR(linestr) = 0;
1279     } else {
1280         old_bufend_pos = PL_parser->bufend - buf;
1281         bufptr_pos = PL_parser->bufptr - buf;
1282         oldbufptr_pos = PL_parser->oldbufptr - buf;
1283         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1284         linestart_pos = PL_parser->linestart - buf;
1285         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1286         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1287     }
1288     if (flags & LEX_FAKE_EOF) {
1289         goto eof;
1290     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1291         got_some = 0;
1292     } else if (filter_gets(linestr, old_bufend_pos)) {
1293         got_some = 1;
1294         got_some_for_debugger = 1;
1295     } else if (flags & LEX_NO_TERM) {
1296         got_some = 0;
1297     } else {
1298         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1299             sv_setpvs(linestr, "");
1300         eof:
1301         /* End of real input.  Close filehandle (unless it was STDIN),
1302          * then add implicit termination.
1303          */
1304         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1305             PerlIO_clearerr(PL_parser->rsfp);
1306         else if (PL_parser->rsfp)
1307             (void)PerlIO_close(PL_parser->rsfp);
1308         PL_parser->rsfp = NULL;
1309         PL_parser->in_pod = PL_parser->filtered = 0;
1310         if (!PL_in_eval && PL_minus_p) {
1311             sv_catpvs(linestr,
1312                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1313             PL_minus_n = PL_minus_p = 0;
1314         } else if (!PL_in_eval && PL_minus_n) {
1315             sv_catpvs(linestr, /*{*/";}");
1316             PL_minus_n = 0;
1317         } else
1318             sv_catpvs(linestr, ";");
1319         got_some = 1;
1320     }
1321     buf = SvPVX(linestr);
1322     new_bufend_pos = SvCUR(linestr);
1323     PL_parser->bufend = buf + new_bufend_pos;
1324     PL_parser->bufptr = buf + bufptr_pos;
1325     PL_parser->oldbufptr = buf + oldbufptr_pos;
1326     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1327     PL_parser->linestart = buf + linestart_pos;
1328     if (PL_parser->last_uni)
1329         PL_parser->last_uni = buf + last_uni_pos;
1330     if (PL_parser->last_lop)
1331         PL_parser->last_lop = buf + last_lop_pos;
1332     if (PL_parser->preambling != NOLINE) {
1333         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1334         PL_parser->preambling = NOLINE;
1335     }
1336     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1337             PL_curstash != PL_debstash) {
1338         /* debugger active and we're not compiling the debugger code,
1339          * so store the line into the debugger's array of lines
1340          */
1341         update_debugger_info(NULL, buf+old_bufend_pos,
1342             new_bufend_pos-old_bufend_pos);
1343     }
1344     return got_some;
1345 }
1346
1347 /*
1348 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1349
1350 Looks ahead one (Unicode) character in the text currently being lexed.
1351 Returns the codepoint (unsigned integer value) of the next character,
1352 or -1 if lexing has reached the end of the input text.  To consume the
1353 peeked character, use L</lex_read_unichar>.
1354
1355 If the next character is in (or extends into) the next chunk of input
1356 text, the next chunk will be read in.  Normally the current chunk will be
1357 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1358 then the current chunk will not be discarded.
1359
1360 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1361 is encountered, an exception is generated.
1362
1363 =cut
1364 */
1365
1366 I32
1367 Perl_lex_peek_unichar(pTHX_ U32 flags)
1368 {
1369     dVAR;
1370     char *s, *bufend;
1371     if (flags & ~(LEX_KEEP_PREVIOUS))
1372         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1373     s = PL_parser->bufptr;
1374     bufend = PL_parser->bufend;
1375     if (UTF) {
1376         U8 head;
1377         I32 unichar;
1378         STRLEN len, retlen;
1379         if (s == bufend) {
1380             if (!lex_next_chunk(flags))
1381                 return -1;
1382             s = PL_parser->bufptr;
1383             bufend = PL_parser->bufend;
1384         }
1385         head = (U8)*s;
1386         if (UTF8_IS_INVARIANT(head))
1387             return head;
1388         if (UTF8_IS_START(head)) {
1389             len = UTF8SKIP(&head);
1390             while ((STRLEN)(bufend-s) < len) {
1391                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1392                     break;
1393                 s = PL_parser->bufptr;
1394                 bufend = PL_parser->bufend;
1395             }
1396         }
1397         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1398         if (retlen == (STRLEN)-1) {
1399             /* malformed UTF-8 */
1400             ENTER;
1401             SAVESPTR(PL_warnhook);
1402             PL_warnhook = PERL_WARNHOOK_FATAL;
1403             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1404             LEAVE;
1405         }
1406         return unichar;
1407     } else {
1408         if (s == bufend) {
1409             if (!lex_next_chunk(flags))
1410                 return -1;
1411             s = PL_parser->bufptr;
1412         }
1413         return (U8)*s;
1414     }
1415 }
1416
1417 /*
1418 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1419
1420 Reads the next (Unicode) character in the text currently being lexed.
1421 Returns the codepoint (unsigned integer value) of the character read,
1422 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1423 if lexing has reached the end of the input text.  To non-destructively
1424 examine the next character, use L</lex_peek_unichar> instead.
1425
1426 If the next character is in (or extends into) the next chunk of input
1427 text, the next chunk will be read in.  Normally the current chunk will be
1428 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1429 then the current chunk will not be discarded.
1430
1431 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1432 is encountered, an exception is generated.
1433
1434 =cut
1435 */
1436
1437 I32
1438 Perl_lex_read_unichar(pTHX_ U32 flags)
1439 {
1440     I32 c;
1441     if (flags & ~(LEX_KEEP_PREVIOUS))
1442         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1443     c = lex_peek_unichar(flags);
1444     if (c != -1) {
1445         if (c == '\n')
1446             COPLINE_INC_WITH_HERELINES;
1447         if (UTF)
1448             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1449         else
1450             ++(PL_parser->bufptr);
1451     }
1452     return c;
1453 }
1454
1455 /*
1456 =for apidoc Amx|void|lex_read_space|U32 flags
1457
1458 Reads optional spaces, in Perl style, in the text currently being
1459 lexed.  The spaces may include ordinary whitespace characters and
1460 Perl-style comments.  C<#line> directives are processed if encountered.
1461 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1462 at a non-space character (or the end of the input text).
1463
1464 If spaces extend into the next chunk of input text, the next chunk will
1465 be read in.  Normally the current chunk will be discarded at the same
1466 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1467 chunk will not be discarded.
1468
1469 =cut
1470 */
1471
1472 #define LEX_NO_INCLINE    0x40000000
1473 #define LEX_NO_NEXT_CHUNK 0x80000000
1474
1475 void
1476 Perl_lex_read_space(pTHX_ U32 flags)
1477 {
1478     char *s, *bufend;
1479     const bool can_incline = !(flags & LEX_NO_INCLINE);
1480     bool need_incline = 0;
1481     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1482         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1483     s = PL_parser->bufptr;
1484     bufend = PL_parser->bufend;
1485     while (1) {
1486         char c = *s;
1487         if (c == '#') {
1488             do {
1489                 c = *++s;
1490             } while (!(c == '\n' || (c == 0 && s == bufend)));
1491         } else if (c == '\n') {
1492             s++;
1493             if (can_incline) {
1494                 PL_parser->linestart = s;
1495                 if (s == bufend)
1496                     need_incline = 1;
1497                 else
1498                     incline(s);
1499             }
1500         } else if (isSPACE(c)) {
1501             s++;
1502         } else if (c == 0 && s == bufend) {
1503             bool got_more;
1504             line_t l;
1505             if (flags & LEX_NO_NEXT_CHUNK)
1506                 break;
1507             PL_parser->bufptr = s;
1508             l = CopLINE(PL_curcop);
1509             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1510             got_more = lex_next_chunk(flags);
1511             CopLINE_set(PL_curcop, l);
1512             s = PL_parser->bufptr;
1513             bufend = PL_parser->bufend;
1514             if (!got_more)
1515                 break;
1516             if (can_incline && need_incline && PL_parser->rsfp) {
1517                 incline(s);
1518                 need_incline = 0;
1519             }
1520         } else {
1521             break;
1522         }
1523     }
1524     PL_parser->bufptr = s;
1525 }
1526
1527 /*
1528
1529 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1530
1531 This function performs syntax checking on a prototype, C<proto>.
1532 If C<warn> is true, any illegal characters or mismatched brackets
1533 will trigger illegalproto warnings, declaring that they were
1534 detected in the prototype for C<name>.
1535
1536 The return value is C<true> if this is a valid prototype, and
1537 C<false> if it is not, regardless of whether C<warn> was C<true> or
1538 C<false>.
1539
1540 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1541
1542 =cut
1543
1544  */
1545
1546 bool
1547 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1548 {
1549     STRLEN len, origlen;
1550     char *p = proto ? SvPV(proto, len) : NULL;
1551     bool bad_proto = FALSE;
1552     bool in_brackets = FALSE;
1553     bool after_slash = FALSE;
1554     char greedy_proto = ' ';
1555     bool proto_after_greedy_proto = FALSE;
1556     bool must_be_last = FALSE;
1557     bool underscore = FALSE;
1558     bool bad_proto_after_underscore = FALSE;
1559
1560     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1561
1562     if (!proto)
1563         return TRUE;
1564
1565     origlen = len;
1566     for (; len--; p++) {
1567         if (!isSPACE(*p)) {
1568             if (must_be_last)
1569                 proto_after_greedy_proto = TRUE;
1570             if (underscore) {
1571                 if (!strchr(";@%", *p))
1572                     bad_proto_after_underscore = TRUE;
1573                 underscore = FALSE;
1574             }
1575             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1576                 bad_proto = TRUE;
1577             }
1578             else {
1579                 if (*p == '[')
1580                     in_brackets = TRUE;
1581                 else if (*p == ']')
1582                     in_brackets = FALSE;
1583                 else if ((*p == '@' || *p == '%') &&
1584                     !after_slash &&
1585                     !in_brackets ) {
1586                     must_be_last = TRUE;
1587                     greedy_proto = *p;
1588                 }
1589                 else if (*p == '_')
1590                     underscore = TRUE;
1591             }
1592             if (*p == '\\')
1593                 after_slash = TRUE;
1594             else
1595                 after_slash = FALSE;
1596         }
1597     }
1598
1599     if (warn) {
1600         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1601         p -= origlen;
1602         p = SvUTF8(proto)
1603             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1604                              origlen, UNI_DISPLAY_ISPRINT)
1605             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1606
1607         if (proto_after_greedy_proto)
1608             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1609                         "Prototype after '%c' for %"SVf" : %s",
1610                         greedy_proto, SVfARG(name), p);
1611         if (in_brackets)
1612             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1613                         "Missing ']' in prototype for %"SVf" : %s",
1614                         SVfARG(name), p);
1615         if (bad_proto)
1616             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1617                         "Illegal character in prototype for %"SVf" : %s",
1618                         SVfARG(name), p);
1619         if (bad_proto_after_underscore)
1620             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1621                         "Illegal character after '_' in prototype for %"SVf" : %s",
1622                         SVfARG(name), p);
1623     }
1624
1625     return (! (proto_after_greedy_proto || bad_proto) );
1626 }
1627
1628 /*
1629  * S_incline
1630  * This subroutine has nothing to do with tilting, whether at windmills
1631  * or pinball tables.  Its name is short for "increment line".  It
1632  * increments the current line number in CopLINE(PL_curcop) and checks
1633  * to see whether the line starts with a comment of the form
1634  *    # line 500 "foo.pm"
1635  * If so, it sets the current line number and file to the values in the comment.
1636  */
1637
1638 STATIC void
1639 S_incline(pTHX_ const char *s)
1640 {
1641     dVAR;
1642     const char *t;
1643     const char *n;
1644     const char *e;
1645     line_t line_num;
1646
1647     PERL_ARGS_ASSERT_INCLINE;
1648
1649     COPLINE_INC_WITH_HERELINES;
1650     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1651      && s+1 == PL_bufend && *s == ';') {
1652         /* fake newline in string eval */
1653         CopLINE_dec(PL_curcop);
1654         return;
1655     }
1656     if (*s++ != '#')
1657         return;
1658     while (SPACE_OR_TAB(*s))
1659         s++;
1660     if (strnEQ(s, "line", 4))
1661         s += 4;
1662     else
1663         return;
1664     if (SPACE_OR_TAB(*s))
1665         s++;
1666     else
1667         return;
1668     while (SPACE_OR_TAB(*s))
1669         s++;
1670     if (!isDIGIT(*s))
1671         return;
1672
1673     n = s;
1674     while (isDIGIT(*s))
1675         s++;
1676     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1677         return;
1678     while (SPACE_OR_TAB(*s))
1679         s++;
1680     if (*s == '"' && (t = strchr(s+1, '"'))) {
1681         s++;
1682         e = t + 1;
1683     }
1684     else {
1685         t = s;
1686         while (!isSPACE(*t))
1687             t++;
1688         e = t;
1689     }
1690     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1691         e++;
1692     if (*e != '\n' && *e != '\0')
1693         return;         /* false alarm */
1694
1695     line_num = atoi(n)-1;
1696
1697     if (t - s > 0) {
1698         const STRLEN len = t - s;
1699
1700         if (!PL_rsfp && !PL_parser->filtered) {
1701             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1702              * to *{"::_<newfilename"} */
1703             /* However, the long form of evals is only turned on by the
1704                debugger - usually they're "(eval %lu)" */
1705             GV * const cfgv = CopFILEGV(PL_curcop);
1706             if (cfgv) {
1707                 char smallbuf[128];
1708                 STRLEN tmplen2 = len;
1709                 char *tmpbuf2;
1710                 GV *gv2;
1711
1712                 if (tmplen2 + 2 <= sizeof smallbuf)
1713                     tmpbuf2 = smallbuf;
1714                 else
1715                     Newx(tmpbuf2, tmplen2 + 2, char);
1716
1717                 tmpbuf2[0] = '_';
1718                 tmpbuf2[1] = '<';
1719
1720                 memcpy(tmpbuf2 + 2, s, tmplen2);
1721                 tmplen2 += 2;
1722
1723                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1724                 if (!isGV(gv2)) {
1725                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1726                     /* adjust ${"::_<newfilename"} to store the new file name */
1727                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1728                     /* The line number may differ. If that is the case,
1729                        alias the saved lines that are in the array.
1730                        Otherwise alias the whole array. */
1731                     if (CopLINE(PL_curcop) == line_num) {
1732                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1733                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1734                     }
1735                     else if (GvAV(cfgv)) {
1736                         AV * const av = GvAV(cfgv);
1737                         const I32 start = CopLINE(PL_curcop)+1;
1738                         I32 items = AvFILLp(av) - start;
1739                         if (items > 0) {
1740                             AV * const av2 = GvAVn(gv2);
1741                             SV **svp = AvARRAY(av) + start;
1742                             I32 l = (I32)line_num+1;
1743                             while (items--)
1744                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1745                         }
1746                     }
1747                 }
1748
1749                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1750             }
1751         }
1752         CopFILE_free(PL_curcop);
1753         CopFILE_setn(PL_curcop, s, len);
1754     }
1755     CopLINE_set(PL_curcop, line_num);
1756 }
1757
1758 #define skipspace(s) skipspace_flags(s, 0)
1759
1760
1761 STATIC void
1762 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1763 {
1764     AV *av = CopFILEAVx(PL_curcop);
1765     if (av) {
1766         SV * sv;
1767         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1768         else {
1769             sv = *av_fetch(av, 0, 1);
1770             SvUPGRADE(sv, SVt_PVMG);
1771         }
1772         if (!SvPOK(sv)) sv_setpvs(sv,"");
1773         if (orig_sv)
1774             sv_catsv(sv, orig_sv);
1775         else
1776             sv_catpvn(sv, buf, len);
1777         if (!SvIOK(sv)) {
1778             (void)SvIOK_on(sv);
1779             SvIV_set(sv, 0);
1780         }
1781         if (PL_parser->preambling == NOLINE)
1782             av_store(av, CopLINE(PL_curcop), sv);
1783     }
1784 }
1785
1786 /*
1787  * S_skipspace
1788  * Called to gobble the appropriate amount and type of whitespace.
1789  * Skips comments as well.
1790  */
1791
1792 STATIC char *
1793 S_skipspace_flags(pTHX_ char *s, U32 flags)
1794 {
1795     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1796     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1797         while (s < PL_bufend && SPACE_OR_TAB(*s))
1798             s++;
1799     } else {
1800         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1801         PL_bufptr = s;
1802         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1803                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1804                     LEX_NO_NEXT_CHUNK : 0));
1805         s = PL_bufptr;
1806         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1807         if (PL_linestart > PL_bufptr)
1808             PL_bufptr = PL_linestart;
1809         return s;
1810     }
1811     return s;
1812 }
1813
1814 /*
1815  * S_check_uni
1816  * Check the unary operators to ensure there's no ambiguity in how they're
1817  * used.  An ambiguous piece of code would be:
1818  *     rand + 5
1819  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1820  * the +5 is its argument.
1821  */
1822
1823 STATIC void
1824 S_check_uni(pTHX)
1825 {
1826     dVAR;
1827     const char *s;
1828     const char *t;
1829
1830     if (PL_oldoldbufptr != PL_last_uni)
1831         return;
1832     while (isSPACE(*PL_last_uni))
1833         PL_last_uni++;
1834     s = PL_last_uni;
1835     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1836         s++;
1837     if ((t = strchr(s, '(')) && t < PL_bufptr)
1838         return;
1839
1840     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1841                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1842                      (int)(s - PL_last_uni), PL_last_uni);
1843 }
1844
1845 /*
1846  * LOP : macro to build a list operator.  Its behaviour has been replaced
1847  * with a subroutine, S_lop() for which LOP is just another name.
1848  */
1849
1850 #define LOP(f,x) return lop(f,x,s)
1851
1852 /*
1853  * S_lop
1854  * Build a list operator (or something that might be one).  The rules:
1855  *  - if we have a next token, then it's a list operator [why?]
1856  *  - if the next thing is an opening paren, then it's a function
1857  *  - else it's a list operator
1858  */
1859
1860 STATIC I32
1861 S_lop(pTHX_ I32 f, int x, char *s)
1862 {
1863     dVAR;
1864
1865     PERL_ARGS_ASSERT_LOP;
1866
1867     pl_yylval.ival = f;
1868     CLINE;
1869     PL_expect = x;
1870     PL_bufptr = s;
1871     PL_last_lop = PL_oldbufptr;
1872     PL_last_lop_op = (OPCODE)f;
1873     if (PL_nexttoke)
1874         goto lstop;
1875     if (*s == '(')
1876         return REPORT(FUNC);
1877     s = PEEKSPACE(s);
1878     if (*s == '(')
1879         return REPORT(FUNC);
1880     else {
1881         lstop:
1882         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1883             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1884         return REPORT(LSTOP);
1885     }
1886 }
1887
1888 /*
1889  * S_force_next
1890  * When the lexer realizes it knows the next token (for instance,
1891  * it is reordering tokens for the parser) then it can call S_force_next
1892  * to know what token to return the next time the lexer is called.  Caller
1893  * will need to set PL_nextval[] and possibly PL_expect to ensure
1894  * the lexer handles the token correctly.
1895  */
1896
1897 STATIC void
1898 S_force_next(pTHX_ I32 type)
1899 {
1900     dVAR;
1901 #ifdef DEBUGGING
1902     if (DEBUG_T_TEST) {
1903         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1904         tokereport(type, &NEXTVAL_NEXTTOKE);
1905     }
1906 #endif
1907     PL_nexttype[PL_nexttoke] = type;
1908     PL_nexttoke++;
1909     if (PL_lex_state != LEX_KNOWNEXT) {
1910         PL_lex_defer = PL_lex_state;
1911         PL_lex_expect = PL_expect;
1912         PL_lex_state = LEX_KNOWNEXT;
1913     }
1914 }
1915
1916 /*
1917  * S_postderef
1918  *
1919  * This subroutine handles postfix deref syntax after the arrow has already
1920  * been emitted.  @* $* etc. are emitted as two separate token right here.
1921  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1922  * only the first, leaving yylex to find the next.
1923  */
1924
1925 static int
1926 S_postderef(pTHX_ int const funny, char const next)
1927 {
1928     dVAR;
1929     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1930     assert(strchr("*[{", next));
1931     if (next == '*') {
1932         PL_expect = XOPERATOR;
1933         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1934             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1935             PL_lex_state = LEX_INTERPEND;
1936             force_next(POSTJOIN);
1937         }
1938         force_next(next);
1939         PL_bufptr+=2;
1940     }
1941     else {
1942         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1943          && !PL_lex_brackets)
1944             PL_lex_dojoin = 2;
1945         PL_expect = XOPERATOR;
1946         PL_bufptr++;
1947     }
1948     return funny;
1949 }
1950
1951 void
1952 Perl_yyunlex(pTHX)
1953 {
1954     int yyc = PL_parser->yychar;
1955     if (yyc != YYEMPTY) {
1956         if (yyc) {
1957             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1958             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1959                 PL_lex_allbrackets--;
1960                 PL_lex_brackets--;
1961                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1962             } else if (yyc == '('/*)*/) {
1963                 PL_lex_allbrackets--;
1964                 yyc |= (2<<24);
1965             }
1966             force_next(yyc);
1967         }
1968         PL_parser->yychar = YYEMPTY;
1969     }
1970 }
1971
1972 STATIC SV *
1973 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1974 {
1975     dVAR;
1976     SV * const sv = newSVpvn_utf8(start, len,
1977                                   !IN_BYTES
1978                                   && UTF
1979                                   && !is_ascii_string((const U8*)start, len)
1980                                   && is_utf8_string((const U8*)start, len));
1981     return sv;
1982 }
1983
1984 /*
1985  * S_force_word
1986  * When the lexer knows the next thing is a word (for instance, it has
1987  * just seen -> and it knows that the next char is a word char, then
1988  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1989  * lookahead.
1990  *
1991  * Arguments:
1992  *   char *start : buffer position (must be within PL_linestr)
1993  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1994  *   int check_keyword : if true, Perl checks to make sure the word isn't
1995  *       a keyword (do this if the word is a label, e.g. goto FOO)
1996  *   int allow_pack : if true, : characters will also be allowed (require,
1997  *       use, etc. do this)
1998  *   int allow_initial_tick : used by the "sub" lexer only.
1999  */
2000
2001 STATIC char *
2002 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2003 {
2004     dVAR;
2005     char *s;
2006     STRLEN len;
2007
2008     PERL_ARGS_ASSERT_FORCE_WORD;
2009
2010     start = SKIPSPACE1(start);
2011     s = start;
2012     if (isIDFIRST_lazy_if(s,UTF) ||
2013         (allow_pack && *s == ':') )
2014     {
2015         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2016         if (check_keyword) {
2017           char *s2 = PL_tokenbuf;
2018           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2019             s2 += 6, len -= 6;
2020           if (keyword(s2, len, 0))
2021             return start;
2022         }
2023         if (token == METHOD) {
2024             s = SKIPSPACE1(s);
2025             if (*s == '(')
2026                 PL_expect = XTERM;
2027             else {
2028                 PL_expect = XOPERATOR;
2029             }
2030         }
2031         NEXTVAL_NEXTTOKE.opval
2032             = (OP*)newSVOP(OP_CONST,0,
2033                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2034         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2035         force_next(token);
2036     }
2037     return s;
2038 }
2039
2040 /*
2041  * S_force_ident
2042  * Called when the lexer wants $foo *foo &foo etc, but the program
2043  * text only contains the "foo" portion.  The first argument is a pointer
2044  * to the "foo", and the second argument is the type symbol to prefix.
2045  * Forces the next token to be a "WORD".
2046  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2047  */
2048
2049 STATIC void
2050 S_force_ident(pTHX_ const char *s, int kind)
2051 {
2052     dVAR;
2053
2054     PERL_ARGS_ASSERT_FORCE_IDENT;
2055
2056     if (s[0]) {
2057         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2058         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2059                                                                 UTF ? SVf_UTF8 : 0));
2060         NEXTVAL_NEXTTOKE.opval = o;
2061         force_next(WORD);
2062         if (kind) {
2063             o->op_private = OPpCONST_ENTERED;
2064             /* XXX see note in pp_entereval() for why we forgo typo
2065                warnings if the symbol must be introduced in an eval.
2066                GSAR 96-10-12 */
2067             gv_fetchpvn_flags(s, len,
2068                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2069                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2070                               kind == '$' ? SVt_PV :
2071                               kind == '@' ? SVt_PVAV :
2072                               kind == '%' ? SVt_PVHV :
2073                               SVt_PVGV
2074                               );
2075         }
2076     }
2077 }
2078
2079 static void
2080 S_force_ident_maybe_lex(pTHX_ char pit)
2081 {
2082     NEXTVAL_NEXTTOKE.ival = pit;
2083     force_next('p');
2084 }
2085
2086 NV
2087 Perl_str_to_version(pTHX_ SV *sv)
2088 {
2089     NV retval = 0.0;
2090     NV nshift = 1.0;
2091     STRLEN len;
2092     const char *start = SvPV_const(sv,len);
2093     const char * const end = start + len;
2094     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2095
2096     PERL_ARGS_ASSERT_STR_TO_VERSION;
2097
2098     while (start < end) {
2099         STRLEN skip;
2100         UV n;
2101         if (utf)
2102             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2103         else {
2104             n = *(U8*)start;
2105             skip = 1;
2106         }
2107         retval += ((NV)n)/nshift;
2108         start += skip;
2109         nshift *= 1000;
2110     }
2111     return retval;
2112 }
2113
2114 /*
2115  * S_force_version
2116  * Forces the next token to be a version number.
2117  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2118  * and if "guessing" is TRUE, then no new token is created (and the caller
2119  * must use an alternative parsing method).
2120  */
2121
2122 STATIC char *
2123 S_force_version(pTHX_ char *s, int guessing)
2124 {
2125     dVAR;
2126     OP *version = NULL;
2127     char *d;
2128
2129     PERL_ARGS_ASSERT_FORCE_VERSION;
2130
2131     s = SKIPSPACE1(s);
2132
2133     d = s;
2134     if (*d == 'v')
2135         d++;
2136     if (isDIGIT(*d)) {
2137         while (isDIGIT(*d) || *d == '_' || *d == '.')
2138             d++;
2139         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2140             SV *ver;
2141             s = scan_num(s, &pl_yylval);
2142             version = pl_yylval.opval;
2143             ver = cSVOPx(version)->op_sv;
2144             if (SvPOK(ver) && !SvNIOK(ver)) {
2145                 SvUPGRADE(ver, SVt_PVNV);
2146                 SvNV_set(ver, str_to_version(ver));
2147                 SvNOK_on(ver);          /* hint that it is a version */
2148             }
2149         }
2150         else if (guessing) {
2151             return s;
2152         }
2153     }
2154
2155     /* NOTE: The parser sees the package name and the VERSION swapped */
2156     NEXTVAL_NEXTTOKE.opval = version;
2157     force_next(WORD);
2158
2159     return s;
2160 }
2161
2162 /*
2163  * S_force_strict_version
2164  * Forces the next token to be a version number using strict syntax rules.
2165  */
2166
2167 STATIC char *
2168 S_force_strict_version(pTHX_ char *s)
2169 {
2170     dVAR;
2171     OP *version = NULL;
2172     const char *errstr = NULL;
2173
2174     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2175
2176     while (isSPACE(*s)) /* leading whitespace */
2177         s++;
2178
2179     if (is_STRICT_VERSION(s,&errstr)) {
2180         SV *ver = newSV(0);
2181         s = (char *)scan_version(s, ver, 0);
2182         version = newSVOP(OP_CONST, 0, ver);
2183     }
2184     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2185             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2186     {
2187         PL_bufptr = s;
2188         if (errstr)
2189             yyerror(errstr); /* version required */
2190         return s;
2191     }
2192
2193     /* NOTE: The parser sees the package name and the VERSION swapped */
2194     NEXTVAL_NEXTTOKE.opval = version;
2195     force_next(WORD);
2196
2197     return s;
2198 }
2199
2200 /*
2201  * S_tokeq
2202  * Tokenize a quoted string passed in as an SV.  It finds the next
2203  * chunk, up to end of string or a backslash.  It may make a new
2204  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2205  * turns \\ into \.
2206  */
2207
2208 STATIC SV *
2209 S_tokeq(pTHX_ SV *sv)
2210 {
2211     dVAR;
2212     char *s;
2213     char *send;
2214     char *d;
2215     SV *pv = sv;
2216
2217     PERL_ARGS_ASSERT_TOKEQ;
2218
2219     assert (SvPOK(sv));
2220     assert (SvLEN(sv));
2221     assert (!SvIsCOW(sv));
2222     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2223         goto finish;
2224     s = SvPVX(sv);
2225     send = SvEND(sv);
2226     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2227     while (s < send && !(*s == '\\' && s[1] == '\\'))
2228         s++;
2229     if (s == send)
2230         goto finish;
2231     d = s;
2232     if ( PL_hints & HINT_NEW_STRING ) {
2233         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2234                             SVs_TEMP | SvUTF8(sv));
2235     }
2236     while (s < send) {
2237         if (*s == '\\') {
2238             if (s + 1 < send && (s[1] == '\\'))
2239                 s++;            /* all that, just for this */
2240         }
2241         *d++ = *s++;
2242     }
2243     *d = '\0';
2244     SvCUR_set(sv, d - SvPVX_const(sv));
2245   finish:
2246     if ( PL_hints & HINT_NEW_STRING )
2247        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2248     return sv;
2249 }
2250
2251 /*
2252  * Now come three functions related to double-quote context,
2253  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2254  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2255  * interact with PL_lex_state, and create fake ( ... ) argument lists
2256  * to handle functions and concatenation.
2257  * For example,
2258  *   "foo\lbar"
2259  * is tokenised as
2260  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2261  */
2262
2263 /*
2264  * S_sublex_start
2265  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2266  *
2267  * Pattern matching will set PL_lex_op to the pattern-matching op to
2268  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2269  *
2270  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2271  *
2272  * Everything else becomes a FUNC.
2273  *
2274  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2275  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2276  * call to S_sublex_push().
2277  */
2278
2279 STATIC I32
2280 S_sublex_start(pTHX)
2281 {
2282     dVAR;
2283     const I32 op_type = pl_yylval.ival;
2284
2285     if (op_type == OP_NULL) {
2286         pl_yylval.opval = PL_lex_op;
2287         PL_lex_op = NULL;
2288         return THING;
2289     }
2290     if (op_type == OP_CONST) {
2291         SV *sv = tokeq(PL_lex_stuff);
2292
2293         if (SvTYPE(sv) == SVt_PVIV) {
2294             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2295             STRLEN len;
2296             const char * const p = SvPV_const(sv, len);
2297             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2298             SvREFCNT_dec(sv);
2299             sv = nsv;
2300         }
2301         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2302         PL_lex_stuff = NULL;
2303         return THING;
2304     }
2305
2306     PL_sublex_info.super_state = PL_lex_state;
2307     PL_sublex_info.sub_inwhat = (U16)op_type;
2308     PL_sublex_info.sub_op = PL_lex_op;
2309     PL_lex_state = LEX_INTERPPUSH;
2310
2311     PL_expect = XTERM;
2312     if (PL_lex_op) {
2313         pl_yylval.opval = PL_lex_op;
2314         PL_lex_op = NULL;
2315         return PMFUNC;
2316     }
2317     else
2318         return FUNC;
2319 }
2320
2321 /*
2322  * S_sublex_push
2323  * Create a new scope to save the lexing state.  The scope will be
2324  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2325  * to the uc, lc, etc. found before.
2326  * Sets PL_lex_state to LEX_INTERPCONCAT.
2327  */
2328
2329 STATIC I32
2330 S_sublex_push(pTHX)
2331 {
2332     dVAR;
2333     LEXSHARED *shared;
2334     const bool is_heredoc = PL_multi_close == '<';
2335     ENTER;
2336
2337     PL_lex_state = PL_sublex_info.super_state;
2338     SAVEI8(PL_lex_dojoin);
2339     SAVEI32(PL_lex_brackets);
2340     SAVEI32(PL_lex_allbrackets);
2341     SAVEI32(PL_lex_formbrack);
2342     SAVEI8(PL_lex_fakeeof);
2343     SAVEI32(PL_lex_casemods);
2344     SAVEI32(PL_lex_starts);
2345     SAVEI8(PL_lex_state);
2346     SAVESPTR(PL_lex_repl);
2347     SAVEVPTR(PL_lex_inpat);
2348     SAVEI16(PL_lex_inwhat);
2349     if (is_heredoc)
2350     {
2351         SAVECOPLINE(PL_curcop);
2352         SAVEI32(PL_multi_end);
2353         SAVEI32(PL_parser->herelines);
2354         PL_parser->herelines = 0;
2355     }
2356     SAVEI8(PL_multi_close);
2357     SAVEPPTR(PL_bufptr);
2358     SAVEPPTR(PL_bufend);
2359     SAVEPPTR(PL_oldbufptr);
2360     SAVEPPTR(PL_oldoldbufptr);
2361     SAVEPPTR(PL_last_lop);
2362     SAVEPPTR(PL_last_uni);
2363     SAVEPPTR(PL_linestart);
2364     SAVESPTR(PL_linestr);
2365     SAVEGENERICPV(PL_lex_brackstack);
2366     SAVEGENERICPV(PL_lex_casestack);
2367     SAVEGENERICPV(PL_parser->lex_shared);
2368     SAVEBOOL(PL_parser->lex_re_reparsing);
2369     SAVEI32(PL_copline);
2370
2371     /* The here-doc parser needs to be able to peek into outer lexing
2372        scopes to find the body of the here-doc.  So we put PL_linestr and
2373        PL_bufptr into lex_shared, to ‘share’ those values.
2374      */
2375     PL_parser->lex_shared->ls_linestr = PL_linestr;
2376     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2377
2378     PL_linestr = PL_lex_stuff;
2379     PL_lex_repl = PL_sublex_info.repl;
2380     PL_lex_stuff = NULL;
2381     PL_sublex_info.repl = NULL;
2382
2383     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2384         = SvPVX(PL_linestr);
2385     PL_bufend += SvCUR(PL_linestr);
2386     PL_last_lop = PL_last_uni = NULL;
2387     SAVEFREESV(PL_linestr);
2388     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2389
2390     PL_lex_dojoin = FALSE;
2391     PL_lex_brackets = PL_lex_formbrack = 0;
2392     PL_lex_allbrackets = 0;
2393     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2394     Newx(PL_lex_brackstack, 120, char);
2395     Newx(PL_lex_casestack, 12, char);
2396     PL_lex_casemods = 0;
2397     *PL_lex_casestack = '\0';
2398     PL_lex_starts = 0;
2399     PL_lex_state = LEX_INTERPCONCAT;
2400     if (is_heredoc)
2401         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2402     PL_copline = NOLINE;
2403
2404     Newxz(shared, 1, LEXSHARED);
2405     shared->ls_prev = PL_parser->lex_shared;
2406     PL_parser->lex_shared = shared;
2407
2408     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2409     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2410     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2411         PL_lex_inpat = PL_sublex_info.sub_op;
2412     else
2413         PL_lex_inpat = NULL;
2414
2415     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2416     PL_in_eval &= ~EVAL_RE_REPARSING;
2417
2418     return '(';
2419 }
2420
2421 /*
2422  * S_sublex_done
2423  * Restores lexer state after a S_sublex_push.
2424  */
2425
2426 STATIC I32
2427 S_sublex_done(pTHX)
2428 {
2429     dVAR;
2430     if (!PL_lex_starts++) {
2431         SV * const sv = newSVpvs("");
2432         if (SvUTF8(PL_linestr))
2433             SvUTF8_on(sv);
2434         PL_expect = XOPERATOR;
2435         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2436         return THING;
2437     }
2438
2439     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2440         PL_lex_state = LEX_INTERPCASEMOD;
2441         return yylex();
2442     }
2443
2444     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2445     assert(PL_lex_inwhat != OP_TRANSR);
2446     if (PL_lex_repl) {
2447         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2448         PL_linestr = PL_lex_repl;
2449         PL_lex_inpat = 0;
2450         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2451         PL_bufend += SvCUR(PL_linestr);
2452         PL_last_lop = PL_last_uni = NULL;
2453         PL_lex_dojoin = FALSE;
2454         PL_lex_brackets = 0;
2455         PL_lex_allbrackets = 0;
2456         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2457         PL_lex_casemods = 0;
2458         *PL_lex_casestack = '\0';
2459         PL_lex_starts = 0;
2460         if (SvEVALED(PL_lex_repl)) {
2461             PL_lex_state = LEX_INTERPNORMAL;
2462             PL_lex_starts++;
2463             /*  we don't clear PL_lex_repl here, so that we can check later
2464                 whether this is an evalled subst; that means we rely on the
2465                 logic to ensure sublex_done() is called again only via the
2466                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2467         }
2468         else {
2469             PL_lex_state = LEX_INTERPCONCAT;
2470             PL_lex_repl = NULL;
2471         }
2472         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2473             CopLINE(PL_curcop) +=
2474                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2475                  + PL_parser->herelines;
2476             PL_parser->herelines = 0;
2477         }
2478         return ',';
2479     }
2480     else {
2481         const line_t l = CopLINE(PL_curcop);
2482         LEAVE;
2483         if (PL_multi_close == '<')
2484             PL_parser->herelines += l - PL_multi_end;
2485         PL_bufend = SvPVX(PL_linestr);
2486         PL_bufend += SvCUR(PL_linestr);
2487         PL_expect = XOPERATOR;
2488         PL_sublex_info.sub_inwhat = 0;
2489         return ')';
2490     }
2491 }
2492
2493 PERL_STATIC_INLINE SV*
2494 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2495 {
2496     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2497      * interior, hence to the "}".  Finds what the name resolves to, returning
2498      * an SV* containing it; NULL if no valid one found */
2499
2500     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2501
2502     HV * table;
2503     SV **cvp;
2504     SV *cv;
2505     SV *rv;
2506     HV *stash;
2507     const U8* first_bad_char_loc;
2508     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2509
2510     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2511
2512     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2513                                      e - backslash_ptr,
2514                                      &first_bad_char_loc))
2515     {
2516         /* If warnings are on, this will print a more detailed analysis of what
2517          * is wrong than the error message below */
2518         utf8n_to_uvchr(first_bad_char_loc,
2519                        e - ((char *) first_bad_char_loc),
2520                        NULL, 0);
2521
2522         /* We deliberately don't try to print the malformed character, which
2523          * might not print very well; it also may be just the first of many
2524          * malformations, so don't print what comes after it */
2525         yyerror(Perl_form(aTHX_
2526             "Malformed UTF-8 character immediately after '%.*s'",
2527             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2528         return NULL;
2529     }
2530
2531     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2532                         /* include the <}> */
2533                         e - backslash_ptr + 1);
2534     if (! SvPOK(res)) {
2535         SvREFCNT_dec_NN(res);
2536         return NULL;
2537     }
2538
2539     /* See if the charnames handler is the Perl core's, and if so, we can skip
2540      * the validation needed for a user-supplied one, as Perl's does its own
2541      * validation. */
2542     table = GvHV(PL_hintgv);             /* ^H */
2543     cvp = hv_fetchs(table, "charnames", FALSE);
2544     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2545         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2546     {
2547         const char * const name = HvNAME(stash);
2548         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2549          && strEQ(name, "_charnames")) {
2550            return res;
2551        }
2552     }
2553
2554     /* Here, it isn't Perl's charname handler.  We can't rely on a
2555      * user-supplied handler to validate the input name.  For non-ut8 input,
2556      * look to see that the first character is legal.  Then loop through the
2557      * rest checking that each is a continuation */
2558
2559     /* This code makes the reasonable assumption that the only Latin1-range
2560      * characters that begin a character name alias are alphabetic, otherwise
2561      * would have to create a isCHARNAME_BEGIN macro */
2562
2563     if (! UTF) {
2564         if (! isALPHAU(*s)) {
2565             goto bad_charname;
2566         }
2567         s++;
2568         while (s < e) {
2569             if (! isCHARNAME_CONT(*s)) {
2570                 goto bad_charname;
2571             }
2572             if (*s == ' ' && *(s-1) == ' ') {
2573                 goto multi_spaces;
2574             }
2575             if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2576                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2577                            "NO-BREAK SPACE in a charnames "
2578                            "alias definition is deprecated");
2579             }
2580             s++;
2581         }
2582     }
2583     else {
2584         /* Similarly for utf8.  For invariants can check directly; for other
2585          * Latin1, can calculate their code point and check; otherwise  use a
2586          * swash */
2587         if (UTF8_IS_INVARIANT(*s)) {
2588             if (! isALPHAU(*s)) {
2589                 goto bad_charname;
2590             }
2591             s++;
2592         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2593             if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2594                 goto bad_charname;
2595             }
2596             s += 2;
2597         }
2598         else {
2599             if (! PL_utf8_charname_begin) {
2600                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2601                 PL_utf8_charname_begin = _core_swash_init("utf8",
2602                                                         "_Perl_Charname_Begin",
2603                                                         &PL_sv_undef,
2604                                                         1, 0, NULL, &flags);
2605             }
2606             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2607                 goto bad_charname;
2608             }
2609             s += UTF8SKIP(s);
2610         }
2611
2612         while (s < e) {
2613             if (UTF8_IS_INVARIANT(*s)) {
2614                 if (! isCHARNAME_CONT(*s)) {
2615                     goto bad_charname;
2616                 }
2617                 if (*s == ' ' && *(s-1) == ' ') {
2618                     goto multi_spaces;
2619                 }
2620                 s++;
2621             }
2622             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2623                 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2624                 {
2625                     goto bad_charname;
2626                 }
2627                 if (*s == *NBSP_UTF8
2628                     && *(s+1) == *(NBSP_UTF8+1)
2629                     && ckWARN_d(WARN_DEPRECATED))
2630                 {
2631                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2632                                 "NO-BREAK SPACE in a charnames "
2633                                 "alias definition is deprecated");
2634                 }
2635                 s += 2;
2636             }
2637             else {
2638                 if (! PL_utf8_charname_continue) {
2639                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2640                     PL_utf8_charname_continue = _core_swash_init("utf8",
2641                                                 "_Perl_Charname_Continue",
2642                                                 &PL_sv_undef,
2643                                                 1, 0, NULL, &flags);
2644                 }
2645                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2646                     goto bad_charname;
2647                 }
2648                 s += UTF8SKIP(s);
2649             }
2650         }
2651     }
2652     if (*(s-1) == ' ') {
2653         yyerror_pv(
2654             Perl_form(aTHX_
2655             "charnames alias definitions may not contain trailing "
2656             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2657             (int)(s - backslash_ptr + 1), backslash_ptr,
2658             (int)(e - s + 1), s + 1
2659             ),
2660         UTF ? SVf_UTF8 : 0);
2661         return NULL;
2662     }
2663
2664     if (SvUTF8(res)) { /* Don't accept malformed input */
2665         const U8* first_bad_char_loc;
2666         STRLEN len;
2667         const char* const str = SvPV_const(res, len);
2668         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2669             /* If warnings are on, this will print a more detailed analysis of
2670              * what is wrong than the error message below */
2671             utf8n_to_uvchr(first_bad_char_loc,
2672                            (char *) first_bad_char_loc - str,
2673                            NULL, 0);
2674
2675             /* We deliberately don't try to print the malformed character,
2676              * which might not print very well; it also may be just the first
2677              * of many malformations, so don't print what comes after it */
2678             yyerror_pv(
2679               Perl_form(aTHX_
2680                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2681                  (int) (e - backslash_ptr + 1), backslash_ptr,
2682                  (int) ((char *) first_bad_char_loc - str), str
2683               ),
2684               SVf_UTF8);
2685             return NULL;
2686         }
2687     }
2688
2689     return res;
2690
2691   bad_charname: {
2692
2693         /* The final %.*s makes sure that should the trailing NUL be missing
2694          * that this print won't run off the end of the string */
2695         yyerror_pv(
2696           Perl_form(aTHX_
2697             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2698             (int)(s - backslash_ptr + 1), backslash_ptr,
2699             (int)(e - s + 1), s + 1
2700           ),
2701           UTF ? SVf_UTF8 : 0);
2702         return NULL;
2703     }
2704
2705   multi_spaces:
2706         yyerror_pv(
2707           Perl_form(aTHX_
2708             "charnames alias definitions may not contain a sequence of "
2709             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2710             (int)(s - backslash_ptr + 1), backslash_ptr,
2711             (int)(e - s + 1), s + 1
2712           ),
2713           UTF ? SVf_UTF8 : 0);
2714         return NULL;
2715 }
2716
2717 /*
2718   scan_const
2719
2720   Extracts the next constant part of a pattern, double-quoted string,
2721   or transliteration.  This is terrifying code.
2722
2723   For example, in parsing the double-quoted string "ab\x63$d", it would
2724   stop at the '$' and return an OP_CONST containing 'abc'.
2725
2726   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2727   processing a pattern (PL_lex_inpat is true), a transliteration
2728   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2729
2730   Returns a pointer to the character scanned up to. If this is
2731   advanced from the start pointer supplied (i.e. if anything was
2732   successfully parsed), will leave an OP_CONST for the substring scanned
2733   in pl_yylval. Caller must intuit reason for not parsing further
2734   by looking at the next characters herself.
2735
2736   In patterns:
2737     expand:
2738       \N{FOO}  => \N{U+hex_for_character_FOO}
2739       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2740
2741     pass through:
2742         all other \-char, including \N and \N{ apart from \N{ABC}
2743
2744     stops on:
2745         @ and $ where it appears to be a var, but not for $ as tail anchor
2746         \l \L \u \U \Q \E
2747         (?{  or  (??{
2748
2749
2750   In transliterations:
2751     characters are VERY literal, except for - not at the start or end
2752     of the string, which indicates a range. If the range is in bytes,
2753     scan_const expands the range to the full set of intermediate
2754     characters. If the range is in utf8, the hyphen is replaced with
2755     a certain range mark which will be handled by pmtrans() in op.c.
2756
2757   In double-quoted strings:
2758     backslashes:
2759       double-quoted style: \r and \n
2760       constants: \x31, etc.
2761       deprecated backrefs: \1 (in substitution replacements)
2762       case and quoting: \U \Q \E
2763     stops on @ and $
2764
2765   scan_const does *not* construct ops to handle interpolated strings.
2766   It stops processing as soon as it finds an embedded $ or @ variable
2767   and leaves it to the caller to work out what's going on.
2768
2769   embedded arrays (whether in pattern or not) could be:
2770       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2771
2772   $ in double-quoted strings must be the symbol of an embedded scalar.
2773
2774   $ in pattern could be $foo or could be tail anchor.  Assumption:
2775   it's a tail anchor if $ is the last thing in the string, or if it's
2776   followed by one of "()| \r\n\t"
2777
2778   \1 (backreferences) are turned into $1 in substitutions
2779
2780   The structure of the code is
2781       while (there's a character to process) {
2782           handle transliteration ranges
2783           skip regexp comments /(?#comment)/ and codes /(?{code})/
2784           skip #-initiated comments in //x patterns
2785           check for embedded arrays
2786           check for embedded scalars
2787           if (backslash) {
2788               deprecate \1 in substitution replacements
2789               handle string-changing backslashes \l \U \Q \E, etc.
2790               switch (what was escaped) {
2791                   handle \- in a transliteration (becomes a literal -)
2792                   if a pattern and not \N{, go treat as regular character
2793                   handle \132 (octal characters)
2794                   handle \x15 and \x{1234} (hex characters)
2795                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2796                   handle \cV (control characters)
2797                   handle printf-style backslashes (\f, \r, \n, etc)
2798               } (end switch)
2799               continue
2800           } (end if backslash)
2801           handle regular character
2802     } (end while character to read)
2803
2804 */
2805
2806 STATIC char *
2807 S_scan_const(pTHX_ char *start)
2808 {
2809     dVAR;
2810     char *send = PL_bufend;             /* end of the constant */
2811     SV *sv = newSV(send - start);               /* sv for the constant.  See
2812                                                    note below on sizing. */
2813     char *s = start;                    /* start of the constant */
2814     char *d = SvPVX(sv);                /* destination for copies */
2815     bool dorange = FALSE;                       /* are we in a translit range? */
2816     bool didrange = FALSE;                      /* did we just finish a range? */
2817     bool in_charclass = FALSE;                  /* within /[...]/ */
2818     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2819     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2820                                                    to be UTF8?  But, this can
2821                                                    show as true when the source
2822                                                    isn't utf8, as for example
2823                                                    when it is entirely composed
2824                                                    of hex constants */
2825     SV *res;                            /* result from charnames */
2826
2827     /* Note on sizing:  The scanned constant is placed into sv, which is
2828      * initialized by newSV() assuming one byte of output for every byte of
2829      * input.  This routine expects newSV() to allocate an extra byte for a
2830      * trailing NUL, which this routine will append if it gets to the end of
2831      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2832      * CAPITAL LETTER A}), or more output than input if the constant ends up
2833      * recoded to utf8, but each time a construct is found that might increase
2834      * the needed size, SvGROW() is called.  Its size parameter each time is
2835      * based on the best guess estimate at the time, namely the length used so
2836      * far, plus the length the current construct will occupy, plus room for
2837      * the trailing NUL, plus one byte for every input byte still unscanned */
2838
2839     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2840                        before set */
2841 #ifdef EBCDIC
2842     UV literal_endpoint = 0;
2843     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2844 #endif
2845
2846     PERL_ARGS_ASSERT_SCAN_CONST;
2847
2848     assert(PL_lex_inwhat != OP_TRANSR);
2849     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2850         /* If we are doing a trans and we know we want UTF8 set expectation */
2851         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2852         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2853     }
2854
2855     /* Protect sv from errors and fatal warnings. */
2856     ENTER_with_name("scan_const");
2857     SAVEFREESV(sv);
2858
2859     while (s < send || dorange) {
2860
2861         /* get transliterations out of the way (they're most literal) */
2862         if (PL_lex_inwhat == OP_TRANS) {
2863             /* expand a range A-Z to the full set of characters.  AIE! */
2864             if (dorange) {
2865                 I32 i;                          /* current expanded character */
2866                 I32 min;                        /* first character in range */
2867                 I32 max;                        /* last character in range */
2868
2869 #ifdef EBCDIC
2870                 UV uvmax = 0;
2871 #endif
2872
2873                 if (has_utf8
2874 #ifdef EBCDIC
2875                     && !native_range
2876 #endif
2877                 ) {
2878                     char * const c = (char*)utf8_hop((U8*)d, -1);
2879                     char *e = d++;
2880                     while (e-- > c)
2881                         *(e + 1) = *e;
2882                     *c = (char) ILLEGAL_UTF8_BYTE;
2883                     /* mark the range as done, and continue */
2884                     dorange = FALSE;
2885                     didrange = TRUE;
2886                     continue;
2887                 }
2888
2889                 i = d - SvPVX_const(sv);                /* remember current offset */
2890 #ifdef EBCDIC
2891                 SvGROW(sv,
2892                        SvLEN(sv) + (has_utf8 ?
2893                                     (512 - UTF_CONTINUATION_MARK +
2894                                      UNISKIP(0x100))
2895                                     : 256));
2896                 /* How many two-byte within 0..255: 128 in UTF-8,
2897                  * 96 in UTF-8-mod. */
2898 #else
2899                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2900 #endif
2901                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2902 #ifdef EBCDIC
2903                 if (has_utf8) {
2904                     int j;
2905                     for (j = 0; j <= 1; j++) {
2906                         char * const c = (char*)utf8_hop((U8*)d, -1);
2907                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2908                         if (j)
2909                             min = (U8)uv;
2910                         else if (uv < 256)
2911                             max = (U8)uv;
2912                         else {
2913                             max = (U8)0xff; /* only to \xff */
2914                             uvmax = uv; /* \x{100} to uvmax */
2915                         }
2916                         d = c; /* eat endpoint chars */
2917                      }
2918                 }
2919                else {
2920 #endif
2921                    d -= 2;              /* eat the first char and the - */
2922                    min = (U8)*d;        /* first char in range */
2923                    max = (U8)d[1];      /* last char in range  */
2924 #ifdef EBCDIC
2925                }
2926 #endif
2927
2928                 if (min > max) {
2929                     Perl_croak(aTHX_
2930                                "Invalid range \"%c-%c\" in transliteration operator",
2931                                (char)min, (char)max);
2932                 }
2933
2934 #ifdef EBCDIC
2935                 if (literal_endpoint == 2 &&
2936                     ((isLOWER_A(min) && isLOWER_A(max)) ||
2937                      (isUPPER_A(min) && isUPPER_A(max))))
2938                 {
2939                     for (i = min; i <= max; i++) {
2940                         if (isALPHA_A(i))
2941                             *d++ = i;
2942                     }
2943                 }
2944                 else
2945 #endif
2946                     for (i = min; i <= max; i++)
2947 #ifdef EBCDIC
2948                         if (has_utf8) {
2949                             append_utf8_from_native_byte(i, &d);
2950                         }
2951                         else
2952 #endif
2953                             *d++ = (char)i;
2954
2955 #ifdef EBCDIC
2956                 if (uvmax) {
2957                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2958                     if (uvmax > 0x101)
2959                         *d++ = (char) ILLEGAL_UTF8_BYTE;
2960                     if (uvmax > 0x100)
2961                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2962                 }
2963 #endif
2964
2965                 /* mark the range as done, and continue */
2966                 dorange = FALSE;
2967                 didrange = TRUE;
2968 #ifdef EBCDIC
2969                 literal_endpoint = 0;
2970 #endif
2971                 continue;
2972             }
2973
2974             /* range begins (ignore - as first or last char) */
2975             else if (*s == '-' && s+1 < send  && s != start) {
2976                 if (didrange) {
2977                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2978                 }
2979                 if (has_utf8
2980 #ifdef EBCDIC
2981                     && !native_range
2982 #endif
2983                     ) {
2984                     *d++ = (char) ILLEGAL_UTF8_BYTE;    /* use illegal utf8 byte--see pmtrans */
2985                     s++;
2986                     continue;
2987                 }
2988                 dorange = TRUE;
2989                 s++;
2990             }
2991             else {
2992                 didrange = FALSE;
2993 #ifdef EBCDIC
2994                 literal_endpoint = 0;
2995                 native_range = TRUE;
2996 #endif
2997             }
2998         }
2999
3000         /* if we get here, we're not doing a transliteration */
3001
3002         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3003             char *s1 = s-1;
3004             int esc = 0;
3005             while (s1 >= start && *s1-- == '\\')
3006                 esc = !esc;
3007             if (!esc)
3008                 in_charclass = TRUE;
3009         }
3010
3011         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3012             char *s1 = s-1;
3013             int esc = 0;
3014             while (s1 >= start && *s1-- == '\\')
3015                 esc = !esc;
3016             if (!esc)
3017                 in_charclass = FALSE;
3018         }
3019
3020         /* skip for regexp comments /(?#comment)/, except for the last
3021          * char, which will be done separately.
3022          * Stop on (?{..}) and friends */
3023
3024         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3025             if (s[2] == '#') {
3026                 while (s+1 < send && *s != ')')
3027                     *d++ = *s++;
3028             }
3029             else if (!PL_lex_casemods &&
3030                      (    s[2] == '{' /* This should match regcomp.c */
3031                       || (s[2] == '?' && s[3] == '{')))
3032             {
3033                 break;
3034             }
3035         }
3036
3037         /* likewise skip #-initiated comments in //x patterns */
3038         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3039           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3040             while (s+1 < send && *s != '\n')
3041                 *d++ = *s++;
3042         }
3043
3044         /* no further processing of single-quoted regex */
3045         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3046             goto default_action;
3047
3048         /* check for embedded arrays
3049            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3050            */
3051         else if (*s == '@' && s[1]) {
3052             if (isWORDCHAR_lazy_if(s+1,UTF))
3053                 break;
3054             if (strchr(":'{$", s[1]))
3055                 break;
3056             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3057                 break; /* in regexp, neither @+ nor @- are interpolated */
3058         }
3059
3060         /* check for embedded scalars.  only stop if we're sure it's a
3061            variable.
3062         */
3063         else if (*s == '$') {
3064             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3065                 break;
3066             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3067                 if (s[1] == '\\') {
3068                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3069                                    "Possible unintended interpolation of $\\ in regex");
3070                 }
3071                 break;          /* in regexp, $ might be tail anchor */
3072             }
3073         }
3074
3075         /* End of else if chain - OP_TRANS rejoin rest */
3076
3077         /* backslashes */
3078         if (*s == '\\' && s+1 < send) {
3079             char* e;    /* Can be used for ending '}', etc. */
3080
3081             s++;
3082
3083             /* warn on \1 - \9 in substitution replacements, but note that \11
3084              * is an octal; and \19 is \1 followed by '9' */
3085             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3086                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3087             {
3088                 /* diag_listed_as: \%d better written as $%d */
3089                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3090                 *--s = '$';
3091                 break;
3092             }
3093
3094             /* string-change backslash escapes */
3095             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3096                 --s;
3097                 break;
3098             }
3099             /* In a pattern, process \N, but skip any other backslash escapes.
3100              * This is because we don't want to translate an escape sequence
3101              * into a meta symbol and have the regex compiler use the meta
3102              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3103              * in spite of this, we do have to process \N here while the proper
3104              * charnames handler is in scope.  See bugs #56444 and #62056.
3105              * There is a complication because \N in a pattern may also stand
3106              * for 'match a non-nl', and not mean a charname, in which case its
3107              * processing should be deferred to the regex compiler.  To be a
3108              * charname it must be followed immediately by a '{', and not look
3109              * like \N followed by a curly quantifier, i.e., not something like
3110              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3111              * quantifier */
3112             else if (PL_lex_inpat
3113                     && (*s != 'N'
3114                         || s[1] != '{'
3115                         || regcurly(s + 1)))
3116             {
3117                 *d++ = '\\';
3118                 goto default_action;
3119             }
3120
3121             switch (*s) {
3122
3123             /* quoted - in transliterations */
3124             case '-':
3125                 if (PL_lex_inwhat == OP_TRANS) {
3126                     *d++ = *s++;
3127                     continue;
3128                 }
3129                 /* FALLTHROUGH */
3130             default:
3131                 {
3132                     if ((isALPHANUMERIC(*s)))
3133                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3134                                        "Unrecognized escape \\%c passed through",
3135                                        *s);
3136                     /* default action is to copy the quoted character */
3137                     goto default_action;
3138                 }
3139
3140             /* eg. \132 indicates the octal constant 0132 */
3141             case '0': case '1': case '2': case '3':
3142             case '4': case '5': case '6': case '7':
3143                 {
3144                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3145                     STRLEN len = 3;
3146                     uv = grok_oct(s, &len, &flags, NULL);
3147                     s += len;
3148                     if (len < 3 && s < send && isDIGIT(*s)
3149                         && ckWARN(WARN_MISC))
3150                     {
3151                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3152                                     "%s", form_short_octal_warning(s, len));
3153                     }
3154                 }
3155                 goto NUM_ESCAPE_INSERT;
3156
3157             /* eg. \o{24} indicates the octal constant \024 */
3158             case 'o':
3159                 {
3160                     const char* error;
3161
3162                     bool valid = grok_bslash_o(&s, &uv, &error,
3163                                                TRUE, /* Output warning */
3164                                                FALSE, /* Not strict */
3165                                                TRUE, /* Output warnings for
3166                                                          non-portables */
3167                                                UTF);
3168                     if (! valid) {
3169                         yyerror(error);
3170                         continue;
3171                     }
3172                     goto NUM_ESCAPE_INSERT;
3173                 }
3174
3175             /* eg. \x24 indicates the hex constant 0x24 */
3176             case 'x':
3177                 {
3178                     const char* error;
3179
3180                     bool valid = grok_bslash_x(&s, &uv, &error,
3181                                                TRUE, /* Output warning */
3182                                                FALSE, /* Not strict */
3183                                                TRUE,  /* Output warnings for
3184                                                          non-portables */
3185                                                UTF);
3186                     if (! valid) {
3187                         yyerror(error);
3188                         continue;
3189                     }
3190                 }
3191
3192               NUM_ESCAPE_INSERT:
3193                 /* Insert oct or hex escaped character.  There will always be
3194                  * enough room in sv since such escapes will be longer than any
3195                  * UTF-8 sequence they can end up as, except if they force us
3196                  * to recode the rest of the string into utf8 */
3197
3198                 /* Here uv is the ordinal of the next character being added */
3199                 if (!UVCHR_IS_INVARIANT(uv)) {
3200                     if (!has_utf8 && uv > 255) {
3201                         /* Might need to recode whatever we have accumulated so
3202                          * far if it contains any chars variant in utf8 or
3203                          * utf-ebcdic. */
3204
3205                         SvCUR_set(sv, d - SvPVX_const(sv));
3206                         SvPOK_on(sv);
3207                         *d = '\0';
3208                         /* See Note on sizing above.  */
3209                         sv_utf8_upgrade_flags_grow(sv,
3210                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3211                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3212                         d = SvPVX(sv) + SvCUR(sv);
3213                         has_utf8 = TRUE;
3214                     }
3215
3216                     if (has_utf8) {
3217                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3218                         if (PL_lex_inwhat == OP_TRANS &&
3219                             PL_sublex_info.sub_op) {
3220                             PL_sublex_info.sub_op->op_private |=
3221                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3222                                              : OPpTRANS_TO_UTF);
3223                         }
3224 #ifdef EBCDIC
3225                         if (uv > 255 && !dorange)
3226                             native_range = FALSE;
3227 #endif
3228                     }
3229                     else {
3230                         *d++ = (char)uv;
3231                     }
3232                 }
3233                 else {
3234                     *d++ = (char) uv;
3235                 }
3236                 continue;
3237
3238             case 'N':
3239                 /* In a non-pattern \N must be a named character, like \N{LATIN
3240                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3241                  * mean to match a non-newline.  For non-patterns, named
3242                  * characters are converted to their string equivalents. In
3243                  * patterns, named characters are not converted to their
3244                  * ultimate forms for the same reasons that other escapes
3245                  * aren't.  Instead, they are converted to the \N{U+...} form
3246                  * to get the value from the charnames that is in effect right
3247                  * now, while preserving the fact that it was a named character
3248                  * so that the regex compiler knows this */
3249
3250                 /* The structure of this section of code (besides checking for
3251                  * errors and upgrading to utf8) is:
3252                  *  Further disambiguate between the two meanings of \N, and if
3253                  *      not a charname, go process it elsewhere
3254                  *  If of form \N{U+...}, pass it through if a pattern;
3255                  *      otherwise convert to utf8
3256                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3257                  *  pattern; otherwise convert to utf8 */
3258
3259                 /* Here, s points to the 'N'; the test below is guaranteed to
3260                  * succeed if we are being called on a pattern as we already
3261                  * know from a test above that the next character is a '{'.
3262                  * On a non-pattern \N must mean 'named sequence, which
3263                  * requires braces */
3264                 s++;
3265                 if (*s != '{') {
3266                     yyerror("Missing braces on \\N{}");
3267                     continue;
3268                 }
3269                 s++;
3270
3271                 /* If there is no matching '}', it is an error. */
3272                 if (! (e = strchr(s, '}'))) {
3273                     if (! PL_lex_inpat) {
3274                         yyerror("Missing right brace on \\N{}");
3275                     } else {
3276                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3277                     }
3278                     continue;
3279                 }
3280
3281                 /* Here it looks like a named character */
3282
3283                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3284                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3285                                 | PERL_SCAN_DISALLOW_PREFIX;
3286                     STRLEN len;
3287
3288                     /* For \N{U+...}, the '...' is a unicode value even on
3289                      * EBCDIC machines */
3290                     s += 2;         /* Skip to next char after the 'U+' */
3291                     len = e - s;
3292                     uv = grok_hex(s, &len, &flags, NULL);
3293                     if (len == 0 || len != (STRLEN)(e - s)) {
3294                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3295                         s = e + 1;
3296                         continue;
3297                     }
3298
3299                     if (PL_lex_inpat) {
3300
3301                         /* On non-EBCDIC platforms, pass through to the regex
3302                          * compiler unchanged.  The reason we evaluated the
3303                          * number above is to make sure there wasn't a syntax
3304                          * error.  But on EBCDIC we convert to native so
3305                          * downstream code can continue to assume it's native
3306                          */
3307                         s -= 5;     /* Include the '\N{U+' */
3308 #ifdef EBCDIC
3309                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3310                                                                and the \0 */
3311                                     "\\N{U+%X}",
3312                                     (unsigned int) UNI_TO_NATIVE(uv));
3313 #else
3314                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3315                         d += e - s + 1;
3316 #endif
3317                     }
3318                     else {  /* Not a pattern: convert the hex to string */
3319
3320                          /* If destination is not in utf8, unconditionally
3321                           * recode it to be so.  This is because \N{} implies
3322                           * Unicode semantics, and scalars have to be in utf8
3323                           * to guarantee those semantics */
3324                         if (! has_utf8) {
3325                             SvCUR_set(sv, d - SvPVX_const(sv));
3326                             SvPOK_on(sv);
3327                             *d = '\0';
3328                             /* See Note on sizing above.  */
3329                             sv_utf8_upgrade_flags_grow(
3330                                         sv,
3331                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3332                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3333                             d = SvPVX(sv) + SvCUR(sv);
3334                             has_utf8 = TRUE;
3335                         }
3336
3337                         /* Add the (Unicode) code point to the output. */
3338                         if (UNI_IS_INVARIANT(uv)) {
3339                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3340                         }
3341                         else {
3342                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3343                         }
3344                     }
3345                 }
3346                 else /* Here is \N{NAME} but not \N{U+...}. */
3347                      if ((res = get_and_check_backslash_N_name(s, e)))
3348                 {
3349                     STRLEN len;
3350                     const char *str = SvPV_const(res, len);
3351                     if (PL_lex_inpat) {
3352
3353                         if (! len) { /* The name resolved to an empty string */
3354                             Copy("\\N{}", d, 4, char);
3355                             d += 4;
3356                         }
3357                         else {
3358                             /* In order to not lose information for the regex
3359                             * compiler, pass the result in the specially made
3360                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3361                             * the code points in hex of each character
3362                             * returned by charnames */
3363
3364                             const char *str_end = str + len;
3365                             const STRLEN off = d - SvPVX_const(sv);
3366
3367                             if (! SvUTF8(res)) {
3368                                 /* For the non-UTF-8 case, we can determine the
3369                                  * exact length needed without having to parse
3370                                  * through the string.  Each character takes up
3371                                  * 2 hex digits plus either a trailing dot or
3372                                  * the "}" */
3373                                 d = off + SvGROW(sv, off
3374                                                     + 3 * len
3375                                                     + 6 /* For the "\N{U+", and
3376                                                            trailing NUL */
3377                                                     + (STRLEN)(send - e));
3378                                 Copy("\\N{U+", d, 5, char);
3379                                 d += 5;
3380                                 while (str < str_end) {
3381                                     char hex_string[4];
3382                                     my_snprintf(hex_string, sizeof(hex_string),
3383                                                 "%02X.", (U8) *str);
3384                                     Copy(hex_string, d, 3, char);
3385                                     d += 3;
3386                                     str++;
3387                                 }
3388                                 d--;    /* We will overwrite below the final
3389                                            dot with a right brace */
3390                             }
3391                             else {
3392                                 STRLEN char_length; /* cur char's byte length */
3393
3394                                 /* and the number of bytes after this is
3395                                  * translated into hex digits */
3396                                 STRLEN output_length;
3397
3398                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3399                                  * for max('U+', '.'); and 1 for NUL */
3400                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3401
3402                                 /* Get the first character of the result. */
3403                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3404                                                         len,
3405                                                         &char_length,
3406                                                         UTF8_ALLOW_ANYUV);
3407                                 /* Convert first code point to hex, including
3408                                  * the boiler plate before it. */
3409                                 output_length =
3410                                     my_snprintf(hex_string, sizeof(hex_string),
3411                                                 "\\N{U+%X",
3412                                                 (unsigned int) uv);
3413
3414                                 /* Make sure there is enough space to hold it */
3415                                 d = off + SvGROW(sv, off
3416                                                     + output_length
3417                                                     + (STRLEN)(send - e)
3418                                                     + 2);       /* '}' + NUL */
3419                                 /* And output it */
3420                                 Copy(hex_string, d, output_length, char);
3421                                 d += output_length;
3422
3423                                 /* For each subsequent character, append dot and
3424                                 * its ordinal in hex */
3425                                 while ((str += char_length) < str_end) {
3426                                     const STRLEN off = d - SvPVX_const(sv);
3427                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3428                                                             str_end - str,
3429                                                             &char_length,
3430                                                             UTF8_ALLOW_ANYUV);
3431                                     output_length =
3432                                         my_snprintf(hex_string,
3433                                                     sizeof(hex_string),
3434                                                     ".%X",
3435                                                     (unsigned int) uv);
3436
3437                                     d = off + SvGROW(sv, off
3438                                                         + output_length
3439                                                         + (STRLEN)(send - e)
3440                                                         + 2);   /* '}' +  NUL */
3441                                     Copy(hex_string, d, output_length, char);
3442                                     d += output_length;
3443                                 }
3444                             }
3445
3446                             *d++ = '}'; /* Done.  Add the trailing brace */
3447                         }
3448                     }
3449                     else { /* Here, not in a pattern.  Convert the name to a
3450                             * string. */
3451
3452                          /* If destination is not in utf8, unconditionally
3453                           * recode it to be so.  This is because \N{} implies
3454                           * Unicode semantics, and scalars have to be in utf8
3455                           * to guarantee those semantics */
3456                         if (! has_utf8) {
3457                             SvCUR_set(sv, d - SvPVX_const(sv));
3458                             SvPOK_on(sv);
3459                             *d = '\0';
3460                             /* See Note on sizing above.  */
3461                             sv_utf8_upgrade_flags_grow(sv,
3462                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3463                                                 len + (STRLEN)(send - s) + 1);
3464                             d = SvPVX(sv) + SvCUR(sv);
3465                             has_utf8 = TRUE;
3466                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3467
3468                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3469                              * set correctly here). */
3470                             const STRLEN off = d - SvPVX_const(sv);
3471                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3472                         }
3473                         if (! SvUTF8(res)) {    /* Make sure is \N{} return is UTF-8 */
3474                             sv_utf8_upgrade(res);
3475                             str = SvPV_const(res, len);
3476                         }
3477                         Copy(str, d, len, char);
3478                         d += len;
3479                     }
3480
3481                     SvREFCNT_dec(res);
3482
3483                 } /* End \N{NAME} */
3484 #ifdef EBCDIC
3485                 if (!dorange)
3486                     native_range = FALSE; /* \N{} is defined to be Unicode */
3487 #endif
3488                 s = e + 1;  /* Point to just after the '}' */
3489                 continue;
3490
3491             /* \c is a control character */
3492             case 'c':
3493                 s++;
3494                 if (s < send) {
3495                     *d++ = grok_bslash_c(*s++, 1);
3496                 }
3497                 else {
3498                     yyerror("Missing control char name in \\c");
3499                 }
3500                 continue;
3501
3502             /* printf-style backslashes, formfeeds, newlines, etc */
3503             case 'b':
3504                 *d++ = '\b';
3505                 break;
3506             case 'n':
3507                 *d++ = '\n';
3508                 break;
3509             case 'r':
3510                 *d++ = '\r';
3511                 break;
3512             case 'f':
3513                 *d++ = '\f';
3514                 break;
3515             case 't':
3516                 *d++ = '\t';
3517                 break;
3518             case 'e':
3519                 *d++ = ASCII_TO_NATIVE('\033');
3520                 break;
3521             case 'a':
3522                 *d++ = '\a';
3523                 break;
3524             } /* end switch */
3525
3526             s++;
3527             continue;
3528         } /* end if (backslash) */
3529 #ifdef EBCDIC
3530         else
3531             literal_endpoint++;
3532 #endif
3533
3534     default_action:
3535         /* If we started with encoded form, or already know we want it,
3536            then encode the next character */
3537         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3538             STRLEN len  = 1;
3539
3540
3541             /* One might think that it is wasted effort in the case of the
3542              * source being utf8 (this_utf8 == TRUE) to take the next character
3543              * in the source, convert it to an unsigned value, and then convert
3544              * it back again.  But the source has not been validated here.  The
3545              * routine that does the conversion checks for errors like
3546              * malformed utf8 */
3547
3548             const UV nextuv   = (this_utf8)
3549                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3550                                 : (UV) ((U8) *s);
3551             const STRLEN need = UNISKIP(nextuv);
3552             if (!has_utf8) {
3553                 SvCUR_set(sv, d - SvPVX_const(sv));
3554                 SvPOK_on(sv);
3555                 *d = '\0';
3556                 /* See Note on sizing above.  */
3557                 sv_utf8_upgrade_flags_grow(sv,
3558                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3559                                         need + (STRLEN)(send - s) + 1);
3560                 d = SvPVX(sv) + SvCUR(sv);
3561                 has_utf8 = TRUE;
3562             } else if (need > len) {
3563                 /* encoded value larger than old, may need extra space (NOTE:
3564                  * SvCUR() is not set correctly here).   See Note on sizing
3565                  * above.  */
3566                 const STRLEN off = d - SvPVX_const(sv);
3567                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3568             }
3569             s += len;
3570
3571             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3572 #ifdef EBCDIC
3573             if (uv > 255 && !dorange)
3574                 native_range = FALSE;
3575 #endif
3576         }
3577         else {
3578             *d++ = *s++;
3579         }
3580     } /* while loop to process each character */
3581
3582     /* terminate the string and set up the sv */
3583     *d = '\0';
3584     SvCUR_set(sv, d - SvPVX_const(sv));
3585     if (SvCUR(sv) >= SvLEN(sv))
3586         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3587                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3588
3589     SvPOK_on(sv);
3590     if (PL_encoding && !has_utf8) {
3591         sv_recode_to_utf8(sv, PL_encoding);
3592         if (SvUTF8(sv))
3593             has_utf8 = TRUE;
3594     }
3595     if (has_utf8) {
3596         SvUTF8_on(sv);
3597         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3598             PL_sublex_info.sub_op->op_private |=
3599                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3600         }
3601     }
3602
3603     /* shrink the sv if we allocated more than we used */
3604     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3605         SvPV_shrink_to_cur(sv);
3606     }
3607
3608     /* return the substring (via pl_yylval) only if we parsed anything */
3609     if (s > start) {
3610         char *s2 = start;
3611         for (; s2 < s; s2++) {
3612             if (*s2 == '\n')
3613                 COPLINE_INC_WITH_HERELINES;
3614         }
3615         SvREFCNT_inc_simple_void_NN(sv);
3616         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3617             && ! PL_parser->lex_re_reparsing)
3618         {
3619             const char *const key = PL_lex_inpat ? "qr" : "q";
3620             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3621             const char *type;
3622             STRLEN typelen;
3623
3624             if (PL_lex_inwhat == OP_TRANS) {
3625                 type = "tr";
3626                 typelen = 2;
3627             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3628                 type = "s";
3629                 typelen = 1;
3630             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3631                 type = "q";
3632                 typelen = 1;
3633             } else  {
3634                 type = "qq";
3635                 typelen = 2;
3636             }
3637
3638             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3639                                 type, typelen);
3640         }
3641         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3642     }
3643     LEAVE_with_name("scan_const");
3644     return s;
3645 }
3646
3647 /* S_intuit_more
3648  * Returns TRUE if there's more to the expression (e.g., a subscript),
3649  * FALSE otherwise.
3650  *
3651  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3652  *
3653  * ->[ and ->{ return TRUE
3654  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3655  * { and [ outside a pattern are always subscripts, so return TRUE
3656  * if we're outside a pattern and it's not { or [, then return FALSE
3657  * if we're in a pattern and the first char is a {
3658  *   {4,5} (any digits around the comma) returns FALSE
3659  * if we're in a pattern and the first char is a [
3660  *   [] returns FALSE
3661  *   [SOMETHING] has a funky algorithm to decide whether it's a
3662  *      character class or not.  It has to deal with things like
3663  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3664  * anything else returns TRUE
3665  */
3666
3667 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3668
3669 STATIC int
3670 S_intuit_more(pTHX_ char *s)
3671 {
3672     dVAR;
3673
3674     PERL_ARGS_ASSERT_INTUIT_MORE;
3675
3676     if (PL_lex_brackets)
3677         return TRUE;
3678     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3679         return TRUE;
3680     if (*s == '-' && s[1] == '>'
3681      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3682      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3683         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3684         return TRUE;
3685     if (*s != '{' && *s != '[')
3686         return FALSE;
3687     if (!PL_lex_inpat)
3688         return TRUE;
3689
3690     /* In a pattern, so maybe we have {n,m}. */
3691     if (*s == '{') {
3692         if (regcurly(s)) {
3693             return FALSE;
3694         }
3695         return TRUE;
3696     }
3697
3698     /* On the other hand, maybe we have a character class */
3699
3700     s++;
3701     if (*s == ']' || *s == '^')
3702         return FALSE;
3703     else {
3704         /* this is terrifying, and it works */
3705         int weight;
3706         char seen[256];
3707         const char * const send = strchr(s,']');
3708         unsigned char un_char, last_un_char;
3709         char tmpbuf[sizeof PL_tokenbuf * 4];
3710
3711         if (!send)              /* has to be an expression */
3712             return TRUE;
3713         weight = 2;             /* let's weigh the evidence */
3714
3715         if (*s == '$')
3716             weight -= 3;
3717         else if (isDIGIT(*s)) {
3718             if (s[1] != ']') {
3719                 if (isDIGIT(s[1]) && s[2] == ']')
3720                     weight -= 10;
3721             }
3722             else
3723                 weight -= 100;
3724         }
3725         Zero(seen,256,char);
3726         un_char = 255;
3727         for (; s < send; s++) {
3728             last_un_char = un_char;
3729             un_char = (unsigned char)*s;
3730             switch (*s) {
3731             case '@':
3732             case '&':
3733             case '$':
3734                 weight -= seen[un_char] * 10;
3735                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3736                     int len;
3737                     char *tmp = PL_bufend;
3738                     PL_bufend = (char*)send;
3739                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3740                     PL_bufend = tmp;
3741                     len = (int)strlen(tmpbuf);
3742                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3743                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3744                         weight -= 100;
3745                     else
3746                         weight -= 10;
3747                 }
3748                 else if (*s == '$' && s[1] &&
3749                   strchr("[#!%*<>()-=",s[1])) {
3750                     if (/*{*/ strchr("])} =",s[2]))
3751                         weight -= 10;
3752                     else
3753                         weight -= 1;
3754                 }
3755                 break;
3756             case '\\':
3757                 un_char = 254;
3758                 if (s[1]) {
3759                     if (strchr("wds]",s[1]))
3760                         weight += 100;
3761                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3762                         weight += 1;
3763                     else if (strchr("rnftbxcav",s[1]))
3764                         weight += 40;
3765                     else if (isDIGIT(s[1])) {
3766                         weight += 40;
3767                         while (s[1] && isDIGIT(s[1]))
3768                             s++;
3769                     }
3770                 }
3771                 else
3772                     weight += 100;
3773                 break;
3774             case '-':
3775                 if (s[1] == '\\')
3776                     weight += 50;
3777                 if (strchr("aA01! ",last_un_char))
3778                     weight += 30;
3779                 if (strchr("zZ79~",s[1]))
3780                     weight += 30;
3781                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3782                     weight -= 5;        /* cope with negative subscript */
3783                 break;
3784             default:
3785                 if (!isWORDCHAR(last_un_char)
3786                     && !(last_un_char == '$' || last_un_char == '@'
3787                          || last_un_char == '&')
3788                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3789                     char *d = tmpbuf;
3790                     while (isALPHA(*s))
3791                         *d++ = *s++;
3792                     *d = '\0';
3793                     if (keyword(tmpbuf, d - tmpbuf, 0))
3794                         weight -= 150;
3795                 }
3796                 if (un_char == last_un_char + 1)
3797                     weight += 5;
3798                 weight -= seen[un_char];
3799                 break;
3800             }
3801             seen[un_char]++;
3802         }
3803         if (weight >= 0)        /* probably a character class */
3804             return FALSE;
3805     }
3806
3807     return TRUE;
3808 }
3809
3810 /*
3811  * S_intuit_method
3812  *
3813  * Does all the checking to disambiguate
3814  *   foo bar
3815  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3816  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3817  *
3818  * First argument is the stuff after the first token, e.g. "bar".
3819  *
3820  * Not a method if foo is a filehandle.
3821  * Not a method if foo is a subroutine prototyped to take a filehandle.
3822  * Not a method if it's really "Foo $bar"
3823  * Method if it's "foo $bar"
3824  * Not a method if it's really "print foo $bar"
3825  * Method if it's really "foo package::" (interpreted as package->foo)
3826  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3827  * Not a method if bar is a filehandle or package, but is quoted with
3828  *   =>
3829  */
3830
3831 STATIC int
3832 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3833 {
3834     dVAR;
3835     char *s = start + (*start == '$');
3836     char tmpbuf[sizeof PL_tokenbuf];
3837     STRLEN len;
3838     GV* indirgv;
3839
3840     PERL_ARGS_ASSERT_INTUIT_METHOD;
3841
3842     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3843             return 0;
3844     if (cv && SvPOK(cv)) {
3845         const char *proto = CvPROTO(cv);
3846         if (proto) {
3847             while (*proto && (isSPACE(*proto) || *proto == ';'))
3848                 proto++;
3849             if (*proto == '*')
3850                 return 0;
3851         }
3852     }
3853
3854     if (*start == '$') {
3855         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3856                 isUPPER(*PL_tokenbuf))
3857             return 0;
3858         s = PEEKSPACE(s);
3859         PL_bufptr = start;
3860         PL_expect = XREF;
3861         return *s == '(' ? FUNCMETH : METHOD;
3862     }
3863
3864     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3865     /* start is the beginning of the possible filehandle/object,
3866      * and s is the end of it
3867      * tmpbuf is a copy of it (but with single quotes as double colons)
3868      */
3869
3870     if (!keyword(tmpbuf, len, 0)) {
3871         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3872             len -= 2;
3873             tmpbuf[len] = '\0';
3874             goto bare_package;
3875         }
3876         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3877         if (indirgv && GvCVu(indirgv))
3878             return 0;
3879         /* filehandle or package name makes it a method */
3880         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3881             s = PEEKSPACE(s);
3882             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3883                 return 0;       /* no assumptions -- "=>" quotes bareword */
3884       bare_package:
3885             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3886                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3887             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3888             PL_expect = XTERM;
3889             force_next(WORD);
3890             PL_bufptr = s;
3891             return *s == '(' ? FUNCMETH : METHOD;
3892         }
3893     }
3894     return 0;
3895 }
3896
3897 /* Encoded script support. filter_add() effectively inserts a
3898  * 'pre-processing' function into the current source input stream.
3899  * Note that the filter function only applies to the current source file
3900  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3901  *
3902  * The datasv parameter (which may be NULL) can be used to pass
3903  * private data to this instance of the filter. The filter function
3904  * can recover the SV using the FILTER_DATA macro and use it to
3905  * store private buffers and state information.
3906  *
3907  * The supplied datasv parameter is upgraded to a PVIO type
3908  * and the IoDIRP/IoANY field is used to store the function pointer,
3909  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3910  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3911  * private use must be set using malloc'd pointers.
3912  */
3913
3914 SV *
3915 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3916 {
3917     dVAR;
3918     if (!funcp)
3919         return NULL;
3920
3921     if (!PL_parser)
3922         return NULL;
3923
3924     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3925         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3926
3927     if (!PL_rsfp_filters)
3928         PL_rsfp_filters = newAV();
3929     if (!datasv)
3930         datasv = newSV(0);
3931     SvUPGRADE(datasv, SVt_PVIO);
3932     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3933     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3934     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3935                           FPTR2DPTR(void *, IoANY(datasv)),
3936                           SvPV_nolen(datasv)));
3937     av_unshift(PL_rsfp_filters, 1);
3938     av_store(PL_rsfp_filters, 0, datasv) ;
3939     if (
3940         !PL_parser->filtered
3941      && PL_parser->lex_flags & LEX_EVALBYTES
3942      && PL_bufptr < PL_bufend
3943     ) {
3944         const char *s = PL_bufptr;
3945         while (s < PL_bufend) {
3946             if (*s == '\n') {
3947                 SV *linestr = PL_parser->linestr;
3948                 char *buf = SvPVX(linestr);
3949                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3950                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3951                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3952                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3953                 STRLEN const last_uni_pos =
3954                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3955                 STRLEN const last_lop_pos =
3956                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3957                 av_push(PL_rsfp_filters, linestr);
3958                 PL_parser->linestr =
3959                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3960                 buf = SvPVX(PL_parser->linestr);
3961                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3962                 PL_parser->bufptr = buf + bufptr_pos;
3963                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3964                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3965                 PL_parser->linestart = buf + linestart_pos;
3966                 if (PL_parser->last_uni)
3967                     PL_parser->last_uni = buf + last_uni_pos;
3968                 if (PL_parser->last_lop)
3969                     PL_parser->last_lop = buf + last_lop_pos;
3970                 SvLEN(linestr) = SvCUR(linestr);
3971                 SvCUR(linestr) = s-SvPVX(linestr);
3972                 PL_parser->filtered = 1;
3973                 break;
3974             }
3975             s++;
3976         }
3977     }
3978     return(datasv);
3979 }
3980
3981
3982 /* Delete most recently added instance of this filter function. */
3983 void
3984 Perl_filter_del(pTHX_ filter_t funcp)
3985 {
3986     dVAR;
3987     SV *datasv;
3988
3989     PERL_ARGS_ASSERT_FILTER_DEL;
3990
3991 #ifdef DEBUGGING
3992     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3993                           FPTR2DPTR(void*, funcp)));
3994 #endif
3995     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3996         return;
3997     /* if filter is on top of stack (usual case) just pop it off */
3998     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3999     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4000         sv_free(av_pop(PL_rsfp_filters));
4001
4002         return;
4003     }
4004     /* we need to search for the correct entry and clear it     */
4005     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4006 }
4007
4008
4009 /* Invoke the idxth filter function for the current rsfp.        */
4010 /* maxlen 0 = read one text line */
4011 I32
4012 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4013 {
4014     dVAR;
4015     filter_t funcp;
4016     SV *datasv = NULL;
4017     /* This API is bad. It should have been using unsigned int for maxlen.
4018        Not sure if we want to change the API, but if not we should sanity
4019        check the value here.  */
4020     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4021
4022     PERL_ARGS_ASSERT_FILTER_READ;
4023
4024     if (!PL_parser || !PL_rsfp_filters)
4025         return -1;
4026     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4027         /* Provide a default input filter to make life easy.    */
4028         /* Note that we append to the line. This is handy.      */
4029         DEBUG_P(PerlIO_printf(Perl_debug_log,
4030                               "filter_read %d: from rsfp\n", idx));
4031         if (correct_length) {
4032             /* Want a block */
4033             int len ;
4034             const int old_len = SvCUR(buf_sv);
4035
4036             /* ensure buf_sv is large enough */
4037             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4038             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4039                                    correct_length)) <= 0) {
4040                 if (PerlIO_error(PL_rsfp))
4041                     return -1;          /* error */
4042                 else
4043                     return 0 ;          /* end of file */
4044             }
4045             SvCUR_set(buf_sv, old_len + len) ;
4046             SvPVX(buf_sv)[old_len + len] = '\0';
4047         } else {
4048             /* Want a line */
4049             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4050                 if (PerlIO_error(PL_rsfp))
4051                     return -1;          /* error */
4052                 else
4053                     return 0 ;          /* end of file */
4054             }
4055         }
4056         return SvCUR(buf_sv);
4057     }
4058     /* Skip this filter slot if filter has been deleted */
4059     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4060         DEBUG_P(PerlIO_printf(Perl_debug_log,
4061                               "filter_read %d: skipped (filter deleted)\n",
4062                               idx));
4063         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4064     }
4065     if (SvTYPE(datasv) != SVt_PVIO) {
4066         if (correct_length) {
4067             /* Want a block */
4068             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4069             if (!remainder) return 0; /* eof */
4070             if (correct_length > remainder) correct_length = remainder;
4071             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4072             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4073         } else {
4074             /* Want a line */
4075             const char *s = SvEND(datasv);
4076             const char *send = SvPVX(datasv) + SvLEN(datasv);
4077             while (s < send) {
4078                 if (*s == '\n') {
4079                     s++;
4080                     break;
4081                 }
4082                 s++;
4083             }
4084             if (s == send) return 0; /* eof */
4085             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4086             SvCUR_set(datasv, s-SvPVX(datasv));
4087         }
4088         return SvCUR(buf_sv);
4089     }
4090     /* Get function pointer hidden within datasv        */
4091     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4092     DEBUG_P(PerlIO_printf(Perl_debug_log,
4093                           "filter_read %d: via function %p (%s)\n",
4094                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4095     /* Call function. The function is expected to       */
4096     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4097     /* Return: <0:error, =0:eof, >0:not eof             */
4098     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4099 }
4100
4101 STATIC char *
4102 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4103 {
4104     dVAR;
4105
4106     PERL_ARGS_ASSERT_FILTER_GETS;
4107
4108 #ifdef PERL_CR_FILTER
4109     if (!PL_rsfp_filters) {
4110         filter_add(S_cr_textfilter,NULL);
4111     }
4112 #endif
4113     if (PL_rsfp_filters) {
4114         if (!append)
4115             SvCUR_set(sv, 0);   /* start with empty line        */
4116         if (FILTER_READ(0, sv, 0) > 0)
4117             return ( SvPVX(sv) ) ;
4118         else
4119             return NULL ;
4120     }
4121     else
4122         return (sv_gets(sv, PL_rsfp, append));
4123 }
4124
4125 STATIC HV *
4126 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4127 {
4128     dVAR;
4129     GV *gv;
4130
4131     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4132
4133     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4134         return PL_curstash;
4135
4136     if (len > 2 &&
4137         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4138         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4139     {
4140         return GvHV(gv);                        /* Foo:: */
4141     }
4142
4143     /* use constant CLASS => 'MyClass' */
4144     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4145     if (gv && GvCV(gv)) {
4146         SV * const sv = cv_const_sv(GvCV(gv));
4147         if (sv)
4148             pkgname = SvPV_const(sv, len);
4149     }
4150
4151     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4152 }
4153
4154
4155 STATIC char *
4156 S_tokenize_use(pTHX_ int is_use, char *s) {
4157     dVAR;
4158
4159     PERL_ARGS_ASSERT_TOKENIZE_USE;
4160
4161     if (PL_expect != XSTATE)
4162         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4163                     is_use ? "use" : "no"));
4164     PL_expect = XTERM;
4165     s = SKIPSPACE1(s);
4166     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4167         s = force_version(s, TRUE);
4168         if (*s == ';' || *s == '}'
4169                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4170             NEXTVAL_NEXTTOKE.opval = NULL;
4171             force_next(WORD);
4172         }
4173         else if (*s == 'v') {
4174             s = force_word(s,WORD,FALSE,TRUE);
4175             s = force_version(s, FALSE);
4176         }
4177     }
4178     else {
4179         s = force_word(s,WORD,FALSE,TRUE);
4180         s = force_version(s, FALSE);
4181     }
4182     pl_yylval.ival = is_use;
4183     return s;
4184 }
4185 #ifdef DEBUGGING
4186     static const char* const exp_name[] =
4187         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4188           "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4189         };
4190 #endif
4191
4192 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4193 STATIC bool
4194 S_word_takes_any_delimeter(char *p, STRLEN len)
4195 {
4196     return (len == 1 && strchr("msyq", p[0])) ||
4197            (len == 2 && (
4198             (p[0] == 't' && p[1] == 'r') ||
4199             (p[0] == 'q' && strchr("qwxr", p[1]))));
4200 }
4201
4202 static void
4203 S_check_scalar_slice(pTHX_ char *s)
4204 {
4205     s++;
4206     while (*s == ' ' || *s == '\t') s++;
4207     if (*s == 'q' && s[1] == 'w'
4208      && !isWORDCHAR_lazy_if(s+2,UTF))
4209         return;
4210     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4211         s += UTF ? UTF8SKIP(s) : 1;
4212     if (*s == '}' || *s == ']')
4213         pl_yylval.ival = OPpSLICEWARNING;
4214 }
4215
4216 /*
4217   yylex
4218
4219   Works out what to call the token just pulled out of the input
4220   stream.  The yacc parser takes care of taking the ops we return and
4221   stitching them into a tree.
4222
4223   Returns:
4224     The type of the next token
4225
4226   Structure:
4227       Switch based on the current state:
4228           - if we already built the token before, use it
4229           - if we have a case modifier in a string, deal with that
4230           - handle other cases of interpolation inside a string
4231           - scan the next line if we are inside a format
4232       In the normal state switch on the next character:
4233           - default:
4234             if alphabetic, go to key lookup
4235             unrecoginized character - croak
4236           - 0/4/26: handle end-of-line or EOF
4237           - cases for whitespace
4238           - \n and #: handle comments and line numbers
4239           - various operators, brackets and sigils
4240           - numbers
4241           - quotes
4242           - 'v': vstrings (or go to key lookup)
4243           - 'x' repetition operator (or go to key lookup)
4244           - other ASCII alphanumerics (key lookup begins here):
4245               word before => ?
4246               keyword plugin
4247               scan built-in keyword (but do nothing with it yet)
4248               check for statement label
4249               check for lexical subs
4250                   goto just_a_word if there is one
4251               see whether built-in keyword is overridden
4252               switch on keyword number:
4253                   - default: just_a_word:
4254                       not a built-in keyword; handle bareword lookup
4255                       disambiguate between method and sub call
4256                       fall back to bareword
4257                   - cases for built-in keywords
4258 */
4259
4260
4261 int
4262 Perl_yylex(pTHX)
4263 {
4264     dVAR;
4265     char *s = PL_bufptr;
4266     char *d;
4267     STRLEN len;
4268     bool bof = FALSE;
4269     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4270     U8 formbrack = 0;
4271     U32 fake_eof = 0;
4272
4273     /* orig_keyword, gvp, and gv are initialized here because
4274      * jump to the label just_a_word_zero can bypass their
4275      * initialization later. */
4276     I32 orig_keyword = 0;
4277     GV *gv = NULL;
4278     GV **gvp = NULL;
4279
4280     DEBUG_T( {
4281         SV* tmp = newSVpvs("");
4282         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4283             (IV)CopLINE(PL_curcop),
4284             lex_state_names[PL_lex_state],
4285             exp_name[PL_expect],
4286             pv_display(tmp, s, strlen(s), 0, 60));
4287         SvREFCNT_dec(tmp);
4288     } );
4289
4290     switch (PL_lex_state) {
4291     case LEX_NORMAL:
4292     case LEX_INTERPNORMAL:
4293         break;
4294
4295     /* when we've already built the next token, just pull it out of the queue */
4296     case LEX_KNOWNEXT:
4297         PL_nexttoke--;
4298         pl_yylval = PL_nextval[PL_nexttoke];
4299         if (!PL_nexttoke) {
4300             PL_lex_state = PL_lex_defer;
4301             PL_expect = PL_lex_expect;
4302             PL_lex_defer = LEX_NORMAL;
4303         }
4304         {
4305             I32 next_type;
4306             next_type = PL_nexttype[PL_nexttoke];
4307             if (next_type & (7<<24)) {
4308                 if (next_type & (1<<24)) {
4309                     if (PL_lex_brackets > 100)
4310                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4311                     PL_lex_brackstack[PL_lex_brackets++] =
4312                         (char) ((next_type >> 16) & 0xff);
4313                 }
4314                 if (next_type & (2<<24))
4315                     PL_lex_allbrackets++;
4316                 if (next_type & (4<<24))
4317                     PL_lex_allbrackets--;
4318                 next_type &= 0xffff;
4319             }
4320             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4321         }
4322
4323     /* interpolated case modifiers like \L \U, including \Q and \E.
4324        when we get here, PL_bufptr is at the \
4325     */
4326     case LEX_INTERPCASEMOD:
4327 #ifdef DEBUGGING
4328         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4329             Perl_croak(aTHX_
4330                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4331                        PL_bufptr, PL_bufend, *PL_bufptr);
4332 #endif
4333         /* handle \E or end of string */
4334         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4335             /* if at a \E */
4336             if (PL_lex_casemods) {
4337                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4338                 PL_lex_casestack[PL_lex_casemods] = '\0';
4339
4340                 if (PL_bufptr != PL_bufend
4341                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4342                         || oldmod == 'F')) {
4343                     PL_bufptr += 2;
4344                     PL_lex_state = LEX_INTERPCONCAT;
4345                 }
4346                 PL_lex_allbrackets--;
4347                 return REPORT(')');
4348             }
4349             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4350                /* Got an unpaired \E */
4351                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4352                         "Useless use of \\E");
4353             }
4354             if (PL_bufptr != PL_bufend)
4355                 PL_bufptr += 2;
4356             PL_lex_state = LEX_INTERPCONCAT;
4357             return yylex();
4358         }
4359         else {
4360             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4361               "### Saw case modifier\n"); });
4362             s = PL_bufptr + 1;
4363             if (s[1] == '\\' && s[2] == 'E') {
4364                 PL_bufptr = s + 3;
4365                 PL_lex_state = LEX_INTERPCONCAT;
4366                 return yylex();
4367             }
4368             else {
4369                 I32 tmp;
4370                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4371                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4372                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4373                     (strchr(PL_lex_casestack, 'L')
4374                         || strchr(PL_lex_casestack, 'U')
4375                         || strchr(PL_lex_casestack, 'F'))) {
4376                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4377                     PL_lex_allbrackets--;
4378                     return REPORT(')');
4379                 }
4380                 if (PL_lex_casemods > 10)
4381                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4382                 PL_lex_casestack[PL_lex_casemods++] = *s;
4383                 PL_lex_casestack[PL_lex_casemods] = '\0';
4384                 PL_lex_state = LEX_INTERPCONCAT;
4385                 NEXTVAL_NEXTTOKE.ival = 0;
4386                 force_next((2<<24)|'(');
4387                 if (*s == 'l')
4388                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4389                 else if (*s == 'u')
4390                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4391                 else if (*s == 'L')
4392                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4393                 else if (*s == 'U')
4394                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4395                 else if (*s == 'Q')
4396                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4397                 else if (*s == 'F')
4398                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4399                 else
4400                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4401                 PL_bufptr = s + 1;
4402             }
4403             force_next(FUNC);
4404             if (PL_lex_starts) {
4405                 s = PL_bufptr;
4406                 PL_lex_starts = 0;
4407                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4408                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4409                     OPERATOR(',');
4410                 else
4411                     Aop(OP_CONCAT);
4412             }
4413             else
4414                 return yylex();
4415         }
4416
4417     case LEX_INTERPPUSH:
4418         return REPORT(sublex_push());
4419
4420     case LEX_INTERPSTART:
4421         if (PL_bufptr == PL_bufend)
4422             return REPORT(sublex_done());
4423         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4424               "### Interpolated variable\n"); });
4425         PL_expect = XTERM;
4426         /* for /@a/, we leave the joining for the regex engine to do
4427          * (unless we're within \Q etc) */
4428         PL_lex_dojoin = (*PL_bufptr == '@'
4429                             && (!PL_lex_inpat || PL_lex_casemods));
4430         PL_lex_state = LEX_INTERPNORMAL;
4431         if (PL_lex_dojoin) {
4432             NEXTVAL_NEXTTOKE.ival = 0;
4433             force_next(',');
4434             force_ident("\"", '$');
4435             NEXTVAL_NEXTTOKE.ival = 0;
4436             force_next('$');
4437             NEXTVAL_NEXTTOKE.ival = 0;
4438             force_next((2<<24)|'(');
4439             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4440             force_next(FUNC);
4441         }
4442         /* Convert (?{...}) and friends to 'do {...}' */
4443         if (PL_lex_inpat && *PL_bufptr == '(') {
4444             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4445             PL_bufptr += 2;
4446             if (*PL_bufptr != '{')
4447                 PL_bufptr++;
4448             PL_expect = XTERMBLOCK;
4449             force_next(DO);
4450         }
4451
4452         if (PL_lex_starts++) {
4453             s = PL_bufptr;
4454             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4455             if (!PL_lex_casemods && PL_lex_inpat)
4456                 OPERATOR(',');
4457             else
4458                 Aop(OP_CONCAT);
4459         }
4460         return yylex();
4461
4462     case LEX_INTERPENDMAYBE:
4463         if (intuit_more(PL_bufptr)) {
4464             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4465             break;
4466         }
4467         /* FALLTHROUGH */
4468
4469     case LEX_INTERPEND:
4470         if (PL_lex_dojoin) {
4471             const U8 dojoin_was = PL_lex_dojoin;
4472             PL_lex_dojoin = FALSE;
4473             PL_lex_state = LEX_INTERPCONCAT;
4474             PL_lex_allbrackets--;
4475             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4476         }
4477         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4478             && SvEVALED(PL_lex_repl))
4479         {
4480             if (PL_bufptr != PL_bufend)
4481                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4482             PL_lex_repl = NULL;
4483         }
4484         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4485            re_eval_str.  If the here-doc body’s length equals the previous
4486            value of re_eval_start, re_eval_start will now be null.  So
4487            check re_eval_str as well. */
4488         if (PL_parser->lex_shared->re_eval_start
4489          || PL_parser->lex_shared->re_eval_str) {
4490             SV *sv;
4491             if (*PL_bufptr != ')')
4492                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4493             PL_bufptr++;
4494             /* having compiled a (?{..}) expression, return the original
4495              * text too, as a const */
4496             if (PL_parser->lex_shared->re_eval_str) {
4497                 sv = PL_parser->lex_shared->re_eval_str;
4498                 PL_parser->lex_shared->re_eval_str = NULL;
4499                 SvCUR_set(sv,
4500                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4501                 SvPV_shrink_to_cur(sv);
4502             }
4503             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4504                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4505             NEXTVAL_NEXTTOKE.opval =
4506                     (OP*)newSVOP(OP_CONST, 0,
4507                                  sv);
4508             force_next(THING);
4509             PL_parser->lex_shared->re_eval_start = NULL;
4510             PL_expect = XTERM;
4511             return REPORT(',');
4512         }
4513
4514         /* FALLTHROUGH */
4515     case LEX_INTERPCONCAT:
4516 #ifdef DEBUGGING
4517         if (PL_lex_brackets)
4518             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4519                        (long) PL_lex_brackets);
4520 #endif
4521         if (PL_bufptr == PL_bufend)
4522             return REPORT(sublex_done());
4523
4524         /* m'foo' still needs to be parsed for possible (?{...}) */
4525         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4526             SV *sv = newSVsv(PL_linestr);
4527             sv = tokeq(sv);
4528             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4529             s = PL_bufend;
4530         }
4531         else {
4532             s = scan_const(PL_bufptr);
4533             if (*s == '\\')
4534                 PL_lex_state = LEX_INTERPCASEMOD;
4535             else
4536                 PL_lex_state = LEX_INTERPSTART;
4537         }
4538
4539         if (s != PL_bufptr) {
4540             NEXTVAL_NEXTTOKE = pl_yylval;
4541             PL_expect = XTERM;
4542             force_next(THING);
4543             if (PL_lex_starts++) {
4544                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4545                 if (!PL_lex_casemods && PL_lex_inpat)
4546                     OPERATOR(',');
4547                 else
4548                     Aop(OP_CONCAT);
4549             }
4550             else {
4551                 PL_bufptr = s;
4552                 return yylex();
4553             }
4554         }
4555
4556         return yylex();
4557     case LEX_FORMLINE:
4558         s = scan_formline(PL_bufptr);
4559         if (!PL_lex_formbrack)
4560         {
4561             formbrack = 1;
4562             goto rightbracket;
4563         }
4564         PL_bufptr = s;
4565         return yylex();
4566     }
4567
4568     /* We really do *not* want PL_linestr ever becoming a COW. */
4569     assert (!SvIsCOW(PL_linestr));
4570     s = PL_bufptr;
4571     PL_oldoldbufptr = PL_oldbufptr;
4572     PL_oldbufptr = s;
4573     PL_parser->saw_infix_sigil = 0;
4574
4575   retry:
4576     switch (*s) {
4577     default:
4578         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4579             goto keylookup;
4580         {
4581         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4582         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4583                                                     UTF8SKIP(s),
4584                                                     SVs_TEMP | SVf_UTF8),
4585                                             10, UNI_DISPLAY_ISPRINT)
4586                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4587         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4588         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4589             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4590         } else {
4591             d = PL_linestart;
4592         }
4593         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4594                           UTF8fARG(UTF, (s - d), d),
4595                          (int) len + 1);
4596     }
4597     case 4:
4598     case 26:
4599         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4600     case 0:
4601         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4602             PL_last_uni = 0;
4603             PL_last_lop = 0;
4604             if (PL_lex_brackets &&
4605                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4606                 yyerror((const char *)
4607                         (PL_lex_formbrack
4608                          ? "Format not terminated"
4609                          : "Missing right curly or square bracket"));
4610             }
4611             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4612                         "### Tokener got EOF\n");
4613             } );
4614             TOKEN(0);
4615         }
4616         if (s++ < PL_bufend)
4617             goto retry;                 /* ignore stray nulls */
4618         PL_last_uni = 0;
4619         PL_last_lop = 0;
4620         if (!PL_in_eval && !PL_preambled) {
4621             PL_preambled = TRUE;
4622             if (PL_perldb) {
4623                 /* Generate a string of Perl code to load the debugger.
4624                  * If PERL5DB is set, it will return the contents of that,
4625                  * otherwise a compile-time require of perl5db.pl.  */
4626
4627                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4628
4629                 if (pdb) {
4630                     sv_setpv(PL_linestr, pdb);
4631                     sv_catpvs(PL_linestr,";");
4632                 } else {
4633                     SETERRNO(0,SS_NORMAL);
4634                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4635                 }
4636                 PL_parser->preambling = CopLINE(PL_curcop);
4637             } else
4638                 sv_setpvs(PL_linestr,"");
4639             if (PL_preambleav) {
4640                 SV **svp = AvARRAY(PL_preambleav);
4641                 SV **const end = svp + AvFILLp(PL_preambleav);
4642                 while(svp <= end) {
4643                     sv_catsv(PL_linestr, *svp);
4644                     ++svp;
4645                     sv_catpvs(PL_linestr, ";");
4646                 }
4647                 sv_free(MUTABLE_SV(PL_preambleav));
4648                 PL_preambleav = NULL;
4649             }
4650             if (PL_minus_E)
4651                 sv_catpvs(PL_linestr,
4652                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4653             if (PL_minus_n || PL_minus_p) {
4654                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4655                 if (PL_minus_l)
4656                     sv_catpvs(PL_linestr,"chomp;");
4657                 if (PL_minus_a) {
4658                     if (PL_minus_F) {
4659                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4660                              || *PL_splitstr == '"')
4661                               && strchr(PL_splitstr + 1, *PL_splitstr))
4662                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4663                         else {
4664                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4665                                bytes can be used as quoting characters.  :-) */
4666                             const char *splits = PL_splitstr;
4667                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4668                             do {
4669                                 /* Need to \ \s  */
4670                                 if (*splits == '\\')
4671                                     sv_catpvn(PL_linestr, splits, 1);
4672                                 sv_catpvn(PL_linestr, splits, 1);
4673                             } while (*splits++);
4674                             /* This loop will embed the trailing NUL of
4675                                PL_linestr as the last thing it does before
4676                                terminating.  */
4677                             sv_catpvs(PL_linestr, ");");
4678                         }
4679                     }
4680                     else
4681                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4682                 }
4683             }
4684             sv_catpvs(PL_linestr, "\n");
4685             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4686             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4687             PL_last_lop = PL_last_uni = NULL;
4688             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4689                 update_debugger_info(PL_linestr, NULL, 0);
4690             goto retry;
4691         }
4692         do {
4693             fake_eof = 0;
4694             bof = PL_rsfp ? TRUE : FALSE;
4695             if (0) {
4696               fake_eof:
4697                 fake_eof = LEX_FAKE_EOF;
4698             }
4699             PL_bufptr = PL_bufend;
4700             COPLINE_INC_WITH_HERELINES;
4701             if (!lex_next_chunk(fake_eof)) {
4702                 CopLINE_dec(PL_curcop);
4703                 s = PL_bufptr;
4704                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4705             }
4706             CopLINE_dec(PL_curcop);
4707             s = PL_bufptr;
4708             /* If it looks like the start of a BOM or raw UTF-16,
4709              * check if it in fact is. */
4710             if (bof && PL_rsfp &&
4711                      (*s == 0 ||
4712                       *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4713                       *(U8*)s >= 0xFE ||
4714                       s[1] == 0)) {
4715                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4716                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4717 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4718                 /* offset may include swallowed CR */
4719                 if (!bof)
4720                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4721 #endif
4722                 if (bof) {
4723                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4724                     s = swallow_bom((U8*)s);
4725                 }
4726             }
4727             if (PL_parser->in_pod) {
4728                 /* Incest with pod. */
4729                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4730                     sv_setpvs(PL_linestr, "");
4731                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4732                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4733                     PL_last_lop = PL_last_uni = NULL;
4734                     PL_parser->in_pod = 0;
4735                 }
4736             }
4737             if (PL_rsfp || PL_parser->filtered)
4738                 incline(s);
4739         } while (PL_parser->in_pod);
4740         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4741         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4742         PL_last_lop = PL_last_uni = NULL;
4743         if (CopLINE(PL_curcop) == 1) {
4744             while (s < PL_bufend && isSPACE(*s))
4745                 s++;
4746             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4747                 s++;
4748             d = NULL;
4749             if (!PL_in_eval) {
4750                 if (*s == '#' && *(s+1) == '!')
4751                     d = s + 2;
4752 #ifdef ALTERNATE_SHEBANG
4753                 else {
4754                     static char const as[] = ALTERNATE_SHEBANG;
4755                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4756                         d = s + (sizeof(as) - 1);
4757                 }
4758 #endif /* ALTERNATE_SHEBANG */
4759             }
4760             if (d) {
4761                 char *ipath;
4762                 char *ipathend;
4763
4764                 while (isSPACE(*d))
4765                     d++;
4766                 ipath = d;
4767                 while (*d && !isSPACE(*d))
4768                     d++;
4769                 ipathend = d;
4770
4771 #ifdef ARG_ZERO_IS_SCRIPT
4772                 if (ipathend > ipath) {
4773                     /*
4774                      * HP-UX (at least) sets argv[0] to the script name,
4775                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4776                      * at least, set argv[0] to the basename of the Perl
4777                      * interpreter. So, having found "#!", we'll set it right.
4778                      */
4779                     SV* copfilesv = CopFILESV(PL_curcop);
4780                     if (copfilesv) {
4781                         SV * const x =
4782                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4783                                              SVt_PV)); /* $^X */
4784                         assert(SvPOK(x) || SvGMAGICAL(x));
4785                         if (sv_eq(x, copfilesv)) {
4786                             sv_setpvn(x, ipath, ipathend - ipath);
4787                             SvSETMAGIC(x);
4788                         }
4789                         else {
4790                             STRLEN blen;
4791                             STRLEN llen;
4792                             const char *bstart = SvPV_const(copfilesv, blen);
4793                             const char * const lstart = SvPV_const(x, llen);
4794                             if (llen < blen) {
4795                                 bstart += blen - llen;
4796                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
4797                                     sv_setpvn(x, ipath, ipathend - ipath);
4798                                     SvSETMAGIC(x);
4799                                 }
4800                             }
4801                         }
4802                     }
4803                     else {
4804                         /* Anything to do if no copfilesv? */
4805                     }
4806                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4807                 }
4808 #endif /* ARG_ZERO_IS_SCRIPT */
4809
4810                 /*
4811                  * Look for options.
4812                  */
4813                 d = instr(s,"perl -");
4814                 if (!d) {
4815                     d = instr(s,"perl");
4816 #if defined(DOSISH)
4817                     /* avoid getting into infinite loops when shebang
4818                      * line contains "Perl" rather than "perl" */
4819                     if (!d) {
4820                         for (d = ipathend-4; d >= ipath; --d) {
4821                             if ((*d == 'p' || *d == 'P')
4822                                 && !ibcmp(d, "perl", 4))
4823                             {
4824                                 break;
4825                             }
4826                         }
4827                         if (d < ipath)
4828                             d = NULL;
4829                     }
4830 #endif
4831                 }
4832 #ifdef ALTERNATE_SHEBANG
4833                 /*
4834                  * If the ALTERNATE_SHEBANG on this system starts with a
4835                  * character that can be part of a Perl expression, then if
4836                  * we see it but not "perl", we're probably looking at the
4837                  * start of Perl code, not a request to hand off to some
4838                  * other interpreter.  Similarly, if "perl" is there, but
4839                  * not in the first 'word' of the line, we assume the line
4840                  * contains the start of the Perl program.
4841                  */
4842                 if (d && *s != '#') {
4843                     const char *c = ipath;
4844                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4845                         c++;
4846                     if (c < d)
4847                         d = NULL;       /* "perl" not in first word; ignore */
4848                     else
4849                         *s = '#';       /* Don't try to parse shebang line */
4850                 }
4851 #endif /* ALTERNATE_SHEBANG */
4852                 if (!d &&
4853                     *s == '#' &&
4854                     ipathend > ipath &&
4855                     !PL_minus_c &&
4856                     !instr(s,"indir") &&
4857                     instr(PL_origargv[0],"perl"))
4858                 {
4859                     dVAR;
4860                     char **newargv;
4861
4862                     *ipathend = '\0';
4863                     s = ipathend + 1;
4864                     while (s < PL_bufend && isSPACE(*s))
4865                         s++;
4866                     if (s < PL_bufend) {
4867                         Newx(newargv,PL_origargc+3,char*);
4868                         newargv[1] = s;
4869                         while (s < PL_bufend && !isSPACE(*s))
4870                             s++;
4871                         *s = '\0';
4872                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4873                     }
4874                     else
4875                         newargv = PL_origargv;
4876                     newargv[0] = ipath;
4877                     PERL_FPU_PRE_EXEC
4878                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4879                     PERL_FPU_POST_EXEC
4880                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4881                 }
4882                 if (d) {
4883                     while (*d && !isSPACE(*d))
4884                         d++;
4885                     while (SPACE_OR_TAB(*d))
4886                         d++;
4887
4888                     if (*d++ == '-') {
4889                         const bool switches_done = PL_doswitches;
4890                         const U32 oldpdb = PL_perldb;
4891                         const bool oldn = PL_minus_n;
4892                         const bool oldp = PL_minus_p;
4893                         const char *d1 = d;
4894
4895                         do {
4896                             bool baduni = FALSE;
4897                             if (*d1 == 'C') {
4898                                 const char *d2 = d1 + 1;
4899                                 if (parse_unicode_opts((const char **)&d2)
4900                                     != PL_unicode)
4901                                     baduni = TRUE;
4902                             }
4903                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4904                                 const char * const m = d1;
4905                                 while (*d1 && !isSPACE(*d1))
4906                                     d1++;
4907                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4908                                       (int)(d1 - m), m);
4909                             }
4910                             d1 = moreswitches(d1);
4911                         } while (d1);
4912                         if (PL_doswitches && !switches_done) {
4913                             int argc = PL_origargc;
4914                             char **argv = PL_origargv;
4915                             do {
4916                                 argc--,argv++;
4917                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4918                             init_argv_symbols(argc,argv);
4919                         }
4920                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4921                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4922                               /* if we have already added "LINE: while (<>) {",
4923                                  we must not do it again */
4924                         {
4925                             sv_setpvs(PL_linestr, "");
4926                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4927                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4928                             PL_last_lop = PL_last_uni = NULL;
4929                             PL_preambled = FALSE;
4930                             if (PERLDB_LINE || PERLDB_SAVESRC)
4931                                 (void)gv_fetchfile(PL_origfilename);
4932                             goto retry;
4933                         }
4934                     }
4935                 }
4936             }
4937         }
4938         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4939             PL_lex_state = LEX_FORMLINE;
4940             NEXTVAL_NEXTTOKE.ival = 0;
4941             force_next(FORMRBRACK);
4942             TOKEN(';');
4943         }
4944         goto retry;
4945     case '\r':
4946 #ifdef PERL_STRICT_CR
4947         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4948         Perl_croak(aTHX_
4949       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4950 #endif
4951     case ' ': case '\t': case '\f': case 013:
4952         s++;
4953         goto retry;
4954     case '#':
4955     case '\n':
4956         if (PL_lex_state != LEX_NORMAL ||
4957              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4958             const bool in_comment = *s == '#';
4959             if (*s == '#' && s == PL_linestart && PL_in_eval
4960              && !PL_rsfp && !PL_parser->filtered) {
4961                 /* handle eval qq[#line 1 "foo"\n ...] */
4962                 CopLINE_dec(PL_curcop);
4963                 incline(s);
4964             }
4965             d = s;
4966             while (d < PL_bufend && *d != '\n')
4967                 d++;
4968             if (d < PL_bufend)
4969                 d++;
4970             else if (d > PL_bufend)
4971                 /* Found by Ilya: feed random input to Perl. */
4972                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
4973                            d, PL_bufend);
4974             s = d;
4975             if (in_comment && d == PL_bufend
4976                 && PL_lex_state == LEX_INTERPNORMAL
4977                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
4978                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
4979             else
4980                 incline(s);
4981             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4982                 PL_lex_state = LEX_FORMLINE;
4983                 NEXTVAL_NEXTTOKE.ival = 0;
4984                 force_next(FORMRBRACK);
4985                 TOKEN(';');
4986             }
4987         }
4988         else {
4989             while (s < PL_bufend && *s != '\n')
4990                 s++;
4991             if (s < PL_bufend)
4992                 {
4993                     s++;
4994                     if (s < PL_bufend)
4995                         incline(s);
4996                 }
4997             else if (s > PL_bufend)
4998                 /* Found by Ilya: feed random input to Perl. */
4999                 Perl_croak(aTHX_ "panic: input overflow");
5000         }
5001         goto retry;
5002     case '-':
5003         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5004             I32 ftst = 0;
5005             char tmp;
5006
5007             s++;
5008             PL_bufptr = s;
5009             tmp = *s++;
5010
5011             while (s < PL_bufend && SPACE_OR_TAB(*s))
5012                 s++;
5013
5014             if (strnEQ(s,"=>",2)) {
5015                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5016                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5017                 OPERATOR('-');          /* unary minus */
5018             }
5019             switch (tmp) {
5020             case 'r': ftst = OP_FTEREAD;        break;
5021             case 'w': ftst = OP_FTEWRITE;       break;
5022             case 'x': ftst = OP_FTEEXEC;        break;
5023             case 'o': ftst = OP_FTEOWNED;       break;
5024             case 'R': ftst = OP_FTRREAD;        break;
5025             case 'W': ftst = OP_FTRWRITE;       break;
5026             case 'X': ftst = OP_FTREXEC;        break;
5027             case 'O': ftst = OP_FTROWNED;       break;
5028             case 'e': ftst = OP_FTIS;           break;
5029             case 'z': ftst = OP_FTZERO;         break;
5030             case 's': ftst = OP_FTSIZE;         break;
5031             case 'f': ftst = OP_FTFILE;         break;
5032             case 'd': ftst = OP_FTDIR;          break;
5033             case 'l': ftst = OP_FTLINK;         break;
5034             case 'p': ftst = OP_FTPIPE;         break;
5035             case 'S': ftst = OP_FTSOCK;         break;
5036             case 'u': ftst = OP_FTSUID;         break;
5037             case 'g': ftst = OP_FTSGID;         break;
5038             case 'k': ftst = OP_FTSVTX;         break;
5039             case 'b': ftst = OP_FTBLK;          break;
5040             case 'c': ftst = OP_FTCHR;          break;
5041             case 't': ftst = OP_FTTTY;          break;
5042             case 'T': ftst = OP_FTTEXT;         break;
5043             case 'B': ftst = OP_FTBINARY;       break;
5044             case 'M': case 'A': case 'C':
5045                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5046                 switch (tmp) {
5047                 case 'M': ftst = OP_FTMTIME;    break;
5048                 case 'A': ftst = OP_FTATIME;    break;
5049                 case 'C': ftst = OP_FTCTIME;    break;
5050                 default:                        break;
5051                 }
5052                 break;
5053             default:
5054                 break;
5055             }
5056             if (ftst) {
5057                 PL_last_uni = PL_oldbufptr;
5058                 PL_last_lop_op = (OPCODE)ftst;
5059                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5060                         "### Saw file test %c\n", (int)tmp);
5061                 } );
5062                 FTST(ftst);
5063             }
5064             else {
5065                 /* Assume it was a minus followed by a one-letter named
5066                  * subroutine call (or a -bareword), then. */
5067                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5068                         "### '-%c' looked like a file test but was not\n",
5069                         (int) tmp);
5070                 } );
5071                 s = --PL_bufptr;
5072             }
5073         }
5074         {
5075             const char tmp = *s++;
5076             if (*s == tmp) {
5077                 s++;
5078                 if (PL_expect == XOPERATOR)
5079                     TERM(POSTDEC);
5080                 else
5081                     OPERATOR(PREDEC);
5082             }
5083             else if (*s == '>') {
5084                 s++;
5085                 s = SKIPSPACE1(s);
5086                 if (FEATURE_POSTDEREF_IS_ENABLED && (
5087                     ((*s == '$' || *s == '&') && s[1] == '*')
5088                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5089                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5090                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5091                  ))
5092                 {
5093                     Perl_ck_warner_d(aTHX_
5094                         packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5095                         "Postfix dereference is experimental"
5096                     );
5097                     PL_expect = XPOSTDEREF;
5098                     TOKEN(ARROW);
5099                 }
5100                 if (isIDFIRST_lazy_if(s,UTF)) {
5101                     s = force_word(s,METHOD,FALSE,TRUE);
5102                     TOKEN(ARROW);
5103                 }
5104                 else if (*s == '$')
5105                     OPERATOR(ARROW);
5106                 else
5107                     TERM(ARROW);
5108             }
5109             if (PL_expect == XOPERATOR) {
5110                 if (*s == '=' && !PL_lex_allbrackets &&
5111                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5112                     s--;
5113                     TOKEN(0);
5114                 }
5115                 Aop(OP_SUBTRACT);
5116             }
5117             else {
5118                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5119                     check_uni();
5120                 OPERATOR('-');          /* unary minus */
5121             }
5122         }
5123
5124     case '+':
5125         {
5126             const char tmp = *s++;
5127             if (*s == tmp) {
5128                 s++;
5129                 if (PL_expect == XOPERATOR)
5130                     TERM(POSTINC);
5131                 else
5132                     OPERATOR(PREINC);
5133             }
5134             if (PL_expect == XOPERATOR) {
5135                 if (*s == '=' && !PL_lex_allbrackets &&
5136                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5137                     s--;
5138                     TOKEN(0);
5139                 }
5140                 Aop(OP_ADD);
5141             }
5142             else {
5143                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5144                     check_uni();
5145                 OPERATOR('+');
5146             }
5147         }
5148
5149     case '*':
5150         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5151         if (PL_expect != XOPERATOR) {
5152             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5153             PL_expect = XOPERATOR;
5154             force_ident(PL_tokenbuf, '*');
5155             if (!*PL_tokenbuf)
5156                 PREREF('*');
5157             TERM('*');
5158         }
5159         s++;
5160         if (*s == '*') {
5161             s++;
5162             if (*s == '=' && !PL_lex_allbrackets &&
5163                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5164                 s -= 2;
5165                 TOKEN(0);
5166             }
5167             PWop(OP_POW);
5168         }
5169         if (*s == '=' && !PL_lex_allbrackets &&
5170                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5171             s--;
5172             TOKEN(0);
5173         }
5174         PL_parser->saw_infix_sigil = 1;
5175         Mop(OP_MULTIPLY);
5176
5177     case '%':
5178     {
5179         if (PL_expect == XOPERATOR) {
5180             if (s[1] == '=' && !PL_lex_allbrackets &&
5181                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5182                 TOKEN(0);
5183             ++s;
5184             PL_parser->saw_infix_sigil = 1;
5185             Mop(OP_MODULO);
5186         }
5187         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5188         PL_tokenbuf[0] = '%';
5189         s = scan_ident(s, PL_tokenbuf + 1,
5190                 sizeof PL_tokenbuf - 1, FALSE);
5191         pl_yylval.ival = 0;
5192         if (!PL_tokenbuf[1]) {
5193             PREREF('%');
5194         }
5195         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5196             if (*s == '[')
5197                 PL_tokenbuf[0] = '@';
5198         }
5199         PL_expect = XOPERATOR;
5200         force_ident_maybe_lex('%');
5201         TERM('%');
5202     }
5203     case '^':
5204         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5205                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5206             TOKEN(0);
5207         s++;
5208         BOop(OP_BIT_XOR);
5209     case '[':
5210         if (PL_lex_brackets > 100)
5211             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5212         PL_lex_brackstack[PL_lex_brackets++] = 0;
5213         PL_lex_allbrackets++;
5214         {
5215             const char tmp = *s++;
5216             OPERATOR(tmp);
5217         }
5218     case '~':
5219         if (s[1] == '~'
5220             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5221         {
5222             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5223                 TOKEN(0);
5224             s += 2;
5225             Perl_ck_warner_d(aTHX_
5226                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5227                 "Smartmatch is experimental");
5228             Eop(OP_SMARTMATCH);
5229         }
5230         s++;
5231         OPERATOR('~');
5232     case ',':
5233         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5234             TOKEN(0);
5235         s++;
5236         OPERATOR(',');
5237     case ':':
5238         if (s[1] == ':') {
5239             len = 0;
5240             goto just_a_word_zero_gv;
5241         }
5242         s++;
5243         {
5244         OP *attrs;
5245
5246         switch (PL_expect) {
5247         case XOPERATOR:
5248             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5249                 break;
5250             PL_bufptr = s;      /* update in case we back off */
5251             if (*s == '=') {
5252                 Perl_croak(aTHX_
5253                            "Use of := for an empty attribute list is not allowed");
5254             }
5255             goto grabattrs;
5256         case XATTRBLOCK:
5257             PL_expect = XBLOCK;
5258             goto grabattrs;
5259         case XATTRTERM:
5260             PL_expect = XTERMBLOCK;
5261          grabattrs:
5262             s = PEEKSPACE(s);
5263             attrs = NULL;
5264             while (isIDFIRST_lazy_if(s,UTF)) {
5265                 I32 tmp;
5266                 SV *sv;
5267                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5268                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5269                     if (tmp < 0) tmp = -tmp;
5270                     switch (tmp) {
5271                     case KEY_or:
5272                     case KEY_and:
5273                     case KEY_for:
5274                     case KEY_foreach:
5275                     case KEY_unless:
5276                     case KEY_if:
5277                     case KEY_while:
5278                     case KEY_until:
5279                         goto got_attrs;
5280                     default:
5281                         break;
5282                     }
5283                 }
5284                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5285                 if (*d == '(') {
5286                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5287                     COPLINE_SET_FROM_MULTI_END;
5288                     if (!d) {
5289                         /* MUST advance bufptr here to avoid bogus
5290                            "at end of line" context messages from yyerror().
5291                          */
5292                         PL_bufptr = s + len;
5293                         yyerror("Unterminated attribute parameter in attribute list");
5294                         if (attrs)
5295                             op_free(attrs);
5296                         sv_free(sv);
5297                         return REPORT(0);       /* EOF indicator */
5298                     }
5299                 }
5300                 if (PL_lex_stuff) {
5301                     sv_catsv(sv, PL_lex_stuff);
5302                     attrs = op_append_elem(OP_LIST, attrs,
5303                                         newSVOP(OP_CONST, 0, sv));
5304                     SvREFCNT_dec(PL_lex_stuff);
5305                     PL_lex_stuff = NULL;
5306                 }
5307                 else {
5308                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5309                         sv_free(sv);
5310                         if (PL_in_my == KEY_our) {
5311                             deprecate(":unique");
5312                         }
5313                         else
5314                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5315                     }
5316
5317                     /* NOTE: any CV attrs applied here need to be part of
5318                        the CVf_BUILTIN_ATTRS define in cv.h! */
5319                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5320                         sv_free(sv);
5321                         CvLVALUE_on(PL_compcv);
5322                     }
5323                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5324                         sv_free(sv);
5325                         deprecate(":locked");
5326                     }
5327                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5328                         sv_free(sv);
5329                         CvMETHOD_on(PL_compcv);
5330                     }
5331                     /* After we've set the flags, it could be argued that
5332                        we don't need to do the attributes.pm-based setting
5333                        process, and shouldn't bother appending recognized
5334                        flags.  To experiment with that, uncomment the
5335                        following "else".  (Note that's already been
5336                        uncommented.  That keeps the above-applied built-in
5337                        attributes from being intercepted (and possibly
5338                        rejected) by a package's attribute routines, but is
5339                        justified by the performance win for the common case
5340                        of applying only built-in attributes.) */
5341                     else
5342                         attrs = op_append_elem(OP_LIST, attrs,
5343                                             newSVOP(OP_CONST, 0,
5344                                                     sv));
5345                 }
5346                 s = PEEKSPACE(d);
5347                 if (*s == ':' && s[1] != ':')
5348                     s = PEEKSPACE(s+1);
5349                 else if (s == d)
5350                     break;      /* require real whitespace or :'s */
5351                 /* XXX losing whitespace on sequential attributes here */
5352             }
5353             {
5354                 if (*s != ';' && *s != '}' &&
5355                     !(PL_expect == XOPERATOR
5356                         ? (*s == '=' ||  *s == ')')
5357                         : (*s == '{' ||  *s == '('))) {
5358                     const char q = ((*s == '\'') ? '"' : '\'');
5359                     /* If here for an expression, and parsed no attrs, back
5360                        off. */
5361                     if (PL_expect == XOPERATOR && !attrs) {
5362                         s = PL_bufptr;
5363                         break;
5364                     }
5365                     /* MUST advance bufptr here to avoid bogus "at end of line"
5366                        context messages from yyerror().
5367                     */
5368                     PL_bufptr = s;
5369                     yyerror( (const char *)
5370                              (*s
5371                               ? Perl_form(aTHX_ "Invalid separator character "
5372                                           "%c%c%c in attribute list", q, *s, q)
5373                               : "Unterminated attribute list" ) );
5374                     if (attrs)
5375                         op_free(attrs);
5376                     OPERATOR(':');
5377                 }
5378             }
5379         got_attrs:
5380             if (attrs) {
5381                 NEXTVAL_NEXTTOKE.opval = attrs;
5382                 force_next(THING);
5383             }
5384             TOKEN(COLONATTR);
5385         }
5386         }
5387         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5388             s--;
5389             TOKEN(0);
5390         }
5391         PL_lex_allbrackets--;
5392         OPERATOR(':');
5393     case '(':
5394         s++;
5395         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5396             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5397         else
5398             PL_expect = XTERM;
5399         s = SKIPSPACE1(s);
5400         PL_lex_allbrackets++;
5401         TOKEN('(');
5402     case ';':
5403         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5404             TOKEN(0);
5405         CLINE;
5406         s++;
5407         OPERATOR(';');
5408     case ')':
5409         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5410             TOKEN(0);
5411         s++;
5412         PL_lex_allbrackets--;
5413         s = SKIPSPACE1(s);
5414         if (*s == '{')
5415             PREBLOCK(')');
5416         TERM(')');
5417     case ']':
5418         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5419             TOKEN(0);
5420         s++;
5421         if (PL_lex_brackets <= 0)
5422             /* diag_listed_as: Unmatched right %s bracket */
5423             yyerror("Unmatched right square bracket");
5424         else
5425             --PL_lex_brackets;
5426         PL_lex_allbrackets--;
5427         if (PL_lex_state == LEX_INTERPNORMAL) {
5428             if (PL_lex_brackets == 0) {
5429                 if (*s == '-' && s[1] == '>')
5430                     PL_lex_state = LEX_INTERPENDMAYBE;
5431                 else if (*s != '[' && *s != '{')
5432                     PL_lex_state = LEX_INTERPEND;
5433             }
5434         }
5435         TERM(']');
5436     case '{':
5437         s++;
5438       leftbracket:
5439         if (PL_lex_brackets > 100) {
5440             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5441         }
5442         switch (PL_expect) {
5443         case XTERM:
5444             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5445             PL_lex_allbrackets++;
5446             OPERATOR(HASHBRACK);
5447         case XOPERATOR:
5448             while (s < PL_bufend && SPACE_OR_TAB(*s))
5449                 s++;
5450             d = s;
5451             PL_tokenbuf[0] = '\0';
5452             if (d < PL_bufend && *d == '-') {
5453                 PL_tokenbuf[0] = '-';
5454                 d++;
5455                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5456                     d++;
5457             }
5458             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5459                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5460                               FALSE, &len);
5461                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5462                     d++;
5463                 if (*d == '}') {
5464                     const char minus = (PL_tokenbuf[0] == '-');
5465                     s = force_word(s + minus, WORD, FALSE, TRUE);
5466                     if (minus)
5467                         force_next('-');
5468                 }
5469             }
5470             /* FALLTHROUGH */
5471         case XATTRBLOCK:
5472         case XBLOCK:
5473             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5474             PL_lex_allbrackets++;
5475             PL_expect = XSTATE;
5476             break;
5477         case XATTRTERM:
5478         case XTERMBLOCK:
5479             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5480             PL_lex_allbrackets++;
5481             PL_expect = XSTATE;
5482             break;
5483         default: {
5484                 const char *t;
5485                 if (PL_oldoldbufptr == PL_last_lop)
5486                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5487                 else
5488                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5489                 PL_lex_allbrackets++;
5490                 s = SKIPSPACE1(s);
5491                 if (*s == '}') {
5492                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5493                         PL_expect = XTERM;
5494                         /* This hack is to get the ${} in the message. */
5495                         PL_bufptr = s+1;
5496                         yyerror("syntax error");
5497                         break;
5498                     }
5499                     OPERATOR(HASHBRACK);
5500                 }
5501                 /* This hack serves to disambiguate a pair of curlies
5502                  * as being a block or an anon hash.  Normally, expectation
5503                  * determines that, but in cases where we're not in a
5504                  * position to expect anything in particular (like inside
5505                  * eval"") we have to resolve the ambiguity.  This code
5506                  * covers the case where the first term in the curlies is a
5507                  * quoted string.  Most other cases need to be explicitly
5508                  * disambiguated by prepending a "+" before the opening
5509                  * curly in order to force resolution as an anon hash.
5510                  *
5511                  * XXX should probably propagate the outer expectation
5512                  * into eval"" to rely less on this hack, but that could
5513                  * potentially break current behavior of eval"".
5514                  * GSAR 97-07-21
5515                  */
5516                 t = s;
5517                 if (*s == '\'' || *s == '"' || *s == '`') {
5518                     /* common case: get past first string, handling escapes */
5519                     for (t++; t < PL_bufend && *t != *s;)
5520                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5521                             t++;
5522                     t++;
5523                 }
5524                 else if (*s == 'q') {
5525                     if (++t < PL_bufend
5526                         && (!isWORDCHAR(*t)
5527                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5528                                 && !isWORDCHAR(*t))))
5529                     {
5530                         /* skip q//-like construct */
5531                         const char *tmps;
5532                         char open, close, term;
5533                         I32 brackets = 1;
5534
5535                         while (t < PL_bufend && isSPACE(*t))
5536                             t++;
5537                         /* check for q => */
5538                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5539                             OPERATOR(HASHBRACK);
5540                         }
5541                         term = *t;
5542                         open = term;
5543                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5544                             term = tmps[5];
5545                         close = term;
5546                         if (open == close)
5547                             for (t++; t < PL_bufend; t++) {
5548                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5549                                     t++;
5550                                 else if (*t == open)
5551                                     break;
5552                             }
5553                         else {
5554                             for (t++; t < PL_bufend; t++) {
5555                                 if (*t == '\\' && t+1 < PL_bufend)
5556                                     t++;
5557                                 else if (*t == close && --brackets <= 0)
5558                                     break;
5559                                 else if (*t == open)
5560                                     brackets++;
5561                             }
5562                         }
5563                         t++;
5564                     }
5565                     else
5566                         /* skip plain q word */
5567                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5568                              t += UTF8SKIP(t);
5569                 }
5570                 else if (isWORDCHAR_lazy_if(t,UTF)) {
5571                     t += UTF8SKIP(t);
5572                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5573                          t += UTF8SKIP(t);
5574                 }
5575                 while (t < PL_bufend && isSPACE(*t))
5576                     t++;
5577                 /* if comma follows first term, call it an anon hash */
5578                 /* XXX it could be a comma expression with loop modifiers */
5579                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5580                                    || (*t == '=' && t[1] == '>')))
5581                     OPERATOR(HASHBRACK);
5582                 if (PL_expect == XREF)
5583                     PL_expect = XTERM;
5584                 else {
5585                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5586                     PL_expect = XSTATE;
5587                 }
5588             }
5589             break;
5590         }
5591         pl_yylval.ival = CopLINE(PL_curcop);
5592         if (isSPACE(*s) || *s == '#')
5593             PL_copline = NOLINE;   /* invalidate current command line number */
5594         TOKEN(formbrack ? '=' : '{');
5595     case '}':
5596         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5597             TOKEN(0);
5598       rightbracket:
5599         s++;
5600         if (PL_lex_brackets <= 0)
5601             /* diag_listed_as: Unmatched right %s bracket */
5602             yyerror("Unmatched right curly bracket");
5603         else
5604             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5605         PL_lex_allbrackets--;
5606         if (PL_lex_state == LEX_INTERPNORMAL) {
5607             if (PL_lex_brackets == 0) {
5608                 if (PL_expect & XFAKEBRACK) {
5609                     PL_expect &= XENUMMASK;
5610                     PL_lex_state = LEX_INTERPEND;
5611                     PL_bufptr = s;
5612                     return yylex();     /* ignore fake brackets */
5613                 }
5614                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5615                  && SvEVALED(PL_lex_repl))
5616                     PL_lex_state = LEX_INTERPEND;
5617                 else if (*s == '-' && s[1] == '>')
5618                     PL_lex_state = LEX_INTERPENDMAYBE;
5619                 else if (*s != '[' && *s != '{')
5620                     PL_lex_state = LEX_INTERPEND;
5621             }
5622         }
5623         if (PL_expect & XFAKEBRACK) {
5624             PL_expect &= XENUMMASK;
5625             PL_bufptr = s;
5626             return yylex();             /* ignore fake brackets */
5627         }
5628         force_next(formbrack ? '.' : '}');
5629         if (formbrack) LEAVE;
5630         if (formbrack == 2) { /* means . where arguments were expected */
5631             force_next(';');
5632             TOKEN(FORMRBRACK);
5633         }
5634         TOKEN(';');
5635     case '&':
5636         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5637         s++;
5638         if (*s++ == '&') {
5639             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5640                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5641                 s -= 2;
5642                 TOKEN(0);
5643             }
5644             AOPERATOR(ANDAND);
5645         }
5646         s--;
5647         if (PL_expect == XOPERATOR) {
5648             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5649                 && isIDFIRST_lazy_if(s,UTF))
5650             {
5651                 CopLINE_dec(PL_curcop);
5652                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5653                 CopLINE_inc(PL_curcop);
5654             }
5655             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5656                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5657                 s--;
5658                 TOKEN(0);
5659             }
5660             PL_parser->saw_infix_sigil = 1;
5661             BAop(OP_BIT_AND);
5662         }
5663
5664         PL_tokenbuf[0] = '&';
5665         s = scan_ident(s - 1, PL_tokenbuf + 1,
5666                        sizeof PL_tokenbuf - 1, TRUE);
5667         if (PL_tokenbuf[1]) {
5668             PL_expect = XOPERATOR;
5669             force_ident_maybe_lex('&');
5670         }
5671         else
5672             PREREF('&');
5673         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5674         TERM('&');
5675
5676     case '|':
5677         s++;
5678         if (*s++ == '|') {
5679             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5680                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5681                 s -= 2;
5682                 TOKEN(0);
5683             }
5684             AOPERATOR(OROR);
5685         }
5686         s--;
5687         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5688                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5689             s--;
5690             TOKEN(0);
5691         }
5692         BOop(OP_BIT_OR);
5693     case '=':
5694         s++;
5695         {
5696             const char tmp = *s++;
5697             if (tmp == '=') {
5698                 if (!PL_lex_allbrackets &&
5699                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5700                     s -= 2;
5701                     TOKEN(0);
5702                 }
5703                 Eop(OP_EQ);
5704             }
5705             if (tmp == '>') {
5706                 if (!PL_lex_allbrackets &&
5707                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5708                     s -= 2;
5709                     TOKEN(0);
5710                 }
5711                 OPERATOR(',');
5712             }
5713             if (tmp == '~')
5714                 PMop(OP_MATCH);
5715             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5716                 && strchr("+-*/%.^&|<",tmp))
5717                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5718                             "Reversed %c= operator",(int)tmp);
5719             s--;
5720             if (PL_expect == XSTATE && isALPHA(tmp) &&
5721                 (s == PL_linestart+1 || s[-2] == '\n') )
5722                 {
5723                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5724                         || PL_lex_state != LEX_NORMAL) {
5725                         d = PL_bufend;
5726                         while (s < d) {
5727                             if (*s++ == '\n') {
5728                                 incline(s);
5729                                 if (strnEQ(s,"=cut",4)) {
5730                                     s = strchr(s,'\n');
5731                                     if (s)
5732                                         s++;
5733                                     else
5734                                         s = d;
5735                                     incline(s);
5736                                     goto retry;
5737                                 }
5738                             }
5739                         }
5740                         goto retry;
5741                     }
5742                     s = PL_bufend;
5743                     PL_parser->in_pod = 1;
5744                     goto retry;
5745                 }
5746         }
5747         if (PL_expect == XBLOCK) {
5748             const char *t = s;
5749 #ifdef PERL_STRICT_CR
5750             while (SPACE_OR_TAB(*t))
5751 #else
5752             while (SPACE_OR_TAB(*t) || *t == '\r')
5753 #endif
5754                 t++;
5755             if (*t == '\n' || *t == '#') {
5756                 formbrack = 1;
5757                 ENTER;
5758                 SAVEI8(PL_parser->form_lex_state);
5759                 SAVEI32(PL_lex_formbrack);
5760                 PL_parser->form_lex_state = PL_lex_state;
5761                 PL_lex_formbrack = PL_lex_brackets + 1;
5762                 goto leftbracket;
5763             }
5764         }
5765         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5766             s--;
5767             TOKEN(0);
5768         }
5769         pl_yylval.ival = 0;
5770         OPERATOR(ASSIGNOP);
5771     case '!':
5772         s++;
5773         {
5774             const char tmp = *s++;
5775             if (tmp == '=') {
5776                 /* was this !=~ where !~ was meant?
5777                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5778
5779                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5780                     const char *t = s+1;
5781
5782                     while (t < PL_bufend && isSPACE(*t))
5783                         ++t;
5784
5785                     if (*t == '/' || *t == '?' ||
5786                         ((*t == 'm' || *t == 's' || *t == 'y')
5787                          && !isWORDCHAR(t[1])) ||
5788                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5789                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5790                                     "!=~ should be !~");
5791                 }
5792                 if (!PL_lex_allbrackets &&
5793                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5794                     s -= 2;
5795                     TOKEN(0);
5796                 }
5797                 Eop(OP_NE);
5798             }
5799             if (tmp == '~')
5800                 PMop(OP_NOT);
5801         }
5802         s--;
5803         OPERATOR('!');
5804     case '<':
5805         if (PL_expect != XOPERATOR) {
5806             if (s[1] != '<' && !strchr(s,'>'))
5807                 check_uni();
5808             if (s[1] == '<')
5809                 s = scan_heredoc(s);
5810             else
5811                 s = scan_inputsymbol(s);
5812             PL_expect = XOPERATOR;
5813             TOKEN(sublex_start());
5814         }
5815         s++;
5816         {
5817             char tmp = *s++;
5818             if (tmp == '<') {
5819                 if (*s == '=' && !PL_lex_allbrackets &&
5820                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5821                     s -= 2;
5822                     TOKEN(0);
5823                 }
5824                 SHop(OP_LEFT_SHIFT);
5825             }
5826             if (tmp == '=') {
5827                 tmp = *s++;
5828                 if (tmp == '>') {
5829                     if (!PL_lex_allbrackets &&
5830                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5831                         s -= 3;
5832                         TOKEN(0);
5833                     }
5834                     Eop(OP_NCMP);
5835                 }
5836                 s--;
5837                 if (!PL_lex_allbrackets &&
5838                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5839                     s -= 2;
5840                     TOKEN(0);
5841                 }
5842                 Rop(OP_LE);
5843             }
5844         }
5845         s--;
5846         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5847             s--;
5848             TOKEN(0);
5849         }
5850         Rop(OP_LT);
5851     case '>':
5852         s++;
5853         {
5854             const char tmp = *s++;
5855             if (tmp == '>') {
5856                 if (*s == '=' && !PL_lex_allbrackets &&
5857                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5858                     s -= 2;
5859                     TOKEN(0);
5860                 }
5861                 SHop(OP_RIGHT_SHIFT);
5862             }
5863             else if (tmp == '=') {
5864                 if (!PL_lex_allbrackets &&
5865                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5866                     s -= 2;
5867                     TOKEN(0);
5868                 }
5869                 Rop(OP_GE);
5870             }
5871         }
5872         s--;
5873         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5874             s--;
5875             TOKEN(0);
5876         }
5877         Rop(OP_GT);
5878
5879     case '$':
5880         CLINE;
5881
5882         if (PL_expect == XOPERATOR) {
5883             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5884                 return deprecate_commaless_var_list();
5885             }
5886         }
5887         else if (PL_expect == XPOSTDEREF) {
5888             if (s[1] == '#') {
5889                 s++;
5890                 POSTDEREF(DOLSHARP);
5891             }
5892             POSTDEREF('$');
5893         }
5894
5895         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5896             PL_tokenbuf[0] = '@';
5897             s = scan_ident(s + 1, PL_tokenbuf + 1,
5898                            sizeof PL_tokenbuf - 1, FALSE);
5899             if (PL_expect == XOPERATOR)
5900                 no_op("Array length", s);
5901             if (!PL_tokenbuf[1])
5902                 PREREF(DOLSHARP);
5903             PL_expect = XOPERATOR;
5904             force_ident_maybe_lex('#');
5905             TOKEN(DOLSHARP);
5906         }
5907
5908         PL_tokenbuf[0] = '$';
5909         s = scan_ident(s, PL_tokenbuf + 1,
5910                        sizeof PL_tokenbuf - 1, FALSE);
5911         if (PL_expect == XOPERATOR)
5912             no_op("Scalar", s);
5913         if (!PL_tokenbuf[1]) {
5914             if (s == PL_bufend)
5915                 yyerror("Final $ should be \\$ or $name");
5916             PREREF('$');
5917         }
5918
5919         d = s;
5920         {
5921             const char tmp = *s;
5922             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5923                 s = SKIPSPACE1(s);
5924
5925             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5926                 && intuit_more(s)) {
5927                 if (*s == '[') {
5928                     PL_tokenbuf[0] = '@';
5929                     if (ckWARN(WARN_SYNTAX)) {
5930                         char *t = s+1;
5931
5932                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
5933                             t++;
5934                         if (*t++ == ',') {
5935                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5936                             while (t < PL_bufend && *t != ']')
5937                                 t++;
5938                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5939                                         "Multidimensional syntax %.*s not supported",
5940                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5941                         }
5942                     }
5943                 }
5944                 else if (*s == '{') {
5945                     char *t;
5946                     PL_tokenbuf[0] = '%';
5947                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5948                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5949                         {
5950                             char tmpbuf[sizeof PL_tokenbuf];
5951                             do {
5952                                 t++;
5953                             } while (isSPACE(*t));
5954                             if (isIDFIRST_lazy_if(t,UTF)) {
5955                                 STRLEN len;
5956                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5957                                               &len);
5958                                 while (isSPACE(*t))
5959                                     t++;
5960                                 if (*t == ';'
5961                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
5962                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5963                                         "You need to quote \"%"UTF8f"\"",
5964                                          UTF8fARG(UTF, len, tmpbuf));
5965                             }
5966                         }
5967                 }
5968             }
5969
5970             PL_expect = XOPERATOR;
5971             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5972                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5973                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5974                     PL_expect = XOPERATOR;
5975                 else if (strchr("$@\"'`q", *s))
5976                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5977                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5978                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5979                 else if (isIDFIRST_lazy_if(s,UTF)) {
5980                     char tmpbuf[sizeof PL_tokenbuf];
5981                     int t2;
5982                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5983                     if ((t2 = keyword(tmpbuf, len, 0))) {
5984                         /* binary operators exclude handle interpretations */
5985                         switch (t2) {
5986                         case -KEY_x:
5987                         case -KEY_eq:
5988                         case -KEY_ne:
5989                         case -KEY_gt:
5990                         case -KEY_lt:
5991                         case -KEY_ge:
5992                         case -KEY_le:
5993                         case -KEY_cmp:
5994                             break;
5995                         default:
5996                             PL_expect = XTERM;  /* e.g. print $fh length() */
5997                             break;
5998                         }
5999                     }
6000                     else {
6001                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6002                     }
6003                 }
6004                 else if (isDIGIT(*s))
6005                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6006                 else if (*s == '.' && isDIGIT(s[1]))
6007                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6008                 else if ((*s == '?' || *s == '-' || *s == '+')
6009                          && !isSPACE(s[1]) && s[1] != '=')
6010                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6011                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6012                          && s[1] != '/')
6013                     PL_expect = XTERM;          /* e.g. print $fh /.../
6014                                                    XXX except DORDOR operator
6015                                                 */
6016                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6017                          && s[2] != '=')
6018                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6019             }
6020         }
6021         force_ident_maybe_lex('$');
6022         TOKEN('$');
6023
6024     case '@':
6025         if (PL_expect == XOPERATOR)
6026             no_op("Array", s);
6027         else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6028         PL_tokenbuf[0] = '@';
6029         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6030         pl_yylval.ival = 0;
6031         if (!PL_tokenbuf[1]) {
6032             PREREF('@');
6033         }
6034         if (PL_lex_state == LEX_NORMAL)
6035             s = SKIPSPACE1(s);
6036         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6037             if (*s == '{')
6038                 PL_tokenbuf[0] = '%';
6039
6040             /* Warn about @ where they meant $. */
6041             if (*s == '[' || *s == '{') {
6042                 if (ckWARN(WARN_SYNTAX)) {
6043                     S_check_scalar_slice(aTHX_ s);
6044                 }
6045             }
6046         }
6047         PL_expect = XOPERATOR;
6048         force_ident_maybe_lex('@');
6049         TERM('@');
6050
6051      case '/':                  /* may be division, defined-or, or pattern */
6052         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6053             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6054                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6055                 TOKEN(0);
6056             s += 2;
6057             AOPERATOR(DORDOR);
6058         }
6059         else if (PL_expect == XOPERATOR) {
6060             s++;
6061             if (*s == '=' && !PL_lex_allbrackets &&
6062                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6063                 s--;
6064                 TOKEN(0);
6065             }
6066             Mop(OP_DIVIDE);
6067         }
6068         else {
6069             /* Disable warning on "study /blah/" */
6070             if (PL_oldoldbufptr == PL_last_uni
6071              && (*PL_last_uni != 's' || s - PL_last_uni < 5
6072                  || memNE(PL_last_uni, "study", 5)
6073                  || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6074              ))
6075                 check_uni();
6076             s = scan_pat(s,OP_MATCH);
6077             TERM(sublex_start());
6078         }
6079
6080      case '?':                  /* conditional */
6081         s++;
6082         if (!PL_lex_allbrackets &&
6083             PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6084             s--;
6085             TOKEN(0);
6086         }
6087         PL_lex_allbrackets++;
6088         OPERATOR('?');
6089
6090     case '.':
6091         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6092 #ifdef PERL_STRICT_CR
6093             && s[1] == '\n'
6094 #else
6095             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6096 #endif
6097             && (s == PL_linestart || s[-1] == '\n') )
6098         {
6099             PL_expect = XSTATE;
6100             formbrack = 2; /* dot seen where arguments expected */
6101             goto rightbracket;
6102         }
6103         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6104             s += 3;
6105             OPERATOR(YADAYADA);
6106         }
6107         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6108             char tmp = *s++;
6109             if (*s == tmp) {
6110                 if (!PL_lex_allbrackets &&
6111                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6112                     s--;
6113                     TOKEN(0);
6114                 }
6115                 s++;
6116                 if (*s == tmp) {
6117                     s++;
6118                     pl_yylval.ival = OPf_SPECIAL;
6119                 }
6120                 else
6121                     pl_yylval.ival = 0;
6122                 OPERATOR(DOTDOT);
6123             }
6124             if (*s == '=' && !PL_lex_allbrackets &&
6125                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6126                 s--;
6127                 TOKEN(0);
6128             }
6129             Aop(OP_CONCAT);
6130         }
6131         /* FALLTHROUGH */
6132     case '0': case '1': case '2': case '3': case '4':
6133     case '5': case '6': case '7': case '8': case '9':
6134         s = scan_num(s, &pl_yylval);
6135         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6136         if (PL_expect == XOPERATOR)
6137             no_op("Number",s);
6138         TERM(THING);
6139
6140     case '\'':
6141         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6142         if (!s)
6143             missingterm(NULL);
6144         COPLINE_SET_FROM_MULTI_END;
6145         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6146         if (PL_expect == XOPERATOR) {
6147             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6148                 return deprecate_commaless_var_list();
6149             }
6150             else
6151                 no_op("String",s);
6152         }
6153         pl_yylval.ival = OP_CONST;
6154         TERM(sublex_start());
6155
6156     case '"':
6157         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6158         DEBUG_T( {
6159             if (s)
6160                 printbuf("### Saw string before %s\n", s);
6161             else
6162                 PerlIO_printf(Perl_debug_log,
6163                              "### Saw unterminated string\n");
6164         } );
6165         if (PL_expect == XOPERATOR) {
6166             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6167                 return deprecate_commaless_var_list();
6168             }
6169             else
6170                 no_op("String",s);
6171         }
6172         if (!s)
6173             missingterm(NULL);
6174         pl_yylval.ival = OP_CONST;
6175         /* FIXME. I think that this can be const if char *d is replaced by
6176            more localised variables.  */
6177         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6178             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6179                 pl_yylval.ival = OP_STRINGIFY;
6180                 break;
6181             }
6182         }
6183         if (pl_yylval.ival == OP_CONST)
6184             COPLINE_SET_FROM_MULTI_END;
6185         TERM(sublex_start());
6186
6187     case '`':
6188         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6189         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6190         if (PL_expect == XOPERATOR)
6191             no_op("Backticks",s);
6192         if (!s)
6193             missingterm(NULL);
6194         pl_yylval.ival = OP_BACKTICK;
6195         TERM(sublex_start());
6196
6197     case '\\':
6198         s++;
6199         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6200          && isDIGIT(*s))
6201             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6202                            *s, *s);
6203         if (PL_expect == XOPERATOR)
6204             no_op("Backslash",s);
6205         OPERATOR(REFGEN);
6206
6207     case 'v':
6208         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6209             char *start = s + 2;
6210             while (isDIGIT(*start) || *start == '_')
6211                 start++;
6212             if (*start == '.' && isDIGIT(start[1])) {
6213                 s = scan_num(s, &pl_yylval);
6214                 TERM(THING);
6215             }
6216             else if ((*start == ':' && start[1] == ':')
6217                   || (PL_expect == XSTATE && *start == ':'))
6218                 goto keylookup;
6219             else if (PL_expect == XSTATE) {
6220                 d = start;
6221                 while (d < PL_bufend && isSPACE(*d)) d++;
6222                 if (*d == ':') goto keylookup;
6223             }
6224             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6225             if (!isALPHA(*start) && (PL_expect == XTERM
6226                         || PL_expect == XREF || PL_expect == XSTATE
6227                         || PL_expect == XTERMORDORDOR)) {
6228                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6229                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6230                 if (!gv) {
6231                     s = scan_num(s, &pl_yylval);
6232                     TERM(THING);
6233                 }
6234             }
6235         }
6236         goto keylookup;
6237     case 'x':
6238         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6239             s++;
6240             Mop(OP_REPEAT);
6241         }
6242         goto keylookup;
6243
6244     case '_':
6245     case 'a': case 'A':
6246     case 'b': case 'B':
6247     case 'c': case 'C':
6248     case 'd': case 'D':
6249     case 'e': case 'E':
6250     case 'f': case 'F':
6251     case 'g': case 'G':
6252     case 'h': case 'H':
6253     case 'i': case 'I':
6254     case 'j': case 'J':
6255     case 'k': case 'K':
6256     case 'l': case 'L':
6257     case 'm': case 'M':
6258     case 'n': case 'N':
6259     case 'o': case 'O':
6260     case 'p': case 'P':
6261     case 'q': case 'Q':
6262     case 'r': case 'R':
6263     case 's': case 'S':
6264     case 't': case 'T':
6265     case 'u': case 'U':
6266               case 'V':
6267     case 'w': case 'W':
6268               case 'X':
6269     case 'y': case 'Y':
6270     case 'z': case 'Z':
6271
6272       keylookup: {
6273         bool anydelim;
6274         bool lex;
6275         I32 tmp;
6276         SV *sv;
6277         CV *cv;
6278         PADOFFSET off;
6279         OP *rv2cv_op;
6280
6281         lex = FALSE;
6282         orig_keyword = 0;
6283         off = 0;
6284         sv = NULL;
6285         cv = NULL;
6286         gv = NULL;
6287         gvp = NULL;
6288         rv2cv_op = NULL;
6289
6290         PL_bufptr = s;
6291         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6292
6293         /* Some keywords can be followed by any delimiter, including ':' */
6294         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6295
6296         /* x::* is just a word, unless x is "CORE" */
6297         if (!anydelim && *s == ':' && s[1] == ':') {
6298             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6299             goto just_a_word;
6300         }
6301
6302         d = s;
6303         while (d < PL_bufend && isSPACE(*d))
6304                 d++;    /* no comments skipped here, or s### is misparsed */
6305
6306         /* Is this a word before a => operator? */
6307         if (*d == '=' && d[1] == '>') {
6308           fat_arrow:
6309             CLINE;
6310             pl_yylval.opval
6311                 = (OP*)newSVOP(OP_CONST, 0,
6312                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6313             pl_yylval.opval->op_private = OPpCONST_BARE;
6314             TERM(WORD);
6315         }
6316
6317         /* Check for plugged-in keyword */
6318         {
6319             OP *o;
6320             int result;
6321             char *saved_bufptr = PL_bufptr;
6322             PL_bufptr = s;
6323             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6324             s = PL_bufptr;
6325             if (result == KEYWORD_PLUGIN_DECLINE) {
6326                 /* not a plugged-in keyword */
6327                 PL_bufptr = saved_bufptr;
6328             } else if (result == KEYWORD_PLUGIN_STMT) {
6329                 pl_yylval.opval = o;
6330                 CLINE;
6331                 PL_expect = XSTATE;
6332                 return REPORT(PLUGSTMT);
6333             } else if (result == KEYWORD_PLUGIN_EXPR) {
6334                 pl_yylval.opval = o;
6335                 CLINE;
6336                 PL_expect = XOPERATOR;
6337                 return REPORT(PLUGEXPR);
6338             } else {
6339                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6340                                         PL_tokenbuf);
6341             }
6342         }
6343
6344         /* Check for built-in keyword */
6345         tmp = keyword(PL_tokenbuf, len, 0);
6346
6347         /* Is this a label? */
6348         if (!anydelim && PL_expect == XSTATE
6349               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6350             s = d + 1;
6351             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6352             pl_yylval.pval[len] = '\0';
6353             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6354             CLINE;
6355             TOKEN(LABEL);
6356         }
6357
6358         /* Check for lexical sub */
6359         if (PL_expect != XOPERATOR) {
6360             char tmpbuf[sizeof PL_tokenbuf + 1];
6361             *tmpbuf = '&';
6362             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6363             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6364             if (off != NOT_IN_PAD) {
6365                 assert(off); /* we assume this is boolean-true below */
6366                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6367                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6368                     HEK * const stashname = HvNAME_HEK(stash);
6369                     sv = newSVhek(stashname);
6370                     sv_catpvs(sv, "::");
6371                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6372                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6373                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6374                                     SVt_PVCV);
6375                     off = 0;
6376                     if (!gv) {
6377                         sv_free(sv);
6378                         sv = NULL;
6379                         goto just_a_word;
6380                     }
6381                 }
6382                 else {
6383                     rv2cv_op = newOP(OP_PADANY, 0);
6384                     rv2cv_op->op_targ = off;
6385                     cv = find_lexical_cv(off);
6386                 }
6387                 lex = TRUE;
6388                 goto just_a_word;
6389             }
6390             off = 0;
6391         }
6392
6393         if (tmp < 0) {                  /* second-class keyword? */
6394             GV *ogv = NULL;     /* override (winner) */
6395             GV *hgv = NULL;     /* hidden (loser) */
6396             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6397                 CV *cv;
6398                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6399                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6400                                             SVt_PVCV)) &&
6401                     (cv = GvCVu(gv)))
6402                 {
6403                     if (GvIMPORTED_CV(gv))
6404                         ogv = gv;
6405                     else if (! CvMETHOD(cv))
6406                         hgv = gv;
6407                 }
6408                 if (!ogv &&
6409                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6410                                           len, FALSE)) &&
6411                     (gv = *gvp) && (
6412                         isGV_with_GP(gv)
6413                             ? GvCVu(gv) && GvIMPORTED_CV(gv)
6414                             :   SvPCS_IMPORTED(gv)
6415                              && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6416                                          len, 0), 1)
6417                    ))
6418                 {
6419                     ogv = gv;
6420                 }
6421             }
6422             if (ogv) {
6423                 orig_keyword = tmp;
6424                 tmp = 0;                /* overridden by import or by GLOBAL */
6425             }
6426             else if (gv && !gvp
6427                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6428                      && GvCVu(gv))
6429             {
6430                 tmp = 0;                /* any sub overrides "weak" keyword */
6431             }
6432             else {                      /* no override */
6433                 tmp = -tmp;
6434                 if (tmp == KEY_dump) {
6435                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6436                                    "dump() better written as CORE::dump()");
6437                 }
6438                 gv = NULL;
6439                 gvp = 0;
6440                 if (hgv && tmp != KEY_x)        /* never ambiguous */
6441                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6442                                    "Ambiguous call resolved as CORE::%s(), "
6443                                    "qualify as such or use &",
6444                                    GvENAME(hgv));
6445             }
6446         }
6447
6448         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6449          && (!anydelim || *s != '#')) {
6450             /* no override, and not s### either; skipspace is safe here
6451              * check for => on following line */
6452             bool arrow;
6453             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6454             STRLEN   soff = s         - SvPVX(PL_linestr);
6455             s = skipspace_flags(s, LEX_NO_INCLINE);
6456             arrow = *s == '=' && s[1] == '>';
6457             PL_bufptr = SvPVX(PL_linestr) + bufoff;
6458             s         = SvPVX(PL_linestr) +   soff;
6459             if (arrow)
6460                 goto fat_arrow;
6461         }
6462
6463       reserved_word:
6464         switch (tmp) {
6465
6466         default:                        /* not a keyword */
6467             /* Trade off - by using this evil construction we can pull the
6468                variable gv into the block labelled keylookup. If not, then
6469                we have to give it function scope so that the goto from the
6470                earlier ':' case doesn't bypass the initialisation.  */
6471             if (0) {
6472             just_a_word_zero_gv:
6473                 sv = NULL;
6474                 cv = NULL;
6475                 gv = NULL;
6476                 gvp = NULL;
6477                 rv2cv_op = NULL;
6478                 orig_keyword = 0;
6479                 lex = 0;
6480                 off = 0;
6481             }
6482           just_a_word: {
6483                 int pkgname = 0;
6484                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6485                 const char penultchar =
6486                     lastchar && PL_bufptr - 2 >= PL_linestart
6487                          ? PL_bufptr[-2]
6488                          : 0;
6489
6490
6491                 /* Get the rest if it looks like a package qualifier */
6492
6493                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6494                     STRLEN morelen;
6495                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6496                                   TRUE, &morelen);
6497                     if (!morelen)
6498                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6499                                 UTF8fARG(UTF, len, PL_tokenbuf),
6500                                 *s == '\'' ? "'" : "::");
6501                     len += morelen;
6502                     pkgname = 1;
6503                 }
6504
6505                 if (PL_expect == XOPERATOR) {
6506                     if (PL_bufptr == PL_linestart) {
6507                         CopLINE_dec(PL_curcop);
6508                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6509                         CopLINE_inc(PL_curcop);
6510                     }
6511                     else
6512                         no_op("Bareword",s);
6513                 }
6514
6515                 /* Look for a subroutine with this name in current package,
6516                    unless this is a lexical sub, or name is "Foo::",
6517                    in which case Foo is a bareword
6518                    (and a package name). */
6519
6520                 if (len > 2 &&
6521                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6522                 {
6523                     if (ckWARN(WARN_BAREWORD)
6524                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6525                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6526                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
6527                            UTF8fARG(UTF, len, PL_tokenbuf));
6528                     len -= 2;
6529                     PL_tokenbuf[len] = '\0';
6530                     gv = NULL;
6531                     gvp = 0;
6532                 }
6533                 else {
6534                     if (!lex && !gv) {
6535                         /* Mustn't actually add anything to a symbol table.
6536                            But also don't want to "initialise" any placeholder
6537                            constants that might already be there into full
6538                            blown PVGVs with attached PVCV.  */
6539                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6540                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6541                                                SVt_PVCV);
6542                     }
6543                     len = 0;
6544                 }
6545
6546                 /* if we saw a global override before, get the right name */
6547
6548                 if (!sv)
6549                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6550                     len ? len : strlen(PL_tokenbuf));
6551                 if (gvp) {
6552                     SV * const tmp_sv = sv;
6553                     sv = newSVpvs("CORE::GLOBAL::");
6554                     sv_catsv(sv, tmp_sv);
6555                     SvREFCNT_dec(tmp_sv);
6556                 }
6557
6558
6559                 /* Presume this is going to be a bareword of some sort. */
6560                 CLINE;
6561                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6562                 pl_yylval.opval->op_private = OPpCONST_BARE;
6563
6564                 /* And if "Foo::", then that's what it certainly is. */
6565                 if (len)
6566                     goto safe_bareword;
6567
6568                 if (!off)
6569                 {
6570                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6571                     const_op->op_private = OPpCONST_BARE;
6572                     rv2cv_op = newCVREF(0, const_op);
6573                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6574                 }
6575
6576                 /* See if it's the indirect object for a list operator. */
6577
6578                 if (PL_oldoldbufptr &&
6579                     PL_oldoldbufptr < PL_bufptr &&
6580                     (PL_oldoldbufptr == PL_last_lop
6581                      || PL_oldoldbufptr == PL_last_uni) &&
6582                     /* NO SKIPSPACE BEFORE HERE! */
6583                     (PL_expect == XREF ||
6584                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6585                 {
6586                     bool immediate_paren = *s == '(';
6587
6588                     /* (Now we can afford to cross potential line boundary.) */
6589                     s = SKIPSPACE2(s,nextPL_nextwhite);
6590
6591                     /* Two barewords in a row may indicate method call. */
6592
6593                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6594                         (tmp = intuit_method(s, gv, cv))) {
6595                         op_free(rv2cv_op);
6596                         if (tmp == METHOD && !PL_lex_allbrackets &&
6597                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6598                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6599                         return REPORT(tmp);
6600                     }
6601
6602                     /* If not a declared subroutine, it's an indirect object. */
6603                     /* (But it's an indir obj regardless for sort.) */
6604                     /* Also, if "_" follows a filetest operator, it's a bareword */
6605
6606                     if (
6607                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6608                          (!cv &&
6609                         (PL_last_lop_op != OP_MAPSTART &&
6610                          PL_last_lop_op != OP_GREPSTART))))
6611                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6612                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6613                        )
6614                     {
6615                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6616                         goto bareword;
6617                     }
6618                 }
6619
6620                 PL_expect = XOPERATOR;
6621                 s = skipspace(s);
6622
6623                 /* Is this a word before a => operator? */
6624                 if (*s == '=' && s[1] == '>' && !pkgname) {
6625                     op_free(rv2cv_op);
6626                     CLINE;
6627                     /* This is our own scalar, created a few lines above,
6628                        so this is safe. */
6629                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
6630                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6631                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6632                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6633                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
6634                     TERM(WORD);
6635                 }
6636
6637                 /* If followed by a paren, it's certainly a subroutine. */
6638                 if (*s == '(') {
6639                     CLINE;
6640                     if (cv) {
6641                         d = s + 1;
6642                         while (SPACE_OR_TAB(*d))
6643                             d++;
6644                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6645                             s = d + 1;
6646                             goto its_constant;
6647                         }
6648                     }
6649                     NEXTVAL_NEXTTOKE.opval =
6650                         off ? rv2cv_op : pl_yylval.opval;
6651                     PL_expect = XOPERATOR;
6652                     if (off)
6653                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
6654                     else op_free(rv2cv_op),        force_next(WORD);
6655                     pl_yylval.ival = 0;
6656                     TOKEN('&');
6657                 }
6658
6659                 /* If followed by var or block, call it a method (unless sub) */
6660
6661                 if ((*s == '$' || *s == '{') && !cv) {
6662                     op_free(rv2cv_op);
6663                     PL_last_lop = PL_oldbufptr;
6664                     PL_last_lop_op = OP_METHOD;
6665                     if (!PL_lex_allbrackets &&
6666                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6667                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6668                     PREBLOCK(METHOD);
6669                 }
6670
6671                 /* If followed by a bareword, see if it looks like indir obj. */
6672
6673                 if (!orig_keyword
6674                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6675                         && (tmp = intuit_method(s, gv, cv))) {
6676                     op_free(rv2cv_op);
6677                     if (tmp == METHOD && !PL_lex_allbrackets &&
6678                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6679                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6680                     return REPORT(tmp);
6681                 }
6682
6683                 /* Not a method, so call it a subroutine (if defined) */
6684
6685                 if (cv) {
6686                     if (lastchar == '-' && penultchar != '-') {
6687                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
6688                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6689                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
6690                              UTF8fARG(UTF, l, PL_tokenbuf),
6691                              UTF8fARG(UTF, l, PL_tokenbuf));
6692                     }
6693                     /* Check for a constant sub */
6694                     if ((sv = cv_const_sv_or_av(cv))) {
6695                   its_constant:
6696                         op_free(rv2cv_op);
6697                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6698                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6699                         if (SvTYPE(sv) == SVt_PVAV)
6700                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6701                                                       pl_yylval.opval);
6702                         else {
6703                             pl_yylval.opval->op_private = 0;
6704                             pl_yylval.opval->op_folded = 1;
6705                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
6706                         }
6707                         TOKEN(WORD);
6708                     }
6709
6710                     op_free(pl_yylval.opval);
6711                     pl_yylval.opval =
6712                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6713                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6714                     PL_last_lop = PL_oldbufptr;
6715                     PL_last_lop_op = OP_ENTERSUB;
6716                     /* Is there a prototype? */
6717                     if (
6718                         SvPOK(cv))
6719                     {
6720                         STRLEN protolen = CvPROTOLEN(cv);
6721                         const char *proto = CvPROTO(cv);
6722                         bool optional;
6723                         proto = S_strip_spaces(aTHX_ proto, &protolen);
6724                         if (!protolen)
6725                             TERM(FUNC0SUB);
6726                         if ((optional = *proto == ';'))
6727                           do
6728                             proto++;
6729                           while (*proto == ';');
6730                         if (
6731                             (
6732                                 (
6733                                     *proto == '$' || *proto == '_'
6734                                  || *proto == '*' || *proto == '+'
6735                                 )
6736                              && proto[1] == '\0'
6737                             )
6738                          || (
6739                              *proto == '\\' && proto[1] && proto[2] == '\0'
6740                             )
6741                         )
6742                             UNIPROTO(UNIOPSUB,optional);
6743                         if (*proto == '\\' && proto[1] == '[') {
6744                             const char *p = proto + 2;
6745                             while(*p && *p != ']')
6746                                 ++p;
6747                             if(*p == ']' && !p[1])
6748                                 UNIPROTO(UNIOPSUB,optional);
6749                         }
6750                         if (*proto == '&' && *s == '{') {
6751                             if (PL_curstash)
6752                                 sv_setpvs(PL_subname, "__ANON__");
6753                             else
6754                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6755                             if (!PL_lex_allbrackets &&
6756                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6757                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6758                             PREBLOCK(LSTOPSUB);
6759                         }
6760                     }
6761                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6762                     PL_expect = XTERM;
6763                     force_next(off ? PRIVATEREF : WORD);
6764                     if (!PL_lex_allbrackets &&
6765                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6766                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6767                     TOKEN(NOAMP);
6768                 }
6769
6770                 /* Call it a bare word */
6771
6772                 if (PL_hints & HINT_STRICT_SUBS)
6773                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6774                 else {
6775                 bareword:
6776                     /* after "print" and similar functions (corresponding to
6777                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6778                      * a filehandle should be subject to "strict subs".
6779                      * Likewise for the optional indirect-object argument to system
6780                      * or exec, which can't be a bareword */
6781                     if ((PL_last_lop_op == OP_PRINT
6782                             || PL_last_lop_op == OP_PRTF
6783                             || PL_last_lop_op == OP_SAY
6784                             || PL_last_lop_op == OP_SYSTEM
6785                             || PL_last_lop_op == OP_EXEC)
6786                             && (PL_hints & HINT_STRICT_SUBS))
6787                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6788                     if (lastchar != '-') {
6789                         if (ckWARN(WARN_RESERVED)) {
6790                             d = PL_tokenbuf;
6791                             while (isLOWER(*d))
6792                                 d++;
6793                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6794                             {
6795                                 /* PL_warn_reserved is constant */
6796                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6797                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6798                                        PL_tokenbuf);
6799                                 GCC_DIAG_RESTORE;
6800                             }
6801                         }
6802                     }
6803                 }
6804                 op_free(rv2cv_op);
6805
6806             safe_bareword:
6807                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6808                  && saw_infix_sigil) {
6809                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6810                                      "Operator or semicolon missing before %c%"UTF8f,
6811                                      lastchar,
6812                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
6813                                               PL_tokenbuf));
6814                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6815                                      "Ambiguous use of %c resolved as operator %c",
6816                                      lastchar, lastchar);
6817                 }
6818                 TOKEN(WORD);
6819             }
6820
6821         case KEY___FILE__:
6822             FUN0OP(
6823                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6824             );
6825
6826         case KEY___LINE__:
6827             FUN0OP(
6828                 (OP*)newSVOP(OP_CONST, 0,
6829                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6830             );
6831
6832         case KEY___PACKAGE__:
6833             FUN0OP(
6834                 (OP*)newSVOP(OP_CONST, 0,
6835                                         (PL_curstash
6836                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6837                                          : &PL_sv_undef))
6838             );
6839
6840         case KEY___DATA__:
6841         case KEY___END__: {
6842             GV *gv;
6843             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6844                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6845                                         ? PL_curstash
6846                                         : PL_defstash;
6847                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6848                 if (!isGV(gv))
6849                     gv_init(gv,stash,"DATA",4,0);
6850                 GvMULTI_on(gv);
6851                 if (!GvIO(gv))
6852                     GvIOp(gv) = newIO();
6853                 IoIFP(GvIOp(gv)) = PL_rsfp;
6854 #if defined(HAS_FCNTL) && defined(F_SETFD)
6855                 {
6856                     const int fd = PerlIO_fileno(PL_rsfp);
6857                     fcntl(fd,F_SETFD,fd >= 3);
6858                 }
6859 #endif
6860                 /* Mark this internal pseudo-handle as clean */
6861                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6862                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6863                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6864                 else
6865                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6866 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6867                 /* if the script was opened in binmode, we need to revert
6868                  * it to text mode for compatibility; but only iff it has CRs
6869                  * XXX this is a questionable hack at best. */
6870                 if (PL_bufend-PL_bufptr > 2
6871                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6872                 {
6873                     Off_t loc = 0;
6874                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6875                         loc = PerlIO_tell(PL_rsfp);
6876                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6877                     }
6878 #ifdef NETWARE
6879                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6880 #else
6881                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6882 #endif  /* NETWARE */
6883                         if (loc > 0)
6884                             PerlIO_seek(PL_rsfp, loc, 0);
6885                     }
6886                 }
6887 #endif
6888 #ifdef PERLIO_LAYERS
6889                 if (!IN_BYTES) {
6890                     if (UTF)
6891                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6892                     else if (PL_encoding) {
6893                         SV *name;
6894                         dSP;
6895                         ENTER;
6896                         SAVETMPS;
6897                         PUSHMARK(sp);
6898                         XPUSHs(PL_encoding);
6899                         PUTBACK;
6900                         call_method("name", G_SCALAR);
6901                         SPAGAIN;
6902                         name = POPs;
6903                         PUTBACK;
6904                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6905                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6906                                                       SVfARG(name)));
6907                         FREETMPS;
6908                         LEAVE;
6909                     }
6910                 }
6911 #endif
6912                 PL_rsfp = NULL;
6913             }
6914             goto fake_eof;
6915         }
6916
6917         case KEY___SUB__:
6918             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
6919
6920         case KEY_AUTOLOAD:
6921         case KEY_DESTROY:
6922         case KEY_BEGIN:
6923         case KEY_UNITCHECK:
6924         case KEY_CHECK:
6925         case KEY_INIT:
6926         case KEY_END:
6927             if (PL_expect == XSTATE) {
6928                 s = PL_bufptr;
6929                 goto really_sub;
6930             }
6931             goto just_a_word;
6932
6933         case_KEY_CORE:
6934             {
6935                 STRLEN olen = len;
6936                 d = s;
6937                 s += 2;
6938                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6939                 if ((*s == ':' && s[1] == ':')
6940                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
6941                 {
6942                     s = d;
6943                     len = olen;
6944                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
6945                     goto just_a_word;
6946                 }
6947                 if (!tmp)
6948                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
6949                                       UTF8fARG(UTF, len, PL_tokenbuf));
6950                 if (tmp < 0)
6951                     tmp = -tmp;
6952                 else if (tmp == KEY_require || tmp == KEY_do
6953                       || tmp == KEY_glob)
6954                     /* that's a way to remember we saw "CORE::" */
6955                     orig_keyword = tmp;
6956                 goto reserved_word;
6957             }
6958
6959         case KEY_abs:
6960             UNI(OP_ABS);
6961
6962         case KEY_alarm:
6963             UNI(OP_ALARM);
6964
6965         case KEY_accept:
6966             LOP(OP_ACCEPT,XTERM);
6967
6968         case KEY_and:
6969             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
6970                 return REPORT(0);
6971             OPERATOR(ANDOP);
6972
6973         case KEY_atan2:
6974             LOP(OP_ATAN2,XTERM);
6975
6976         case KEY_bind:
6977             LOP(OP_BIND,XTERM);
6978
6979         case KEY_binmode:
6980             LOP(OP_BINMODE,XTERM);
6981
6982         case KEY_bless:
6983             LOP(OP_BLESS,XTERM);
6984
6985         case KEY_break:
6986             FUN0(OP_BREAK);
6987
6988         case KEY_chop:
6989             UNI(OP_CHOP);
6990
6991         case KEY_continue:
6992                     /* We have to disambiguate the two senses of
6993                       "continue". If the next token is a '{' then
6994                       treat it as the start of a continue block;
6995                       otherwise treat it as a control operator.
6996                      */
6997                     s = skipspace(s);
6998                     if (*s == '{')
6999             PREBLOCK(CONTINUE);
7000                     else
7001                         FUN0(OP_CONTINUE);
7002
7003         case KEY_chdir:
7004             /* may use HOME */
7005             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7006             UNI(OP_CHDIR);
7007
7008         case KEY_close:
7009             UNI(OP_CLOSE);
7010
7011         case KEY_closedir:
7012             UNI(OP_CLOSEDIR);
7013
7014         case KEY_cmp:
7015             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7016                 return REPORT(0);
7017             Eop(OP_SCMP);
7018
7019         case KEY_caller:
7020             UNI(OP_CALLER);
7021
7022         case KEY_crypt:
7023 #ifdef FCRYPT
7024             if (!PL_cryptseen) {
7025                 PL_cryptseen = TRUE;
7026                 init_des();
7027             }
7028 #endif
7029             LOP(OP_CRYPT,XTERM);
7030
7031         case KEY_chmod:
7032             LOP(OP_CHMOD,XTERM);
7033
7034         case KEY_chown:
7035             LOP(OP_CHOWN,XTERM);
7036
7037         case KEY_connect:
7038             LOP(OP_CONNECT,XTERM);
7039
7040         case KEY_chr:
7041             UNI(OP_CHR);
7042
7043         case KEY_cos:
7044             UNI(OP_COS);
7045
7046         case KEY_chroot:
7047             UNI(OP_CHROOT);
7048
7049         case KEY_default:
7050             PREBLOCK(DEFAULT);
7051
7052         case KEY_do:
7053             s = SKIPSPACE1(s);
7054             if (*s == '{')
7055                 PRETERMBLOCK(DO);
7056             if (*s != '\'') {
7057                 *PL_tokenbuf = '&';
7058                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7059                               1, &len);
7060                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7061                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7062                     d = SKIPSPACE1(d);
7063                     if (*d == '(') {
7064                         force_ident_maybe_lex('&');
7065                         s = d;
7066                     }
7067                 }
7068             }
7069             if (orig_keyword == KEY_do) {
7070                 orig_keyword = 0;
7071                 pl_yylval.ival = 1;
7072             }
7073             else
7074                 pl_yylval.ival = 0;
7075             OPERATOR(DO);
7076
7077         case KEY_die:
7078             PL_hints |= HINT_BLOCK_SCOPE;
7079             LOP(OP_DIE,XTERM);
7080
7081         case KEY_defined:
7082             UNI(OP_DEFINED);
7083
7084         case KEY_delete:
7085             UNI(OP_DELETE);
7086
7087         case KEY_dbmopen:
7088             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7089                               STR_WITH_LEN("NDBM_File::"),
7090                               STR_WITH_LEN("DB_File::"),
7091                               STR_WITH_LEN("GDBM_File::"),
7092                               STR_WITH_LEN("SDBM_File::"),
7093                               STR_WITH_LEN("ODBM_File::"),
7094                               NULL);
7095             LOP(OP_DBMOPEN,XTERM);
7096
7097         case KEY_dbmclose:
7098             UNI(OP_DBMCLOSE);
7099
7100         case KEY_dump:
7101             PL_expect = XOPERATOR;
7102             s = force_word(s,WORD,TRUE,FALSE);
7103             LOOPX(OP_DUMP);
7104
7105         case KEY_else:
7106             PREBLOCK(ELSE);
7107
7108         case KEY_elsif:
7109             pl_yylval.ival = CopLINE(PL_curcop);
7110             OPERATOR(ELSIF);
7111
7112         case KEY_eq:
7113             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7114                 return REPORT(0);
7115             Eop(OP_SEQ);
7116
7117         case KEY_exists:
7118             UNI(OP_EXISTS);
7119
7120         case KEY_exit:
7121             UNI(OP_EXIT);
7122
7123         case KEY_eval:
7124             s = SKIPSPACE1(s);
7125             if (*s == '{') { /* block eval */
7126                 PL_expect = XTERMBLOCK;
7127                 UNIBRACK(OP_ENTERTRY);
7128             }
7129             else { /* string eval */
7130                 PL_expect = XTERM;
7131                 UNIBRACK(OP_ENTEREVAL);
7132             }
7133
7134         case KEY_evalbytes:
7135             PL_expect = XTERM;
7136             UNIBRACK(-OP_ENTEREVAL);
7137
7138         case KEY_eof:
7139             UNI(OP_EOF);
7140
7141         case KEY_exp:
7142             UNI(OP_EXP);
7143
7144         case KEY_each:
7145             UNI(OP_EACH);
7146
7147         case KEY_exec:
7148             LOP(OP_EXEC,XREF);
7149
7150         case KEY_endhostent:
7151             FUN0(OP_EHOSTENT);
7152
7153         case KEY_endnetent:
7154             FUN0(OP_ENETENT);
7155
7156         case KEY_endservent:
7157             FUN0(OP_ESERVENT);
7158
7159         case KEY_endprotoent:
7160             FUN0(OP_EPROTOENT);
7161
7162         case KEY_endpwent:
7163             FUN0(OP_EPWENT);
7164
7165         case KEY_endgrent:
7166             FUN0(OP_EGRENT);
7167
7168         case KEY_for:
7169         case KEY_foreach:
7170             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7171                 return REPORT(0);
7172             pl_yylval.ival = CopLINE(PL_curcop);
7173             s = SKIPSPACE1(s);
7174             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7175                 char *p = s;
7176
7177                 if ((PL_bufend - p) >= 3 &&
7178                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7179                     p += 2;
7180                 else if ((PL_bufend - p) >= 4 &&
7181                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7182                     p += 3;
7183                 p = PEEKSPACE(p);
7184                 /* skip optional package name, as in "for my abc $x (..)" */
7185                 if (isIDFIRST_lazy_if(p,UTF)) {
7186                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7187                     p = PEEKSPACE(p);
7188                 }
7189                 if (*p != '$')
7190                     Perl_croak(aTHX_ "Missing $ on loop variable");
7191             }
7192             OPERATOR(FOR);
7193
7194         case KEY_formline:
7195             LOP(OP_FORMLINE,XTERM);
7196
7197         case KEY_fork:
7198             FUN0(OP_FORK);
7199
7200         case KEY_fc:
7201             UNI(OP_FC);
7202
7203         case KEY_fcntl:
7204             LOP(OP_FCNTL,XTERM);
7205
7206         case KEY_fileno:
7207             UNI(OP_FILENO);
7208
7209         case KEY_flock:
7210             LOP(OP_FLOCK,XTERM);
7211
7212         case KEY_gt:
7213             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7214                 return REPORT(0);
7215             Rop(OP_SGT);
7216
7217         case KEY_ge:
7218             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7219                 return REPORT(0);
7220             Rop(OP_SGE);
7221
7222         case KEY_grep:
7223             LOP(OP_GREPSTART, XREF);
7224
7225         case KEY_goto:
7226             PL_expect = XOPERATOR;
7227             s = force_word(s,WORD,TRUE,FALSE);
7228             LOOPX(OP_GOTO);
7229
7230         case KEY_gmtime:
7231             UNI(OP_GMTIME);
7232
7233         case KEY_getc:
7234             UNIDOR(OP_GETC);
7235
7236         case KEY_getppid:
7237             FUN0(OP_GETPPID);
7238
7239         case KEY_getpgrp:
7240             UNI(OP_GETPGRP);
7241
7242         case KEY_getpriority:
7243             LOP(OP_GETPRIORITY,XTERM);
7244
7245         case KEY_getprotobyname:
7246             UNI(OP_GPBYNAME);
7247
7248         case KEY_getprotobynumber:
7249             LOP(OP_GPBYNUMBER,XTERM);
7250
7251         case KEY_getprotoent:
7252             FUN0(OP_GPROTOENT);
7253
7254         case KEY_getpwent:
7255             FUN0(OP_GPWENT);
7256
7257         case KEY_getpwnam:
7258             UNI(OP_GPWNAM);
7259
7260         case KEY_getpwuid:
7261             UNI(OP_GPWUID);
7262
7263         case KEY_getpeername:
7264             UNI(OP_GETPEERNAME);
7265
7266         case KEY_gethostbyname:
7267             UNI(OP_GHBYNAME);
7268
7269         case KEY_gethostbyaddr:
7270             LOP(OP_GHBYADDR,XTERM);
7271
7272         case KEY_gethostent:
7273             FUN0(OP_GHOSTENT);
7274
7275         case KEY_getnetbyname:
7276             UNI(OP_GNBYNAME);
7277
7278         case KEY_getnetbyaddr:
7279             LOP(OP_GNBYADDR,XTERM);
7280
7281         case KEY_getnetent:
7282             FUN0(OP_GNETENT);
7283
7284         case KEY_getservbyname:
7285             LOP(OP_GSBYNAME,XTERM);
7286
7287         case KEY_getservbyport:
7288             LOP(OP_GSBYPORT,XTERM);
7289
7290         case KEY_getservent:
7291             FUN0(OP_GSERVENT);
7292
7293         case KEY_getsockname:
7294             UNI(OP_GETSOCKNAME);
7295
7296         case KEY_getsockopt:
7297             LOP(OP_GSOCKOPT,XTERM);
7298
7299         case KEY_getgrent:
7300             FUN0(OP_GGRENT);
7301
7302         case KEY_getgrnam:
7303             UNI(OP_GGRNAM);
7304
7305         case KEY_getgrgid:
7306             UNI(OP_GGRGID);
7307
7308         case KEY_getlogin:
7309             FUN0(OP_GETLOGIN);
7310
7311         case KEY_given:
7312             pl_yylval.ival = CopLINE(PL_curcop);
7313             Perl_ck_warner_d(aTHX_
7314                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7315                 "given is experimental");
7316             OPERATOR(GIVEN);
7317
7318         case KEY_glob:
7319             LOP(
7320              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7321              XTERM
7322             );
7323
7324         case KEY_hex:
7325             UNI(OP_HEX);
7326
7327         case KEY_if:
7328             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7329                 return REPORT(0);
7330             pl_yylval.ival = CopLINE(PL_curcop);
7331             OPERATOR(IF);
7332
7333         case KEY_index:
7334             LOP(OP_INDEX,XTERM);
7335
7336         case KEY_int:
7337             UNI(OP_INT);
7338
7339         case KEY_ioctl:
7340             LOP(OP_IOCTL,XTERM);
7341
7342         case KEY_join:
7343             LOP(OP_JOIN,XTERM);
7344
7345         case KEY_keys:
7346             UNI(OP_KEYS);
7347
7348         case KEY_kill:
7349             LOP(OP_KILL,XTERM);
7350
7351         case KEY_last:
7352             PL_expect = XOPERATOR;
7353             s = force_word(s,WORD,TRUE,FALSE);
7354             LOOPX(OP_LAST);
7355
7356         case KEY_lc:
7357             UNI(OP_LC);
7358
7359         case KEY_lcfirst:
7360             UNI(OP_LCFIRST);
7361
7362         case KEY_local:
7363             pl_yylval.ival = 0;
7364             OPERATOR(LOCAL);
7365
7366         case KEY_length:
7367             UNI(OP_LENGTH);
7368
7369         case KEY_lt:
7370             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7371                 return REPORT(0);
7372             Rop(OP_SLT);
7373
7374         case KEY_le:
7375             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7376                 return REPORT(0);
7377             Rop(OP_SLE);
7378
7379         case KEY_localtime:
7380             UNI(OP_LOCALTIME);
7381
7382         case KEY_log:
7383             UNI(OP_LOG);
7384
7385         case KEY_link:
7386             LOP(OP_LINK,XTERM);
7387
7388         case KEY_listen:
7389             LOP(OP_LISTEN,XTERM);
7390
7391         case KEY_lock:
7392             UNI(OP_LOCK);
7393
7394         case KEY_lstat:
7395             UNI(OP_LSTAT);
7396
7397         case KEY_m:
7398             s = scan_pat(s,OP_MATCH);
7399             TERM(sublex_start());
7400
7401         case KEY_map:
7402             LOP(OP_MAPSTART, XREF);
7403
7404         case KEY_mkdir:
7405             LOP(OP_MKDIR,XTERM);
7406
7407         case KEY_msgctl:
7408             LOP(OP_MSGCTL,XTERM);
7409
7410         case KEY_msgget:
7411             LOP(OP_MSGGET,XTERM);
7412
7413         case KEY_msgrcv:
7414             LOP(OP_MSGRCV,XTERM);
7415
7416         case KEY_msgsnd:
7417             LOP(OP_MSGSND,XTERM);
7418
7419         case KEY_our:
7420         case KEY_my:
7421         case KEY_state:
7422             PL_in_my = (U16)tmp;
7423             s = SKIPSPACE1(s);
7424             if (isIDFIRST_lazy_if(s,UTF)) {
7425                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7426                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7427                 {
7428                     if (!FEATURE_LEXSUBS_IS_ENABLED)
7429                         Perl_croak(aTHX_
7430                                   "Experimental \"%s\" subs not enabled",
7431                                    tmp == KEY_my    ? "my"    :
7432                                    tmp == KEY_state ? "state" : "our");
7433                     Perl_ck_warner_d(aTHX_
7434                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7435                         "The lexical_subs feature is experimental");
7436                     goto really_sub;
7437                 }
7438                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7439                 if (!PL_in_my_stash) {
7440                     char tmpbuf[1024];
7441                     PL_bufptr = s;
7442                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7443                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7444                 }
7445             }
7446             pl_yylval.ival = 1;
7447             OPERATOR(MY);
7448
7449         case KEY_next:
7450             PL_expect = XOPERATOR;
7451             s = force_word(s,WORD,TRUE,FALSE);
7452             LOOPX(OP_NEXT);
7453
7454         case KEY_ne:
7455             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7456                 return REPORT(0);
7457             Eop(OP_SNE);
7458
7459         case KEY_no:
7460             s = tokenize_use(0, s);
7461             TERM(USE);
7462
7463         case KEY_not:
7464             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7465                 FUN1(OP_NOT);
7466             else {
7467                 if (!PL_lex_allbrackets &&
7468                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7469                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7470                 OPERATOR(NOTOP);
7471             }
7472
7473         case KEY_open:
7474             s = SKIPSPACE1(s);
7475             if (isIDFIRST_lazy_if(s,UTF)) {
7476           const char *t;
7477           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7478               &len);
7479                 for (t=d; isSPACE(*t);)
7480                     t++;
7481                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7482                     /* [perl #16184] */
7483                     && !(t[0] == '=' && t[1] == '>')
7484                     && !(t[0] == ':' && t[1] == ':')
7485                     && !keyword(s, d-s, 0)
7486                 ) {
7487                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7488                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7489                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7490                 }
7491             }
7492             LOP(OP_OPEN,XTERM);
7493
7494         case KEY_or:
7495             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7496                 return REPORT(0);
7497             pl_yylval.ival = OP_OR;
7498             OPERATOR(OROP);
7499
7500         case KEY_ord:
7501             UNI(OP_ORD);
7502
7503         case KEY_oct:
7504             UNI(OP_OCT);
7505
7506         case KEY_opendir:
7507             LOP(OP_OPEN_DIR,XTERM);
7508
7509         case KEY_print:
7510             checkcomma(s,PL_tokenbuf,"filehandle");
7511             LOP(OP_PRINT,XREF);
7512
7513         case KEY_printf:
7514             checkcomma(s,PL_tokenbuf,"filehandle");
7515             LOP(OP_PRTF,XREF);
7516
7517         case KEY_prototype:
7518             UNI(OP_PROTOTYPE);
7519
7520         case KEY_push:
7521             LOP(OP_PUSH,XTERM);
7522
7523         case KEY_pop:
7524             UNIDOR(OP_POP);
7525
7526         case KEY_pos:
7527             UNIDOR(OP_POS);
7528
7529         case KEY_pack:
7530             LOP(OP_PACK,XTERM);
7531
7532         case KEY_package:
7533             s = force_word(s,WORD,FALSE,TRUE);
7534             s = SKIPSPACE1(s);
7535             s = force_strict_version(s);
7536             PL_lex_expect = XBLOCK;
7537             OPERATOR(PACKAGE);
7538
7539         case KEY_pipe:
7540             LOP(OP_PIPE_OP,XTERM);
7541
7542         case KEY_q:
7543             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7544             if (!s)
7545                 missingterm(NULL);
7546             COPLINE_SET_FROM_MULTI_END;
7547             pl_yylval.ival = OP_CONST;
7548             TERM(sublex_start());
7549
7550         case KEY_quotemeta:
7551             UNI(OP_QUOTEMETA);
7552
7553         case KEY_qw: {
7554             OP *words = NULL;
7555             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7556             if (!s)
7557                 missingterm(NULL);
7558             COPLINE_SET_FROM_MULTI_END;
7559             PL_expect = XOPERATOR;
7560             if (SvCUR(PL_lex_stuff)) {
7561                 int warned_comma = !ckWARN(WARN_QW);
7562                 int warned_comment = warned_comma;
7563                 d = SvPV_force(PL_lex_stuff, len);
7564                 while (len) {
7565                     for (; isSPACE(*d) && len; --len, ++d)
7566                         /**/;
7567                     if (len) {
7568                         SV *sv;
7569                         const char *b = d;
7570                         if (!warned_comma || !warned_comment) {
7571                             for (; !isSPACE(*d) && len; --len, ++d) {
7572                                 if (!warned_comma && *d == ',') {
7573                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7574                                         "Possible attempt to separate words with commas");
7575                                     ++warned_comma;
7576                                 }
7577                                 else if (!warned_comment && *d == '#') {
7578                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7579                                         "Possible attempt to put comments in qw() list");
7580                                     ++warned_comment;
7581                                 }
7582                             }
7583                         }
7584                         else {
7585                             for (; !isSPACE(*d) && len; --len, ++d)
7586                                 /**/;
7587                         }
7588                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7589                         words = op_append_elem(OP_LIST, words,
7590                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7591                     }
7592                 }
7593             }
7594             if (!words)
7595                 words = newNULLLIST();
7596             if (PL_lex_stuff) {
7597                 SvREFCNT_dec(PL_lex_stuff);
7598                 PL_lex_stuff = NULL;
7599             }
7600             PL_expect = XOPERATOR;
7601             pl_yylval.opval = sawparens(words);
7602             TOKEN(QWLIST);
7603         }
7604
7605         case KEY_qq:
7606             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7607             if (!s)
7608                 missingterm(NULL);
7609             pl_yylval.ival = OP_STRINGIFY;
7610             if (SvIVX(PL_lex_stuff) == '\'')
7611                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7612             TERM(sublex_start());
7613
7614         case KEY_qr:
7615             s = scan_pat(s,OP_QR);
7616             TERM(sublex_start());
7617
7618         case KEY_qx:
7619             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7620             if (!s)
7621                 missingterm(NULL);
7622             pl_yylval.ival = OP_BACKTICK;
7623             TERM(sublex_start());
7624
7625         case KEY_return:
7626             OLDLOP(OP_RETURN);
7627
7628         case KEY_require:
7629             s = SKIPSPACE1(s);
7630             PL_expect = XOPERATOR;
7631             if (isDIGIT(*s)) {
7632                 s = force_version(s, FALSE);
7633             }
7634             else if (*s != 'v' || !isDIGIT(s[1])
7635                     || (s = force_version(s, TRUE), *s == 'v'))
7636             {
7637                 *PL_tokenbuf = '\0';
7638                 s = force_word(s,WORD,TRUE,TRUE);
7639                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7640                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7641                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7642                 else if (*s == '<')
7643                     yyerror("<> at require-statement should be quotes");
7644             }
7645             if (orig_keyword == KEY_require) {
7646                 orig_keyword = 0;
7647                 pl_yylval.ival = 1;
7648             }
7649             else
7650                 pl_yylval.ival = 0;
7651             PL_expect = XTERM;
7652             PL_bufptr = s;
7653             PL_last_uni = PL_oldbufptr;
7654             PL_last_lop_op = OP_REQUIRE;
7655             s = skipspace(s);
7656             return REPORT( (int)REQUIRE );
7657
7658         case KEY_reset:
7659             UNI(OP_RESET);
7660
7661         case KEY_redo:
7662             PL_expect = XOPERATOR;
7663             s = force_word(s,WORD,TRUE,FALSE);
7664             LOOPX(OP_REDO);
7665
7666         case KEY_rename:
7667             LOP(OP_RENAME,XTERM);
7668
7669         case KEY_rand:
7670             UNI(OP_RAND);
7671
7672         case KEY_rmdir:
7673             UNI(OP_RMDIR);
7674
7675         case KEY_rindex:
7676             LOP(OP_RINDEX,XTERM);
7677
7678         case KEY_read:
7679             LOP(OP_READ,XTERM);
7680
7681         case KEY_readdir:
7682             UNI(OP_READDIR);
7683
7684         case KEY_readline:
7685             UNIDOR(OP_READLINE);
7686
7687         case KEY_readpipe:
7688             UNIDOR(OP_BACKTICK);
7689
7690         case KEY_rewinddir:
7691             UNI(OP_REWINDDIR);
7692
7693         case KEY_recv:
7694             LOP(OP_RECV,XTERM);
7695
7696         case KEY_reverse:
7697             LOP(OP_REVERSE,XTERM);
7698
7699         case KEY_readlink:
7700             UNIDOR(OP_READLINK);
7701
7702         case KEY_ref:
7703             UNI(OP_REF);
7704
7705         case KEY_s:
7706             s = scan_subst(s);
7707             if (pl_yylval.opval)
7708                 TERM(sublex_start());
7709             else
7710                 TOKEN(1);       /* force error */
7711
7712         case KEY_say:
7713             checkcomma(s,PL_tokenbuf,"filehandle");
7714             LOP(OP_SAY,XREF);
7715
7716         case KEY_chomp:
7717             UNI(OP_CHOMP);
7718
7719         case KEY_scalar:
7720             UNI(OP_SCALAR);
7721
7722         case KEY_select:
7723             LOP(OP_SELECT,XTERM);
7724
7725         case KEY_seek:
7726             LOP(OP_SEEK,XTERM);
7727
7728         case KEY_semctl:
7729             LOP(OP_SEMCTL,XTERM);
7730
7731         case KEY_semget:
7732             LOP(OP_SEMGET,XTERM);
7733
7734         case KEY_semop:
7735             LOP(OP_SEMOP,XTERM);
7736
7737         case KEY_send:
7738             LOP(OP_SEND,XTERM);
7739
7740         case KEY_setpgrp:
7741             LOP(OP_SETPGRP,XTERM);
7742
7743         case KEY_setpriority:
7744             LOP(OP_SETPRIORITY,XTERM);
7745
7746         case KEY_sethostent:
7747             UNI(OP_SHOSTENT);
7748
7749         case KEY_setnetent:
7750             UNI(OP_SNETENT);
7751
7752         case KEY_setservent:
7753             UNI(OP_SSERVENT);
7754
7755         case KEY_setprotoent:
7756             UNI(OP_SPROTOENT);
7757
7758         case KEY_setpwent:
7759             FUN0(OP_SPWENT);
7760
7761         case KEY_setgrent:
7762             FUN0(OP_SGRENT);
7763
7764         case KEY_seekdir:
7765             LOP(OP_SEEKDIR,XTERM);
7766
7767         case KEY_setsockopt:
7768             LOP(OP_SSOCKOPT,XTERM);
7769
7770         case KEY_shift:
7771             UNIDOR(OP_SHIFT);
7772
7773         case KEY_shmctl:
7774             LOP(OP_SHMCTL,XTERM);
7775
7776         case KEY_shmget:
7777             LOP(OP_SHMGET,XTERM);
7778
7779         case KEY_shmread:
7780             LOP(OP_SHMREAD,XTERM);
7781
7782         case KEY_shmwrite:
7783             LOP(OP_SHMWRITE,XTERM);
7784
7785         case KEY_shutdown:
7786             LOP(OP_SHUTDOWN,XTERM);
7787
7788         case KEY_sin:
7789             UNI(OP_SIN);
7790
7791         case KEY_sleep:
7792             UNI(OP_SLEEP);
7793
7794         case KEY_socket:
7795             LOP(OP_SOCKET,XTERM);
7796
7797         case KEY_socketpair:
7798             LOP(OP_SOCKPAIR,XTERM);
7799
7800         case KEY_sort:
7801             checkcomma(s,PL_tokenbuf,"subroutine name");
7802             s = SKIPSPACE1(s);
7803             PL_expect = XTERM;
7804             s = force_word(s,WORD,TRUE,TRUE);
7805             LOP(OP_SORT,XREF);
7806
7807         case KEY_split:
7808             LOP(OP_SPLIT,XTERM);
7809
7810         case KEY_sprintf:
7811             LOP(OP_SPRINTF,XTERM);
7812
7813         case KEY_splice:
7814             LOP(OP_SPLICE,XTERM);
7815
7816         case KEY_sqrt:
7817             UNI(OP_SQRT);
7818
7819         case KEY_srand:
7820             UNI(OP_SRAND);
7821
7822         case KEY_stat:
7823             UNI(OP_STAT);
7824
7825         case KEY_study:
7826             UNI(OP_STUDY);
7827
7828         case KEY_substr:
7829             LOP(OP_SUBSTR,XTERM);
7830
7831         case KEY_format:
7832         case KEY_sub:
7833           really_sub:
7834             {
7835                 char * const tmpbuf = PL_tokenbuf + 1;
7836                 expectation attrful;
7837                 bool have_name, have_proto;
7838                 const int key = tmp;
7839                 SV *format_name = NULL;
7840
7841                 d = s;
7842                 s = skipspace(s);
7843
7844                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7845                     (*s == ':' && s[1] == ':'))
7846                 {
7847
7848                     PL_expect = XBLOCK;
7849                     attrful = XATTRBLOCK;
7850                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7851                                   &len);
7852                     if (key == KEY_format)
7853                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7854                     *PL_tokenbuf = '&';
7855                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
7856                      || pad_findmy_pvn(
7857                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
7858                         ) != NOT_IN_PAD)
7859                         sv_setpvn(PL_subname, tmpbuf, len);
7860                     else {
7861                         sv_setsv(PL_subname,PL_curstname);
7862                         sv_catpvs(PL_subname,"::");
7863                         sv_catpvn(PL_subname,tmpbuf,len);
7864                     }
7865                     if (SvUTF8(PL_linestr))
7866                         SvUTF8_on(PL_subname);
7867                     have_name = TRUE;
7868
7869
7870                     s = skipspace(d);
7871                 }
7872                 else {
7873                     if (key == KEY_my || key == KEY_our || key==KEY_state)
7874                     {
7875                         *d = '\0';
7876                         /* diag_listed_as: Missing name in "%s sub" */
7877                         Perl_croak(aTHX_
7878                                   "Missing name in \"%s\"", PL_bufptr);
7879                     }
7880                     PL_expect = XTERMBLOCK;
7881                     attrful = XATTRTERM;
7882                     sv_setpvs(PL_subname,"?");
7883                     have_name = FALSE;
7884                 }
7885
7886                 if (key == KEY_format) {
7887                     if (format_name) {
7888                         NEXTVAL_NEXTTOKE.opval
7889                             = (OP*)newSVOP(OP_CONST,0, format_name);
7890                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7891                         force_next(WORD);
7892                     }
7893                     PREBLOCK(FORMAT);
7894                 }
7895
7896                 /* Look for a prototype */
7897                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
7898                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7899                     COPLINE_SET_FROM_MULTI_END;
7900                     if (!s)
7901                         Perl_croak(aTHX_ "Prototype not terminated");
7902                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
7903                     have_proto = TRUE;
7904
7905                     s = skipspace(s);
7906                 }
7907                 else
7908                     have_proto = FALSE;
7909
7910                 if (*s == ':' && s[1] != ':')
7911                     PL_expect = attrful;
7912                 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
7913                     if (!have_name)
7914                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7915                     else if (*s != ';' && *s != '}')
7916                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7917                 }
7918
7919                 if (have_proto) {
7920                     NEXTVAL_NEXTTOKE.opval =
7921                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7922                     PL_lex_stuff = NULL;
7923                     force_next(THING);
7924                 }
7925                 if (!have_name) {
7926                     if (PL_curstash)
7927                         sv_setpvs(PL_subname, "__ANON__");
7928                     else
7929                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7930                     TOKEN(ANONSUB);
7931                 }
7932                 force_ident_maybe_lex('&');
7933                 TOKEN(SUB);
7934             }
7935
7936         case KEY_system:
7937             LOP(OP_SYSTEM,XREF);
7938
7939         case KEY_symlink:
7940             LOP(OP_SYMLINK,XTERM);
7941
7942         case KEY_syscall:
7943             LOP(OP_SYSCALL,XTERM);
7944
7945         case KEY_sysopen:
7946             LOP(OP_SYSOPEN,XTERM);
7947
7948         case KEY_sysseek:
7949             LOP(OP_SYSSEEK,XTERM);
7950
7951         case KEY_sysread:
7952             LOP(OP_SYSREAD,XTERM);
7953
7954         case KEY_syswrite:
7955             LOP(OP_SYSWRITE,XTERM);
7956
7957         case KEY_tr:
7958         case KEY_y:
7959             s = scan_trans(s);
7960             TERM(sublex_start());
7961
7962         case KEY_tell:
7963             UNI(OP_TELL);
7964
7965         case KEY_telldir:
7966             UNI(OP_TELLDIR);
7967
7968         case KEY_tie:
7969             LOP(OP_TIE,XTERM);
7970
7971         case KEY_tied:
7972             UNI(OP_TIED);
7973
7974         case KEY_time:
7975             FUN0(OP_TIME);
7976
7977         case KEY_times:
7978             FUN0(OP_TMS);
7979
7980         case KEY_truncate:
7981             LOP(OP_TRUNCATE,XTERM);
7982
7983         case KEY_uc:
7984             UNI(OP_UC);
7985
7986         case KEY_ucfirst:
7987             UNI(OP_UCFIRST);
7988
7989         case KEY_untie:
7990             UNI(OP_UNTIE);
7991
7992         case KEY_until:
7993             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7994                 return REPORT(0);
7995             pl_yylval.ival = CopLINE(PL_curcop);
7996             OPERATOR(UNTIL);
7997
7998         case KEY_unless:
7999             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8000                 return REPORT(0);
8001             pl_yylval.ival = CopLINE(PL_curcop);
8002             OPERATOR(UNLESS);
8003
8004         case KEY_unlink:
8005             LOP(OP_UNLINK,XTERM);
8006
8007         case KEY_undef:
8008             UNIDOR(OP_UNDEF);
8009
8010         case KEY_unpack:
8011             LOP(OP_UNPACK,XTERM);
8012
8013         case KEY_utime:
8014             LOP(OP_UTIME,XTERM);
8015
8016         case KEY_umask:
8017             UNIDOR(OP_UMASK);
8018
8019         case KEY_unshift:
8020             LOP(OP_UNSHIFT,XTERM);
8021
8022         case KEY_use:
8023             s = tokenize_use(1, s);
8024             OPERATOR(USE);
8025
8026         case KEY_values:
8027             UNI(OP_VALUES);
8028
8029         case KEY_vec:
8030             LOP(OP_VEC,XTERM);
8031
8032         case KEY_when:
8033             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8034                 return REPORT(0);
8035             pl_yylval.ival = CopLINE(PL_curcop);
8036             Perl_ck_warner_d(aTHX_
8037                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8038                 "when is experimental");
8039             OPERATOR(WHEN);
8040
8041         case KEY_while:
8042             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8043                 return REPORT(0);
8044             pl_yylval.ival = CopLINE(PL_curcop);
8045             OPERATOR(WHILE);
8046
8047         case KEY_warn:
8048             PL_hints |= HINT_BLOCK_SCOPE;
8049             LOP(OP_WARN,XTERM);
8050
8051         case KEY_wait:
8052             FUN0(OP_WAIT);
8053
8054         case KEY_waitpid:
8055             LOP(OP_WAITPID,XTERM);
8056
8057         case KEY_wantarray:
8058             FUN0(OP_WANTARRAY);
8059
8060         case KEY_write:
8061             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8062              * we use the same number on EBCDIC */
8063             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8064             UNI(OP_ENTERWRITE);
8065
8066         case KEY_x:
8067             if (PL_expect == XOPERATOR) {
8068                 if (*s == '=' && !PL_lex_allbrackets &&
8069                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8070                     return REPORT(0);
8071                 Mop(OP_REPEAT);
8072             }
8073             check_uni();
8074             goto just_a_word;
8075
8076         case KEY_xor:
8077             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8078                 return REPORT(0);
8079             pl_yylval.ival = OP_XOR;
8080             OPERATOR(OROP);
8081         }
8082     }}
8083 }
8084
8085 /*
8086   S_pending_ident
8087
8088   Looks up an identifier in the pad or in a package
8089
8090   Returns:
8091     PRIVATEREF if this is a lexical name.
8092     WORD       if this belongs to a package.
8093
8094   Structure:
8095       if we're in a my declaration
8096           croak if they tried to say my($foo::bar)
8097           build the ops for a my() declaration
8098       if it's an access to a my() variable
8099           build ops for access to a my() variable
8100       if in a dq string, and they've said @foo and we can't find @foo
8101           warn
8102       build ops for a bareword
8103 */
8104
8105 static int
8106 S_pending_ident(pTHX)
8107 {
8108     dVAR;
8109     PADOFFSET tmp = 0;
8110     const char pit = (char)pl_yylval.ival;
8111     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8112     /* All routes through this function want to know if there is a colon.  */
8113     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8114
8115     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8116           "### Pending identifier '%s'\n", PL_tokenbuf); });
8117
8118     /* if we're in a my(), we can't allow dynamics here.
8119        $foo'bar has already been turned into $foo::bar, so
8120        just check for colons.
8121
8122        if it's a legal name, the OP is a PADANY.
8123     */
8124     if (PL_in_my) {
8125         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8126             if (has_colon)
8127                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8128                                   "variable %s in \"our\"",
8129                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8130             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8131         }
8132         else {
8133             if (has_colon) {
8134                 /* PL_no_myglob is constant */
8135                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8136                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8137                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8138                             UTF ? SVf_UTF8 : 0);
8139                 GCC_DIAG_RESTORE;
8140             }
8141
8142             pl_yylval.opval = newOP(OP_PADANY, 0);
8143             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8144                                                         UTF ? SVf_UTF8 : 0);
8145             return PRIVATEREF;
8146         }
8147     }
8148
8149     /*
8150        build the ops for accesses to a my() variable.
8151     */
8152
8153     if (!has_colon) {
8154         if (!PL_in_my)
8155             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8156                                     UTF ? SVf_UTF8 : 0);
8157         if (tmp != NOT_IN_PAD) {
8158             /* might be an "our" variable" */
8159             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8160                 /* build ops for a bareword */
8161                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8162                 HEK * const stashname = HvNAME_HEK(stash);
8163                 SV *  const sym = newSVhek(stashname);
8164                 sv_catpvs(sym, "::");
8165                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8166                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8167                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8168                 if (pit != '&')
8169                   gv_fetchsv(sym,
8170                     (PL_in_eval
8171                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8172                         : GV_ADDMULTI
8173                     ),
8174                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8175                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8176                      : SVt_PVHV));
8177                 return WORD;
8178             }
8179
8180             pl_yylval.opval = newOP(OP_PADANY, 0);
8181             pl_yylval.opval->op_targ = tmp;
8182             return PRIVATEREF;
8183         }
8184     }
8185
8186     /*
8187        Whine if they've said @foo in a doublequoted string,
8188        and @foo isn't a variable we can find in the symbol
8189        table.
8190     */
8191     if (ckWARN(WARN_AMBIGUOUS) &&
8192         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8193         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8194                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8195         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8196                 /* DO NOT warn for @- and @+ */
8197                 && !( PL_tokenbuf[2] == '\0' &&
8198                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8199            )
8200         {
8201             /* Downgraded from fatal to warning 20000522 mjd */
8202             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8203                         "Possible unintended interpolation of %"UTF8f
8204                         " in string",
8205                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8206         }
8207     }
8208
8209     /* build ops for a bareword */
8210     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8211                                    newSVpvn_flags(PL_tokenbuf + 1,
8212                                                       tokenbuf_len - 1,
8213                                                       UTF ? SVf_UTF8 : 0 ));
8214     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8215     if (pit != '&')
8216         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8217                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8218                      | ( UTF ? SVf_UTF8 : 0 ),
8219                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8220                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8221                       : SVt_PVHV));
8222     return WORD;
8223 }
8224
8225 STATIC void
8226 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8227 {
8228     dVAR;
8229
8230     PERL_ARGS_ASSERT_CHECKCOMMA;
8231
8232     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8233         if (ckWARN(WARN_SYNTAX)) {
8234             int level = 1;
8235             const char *w;
8236             for (w = s+2; *w && level; w++) {
8237                 if (*w == '(')
8238                     ++level;
8239                 else if (*w == ')')
8240                     --level;
8241             }
8242             while (isSPACE(*w))
8243                 ++w;
8244             /* the list of chars below is for end of statements or
8245              * block / parens, boolean operators (&&, ||, //) and branch
8246              * constructs (or, and, if, until, unless, while, err, for).
8247              * Not a very solid hack... */
8248             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8249                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8250                             "%s (...) interpreted as function",name);
8251         }
8252     }
8253     while (s < PL_bufend && isSPACE(*s))
8254         s++;
8255     if (*s == '(')
8256         s++;
8257     while (s < PL_bufend && isSPACE(*s))
8258         s++;
8259     if (isIDFIRST_lazy_if(s,UTF)) {
8260         const char * const w = s;
8261         s += UTF ? UTF8SKIP(s) : 1;
8262         while (isWORDCHAR_lazy_if(s,UTF))
8263             s += UTF ? UTF8SKIP(s) : 1;
8264         while (s < PL_bufend && isSPACE(*s))
8265             s++;
8266         if (*s == ',') {
8267             GV* gv;
8268             if (keyword(w, s - w, 0))
8269                 return;
8270
8271             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8272             if (gv && GvCVu(gv))
8273                 return;
8274             Perl_croak(aTHX_ "No comma allowed after %s", what);
8275         }
8276     }
8277 }
8278
8279 /* S_new_constant(): do any overload::constant lookup.
8280
8281    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8282    Best used as sv=new_constant(..., sv, ...).
8283    If s, pv are NULL, calls subroutine with one argument,
8284    and <type> is used with error messages only.
8285    <type> is assumed to be well formed UTF-8 */
8286
8287 STATIC SV *
8288 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8289                SV *sv, SV *pv, const char *type, STRLEN typelen)
8290 {
8291     dVAR; dSP;
8292     HV * table = GvHV(PL_hintgv);                /* ^H */
8293     SV *res;
8294     SV *errsv = NULL;
8295     SV **cvp;
8296     SV *cv, *typesv;
8297     const char *why1 = "", *why2 = "", *why3 = "";
8298
8299     PERL_ARGS_ASSERT_NEW_CONSTANT;
8300     /* We assume that this is true: */
8301     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8302     assert(type || s);
8303
8304     /* charnames doesn't work well if there have been errors found */
8305     if (PL_error_count > 0 && *key == 'c')
8306     {
8307         SvREFCNT_dec_NN(sv);
8308         return &PL_sv_undef;
8309     }
8310
8311     sv_2mortal(sv);                     /* Parent created it permanently */
8312     if (!table
8313         || ! (PL_hints & HINT_LOCALIZE_HH)
8314         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8315         || ! SvOK(*cvp))
8316     {
8317         char *msg;
8318
8319         /* Here haven't found what we're looking for.  If it is charnames,
8320          * perhaps it needs to be loaded.  Try doing that before giving up */
8321         if (*key == 'c') {
8322             Perl_load_module(aTHX_
8323                             0,
8324                             newSVpvs("_charnames"),
8325                              /* version parameter; no need to specify it, as if
8326                               * we get too early a version, will fail anyway,
8327                               * not being able to find '_charnames' */
8328                             NULL,
8329                             newSVpvs(":full"),
8330                             newSVpvs(":short"),
8331                             NULL);
8332             assert(sp == PL_stack_sp);
8333             table = GvHV(PL_hintgv);
8334             if (table
8335                 && (PL_hints & HINT_LOCALIZE_HH)
8336                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8337                 && SvOK(*cvp))
8338             {
8339                 goto now_ok;
8340             }
8341         }
8342         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8343             msg = Perl_form(aTHX_
8344                                "Constant(%.*s) unknown",
8345                                 (int)(type ? typelen : len),
8346                                 (type ? type: s));
8347         }
8348         else {
8349             why1 = "$^H{";
8350             why2 = key;
8351             why3 = "} is not defined";
8352         report:
8353             if (*key == 'c') {
8354                 msg = Perl_form(aTHX_
8355                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8356                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8357                       );
8358             }
8359             else {
8360                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8361                                     (int)(type ? typelen : len),
8362                                     (type ? type: s), why1, why2, why3);
8363             }
8364         }
8365         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8366         return SvREFCNT_inc_simple_NN(sv);
8367     }
8368 now_ok:
8369     cv = *cvp;
8370     if (!pv && s)
8371         pv = newSVpvn_flags(s, len, SVs_TEMP);
8372     if (type && pv)
8373         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8374     else
8375         typesv = &PL_sv_undef;
8376
8377     PUSHSTACKi(PERLSI_OVERLOAD);
8378     ENTER ;
8379     SAVETMPS;
8380
8381     PUSHMARK(SP) ;
8382     EXTEND(sp, 3);
8383     if (pv)
8384         PUSHs(pv);
8385     PUSHs(sv);
8386     if (pv)
8387         PUSHs(typesv);
8388     PUTBACK;
8389     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8390
8391     SPAGAIN ;
8392
8393     /* Check the eval first */
8394     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8395         STRLEN errlen;
8396         const char * errstr;
8397         sv_catpvs(errsv, "Propagated");
8398         errstr = SvPV_const(errsv, errlen);
8399         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8400         (void)POPs;
8401         res = SvREFCNT_inc_simple_NN(sv);
8402     }
8403     else {
8404         res = POPs;
8405         SvREFCNT_inc_simple_void_NN(res);
8406     }
8407
8408     PUTBACK ;
8409     FREETMPS ;
8410     LEAVE ;
8411     POPSTACK;
8412
8413     if (!SvOK(res)) {
8414         why1 = "Call to &{$^H{";
8415         why2 = key;
8416         why3 = "}} did not return a defined value";
8417         sv = res;
8418         (void)sv_2mortal(sv);
8419         goto report;
8420     }
8421
8422     return res;
8423 }
8424
8425 PERL_STATIC_INLINE void
8426 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8427     dVAR;
8428     PERL_ARGS_ASSERT_PARSE_IDENT;
8429
8430     for (;;) {
8431         if (*d >= e)
8432             Perl_croak(aTHX_ "%s", ident_too_long);
8433         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8434              /* The UTF-8 case must come first, otherwise things
8435              * like c\N{COMBINING TILDE} would start failing, as the
8436              * isWORDCHAR_A case below would gobble the 'c' up.
8437              */
8438
8439             char *t = *s + UTF8SKIP(*s);
8440             while (isIDCONT_utf8((U8*)t))
8441                 t += UTF8SKIP(t);
8442             if (*d + (t - *s) > e)
8443                 Perl_croak(aTHX_ "%s", ident_too_long);
8444             Copy(*s, *d, t - *s, char);
8445             *d += t - *s;
8446             *s = t;
8447         }
8448         else if ( isWORDCHAR_A(**s) ) {
8449             do {
8450                 *(*d)++ = *(*s)++;
8451             } while (isWORDCHAR_A(**s) && *d < e);
8452         }
8453         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8454             *(*d)++ = ':';
8455             *(*d)++ = ':';
8456             (*s)++;
8457         }
8458         else if (allow_package && **s == ':' && (*s)[1] == ':'
8459            /* Disallow things like Foo::$bar. For the curious, this is
8460             * the code path that triggers the "Bad name after" warning
8461             * when looking for barewords.
8462             */
8463            && (*s)[2] != '$') {
8464             *(*d)++ = *(*s)++;
8465             *(*d)++ = *(*s)++;
8466         }
8467         else
8468             break;
8469     }
8470     return;
8471 }
8472
8473 /* Returns a NUL terminated string, with the length of the string written to
8474    *slp
8475    */
8476 STATIC char *
8477 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8478 {
8479     dVAR;
8480     char *d = dest;
8481     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8482     bool is_utf8 = cBOOL(UTF);
8483
8484     PERL_ARGS_ASSERT_SCAN_WORD;
8485
8486     parse_ident(&s, &d, e, allow_package, is_utf8);
8487     *d = '\0';
8488     *slp = d - dest;
8489     return s;
8490 }
8491
8492 STATIC char *
8493 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8494 {
8495     dVAR;
8496     I32 herelines = PL_parser->herelines;
8497     SSize_t bracket = -1;
8498     char funny = *s++;
8499     char *d = dest;
8500     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8501     bool is_utf8 = cBOOL(UTF);
8502     I32 orig_copline = 0, tmp_copline = 0;
8503
8504     PERL_ARGS_ASSERT_SCAN_IDENT;
8505
8506     if (isSPACE(*s))
8507         s = PEEKSPACE(s);
8508     if (isDIGIT(*s)) {
8509         while (isDIGIT(*s)) {
8510             if (d >= e)
8511                 Perl_croak(aTHX_ "%s", ident_too_long);
8512             *d++ = *s++;
8513         }
8514     }
8515     else {
8516         parse_ident(&s, &d, e, 1, is_utf8);
8517     }
8518     *d = '\0';
8519     d = dest;
8520     if (*d) {
8521         /* Either a digit variable, or parse_ident() found an identifier
8522            (anything valid as a bareword), so job done and return.  */
8523         if (PL_lex_state != LEX_NORMAL)
8524             PL_lex_state = LEX_INTERPENDMAYBE;
8525         return s;
8526     }
8527     if (*s == '$' && s[1] &&
8528       (isIDFIRST_lazy_if(s+1,is_utf8)
8529          || isDIGIT_A((U8)s[1])
8530          || s[1] == '$'
8531          || s[1] == '{'
8532          || strnEQ(s+1,"::",2)) )
8533     {
8534         /* Dereferencing a value in a scalar variable.
8535            The alternatives are different syntaxes for a scalar variable.
8536            Using ' as a leading package separator isn't allowed. :: is.   */
8537         return s;
8538     }
8539     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8540     if (*s == '{') {
8541         bracket = s - SvPVX(PL_linestr);
8542         s++;
8543         orig_copline = CopLINE(PL_curcop);
8544         if (s < PL_bufend && isSPACE(*s)) {
8545             s = PEEKSPACE(s);
8546         }
8547     }
8548
8549 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8550  * iff Unicode semantics are to be used.  The legal ones are any of:
8551  *  a) ASCII digits
8552  *  b) ASCII punctuation
8553  *  c) When not under Unicode rules, any upper Latin1 character
8554  *  d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
8555  *     been matched by \s on ASCII platforms.  That is: \c?, plus 1-32, minus
8556  *     the \s ones. */
8557 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d))                       \
8558                                    || isDIGIT_A((U8)(d))                    \
8559                                    || (!(u) && !isASCII((U8)(d)))           \
8560                                    || ((((U8)(d)) < 32)                     \
8561                                        && (((((U8)(d)) >= 14)               \
8562                                            || (((U8)(d)) <= 8 && (d) != 0) \
8563                                            || (((U8)(d)) == 13))))          \
8564                                    || (((U8)(d)) == toCTRL('?')))
8565     if (s < PL_bufend
8566         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
8567     {
8568         if ( isCNTRL_A((U8)*s) ) {
8569             deprecate("literal control characters in variable names");
8570         }
8571
8572         if (is_utf8) {
8573             const STRLEN skip = UTF8SKIP(s);
8574             STRLEN i;
8575             d[skip] = '\0';
8576             for ( i = 0; i < skip; i++ )
8577                 d[i] = *s++;
8578         }
8579         else {
8580             *d = *s++;
8581             d[1] = '\0';
8582         }
8583     }
8584     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8585     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8586         *d = toCTRL(*s);
8587         s++;
8588     }
8589     /* Warn about ambiguous code after unary operators if {...} notation isn't
8590        used.  There's no difference in ambiguity; it's merely a heuristic
8591        about when not to warn.  */
8592     else if (ck_uni && bracket == -1)
8593         check_uni();
8594     if (bracket != -1) {
8595         /* If we were processing {...} notation then...  */
8596         if (isIDFIRST_lazy_if(d,is_utf8)) {
8597             /* if it starts as a valid identifier, assume that it is one.
8598                (the later check for } being at the expected point will trap
8599                cases where this doesn't pan out.)  */
8600         d += is_utf8 ? UTF8SKIP(d) : 1;
8601         parse_ident(&s, &d, e, 1, is_utf8);
8602             *d = '\0';
8603             tmp_copline = CopLINE(PL_curcop);
8604             if (s < PL_bufend && isSPACE(*s)) {
8605                 s = PEEKSPACE(s);
8606             }
8607             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8608                 /* ${foo[0]} and ${foo{bar}} notation.  */
8609                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8610                     const char * const brack =
8611                         (const char *)
8612                         ((*s == '[') ? "[...]" : "{...}");
8613                     orig_copline = CopLINE(PL_curcop);
8614                     CopLINE_set(PL_curcop, tmp_copline);
8615    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8616                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8617                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8618                         funny, dest, brack, funny, dest, brack);
8619                     CopLINE_set(PL_curcop, orig_copline);
8620                 }
8621                 bracket++;
8622                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8623                 PL_lex_allbrackets++;
8624                 return s;
8625             }
8626         }
8627         /* Handle extended ${^Foo} variables
8628          * 1999-02-27 mjd-perl-patch@plover.com */
8629         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8630                  && isWORDCHAR(*s))
8631         {
8632             d++;
8633             while (isWORDCHAR(*s) && d < e) {
8634                 *d++ = *s++;
8635             }
8636             if (d >= e)
8637                 Perl_croak(aTHX_ "%s", ident_too_long);
8638             *d = '\0';
8639         }
8640
8641         if ( !tmp_copline )
8642             tmp_copline = CopLINE(PL_curcop);
8643         if (s < PL_bufend && isSPACE(*s)) {
8644             s = PEEKSPACE(s);
8645         }
8646
8647         /* Expect to find a closing } after consuming any trailing whitespace.
8648          */
8649         if (*s == '}') {
8650             s++;
8651             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8652                 PL_lex_state = LEX_INTERPEND;
8653                 PL_expect = XREF;
8654             }
8655             if (PL_lex_state == LEX_NORMAL) {
8656                 if (ckWARN(WARN_AMBIGUOUS) &&
8657                     (keyword(dest, d - dest, 0)
8658                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8659                 {
8660                     SV *tmp = newSVpvn_flags( dest, d - dest,
8661                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8662                     if (funny == '#')
8663                         funny = '@';
8664                     orig_copline = CopLINE(PL_curcop);
8665                     CopLINE_set(PL_curcop, tmp_copline);
8666                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8667                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8668                         funny, SVfARG(tmp), funny, SVfARG(tmp));
8669                     CopLINE_set(PL_curcop, orig_copline);
8670                 }
8671             }
8672         }
8673         else {
8674             /* Didn't find the closing } at the point we expected, so restore
8675                state such that the next thing to process is the opening { and */
8676             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8677             CopLINE_set(PL_curcop, orig_copline);
8678             PL_parser->herelines = herelines;
8679             *dest = '\0';
8680         }
8681     }
8682     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8683         PL_lex_state = LEX_INTERPEND;
8684     return s;
8685 }
8686
8687 static bool
8688 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8689
8690     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8691      * the parse starting at 's', based on the subset that are valid in this
8692      * context input to this routine in 'valid_flags'. Advances s.  Returns
8693      * TRUE if the input should be treated as a valid flag, so the next char
8694      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
8695      * first call on the current regex.  This routine will set it to any
8696      * charset modifier found.  The caller shouldn't change it.  This way,
8697      * another charset modifier encountered in the parse can be detected as an
8698      * error, as we have decided to allow only one */
8699
8700     const char c = **s;
8701     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8702
8703     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8704         if (isWORDCHAR_lazy_if(*s, UTF)) {
8705             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8706                        UTF ? SVf_UTF8 : 0);
8707             (*s) += charlen;
8708             /* Pretend that it worked, so will continue processing before
8709              * dieing */
8710             return TRUE;
8711         }
8712         return FALSE;
8713     }
8714
8715     switch (c) {
8716
8717         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8718         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8719         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8720         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8721         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8722         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8723         case LOCALE_PAT_MOD:
8724             if (*charset) {
8725                 goto multiple_charsets;
8726             }
8727             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8728             *charset = c;
8729             break;
8730         case UNICODE_PAT_MOD:
8731             if (*charset) {
8732                 goto multiple_charsets;
8733             }
8734             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8735             *charset = c;
8736             break;
8737         case ASCII_RESTRICT_PAT_MOD:
8738             if (! *charset) {
8739                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8740             }
8741             else {
8742
8743                 /* Error if previous modifier wasn't an 'a', but if it was, see
8744                  * if, and accept, a second occurrence (only) */
8745                 if (*charset != 'a'
8746                     || get_regex_charset(*pmfl)
8747                         != REGEX_ASCII_RESTRICTED_CHARSET)
8748                 {
8749                         goto multiple_charsets;
8750                 }
8751                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8752             }
8753             *charset = c;
8754             break;
8755         case DEPENDS_PAT_MOD:
8756             if (*charset) {
8757                 goto multiple_charsets;
8758             }
8759             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8760             *charset = c;
8761             break;
8762     }
8763
8764     (*s)++;
8765     return TRUE;
8766
8767     multiple_charsets:
8768         if (*charset != c) {
8769             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8770         }
8771         else if (c == 'a') {
8772   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8773             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8774         }
8775         else {
8776             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8777         }
8778
8779         /* Pretend that it worked, so will continue processing before dieing */
8780         (*s)++;
8781         return TRUE;
8782 }
8783
8784 STATIC char *
8785 S_scan_pat(pTHX_ char *start, I32 type)
8786 {
8787     dVAR;
8788     PMOP *pm;
8789     char *s;
8790     const char * const valid_flags =
8791         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8792     char charset = '\0';    /* character set modifier */
8793
8794     PERL_ARGS_ASSERT_SCAN_PAT;
8795
8796     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8797     if (!s)
8798         Perl_croak(aTHX_ "Search pattern not terminated");
8799
8800     pm = (PMOP*)newPMOP(type, 0);
8801     if (PL_multi_open == '?') {
8802         /* This is the only point in the code that sets PMf_ONCE:  */
8803         pm->op_pmflags |= PMf_ONCE;
8804
8805         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8806            allows us to restrict the list needed by reset to just the ??
8807            matches.  */
8808         assert(type != OP_TRANS);
8809         if (PL_curstash) {
8810             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8811             U32 elements;
8812             if (!mg) {
8813                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8814                                  0);
8815             }
8816             elements = mg->mg_len / sizeof(PMOP**);
8817             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8818             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8819             mg->mg_len = elements * sizeof(PMOP**);
8820             PmopSTASH_set(pm,PL_curstash);
8821         }
8822     }
8823
8824     /* if qr/...(?{..}).../, then need to parse the pattern within a new
8825      * anon CV. False positives like qr/[(?{]/ are harmless */
8826
8827     if (type == OP_QR) {
8828         STRLEN len;
8829         char *e, *p = SvPV(PL_lex_stuff, len);
8830         e = p + len;
8831         for (; p < e; p++) {
8832             if (p[0] == '(' && p[1] == '?'
8833                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8834             {
8835                 pm->op_pmflags |= PMf_HAS_CV;
8836                 break;
8837             }
8838         }
8839         pm->op_pmflags |= PMf_IS_QR;
8840     }
8841
8842     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8843     /* issue a warning if /c is specified,but /g is not */
8844     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8845     {
8846         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8847                        "Use of /c modifier is meaningless without /g" );
8848     }
8849
8850     PL_lex_op = (OP*)pm;
8851     pl_yylval.ival = OP_MATCH;
8852     return s;
8853 }
8854
8855 STATIC char *
8856 S_scan_subst(pTHX_ char *start)
8857 {
8858     dVAR;
8859     char *s;
8860     PMOP *pm;
8861     I32 first_start;
8862     line_t first_line;
8863     I32 es = 0;
8864     char charset = '\0';    /* character set modifier */
8865     char *t;
8866
8867     PERL_ARGS_ASSERT_SCAN_SUBST;
8868
8869     pl_yylval.ival = OP_NULL;
8870
8871     s = scan_str(start, TRUE, FALSE, FALSE, &t);
8872
8873     if (!s)
8874         Perl_croak(aTHX_ "Substitution pattern not terminated");
8875
8876     s = t;
8877
8878     first_start = PL_multi_start;
8879     first_line = CopLINE(PL_curcop);
8880     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8881     if (!s) {
8882         if (PL_lex_stuff) {
8883             SvREFCNT_dec(PL_lex_stuff);
8884             PL_lex_stuff = NULL;
8885         }
8886         Perl_croak(aTHX_ "Substitution replacement not terminated");
8887     }
8888     PL_multi_start = first_start;       /* so whole substitution is taken together */
8889
8890     pm = (PMOP*)newPMOP(OP_SUBST, 0);
8891
8892
8893     while (*s) {
8894         if (*s == EXEC_PAT_MOD) {
8895             s++;
8896             es++;
8897         }
8898         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
8899         {
8900             break;
8901         }
8902     }
8903
8904     if ((pm->op_pmflags & PMf_CONTINUE)) {
8905         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
8906     }
8907
8908     if (es) {
8909         SV * const repl = newSVpvs("");
8910
8911         PL_multi_end = 0;
8912         pm->op_pmflags |= PMf_EVAL;
8913         while (es-- > 0) {
8914             if (es)
8915                 sv_catpvs(repl, "eval ");
8916             else
8917                 sv_catpvs(repl, "do ");
8918         }
8919         sv_catpvs(repl, "{");
8920         sv_catsv(repl, PL_sublex_info.repl);
8921         sv_catpvs(repl, "}");
8922         SvEVALED_on(repl);
8923         SvREFCNT_dec(PL_sublex_info.repl);
8924         PL_sublex_info.repl = repl;
8925     }
8926     if (CopLINE(PL_curcop) != first_line) {
8927         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
8928         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
8929             CopLINE(PL_curcop) - first_line;
8930         CopLINE_set(PL_curcop, first_line);
8931     }
8932
8933     PL_lex_op = (OP*)pm;
8934     pl_yylval.ival = OP_SUBST;
8935     return s;
8936 }
8937
8938 STATIC char *
8939 S_scan_trans(pTHX_ char *start)
8940 {
8941     dVAR;
8942     char* s;
8943     OP *o;
8944     U8 squash;
8945     U8 del;
8946     U8 complement;
8947     bool nondestruct = 0;
8948     char *t;
8949
8950     PERL_ARGS_ASSERT_SCAN_TRANS;
8951
8952     pl_yylval.ival = OP_NULL;
8953
8954     s = scan_str(start,FALSE,FALSE,FALSE,&t);
8955     if (!s)
8956         Perl_croak(aTHX_ "Transliteration pattern not terminated");
8957
8958     s = t;
8959
8960     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8961     if (!s) {
8962         if (PL_lex_stuff) {
8963             SvREFCNT_dec(PL_lex_stuff);
8964             PL_lex_stuff = NULL;
8965         }
8966         Perl_croak(aTHX_ "Transliteration replacement not terminated");
8967     }
8968
8969     complement = del = squash = 0;
8970     while (1) {
8971         switch (*s) {
8972         case 'c':
8973             complement = OPpTRANS_COMPLEMENT;
8974             break;
8975         case 'd':
8976             del = OPpTRANS_DELETE;
8977             break;
8978         case 's':
8979             squash = OPpTRANS_SQUASH;
8980             break;
8981         case 'r':
8982             nondestruct = 1;
8983             break;
8984         default:
8985             goto no_more;
8986         }
8987         s++;
8988     }
8989   no_more:
8990
8991     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
8992     o->op_private &= ~OPpTRANS_ALL;
8993     o->op_private |= del|squash|complement|
8994       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
8995       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
8996
8997     PL_lex_op = o;
8998     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
8999
9000
9001     return s;
9002 }
9003
9004 /* scan_heredoc
9005    Takes a pointer to the first < in <<FOO.
9006    Returns a pointer to the byte following <<FOO.
9007
9008    This function scans a heredoc, which involves different methods
9009    depending on whether we are in a string eval, quoted construct, etc.
9010    This is because PL_linestr could containing a single line of input, or
9011    a whole string being evalled, or the contents of the current quote-
9012    like operator.
9013
9014    The two basic methods are:
9015     - Steal lines from the input stream
9016     - Scan the heredoc in PL_linestr and remove it therefrom
9017
9018    In a file scope or filtered eval, the first method is used; in a
9019    string eval, the second.
9020
9021    In a quote-like operator, we have to choose between the two,
9022    depending on where we can find a newline.  We peek into outer lex-
9023    ing scopes until we find one with a newline in it.  If we reach the
9024    outermost lexing scope and it is a file, we use the stream method.
9025    Otherwise it is treated as an eval.
9026 */
9027
9028 STATIC char *
9029 S_scan_heredoc(pTHX_ char *s)
9030 {
9031     dVAR;
9032     I32 op_type = OP_SCALAR;
9033     I32 len;
9034     SV *tmpstr;
9035     char term;
9036     char *d;
9037     char *e;
9038     char *peek;
9039     const bool infile = PL_rsfp || PL_parser->filtered;
9040     const line_t origline = CopLINE(PL_curcop);
9041     LEXSHARED *shared = PL_parser->lex_shared;
9042
9043     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9044
9045     s += 2;
9046     d = PL_tokenbuf + 1;
9047     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9048     *PL_tokenbuf = '\n';
9049     peek = s;
9050     while (SPACE_OR_TAB(*peek))
9051         peek++;
9052     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9053         s = peek;
9054         term = *s++;
9055         s = delimcpy(d, e, s, PL_bufend, term, &len);
9056         if (s == PL_bufend)
9057             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9058         d += len;
9059         s++;
9060     }
9061     else {
9062         if (*s == '\\')
9063             /* <<\FOO is equivalent to <<'FOO' */
9064             s++, term = '\'';
9065         else
9066             term = '"';
9067         if (!isWORDCHAR_lazy_if(s,UTF))
9068             deprecate("bare << to mean <<\"\"");
9069         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9070             if (d < e)
9071                 *d++ = *s;
9072         }
9073     }
9074     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9075         Perl_croak(aTHX_ "Delimiter for here document is too long");
9076     *d++ = '\n';
9077     *d = '\0';
9078     len = d - PL_tokenbuf;
9079
9080 #ifndef PERL_STRICT_CR
9081     d = strchr(s, '\r');
9082     if (d) {
9083         char * const olds = s;
9084         s = d;
9085         while (s < PL_bufend) {
9086             if (*s == '\r') {
9087                 *d++ = '\n';
9088                 if (*++s == '\n')
9089                     s++;
9090             }
9091             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9092                 *d++ = *s++;
9093                 s++;
9094             }
9095             else
9096                 *d++ = *s++;
9097         }
9098         *d = '\0';
9099         PL_bufend = d;
9100         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9101         s = olds;
9102     }
9103 #endif
9104
9105     tmpstr = newSV_type(SVt_PVIV);
9106     SvGROW(tmpstr, 80);
9107     if (term == '\'') {
9108         op_type = OP_CONST;
9109         SvIV_set(tmpstr, -1);
9110     }
9111     else if (term == '`') {
9112         op_type = OP_BACKTICK;
9113         SvIV_set(tmpstr, '\\');
9114     }
9115
9116     PL_multi_start = origline + 1 + PL_parser->herelines;
9117     PL_multi_open = PL_multi_close = '<';
9118     /* inside a string eval or quote-like operator */
9119     if (!infile || PL_lex_inwhat) {
9120         SV *linestr;
9121         char *bufend;
9122         char * const olds = s;
9123         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9124         /* These two fields are not set until an inner lexing scope is
9125            entered.  But we need them set here. */
9126         shared->ls_bufptr  = s;
9127         shared->ls_linestr = PL_linestr;
9128         if (PL_lex_inwhat)
9129           /* Look for a newline.  If the current buffer does not have one,
9130              peek into the line buffer of the parent lexing scope, going
9131              up as many levels as necessary to find one with a newline
9132              after bufptr.
9133            */
9134           while (!(s = (char *)memchr(
9135                     (void *)shared->ls_bufptr, '\n',
9136                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9137                 ))) {
9138             shared = shared->ls_prev;
9139             /* shared is only null if we have gone beyond the outermost
9140                lexing scope.  In a file, we will have broken out of the
9141                loop in the previous iteration.  In an eval, the string buf-
9142                fer ends with "\n;", so the while condition above will have
9143                evaluated to false.  So shared can never be null. */
9144             assert(shared);
9145             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9146                most lexing scope.  In a file, shared->ls_linestr at that
9147                level is just one line, so there is no body to steal. */
9148             if (infile && !shared->ls_prev) {
9149                 s = olds;
9150                 goto streaming;
9151             }
9152           }
9153         else {  /* eval */
9154             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9155             assert(s);
9156         }
9157         linestr = shared->ls_linestr;
9158         bufend = SvEND(linestr);
9159         d = s;
9160         while (s < bufend - len + 1 &&
9161           memNE(s,PL_tokenbuf,len) ) {
9162             if (*s++ == '\n')
9163                 ++PL_parser->herelines;
9164         }
9165         if (s >= bufend - len + 1) {
9166             goto interminable;
9167         }
9168         sv_setpvn(tmpstr,d+1,s-d);
9169         s += len - 1;
9170         /* the preceding stmt passes a newline */
9171         PL_parser->herelines++;
9172
9173         /* s now points to the newline after the heredoc terminator.
9174            d points to the newline before the body of the heredoc.
9175          */
9176
9177         /* We are going to modify linestr in place here, so set
9178            aside copies of the string if necessary for re-evals or
9179            (caller $n)[6]. */
9180         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9181            check shared->re_eval_str. */
9182         if (shared->re_eval_start || shared->re_eval_str) {
9183             /* Set aside the rest of the regexp */
9184             if (!shared->re_eval_str)
9185                 shared->re_eval_str =
9186                        newSVpvn(shared->re_eval_start,
9187                                 bufend - shared->re_eval_start);
9188             shared->re_eval_start -= s-d;
9189         }
9190         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9191             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9192             cx->blk_eval.cur_text == linestr)
9193         {
9194             cx->blk_eval.cur_text = newSVsv(linestr);
9195             SvSCREAM_on(cx->blk_eval.cur_text);
9196         }
9197         /* Copy everything from s onwards back to d. */
9198         Move(s,d,bufend-s + 1,char);
9199         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9200         /* Setting PL_bufend only applies when we have not dug deeper
9201            into other scopes, because sublex_done sets PL_bufend to
9202            SvEND(PL_linestr). */
9203         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9204         s = olds;
9205     }
9206     else
9207     {
9208       SV *linestr_save;
9209      streaming:
9210       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9211       term = PL_tokenbuf[1];
9212       len--;
9213       linestr_save = PL_linestr; /* must restore this afterwards */
9214       d = s;                     /* and this */
9215       PL_linestr = newSVpvs("");
9216       PL_bufend = SvPVX(PL_linestr);
9217       while (1) {
9218         PL_bufptr = PL_bufend;
9219         CopLINE_set(PL_curcop,
9220                     origline + 1 + PL_parser->herelines);
9221         if (!lex_next_chunk(LEX_NO_TERM)
9222          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9223             SvREFCNT_dec(linestr_save);
9224             goto interminable;
9225         }
9226         CopLINE_set(PL_curcop, origline);
9227         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9228             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9229             /* ^That should be enough to avoid this needing to grow:  */
9230             sv_catpvs(PL_linestr, "\n\0");
9231             assert(s == SvPVX(PL_linestr));
9232             PL_bufend = SvEND(PL_linestr);
9233         }
9234         s = PL_bufptr;
9235         PL_parser->herelines++;
9236         PL_last_lop = PL_last_uni = NULL;
9237 #ifndef PERL_STRICT_CR
9238         if (PL_bufend - PL_linestart >= 2) {
9239             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9240                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9241             {
9242                 PL_bufend[-2] = '\n';
9243                 PL_bufend--;
9244                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9245             }
9246             else if (PL_bufend[-1] == '\r')
9247                 PL_bufend[-1] = '\n';
9248         }
9249         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9250             PL_bufend[-1] = '\n';
9251 #endif
9252         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9253             SvREFCNT_dec(PL_linestr);
9254             PL_linestr = linestr_save;
9255             PL_linestart = SvPVX(linestr_save);
9256             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9257             s = d;
9258             break;
9259         }
9260         else {
9261             sv_catsv(tmpstr,PL_linestr);
9262         }
9263       }
9264     }
9265     PL_multi_end = origline + PL_parser->herelines;
9266     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9267         SvPV_shrink_to_cur(tmpstr);
9268     }
9269     if (!IN_BYTES) {
9270         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9271             SvUTF8_on(tmpstr);
9272         else if (PL_encoding)
9273             sv_recode_to_utf8(tmpstr, PL_encoding);
9274     }
9275     PL_lex_stuff = tmpstr;
9276     pl_yylval.ival = op_type;
9277     return s;
9278
9279   interminable:
9280     SvREFCNT_dec(tmpstr);
9281     CopLINE_set(PL_curcop, origline);
9282     missingterm(PL_tokenbuf + 1);
9283 }
9284
9285 /* scan_inputsymbol
9286    takes: current position in input buffer
9287    returns: new position in input buffer
9288    side-effects: pl_yylval and lex_op are set.
9289
9290    This code handles:
9291
9292    <>           read from ARGV
9293    <FH>         read from filehandle
9294    <pkg::FH>    read from package qualified filehandle
9295    <pkg'FH>     read from package qualified filehandle
9296    <$fh>        read from filehandle in $fh
9297    <*.h>        filename glob
9298
9299 */
9300
9301 STATIC char *
9302 S_scan_inputsymbol(pTHX_ char *start)
9303 {
9304     dVAR;
9305     char *s = start;            /* current position in buffer */
9306     char *end;
9307     I32 len;
9308     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9309     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9310
9311     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9312
9313     end = strchr(s, '\n');
9314     if (!end)
9315         end = PL_bufend;
9316     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9317
9318     /* die if we didn't have space for the contents of the <>,
9319        or if it didn't end, or if we see a newline
9320     */
9321
9322     if (len >= (I32)sizeof PL_tokenbuf)
9323         Perl_croak(aTHX_ "Excessively long <> operator");
9324     if (s >= end)
9325         Perl_croak(aTHX_ "Unterminated <> operator");
9326
9327     s++;
9328
9329     /* check for <$fh>
9330        Remember, only scalar variables are interpreted as filehandles by
9331        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9332        treated as a glob() call.
9333        This code makes use of the fact that except for the $ at the front,
9334        a scalar variable and a filehandle look the same.
9335     */
9336     if (*d == '$' && d[1]) d++;
9337
9338     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9339     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9340         d += UTF ? UTF8SKIP(d) : 1;
9341
9342     /* If we've tried to read what we allow filehandles to look like, and
9343        there's still text left, then it must be a glob() and not a getline.
9344        Use scan_str to pull out the stuff between the <> and treat it
9345        as nothing more than a string.
9346     */
9347
9348     if (d - PL_tokenbuf != len) {
9349         pl_yylval.ival = OP_GLOB;
9350         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9351         if (!s)
9352            Perl_croak(aTHX_ "Glob not terminated");
9353         return s;
9354     }
9355     else {
9356         bool readline_overriden = FALSE;
9357         GV *gv_readline;
9358         /* we're in a filehandle read situation */
9359         d = PL_tokenbuf;
9360
9361         /* turn <> into <ARGV> */
9362         if (!len)
9363             Copy("ARGV",d,5,char);
9364
9365         /* Check whether readline() is overriden */
9366         if ((gv_readline = gv_override("readline",8)))
9367             readline_overriden = TRUE;
9368
9369         /* if <$fh>, create the ops to turn the variable into a
9370            filehandle
9371         */
9372         if (*d == '$') {
9373             /* try to find it in the pad for this block, otherwise find
9374                add symbol table ops
9375             */
9376             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9377             if (tmp != NOT_IN_PAD) {
9378                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9379                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9380                     HEK * const stashname = HvNAME_HEK(stash);
9381                     SV * const sym = sv_2mortal(newSVhek(stashname));
9382                     sv_catpvs(sym, "::");
9383                     sv_catpv(sym, d+1);
9384                     d = SvPVX(sym);
9385                     goto intro_sym;
9386                 }
9387                 else {
9388                     OP * const o = newOP(OP_PADSV, 0);
9389                     o->op_targ = tmp;
9390                     PL_lex_op = readline_overriden
9391                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9392                                 op_append_elem(OP_LIST, o,
9393                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9394                         : (OP*)newUNOP(OP_READLINE, 0, o);
9395                 }
9396             }
9397             else {
9398                 GV *gv;
9399                 ++d;
9400 intro_sym:
9401                 gv = gv_fetchpv(d,
9402                                 (PL_in_eval
9403                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9404                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9405                                 SVt_PV);
9406                 PL_lex_op = readline_overriden
9407                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9408                             op_append_elem(OP_LIST,
9409                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9410                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9411                     : (OP*)newUNOP(OP_READLINE, 0,
9412                             newUNOP(OP_RV2SV, 0,
9413                                 newGVOP(OP_GV, 0, gv)));
9414             }
9415             if (!readline_overriden)
9416                 PL_lex_op->op_flags |= OPf_SPECIAL;
9417             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9418             pl_yylval.ival = OP_NULL;
9419         }
9420
9421         /* If it's none of the above, it must be a literal filehandle
9422            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9423         else {
9424             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9425             PL_lex_op = readline_overriden
9426                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9427                         op_append_elem(OP_LIST,
9428                             newGVOP(OP_GV, 0, gv),
9429                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9430                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9431             pl_yylval.ival = OP_NULL;
9432         }
9433     }
9434
9435     return s;
9436 }
9437
9438
9439 /* scan_str
9440    takes:
9441         start                   position in buffer
9442         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9443                                 only if they are of the open/close form
9444         keep_delims             preserve the delimiters around the string
9445         re_reparse              compiling a run-time /(?{})/:
9446                                    collapse // to /,  and skip encoding src
9447         delimp                  if non-null, this is set to the position of
9448                                 the closing delimiter, or just after it if
9449                                 the closing and opening delimiters differ
9450                                 (i.e., the opening delimiter of a substitu-
9451                                 tion replacement)
9452    returns: position to continue reading from buffer
9453    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9454         updates the read buffer.
9455
9456    This subroutine pulls a string out of the input.  It is called for:
9457         q               single quotes           q(literal text)
9458         '               single quotes           'literal text'
9459         qq              double quotes           qq(interpolate $here please)
9460         "               double quotes           "interpolate $here please"
9461         qx              backticks               qx(/bin/ls -l)
9462         `               backticks               `/bin/ls -l`
9463         qw              quote words             @EXPORT_OK = qw( func() $spam )
9464         m//             regexp match            m/this/
9465         s///            regexp substitute       s/this/that/
9466         tr///           string transliterate    tr/this/that/
9467         y///            string transliterate    y/this/that/
9468         ($*@)           sub prototypes          sub foo ($)
9469         (stuff)         sub attr parameters     sub foo : attr(stuff)
9470         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9471
9472    In most of these cases (all but <>, patterns and transliterate)
9473    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9474    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9475    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9476    calls scan_str().
9477
9478    It skips whitespace before the string starts, and treats the first
9479    character as the delimiter.  If the delimiter is one of ([{< then
9480    the corresponding "close" character )]}> is used as the closing
9481    delimiter.  It allows quoting of delimiters, and if the string has
9482    balanced delimiters ([{<>}]) it allows nesting.
9483
9484    On success, the SV with the resulting string is put into lex_stuff or,
9485    if that is already non-NULL, into lex_repl. The second case occurs only
9486    when parsing the RHS of the special constructs s/// and tr/// (y///).
9487    For convenience, the terminating delimiter character is stuffed into
9488    SvIVX of the SV.
9489 */
9490
9491 STATIC char *
9492 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9493                  char **delimp
9494     )
9495 {
9496     dVAR;
9497     SV *sv;                     /* scalar value: string */
9498     const char *tmps;           /* temp string, used for delimiter matching */
9499     char *s = start;            /* current position in the buffer */
9500     char term;                  /* terminating character */
9501     char *to;                   /* current position in the sv's data */
9502     I32 brackets = 1;           /* bracket nesting level */
9503     bool has_utf8 = FALSE;      /* is there any utf8 content? */
9504     I32 termcode;               /* terminating char. code */
9505     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
9506     STRLEN termlen;             /* length of terminating string */
9507     int last_off = 0;           /* last position for nesting bracket */
9508     line_t herelines;
9509
9510     PERL_ARGS_ASSERT_SCAN_STR;
9511
9512     /* skip space before the delimiter */
9513     if (isSPACE(*s)) {
9514         s = PEEKSPACE(s);
9515     }
9516
9517     /* mark where we are, in case we need to report errors */
9518     CLINE;
9519
9520     /* after skipping whitespace, the next character is the terminator */
9521     term = *s;
9522     if (!UTF) {
9523         termcode = termstr[0] = term;
9524         termlen = 1;
9525     }
9526     else {
9527         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9528         Copy(s, termstr, termlen, U8);
9529         if (!UTF8_IS_INVARIANT(term))
9530             has_utf8 = TRUE;
9531     }
9532
9533     /* mark where we are */
9534     PL_multi_start = CopLINE(PL_curcop);
9535     PL_multi_open = term;
9536     herelines = PL_parser->herelines;
9537
9538     /* find corresponding closing delimiter */
9539     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9540         termcode = termstr[0] = term = tmps[5];
9541
9542     PL_multi_close = term;
9543
9544     if (PL_multi_open == PL_multi_close) {
9545         keep_bracketed_quoted = FALSE;
9546     }
9547
9548     /* create a new SV to hold the contents.  79 is the SV's initial length.
9549        What a random number. */
9550     sv = newSV_type(SVt_PVIV);
9551     SvGROW(sv, 80);
9552     SvIV_set(sv, termcode);
9553     (void)SvPOK_only(sv);               /* validate pointer */
9554
9555     /* move past delimiter and try to read a complete string */
9556     if (keep_delims)
9557         sv_catpvn(sv, s, termlen);
9558     s += termlen;
9559     for (;;) {
9560         if (PL_encoding && !UTF && !re_reparse) {
9561             bool cont = TRUE;
9562
9563             while (cont) {
9564                 int offset = s - SvPVX_const(PL_linestr);
9565                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9566                                            &offset, (char*)termstr, termlen);
9567                 const char *ns;
9568                 char *svlast;
9569
9570                 if (SvIsCOW(PL_linestr)) {
9571                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9572                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9573                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9574                     char *buf = SvPVX(PL_linestr);
9575                     bufend_pos = PL_parser->bufend - buf;
9576                     bufptr_pos = PL_parser->bufptr - buf;
9577                     oldbufptr_pos = PL_parser->oldbufptr - buf;
9578                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9579                     linestart_pos = PL_parser->linestart - buf;
9580                     last_uni_pos = PL_parser->last_uni
9581                         ? PL_parser->last_uni - buf
9582                         : 0;
9583                     last_lop_pos = PL_parser->last_lop
9584                         ? PL_parser->last_lop - buf
9585                         : 0;
9586                     re_eval_start_pos =
9587                         PL_parser->lex_shared->re_eval_start ?
9588                             PL_parser->lex_shared->re_eval_start - buf : 0;
9589                     s_pos = s - buf;
9590
9591                     sv_force_normal(PL_linestr);
9592
9593                     buf = SvPVX(PL_linestr);
9594                     PL_parser->bufend = buf + bufend_pos;
9595                     PL_parser->bufptr = buf + bufptr_pos;
9596                     PL_parser->oldbufptr = buf + oldbufptr_pos;
9597                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9598                     PL_parser->linestart = buf + linestart_pos;
9599                     if (PL_parser->last_uni)
9600                         PL_parser->last_uni = buf + last_uni_pos;
9601                     if (PL_parser->last_lop)
9602                         PL_parser->last_lop = buf + last_lop_pos;
9603                     if (PL_parser->lex_shared->re_eval_start)
9604                         PL_parser->lex_shared->re_eval_start  =
9605                             buf + re_eval_start_pos;
9606                     s = buf + s_pos;
9607                 }
9608                 ns = SvPVX_const(PL_linestr) + offset;
9609                 svlast = SvEND(sv) - 1;
9610
9611                 for (; s < ns; s++) {
9612                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9613                         COPLINE_INC_WITH_HERELINES;
9614                 }
9615                 if (!found)
9616                     goto read_more_line;
9617                 else {
9618                     /* handle quoted delimiters */
9619                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9620                         const char *t;
9621                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9622                             t--;
9623                         if ((svlast-1 - t) % 2) {
9624                             if (!keep_bracketed_quoted) {
9625                                 *(svlast-1) = term;
9626                                 *svlast = '\0';
9627                                 SvCUR_set(sv, SvCUR(sv) - 1);
9628                             }
9629                             continue;
9630                         }
9631                     }
9632                     if (PL_multi_open == PL_multi_close) {
9633                         cont = FALSE;
9634                     }
9635                     else {
9636                         const char *t;
9637                         char *w;
9638                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9639                             /* At here, all closes are "was quoted" one,
9640                                so we don't check PL_multi_close. */
9641                             if (*t == '\\') {
9642                                 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9643                                     t++;
9644                                 else
9645                                     *w++ = *t++;
9646                             }
9647                             else if (*t == PL_multi_open)
9648                                 brackets++;
9649
9650                             *w = *t;
9651                         }
9652                         if (w < t) {
9653                             *w++ = term;
9654                             *w = '\0';
9655                             SvCUR_set(sv, w - SvPVX_const(sv));
9656                         }
9657                         last_off = w - SvPVX(sv);
9658                         if (--brackets <= 0)
9659                             cont = FALSE;
9660                     }
9661                 }
9662             }
9663             if (!keep_delims) {
9664                 SvCUR_set(sv, SvCUR(sv) - 1);
9665                 *SvEND(sv) = '\0';
9666             }
9667             break;
9668         }
9669
9670         /* extend sv if need be */
9671         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9672         /* set 'to' to the next character in the sv's string */
9673         to = SvPVX(sv)+SvCUR(sv);
9674
9675         /* if open delimiter is the close delimiter read unbridle */
9676         if (PL_multi_open == PL_multi_close) {
9677             for (; s < PL_bufend; s++,to++) {
9678                 /* embedded newlines increment the current line number */
9679                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9680                     COPLINE_INC_WITH_HERELINES;
9681                 /* handle quoted delimiters */
9682                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9683                     if (!keep_bracketed_quoted
9684                         && (s[1] == term
9685                             || (re_reparse && s[1] == '\\'))
9686                     )
9687                         s++;
9688                     else /* any other quotes are simply copied straight through */
9689                         *to++ = *s++;
9690                 }
9691                 /* terminate when run out of buffer (the for() condition), or
9692                    have found the terminator */
9693                 else if (*s == term) {
9694                     if (termlen == 1)
9695                         break;
9696                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9697                         break;
9698                 }
9699                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9700                     has_utf8 = TRUE;
9701                 *to = *s;
9702             }
9703         }
9704
9705         /* if the terminator isn't the same as the start character (e.g.,
9706            matched brackets), we have to allow more in the quoting, and
9707            be prepared for nested brackets.
9708         */
9709         else {
9710             /* read until we run out of string, or we find the terminator */
9711             for (; s < PL_bufend; s++,to++) {
9712                 /* embedded newlines increment the line count */
9713                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9714                     COPLINE_INC_WITH_HERELINES;
9715                 /* backslashes can escape the open or closing characters */
9716                 if (*s == '\\' && s+1 < PL_bufend) {
9717                     if (!keep_bracketed_quoted &&
9718                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9719                     {
9720                         s++;
9721                     }
9722                     else
9723                         *to++ = *s++;
9724                 }
9725                 /* allow nested opens and closes */
9726                 else if (*s == PL_multi_close && --brackets <= 0)
9727                     break;
9728                 else if (*s == PL_multi_open)
9729                     brackets++;
9730                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9731                     has_utf8 = TRUE;
9732                 *to = *s;
9733             }
9734         }
9735         /* terminate the copied string and update the sv's end-of-string */
9736         *to = '\0';
9737         SvCUR_set(sv, to - SvPVX_const(sv));
9738
9739         /*
9740          * this next chunk reads more into the buffer if we're not done yet
9741          */
9742
9743         if (s < PL_bufend)
9744             break;              /* handle case where we are done yet :-) */
9745
9746 #ifndef PERL_STRICT_CR
9747         if (to - SvPVX_const(sv) >= 2) {
9748             if ((to[-2] == '\r' && to[-1] == '\n') ||
9749                 (to[-2] == '\n' && to[-1] == '\r'))
9750             {
9751                 to[-2] = '\n';
9752                 to--;
9753                 SvCUR_set(sv, to - SvPVX_const(sv));
9754             }
9755             else if (to[-1] == '\r')
9756                 to[-1] = '\n';
9757         }
9758         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9759             to[-1] = '\n';
9760 #endif
9761
9762      read_more_line:
9763         /* if we're out of file, or a read fails, bail and reset the current
9764            line marker so we can report where the unterminated string began
9765         */
9766         COPLINE_INC_WITH_HERELINES;
9767         PL_bufptr = PL_bufend;
9768         if (!lex_next_chunk(0)) {
9769             sv_free(sv);
9770             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9771             return NULL;
9772         }
9773         s = PL_bufptr;
9774     }
9775
9776     /* at this point, we have successfully read the delimited string */
9777
9778     if (!PL_encoding || UTF || re_reparse) {
9779
9780         if (keep_delims)
9781             sv_catpvn(sv, s, termlen);
9782         s += termlen;
9783     }
9784     if (has_utf8 || (PL_encoding && !re_reparse))
9785         SvUTF8_on(sv);
9786
9787     PL_multi_end = CopLINE(PL_curcop);
9788     CopLINE_set(PL_curcop, PL_multi_start);
9789     PL_parser->herelines = herelines;
9790
9791     /* if we allocated too much space, give some back */
9792     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9793         SvLEN_set(sv, SvCUR(sv) + 1);
9794         SvPV_renew(sv, SvLEN(sv));
9795     }
9796
9797     /* decide whether this is the first or second quoted string we've read
9798        for this op
9799     */
9800
9801     if (PL_lex_stuff)
9802         PL_sublex_info.repl = sv;
9803     else
9804         PL_lex_stuff = sv;
9805     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9806     return s;
9807 }
9808
9809 /*
9810   scan_num
9811   takes: pointer to position in buffer
9812   returns: pointer to new position in buffer
9813   side-effects: builds ops for the constant in pl_yylval.op
9814
9815   Read a number in any of the formats that Perl accepts:
9816
9817   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9818   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9819   0b[01](_?[01])*
9820   0[0-7](_?[0-7])*
9821   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9822
9823   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9824   thing it reads.
9825
9826   If it reads a number without a decimal point or an exponent, it will
9827   try converting the number to an integer and see if it can do so
9828   without loss of precision.
9829 */
9830
9831 char *
9832 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9833 {
9834     dVAR;
9835     const char *s = start;      /* current position in buffer */
9836     char *d;                    /* destination in temp buffer */
9837     char *e;                    /* end of temp buffer */
9838     NV nv;                              /* number read, as a double */
9839     SV *sv = NULL;                      /* place to put the converted number */
9840     bool floatit;                       /* boolean: int or float? */
9841     const char *lastub = NULL;          /* position of last underbar */
9842     static const char* const number_too_long = "Number too long";
9843
9844     PERL_ARGS_ASSERT_SCAN_NUM;
9845
9846     /* We use the first character to decide what type of number this is */
9847
9848     switch (*s) {
9849     default:
9850         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
9851
9852     /* if it starts with a 0, it could be an octal number, a decimal in
9853        0.13 disguise, or a hexadecimal number, or a binary number. */
9854     case '0':
9855         {
9856           /* variables:
9857              u          holds the "number so far"
9858              shift      the power of 2 of the base
9859                         (hex == 4, octal == 3, binary == 1)
9860              overflowed was the number more than we can hold?
9861
9862              Shift is used when we add a digit.  It also serves as an "are
9863              we in octal/hex/binary?" indicator to disallow hex characters
9864              when in octal mode.
9865            */
9866             NV n = 0.0;
9867             UV u = 0;
9868             I32 shift;
9869             bool overflowed = FALSE;
9870             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
9871             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
9872             static const char* const bases[5] =
9873               { "", "binary", "", "octal", "hexadecimal" };
9874             static const char* const Bases[5] =
9875               { "", "Binary", "", "Octal", "Hexadecimal" };
9876             static const char* const maxima[5] =
9877               { "",
9878                 "0b11111111111111111111111111111111",
9879                 "",
9880                 "037777777777",
9881                 "0xffffffff" };
9882             const char *base, *Base, *max;
9883
9884             /* check for hex */
9885             if (s[1] == 'x' || s[1] == 'X') {
9886                 shift = 4;
9887                 s += 2;
9888                 just_zero = FALSE;
9889             } else if (s[1] == 'b' || s[1] == 'B') {
9890                 shift = 1;
9891                 s += 2;
9892                 just_zero = FALSE;
9893             }
9894             /* check for a decimal in disguise */
9895             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
9896                 goto decimal;
9897             /* so it must be octal */
9898             else {
9899                 shift = 3;
9900                 s++;
9901             }
9902
9903             if (*s == '_') {
9904                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9905                                "Misplaced _ in number");
9906                lastub = s++;
9907             }
9908
9909             base = bases[shift];
9910             Base = Bases[shift];
9911             max  = maxima[shift];
9912
9913             /* read the rest of the number */
9914             for (;;) {
9915                 /* x is used in the overflow test,
9916                    b is the digit we're adding on. */
9917                 UV x, b;
9918
9919                 switch (*s) {
9920
9921                 /* if we don't mention it, we're done */
9922                 default:
9923                     goto out;
9924
9925                 /* _ are ignored -- but warned about if consecutive */
9926                 case '_':
9927                     if (lastub && s == lastub + 1)
9928                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9929                                        "Misplaced _ in number");
9930                     lastub = s++;
9931                     break;
9932
9933                 /* 8 and 9 are not octal */
9934                 case '8': case '9':
9935                     if (shift == 3)
9936                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
9937                     /* FALLTHROUGH */
9938
9939                 /* octal digits */
9940                 case '2': case '3': case '4':
9941                 case '5': case '6': case '7':
9942                     if (shift == 1)
9943                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
9944                     /* FALLTHROUGH */
9945
9946                 case '0': case '1':
9947                     b = *s++ & 15;              /* ASCII digit -> value of digit */
9948                     goto digit;
9949
9950                 /* hex digits */
9951                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
9952                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
9953                     /* make sure they said 0x */
9954                     if (shift != 4)
9955                         goto out;
9956                     b = (*s++ & 7) + 9;
9957
9958                     /* Prepare to put the digit we have onto the end
9959                        of the number so far.  We check for overflows.
9960                     */
9961
9962                   digit:
9963                     just_zero = FALSE;
9964                     if (!overflowed) {
9965                         x = u << shift; /* make room for the digit */
9966
9967                         if ((x >> shift) != u
9968                             && !(PL_hints & HINT_NEW_BINARY)) {
9969                             overflowed = TRUE;
9970                             n = (NV) u;
9971                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
9972                                              "Integer overflow in %s number",
9973                                              base);
9974                         } else
9975                             u = x | b;          /* add the digit to the end */
9976                     }
9977                     if (overflowed) {
9978                         n *= nvshift[shift];
9979                         /* If an NV has not enough bits in its
9980                          * mantissa to represent an UV this summing of
9981                          * small low-order numbers is a waste of time
9982                          * (because the NV cannot preserve the
9983                          * low-order bits anyway): we could just
9984                          * remember when did we overflow and in the
9985                          * end just multiply n by the right
9986                          * amount. */
9987                         n += (NV) b;
9988                     }
9989                     break;
9990                 }
9991             }
9992
9993           /* if we get here, we had success: make a scalar value from
9994              the number.
9995           */
9996           out:
9997
9998             /* final misplaced underbar check */
9999             if (s[-1] == '_') {
10000                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10001             }
10002
10003             if (overflowed) {
10004                 if (n > 4294967295.0)
10005                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10006                                    "%s number > %s non-portable",
10007                                    Base, max);
10008                 sv = newSVnv(n);
10009             }
10010             else {
10011 #if UVSIZE > 4
10012                 if (u > 0xffffffff)
10013                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10014                                    "%s number > %s non-portable",
10015                                    Base, max);
10016 #endif
10017                 sv = newSVuv(u);
10018             }
10019             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10020                 sv = new_constant(start, s - start, "integer",
10021                                   sv, NULL, NULL, 0);
10022             else if (PL_hints & HINT_NEW_BINARY)
10023                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10024         }
10025         break;
10026
10027     /*
10028       handle decimal numbers.
10029       we're also sent here when we read a 0 as the first digit
10030     */
10031     case '1': case '2': case '3': case '4': case '5':
10032     case '6': case '7': case '8': case '9': case '.':
10033       decimal:
10034         d = PL_tokenbuf;
10035         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10036         floatit = FALSE;
10037
10038         /* read next group of digits and _ and copy into d */
10039         while (isDIGIT(*s) || *s == '_') {
10040             /* skip underscores, checking for misplaced ones
10041                if -w is on
10042             */
10043             if (*s == '_') {
10044                 if (lastub && s == lastub + 1)
10045                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10046                                    "Misplaced _ in number");
10047                 lastub = s++;
10048             }
10049             else {
10050                 /* check for end of fixed-length buffer */
10051                 if (d >= e)
10052                     Perl_croak(aTHX_ "%s", number_too_long);
10053                 /* if we're ok, copy the character */
10054                 *d++ = *s++;
10055             }
10056         }
10057
10058         /* final misplaced underbar check */
10059         if (lastub && s == lastub + 1) {
10060             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10061         }
10062
10063         /* read a decimal portion if there is one.  avoid
10064            3..5 being interpreted as the number 3. followed
10065            by .5
10066         */
10067         if (*s == '.' && s[1] != '.') {
10068             floatit = TRUE;
10069             *d++ = *s++;
10070
10071             if (*s == '_') {
10072                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10073                                "Misplaced _ in number");
10074                 lastub = s;
10075             }
10076
10077             /* copy, ignoring underbars, until we run out of digits.
10078             */
10079             for (; isDIGIT(*s) || *s == '_'; s++) {
10080                 /* fixed length buffer check */
10081                 if (d >= e)
10082                     Perl_croak(aTHX_ "%s", number_too_long);
10083                 if (*s == '_') {
10084                    if (lastub && s == lastub + 1)
10085                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10086                                       "Misplaced _ in number");
10087                    lastub = s;
10088                 }
10089                 else
10090                     *d++ = *s;
10091             }
10092             /* fractional part ending in underbar? */
10093             if (s[-1] == '_') {
10094                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10095                                "Misplaced _ in number");
10096             }
10097             if (*s == '.' && isDIGIT(s[1])) {
10098                 /* oops, it's really a v-string, but without the "v" */
10099                 s = start;
10100                 goto vstring;
10101             }
10102         }
10103
10104         /* read exponent part, if present */
10105         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10106             floatit = TRUE;
10107             s++;
10108
10109             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10110             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10111
10112             /* stray preinitial _ */
10113             if (*s == '_') {
10114                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10115                                "Misplaced _ in number");
10116                 lastub = s++;
10117             }
10118
10119             /* allow positive or negative exponent */
10120             if (*s == '+' || *s == '-')
10121                 *d++ = *s++;
10122
10123             /* stray initial _ */
10124             if (*s == '_') {
10125                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10126                                "Misplaced _ in number");
10127                 lastub = s++;
10128             }
10129
10130             /* read digits of exponent */
10131             while (isDIGIT(*s) || *s == '_') {
10132                 if (isDIGIT(*s)) {
10133                     if (d >= e)
10134                         Perl_croak(aTHX_ "%s", number_too_long);
10135                     *d++ = *s++;
10136                 }
10137                 else {
10138                    if (((lastub && s == lastub + 1) ||
10139                         (!isDIGIT(s[1]) && s[1] != '_')))
10140                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10141                                       "Misplaced _ in number");
10142                    lastub = s++;
10143                 }
10144             }
10145         }
10146
10147
10148         /*
10149            We try to do an integer conversion first if no characters
10150            indicating "float" have been found.
10151          */
10152
10153         if (!floatit) {
10154             UV uv;
10155             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10156
10157             if (flags == IS_NUMBER_IN_UV) {
10158               if (uv <= IV_MAX)
10159                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10160               else
10161                 sv = newSVuv(uv);
10162             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10163               if (uv <= (UV) IV_MIN)
10164                 sv = newSViv(-(IV)uv);
10165               else
10166                 floatit = TRUE;
10167             } else
10168               floatit = TRUE;
10169         }
10170         if (floatit) {
10171             STORE_NUMERIC_LOCAL_SET_STANDARD();
10172             /* terminate the string */
10173             *d = '\0';
10174             nv = Atof(PL_tokenbuf);
10175             RESTORE_NUMERIC_LOCAL();
10176             sv = newSVnv(nv);
10177         }
10178
10179         if ( floatit
10180              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10181             const char *const key = floatit ? "float" : "integer";
10182             const STRLEN keylen = floatit ? 5 : 7;
10183             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10184                                 key, keylen, sv, NULL, NULL, 0);
10185         }
10186         break;
10187
10188     /* if it starts with a v, it could be a v-string */
10189     case 'v':
10190 vstring:
10191                 sv = newSV(5); /* preallocate storage space */
10192                 ENTER_with_name("scan_vstring");
10193                 SAVEFREESV(sv);
10194                 s = scan_vstring(s, PL_bufend, sv);
10195                 SvREFCNT_inc_simple_void_NN(sv);
10196                 LEAVE_with_name("scan_vstring");
10197         break;
10198     }
10199
10200     /* make the op for the constant and return */
10201
10202     if (sv)
10203         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10204     else
10205         lvalp->opval = NULL;
10206
10207     return (char *)s;
10208 }
10209
10210 STATIC char *
10211 S_scan_formline(pTHX_ char *s)
10212 {
10213     dVAR;
10214     char *eol;
10215     char *t;
10216     SV * const stuff = newSVpvs("");
10217     bool needargs = FALSE;
10218     bool eofmt = FALSE;
10219
10220     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10221
10222     while (!needargs) {
10223         if (*s == '.') {
10224             t = s+1;
10225 #ifdef PERL_STRICT_CR
10226             while (SPACE_OR_TAB(*t))
10227                 t++;
10228 #else
10229             while (SPACE_OR_TAB(*t) || *t == '\r')
10230                 t++;
10231 #endif
10232             if (*t == '\n' || t == PL_bufend) {
10233                 eofmt = TRUE;
10234                 break;
10235             }
10236         }
10237         eol = (char *) memchr(s,'\n',PL_bufend-s);
10238         if (!eol++)
10239                 eol = PL_bufend;
10240         if (*s != '#') {
10241             for (t = s; t < eol; t++) {
10242                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10243                     needargs = FALSE;
10244                     goto enough;        /* ~~ must be first line in formline */
10245                 }
10246                 if (*t == '@' || *t == '^')
10247                     needargs = TRUE;
10248             }
10249             if (eol > s) {
10250                 sv_catpvn(stuff, s, eol-s);
10251 #ifndef PERL_STRICT_CR
10252                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10253                     char *end = SvPVX(stuff) + SvCUR(stuff);
10254                     end[-2] = '\n';
10255                     end[-1] = '\0';
10256                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10257                 }
10258 #endif
10259             }
10260             else
10261               break;
10262         }
10263         s = (char*)eol;
10264         if ((PL_rsfp || PL_parser->filtered)
10265          && PL_parser->form_lex_state == LEX_NORMAL) {
10266             bool got_some;
10267             PL_bufptr = PL_bufend;
10268             COPLINE_INC_WITH_HERELINES;
10269             got_some = lex_next_chunk(0);
10270             CopLINE_dec(PL_curcop);
10271             s = PL_bufptr;
10272             if (!got_some)
10273                 break;
10274         }
10275         incline(s);
10276     }
10277   enough:
10278     if (!SvCUR(stuff) || needargs)
10279         PL_lex_state = PL_parser->form_lex_state;
10280     if (SvCUR(stuff)) {
10281         PL_expect = XSTATE;
10282         if (needargs) {
10283             const char *s2 = s;
10284             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10285                 || *s2 == 013)
10286                 s2++;
10287             if (*s2 == '{') {
10288                 PL_expect = XTERMBLOCK;
10289                 NEXTVAL_NEXTTOKE.ival = 0;
10290                 force_next(DO);
10291             }
10292             NEXTVAL_NEXTTOKE.ival = 0;
10293             force_next(FORMLBRACK);
10294         }
10295         if (!IN_BYTES) {
10296             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10297                 SvUTF8_on(stuff);
10298             else if (PL_encoding)
10299                 sv_recode_to_utf8(stuff, PL_encoding);
10300         }
10301         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10302         force_next(THING);
10303     }
10304     else {
10305         SvREFCNT_dec(stuff);
10306         if (eofmt)
10307             PL_lex_formbrack = 0;
10308     }
10309     return s;
10310 }
10311
10312 I32
10313 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10314 {
10315     dVAR;
10316     const I32 oldsavestack_ix = PL_savestack_ix;
10317     CV* const outsidecv = PL_compcv;
10318
10319     SAVEI32(PL_subline);
10320     save_item(PL_subname);
10321     SAVESPTR(PL_compcv);
10322
10323     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10324     CvFLAGS(PL_compcv) |= flags;
10325
10326     PL_subline = CopLINE(PL_curcop);
10327     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10328     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10329     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10330     if (outsidecv && CvPADLIST(outsidecv))
10331         CvPADLIST(PL_compcv)->xpadl_outid =
10332             PadlistNAMES(CvPADLIST(outsidecv));
10333
10334     return oldsavestack_ix;
10335 }
10336
10337 static int
10338 S_yywarn(pTHX_ const char *const s, U32 flags)
10339 {
10340     dVAR;
10341
10342     PERL_ARGS_ASSERT_YYWARN;
10343
10344     PL_in_eval |= EVAL_WARNONLY;
10345     yyerror_pv(s, flags);
10346     PL_in_eval &= ~EVAL_WARNONLY;
10347     return 0;
10348 }
10349
10350 int
10351 Perl_yyerror(pTHX_ const char *const s)
10352 {
10353     PERL_ARGS_ASSERT_YYERROR;
10354     return yyerror_pvn(s, strlen(s), 0);
10355 }
10356
10357 int
10358 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10359 {
10360     PERL_ARGS_ASSERT_YYERROR_PV;
10361     return yyerror_pvn(s, strlen(s), flags);
10362 }
10363
10364 int
10365 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10366 {
10367     dVAR;
10368     const char *context = NULL;
10369     int contlen = -1;
10370     SV *msg;
10371     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10372     int yychar  = PL_parser->yychar;
10373
10374     PERL_ARGS_ASSERT_YYERROR_PVN;
10375
10376     if (!yychar || (yychar == ';' && !PL_rsfp))
10377         sv_catpvs(where_sv, "at EOF");
10378     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10379       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10380       PL_oldbufptr != PL_bufptr) {
10381         /*
10382                 Only for NetWare:
10383                 The code below is removed for NetWare because it abends/crashes on NetWare
10384                 when the script has error such as not having the closing quotes like:
10385                     if ($var eq "value)
10386                 Checking of white spaces is anyway done in NetWare code.
10387         */
10388 #ifndef NETWARE
10389         while (isSPACE(*PL_oldoldbufptr))
10390             PL_oldoldbufptr++;
10391 #endif
10392         context = PL_oldoldbufptr;
10393         contlen = PL_bufptr - PL_oldoldbufptr;
10394     }
10395     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10396       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10397         /*
10398                 Only for NetWare:
10399                 The code below is removed for NetWare because it abends/crashes on NetWare
10400                 when the script has error such as not having the closing quotes like:
10401                     if ($var eq "value)
10402                 Checking of white spaces is anyway done in NetWare code.
10403         */
10404 #ifndef NETWARE
10405         while (isSPACE(*PL_oldbufptr))
10406             PL_oldbufptr++;
10407 #endif
10408         context = PL_oldbufptr;
10409         contlen = PL_bufptr - PL_oldbufptr;
10410     }
10411     else if (yychar > 255)
10412         sv_catpvs(where_sv, "next token ???");
10413     else if (yychar == -2) { /* YYEMPTY */
10414         if (PL_lex_state == LEX_NORMAL ||
10415            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10416             sv_catpvs(where_sv, "at end of line");
10417         else if (PL_lex_inpat)
10418             sv_catpvs(where_sv, "within pattern");
10419         else
10420             sv_catpvs(where_sv, "within string");
10421     }
10422     else {
10423         sv_catpvs(where_sv, "next char ");
10424         if (yychar < 32)
10425             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10426         else if (isPRINT_LC(yychar)) {
10427             const char string = yychar;
10428             sv_catpvn(where_sv, &string, 1);
10429         }
10430         else
10431             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10432     }
10433     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10434     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10435         OutCopFILE(PL_curcop),
10436         (IV)(PL_parser->preambling == NOLINE
10437                ? CopLINE(PL_curcop)
10438                : PL_parser->preambling));
10439     if (context)
10440         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10441                              UTF8fARG(UTF, contlen, context));
10442     else
10443         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10444     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10445         Perl_sv_catpvf(aTHX_ msg,
10446         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10447                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10448         PL_multi_end = 0;
10449     }
10450     if (PL_in_eval & EVAL_WARNONLY) {
10451         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10452     }
10453     else
10454         qerror(msg);
10455     if (PL_error_count >= 10) {
10456         SV * errsv;
10457         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10458             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10459                        SVfARG(errsv), OutCopFILE(PL_curcop));
10460         else
10461             Perl_croak(aTHX_ "%s has too many errors.\n",
10462             OutCopFILE(PL_curcop));
10463     }
10464     PL_in_my = 0;
10465     PL_in_my_stash = NULL;
10466     return 0;
10467 }
10468
10469 STATIC char*
10470 S_swallow_bom(pTHX_ U8 *s)
10471 {
10472     dVAR;
10473     const STRLEN slen = SvCUR(PL_linestr);
10474
10475     PERL_ARGS_ASSERT_SWALLOW_BOM;
10476
10477     switch (s[0]) {
10478     case 0xFF:
10479         if (s[1] == 0xFE) {
10480             /* UTF-16 little-endian? (or UTF-32LE?) */
10481             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10482                 /* diag_listed_as: Unsupported script encoding %s */
10483                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10484 #ifndef PERL_NO_UTF16_FILTER
10485             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10486             s += 2;
10487             if (PL_bufend > (char*)s) {
10488                 s = add_utf16_textfilter(s, TRUE);
10489             }
10490 #else
10491             /* diag_listed_as: Unsupported script encoding %s */
10492             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10493 #endif
10494         }
10495         break;
10496     case 0xFE:
10497         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10498 #ifndef PERL_NO_UTF16_FILTER
10499             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10500             s += 2;
10501             if (PL_bufend > (char *)s) {
10502                 s = add_utf16_textfilter(s, FALSE);
10503             }
10504 #else
10505             /* diag_listed_as: Unsupported script encoding %s */
10506             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10507 #endif
10508         }
10509         break;
10510     case BOM_UTF8_FIRST_BYTE: {
10511         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10512         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10513             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10514             s += len + 1;                      /* UTF-8 */
10515         }
10516         break;
10517     }
10518     case 0:
10519         if (slen > 3) {
10520              if (s[1] == 0) {
10521                   if (s[2] == 0xFE && s[3] == 0xFF) {
10522                        /* UTF-32 big-endian */
10523                        /* diag_listed_as: Unsupported script encoding %s */
10524                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10525                   }
10526              }
10527              else if (s[2] == 0 && s[3] != 0) {
10528                   /* Leading bytes
10529                    * 00 xx 00 xx
10530                    * are a good indicator of UTF-16BE. */
10531 #ifndef PERL_NO_UTF16_FILTER
10532                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10533                   s = add_utf16_textfilter(s, FALSE);
10534 #else
10535                   /* diag_listed_as: Unsupported script encoding %s */
10536                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10537 #endif
10538              }
10539         }
10540         break;
10541
10542     default:
10543          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10544                   /* Leading bytes
10545                    * xx 00 xx 00
10546                    * are a good indicator of UTF-16LE. */
10547 #ifndef PERL_NO_UTF16_FILTER
10548               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10549               s = add_utf16_textfilter(s, TRUE);
10550 #else
10551               /* diag_listed_as: Unsupported script encoding %s */
10552               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10553 #endif
10554          }
10555     }
10556     return (char*)s;
10557 }
10558
10559
10560 #ifndef PERL_NO_UTF16_FILTER
10561 static I32
10562 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10563 {
10564     dVAR;
10565     SV *const filter = FILTER_DATA(idx);
10566     /* We re-use this each time round, throwing the contents away before we
10567        return.  */
10568     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10569     SV *const utf8_buffer = filter;
10570     IV status = IoPAGE(filter);
10571     const bool reverse = cBOOL(IoLINES(filter));
10572     I32 retval;
10573
10574     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10575
10576     /* As we're automatically added, at the lowest level, and hence only called
10577        from this file, we can be sure that we're not called in block mode. Hence
10578        don't bother writing code to deal with block mode.  */
10579     if (maxlen) {
10580         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10581     }
10582     if (status < 0) {
10583         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10584     }
10585     DEBUG_P(PerlIO_printf(Perl_debug_log,
10586                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10587                           FPTR2DPTR(void *, S_utf16_textfilter),
10588                           reverse ? 'l' : 'b', idx, maxlen, status,
10589                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10590
10591     while (1) {
10592         STRLEN chars;
10593         STRLEN have;
10594         I32 newlen;
10595         U8 *end;
10596         /* First, look in our buffer of existing UTF-8 data:  */
10597         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10598
10599         if (nl) {
10600             ++nl;
10601         } else if (status == 0) {
10602             /* EOF */
10603             IoPAGE(filter) = 0;
10604             nl = SvEND(utf8_buffer);
10605         }
10606         if (nl) {
10607             STRLEN got = nl - SvPVX(utf8_buffer);
10608             /* Did we have anything to append?  */
10609             retval = got != 0;
10610             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10611             /* Everything else in this code works just fine if SVp_POK isn't
10612                set.  This, however, needs it, and we need it to work, else
10613                we loop infinitely because the buffer is never consumed.  */
10614             sv_chop(utf8_buffer, nl);
10615             break;
10616         }
10617
10618         /* OK, not a complete line there, so need to read some more UTF-16.
10619            Read an extra octect if the buffer currently has an odd number. */
10620         while (1) {
10621             if (status <= 0)
10622                 break;
10623             if (SvCUR(utf16_buffer) >= 2) {
10624                 /* Location of the high octet of the last complete code point.
10625                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10626                    *coupled* with all the benefits of partial reads and
10627                    endianness.  */
10628                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10629                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10630
10631                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10632                     break;
10633                 }
10634
10635                 /* We have the first half of a surrogate. Read more.  */
10636                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10637             }
10638
10639             status = FILTER_READ(idx + 1, utf16_buffer,
10640                                  160 + (SvCUR(utf16_buffer) & 1));
10641             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10642             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10643             if (status < 0) {
10644                 /* Error */
10645                 IoPAGE(filter) = status;
10646                 return status;
10647             }
10648         }
10649
10650         chars = SvCUR(utf16_buffer) >> 1;
10651         have = SvCUR(utf8_buffer);
10652         SvGROW(utf8_buffer, have + chars * 3 + 1);
10653
10654         if (reverse) {
10655             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10656                                          (U8*)SvPVX_const(utf8_buffer) + have,
10657                                          chars * 2, &newlen);
10658         } else {
10659             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10660                                 (U8*)SvPVX_const(utf8_buffer) + have,
10661                                 chars * 2, &newlen);
10662         }
10663         SvCUR_set(utf8_buffer, have + newlen);
10664         *end = '\0';
10665
10666         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10667            it's private to us, and utf16_to_utf8{,reversed} take a
10668            (pointer,length) pair, rather than a NUL-terminated string.  */
10669         if(SvCUR(utf16_buffer) & 1) {
10670             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10671             SvCUR_set(utf16_buffer, 1);
10672         } else {
10673             SvCUR_set(utf16_buffer, 0);
10674         }
10675     }
10676     DEBUG_P(PerlIO_printf(Perl_debug_log,
10677                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10678                           status,
10679                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10680     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10681     return retval;
10682 }
10683
10684 static U8 *
10685 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10686 {
10687     SV *filter = filter_add(S_utf16_textfilter, NULL);
10688
10689     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10690
10691     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10692     sv_setpvs(filter, "");
10693     IoLINES(filter) = reversed;
10694     IoPAGE(filter) = 1; /* Not EOF */
10695
10696     /* Sadly, we have to return a valid pointer, come what may, so we have to
10697        ignore any error return from this.  */
10698     SvCUR_set(PL_linestr, 0);
10699     if (FILTER_READ(0, PL_linestr, 0)) {
10700         SvUTF8_on(PL_linestr);
10701     } else {
10702         SvUTF8_on(PL_linestr);
10703     }
10704     PL_bufend = SvEND(PL_linestr);
10705     return (U8*)SvPVX(PL_linestr);
10706 }
10707 #endif
10708
10709 /*
10710 Returns a pointer to the next character after the parsed
10711 vstring, as well as updating the passed in sv.
10712
10713 Function must be called like
10714
10715         sv = sv_2mortal(newSV(5));
10716         s = scan_vstring(s,e,sv);
10717
10718 where s and e are the start and end of the string.
10719 The sv should already be large enough to store the vstring
10720 passed in, for performance reasons.
10721
10722 This function may croak if fatal warnings are enabled in the
10723 calling scope, hence the sv_2mortal in the example (to prevent
10724 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
10725 sv_2mortal.
10726
10727 */
10728
10729 char *
10730 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10731 {
10732     dVAR;
10733     const char *pos = s;
10734     const char *start = s;
10735
10736     PERL_ARGS_ASSERT_SCAN_VSTRING;
10737
10738     if (*pos == 'v') pos++;  /* get past 'v' */
10739     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10740         pos++;
10741     if ( *pos != '.') {
10742         /* this may not be a v-string if followed by => */
10743         const char *next = pos;
10744         while (next < e && isSPACE(*next))
10745             ++next;
10746         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10747             /* return string not v-string */
10748             sv_setpvn(sv,(char *)s,pos-s);
10749             return (char *)pos;
10750         }
10751     }
10752
10753     if (!isALPHA(*pos)) {
10754         U8 tmpbuf[UTF8_MAXBYTES+1];
10755
10756         if (*s == 'v')
10757             s++;  /* get past 'v' */
10758
10759         sv_setpvs(sv, "");
10760
10761         for (;;) {
10762             /* this is atoi() that tolerates underscores */
10763             U8 *tmpend;
10764             UV rev = 0;
10765             const char *end = pos;
10766             UV mult = 1;
10767             while (--end >= s) {
10768                 if (*end != '_') {
10769                     const UV orev = rev;
10770                     rev += (*end - '0') * mult;
10771                     mult *= 10;
10772                     if (orev > rev)
10773                         /* diag_listed_as: Integer overflow in %s number */
10774                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10775                                          "Integer overflow in decimal number");
10776                 }
10777             }
10778 #ifdef EBCDIC
10779             if (rev > 0x7FFFFFFF)
10780                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10781 #endif
10782             /* Append native character for the rev point */
10783             tmpend = uvchr_to_utf8(tmpbuf, rev);
10784             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10785             if (!UVCHR_IS_INVARIANT(rev))
10786                  SvUTF8_on(sv);
10787             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10788                  s = ++pos;
10789             else {
10790                  s = pos;
10791                  break;
10792             }
10793             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10794                  pos++;
10795         }
10796         SvPOK_on(sv);
10797         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10798         SvRMAGICAL_on(sv);
10799     }
10800     return (char *)s;
10801 }
10802
10803 int
10804 Perl_keyword_plugin_standard(pTHX_
10805         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10806 {
10807     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10808     PERL_UNUSED_CONTEXT;
10809     PERL_UNUSED_ARG(keyword_ptr);
10810     PERL_UNUSED_ARG(keyword_len);
10811     PERL_UNUSED_ARG(op_ptr);
10812     return KEYWORD_PLUGIN_DECLINE;
10813 }
10814
10815 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10816 static void
10817 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
10818 {
10819     SAVEI32(PL_lex_brackets);
10820     if (PL_lex_brackets > 100)
10821         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10822     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
10823     SAVEI32(PL_lex_allbrackets);
10824     PL_lex_allbrackets = 0;
10825     SAVEI8(PL_lex_fakeeof);
10826     PL_lex_fakeeof = (U8)fakeeof;
10827     if(yyparse(gramtype) && !PL_parser->error_count)
10828         qerror(Perl_mess(aTHX_ "Parse error"));
10829 }
10830
10831 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
10832 static OP *
10833 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
10834 {
10835     OP *o;
10836     ENTER;
10837     SAVEVPTR(PL_eval_root);
10838     PL_eval_root = NULL;
10839     parse_recdescent(gramtype, fakeeof);
10840     o = PL_eval_root;
10841     LEAVE;
10842     return o;
10843 }
10844
10845 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
10846 static OP *
10847 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
10848 {
10849     OP *exprop;
10850     if (flags & ~PARSE_OPTIONAL)
10851         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
10852     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
10853     if (!exprop && !(flags & PARSE_OPTIONAL)) {
10854         if (!PL_parser->error_count)
10855             qerror(Perl_mess(aTHX_ "Parse error"));
10856         exprop = newOP(OP_NULL, 0);
10857     }
10858     return exprop;
10859 }
10860
10861 /*
10862 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
10863
10864 Parse a Perl arithmetic expression.  This may contain operators of precedence
10865 down to the bit shift operators.  The expression must be followed (and thus
10866 terminated) either by a comparison or lower-precedence operator or by
10867 something that would normally terminate an expression such as semicolon.
10868 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10869 otherwise it is mandatory.  It is up to the caller to ensure that the
10870 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10871 the source of the code to be parsed and the lexical context for the
10872 expression.
10873
10874 The op tree representing the expression is returned.  If an optional
10875 expression is absent, a null pointer is returned, otherwise the pointer
10876 will be non-null.
10877
10878 If an error occurs in parsing or compilation, in most cases a valid op
10879 tree is returned anyway.  The error is reflected in the parser state,
10880 normally resulting in a single exception at the top level of parsing
10881 which covers all the compilation errors that occurred.  Some compilation
10882 errors, however, will throw an exception immediately.
10883
10884 =cut
10885 */
10886
10887 OP *
10888 Perl_parse_arithexpr(pTHX_ U32 flags)
10889 {
10890     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
10891 }
10892
10893 /*
10894 =for apidoc Amx|OP *|parse_termexpr|U32 flags
10895
10896 Parse a Perl term expression.  This may contain operators of precedence
10897 down to the assignment operators.  The expression must be followed (and thus
10898 terminated) either by a comma or lower-precedence operator or by
10899 something that would normally terminate an expression such as semicolon.
10900 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10901 otherwise it is mandatory.  It is up to the caller to ensure that the
10902 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10903 the source of the code to be parsed and the lexical context for the
10904 expression.
10905
10906 The op tree representing the expression is returned.  If an optional
10907 expression is absent, a null pointer is returned, otherwise the pointer
10908 will be non-null.
10909
10910 If an error occurs in parsing or compilation, in most cases a valid op
10911 tree is returned anyway.  The error is reflected in the parser state,
10912 normally resulting in a single exception at the top level of parsing
10913 which covers all the compilation errors that occurred.  Some compilation
10914 errors, however, will throw an exception immediately.
10915
10916 =cut
10917 */
10918
10919 OP *
10920 Perl_parse_termexpr(pTHX_ U32 flags)
10921 {
10922     return parse_expr(LEX_FAKEEOF_COMMA, flags);
10923 }
10924
10925 /*
10926 =for apidoc Amx|OP *|parse_listexpr|U32 flags
10927
10928 Parse a Perl list expression.  This may contain operators of precedence
10929 down to the comma operator.  The expression must be followed (and thus
10930 terminated) either by a low-precedence logic operator such as C<or> or by
10931 something that would normally terminate an expression such as semicolon.
10932 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10933 otherwise it is mandatory.  It is up to the caller to ensure that the
10934 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10935 the source of the code to be parsed and the lexical context for the
10936 expression.
10937
10938 The op tree representing the expression is returned.  If an optional
10939 expression is absent, a null pointer is returned, otherwise the pointer
10940 will be non-null.
10941
10942 If an error occurs in parsing or compilation, in most cases a valid op
10943 tree is returned anyway.  The error is reflected in the parser state,
10944 normally resulting in a single exception at the top level of parsing
10945 which covers all the compilation errors that occurred.  Some compilation
10946 errors, however, will throw an exception immediately.
10947
10948 =cut
10949 */
10950
10951 OP *
10952 Perl_parse_listexpr(pTHX_ U32 flags)
10953 {
10954     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
10955 }
10956
10957 /*
10958 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
10959
10960 Parse a single complete Perl expression.  This allows the full
10961 expression grammar, including the lowest-precedence operators such
10962 as C<or>.  The expression must be followed (and thus terminated) by a
10963 token that an expression would normally be terminated by: end-of-file,
10964 closing bracketing punctuation, semicolon, or one of the keywords that
10965 signals a postfix expression-statement modifier.  If I<flags> includes
10966 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
10967 mandatory.  It is up to the caller to ensure that the dynamic parser
10968 state (L</PL_parser> et al) is correctly set to reflect the source of
10969 the code to be parsed and the lexical context for the expression.
10970
10971 The op tree representing the expression is returned.  If an optional
10972 expression is absent, a null pointer is returned, otherwise the pointer
10973 will be non-null.
10974
10975 If an error occurs in parsing or compilation, in most cases a valid op
10976 tree is returned anyway.  The error is reflected in the parser state,
10977 normally resulting in a single exception at the top level of parsing
10978 which covers all the compilation errors that occurred.  Some compilation
10979 errors, however, will throw an exception immediately.
10980
10981 =cut
10982 */
10983
10984 OP *
10985 Perl_parse_fullexpr(pTHX_ U32 flags)
10986 {
10987     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
10988 }
10989
10990 /*
10991 =for apidoc Amx|OP *|parse_block|U32 flags
10992
10993 Parse a single complete Perl code block.  This consists of an opening
10994 brace, a sequence of statements, and a closing brace.  The block
10995 constitutes a lexical scope, so C<my> variables and various compile-time
10996 effects can be contained within it.  It is up to the caller to ensure
10997 that the dynamic parser state (L</PL_parser> et al) is correctly set to
10998 reflect the source of the code to be parsed and the lexical context for
10999 the statement.
11000
11001 The op tree representing the code block is returned.  This is always a
11002 real op, never a null pointer.  It will normally be a C<lineseq> list,
11003 including C<nextstate> or equivalent ops.  No ops to construct any kind
11004 of runtime scope are included by virtue of it being a block.
11005
11006 If an error occurs in parsing or compilation, in most cases a valid op
11007 tree (most likely null) is returned anyway.  The error is reflected in
11008 the parser state, normally resulting in a single exception at the top
11009 level of parsing which covers all the compilation errors that occurred.
11010 Some compilation errors, however, will throw an exception immediately.
11011
11012 The I<flags> parameter is reserved for future use, and must always
11013 be zero.
11014
11015 =cut
11016 */
11017
11018 OP *
11019 Perl_parse_block(pTHX_ U32 flags)
11020 {
11021     if (flags)
11022         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11023     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11024 }
11025
11026 /*
11027 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11028
11029 Parse a single unadorned Perl statement.  This may be a normal imperative
11030 statement or a declaration that has compile-time effect.  It does not
11031 include any label or other affixture.  It is up to the caller to ensure
11032 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11033 reflect the source of the code to be parsed and the lexical context for
11034 the statement.
11035
11036 The op tree representing the statement is returned.  This may be a
11037 null pointer if the statement is null, for example if it was actually
11038 a subroutine definition (which has compile-time side effects).  If not
11039 null, it will be ops directly implementing the statement, suitable to
11040 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11041 equivalent op (except for those embedded in a scope contained entirely
11042 within the statement).
11043
11044 If an error occurs in parsing or compilation, in most cases a valid op
11045 tree (most likely null) is returned anyway.  The error is reflected in
11046 the parser state, normally resulting in a single exception at the top
11047 level of parsing which covers all the compilation errors that occurred.
11048 Some compilation errors, however, will throw an exception immediately.
11049
11050 The I<flags> parameter is reserved for future use, and must always
11051 be zero.
11052
11053 =cut
11054 */
11055
11056 OP *
11057 Perl_parse_barestmt(pTHX_ U32 flags)
11058 {
11059     if (flags)
11060         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11061     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11062 }
11063
11064 /*
11065 =for apidoc Amx|SV *|parse_label|U32 flags
11066
11067 Parse a single label, possibly optional, of the type that may prefix a
11068 Perl statement.  It is up to the caller to ensure that the dynamic parser
11069 state (L</PL_parser> et al) is correctly set to reflect the source of
11070 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11071 label is optional, otherwise it is mandatory.
11072
11073 The name of the label is returned in the form of a fresh scalar.  If an
11074 optional label is absent, a null pointer is returned.
11075
11076 If an error occurs in parsing, which can only occur if the label is
11077 mandatory, a valid label is returned anyway.  The error is reflected in
11078 the parser state, normally resulting in a single exception at the top
11079 level of parsing which covers all the compilation errors that occurred.
11080
11081 =cut
11082 */
11083
11084 SV *
11085 Perl_parse_label(pTHX_ U32 flags)
11086 {
11087     if (flags & ~PARSE_OPTIONAL)
11088         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11089     if (PL_lex_state == LEX_KNOWNEXT) {
11090         PL_parser->yychar = yylex();
11091         if (PL_parser->yychar == LABEL) {
11092             char * const lpv = pl_yylval.pval;
11093             STRLEN llen = strlen(lpv);
11094             PL_parser->yychar = YYEMPTY;
11095             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11096         } else {
11097             yyunlex();
11098             goto no_label;
11099         }
11100     } else {
11101         char *s, *t;
11102         STRLEN wlen, bufptr_pos;
11103         lex_read_space(0);
11104         t = s = PL_bufptr;
11105         if (!isIDFIRST_lazy_if(s, UTF))
11106             goto no_label;
11107         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11108         if (word_takes_any_delimeter(s, wlen))
11109             goto no_label;
11110         bufptr_pos = s - SvPVX(PL_linestr);
11111         PL_bufptr = t;
11112         lex_read_space(LEX_KEEP_PREVIOUS);
11113         t = PL_bufptr;
11114         s = SvPVX(PL_linestr) + bufptr_pos;
11115         if (t[0] == ':' && t[1] != ':') {
11116             PL_oldoldbufptr = PL_oldbufptr;
11117             PL_oldbufptr = s;
11118             PL_bufptr = t+1;
11119             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11120         } else {
11121             PL_bufptr = s;
11122             no_label:
11123             if (flags & PARSE_OPTIONAL) {
11124                 return NULL;
11125             } else {
11126                 qerror(Perl_mess(aTHX_ "Parse error"));
11127                 return newSVpvs("x");
11128             }
11129         }
11130     }
11131 }
11132
11133 /*
11134 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11135
11136 Parse a single complete Perl statement.  This may be a normal imperative
11137 statement or a declaration that has compile-time effect, and may include
11138 optional labels.  It is up to the caller to ensure that the dynamic
11139 parser state (L</PL_parser> et al) is correctly set to reflect the source
11140 of the code to be parsed and the lexical context for the statement.
11141
11142 The op tree representing the statement is returned.  This may be a
11143 null pointer if the statement is null, for example if it was actually
11144 a subroutine definition (which has compile-time side effects).  If not
11145 null, it will be the result of a L</newSTATEOP> call, normally including
11146 a C<nextstate> or equivalent op.
11147
11148 If an error occurs in parsing or compilation, in most cases a valid op
11149 tree (most likely null) is returned anyway.  The error is reflected in
11150 the parser state, normally resulting in a single exception at the top
11151 level of parsing which covers all the compilation errors that occurred.
11152 Some compilation errors, however, will throw an exception immediately.
11153
11154 The I<flags> parameter is reserved for future use, and must always
11155 be zero.
11156
11157 =cut
11158 */
11159
11160 OP *
11161 Perl_parse_fullstmt(pTHX_ U32 flags)
11162 {
11163     if (flags)
11164         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11165     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11166 }
11167
11168 /*
11169 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11170
11171 Parse a sequence of zero or more Perl statements.  These may be normal
11172 imperative statements, including optional labels, or declarations
11173 that have compile-time effect, or any mixture thereof.  The statement
11174 sequence ends when a closing brace or end-of-file is encountered in a
11175 place where a new statement could have validly started.  It is up to
11176 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11177 is correctly set to reflect the source of the code to be parsed and the
11178 lexical context for the statements.
11179
11180 The op tree representing the statement sequence is returned.  This may
11181 be a null pointer if the statements were all null, for example if there
11182 were no statements or if there were only subroutine definitions (which
11183 have compile-time side effects).  If not null, it will be a C<lineseq>
11184 list, normally including C<nextstate> or equivalent ops.
11185
11186 If an error occurs in parsing or compilation, in most cases a valid op
11187 tree is returned anyway.  The error is reflected in the parser state,
11188 normally resulting in a single exception at the top level of parsing
11189 which covers all the compilation errors that occurred.  Some compilation
11190 errors, however, will throw an exception immediately.
11191
11192 The I<flags> parameter is reserved for future use, and must always
11193 be zero.
11194
11195 =cut
11196 */
11197
11198 OP *
11199 Perl_parse_stmtseq(pTHX_ U32 flags)
11200 {
11201     OP *stmtseqop;
11202     I32 c;
11203     if (flags)
11204         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11205     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11206     c = lex_peek_unichar(0);
11207     if (c != -1 && c != /*{*/'}')
11208         qerror(Perl_mess(aTHX_ "Parse error"));
11209     return stmtseqop;
11210 }
11211
11212 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11213 static void
11214 S_lex_token_boundary(pTHX)
11215 {
11216     PL_oldoldbufptr = PL_oldbufptr;
11217     PL_oldbufptr = PL_bufptr;
11218 }
11219
11220 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11221 static OP *
11222 S_parse_opt_lexvar(pTHX)
11223 {
11224     I32 sigil, c;
11225     char *s, *d;
11226     OP *var;
11227     lex_token_boundary();
11228     sigil = lex_read_unichar(0);
11229     if (lex_peek_unichar(0) == '#') {
11230         qerror(Perl_mess(aTHX_ "Parse error"));
11231         return NULL;
11232     }
11233     lex_read_space(0);
11234     c = lex_peek_unichar(0);
11235     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11236         return NULL;
11237     s = PL_bufptr;
11238     d = PL_tokenbuf + 1;
11239     PL_tokenbuf[0] = (char)sigil;
11240     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11241     PL_bufptr = s;
11242     if (d == PL_tokenbuf+1)
11243         return NULL;
11244     *d = 0;
11245     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11246                 OPf_MOD | (OPpLVAL_INTRO<<8));
11247     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11248     return var;
11249 }
11250
11251 OP *
11252 Perl_parse_subsignature(pTHX)
11253 {
11254     I32 c;
11255     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11256     OP *initops = NULL;
11257     lex_read_space(0);
11258     c = lex_peek_unichar(0);
11259     while (c != /*(*/')') {
11260         switch (c) {
11261             case '$': {
11262                 OP *var, *expr;
11263                 if (prev_type == 2)
11264                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11265                 var = parse_opt_lexvar();
11266                 expr = var ?
11267                     newBINOP(OP_AELEM, 0,
11268                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11269                             OP_RV2AV),
11270                         newSVOP(OP_CONST, 0, newSViv(pos))) :
11271                     NULL;
11272                 lex_read_space(0);
11273                 c = lex_peek_unichar(0);
11274                 if (c == '=') {
11275                     lex_token_boundary();
11276                     lex_read_unichar(0);
11277                     lex_read_space(0);
11278                     c = lex_peek_unichar(0);
11279                     if (c == ',' || c == /*(*/')') {
11280                         if (var)
11281                             qerror(Perl_mess(aTHX_ "Optional parameter "
11282                                     "lacks default expression"));
11283                     } else {
11284                         OP *defexpr = parse_termexpr(0);
11285                         if (defexpr->op_type == OP_UNDEF &&
11286                                 !(defexpr->op_flags & OPf_KIDS)) {
11287                             op_free(defexpr);
11288                         } else {
11289                             OP *ifop =
11290                                 newBINOP(OP_GE, 0,
11291                                     scalar(newUNOP(OP_RV2AV, 0,
11292                                             newGVOP(OP_GV, 0, PL_defgv))),
11293                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
11294                             expr = var ?
11295                                 newCONDOP(0, ifop, expr, defexpr) :
11296                                 newLOGOP(OP_OR, 0, ifop, defexpr);
11297                         }
11298                     }
11299                     prev_type = 1;
11300                 } else {
11301                     if (prev_type == 1)
11302                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
11303                                 "follows optional parameter"));
11304                     prev_type = 0;
11305                     min_arity = pos + 1;
11306                 }
11307                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11308                 if (expr)
11309                     initops = op_append_list(OP_LINESEQ, initops,
11310                                 newSTATEOP(0, NULL, expr));
11311                 max_arity = ++pos;
11312             } break;
11313             case '@':
11314             case '%': {
11315                 OP *var;
11316                 if (prev_type == 2)
11317                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11318                 var = parse_opt_lexvar();
11319                 if (c == '%') {
11320                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11321                             newBINOP(OP_BIT_AND, 0,
11322                                 scalar(newUNOP(OP_RV2AV, 0,
11323                                     newGVOP(OP_GV, 0, PL_defgv))),
11324                                 newSVOP(OP_CONST, 0, newSViv(1))),
11325                             newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11326                                 newSVOP(OP_CONST, 0,
11327                                     newSVpvs("Odd name/value argument "
11328                                         "for subroutine"))));
11329                     if (pos != min_arity)
11330                         chkop = newLOGOP(OP_AND, 0,
11331                                     newBINOP(OP_GT, 0,
11332                                         scalar(newUNOP(OP_RV2AV, 0,
11333                                             newGVOP(OP_GV, 0, PL_defgv))),
11334                                         newSVOP(OP_CONST, 0, newSViv(pos))),
11335                                     chkop);
11336                     initops = op_append_list(OP_LINESEQ,
11337                                 newSTATEOP(0, NULL, chkop),
11338                                 initops);
11339                 }
11340                 if (var) {
11341                     OP *slice = pos ?
11342                         op_prepend_elem(OP_ASLICE,
11343                             newOP(OP_PUSHMARK, 0),
11344                             newLISTOP(OP_ASLICE, 0,
11345                                 list(newRANGE(0,
11346                                     newSVOP(OP_CONST, 0, newSViv(pos)),
11347                                     newUNOP(OP_AV2ARYLEN, 0,
11348                                         ref(newUNOP(OP_RV2AV, 0,
11349                                                 newGVOP(OP_GV, 0, PL_defgv)),
11350                                             OP_AV2ARYLEN)))),
11351                                 ref(newUNOP(OP_RV2AV, 0,
11352                                         newGVOP(OP_GV, 0, PL_defgv)),
11353                                     OP_ASLICE))) :
11354                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11355                     initops = op_append_list(OP_LINESEQ, initops,
11356                         newSTATEOP(0, NULL,
11357                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
11358                 }
11359                 prev_type = 2;
11360                 max_arity = -1;
11361             } break;
11362             default:
11363                 parse_error:
11364                 qerror(Perl_mess(aTHX_ "Parse error"));
11365                 return NULL;
11366         }
11367         lex_read_space(0);
11368         c = lex_peek_unichar(0);
11369         switch (c) {
11370             case /*(*/')': break;
11371             case ',':
11372                 do {
11373                     lex_token_boundary();
11374                     lex_read_unichar(0);
11375                     lex_read_space(0);
11376                     c = lex_peek_unichar(0);
11377                 } while (c == ',');
11378                 break;
11379             default:
11380                 goto parse_error;
11381         }
11382     }
11383     if (min_arity != 0) {
11384         initops = op_append_list(OP_LINESEQ,
11385             newSTATEOP(0, NULL,
11386                 newLOGOP(OP_OR, 0,
11387                     newBINOP(OP_GE, 0,
11388                         scalar(newUNOP(OP_RV2AV, 0,
11389                             newGVOP(OP_GV, 0, PL_defgv))),
11390                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
11391                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11392                         newSVOP(OP_CONST, 0,
11393                             newSVpvs("Too few arguments for subroutine"))))),
11394             initops);
11395     }
11396     if (max_arity != -1) {
11397         initops = op_append_list(OP_LINESEQ,
11398             newSTATEOP(0, NULL,
11399                 newLOGOP(OP_OR, 0,
11400                     newBINOP(OP_LE, 0,
11401                         scalar(newUNOP(OP_RV2AV, 0,
11402                             newGVOP(OP_GV, 0, PL_defgv))),
11403                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
11404                     newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
11405                         newSVOP(OP_CONST, 0,
11406                             newSVpvs("Too many arguments for subroutine"))))),
11407             initops);
11408     }
11409     return initops;
11410 }
11411
11412 /*
11413  * Local variables:
11414  * c-indentation-style: bsd
11415  * c-basic-offset: 4
11416  * indent-tabs-mode: nil
11417  * End:
11418  *
11419  * ex: set ts=8 sts=4 sw=4 et:
11420  */