toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_sublex_info          (PL_parser->sublex_info)
  68 #define PL_linestr              (PL_parser->linestr)
  69 #define PL_expect               (PL_parser->expect)
  70 #define PL_copline              (PL_parser->copline)
  71 #define PL_bufptr               (PL_parser->bufptr)
  72 #define PL_oldbufptr            (PL_parser->oldbufptr)
  73 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  74 #define PL_linestart            (PL_parser->linestart)
  75 #define PL_bufend               (PL_parser->bufend)
  76 #define PL_last_uni             (PL_parser->last_uni)
  77 #define PL_last_lop             (PL_parser->last_lop)
  78 #define PL_last_lop_op          (PL_parser->last_lop_op)
  79 #define PL_lex_state            (PL_parser->lex_state)
  80 #define PL_rsfp                 (PL_parser->rsfp)
  81 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  82 #define PL_in_my                (PL_parser->in_my)
  83 #define PL_in_my_stash          (PL_parser->in_my_stash)
  84 #define PL_tokenbuf             (PL_parser->tokenbuf)
  85 #define PL_multi_end            (PL_parser->multi_end)
  86 #define PL_error_count          (PL_parser->error_count)
  87
  88 #  define PL_nexttoke           (PL_parser->nexttoke)
  89 #  define PL_nexttype           (PL_parser->nexttype)
  90 #  define PL_nextval            (PL_parser->nextval)
  91
  92 static const char* const ident_too_long = "Identifier too long";
  93
  94 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  95
  96 #define XENUMMASK  0x3f
  97 #define XFAKEEOF   0x40
  98 #define XFAKEBRACK 0x80
  99
 100 #ifdef USE_UTF8_SCRIPTS
 101 #   define UTF cBOOL(!IN_BYTES)
 102 #else
 103 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 104 #endif
 105
 106 /* The maximum number of characters preceding the unrecognized one to display */
 107 #define UNRECOGNIZED_PRECEDE_COUNT 10
 108
 109 /* In variables named $^X, these are the legal values for X.
 110  * 1999-02-27 mjd-perl-patch@plover.com */
 111 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 112
 113 #define SPACE_OR_TAB(c) isBLANK_A(c)
 114
 115 #define HEXFP_PEEK(s)     \
 116     (((s[0] == '.') && \
 117       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 118      isALPHA_FOLD_EQ(s[0], 'p'))
 119
 120 /* LEX_* are values for PL_lex_state, the state of the lexer.
 121  * They are arranged oddly so that the guard on the switch statement
 122  * can get by with a single comparison (if the compiler is smart enough).
 123  *
 124  * These values refer to the various states within a sublex parse,
 125  * i.e. within a double quotish string
 126  */
 127
 128 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 129
 130 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 131 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 132 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 133 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 134 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 135
 136                                    /* at end of code, eg "$x" followed by:  */
 137 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 138 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 139
 140 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 141                                         string or after \E, $foo, etc       */
 142 #define LEX_INTERPCONST          2 /* NOT USED */
 143 #define LEX_FORMLINE             1 /* expecting a format line               */
 144
 145
 146 #ifdef DEBUGGING
 147 static const char* const lex_state_names[] = {
 148     "KNOWNEXT",
 149     "FORMLINE",
 150     "INTERPCONST",
 151     "INTERPCONCAT",
 152     "INTERPENDMAYBE",
 153     "INTERPEND",
 154     "INTERPSTART",
 155     "INTERPPUSH",
 156     "INTERPCASEMOD",
 157     "INTERPNORMAL",
 158     "NORMAL"
 159 };
 160 #endif
 161
 162 #include "keywords.h"
 163
 164 /* CLINE is a macro that ensures PL_copline has a sane value */
 165
 166 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 167
 168 /*
 169  * Convenience functions to return different tokens and prime the
 170  * lexer for the next token.  They all take an argument.
 171  *
 172  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 173  * OPERATOR     : generic operator
 174  * AOPERATOR    : assignment operator
 175  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 176  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 177  * PREREF       : *EXPR where EXPR is not a simple identifier
 178  * TERM         : expression term
 179  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 180  * LOOPX        : loop exiting command (goto, last, dump, etc)
 181  * FTST         : file test operator
 182  * FUN0         : zero-argument function
 183  * FUN0OP       : zero-argument function, with its op created in this file
 184  * FUN1         : not used, except for not, which isn't a UNIOP
 185  * BOop         : bitwise or or xor
 186  * BAop         : bitwise and
 187  * BCop         : bitwise complement
 188  * SHop         : shift operator
 189  * PWop         : power operator
 190  * PMop         : pattern-matching operator
 191  * Aop          : addition-level operator
 192  * AopNOASSIGN  : addition-level operator that is never part of .=
 193  * Mop          : multiplication-level operator
 194  * Eop          : equality-testing operator
 195  * Rop          : relational operator <= != gt
 196  *
 197  * Also see LOP and lop() below.
 198  */
 199
 200 #ifdef DEBUGGING /* Serve -DT. */
 201 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 202 #else
 203 #   define REPORT(retval) (retval)
 204 #endif
 205
 206 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 207 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 208 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 209 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 210 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 211 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 212 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 213 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 214 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
 215                          pl_yylval.ival=f, \
 216                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 217                          REPORT((int)LOOPEX))
 218 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 219 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 220 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 221 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 222 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 223 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 224 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 225                        REPORT('~')
 226 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 227 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 228 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 229 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 230 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 231 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 232 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 233 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 234
 235 /* This bit of chicanery makes a unary function followed by
 236  * a parenthesis into a function with one argument, highest precedence.
 237  * The UNIDOR macro is for unary functions that can be followed by the //
 238  * operator (such as C<shift // 0>).
 239  */
 240 #define UNI3(f,x,have_x) { \
 241         pl_yylval.ival = f; \
 242         if (have_x) PL_expect = x; \
 243         PL_bufptr = s; \
 244         PL_last_uni = PL_oldbufptr; \
 245         PL_last_lop_op = f; \
 246         if (*s == '(') \
 247             return REPORT( (int)FUNC1 ); \
 248         s = skipspace(s); \
 249         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 250         }
 251 #define UNI(f)    UNI3(f,XTERM,1)
 252 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 253 #define UNIPROTO(f,optional) { \
 254         if (optional) PL_last_uni = PL_oldbufptr; \
 255         OPERATOR(f); \
 256         }
 257
 258 #define UNIBRACK(f) UNI3(f,0,0)
 259
 260 /* grandfather return to old style */
 261 #define OLDLOP(f) \
 262         do { \
 263             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 264                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 265             pl_yylval.ival = (f); \
 266             PL_expect = XTERM; \
 267             PL_bufptr = s; \
 268             return (int)LSTOP; \
 269         } while(0)
 270
 271 #define COPLINE_INC_WITH_HERELINES                  \
 272     STMT_START {                                     \
 273         CopLINE_inc(PL_curcop);                       \
 274         if (PL_parser->herelines)                      \
 275             CopLINE(PL_curcop) += PL_parser->herelines, \
 276             PL_parser->herelines = 0;                    \
 277     } STMT_END
 278 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 279  * is no sublex_push to follow. */
 280 #define COPLINE_SET_FROM_MULTI_END            \
 281     STMT_START {                               \
 282         CopLINE_set(PL_curcop, PL_multi_end);   \
 283         if (PL_multi_end != PL_multi_start)      \
 284             PL_parser->herelines = 0;             \
 285     } STMT_END
 286
 287
 288 #ifdef DEBUGGING
 289
 290 /* how to interpret the pl_yylval associated with the token */
 291 enum token_type {
 292     TOKENTYPE_NONE,
 293     TOKENTYPE_IVAL,
 294     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 295     TOKENTYPE_PVAL,
 296     TOKENTYPE_OPVAL
 297 };
 298
 299 static struct debug_tokens {
 300     const int token;
 301     enum token_type type;
 302     const char *name;
 303 } const debug_tokens[] =
 304 {
 305     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 306     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 307     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 308     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 309     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 310     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 311     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 312     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 313     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 314     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 315     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 316     { DO,               TOKENTYPE_NONE,         "DO" },
 317     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 318     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 319     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 320     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 321     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 322     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 323     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 324     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 325     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 326     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 327     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 328     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 329     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 330     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 331     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 332     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 333     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 334     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 335     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 336     { IF,               TOKENTYPE_IVAL,         "IF" },
 337     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 338     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 339     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 340     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 341     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 342     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 343     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 344     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 345     { MY,               TOKENTYPE_IVAL,         "MY" },
 346     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 347     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 348     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 349     { OROR,             TOKENTYPE_NONE,         "OROR" },
 350     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 351     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 352     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 353     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 354     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 355     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 356     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 357     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 358     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 359     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 360     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 361     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 362     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 363     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 364     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 365     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 366     { SUB,              TOKENTYPE_NONE,         "SUB" },
 367     { THING,            TOKENTYPE_OPVAL,        "THING" },
 368     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 369     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 370     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 371     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 372     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 373     { USE,              TOKENTYPE_IVAL,         "USE" },
 374     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 375     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 376     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 377     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 378     { 0,                TOKENTYPE_NONE,         NULL }
 379 };
 380
 381 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 382
 383 STATIC int
 384 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 385 {
 386     PERL_ARGS_ASSERT_TOKEREPORT;
 387
 388     if (DEBUG_T_TEST) {
 389         const char *name = NULL;
 390         enum token_type type = TOKENTYPE_NONE;
 391         const struct debug_tokens *p;
 392         SV* const report = newSVpvs("<== ");
 393
 394         for (p = debug_tokens; p->token; p++) {
 395             if (p->token == (int)rv) {
 396                 name = p->name;
 397                 type = p->type;
 398                 break;
 399             }
 400         }
 401         if (name)
 402             Perl_sv_catpv(aTHX_ report, name);
 403         else if (isGRAPH(rv))
 404         {
 405             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 406             if ((char)rv == 'p')
 407                 sv_catpvs(report, " (pending identifier)");
 408         }
 409         else if (!rv)
 410             sv_catpvs(report, "EOF");
 411         else
 412             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 413         switch (type) {
 414         case TOKENTYPE_NONE:
 415             break;
 416         case TOKENTYPE_IVAL:
 417             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 418             break;
 419         case TOKENTYPE_OPNUM:
 420             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 421                                     PL_op_name[lvalp->ival]);
 422             break;
 423         case TOKENTYPE_PVAL:
 424             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 425             break;
 426         case TOKENTYPE_OPVAL:
 427             if (lvalp->opval) {
 428                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 429                                     PL_op_name[lvalp->opval->op_type]);
 430                 if (lvalp->opval->op_type == OP_CONST) {
 431                     Perl_sv_catpvf(aTHX_ report, " %s",
 432                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 433                 }
 434
 435             }
 436             else
 437                 sv_catpvs(report, "(opval=null)");
 438             break;
 439         }
 440         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 441     };
 442     return (int)rv;
 443 }
 444
 445
 446 /* print the buffer with suitable escapes */
 447
 448 STATIC void
 449 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 450 {
 451     SV* const tmp = newSVpvs("");
 452
 453     PERL_ARGS_ASSERT_PRINTBUF;
 454
 455     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 456     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 457     GCC_DIAG_RESTORE;
 458     SvREFCNT_dec(tmp);
 459 }
 460
 461 #endif
 462
 463 static int
 464 S_deprecate_commaless_var_list(pTHX) {
 465     PL_expect = XTERM;
 466     deprecate("comma-less variable list");
 467     return REPORT(','); /* grandfather non-comma-format format */
 468 }
 469
 470 /*
 471  * S_ao
 472  *
 473  * This subroutine looks for an '=' next to the operator that has just been
 474  * parsed and turns it into an ASSIGNOP if it finds one.
 475  */
 476
 477 STATIC int
 478 S_ao(pTHX_ int toketype)
 479 {
 480     if (*PL_bufptr == '=') {
 481         PL_bufptr++;
 482         if (toketype == ANDAND)
 483             pl_yylval.ival = OP_ANDASSIGN;
 484         else if (toketype == OROR)
 485             pl_yylval.ival = OP_ORASSIGN;
 486         else if (toketype == DORDOR)
 487             pl_yylval.ival = OP_DORASSIGN;
 488         toketype = ASSIGNOP;
 489     }
 490     return REPORT(toketype);
 491 }
 492
 493 /*
 494  * S_no_op
 495  * When Perl expects an operator and finds something else, no_op
 496  * prints the warning.  It always prints "<something> found where
 497  * operator expected.  It prints "Missing semicolon on previous line?"
 498  * if the surprise occurs at the start of the line.  "do you need to
 499  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 500  * where the compiler doesn't know if foo is a method call or a function.
 501  * It prints "Missing operator before end of line" if there's nothing
 502  * after the missing operator, or "... before <...>" if there is something
 503  * after the missing operator.
 504  *
 505  * PL_bufptr is expected to point to the start of the thing that was found,
 506  * and s after the next token or partial token.
 507  */
 508
 509 STATIC void
 510 S_no_op(pTHX_ const char *const what, char *s)
 511 {
 512     char * const oldbp = PL_bufptr;
 513     const bool is_first = (PL_oldbufptr == PL_linestart);
 514
 515     PERL_ARGS_ASSERT_NO_OP;
 516
 517     if (!s)
 518         s = oldbp;
 519     else
 520         PL_bufptr = s;
 521     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 522     if (ckWARN_d(WARN_SYNTAX)) {
 523         if (is_first)
 524             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 525                     "\t(Missing semicolon on previous line?)\n");
 526         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 527             const char *t;
 528             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 529                                                             t += UTF ? UTF8SKIP(t) : 1)
 530                 NOOP;
 531             if (t < PL_bufptr && isSPACE(*t))
 532                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 533                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 534                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 535         }
 536         else {
 537             assert(s >= oldbp);
 538             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                     "\t(Missing operator before %"UTF8f"?)\n",
 540                      UTF8fARG(UTF, s - oldbp, oldbp));
 541         }
 542     }
 543     PL_bufptr = oldbp;
 544 }
 545
 546 /*
 547  * S_missingterm
 548  * Complain about missing quote/regexp/heredoc terminator.
 549  * If it's called with NULL then it cauterizes the line buffer.
 550  * If we're in a delimited string and the delimiter is a control
 551  * character, it's reformatted into a two-char sequence like ^C.
 552  * This is fatal.
 553  */
 554
 555 STATIC void
 556 S_missingterm(pTHX_ char *s)
 557 {
 558     char tmpbuf[3];
 559     char q;
 560     if (s) {
 561         char * const nl = strrchr(s,'\n');
 562         if (nl)
 563             *nl = '\0';
 564     }
 565     else if ((U8) PL_multi_close < 32) {
 566         *tmpbuf = '^';
 567         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 568         tmpbuf[2] = '\0';
 569         s = tmpbuf;
 570     }
 571     else {
 572         *tmpbuf = (char)PL_multi_close;
 573         tmpbuf[1] = '\0';
 574         s = tmpbuf;
 575     }
 576     q = strchr(s,'"') ? '\'' : '"';
 577     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 578 }
 579
 580 #include "feature.h"
 581
 582 /*
 583  * Check whether the named feature is enabled.
 584  */
 585 bool
 586 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 587 {
 588     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 589
 590     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 591
 592     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 593
 594     if (namelen > MAX_FEATURE_LEN)
 595         return FALSE;
 596     memcpy(&he_name[8], name, namelen);
 597
 598     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 599                                      REFCOUNTED_HE_EXISTS));
 600 }
 601
 602 /*
 603  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 604  * utf16-to-utf8-reversed.
 605  */
 606
 607 #ifdef PERL_CR_FILTER
 608 static void
 609 strip_return(SV *sv)
 610 {
 611     const char *s = SvPVX_const(sv);
 612     const char * const e = s + SvCUR(sv);
 613
 614     PERL_ARGS_ASSERT_STRIP_RETURN;
 615
 616     /* outer loop optimized to do nothing if there are no CR-LFs */
 617     while (s < e) {
 618         if (*s++ == '\r' && *s == '\n') {
 619             /* hit a CR-LF, need to copy the rest */
 620             char *d = s - 1;
 621             *d++ = *s++;
 622             while (s < e) {
 623                 if (*s == '\r' && s[1] == '\n')
 624                     s++;
 625                 *d++ = *s++;
 626             }
 627             SvCUR(sv) -= s - d;
 628             return;
 629         }
 630     }
 631 }
 632
 633 STATIC I32
 634 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 635 {
 636     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 637     if (count > 0 && !maxlen)
 638         strip_return(sv);
 639     return count;
 640 }
 641 #endif
 642
 643 /*
 644 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 645
 646 Creates and initialises a new lexer/parser state object, supplying
 647 a context in which to lex and parse from a new source of Perl code.
 648 A pointer to the new state object is placed in L</PL_parser>.  An entry
 649 is made on the save stack so that upon unwinding the new state object
 650 will be destroyed and the former value of L</PL_parser> will be restored.
 651 Nothing else need be done to clean up the parsing context.
 652
 653 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 654 non-null, provides a string (in SV form) containing code to be parsed.
 655 A copy of the string is made, so subsequent modification of C<line>
 656 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 657 from which code will be read to be parsed.  If both are non-null, the
 658 code in C<line> comes first and must consist of complete lines of input,
 659 and C<rsfp> supplies the remainder of the source.
 660
 661 The C<flags> parameter is reserved for future use.  Currently it is only
 662 used by perl internally, so extensions should always pass zero.
 663
 664 =cut
 665 */
 666
 667 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 668    can share filters with the current parser.
 669    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 670    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 671    destruction. This is used to handle the case of defaulting to reading the
 672    script from the standard input because no filename was given on the command
 673    line (without getting confused by situation where STDIN has been closed, so
 674    the script handle is opened on fd 0)  */
 675
 676 void
 677 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 678 {
 679     const char *s = NULL;
 680     yy_parser *parser, *oparser;
 681     if (flags && flags & ~LEX_START_FLAGS)
 682         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 683
 684     /* create and initialise a parser */
 685
 686     Newxz(parser, 1, yy_parser);
 687     parser->old_parser = oparser = PL_parser;
 688     PL_parser = parser;
 689
 690     parser->stack = NULL;
 691     parser->ps = NULL;
 692     parser->stack_size = 0;
 693
 694     /* on scope exit, free this parser and restore any outer one */
 695     SAVEPARSER(parser);
 696     parser->saved_curcop = PL_curcop;
 697
 698     /* initialise lexer state */
 699
 700     parser->nexttoke = 0;
 701     parser->error_count = oparser ? oparser->error_count : 0;
 702     parser->copline = parser->preambling = NOLINE;
 703     parser->lex_state = LEX_NORMAL;
 704     parser->expect = XSTATE;
 705     parser->rsfp = rsfp;
 706     parser->rsfp_filters =
 707       !(flags & LEX_START_SAME_FILTER) || !oparser
 708         ? NULL
 709         : MUTABLE_AV(SvREFCNT_inc(
 710             oparser->rsfp_filters
 711              ? oparser->rsfp_filters
 712              : (oparser->rsfp_filters = newAV())
 713           ));
 714
 715     Newx(parser->lex_brackstack, 120, char);
 716     Newx(parser->lex_casestack, 12, char);
 717     *parser->lex_casestack = '\0';
 718     Newxz(parser->lex_shared, 1, LEXSHARED);
 719
 720     if (line) {
 721         STRLEN len;
 722         s = SvPV_const(line, len);
 723         parser->linestr = flags & LEX_START_COPIED
 724                             ? SvREFCNT_inc_simple_NN(line)
 725                             : newSVpvn_flags(s, len, SvUTF8(line));
 726         sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
 727     } else {
 728         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 729     }
 730     parser->oldoldbufptr =
 731         parser->oldbufptr =
 732         parser->bufptr =
 733         parser->linestart = SvPVX(parser->linestr);
 734     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 735     parser->last_lop = parser->last_uni = NULL;
 736
 737     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 738                                                         |LEX_DONT_CLOSE_RSFP));
 739     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 740                                                         |LEX_DONT_CLOSE_RSFP));
 741
 742     parser->in_pod = parser->filtered = 0;
 743 }
 744
 745
 746 /* delete a parser object */
 747
 748 void
 749 Perl_parser_free(pTHX_  const yy_parser *parser)
 750 {
 751     PERL_ARGS_ASSERT_PARSER_FREE;
 752
 753     PL_curcop = parser->saved_curcop;
 754     SvREFCNT_dec(parser->linestr);
 755
 756     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 757         PerlIO_clearerr(parser->rsfp);
 758     else if (parser->rsfp && (!parser->old_parser
 759           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 760         PerlIO_close(parser->rsfp);
 761     SvREFCNT_dec(parser->rsfp_filters);
 762     SvREFCNT_dec(parser->lex_stuff);
 763     SvREFCNT_dec(parser->sublex_info.repl);
 764
 765     Safefree(parser->lex_brackstack);
 766     Safefree(parser->lex_casestack);
 767     Safefree(parser->lex_shared);
 768     PL_parser = parser->old_parser;
 769     Safefree(parser);
 770 }
 771
 772 void
 773 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 774 {
 775     I32 nexttoke = parser->nexttoke;
 776     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 777     while (nexttoke--) {
 778         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 779          && parser->nextval[nexttoke].opval
 780          && parser->nextval[nexttoke].opval->op_slabbed
 781          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 782             op_free(parser->nextval[nexttoke].opval);
 783             parser->nextval[nexttoke].opval = NULL;
 784         }
 785     }
 786 }
 787
 788
 789 /*
 790 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 791
 792 Buffer scalar containing the chunk currently under consideration of the
 793 text currently being lexed.  This is always a plain string scalar (for
 794 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 795 normal scalar means; instead refer to the buffer directly by the pointer
 796 variables described below.
 797
 798 The lexer maintains various C<char*> pointers to things in the
 799 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 800 reallocated, all of these pointers must be updated.  Don't attempt to
 801 do this manually, but rather use L</lex_grow_linestr> if you need to
 802 reallocate the buffer.
 803
 804 The content of the text chunk in the buffer is commonly exactly one
 805 complete line of input, up to and including a newline terminator,
 806 but there are situations where it is otherwise.  The octets of the
 807 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 808 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 809 flag on this scalar, which may disagree with it.
 810
 811 For direct examination of the buffer, the variable
 812 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 813 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 814 of these pointers is usually preferable to examination of the scalar
 815 through normal scalar means.
 816
 817 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 818
 819 Direct pointer to the end of the chunk of text currently being lexed, the
 820 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 821 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 822 always located at the end of the buffer, and does not count as part of
 823 the buffer's contents.
 824
 825 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 826
 827 Points to the current position of lexing inside the lexer buffer.
 828 Characters around this point may be freely examined, within
 829 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 830 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 831 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 832
 833 Lexing code (whether in the Perl core or not) moves this pointer past
 834 the characters that it consumes.  It is also expected to perform some
 835 bookkeeping whenever a newline character is consumed.  This movement
 836 can be more conveniently performed by the function L</lex_read_to>,
 837 which handles newlines appropriately.
 838
 839 Interpretation of the buffer's octets can be abstracted out by
 840 using the slightly higher-level functions L</lex_peek_unichar> and
 841 L</lex_read_unichar>.
 842
 843 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 844
 845 Points to the start of the current line inside the lexer buffer.
 846 This is useful for indicating at which column an error occurred, and
 847 not much else.  This must be updated by any lexing code that consumes
 848 a newline; the function L</lex_read_to> handles this detail.
 849
 850 =cut
 851 */
 852
 853 /*
 854 =for apidoc Amx|bool|lex_bufutf8
 855
 856 Indicates whether the octets in the lexer buffer
 857 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 858 of Unicode characters.  If not, they should be interpreted as Latin-1
 859 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 860
 861 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 862 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 863 encoding.
 864
 865 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 866 is significant, but not the whole story regarding the input character
 867 encoding.  Normally, when a file is being read, the scalar contains octets
 868 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 869 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 870 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 871 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 872 is in effect.  This logic may change in the future; use this function
 873 instead of implementing the logic yourself.
 874
 875 =cut
 876 */
 877
 878 bool
 879 Perl_lex_bufutf8(pTHX)
 880 {
 881     return UTF;
 882 }
 883
 884 /*
 885 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 886
 887 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 888 at least C<len> octets (including terminating C<NUL>).  Returns a
 889 pointer to the reallocated buffer.  This is necessary before making
 890 any direct modification of the buffer that would increase its length.
 891 L</lex_stuff_pvn> provides a more convenient way to insert text into
 892 the buffer.
 893
 894 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 895 this function updates all of the lexer's variables that point directly
 896 into the buffer.
 897
 898 =cut
 899 */
 900
 901 char *
 902 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 903 {
 904     SV *linestr;
 905     char *buf;
 906     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 907     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 908     linestr = PL_parser->linestr;
 909     buf = SvPVX(linestr);
 910     if (len <= SvLEN(linestr))
 911         return buf;
 912     bufend_pos = PL_parser->bufend - buf;
 913     bufptr_pos = PL_parser->bufptr - buf;
 914     oldbufptr_pos = PL_parser->oldbufptr - buf;
 915     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 916     linestart_pos = PL_parser->linestart - buf;
 917     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 918     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 919     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 920                             PL_parser->lex_shared->re_eval_start - buf : 0;
 921
 922     buf = sv_grow(linestr, len);
 923
 924     PL_parser->bufend = buf + bufend_pos;
 925     PL_parser->bufptr = buf + bufptr_pos;
 926     PL_parser->oldbufptr = buf + oldbufptr_pos;
 927     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 928     PL_parser->linestart = buf + linestart_pos;
 929     if (PL_parser->last_uni)
 930         PL_parser->last_uni = buf + last_uni_pos;
 931     if (PL_parser->last_lop)
 932         PL_parser->last_lop = buf + last_lop_pos;
 933     if (PL_parser->lex_shared->re_eval_start)
 934         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 935     return buf;
 936 }
 937
 938 /*
 939 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 940
 941 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 942 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 943 reallocating the buffer if necessary.  This means that lexing code that
 944 runs later will see the characters as if they had appeared in the input.
 945 It is not recommended to do this as part of normal parsing, and most
 946 uses of this facility run the risk of the inserted characters being
 947 interpreted in an unintended manner.
 948
 949 The string to be inserted is represented by C<len> octets starting
 950 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 951 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
 952 The characters are recoded for the lexer buffer, according to how the
 953 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 954 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 955 function is more convenient.
 956
 957 =cut
 958 */
 959
 960 void
 961 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 962 {
 963     dVAR;
 964     char *bufptr;
 965     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 966     if (flags & ~(LEX_STUFF_UTF8))
 967         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 968     if (UTF) {
 969         if (flags & LEX_STUFF_UTF8) {
 970             goto plain_copy;
 971         } else {
 972             STRLEN highhalf = 0;    /* Count of variants */
 973             const char *p, *e = pv+len;
 974             for (p = pv; p != e; p++) {
 975                 if (! UTF8_IS_INVARIANT(*p)) {
 976                     highhalf++;
 977                 }
 978             }
 979             if (!highhalf)
 980                 goto plain_copy;
 981             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 982             bufptr = PL_parser->bufptr;
 983             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 984             SvCUR_set(PL_parser->linestr,
 985                 SvCUR(PL_parser->linestr) + len+highhalf);
 986             PL_parser->bufend += len+highhalf;
 987             for (p = pv; p != e; p++) {
 988                 U8 c = (U8)*p;
 989                 if (! UTF8_IS_INVARIANT(c)) {
 990                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
 991                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
 992                 } else {
 993                     *bufptr++ = (char)c;
 994                 }
 995             }
 996         }
 997     } else {
 998         if (flags & LEX_STUFF_UTF8) {
 999             STRLEN highhalf = 0;
1000             const char *p, *e = pv+len;
1001             for (p = pv; p != e; p++) {
1002                 U8 c = (U8)*p;
1003                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1004                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1005                                 "non-Latin-1 character into Latin-1 input");
1006                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1007                     p++;
1008                     highhalf++;
1009                 } else if (! UTF8_IS_INVARIANT(c)) {
1010                     /* malformed UTF-8 */
1011                     ENTER;
1012                     SAVESPTR(PL_warnhook);
1013                     PL_warnhook = PERL_WARNHOOK_FATAL;
1014                     utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1015                     LEAVE;
1016                 }
1017             }
1018             if (!highhalf)
1019                 goto plain_copy;
1020             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1021             bufptr = PL_parser->bufptr;
1022             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1023             SvCUR_set(PL_parser->linestr,
1024                 SvCUR(PL_parser->linestr) + len-highhalf);
1025             PL_parser->bufend += len-highhalf;
1026             p = pv;
1027             while (p < e) {
1028                 if (UTF8_IS_INVARIANT(*p)) {
1029                     *bufptr++ = *p;
1030                     p++;
1031                 }
1032                 else {
1033                     assert(p < e -1 );
1034                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1035                     p += 2;
1036                 }
1037             }
1038         } else {
1039           plain_copy:
1040             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1041             bufptr = PL_parser->bufptr;
1042             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1043             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1044             PL_parser->bufend += len;
1045             Copy(pv, bufptr, len, char);
1046         }
1047     }
1048 }
1049
1050 /*
1051 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1052
1053 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1054 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1055 reallocating the buffer if necessary.  This means that lexing code that
1056 runs later will see the characters as if they had appeared in the input.
1057 It is not recommended to do this as part of normal parsing, and most
1058 uses of this facility run the risk of the inserted characters being
1059 interpreted in an unintended manner.
1060
1061 The string to be inserted is represented by octets starting at C<pv>
1062 and continuing to the first nul.  These octets are interpreted as either
1063 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1064 in C<flags>.  The characters are recoded for the lexer buffer, according
1065 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1066 If it is not convenient to nul-terminate a string to be inserted, the
1067 L</lex_stuff_pvn> function is more appropriate.
1068
1069 =cut
1070 */
1071
1072 void
1073 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1074 {
1075     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1076     lex_stuff_pvn(pv, strlen(pv), flags);
1077 }
1078
1079 /*
1080 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1081
1082 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1083 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1084 reallocating the buffer if necessary.  This means that lexing code that
1085 runs later will see the characters as if they had appeared in the input.
1086 It is not recommended to do this as part of normal parsing, and most
1087 uses of this facility run the risk of the inserted characters being
1088 interpreted in an unintended manner.
1089
1090 The string to be inserted is the string value of C<sv>.  The characters
1091 are recoded for the lexer buffer, according to how the buffer is currently
1092 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1093 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1094 need to construct a scalar.
1095
1096 =cut
1097 */
1098
1099 void
1100 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1101 {
1102     char *pv;
1103     STRLEN len;
1104     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1105     if (flags)
1106         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1107     pv = SvPV(sv, len);
1108     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1109 }
1110
1111 /*
1112 =for apidoc Amx|void|lex_unstuff|char *ptr
1113
1114 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1115 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1116 This hides the discarded text from any lexing code that runs later,
1117 as if the text had never appeared.
1118
1119 This is not the normal way to consume lexed text.  For that, use
1120 L</lex_read_to>.
1121
1122 =cut
1123 */
1124
1125 void
1126 Perl_lex_unstuff(pTHX_ char *ptr)
1127 {
1128     char *buf, *bufend;
1129     STRLEN unstuff_len;
1130     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1131     buf = PL_parser->bufptr;
1132     if (ptr < buf)
1133         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1134     if (ptr == buf)
1135         return;
1136     bufend = PL_parser->bufend;
1137     if (ptr > bufend)
1138         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1139     unstuff_len = ptr - buf;
1140     Move(ptr, buf, bufend+1-ptr, char);
1141     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1142     PL_parser->bufend = bufend - unstuff_len;
1143 }
1144
1145 /*
1146 =for apidoc Amx|void|lex_read_to|char *ptr
1147
1148 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1149 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1150 performing the correct bookkeeping whenever a newline character is passed.
1151 This is the normal way to consume lexed text.
1152
1153 Interpretation of the buffer's octets can be abstracted out by
1154 using the slightly higher-level functions L</lex_peek_unichar> and
1155 L</lex_read_unichar>.
1156
1157 =cut
1158 */
1159
1160 void
1161 Perl_lex_read_to(pTHX_ char *ptr)
1162 {
1163     char *s;
1164     PERL_ARGS_ASSERT_LEX_READ_TO;
1165     s = PL_parser->bufptr;
1166     if (ptr < s || ptr > PL_parser->bufend)
1167         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1168     for (; s != ptr; s++)
1169         if (*s == '\n') {
1170             COPLINE_INC_WITH_HERELINES;
1171             PL_parser->linestart = s+1;
1172         }
1173     PL_parser->bufptr = ptr;
1174 }
1175
1176 /*
1177 =for apidoc Amx|void|lex_discard_to|char *ptr
1178
1179 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1180 up to C<ptr>.  The remaining content of the buffer will be moved, and
1181 all pointers into the buffer updated appropriately.  C<ptr> must not
1182 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1183 it is not permitted to discard text that has yet to be lexed.
1184
1185 Normally it is not necessarily to do this directly, because it suffices to
1186 use the implicit discarding behaviour of L</lex_next_chunk> and things
1187 based on it.  However, if a token stretches across multiple lines,
1188 and the lexing code has kept multiple lines of text in the buffer for
1189 that purpose, then after completion of the token it would be wise to
1190 explicitly discard the now-unneeded earlier lines, to avoid future
1191 multi-line tokens growing the buffer without bound.
1192
1193 =cut
1194 */
1195
1196 void
1197 Perl_lex_discard_to(pTHX_ char *ptr)
1198 {
1199     char *buf;
1200     STRLEN discard_len;
1201     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1202     buf = SvPVX(PL_parser->linestr);
1203     if (ptr < buf)
1204         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1205     if (ptr == buf)
1206         return;
1207     if (ptr > PL_parser->bufptr)
1208         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1209     discard_len = ptr - buf;
1210     if (PL_parser->oldbufptr < ptr)
1211         PL_parser->oldbufptr = ptr;
1212     if (PL_parser->oldoldbufptr < ptr)
1213         PL_parser->oldoldbufptr = ptr;
1214     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1215         PL_parser->last_uni = NULL;
1216     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1217         PL_parser->last_lop = NULL;
1218     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1219     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1220     PL_parser->bufend -= discard_len;
1221     PL_parser->bufptr -= discard_len;
1222     PL_parser->oldbufptr -= discard_len;
1223     PL_parser->oldoldbufptr -= discard_len;
1224     if (PL_parser->last_uni)
1225         PL_parser->last_uni -= discard_len;
1226     if (PL_parser->last_lop)
1227         PL_parser->last_lop -= discard_len;
1228 }
1229
1230 /*
1231 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1232
1233 Reads in the next chunk of text to be lexed, appending it to
1234 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1235 looked to the end of the current chunk and wants to know more.  It is
1236 usual, but not necessary, for lexing to have consumed the entirety of
1237 the current chunk at this time.
1238
1239 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1240 chunk (i.e., the current chunk has been entirely consumed), normally the
1241 current chunk will be discarded at the same time that the new chunk is
1242 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1243 will not be discarded.  If the current chunk has not been entirely
1244 consumed, then it will not be discarded regardless of the flag.
1245
1246 Returns true if some new text was added to the buffer, or false if the
1247 buffer has reached the end of the input text.
1248
1249 =cut
1250 */
1251
1252 #define LEX_FAKE_EOF 0x80000000
1253 #define LEX_NO_TERM  0x40000000 /* here-doc */
1254
1255 bool
1256 Perl_lex_next_chunk(pTHX_ U32 flags)
1257 {
1258     SV *linestr;
1259     char *buf;
1260     STRLEN old_bufend_pos, new_bufend_pos;
1261     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1262     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1263     bool got_some_for_debugger = 0;
1264     bool got_some;
1265     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1266         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1267     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1268         return FALSE;
1269     linestr = PL_parser->linestr;
1270     buf = SvPVX(linestr);
1271     if (!(flags & LEX_KEEP_PREVIOUS)
1272           && PL_parser->bufptr == PL_parser->bufend)
1273     {
1274         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1275         linestart_pos = 0;
1276         if (PL_parser->last_uni != PL_parser->bufend)
1277             PL_parser->last_uni = NULL;
1278         if (PL_parser->last_lop != PL_parser->bufend)
1279             PL_parser->last_lop = NULL;
1280         last_uni_pos = last_lop_pos = 0;
1281         *buf = 0;
1282         SvCUR(linestr) = 0;
1283     } else {
1284         old_bufend_pos = PL_parser->bufend - buf;
1285         bufptr_pos = PL_parser->bufptr - buf;
1286         oldbufptr_pos = PL_parser->oldbufptr - buf;
1287         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1288         linestart_pos = PL_parser->linestart - buf;
1289         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1290         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1291     }
1292     if (flags & LEX_FAKE_EOF) {
1293         goto eof;
1294     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1295         got_some = 0;
1296     } else if (filter_gets(linestr, old_bufend_pos)) {
1297         got_some = 1;
1298         got_some_for_debugger = 1;
1299     } else if (flags & LEX_NO_TERM) {
1300         got_some = 0;
1301     } else {
1302         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1303             sv_setpvs(linestr, "");
1304         eof:
1305         /* End of real input.  Close filehandle (unless it was STDIN),
1306          * then add implicit termination.
1307          */
1308         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1309             PerlIO_clearerr(PL_parser->rsfp);
1310         else if (PL_parser->rsfp)
1311             (void)PerlIO_close(PL_parser->rsfp);
1312         PL_parser->rsfp = NULL;
1313         PL_parser->in_pod = PL_parser->filtered = 0;
1314         if (!PL_in_eval && PL_minus_p) {
1315             sv_catpvs(linestr,
1316                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1317             PL_minus_n = PL_minus_p = 0;
1318         } else if (!PL_in_eval && PL_minus_n) {
1319             sv_catpvs(linestr, /*{*/";}");
1320             PL_minus_n = 0;
1321         } else
1322             sv_catpvs(linestr, ";");
1323         got_some = 1;
1324     }
1325     buf = SvPVX(linestr);
1326     new_bufend_pos = SvCUR(linestr);
1327     PL_parser->bufend = buf + new_bufend_pos;
1328     PL_parser->bufptr = buf + bufptr_pos;
1329     PL_parser->oldbufptr = buf + oldbufptr_pos;
1330     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1331     PL_parser->linestart = buf + linestart_pos;
1332     if (PL_parser->last_uni)
1333         PL_parser->last_uni = buf + last_uni_pos;
1334     if (PL_parser->last_lop)
1335         PL_parser->last_lop = buf + last_lop_pos;
1336     if (PL_parser->preambling != NOLINE) {
1337         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1338         PL_parser->preambling = NOLINE;
1339     }
1340     if (   got_some_for_debugger
1341         && PERLDB_LINE_OR_SAVESRC
1342         && PL_curstash != PL_debstash)
1343     {
1344         /* debugger active and we're not compiling the debugger code,
1345          * so store the line into the debugger's array of lines
1346          */
1347         update_debugger_info(NULL, buf+old_bufend_pos,
1348             new_bufend_pos-old_bufend_pos);
1349     }
1350     return got_some;
1351 }
1352
1353 /*
1354 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1355
1356 Looks ahead one (Unicode) character in the text currently being lexed.
1357 Returns the codepoint (unsigned integer value) of the next character,
1358 or -1 if lexing has reached the end of the input text.  To consume the
1359 peeked character, use L</lex_read_unichar>.
1360
1361 If the next character is in (or extends into) the next chunk of input
1362 text, the next chunk will be read in.  Normally the current chunk will be
1363 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1364 bit set, then the current chunk will not be discarded.
1365
1366 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367 is encountered, an exception is generated.
1368
1369 =cut
1370 */
1371
1372 I32
1373 Perl_lex_peek_unichar(pTHX_ U32 flags)
1374 {
1375     dVAR;
1376     char *s, *bufend;
1377     if (flags & ~(LEX_KEEP_PREVIOUS))
1378         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1379     s = PL_parser->bufptr;
1380     bufend = PL_parser->bufend;
1381     if (UTF) {
1382         U8 head;
1383         I32 unichar;
1384         STRLEN len, retlen;
1385         if (s == bufend) {
1386             if (!lex_next_chunk(flags))
1387                 return -1;
1388             s = PL_parser->bufptr;
1389             bufend = PL_parser->bufend;
1390         }
1391         head = (U8)*s;
1392         if (UTF8_IS_INVARIANT(head))
1393             return head;
1394         if (UTF8_IS_START(head)) {
1395             len = UTF8SKIP(&head);
1396             while ((STRLEN)(bufend-s) < len) {
1397                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1398                     break;
1399                 s = PL_parser->bufptr;
1400                 bufend = PL_parser->bufend;
1401             }
1402         }
1403         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1404         if (retlen == (STRLEN)-1) {
1405             /* malformed UTF-8 */
1406             ENTER;
1407             SAVESPTR(PL_warnhook);
1408             PL_warnhook = PERL_WARNHOOK_FATAL;
1409             utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1410             LEAVE;
1411         }
1412         return unichar;
1413     } else {
1414         if (s == bufend) {
1415             if (!lex_next_chunk(flags))
1416                 return -1;
1417             s = PL_parser->bufptr;
1418         }
1419         return (U8)*s;
1420     }
1421 }
1422
1423 /*
1424 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1425
1426 Reads the next (Unicode) character in the text currently being lexed.
1427 Returns the codepoint (unsigned integer value) of the character read,
1428 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1429 if lexing has reached the end of the input text.  To non-destructively
1430 examine the next character, use L</lex_peek_unichar> instead.
1431
1432 If the next character is in (or extends into) the next chunk of input
1433 text, the next chunk will be read in.  Normally the current chunk will be
1434 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1435 bit set, then the current chunk will not be discarded.
1436
1437 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1438 is encountered, an exception is generated.
1439
1440 =cut
1441 */
1442
1443 I32
1444 Perl_lex_read_unichar(pTHX_ U32 flags)
1445 {
1446     I32 c;
1447     if (flags & ~(LEX_KEEP_PREVIOUS))
1448         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1449     c = lex_peek_unichar(flags);
1450     if (c != -1) {
1451         if (c == '\n')
1452             COPLINE_INC_WITH_HERELINES;
1453         if (UTF)
1454             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1455         else
1456             ++(PL_parser->bufptr);
1457     }
1458     return c;
1459 }
1460
1461 /*
1462 =for apidoc Amx|void|lex_read_space|U32 flags
1463
1464 Reads optional spaces, in Perl style, in the text currently being
1465 lexed.  The spaces may include ordinary whitespace characters and
1466 Perl-style comments.  C<#line> directives are processed if encountered.
1467 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1468 at a non-space character (or the end of the input text).
1469
1470 If spaces extend into the next chunk of input text, the next chunk will
1471 be read in.  Normally the current chunk will be discarded at the same
1472 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1473 chunk will not be discarded.
1474
1475 =cut
1476 */
1477
1478 #define LEX_NO_INCLINE    0x40000000
1479 #define LEX_NO_NEXT_CHUNK 0x80000000
1480
1481 void
1482 Perl_lex_read_space(pTHX_ U32 flags)
1483 {
1484     char *s, *bufend;
1485     const bool can_incline = !(flags & LEX_NO_INCLINE);
1486     bool need_incline = 0;
1487     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1488         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1489     s = PL_parser->bufptr;
1490     bufend = PL_parser->bufend;
1491     while (1) {
1492         char c = *s;
1493         if (c == '#') {
1494             do {
1495                 c = *++s;
1496             } while (!(c == '\n' || (c == 0 && s == bufend)));
1497         } else if (c == '\n') {
1498             s++;
1499             if (can_incline) {
1500                 PL_parser->linestart = s;
1501                 if (s == bufend)
1502                     need_incline = 1;
1503                 else
1504                     incline(s);
1505             }
1506         } else if (isSPACE(c)) {
1507             s++;
1508         } else if (c == 0 && s == bufend) {
1509             bool got_more;
1510             line_t l;
1511             if (flags & LEX_NO_NEXT_CHUNK)
1512                 break;
1513             PL_parser->bufptr = s;
1514             l = CopLINE(PL_curcop);
1515             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1516             got_more = lex_next_chunk(flags);
1517             CopLINE_set(PL_curcop, l);
1518             s = PL_parser->bufptr;
1519             bufend = PL_parser->bufend;
1520             if (!got_more)
1521                 break;
1522             if (can_incline && need_incline && PL_parser->rsfp) {
1523                 incline(s);
1524                 need_incline = 0;
1525             }
1526         } else if (!c) {
1527             s++;
1528         } else {
1529             break;
1530         }
1531     }
1532     PL_parser->bufptr = s;
1533 }
1534
1535 /*
1536
1537 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1538
1539 This function performs syntax checking on a prototype, C<proto>.
1540 If C<warn> is true, any illegal characters or mismatched brackets
1541 will trigger illegalproto warnings, declaring that they were
1542 detected in the prototype for C<name>.
1543
1544 The return value is C<true> if this is a valid prototype, and
1545 C<false> if it is not, regardless of whether C<warn> was C<true> or
1546 C<false>.
1547
1548 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1549
1550 =cut
1551
1552  */
1553
1554 bool
1555 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1556 {
1557     STRLEN len, origlen;
1558     char *p = proto ? SvPV(proto, len) : NULL;
1559     bool bad_proto = FALSE;
1560     bool in_brackets = FALSE;
1561     bool after_slash = FALSE;
1562     char greedy_proto = ' ';
1563     bool proto_after_greedy_proto = FALSE;
1564     bool must_be_last = FALSE;
1565     bool underscore = FALSE;
1566     bool bad_proto_after_underscore = FALSE;
1567
1568     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1569
1570     if (!proto)
1571         return TRUE;
1572
1573     origlen = len;
1574     for (; len--; p++) {
1575         if (!isSPACE(*p)) {
1576             if (must_be_last)
1577                 proto_after_greedy_proto = TRUE;
1578             if (underscore) {
1579                 if (!strchr(";@%", *p))
1580                     bad_proto_after_underscore = TRUE;
1581                 underscore = FALSE;
1582             }
1583             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1584                 bad_proto = TRUE;
1585             }
1586             else {
1587                 if (*p == '[')
1588                     in_brackets = TRUE;
1589                 else if (*p == ']')
1590                     in_brackets = FALSE;
1591                 else if ((*p == '@' || *p == '%')
1592                          && !after_slash
1593                          && !in_brackets )
1594                 {
1595                     must_be_last = TRUE;
1596                     greedy_proto = *p;
1597                 }
1598                 else if (*p == '_')
1599                     underscore = TRUE;
1600             }
1601             if (*p == '\\')
1602                 after_slash = TRUE;
1603             else
1604                 after_slash = FALSE;
1605         }
1606     }
1607
1608     if (warn) {
1609         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1610         p -= origlen;
1611         p = SvUTF8(proto)
1612             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1613                              origlen, UNI_DISPLAY_ISPRINT)
1614             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1615
1616         if (proto_after_greedy_proto)
1617             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1618                         "Prototype after '%c' for %"SVf" : %s",
1619                         greedy_proto, SVfARG(name), p);
1620         if (in_brackets)
1621             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1622                         "Missing ']' in prototype for %"SVf" : %s",
1623                         SVfARG(name), p);
1624         if (bad_proto)
1625             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1626                         "Illegal character in prototype for %"SVf" : %s",
1627                         SVfARG(name), p);
1628         if (bad_proto_after_underscore)
1629             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1630                         "Illegal character after '_' in prototype for %"SVf" : %s",
1631                         SVfARG(name), p);
1632     }
1633
1634     return (! (proto_after_greedy_proto || bad_proto) );
1635 }
1636
1637 /*
1638  * S_incline
1639  * This subroutine has nothing to do with tilting, whether at windmills
1640  * or pinball tables.  Its name is short for "increment line".  It
1641  * increments the current line number in CopLINE(PL_curcop) and checks
1642  * to see whether the line starts with a comment of the form
1643  *    # line 500 "foo.pm"
1644  * If so, it sets the current line number and file to the values in the comment.
1645  */
1646
1647 STATIC void
1648 S_incline(pTHX_ const char *s)
1649 {
1650     const char *t;
1651     const char *n;
1652     const char *e;
1653     line_t line_num;
1654     UV uv;
1655
1656     PERL_ARGS_ASSERT_INCLINE;
1657
1658     COPLINE_INC_WITH_HERELINES;
1659     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1660      && s+1 == PL_bufend && *s == ';') {
1661         /* fake newline in string eval */
1662         CopLINE_dec(PL_curcop);
1663         return;
1664     }
1665     if (*s++ != '#')
1666         return;
1667     while (SPACE_OR_TAB(*s))
1668         s++;
1669     if (strnEQ(s, "line", 4))
1670         s += 4;
1671     else
1672         return;
1673     if (SPACE_OR_TAB(*s))
1674         s++;
1675     else
1676         return;
1677     while (SPACE_OR_TAB(*s))
1678         s++;
1679     if (!isDIGIT(*s))
1680         return;
1681
1682     n = s;
1683     while (isDIGIT(*s))
1684         s++;
1685     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1686         return;
1687     while (SPACE_OR_TAB(*s))
1688         s++;
1689     if (*s == '"' && (t = strchr(s+1, '"'))) {
1690         s++;
1691         e = t + 1;
1692     }
1693     else {
1694         t = s;
1695         while (*t && !isSPACE(*t))
1696             t++;
1697         e = t;
1698     }
1699     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1700         e++;
1701     if (*e != '\n' && *e != '\0')
1702         return;         /* false alarm */
1703
1704     if (!grok_atoUV(n, &uv, &e))
1705         return;
1706     line_num = ((line_t)uv) - 1;
1707
1708     if (t - s > 0) {
1709         const STRLEN len = t - s;
1710
1711         if (!PL_rsfp && !PL_parser->filtered) {
1712             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1713              * to *{"::_<newfilename"} */
1714             /* However, the long form of evals is only turned on by the
1715                debugger - usually they're "(eval %lu)" */
1716             GV * const cfgv = CopFILEGV(PL_curcop);
1717             if (cfgv) {
1718                 char smallbuf[128];
1719                 STRLEN tmplen2 = len;
1720                 char *tmpbuf2;
1721                 GV *gv2;
1722
1723                 if (tmplen2 + 2 <= sizeof smallbuf)
1724                     tmpbuf2 = smallbuf;
1725                 else
1726                     Newx(tmpbuf2, tmplen2 + 2, char);
1727
1728                 tmpbuf2[0] = '_';
1729                 tmpbuf2[1] = '<';
1730
1731                 memcpy(tmpbuf2 + 2, s, tmplen2);
1732                 tmplen2 += 2;
1733
1734                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1735                 if (!isGV(gv2)) {
1736                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1737                     /* adjust ${"::_<newfilename"} to store the new file name */
1738                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1739                     /* The line number may differ. If that is the case,
1740                        alias the saved lines that are in the array.
1741                        Otherwise alias the whole array. */
1742                     if (CopLINE(PL_curcop) == line_num) {
1743                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1744                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1745                     }
1746                     else if (GvAV(cfgv)) {
1747                         AV * const av = GvAV(cfgv);
1748                         const I32 start = CopLINE(PL_curcop)+1;
1749                         I32 items = AvFILLp(av) - start;
1750                         if (items > 0) {
1751                             AV * const av2 = GvAVn(gv2);
1752                             SV **svp = AvARRAY(av) + start;
1753                             I32 l = (I32)line_num+1;
1754                             while (items--)
1755                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1756                         }
1757                     }
1758                 }
1759
1760                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1761             }
1762         }
1763         CopFILE_free(PL_curcop);
1764         CopFILE_setn(PL_curcop, s, len);
1765     }
1766     CopLINE_set(PL_curcop, line_num);
1767 }
1768
1769 #define skipspace(s) skipspace_flags(s, 0)
1770
1771
1772 STATIC void
1773 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1774 {
1775     AV *av = CopFILEAVx(PL_curcop);
1776     if (av) {
1777         SV * sv;
1778         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1779         else {
1780             sv = *av_fetch(av, 0, 1);
1781             SvUPGRADE(sv, SVt_PVMG);
1782         }
1783         if (!SvPOK(sv)) sv_setpvs(sv,"");
1784         if (orig_sv)
1785             sv_catsv(sv, orig_sv);
1786         else
1787             sv_catpvn(sv, buf, len);
1788         if (!SvIOK(sv)) {
1789             (void)SvIOK_on(sv);
1790             SvIV_set(sv, 0);
1791         }
1792         if (PL_parser->preambling == NOLINE)
1793             av_store(av, CopLINE(PL_curcop), sv);
1794     }
1795 }
1796
1797 /*
1798  * S_skipspace
1799  * Called to gobble the appropriate amount and type of whitespace.
1800  * Skips comments as well.
1801  */
1802
1803 STATIC char *
1804 S_skipspace_flags(pTHX_ char *s, U32 flags)
1805 {
1806     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1807     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1808         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1809             s++;
1810     } else {
1811         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1812         PL_bufptr = s;
1813         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1814                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1815                     LEX_NO_NEXT_CHUNK : 0));
1816         s = PL_bufptr;
1817         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1818         if (PL_linestart > PL_bufptr)
1819             PL_bufptr = PL_linestart;
1820         return s;
1821     }
1822     return s;
1823 }
1824
1825 /*
1826  * S_check_uni
1827  * Check the unary operators to ensure there's no ambiguity in how they're
1828  * used.  An ambiguous piece of code would be:
1829  *     rand + 5
1830  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1831  * the +5 is its argument.
1832  */
1833
1834 STATIC void
1835 S_check_uni(pTHX)
1836 {
1837     const char *s;
1838     const char *t;
1839
1840     if (PL_oldoldbufptr != PL_last_uni)
1841         return;
1842     while (isSPACE(*PL_last_uni))
1843         PL_last_uni++;
1844     s = PL_last_uni;
1845     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1846         s += UTF ? UTF8SKIP(s) : 1;
1847     if ((t = strchr(s, '(')) && t < PL_bufptr)
1848         return;
1849
1850     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1851                      "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
1852                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1853 }
1854
1855 /*
1856  * LOP : macro to build a list operator.  Its behaviour has been replaced
1857  * with a subroutine, S_lop() for which LOP is just another name.
1858  */
1859
1860 #define LOP(f,x) return lop(f,x,s)
1861
1862 /*
1863  * S_lop
1864  * Build a list operator (or something that might be one).  The rules:
1865  *  - if we have a next token, then it's a list operator (no parens) for
1866  *    which the next token has already been parsed; e.g.,
1867  *       sort foo @args
1868  *       sort foo (@args)
1869  *  - if the next thing is an opening paren, then it's a function
1870  *  - else it's a list operator
1871  */
1872
1873 STATIC I32
1874 S_lop(pTHX_ I32 f, int x, char *s)
1875 {
1876     PERL_ARGS_ASSERT_LOP;
1877
1878     pl_yylval.ival = f;
1879     CLINE;
1880     PL_bufptr = s;
1881     PL_last_lop = PL_oldbufptr;
1882     PL_last_lop_op = (OPCODE)f;
1883     if (PL_nexttoke)
1884         goto lstop;
1885     PL_expect = x;
1886     if (*s == '(')
1887         return REPORT(FUNC);
1888     s = skipspace(s);
1889     if (*s == '(')
1890         return REPORT(FUNC);
1891     else {
1892         lstop:
1893         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1894             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1895         return REPORT(LSTOP);
1896     }
1897 }
1898
1899 /*
1900  * S_force_next
1901  * When the lexer realizes it knows the next token (for instance,
1902  * it is reordering tokens for the parser) then it can call S_force_next
1903  * to know what token to return the next time the lexer is called.  Caller
1904  * will need to set PL_nextval[] and possibly PL_expect to ensure
1905  * the lexer handles the token correctly.
1906  */
1907
1908 STATIC void
1909 S_force_next(pTHX_ I32 type)
1910 {
1911 #ifdef DEBUGGING
1912     if (DEBUG_T_TEST) {
1913         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1914         tokereport(type, &NEXTVAL_NEXTTOKE);
1915     }
1916 #endif
1917     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1918     PL_nexttype[PL_nexttoke] = type;
1919     PL_nexttoke++;
1920 }
1921
1922 /*
1923  * S_postderef
1924  *
1925  * This subroutine handles postfix deref syntax after the arrow has already
1926  * been emitted.  @* $* etc. are emitted as two separate token right here.
1927  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1928  * only the first, leaving yylex to find the next.
1929  */
1930
1931 static int
1932 S_postderef(pTHX_ int const funny, char const next)
1933 {
1934     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1935     if (next == '*') {
1936         PL_expect = XOPERATOR;
1937         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1938             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1939             PL_lex_state = LEX_INTERPEND;
1940             if ('@' == funny)
1941                 force_next(POSTJOIN);
1942         }
1943         force_next(next);
1944         PL_bufptr+=2;
1945     }
1946     else {
1947         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1948          && !PL_lex_brackets)
1949             PL_lex_dojoin = 2;
1950         PL_expect = XOPERATOR;
1951         PL_bufptr++;
1952     }
1953     return funny;
1954 }
1955
1956 void
1957 Perl_yyunlex(pTHX)
1958 {
1959     int yyc = PL_parser->yychar;
1960     if (yyc != YYEMPTY) {
1961         if (yyc) {
1962             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1963             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1964                 PL_lex_allbrackets--;
1965                 PL_lex_brackets--;
1966                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1967             } else if (yyc == '('/*)*/) {
1968                 PL_lex_allbrackets--;
1969                 yyc |= (2<<24);
1970             }
1971             force_next(yyc);
1972         }
1973         PL_parser->yychar = YYEMPTY;
1974     }
1975 }
1976
1977 STATIC SV *
1978 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1979 {
1980     SV * const sv = newSVpvn_utf8(start, len,
1981                                   !IN_BYTES
1982                                   && UTF
1983                                   && !is_invariant_string((const U8*)start, len)
1984                                   && is_utf8_string((const U8*)start, len));
1985     return sv;
1986 }
1987
1988 /*
1989  * S_force_word
1990  * When the lexer knows the next thing is a word (for instance, it has
1991  * just seen -> and it knows that the next char is a word char, then
1992  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1993  * lookahead.
1994  *
1995  * Arguments:
1996  *   char *start : buffer position (must be within PL_linestr)
1997  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1998  *   int check_keyword : if true, Perl checks to make sure the word isn't
1999  *       a keyword (do this if the word is a label, e.g. goto FOO)
2000  *   int allow_pack : if true, : characters will also be allowed (require,
2001  *       use, etc. do this)
2002  */
2003
2004 STATIC char *
2005 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2006 {
2007     char *s;
2008     STRLEN len;
2009
2010     PERL_ARGS_ASSERT_FORCE_WORD;
2011
2012     start = skipspace(start);
2013     s = start;
2014     if (isIDFIRST_lazy_if(s,UTF)
2015         || (allow_pack && *s == ':' && s[1] == ':') )
2016     {
2017         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2018         if (check_keyword) {
2019           char *s2 = PL_tokenbuf;
2020           STRLEN len2 = len;
2021           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2022             s2 += 6, len2 -= 6;
2023           if (keyword(s2, len2, 0))
2024             return start;
2025         }
2026         if (token == METHOD) {
2027             s = skipspace(s);
2028             if (*s == '(')
2029                 PL_expect = XTERM;
2030             else {
2031                 PL_expect = XOPERATOR;
2032             }
2033         }
2034         NEXTVAL_NEXTTOKE.opval
2035             = (OP*)newSVOP(OP_CONST,0,
2036                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2037         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2038         force_next(token);
2039     }
2040     return s;
2041 }
2042
2043 /*
2044  * S_force_ident
2045  * Called when the lexer wants $foo *foo &foo etc, but the program
2046  * text only contains the "foo" portion.  The first argument is a pointer
2047  * to the "foo", and the second argument is the type symbol to prefix.
2048  * Forces the next token to be a "WORD".
2049  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2050  */
2051
2052 STATIC void
2053 S_force_ident(pTHX_ const char *s, int kind)
2054 {
2055     PERL_ARGS_ASSERT_FORCE_IDENT;
2056
2057     if (s[0]) {
2058         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2059         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2060                                                                 UTF ? SVf_UTF8 : 0));
2061         NEXTVAL_NEXTTOKE.opval = o;
2062         force_next(WORD);
2063         if (kind) {
2064             o->op_private = OPpCONST_ENTERED;
2065             /* XXX see note in pp_entereval() for why we forgo typo
2066                warnings if the symbol must be introduced in an eval.
2067                GSAR 96-10-12 */
2068             gv_fetchpvn_flags(s, len,
2069                               (PL_in_eval ? GV_ADDMULTI
2070                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2071                               kind == '$' ? SVt_PV :
2072                               kind == '@' ? SVt_PVAV :
2073                               kind == '%' ? SVt_PVHV :
2074                               SVt_PVGV
2075                               );
2076         }
2077     }
2078 }
2079
2080 static void
2081 S_force_ident_maybe_lex(pTHX_ char pit)
2082 {
2083     NEXTVAL_NEXTTOKE.ival = pit;
2084     force_next('p');
2085 }
2086
2087 NV
2088 Perl_str_to_version(pTHX_ SV *sv)
2089 {
2090     NV retval = 0.0;
2091     NV nshift = 1.0;
2092     STRLEN len;
2093     const char *start = SvPV_const(sv,len);
2094     const char * const end = start + len;
2095     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2096
2097     PERL_ARGS_ASSERT_STR_TO_VERSION;
2098
2099     while (start < end) {
2100         STRLEN skip;
2101         UV n;
2102         if (utf)
2103             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2104         else {
2105             n = *(U8*)start;
2106             skip = 1;
2107         }
2108         retval += ((NV)n)/nshift;
2109         start += skip;
2110         nshift *= 1000;
2111     }
2112     return retval;
2113 }
2114
2115 /*
2116  * S_force_version
2117  * Forces the next token to be a version number.
2118  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2119  * and if "guessing" is TRUE, then no new token is created (and the caller
2120  * must use an alternative parsing method).
2121  */
2122
2123 STATIC char *
2124 S_force_version(pTHX_ char *s, int guessing)
2125 {
2126     OP *version = NULL;
2127     char *d;
2128
2129     PERL_ARGS_ASSERT_FORCE_VERSION;
2130
2131     s = skipspace(s);
2132
2133     d = s;
2134     if (*d == 'v')
2135         d++;
2136     if (isDIGIT(*d)) {
2137         while (isDIGIT(*d) || *d == '_' || *d == '.')
2138             d++;
2139         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2140             SV *ver;
2141             s = scan_num(s, &pl_yylval);
2142             version = pl_yylval.opval;
2143             ver = cSVOPx(version)->op_sv;
2144             if (SvPOK(ver) && !SvNIOK(ver)) {
2145                 SvUPGRADE(ver, SVt_PVNV);
2146                 SvNV_set(ver, str_to_version(ver));
2147                 SvNOK_on(ver);          /* hint that it is a version */
2148             }
2149         }
2150         else if (guessing) {
2151             return s;
2152         }
2153     }
2154
2155     /* NOTE: The parser sees the package name and the VERSION swapped */
2156     NEXTVAL_NEXTTOKE.opval = version;
2157     force_next(WORD);
2158
2159     return s;
2160 }
2161
2162 /*
2163  * S_force_strict_version
2164  * Forces the next token to be a version number using strict syntax rules.
2165  */
2166
2167 STATIC char *
2168 S_force_strict_version(pTHX_ char *s)
2169 {
2170     OP *version = NULL;
2171     const char *errstr = NULL;
2172
2173     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2174
2175     while (isSPACE(*s)) /* leading whitespace */
2176         s++;
2177
2178     if (is_STRICT_VERSION(s,&errstr)) {
2179         SV *ver = newSV(0);
2180         s = (char *)scan_version(s, ver, 0);
2181         version = newSVOP(OP_CONST, 0, ver);
2182     }
2183     else if ((*s != ';' && *s != '{' && *s != '}' )
2184              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2185     {
2186         PL_bufptr = s;
2187         if (errstr)
2188             yyerror(errstr); /* version required */
2189         return s;
2190     }
2191
2192     /* NOTE: The parser sees the package name and the VERSION swapped */
2193     NEXTVAL_NEXTTOKE.opval = version;
2194     force_next(WORD);
2195
2196     return s;
2197 }
2198
2199 /*
2200  * S_tokeq
2201  * Tokenize a quoted string passed in as an SV.  It finds the next
2202  * chunk, up to end of string or a backslash.  It may make a new
2203  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2204  * turns \\ into \.
2205  */
2206
2207 STATIC SV *
2208 S_tokeq(pTHX_ SV *sv)
2209 {
2210     char *s;
2211     char *send;
2212     char *d;
2213     SV *pv = sv;
2214
2215     PERL_ARGS_ASSERT_TOKEQ;
2216
2217     assert (SvPOK(sv));
2218     assert (SvLEN(sv));
2219     assert (!SvIsCOW(sv));
2220     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2221         goto finish;
2222     s = SvPVX(sv);
2223     send = SvEND(sv);
2224     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2225     while (s < send && !(*s == '\\' && s[1] == '\\'))
2226         s++;
2227     if (s == send)
2228         goto finish;
2229     d = s;
2230     if ( PL_hints & HINT_NEW_STRING ) {
2231         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2232                             SVs_TEMP | SvUTF8(sv));
2233     }
2234     while (s < send) {
2235         if (*s == '\\') {
2236             if (s + 1 < send && (s[1] == '\\'))
2237                 s++;            /* all that, just for this */
2238         }
2239         *d++ = *s++;
2240     }
2241     *d = '\0';
2242     SvCUR_set(sv, d - SvPVX_const(sv));
2243   finish:
2244     if ( PL_hints & HINT_NEW_STRING )
2245        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2246     return sv;
2247 }
2248
2249 /*
2250  * Now come three functions related to double-quote context,
2251  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2252  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2253  * interact with PL_lex_state, and create fake ( ... ) argument lists
2254  * to handle functions and concatenation.
2255  * For example,
2256  *   "foo\lbar"
2257  * is tokenised as
2258  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2259  */
2260
2261 /*
2262  * S_sublex_start
2263  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2264  *
2265  * Pattern matching will set PL_lex_op to the pattern-matching op to
2266  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2267  *
2268  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2269  *
2270  * Everything else becomes a FUNC.
2271  *
2272  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2273  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2274  * call to S_sublex_push().
2275  */
2276
2277 STATIC I32
2278 S_sublex_start(pTHX)
2279 {
2280     const I32 op_type = pl_yylval.ival;
2281
2282     if (op_type == OP_NULL) {
2283         pl_yylval.opval = PL_lex_op;
2284         PL_lex_op = NULL;
2285         return THING;
2286     }
2287     if (op_type == OP_CONST) {
2288         SV *sv = PL_lex_stuff;
2289         PL_lex_stuff = NULL;
2290         sv = tokeq(sv);
2291
2292         if (SvTYPE(sv) == SVt_PVIV) {
2293             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2294             STRLEN len;
2295             const char * const p = SvPV_const(sv, len);
2296             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2297             SvREFCNT_dec(sv);
2298             sv = nsv;
2299         }
2300         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2301         return THING;
2302     }
2303
2304     PL_sublex_info.super_state = PL_lex_state;
2305     PL_sublex_info.sub_inwhat = (U16)op_type;
2306     PL_sublex_info.sub_op = PL_lex_op;
2307     PL_lex_state = LEX_INTERPPUSH;
2308
2309     PL_expect = XTERM;
2310     if (PL_lex_op) {
2311         pl_yylval.opval = PL_lex_op;
2312         PL_lex_op = NULL;
2313         return PMFUNC;
2314     }
2315     else
2316         return FUNC;
2317 }
2318
2319 /*
2320  * S_sublex_push
2321  * Create a new scope to save the lexing state.  The scope will be
2322  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2323  * to the uc, lc, etc. found before.
2324  * Sets PL_lex_state to LEX_INTERPCONCAT.
2325  */
2326
2327 STATIC I32
2328 S_sublex_push(pTHX)
2329 {
2330     LEXSHARED *shared;
2331     const bool is_heredoc = PL_multi_close == '<';
2332     ENTER;
2333
2334     PL_lex_state = PL_sublex_info.super_state;
2335     SAVEI8(PL_lex_dojoin);
2336     SAVEI32(PL_lex_brackets);
2337     SAVEI32(PL_lex_allbrackets);
2338     SAVEI32(PL_lex_formbrack);
2339     SAVEI8(PL_lex_fakeeof);
2340     SAVEI32(PL_lex_casemods);
2341     SAVEI32(PL_lex_starts);
2342     SAVEI8(PL_lex_state);
2343     SAVESPTR(PL_lex_repl);
2344     SAVEVPTR(PL_lex_inpat);
2345     SAVEI16(PL_lex_inwhat);
2346     if (is_heredoc)
2347     {
2348         SAVECOPLINE(PL_curcop);
2349         SAVEI32(PL_multi_end);
2350         SAVEI32(PL_parser->herelines);
2351         PL_parser->herelines = 0;
2352     }
2353     SAVEI8(PL_multi_close);
2354     SAVEPPTR(PL_bufptr);
2355     SAVEPPTR(PL_bufend);
2356     SAVEPPTR(PL_oldbufptr);
2357     SAVEPPTR(PL_oldoldbufptr);
2358     SAVEPPTR(PL_last_lop);
2359     SAVEPPTR(PL_last_uni);
2360     SAVEPPTR(PL_linestart);
2361     SAVESPTR(PL_linestr);
2362     SAVEGENERICPV(PL_lex_brackstack);
2363     SAVEGENERICPV(PL_lex_casestack);
2364     SAVEGENERICPV(PL_parser->lex_shared);
2365     SAVEBOOL(PL_parser->lex_re_reparsing);
2366     SAVEI32(PL_copline);
2367
2368     /* The here-doc parser needs to be able to peek into outer lexing
2369        scopes to find the body of the here-doc.  So we put PL_linestr and
2370        PL_bufptr into lex_shared, to ‘share’ those values.
2371      */
2372     PL_parser->lex_shared->ls_linestr = PL_linestr;
2373     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2374
2375     PL_linestr = PL_lex_stuff;
2376     PL_lex_repl = PL_sublex_info.repl;
2377     PL_lex_stuff = NULL;
2378     PL_sublex_info.repl = NULL;
2379
2380     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2381        set for an inner quote-like operator and then an error causes scope-
2382        popping.  We must not have a PL_lex_stuff value left dangling, as
2383        that breaks assumptions elsewhere.  See bug #123617.  */
2384     SAVEGENERICSV(PL_lex_stuff);
2385     SAVEGENERICSV(PL_sublex_info.repl);
2386
2387     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2388         = SvPVX(PL_linestr);
2389     PL_bufend += SvCUR(PL_linestr);
2390     PL_last_lop = PL_last_uni = NULL;
2391     SAVEFREESV(PL_linestr);
2392     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2393
2394     PL_lex_dojoin = FALSE;
2395     PL_lex_brackets = PL_lex_formbrack = 0;
2396     PL_lex_allbrackets = 0;
2397     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2398     Newx(PL_lex_brackstack, 120, char);
2399     Newx(PL_lex_casestack, 12, char);
2400     PL_lex_casemods = 0;
2401     *PL_lex_casestack = '\0';
2402     PL_lex_starts = 0;
2403     PL_lex_state = LEX_INTERPCONCAT;
2404     if (is_heredoc)
2405         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2406     PL_copline = NOLINE;
2407
2408     Newxz(shared, 1, LEXSHARED);
2409     shared->ls_prev = PL_parser->lex_shared;
2410     PL_parser->lex_shared = shared;
2411
2412     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2413     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2414     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2415         PL_lex_inpat = PL_sublex_info.sub_op;
2416     else
2417         PL_lex_inpat = NULL;
2418
2419     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2420     PL_in_eval &= ~EVAL_RE_REPARSING;
2421
2422     return '(';
2423 }
2424
2425 /*
2426  * S_sublex_done
2427  * Restores lexer state after a S_sublex_push.
2428  */
2429
2430 STATIC I32
2431 S_sublex_done(pTHX)
2432 {
2433     if (!PL_lex_starts++) {
2434         SV * const sv = newSVpvs("");
2435         if (SvUTF8(PL_linestr))
2436             SvUTF8_on(sv);
2437         PL_expect = XOPERATOR;
2438         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2439         return THING;
2440     }
2441
2442     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2443         PL_lex_state = LEX_INTERPCASEMOD;
2444         return yylex();
2445     }
2446
2447     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2448     assert(PL_lex_inwhat != OP_TRANSR);
2449     if (PL_lex_repl) {
2450         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2451         PL_linestr = PL_lex_repl;
2452         PL_lex_inpat = 0;
2453         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2454         PL_bufend += SvCUR(PL_linestr);
2455         PL_last_lop = PL_last_uni = NULL;
2456         PL_lex_dojoin = FALSE;
2457         PL_lex_brackets = 0;
2458         PL_lex_allbrackets = 0;
2459         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2460         PL_lex_casemods = 0;
2461         *PL_lex_casestack = '\0';
2462         PL_lex_starts = 0;
2463         if (SvEVALED(PL_lex_repl)) {
2464             PL_lex_state = LEX_INTERPNORMAL;
2465             PL_lex_starts++;
2466             /*  we don't clear PL_lex_repl here, so that we can check later
2467                 whether this is an evalled subst; that means we rely on the
2468                 logic to ensure sublex_done() is called again only via the
2469                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2470         }
2471         else {
2472             PL_lex_state = LEX_INTERPCONCAT;
2473             PL_lex_repl = NULL;
2474         }
2475         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2476             CopLINE(PL_curcop) +=
2477                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2478                  + PL_parser->herelines;
2479             PL_parser->herelines = 0;
2480         }
2481         return '/';
2482     }
2483     else {
2484         const line_t l = CopLINE(PL_curcop);
2485         LEAVE;
2486         if (PL_multi_close == '<')
2487             PL_parser->herelines += l - PL_multi_end;
2488         PL_bufend = SvPVX(PL_linestr);
2489         PL_bufend += SvCUR(PL_linestr);
2490         PL_expect = XOPERATOR;
2491         return ')';
2492     }
2493 }
2494
2495 PERL_STATIC_INLINE SV*
2496 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2497 {
2498     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2499      * interior, hence to the "}".  Finds what the name resolves to, returning
2500      * an SV* containing it; NULL if no valid one found */
2501
2502     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2503
2504     HV * table;
2505     SV **cvp;
2506     SV *cv;
2507     SV *rv;
2508     HV *stash;
2509     const U8* first_bad_char_loc;
2510     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2511
2512     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2513
2514     if (!SvCUR(res)) {
2515         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2516                        "Unknown charname '' is deprecated");
2517         return res;
2518     }
2519
2520     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2521                                      e - backslash_ptr,
2522                                      &first_bad_char_loc))
2523     {
2524         /* If warnings are on, this will print a more detailed analysis of what
2525          * is wrong than the error message below */
2526         utf8n_to_uvchr(first_bad_char_loc,
2527                        e - ((char *) first_bad_char_loc),
2528                        NULL, 0);
2529
2530         /* We deliberately don't try to print the malformed character, which
2531          * might not print very well; it also may be just the first of many
2532          * malformations, so don't print what comes after it */
2533         yyerror_pv(Perl_form(aTHX_
2534             "Malformed UTF-8 character immediately after '%.*s'",
2535             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2536                    SVf_UTF8);
2537         return NULL;
2538     }
2539
2540     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2541                         /* include the <}> */
2542                         e - backslash_ptr + 1);
2543     if (! SvPOK(res)) {
2544         SvREFCNT_dec_NN(res);
2545         return NULL;
2546     }
2547
2548     /* See if the charnames handler is the Perl core's, and if so, we can skip
2549      * the validation needed for a user-supplied one, as Perl's does its own
2550      * validation. */
2551     table = GvHV(PL_hintgv);             /* ^H */
2552     cvp = hv_fetchs(table, "charnames", FALSE);
2553     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2554         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2555     {
2556         const char * const name = HvNAME(stash);
2557         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2558          && strEQ(name, "_charnames")) {
2559            return res;
2560        }
2561     }
2562
2563     /* Here, it isn't Perl's charname handler.  We can't rely on a
2564      * user-supplied handler to validate the input name.  For non-ut8 input,
2565      * look to see that the first character is legal.  Then loop through the
2566      * rest checking that each is a continuation */
2567
2568     /* This code makes the reasonable assumption that the only Latin1-range
2569      * characters that begin a character name alias are alphabetic, otherwise
2570      * would have to create a isCHARNAME_BEGIN macro */
2571
2572     if (! UTF) {
2573         if (! isALPHAU(*s)) {
2574             goto bad_charname;
2575         }
2576         s++;
2577         while (s < e) {
2578             if (! isCHARNAME_CONT(*s)) {
2579                 goto bad_charname;
2580             }
2581             if (*s == ' ' && *(s-1) == ' ') {
2582                 goto multi_spaces;
2583             }
2584             s++;
2585         }
2586     }
2587     else {
2588         /* Similarly for utf8.  For invariants can check directly; for other
2589          * Latin1, can calculate their code point and check; otherwise  use a
2590          * swash */
2591         if (UTF8_IS_INVARIANT(*s)) {
2592             if (! isALPHAU(*s)) {
2593                 goto bad_charname;
2594             }
2595             s++;
2596         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2597             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2598                 goto bad_charname;
2599             }
2600             s += 2;
2601         }
2602         else {
2603             if (! PL_utf8_charname_begin) {
2604                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2605                 PL_utf8_charname_begin = _core_swash_init("utf8",
2606                                                         "_Perl_Charname_Begin",
2607                                                         &PL_sv_undef,
2608                                                         1, 0, NULL, &flags);
2609             }
2610             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2611                 goto bad_charname;
2612             }
2613             s += UTF8SKIP(s);
2614         }
2615
2616         while (s < e) {
2617             if (UTF8_IS_INVARIANT(*s)) {
2618                 if (! isCHARNAME_CONT(*s)) {
2619                     goto bad_charname;
2620                 }
2621                 if (*s == ' ' && *(s-1) == ' ') {
2622                     goto multi_spaces;
2623                 }
2624                 s++;
2625             }
2626             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2627                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2628                 {
2629                     goto bad_charname;
2630                 }
2631                 s += 2;
2632             }
2633             else {
2634                 if (! PL_utf8_charname_continue) {
2635                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2636                     PL_utf8_charname_continue = _core_swash_init("utf8",
2637                                                 "_Perl_Charname_Continue",
2638                                                 &PL_sv_undef,
2639                                                 1, 0, NULL, &flags);
2640                 }
2641                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2642                     goto bad_charname;
2643                 }
2644                 s += UTF8SKIP(s);
2645             }
2646         }
2647     }
2648     if (*(s-1) == ' ') {
2649         yyerror_pv(
2650             Perl_form(aTHX_
2651             "charnames alias definitions may not contain trailing "
2652             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2653             (int)(s - backslash_ptr + 1), backslash_ptr,
2654             (int)(e - s + 1), s + 1
2655             ),
2656         UTF ? SVf_UTF8 : 0);
2657         return NULL;
2658     }
2659
2660     if (SvUTF8(res)) { /* Don't accept malformed input */
2661         const U8* first_bad_char_loc;
2662         STRLEN len;
2663         const char* const str = SvPV_const(res, len);
2664         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2665             /* If warnings are on, this will print a more detailed analysis of
2666              * what is wrong than the error message below */
2667             utf8n_to_uvchr(first_bad_char_loc,
2668                            (char *) first_bad_char_loc - str,
2669                            NULL, 0);
2670
2671             /* We deliberately don't try to print the malformed character,
2672              * which might not print very well; it also may be just the first
2673              * of many malformations, so don't print what comes after it */
2674             yyerror_pv(
2675               Perl_form(aTHX_
2676                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2677                  (int) (e - backslash_ptr + 1), backslash_ptr,
2678                  (int) ((char *) first_bad_char_loc - str), str
2679               ),
2680               SVf_UTF8);
2681             return NULL;
2682         }
2683     }
2684
2685     return res;
2686
2687   bad_charname: {
2688
2689         /* The final %.*s makes sure that should the trailing NUL be missing
2690          * that this print won't run off the end of the string */
2691         yyerror_pv(
2692           Perl_form(aTHX_
2693             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2694             (int)(s - backslash_ptr + 1), backslash_ptr,
2695             (int)(e - s + 1), s + 1
2696           ),
2697           UTF ? SVf_UTF8 : 0);
2698         return NULL;
2699     }
2700
2701   multi_spaces:
2702         yyerror_pv(
2703           Perl_form(aTHX_
2704             "charnames alias definitions may not contain a sequence of "
2705             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2706             (int)(s - backslash_ptr + 1), backslash_ptr,
2707             (int)(e - s + 1), s + 1
2708           ),
2709           UTF ? SVf_UTF8 : 0);
2710         return NULL;
2711 }
2712
2713 /*
2714   scan_const
2715
2716   Extracts the next constant part of a pattern, double-quoted string,
2717   or transliteration.  This is terrifying code.
2718
2719   For example, in parsing the double-quoted string "ab\x63$d", it would
2720   stop at the '$' and return an OP_CONST containing 'abc'.
2721
2722   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2723   processing a pattern (PL_lex_inpat is true), a transliteration
2724   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2725
2726   Returns a pointer to the character scanned up to. If this is
2727   advanced from the start pointer supplied (i.e. if anything was
2728   successfully parsed), will leave an OP_CONST for the substring scanned
2729   in pl_yylval. Caller must intuit reason for not parsing further
2730   by looking at the next characters herself.
2731
2732   In patterns:
2733     expand:
2734       \N{FOO}  => \N{U+hex_for_character_FOO}
2735       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2736
2737     pass through:
2738         all other \-char, including \N and \N{ apart from \N{ABC}
2739
2740     stops on:
2741         @ and $ where it appears to be a var, but not for $ as tail anchor
2742         \l \L \u \U \Q \E
2743         (?{  or  (??{
2744
2745   In transliterations:
2746     characters are VERY literal, except for - not at the start or end
2747     of the string, which indicates a range. If the range is in bytes,
2748     scan_const expands the range to the full set of intermediate
2749     characters. If the range is in utf8, the hyphen is replaced with
2750     a certain range mark which will be handled by pmtrans() in op.c.
2751
2752   In double-quoted strings:
2753     backslashes:
2754       double-quoted style: \r and \n
2755       constants: \x31, etc.
2756       deprecated backrefs: \1 (in substitution replacements)
2757       case and quoting: \U \Q \E
2758     stops on @ and $
2759
2760   scan_const does *not* construct ops to handle interpolated strings.
2761   It stops processing as soon as it finds an embedded $ or @ variable
2762   and leaves it to the caller to work out what's going on.
2763
2764   embedded arrays (whether in pattern or not) could be:
2765       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2766
2767   $ in double-quoted strings must be the symbol of an embedded scalar.
2768
2769   $ in pattern could be $foo or could be tail anchor.  Assumption:
2770   it's a tail anchor if $ is the last thing in the string, or if it's
2771   followed by one of "()| \r\n\t"
2772
2773   \1 (backreferences) are turned into $1 in substitutions
2774
2775   The structure of the code is
2776       while (there's a character to process) {
2777           handle transliteration ranges
2778           skip regexp comments /(?#comment)/ and codes /(?{code})/
2779           skip #-initiated comments in //x patterns
2780           check for embedded arrays
2781           check for embedded scalars
2782           if (backslash) {
2783               deprecate \1 in substitution replacements
2784               handle string-changing backslashes \l \U \Q \E, etc.
2785               switch (what was escaped) {
2786                   handle \- in a transliteration (becomes a literal -)
2787                   if a pattern and not \N{, go treat as regular character
2788                   handle \132 (octal characters)
2789                   handle \x15 and \x{1234} (hex characters)
2790                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2791                   handle \cV (control characters)
2792                   handle printf-style backslashes (\f, \r, \n, etc)
2793               } (end switch)
2794               continue
2795           } (end if backslash)
2796           handle regular character
2797     } (end while character to read)
2798
2799 */
2800
2801 STATIC char *
2802 S_scan_const(pTHX_ char *start)
2803 {
2804     char *send = PL_bufend;             /* end of the constant */
2805     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2806                                            on sizing. */
2807     char *s = start;                    /* start of the constant */
2808     char *d = SvPVX(sv);                /* destination for copies */
2809     bool dorange = FALSE;               /* are we in a translit range? */
2810     bool didrange = FALSE;              /* did we just finish a range? */
2811     bool in_charclass = FALSE;          /* within /[...]/ */
2812     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2813     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2814                                            UTF8?  But, this can show as true
2815                                            when the source isn't utf8, as for
2816                                            example when it is entirely composed
2817                                            of hex constants */
2818     SV *res;                            /* result from charnames */
2819     STRLEN offset_to_max;   /* The offset in the output to where the range
2820                                high-end character is temporarily placed */
2821
2822     /* Note on sizing:  The scanned constant is placed into sv, which is
2823      * initialized by newSV() assuming one byte of output for every byte of
2824      * input.  This routine expects newSV() to allocate an extra byte for a
2825      * trailing NUL, which this routine will append if it gets to the end of
2826      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2827      * CAPITAL LETTER A}), or more output than input if the constant ends up
2828      * recoded to utf8, but each time a construct is found that might increase
2829      * the needed size, SvGROW() is called.  Its size parameter each time is
2830      * based on the best guess estimate at the time, namely the length used so
2831      * far, plus the length the current construct will occupy, plus room for
2832      * the trailing NUL, plus one byte for every input byte still unscanned */
2833
2834     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2835                        before set */
2836 #ifdef EBCDIC
2837     int backslash_N = 0;            /* ? was the character from \N{} */
2838     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2839                                        platform-specific like \x65 */
2840 #endif
2841
2842     PERL_ARGS_ASSERT_SCAN_CONST;
2843
2844     assert(PL_lex_inwhat != OP_TRANSR);
2845     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2846         /* If we are doing a trans and we know we want UTF8 set expectation */
2847         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2848         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2849     }
2850
2851     /* Protect sv from errors and fatal warnings. */
2852     ENTER_with_name("scan_const");
2853     SAVEFREESV(sv);
2854
2855     while (s < send
2856            || dorange   /* Handle tr/// range at right edge of input */
2857     ) {
2858
2859         /* get transliterations out of the way (they're most literal) */
2860         if (PL_lex_inwhat == OP_TRANS) {
2861
2862             /* But there isn't any special handling necessary unless there is a
2863              * range, so for most cases we just drop down and handle the value
2864              * as any other.  There are two exceptions.
2865              *
2866              * 1.  A minus sign indicates that we are actually going to have
2867              *     a range.  In this case, skip the '-', set a flag, then drop
2868              *     down to handle what should be the end range value.
2869              * 2.  After we've handled that value, the next time through, that
2870              *     flag is set and we fix up the range.
2871              *
2872              * Ranges entirely within Latin1 are expanded out entirely, in
2873              * order to avoid the significant overhead of making a swash.
2874              * Ranges that extend above Latin1 have to have a swash, so there
2875              * is no advantage to abbreviate them here, so they are stored here
2876              * as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies a
2877              * hyphen without any possible ambiguity.  On EBCDIC machines, if
2878              * the range is expressed as Unicode, the Latin1 portion is
2879              * expanded out even if the entire range extends above Latin1.
2880              * This is because each code point in it has to be processed here
2881              * individually to get its native translation */
2882
2883             if (! dorange) {
2884
2885                 /* Here, we don't think we're in a range.  If we've processed
2886                  * at least one character, then see if this next one is a '-',
2887                  * indicating the previous one was the start of a range.  But
2888                  * don't bother if we're too close to the end for the minus to
2889                  * mean that. */
2890                 if (*s != '-' || s >= send - 1 || s == start) {
2891
2892                     /* A regular character.  Process like any other, but first
2893                      * clear any flags */
2894                     didrange = FALSE;
2895                     dorange = FALSE;
2896 #ifdef EBCDIC
2897                     non_portable_endpoint = 0;
2898                     backslash_N = 0;
2899 #endif
2900                     /* Drops down to generic code to process current byte */
2901                 }
2902                 else {
2903                     if (didrange) { /* Something like y/A-C-Z// */
2904                         Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2905                     }
2906
2907                     dorange = TRUE;
2908
2909                     s++;    /* Skip past the minus */
2910
2911                     /* d now points to where the end-range character will be
2912                      * placed.  Save it so won't have to go finding it later,
2913                      * and drop down to get that character.  (Actually we
2914                      * instead save the offset, to handle the case where a
2915                      * realloc in the meantime could change the actual
2916                      * pointer).  We'll finish processing the range the next
2917                      * time through the loop */
2918                     offset_to_max = d - SvPVX_const(sv);
2919                 }
2920             }  /* End of not a range */
2921             else {
2922                 /* Here we have parsed a range.  Now must handle it.  At this
2923                  * point:
2924                  * 'sv' is a SV* that contains the output string we are
2925                  *      constructing.  The final two characters in that string
2926                  *      are the range start and range end, in order.
2927                  * 'd'  points to just beyond the range end in the 'sv' string,
2928                  *      where we would next place something
2929                  * 'offset_to_max' is the offset in 'sv' at which the character
2930                  *      before 'd' begins.
2931                  */
2932                 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2933                 const char * min_ptr;
2934                 IV range_min;
2935                 IV range_max;   /* last character in range */
2936                 STRLEN save_offset;
2937                 STRLEN grow;
2938 #ifndef EBCDIC  /* Not meaningful except in EBCDIC, so initialize to false */
2939                 const bool convert_unicode = FALSE;
2940                 const IV real_range_max = 0;
2941 #else
2942                 bool convert_unicode;
2943                 IV real_range_max = 0;
2944 #endif
2945
2946                 /* Get the range-ends code point values. */
2947                 if (has_utf8) {
2948                     /* We know the utf8 is valid, because we just constructed
2949                      * it ourselves in previous loop iterations */
2950                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2951                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2952                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2953                 }
2954                 else {
2955                     min_ptr = max_ptr - 1;
2956                     range_min = * (U8*) min_ptr;
2957                     range_max = * (U8*) max_ptr;
2958                 }
2959
2960 #ifdef EBCDIC
2961                 /* On EBCDIC platforms, we may have to deal with portable
2962                  * ranges.  These happen if at least one range endpoint is a
2963                  * Unicode value (\N{...}), or if the range is a subset of
2964                  * [A-Z] or [a-z], and both ends are literal characters,
2965                  * like 'A', and not like \x{C1} */
2966                 if ((convert_unicode
2967                      = cBOOL(backslash_N)   /* \N{} forces Unicode, hence
2968                                                portable range */
2969                       || (   ! non_portable_endpoint
2970                           && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
2971                              || (isUPPER_A(range_min) && isUPPER_A(range_max))))
2972                 )) {
2973
2974                     /* Special handling is needed for these portable ranges.
2975                      * They are defined to all be in Unicode terms, which
2976                      * include all Unicode code points between the end points.
2977                      * Convert to Unicode to get the Unicode range.  Later we
2978                      * will convert each code point in the range back to
2979                      * native.  */
2980                     range_min = NATIVE_TO_UNI(range_min);
2981                     range_max = NATIVE_TO_UNI(range_max);
2982                 }
2983 #endif
2984
2985                 if (range_min > range_max) {
2986                     if (convert_unicode) {
2987                         /* Need to convert back to native for meaningful
2988                          * messages for this platform */
2989                         range_min = UNI_TO_NATIVE(range_min);
2990                         range_max = UNI_TO_NATIVE(range_max);
2991                     }
2992
2993                     /* Use the characters themselves for the error message if
2994                      * ASCII printables; otherwise some visible representation
2995                      * of them */
2996                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
2997                         Perl_croak(aTHX_
2998                          "Invalid range \"%c-%c\" in transliteration operator",
2999                          (char)range_min, (char)range_max);
3000                     }
3001                     else if (convert_unicode) {
3002                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3003                         Perl_croak(aTHX_
3004                                "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
3005                                " in transliteration operator",
3006                                range_min, range_max);
3007                     }
3008                     else {
3009                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3010                         Perl_croak(aTHX_
3011                                "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
3012                                " in transliteration operator",
3013                                range_min, range_max);
3014                     }
3015                 }
3016
3017                 if (has_utf8) {
3018
3019                     /* We try to avoid creating a swash.  If the upper end of
3020                      * this range is below 256, this range won't force a swash;
3021                      * otherwise it does force a swash, and as long as we have
3022                      * to have one, we might as well not expand things out.
3023                      * But if it's EBCDIC, we may have to look at each
3024                      * character below 256 if we have to convert to/from
3025                      * Unicode values */
3026                     if (range_max > 255
3027 #ifdef EBCDIC
3028                         && (range_min > 255 || ! convert_unicode)
3029 #endif
3030                     ) {
3031                         /* Move the high character one byte to the right; then
3032                          * insert between it and the range begin, an illegal
3033                          * byte which serves to indicate this is a range (using
3034                          * a '-' could be ambiguous). */
3035                         char *e = d++;
3036                         while (e-- > max_ptr) {
3037                             *(e + 1) = *e;
3038                         }
3039                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3040                         goto range_done;
3041                     }
3042
3043                     /* Here, we're going to expand out the range.  For EBCDIC
3044                      * the range can extend above 255 (not so in ASCII), so
3045                      * for EBCDIC, split it into the parts above and below
3046                      * 255/256 */
3047 #ifdef EBCDIC
3048                     if (range_max > 255) {
3049                         real_range_max = range_max;
3050                         range_max = 255;
3051                     }
3052 #endif
3053                 }
3054
3055                 /* Here we need to expand out the string to contain each
3056                  * character in the range.  Grow the output to handle this */
3057
3058                 save_offset  = min_ptr - SvPVX_const(sv);
3059
3060                 /* The base growth is the number of code points in the range */
3061                 grow = range_max - range_min + 1;
3062                 if (has_utf8) {
3063
3064                     /* But if the output is UTF-8, some of those characters may
3065                      * need two bytes (since the maximum range value here is
3066                      * 255, the max bytes per character is two).  On ASCII
3067                      * platforms, it's not much trouble to get an accurate
3068                      * count of what's needed.  But on EBCDIC, the ones that
3069                      * need 2 bytes are scattered around, so just use a worst
3070                      * case value instead of calculating for that platform.  */
3071 #ifdef EBCDIC
3072                     grow *= 2;
3073 #else
3074                     /* Only those above 127 require 2 bytes.  This may be
3075                      * everything in the range, or not */
3076                     if (range_min > 127) {
3077                         grow *= 2;
3078                     }
3079                     else if (range_max > 127) {
3080                         grow += range_max - 127;
3081                     }
3082 #endif
3083                 }
3084
3085                 /* Subtract 3 for the bytes that were already accounted for
3086                  * (min, max, and the hyphen) */
3087                 SvGROW(sv, SvLEN(sv) + grow - 3);
3088                 d = SvPVX(sv) + save_offset;    /* refresh d after realloc */
3089
3090                 /* Here, we expand out the range.  On ASCII platforms, the
3091                  * compiler should optimize out the 'convert_unicode==TRUE'
3092                  * portion of this */
3093                 if (convert_unicode) {
3094                     IV i;
3095
3096                     /* Recall that the min and max are now in Unicode terms, so
3097                      * we have to convert each character to its native
3098                      * equivalent */
3099                     if (has_utf8) {
3100                         for (i = range_min; i <= range_max; i++) {
3101                             append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3102                                                          (U8 **) &d);
3103                         }
3104                     }
3105                     else {
3106                         for (i = range_min; i <= range_max; i++) {
3107                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3108                         }
3109                     }
3110                 }
3111                 else {
3112                     IV i;
3113
3114                     /* Here, no conversions are necessary, which means that the
3115                      * first character in the range is already in 'd' and
3116                      * valid, so we can skip overwriting it */
3117                     if (has_utf8) {
3118                         d += UTF8SKIP(d);
3119                         for (i = range_min + 1; i <= range_max; i++) {
3120                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3121                         }
3122                     }
3123                     else {
3124                         d++;
3125                         for (i = range_min + 1; i <= range_max; i++) {
3126                             *d++ = (char)i;
3127                         }
3128                     }
3129                 }
3130
3131                 /* (Compilers should optimize this out for non-EBCDIC).  If the
3132                  * original range extended above 255, add in that portion */
3133                 if (real_range_max) {
3134                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3135                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3136                     if (real_range_max > 0x101)
3137                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3138                     if (real_range_max > 0x100)
3139                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3140                 }
3141
3142               range_done:
3143                 /* mark the range as done, and continue */
3144                 didrange = TRUE;
3145                 dorange = FALSE;
3146 #ifdef EBCDIC
3147                 non_portable_endpoint = 0;
3148                 backslash_N = 0;
3149 #endif
3150                 continue;
3151             } /* End of is a range */
3152         } /* End of transliteration.  Joins main code after these else's */
3153         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3154             char *s1 = s-1;
3155             int esc = 0;
3156             while (s1 >= start && *s1-- == '\\')
3157                 esc = !esc;
3158             if (!esc)
3159                 in_charclass = TRUE;
3160         }
3161
3162         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3163             char *s1 = s-1;
3164             int esc = 0;
3165             while (s1 >= start && *s1-- == '\\')
3166                 esc = !esc;
3167             if (!esc)
3168                 in_charclass = FALSE;
3169         }
3170
3171         /* skip for regexp comments /(?#comment)/, except for the last
3172          * char, which will be done separately.
3173          * Stop on (?{..}) and friends */
3174
3175         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3176             if (s[2] == '#') {
3177                 while (s+1 < send && *s != ')')
3178                     *d++ = *s++;
3179             }
3180             else if (!PL_lex_casemods
3181                      && (    s[2] == '{' /* This should match regcomp.c */
3182                          || (s[2] == '?' && s[3] == '{')))
3183             {
3184                 break;
3185             }
3186         }
3187
3188         /* likewise skip #-initiated comments in //x patterns */
3189         else if (*s == '#'
3190                  && PL_lex_inpat
3191                  && !in_charclass
3192                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3193         {
3194             while (s+1 < send && *s != '\n')
3195                 *d++ = *s++;
3196         }
3197
3198         /* no further processing of single-quoted regex */
3199         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3200             goto default_action;
3201
3202         /* check for embedded arrays
3203            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3204            */
3205         else if (*s == '@' && s[1]) {
3206             if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3207                 break;
3208             if (strchr(":'{$", s[1]))
3209                 break;
3210             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3211                 break; /* in regexp, neither @+ nor @- are interpolated */
3212         }
3213
3214         /* check for embedded scalars.  only stop if we're sure it's a
3215            variable.
3216         */
3217         else if (*s == '$') {
3218             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3219                 break;
3220             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3221                 if (s[1] == '\\') {
3222                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3223                                    "Possible unintended interpolation of $\\ in regex");
3224                 }
3225                 break;          /* in regexp, $ might be tail anchor */
3226             }
3227         }
3228
3229         /* End of else if chain - OP_TRANS rejoin rest */
3230
3231         /* backslashes */
3232         if (*s == '\\' && s+1 < send) {
3233             char* e;    /* Can be used for ending '}', etc. */
3234
3235             s++;
3236
3237             /* warn on \1 - \9 in substitution replacements, but note that \11
3238              * is an octal; and \19 is \1 followed by '9' */
3239             if (PL_lex_inwhat == OP_SUBST
3240                 && !PL_lex_inpat
3241                 && isDIGIT(*s)
3242                 && *s != '0'
3243                 && !isDIGIT(s[1]))
3244             {
3245                 /* diag_listed_as: \%d better written as $%d */
3246                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3247                 *--s = '$';
3248                 break;
3249             }
3250
3251             /* string-change backslash escapes */
3252             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3253                 --s;
3254                 break;
3255             }
3256             /* In a pattern, process \N, but skip any other backslash escapes.
3257              * This is because we don't want to translate an escape sequence
3258              * into a meta symbol and have the regex compiler use the meta
3259              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3260              * in spite of this, we do have to process \N here while the proper
3261              * charnames handler is in scope.  See bugs #56444 and #62056.
3262              *
3263              * There is a complication because \N in a pattern may also stand
3264              * for 'match a non-nl', and not mean a charname, in which case its
3265              * processing should be deferred to the regex compiler.  To be a
3266              * charname it must be followed immediately by a '{', and not look
3267              * like \N followed by a curly quantifier, i.e., not something like
3268              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3269              * quantifier */
3270             else if (PL_lex_inpat
3271                     && (*s != 'N'
3272                         || s[1] != '{'
3273                         || regcurly(s + 1)))
3274             {
3275                 *d++ = '\\';
3276                 goto default_action;
3277             }
3278
3279             switch (*s) {
3280             default:
3281                 {
3282                     if ((isALPHANUMERIC(*s)))
3283                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3284                                        "Unrecognized escape \\%c passed through",
3285                                        *s);
3286                     /* default action is to copy the quoted character */
3287                     goto default_action;
3288                 }
3289
3290             /* eg. \132 indicates the octal constant 0132 */
3291             case '0': case '1': case '2': case '3':
3292             case '4': case '5': case '6': case '7':
3293                 {
3294                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3295                     STRLEN len = 3;
3296                     uv = grok_oct(s, &len, &flags, NULL);
3297                     s += len;
3298                     if (len < 3 && s < send && isDIGIT(*s)
3299                         && ckWARN(WARN_MISC))
3300                     {
3301                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3302                                     "%s", form_short_octal_warning(s, len));
3303                     }
3304                 }
3305                 goto NUM_ESCAPE_INSERT;
3306
3307             /* eg. \o{24} indicates the octal constant \024 */
3308             case 'o':
3309                 {
3310                     const char* error;
3311
3312                     bool valid = grok_bslash_o(&s, &uv, &error,
3313                                                TRUE, /* Output warning */
3314                                                FALSE, /* Not strict */
3315                                                TRUE, /* Output warnings for
3316                                                          non-portables */
3317                                                UTF);
3318                     if (! valid) {
3319                         yyerror(error);
3320                         continue;
3321                     }
3322                     goto NUM_ESCAPE_INSERT;
3323                 }
3324
3325             /* eg. \x24 indicates the hex constant 0x24 */
3326             case 'x':
3327                 {
3328                     const char* error;
3329
3330                     bool valid = grok_bslash_x(&s, &uv, &error,
3331                                                TRUE, /* Output warning */
3332                                                FALSE, /* Not strict */
3333                                                TRUE,  /* Output warnings for
3334                                                          non-portables */
3335                                                UTF);
3336                     if (! valid) {
3337                         yyerror(error);
3338                         continue;
3339                     }
3340                 }
3341
3342               NUM_ESCAPE_INSERT:
3343                 /* Insert oct or hex escaped character. */
3344
3345                 /* Here uv is the ordinal of the next character being added */
3346                 if (UVCHR_IS_INVARIANT(uv)) {
3347                     *d++ = (char) uv;
3348                 }
3349                 else {
3350                     if (!has_utf8 && uv > 255) {
3351                         /* Might need to recode whatever we have accumulated so
3352                          * far if it contains any chars variant in utf8 or
3353                          * utf-ebcdic. */
3354
3355                         SvCUR_set(sv, d - SvPVX_const(sv));
3356                         SvPOK_on(sv);
3357                         *d = '\0';
3358                         /* See Note on sizing above.  */
3359                         sv_utf8_upgrade_flags_grow(
3360                                        sv,
3361                                        SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3362                                                   /* Above-latin1 in string
3363                                                    * implies no encoding */
3364                                                   |SV_UTF8_NO_ENCODING,
3365                                        UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3366                         d = SvPVX(sv) + SvCUR(sv);
3367                         has_utf8 = TRUE;
3368                     }
3369
3370                     if (has_utf8) {
3371                        /* Usually, there will already be enough room in 'sv'
3372                         * since such escapes are likely longer than any UTF-8
3373                         * sequence they can end up as.  This isn't the case on
3374                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3375                         * UTF-8 for it contains 14.  And, we have to allow for
3376                         * a trailing NUL.  It probably can't happen on ASCII
3377                         * platforms, but be safe */
3378                         const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3379                                             + 1;
3380                         if (UNLIKELY(needed > SvLEN(sv))) {
3381                             SvCUR_set(sv, d - SvPVX_const(sv));
3382                             d = sv_grow(sv, needed) + SvCUR(sv);
3383                         }
3384
3385                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3386                         if (PL_lex_inwhat == OP_TRANS
3387                             && PL_sublex_info.sub_op)
3388                         {
3389                             PL_sublex_info.sub_op->op_private |=
3390                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3391                                              : OPpTRANS_TO_UTF);
3392                         }
3393                     }
3394                     else {
3395                         *d++ = (char)uv;
3396                     }
3397                 }
3398 #ifdef EBCDIC
3399                 non_portable_endpoint++;
3400 #endif
3401                 continue;
3402
3403             case 'N':
3404                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3405                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3406                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3407                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3408                  * convenience all three forms are referred to as "named
3409                  * characters" below.
3410                  *
3411                  * For patterns, \N also can mean to match a non-newline.  Code
3412                  * before this 'switch' statement should already have handled
3413                  * this situation, and hence this code only has to deal with
3414                  * the named character cases.
3415                  *
3416                  * For non-patterns, the named characters are converted to
3417                  * their string equivalents.  In patterns, named characters are
3418                  * not converted to their ultimate forms for the same reasons
3419                  * that other escapes aren't.  Instead, they are converted to
3420                  * the \N{U+...} form to get the value from the charnames that
3421                  * is in effect right now, while preserving the fact that it
3422                  * was a named character, so that the regex compiler knows
3423                  * this.
3424                  *
3425                  * The structure of this section of code (besides checking for
3426                  * errors and upgrading to utf8) is:
3427                  *    If the named character is of the form \N{U+...}, pass it
3428                  *      through if a pattern; otherwise convert the code point
3429                  *      to utf8
3430                  *    Otherwise must be some \N{NAME}: convert to
3431                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3432                  *
3433                  * Transliteration is an exception.  The conversion to utf8 is
3434                  * only done if the code point requires it to be representable.
3435                  *
3436                  * Here, 's' points to the 'N'; the test below is guaranteed to
3437                  * succeed if we are being called on a pattern, as we already
3438                  * know from a test above that the next character is a '{'.  A
3439                  * non-pattern \N must mean 'named character', which requires
3440                  * braces */
3441                 s++;
3442                 if (*s != '{') {
3443                     yyerror("Missing braces on \\N{}");
3444                     continue;
3445                 }
3446                 s++;
3447
3448                 /* If there is no matching '}', it is an error. */
3449                 if (! (e = strchr(s, '}'))) {
3450                     if (! PL_lex_inpat) {
3451                         yyerror("Missing right brace on \\N{}");
3452                     } else {
3453                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3454                     }
3455                     continue;
3456                 }
3457
3458                 /* Here it looks like a named character */
3459
3460                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3461                     s += 2;         /* Skip to next char after the 'U+' */
3462                     if (PL_lex_inpat) {
3463
3464                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3465                         /* Check the syntax.  */
3466                         const char *orig_s;
3467                         orig_s = s - 5;
3468                         if (!isXDIGIT(*s)) {
3469                           bad_NU:
3470                             yyerror(
3471                                 "Invalid hexadecimal number in \\N{U+...}"
3472                             );
3473                             s = e + 1;
3474                             continue;
3475                         }
3476                         while (++s < e) {
3477                             if (isXDIGIT(*s))
3478                                 continue;
3479                             else if ((*s == '.' || *s == '_')
3480                                   && isXDIGIT(s[1]))
3481                                 continue;
3482                             goto bad_NU;
3483                         }
3484
3485                         /* Pass everything through unchanged.
3486                          * +1 is for the '}' */
3487                         Copy(orig_s, d, e - orig_s + 1, char);
3488                         d += e - orig_s + 1;
3489                     }
3490                     else {  /* Not a pattern: convert the hex to string */
3491                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3492                                 | PERL_SCAN_SILENT_ILLDIGIT
3493                                 | PERL_SCAN_DISALLOW_PREFIX;
3494                         STRLEN len = e - s;
3495                         uv = grok_hex(s, &len, &flags, NULL);
3496                         if (len == 0 || (len != (STRLEN)(e - s)))
3497                             goto bad_NU;
3498
3499                          /* For non-tr///, if the destination is not in utf8,
3500                           * unconditionally recode it to be so.  This is
3501                           * because \N{} implies Unicode semantics, and scalars
3502                           * have to be in utf8 to guarantee those semantics.
3503                           * tr/// doesn't care about Unicode rules, so no need
3504                           * there to upgrade to UTF-8 for small enough code
3505                           * points */
3506                         if (! has_utf8 && (   uv > 0xFF
3507                                            || PL_lex_inwhat != OP_TRANS))
3508                         {
3509                             SvCUR_set(sv, d - SvPVX_const(sv));
3510                             SvPOK_on(sv);
3511                             *d = '\0';
3512                             /* See Note on sizing above.  */
3513                             sv_utf8_upgrade_flags_grow(
3514                                     sv,
3515                                     SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3516                                     UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
3517                             d = SvPVX(sv) + SvCUR(sv);
3518                             has_utf8 = TRUE;
3519                         }
3520
3521                         /* Add the (Unicode) code point to the output. */
3522                         if (OFFUNI_IS_INVARIANT(uv)) {
3523                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3524                         }
3525                         else {
3526                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3527                         }
3528                     }
3529                 }
3530                 else /* Here is \N{NAME} but not \N{U+...}. */
3531                      if ((res = get_and_check_backslash_N_name(s, e)))
3532                 {
3533                     STRLEN len;
3534                     const char *str = SvPV_const(res, len);
3535                     if (PL_lex_inpat) {
3536
3537                         if (! len) { /* The name resolved to an empty string */
3538                             Copy("\\N{}", d, 4, char);
3539                             d += 4;
3540                         }
3541                         else {
3542                             /* In order to not lose information for the regex
3543                             * compiler, pass the result in the specially made
3544                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3545                             * the code points in hex of each character
3546                             * returned by charnames */
3547
3548                             const char *str_end = str + len;
3549                             const STRLEN off = d - SvPVX_const(sv);
3550
3551                             if (! SvUTF8(res)) {
3552                                 /* For the non-UTF-8 case, we can determine the
3553                                  * exact length needed without having to parse
3554                                  * through the string.  Each character takes up
3555                                  * 2 hex digits plus either a trailing dot or
3556                                  * the "}" */
3557                                 const char initial_text[] = "\\N{U+";
3558                                 const STRLEN initial_len = sizeof(initial_text)
3559                                                            - 1;
3560                                 d = off + SvGROW(sv, off
3561                                                     + 3 * len
3562
3563                                                     /* +1 for trailing NUL */
3564                                                     + initial_len + 1
3565
3566                                                     + (STRLEN)(send - e));
3567                                 Copy(initial_text, d, initial_len, char);
3568                                 d += initial_len;
3569                                 while (str < str_end) {
3570                                     char hex_string[4];
3571                                     int len =
3572                                         my_snprintf(hex_string,
3573                                                   sizeof(hex_string),
3574                                                   "%02X.",
3575
3576                                                   /* The regex compiler is
3577                                                    * expecting Unicode, not
3578                                                    * native */
3579                                                   NATIVE_TO_LATIN1(*str));
3580                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3581                                                            sizeof(hex_string));
3582                                     Copy(hex_string, d, 3, char);
3583                                     d += 3;
3584                                     str++;
3585                                 }
3586                                 d--;    /* Below, we will overwrite the final
3587                                            dot with a right brace */
3588                             }
3589                             else {
3590                                 STRLEN char_length; /* cur char's byte length */
3591
3592                                 /* and the number of bytes after this is
3593                                  * translated into hex digits */
3594                                 STRLEN output_length;
3595
3596                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3597                                  * for max('U+', '.'); and 1 for NUL */
3598                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3599
3600                                 /* Get the first character of the result. */
3601                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3602                                                         len,
3603                                                         &char_length,
3604                                                         UTF8_ALLOW_ANYUV);
3605                                 /* Convert first code point to Unicode hex,
3606                                  * including the boiler plate before it. */
3607                                 output_length =
3608                                     my_snprintf(hex_string, sizeof(hex_string),
3609                                              "\\N{U+%X",
3610                                              (unsigned int) NATIVE_TO_UNI(uv));
3611
3612                                 /* Make sure there is enough space to hold it */
3613                                 d = off + SvGROW(sv, off
3614                                                     + output_length
3615                                                     + (STRLEN)(send - e)
3616                                                     + 2);       /* '}' + NUL */
3617                                 /* And output it */
3618                                 Copy(hex_string, d, output_length, char);
3619                                 d += output_length;
3620
3621                                 /* For each subsequent character, append dot and
3622                                 * its Unicode code point in hex */
3623                                 while ((str += char_length) < str_end) {
3624                                     const STRLEN off = d - SvPVX_const(sv);
3625                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3626                                                             str_end - str,
3627                                                             &char_length,
3628                                                             UTF8_ALLOW_ANYUV);
3629                                     output_length =
3630                                         my_snprintf(hex_string,
3631                                              sizeof(hex_string),
3632                                              ".%X",
3633                                              (unsigned int) NATIVE_TO_UNI(uv));
3634
3635                                     d = off + SvGROW(sv, off
3636                                                         + output_length
3637                                                         + (STRLEN)(send - e)
3638                                                         + 2);   /* '}' +  NUL */
3639                                     Copy(hex_string, d, output_length, char);
3640                                     d += output_length;
3641                                 }
3642                             }
3643
3644                             *d++ = '}'; /* Done.  Add the trailing brace */
3645                         }
3646                     }
3647                     else { /* Here, not in a pattern.  Convert the name to a
3648                             * string. */
3649
3650                         if (PL_lex_inwhat == OP_TRANS) {
3651                             str = SvPV_const(res, len);
3652                             if (len > ((SvUTF8(res))
3653                                        ? UTF8SKIP(str)
3654                                        : 1U))
3655                             {
3656                                 yyerror(Perl_form(aTHX_
3657                                     "%.*s must not be a named sequence"
3658                                     " in transliteration operator",
3659                                         /*  +1 to include the "}" */
3660                                     (int) (e + 1 - start), start));
3661                                 goto end_backslash_N;
3662                             }
3663                         }
3664                         else if (! SvUTF8(res)) {
3665                             /* Make sure \N{} return is UTF-8.  This is because
3666                             * \N{} implies Unicode semantics, and scalars have to
3667                             * be in utf8 to guarantee those semantics; but not
3668                             * needed in tr/// */
3669                             sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3670                             str = SvPV_const(res, len);
3671                         }
3672
3673                          /* Upgrade destination to be utf8 if this new
3674                           * component is */
3675                         if (! has_utf8 && SvUTF8(res)) {
3676                             SvCUR_set(sv, d - SvPVX_const(sv));
3677                             SvPOK_on(sv);
3678                             *d = '\0';
3679                             /* See Note on sizing above.  */
3680                             sv_utf8_upgrade_flags_grow(sv,
3681                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3682                                                 len + (STRLEN)(send - s) + 1);
3683                             d = SvPVX(sv) + SvCUR(sv);
3684                             has_utf8 = TRUE;
3685                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3686
3687                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3688                              * set correctly here). */
3689                             const STRLEN off = d - SvPVX_const(sv);
3690                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3691                         }
3692                         Copy(str, d, len, char);
3693                         d += len;
3694                     }
3695
3696                     SvREFCNT_dec(res);
3697
3698                 } /* End \N{NAME} */
3699
3700               end_backslash_N:
3701 #ifdef EBCDIC
3702                 backslash_N++; /* \N{} is defined to be Unicode */
3703 #endif
3704                 s = e + 1;  /* Point to just after the '}' */
3705                 continue;
3706
3707             /* \c is a control character */
3708             case 'c':
3709                 s++;
3710                 if (s < send) {
3711                     *d++ = grok_bslash_c(*s++, 1);
3712                 }
3713                 else {
3714                     yyerror("Missing control char name in \\c");
3715                 }
3716 #ifdef EBCDIC
3717                 non_portable_endpoint++;
3718 #endif
3719                 continue;
3720
3721             /* printf-style backslashes, formfeeds, newlines, etc */
3722             case 'b':
3723                 *d++ = '\b';
3724                 break;
3725             case 'n':
3726                 *d++ = '\n';
3727                 break;
3728             case 'r':
3729                 *d++ = '\r';
3730                 break;
3731             case 'f':
3732                 *d++ = '\f';
3733                 break;
3734             case 't':
3735                 *d++ = '\t';
3736                 break;
3737             case 'e':
3738                 *d++ = ESC_NATIVE;
3739                 break;
3740             case 'a':
3741                 *d++ = '\a';
3742                 break;
3743             } /* end switch */
3744
3745             s++;
3746             continue;
3747         } /* end if (backslash) */
3748
3749     default_action:
3750         /* If we started with encoded form, or already know we want it,
3751            then encode the next character */
3752         if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3753             STRLEN len  = 1;
3754
3755             /* One might think that it is wasted effort in the case of the
3756              * source being utf8 (this_utf8 == TRUE) to take the next character
3757              * in the source, convert it to an unsigned value, and then convert
3758              * it back again.  But the source has not been validated here.  The
3759              * routine that does the conversion checks for errors like
3760              * malformed utf8 */
3761
3762             const UV nextuv   = (this_utf8)
3763                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3764                                 : (UV) ((U8) *s);
3765             const STRLEN need = UVCHR_SKIP(nextuv);
3766             if (!has_utf8) {
3767                 SvCUR_set(sv, d - SvPVX_const(sv));
3768                 SvPOK_on(sv);
3769                 *d = '\0';
3770                 /* See Note on sizing above.  */
3771                 sv_utf8_upgrade_flags_grow(sv,
3772                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3773                                         need + (STRLEN)(send - s) + 1);
3774                 d = SvPVX(sv) + SvCUR(sv);
3775                 has_utf8 = TRUE;
3776             } else if (need > len) {
3777                 /* encoded value larger than old, may need extra space (NOTE:
3778                  * SvCUR() is not set correctly here).   See Note on sizing
3779                  * above.  */
3780                 const STRLEN off = d - SvPVX_const(sv);
3781                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3782             }
3783             s += len;
3784
3785             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3786         }
3787         else {
3788             *d++ = *s++;
3789         }
3790     } /* while loop to process each character */
3791
3792     /* terminate the string and set up the sv */
3793     *d = '\0';
3794     SvCUR_set(sv, d - SvPVX_const(sv));
3795     if (SvCUR(sv) >= SvLEN(sv))
3796         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3797                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3798
3799     SvPOK_on(sv);
3800     if (IN_ENCODING && !has_utf8) {
3801         sv_recode_to_utf8(sv, _get_encoding());
3802         if (SvUTF8(sv))
3803             has_utf8 = TRUE;
3804     }
3805     if (has_utf8) {
3806         SvUTF8_on(sv);
3807         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3808             PL_sublex_info.sub_op->op_private |=
3809                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3810         }
3811     }
3812
3813     /* shrink the sv if we allocated more than we used */
3814     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3815         SvPV_shrink_to_cur(sv);
3816     }
3817
3818     /* return the substring (via pl_yylval) only if we parsed anything */
3819     if (s > start) {
3820         char *s2 = start;
3821         for (; s2 < s; s2++) {
3822             if (*s2 == '\n')
3823                 COPLINE_INC_WITH_HERELINES;
3824         }
3825         SvREFCNT_inc_simple_void_NN(sv);
3826         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3827             && ! PL_parser->lex_re_reparsing)
3828         {
3829             const char *const key = PL_lex_inpat ? "qr" : "q";
3830             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3831             const char *type;
3832             STRLEN typelen;
3833
3834             if (PL_lex_inwhat == OP_TRANS) {
3835                 type = "tr";
3836                 typelen = 2;
3837             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3838                 type = "s";
3839                 typelen = 1;
3840             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3841                 type = "q";
3842                 typelen = 1;
3843             } else  {
3844                 type = "qq";
3845                 typelen = 2;
3846             }
3847
3848             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3849                                 type, typelen);
3850         }
3851         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3852     }
3853     LEAVE_with_name("scan_const");
3854     return s;
3855 }
3856
3857 /* S_intuit_more
3858  * Returns TRUE if there's more to the expression (e.g., a subscript),
3859  * FALSE otherwise.
3860  *
3861  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3862  *
3863  * ->[ and ->{ return TRUE
3864  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3865  * { and [ outside a pattern are always subscripts, so return TRUE
3866  * if we're outside a pattern and it's not { or [, then return FALSE
3867  * if we're in a pattern and the first char is a {
3868  *   {4,5} (any digits around the comma) returns FALSE
3869  * if we're in a pattern and the first char is a [
3870  *   [] returns FALSE
3871  *   [SOMETHING] has a funky algorithm to decide whether it's a
3872  *      character class or not.  It has to deal with things like
3873  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3874  * anything else returns TRUE
3875  */
3876
3877 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3878
3879 STATIC int
3880 S_intuit_more(pTHX_ char *s)
3881 {
3882     PERL_ARGS_ASSERT_INTUIT_MORE;
3883
3884     if (PL_lex_brackets)
3885         return TRUE;
3886     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3887         return TRUE;
3888     if (*s == '-' && s[1] == '>'
3889      && FEATURE_POSTDEREF_QQ_IS_ENABLED
3890      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3891         ||(s[2] == '@' && strchr("*[{",s[3])) ))
3892         return TRUE;
3893     if (*s != '{' && *s != '[')
3894         return FALSE;
3895     if (!PL_lex_inpat)
3896         return TRUE;
3897
3898     /* In a pattern, so maybe we have {n,m}. */
3899     if (*s == '{') {
3900         if (regcurly(s)) {
3901             return FALSE;
3902         }
3903         return TRUE;
3904     }
3905
3906     /* On the other hand, maybe we have a character class */
3907
3908     s++;
3909     if (*s == ']' || *s == '^')
3910         return FALSE;
3911     else {
3912         /* this is terrifying, and it works */
3913         int weight;
3914         char seen[256];
3915         const char * const send = strchr(s,']');
3916         unsigned char un_char, last_un_char;
3917         char tmpbuf[sizeof PL_tokenbuf * 4];
3918
3919         if (!send)              /* has to be an expression */
3920             return TRUE;
3921         weight = 2;             /* let's weigh the evidence */
3922
3923         if (*s == '$')
3924             weight -= 3;
3925         else if (isDIGIT(*s)) {
3926             if (s[1] != ']') {
3927                 if (isDIGIT(s[1]) && s[2] == ']')
3928                     weight -= 10;
3929             }
3930             else
3931                 weight -= 100;
3932         }
3933         Zero(seen,256,char);
3934         un_char = 255;
3935         for (; s < send; s++) {
3936             last_un_char = un_char;
3937             un_char = (unsigned char)*s;
3938             switch (*s) {
3939             case '@':
3940             case '&':
3941             case '$':
3942                 weight -= seen[un_char] * 10;
3943                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3944                     int len;
3945                     char *tmp = PL_bufend;
3946                     PL_bufend = (char*)send;
3947                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3948                     PL_bufend = tmp;
3949                     len = (int)strlen(tmpbuf);
3950                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3951                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3952                         weight -= 100;
3953                     else
3954                         weight -= 10;
3955                 }
3956                 else if (*s == '$'
3957                          && s[1]
3958                          && strchr("[#!%*<>()-=",s[1]))
3959                 {
3960                     if (/*{*/ strchr("])} =",s[2]))
3961                         weight -= 10;
3962                     else
3963                         weight -= 1;
3964                 }
3965                 break;
3966             case '\\':
3967                 un_char = 254;
3968                 if (s[1]) {
3969                     if (strchr("wds]",s[1]))
3970                         weight += 100;
3971                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3972                         weight += 1;
3973                     else if (strchr("rnftbxcav",s[1]))
3974                         weight += 40;
3975                     else if (isDIGIT(s[1])) {
3976                         weight += 40;
3977                         while (s[1] && isDIGIT(s[1]))
3978                             s++;
3979                     }
3980                 }
3981                 else
3982                     weight += 100;
3983                 break;
3984             case '-':
3985                 if (s[1] == '\\')
3986                     weight += 50;
3987                 if (strchr("aA01! ",last_un_char))
3988                     weight += 30;
3989                 if (strchr("zZ79~",s[1]))
3990                     weight += 30;
3991                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3992                     weight -= 5;        /* cope with negative subscript */
3993                 break;
3994             default:
3995                 if (!isWORDCHAR(last_un_char)
3996                     && !(last_un_char == '$' || last_un_char == '@'
3997                          || last_un_char == '&')
3998                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3999                     char *d = s;
4000                     while (isALPHA(*s))
4001                         s++;
4002                     if (keyword(d, s - d, 0))
4003                         weight -= 150;
4004                 }
4005                 if (un_char == last_un_char + 1)
4006                     weight += 5;
4007                 weight -= seen[un_char];
4008                 break;
4009             }
4010             seen[un_char]++;
4011         }
4012         if (weight >= 0)        /* probably a character class */
4013             return FALSE;
4014     }
4015
4016     return TRUE;
4017 }
4018
4019 /*
4020  * S_intuit_method
4021  *
4022  * Does all the checking to disambiguate
4023  *   foo bar
4024  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4025  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4026  *
4027  * First argument is the stuff after the first token, e.g. "bar".
4028  *
4029  * Not a method if foo is a filehandle.
4030  * Not a method if foo is a subroutine prototyped to take a filehandle.
4031  * Not a method if it's really "Foo $bar"
4032  * Method if it's "foo $bar"
4033  * Not a method if it's really "print foo $bar"
4034  * Method if it's really "foo package::" (interpreted as package->foo)
4035  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4036  * Not a method if bar is a filehandle or package, but is quoted with
4037  *   =>
4038  */
4039
4040 STATIC int
4041 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4042 {
4043     char *s = start + (*start == '$');
4044     char tmpbuf[sizeof PL_tokenbuf];
4045     STRLEN len;
4046     GV* indirgv;
4047         /* Mustn't actually add anything to a symbol table.
4048            But also don't want to "initialise" any placeholder
4049            constants that might already be there into full
4050            blown PVGVs with attached PVCV.  */
4051     GV * const gv =
4052         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4053
4054     PERL_ARGS_ASSERT_INTUIT_METHOD;
4055
4056     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4057             return 0;
4058     if (cv && SvPOK(cv)) {
4059         const char *proto = CvPROTO(cv);
4060         if (proto) {
4061             while (*proto && (isSPACE(*proto) || *proto == ';'))
4062                 proto++;
4063             if (*proto == '*')
4064                 return 0;
4065         }
4066     }
4067
4068     if (*start == '$') {
4069         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4070             || isUPPER(*PL_tokenbuf))
4071             return 0;
4072         s = skipspace(s);
4073         PL_bufptr = start;
4074         PL_expect = XREF;
4075         return *s == '(' ? FUNCMETH : METHOD;
4076     }
4077
4078     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4079     /* start is the beginning of the possible filehandle/object,
4080      * and s is the end of it
4081      * tmpbuf is a copy of it (but with single quotes as double colons)
4082      */
4083
4084     if (!keyword(tmpbuf, len, 0)) {
4085         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4086             len -= 2;
4087             tmpbuf[len] = '\0';
4088             goto bare_package;
4089         }
4090         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4091         if (indirgv && GvCVu(indirgv))
4092             return 0;
4093         /* filehandle or package name makes it a method */
4094         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4095             s = skipspace(s);
4096             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4097                 return 0;       /* no assumptions -- "=>" quotes bareword */
4098       bare_package:
4099             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4100                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4101             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4102             PL_expect = XTERM;
4103             force_next(WORD);
4104             PL_bufptr = s;
4105             return *s == '(' ? FUNCMETH : METHOD;
4106         }
4107     }
4108     return 0;
4109 }
4110
4111 /* Encoded script support. filter_add() effectively inserts a
4112  * 'pre-processing' function into the current source input stream.
4113  * Note that the filter function only applies to the current source file
4114  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4115  *
4116  * The datasv parameter (which may be NULL) can be used to pass
4117  * private data to this instance of the filter. The filter function
4118  * can recover the SV using the FILTER_DATA macro and use it to
4119  * store private buffers and state information.
4120  *
4121  * The supplied datasv parameter is upgraded to a PVIO type
4122  * and the IoDIRP/IoANY field is used to store the function pointer,
4123  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4124  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4125  * private use must be set using malloc'd pointers.
4126  */
4127
4128 SV *
4129 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4130 {
4131     if (!funcp)
4132         return NULL;
4133
4134     if (!PL_parser)
4135         return NULL;
4136
4137     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4138         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4139
4140     if (!PL_rsfp_filters)
4141         PL_rsfp_filters = newAV();
4142     if (!datasv)
4143         datasv = newSV(0);
4144     SvUPGRADE(datasv, SVt_PVIO);
4145     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4146     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4147     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4148                           FPTR2DPTR(void *, IoANY(datasv)),
4149                           SvPV_nolen(datasv)));
4150     av_unshift(PL_rsfp_filters, 1);
4151     av_store(PL_rsfp_filters, 0, datasv) ;
4152     if (
4153         !PL_parser->filtered
4154      && PL_parser->lex_flags & LEX_EVALBYTES
4155      && PL_bufptr < PL_bufend
4156     ) {
4157         const char *s = PL_bufptr;
4158         while (s < PL_bufend) {
4159             if (*s == '\n') {
4160                 SV *linestr = PL_parser->linestr;
4161                 char *buf = SvPVX(linestr);
4162                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4163                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4164                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4165                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4166                 STRLEN const last_uni_pos =
4167                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4168                 STRLEN const last_lop_pos =
4169                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4170                 av_push(PL_rsfp_filters, linestr);
4171                 PL_parser->linestr =
4172                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4173                 buf = SvPVX(PL_parser->linestr);
4174                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4175                 PL_parser->bufptr = buf + bufptr_pos;
4176                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4177                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4178                 PL_parser->linestart = buf + linestart_pos;
4179                 if (PL_parser->last_uni)
4180                     PL_parser->last_uni = buf + last_uni_pos;
4181                 if (PL_parser->last_lop)
4182                     PL_parser->last_lop = buf + last_lop_pos;
4183                 SvLEN(linestr) = SvCUR(linestr);
4184                 SvCUR(linestr) = s-SvPVX(linestr);
4185                 PL_parser->filtered = 1;
4186                 break;
4187             }
4188             s++;
4189         }
4190     }
4191     return(datasv);
4192 }
4193
4194
4195 /* Delete most recently added instance of this filter function. */
4196 void
4197 Perl_filter_del(pTHX_ filter_t funcp)
4198 {
4199     SV *datasv;
4200
4201     PERL_ARGS_ASSERT_FILTER_DEL;
4202
4203 #ifdef DEBUGGING
4204     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4205                           FPTR2DPTR(void*, funcp)));
4206 #endif
4207     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4208         return;
4209     /* if filter is on top of stack (usual case) just pop it off */
4210     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4211     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4212         sv_free(av_pop(PL_rsfp_filters));
4213
4214         return;
4215     }
4216     /* we need to search for the correct entry and clear it     */
4217     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4218 }
4219
4220
4221 /* Invoke the idxth filter function for the current rsfp.        */
4222 /* maxlen 0 = read one text line */
4223 I32
4224 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4225 {
4226     filter_t funcp;
4227     SV *datasv = NULL;
4228     /* This API is bad. It should have been using unsigned int for maxlen.
4229        Not sure if we want to change the API, but if not we should sanity
4230        check the value here.  */
4231     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4232
4233     PERL_ARGS_ASSERT_FILTER_READ;
4234
4235     if (!PL_parser || !PL_rsfp_filters)
4236         return -1;
4237     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4238         /* Provide a default input filter to make life easy.    */
4239         /* Note that we append to the line. This is handy.      */
4240         DEBUG_P(PerlIO_printf(Perl_debug_log,
4241                               "filter_read %d: from rsfp\n", idx));
4242         if (correct_length) {
4243             /* Want a block */
4244             int len ;
4245             const int old_len = SvCUR(buf_sv);
4246
4247             /* ensure buf_sv is large enough */
4248             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4249             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4250                                    correct_length)) <= 0) {
4251                 if (PerlIO_error(PL_rsfp))
4252                     return -1;          /* error */
4253                 else
4254                     return 0 ;          /* end of file */
4255             }
4256             SvCUR_set(buf_sv, old_len + len) ;
4257             SvPVX(buf_sv)[old_len + len] = '\0';
4258         } else {
4259             /* Want a line */
4260             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4261                 if (PerlIO_error(PL_rsfp))
4262                     return -1;          /* error */
4263                 else
4264                     return 0 ;          /* end of file */
4265             }
4266         }
4267         return SvCUR(buf_sv);
4268     }
4269     /* Skip this filter slot if filter has been deleted */
4270     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4271         DEBUG_P(PerlIO_printf(Perl_debug_log,
4272                               "filter_read %d: skipped (filter deleted)\n",
4273                               idx));
4274         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4275     }
4276     if (SvTYPE(datasv) != SVt_PVIO) {
4277         if (correct_length) {
4278             /* Want a block */
4279             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4280             if (!remainder) return 0; /* eof */
4281             if (correct_length > remainder) correct_length = remainder;
4282             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4283             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4284         } else {
4285             /* Want a line */
4286             const char *s = SvEND(datasv);
4287             const char *send = SvPVX(datasv) + SvLEN(datasv);
4288             while (s < send) {
4289                 if (*s == '\n') {
4290                     s++;
4291                     break;
4292                 }
4293                 s++;
4294             }
4295             if (s == send) return 0; /* eof */
4296             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4297             SvCUR_set(datasv, s-SvPVX(datasv));
4298         }
4299         return SvCUR(buf_sv);
4300     }
4301     /* Get function pointer hidden within datasv        */
4302     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4303     DEBUG_P(PerlIO_printf(Perl_debug_log,
4304                           "filter_read %d: via function %p (%s)\n",
4305                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4306     /* Call function. The function is expected to       */
4307     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4308     /* Return: <0:error, =0:eof, >0:not eof             */
4309     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4310 }
4311
4312 STATIC char *
4313 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4314 {
4315     PERL_ARGS_ASSERT_FILTER_GETS;
4316
4317 #ifdef PERL_CR_FILTER
4318     if (!PL_rsfp_filters) {
4319         filter_add(S_cr_textfilter,NULL);
4320     }
4321 #endif
4322     if (PL_rsfp_filters) {
4323         if (!append)
4324             SvCUR_set(sv, 0);   /* start with empty line        */
4325         if (FILTER_READ(0, sv, 0) > 0)
4326             return ( SvPVX(sv) ) ;
4327         else
4328             return NULL ;
4329     }
4330     else
4331         return (sv_gets(sv, PL_rsfp, append));
4332 }
4333
4334 STATIC HV *
4335 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4336 {
4337     GV *gv;
4338
4339     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4340
4341     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4342         return PL_curstash;
4343
4344     if (len > 2
4345         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4346         && (gv = gv_fetchpvn_flags(pkgname,
4347                                    len,
4348                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4349     {
4350         return GvHV(gv);                        /* Foo:: */
4351     }
4352
4353     /* use constant CLASS => 'MyClass' */
4354     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4355     if (gv && GvCV(gv)) {
4356         SV * const sv = cv_const_sv(GvCV(gv));
4357         if (sv)
4358             return gv_stashsv(sv, 0);
4359     }
4360
4361     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4362 }
4363
4364
4365 STATIC char *
4366 S_tokenize_use(pTHX_ int is_use, char *s) {
4367     PERL_ARGS_ASSERT_TOKENIZE_USE;
4368
4369     if (PL_expect != XSTATE)
4370         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4371                     is_use ? "use" : "no"));
4372     PL_expect = XTERM;
4373     s = skipspace(s);
4374     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4375         s = force_version(s, TRUE);
4376         if (*s == ';' || *s == '}'
4377                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4378             NEXTVAL_NEXTTOKE.opval = NULL;
4379             force_next(WORD);
4380         }
4381         else if (*s == 'v') {
4382             s = force_word(s,WORD,FALSE,TRUE);
4383             s = force_version(s, FALSE);
4384         }
4385     }
4386     else {
4387         s = force_word(s,WORD,FALSE,TRUE);
4388         s = force_version(s, FALSE);
4389     }
4390     pl_yylval.ival = is_use;
4391     return s;
4392 }
4393 #ifdef DEBUGGING
4394     static const char* const exp_name[] =
4395         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4396           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4397           "TERMORDORDOR"
4398         };
4399 #endif
4400
4401 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4402 STATIC bool
4403 S_word_takes_any_delimiter(char *p, STRLEN len)
4404 {
4405     return (len == 1 && strchr("msyq", p[0]))
4406             || (len == 2
4407                 && ((p[0] == 't' && p[1] == 'r')
4408                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4409 }
4410
4411 static void
4412 S_check_scalar_slice(pTHX_ char *s)
4413 {
4414     s++;
4415     while (*s == ' ' || *s == '\t') s++;
4416     if (*s == 'q' && s[1] == 'w'
4417      && !isWORDCHAR_lazy_if(s+2,UTF))
4418         return;
4419     while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4420         s += UTF ? UTF8SKIP(s) : 1;
4421     if (*s == '}' || *s == ']')
4422         pl_yylval.ival = OPpSLICEWARNING;
4423 }
4424
4425 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4426 static void
4427 S_lex_token_boundary(pTHX)
4428 {
4429     PL_oldoldbufptr = PL_oldbufptr;
4430     PL_oldbufptr = PL_bufptr;
4431 }
4432
4433 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4434 static char *
4435 S_vcs_conflict_marker(pTHX_ char *s)
4436 {
4437     lex_token_boundary();
4438     PL_bufptr = s;
4439     yyerror("Version control conflict marker");
4440     while (s < PL_bufend && *s != '\n')
4441         s++;
4442     return s;
4443 }
4444
4445 /*
4446   yylex
4447
4448   Works out what to call the token just pulled out of the input
4449   stream.  The yacc parser takes care of taking the ops we return and
4450   stitching them into a tree.
4451
4452   Returns:
4453     The type of the next token
4454
4455   Structure:
4456       Check if we have already built the token; if so, use it.
4457       Switch based on the current state:
4458           - if we have a case modifier in a string, deal with that
4459           - handle other cases of interpolation inside a string
4460           - scan the next line if we are inside a format
4461       In the normal state, switch on the next character:
4462           - default:
4463             if alphabetic, go to key lookup
4464             unrecognized character - croak
4465           - 0/4/26: handle end-of-line or EOF
4466           - cases for whitespace
4467           - \n and #: handle comments and line numbers
4468           - various operators, brackets and sigils
4469           - numbers
4470           - quotes
4471           - 'v': vstrings (or go to key lookup)
4472           - 'x' repetition operator (or go to key lookup)
4473           - other ASCII alphanumerics (key lookup begins here):
4474               word before => ?
4475               keyword plugin
4476               scan built-in keyword (but do nothing with it yet)
4477               check for statement label
4478               check for lexical subs
4479                   goto just_a_word if there is one
4480               see whether built-in keyword is overridden
4481               switch on keyword number:
4482                   - default: just_a_word:
4483                       not a built-in keyword; handle bareword lookup
4484                       disambiguate between method and sub call
4485                       fall back to bareword
4486                   - cases for built-in keywords
4487 */
4488
4489
4490 int
4491 Perl_yylex(pTHX)
4492 {
4493     dVAR;
4494     char *s = PL_bufptr;
4495     char *d;
4496     STRLEN len;
4497     bool bof = FALSE;
4498     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4499     U8 formbrack = 0;
4500     U32 fake_eof = 0;
4501
4502     /* orig_keyword, gvp, and gv are initialized here because
4503      * jump to the label just_a_word_zero can bypass their
4504      * initialization later. */
4505     I32 orig_keyword = 0;
4506     GV *gv = NULL;
4507     GV **gvp = NULL;
4508
4509     DEBUG_T( {
4510         SV* tmp = newSVpvs("");
4511         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4512             (IV)CopLINE(PL_curcop),
4513             lex_state_names[PL_lex_state],
4514             exp_name[PL_expect],
4515             pv_display(tmp, s, strlen(s), 0, 60));
4516         SvREFCNT_dec(tmp);
4517     } );
4518
4519     /* when we've already built the next token, just pull it out of the queue */
4520     if (PL_nexttoke) {
4521         PL_nexttoke--;
4522         pl_yylval = PL_nextval[PL_nexttoke];
4523         {
4524             I32 next_type;
4525             next_type = PL_nexttype[PL_nexttoke];
4526             if (next_type & (7<<24)) {
4527                 if (next_type & (1<<24)) {
4528                     if (PL_lex_brackets > 100)
4529                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4530                     PL_lex_brackstack[PL_lex_brackets++] =
4531                         (char) ((next_type >> 16) & 0xff);
4532                 }
4533                 if (next_type & (2<<24))
4534                     PL_lex_allbrackets++;
4535                 if (next_type & (4<<24))
4536                     PL_lex_allbrackets--;
4537                 next_type &= 0xffff;
4538             }
4539             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4540         }
4541     }
4542
4543     switch (PL_lex_state) {
4544     case LEX_NORMAL:
4545     case LEX_INTERPNORMAL:
4546         break;
4547
4548     /* interpolated case modifiers like \L \U, including \Q and \E.
4549        when we get here, PL_bufptr is at the \
4550     */
4551     case LEX_INTERPCASEMOD:
4552 #ifdef DEBUGGING
4553         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4554             Perl_croak(aTHX_
4555                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4556                        PL_bufptr, PL_bufend, *PL_bufptr);
4557 #endif
4558         /* handle \E or end of string */
4559         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4560             /* if at a \E */
4561             if (PL_lex_casemods) {
4562                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4563                 PL_lex_casestack[PL_lex_casemods] = '\0';
4564
4565                 if (PL_bufptr != PL_bufend
4566                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4567                         || oldmod == 'F')) {
4568                     PL_bufptr += 2;
4569                     PL_lex_state = LEX_INTERPCONCAT;
4570                 }
4571                 PL_lex_allbrackets--;
4572                 return REPORT(')');
4573             }
4574             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4575                /* Got an unpaired \E */
4576                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4577                         "Useless use of \\E");
4578             }
4579             if (PL_bufptr != PL_bufend)
4580                 PL_bufptr += 2;
4581             PL_lex_state = LEX_INTERPCONCAT;
4582             return yylex();
4583         }
4584         else {
4585             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4586               "### Saw case modifier\n"); });
4587             s = PL_bufptr + 1;
4588             if (s[1] == '\\' && s[2] == 'E') {
4589                 PL_bufptr = s + 3;
4590                 PL_lex_state = LEX_INTERPCONCAT;
4591                 return yylex();
4592             }
4593             else {
4594                 I32 tmp;
4595                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4596                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4597                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4598                     && (strchr(PL_lex_casestack, 'L')
4599                         || strchr(PL_lex_casestack, 'U')
4600                         || strchr(PL_lex_casestack, 'F')))
4601                 {
4602                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4603                     PL_lex_allbrackets--;
4604                     return REPORT(')');
4605                 }
4606                 if (PL_lex_casemods > 10)
4607                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4608                 PL_lex_casestack[PL_lex_casemods++] = *s;
4609                 PL_lex_casestack[PL_lex_casemods] = '\0';
4610                 PL_lex_state = LEX_INTERPCONCAT;
4611                 NEXTVAL_NEXTTOKE.ival = 0;
4612                 force_next((2<<24)|'(');
4613                 if (*s == 'l')
4614                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4615                 else if (*s == 'u')
4616                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4617                 else if (*s == 'L')
4618                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4619                 else if (*s == 'U')
4620                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4621                 else if (*s == 'Q')
4622                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4623                 else if (*s == 'F')
4624                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4625                 else
4626                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4627                 PL_bufptr = s + 1;
4628             }
4629             force_next(FUNC);
4630             if (PL_lex_starts) {
4631                 s = PL_bufptr;
4632                 PL_lex_starts = 0;
4633                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4634                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4635                     TOKEN(',');
4636                 else
4637                     AopNOASSIGN(OP_CONCAT);
4638             }
4639             else
4640                 return yylex();
4641         }
4642
4643     case LEX_INTERPPUSH:
4644         return REPORT(sublex_push());
4645
4646     case LEX_INTERPSTART:
4647         if (PL_bufptr == PL_bufend)
4648             return REPORT(sublex_done());
4649         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4650               "### Interpolated variable\n"); });
4651         PL_expect = XTERM;
4652         /* for /@a/, we leave the joining for the regex engine to do
4653          * (unless we're within \Q etc) */
4654         PL_lex_dojoin = (*PL_bufptr == '@'
4655                             && (!PL_lex_inpat || PL_lex_casemods));
4656         PL_lex_state = LEX_INTERPNORMAL;
4657         if (PL_lex_dojoin) {
4658             NEXTVAL_NEXTTOKE.ival = 0;
4659             force_next(',');
4660             force_ident("\"", '$');
4661             NEXTVAL_NEXTTOKE.ival = 0;
4662             force_next('$');
4663             NEXTVAL_NEXTTOKE.ival = 0;
4664             force_next((2<<24)|'(');
4665             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4666             force_next(FUNC);
4667         }
4668         /* Convert (?{...}) and friends to 'do {...}' */
4669         if (PL_lex_inpat && *PL_bufptr == '(') {
4670             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4671             PL_bufptr += 2;
4672             if (*PL_bufptr != '{')
4673                 PL_bufptr++;
4674             PL_expect = XTERMBLOCK;
4675             force_next(DO);
4676         }
4677
4678         if (PL_lex_starts++) {
4679             s = PL_bufptr;
4680             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4681             if (!PL_lex_casemods && PL_lex_inpat)
4682                 TOKEN(',');
4683             else
4684                 AopNOASSIGN(OP_CONCAT);
4685         }
4686         return yylex();
4687
4688     case LEX_INTERPENDMAYBE:
4689         if (intuit_more(PL_bufptr)) {
4690             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4691             break;
4692         }
4693         /* FALLTHROUGH */
4694
4695     case LEX_INTERPEND:
4696         if (PL_lex_dojoin) {
4697             const U8 dojoin_was = PL_lex_dojoin;
4698             PL_lex_dojoin = FALSE;
4699             PL_lex_state = LEX_INTERPCONCAT;
4700             PL_lex_allbrackets--;
4701             return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4702         }
4703         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4704             && SvEVALED(PL_lex_repl))
4705         {
4706             if (PL_bufptr != PL_bufend)
4707                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4708             PL_lex_repl = NULL;
4709         }
4710         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4711            re_eval_str.  If the here-doc body’s length equals the previous
4712            value of re_eval_start, re_eval_start will now be null.  So
4713            check re_eval_str as well. */
4714         if (PL_parser->lex_shared->re_eval_start
4715          || PL_parser->lex_shared->re_eval_str) {
4716             SV *sv;
4717             if (*PL_bufptr != ')')
4718                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4719             PL_bufptr++;
4720             /* having compiled a (?{..}) expression, return the original
4721              * text too, as a const */
4722             if (PL_parser->lex_shared->re_eval_str) {
4723                 sv = PL_parser->lex_shared->re_eval_str;
4724                 PL_parser->lex_shared->re_eval_str = NULL;
4725                 SvCUR_set(sv,
4726                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4727                 SvPV_shrink_to_cur(sv);
4728             }
4729             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4730                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4731             NEXTVAL_NEXTTOKE.opval =
4732                     (OP*)newSVOP(OP_CONST, 0,
4733                                  sv);
4734             force_next(THING);
4735             PL_parser->lex_shared->re_eval_start = NULL;
4736             PL_expect = XTERM;
4737             return REPORT(',');
4738         }
4739
4740         /* FALLTHROUGH */
4741     case LEX_INTERPCONCAT:
4742 #ifdef DEBUGGING
4743         if (PL_lex_brackets)
4744             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4745                        (long) PL_lex_brackets);
4746 #endif
4747         if (PL_bufptr == PL_bufend)
4748             return REPORT(sublex_done());
4749
4750         /* m'foo' still needs to be parsed for possible (?{...}) */
4751         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4752             SV *sv = newSVsv(PL_linestr);
4753             sv = tokeq(sv);
4754             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4755             s = PL_bufend;
4756         }
4757         else {
4758             s = scan_const(PL_bufptr);
4759             if (*s == '\\')
4760                 PL_lex_state = LEX_INTERPCASEMOD;
4761             else
4762                 PL_lex_state = LEX_INTERPSTART;
4763         }
4764
4765         if (s != PL_bufptr) {
4766             NEXTVAL_NEXTTOKE = pl_yylval;
4767             PL_expect = XTERM;
4768             force_next(THING);
4769             if (PL_lex_starts++) {
4770                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4771                 if (!PL_lex_casemods && PL_lex_inpat)
4772                     TOKEN(',');
4773                 else
4774                     AopNOASSIGN(OP_CONCAT);
4775             }
4776             else {
4777                 PL_bufptr = s;
4778                 return yylex();
4779             }
4780         }
4781
4782         return yylex();
4783     case LEX_FORMLINE:
4784         s = scan_formline(PL_bufptr);
4785         if (!PL_lex_formbrack)
4786         {
4787             formbrack = 1;
4788             goto rightbracket;
4789         }
4790         PL_bufptr = s;
4791         return yylex();
4792     }
4793
4794     /* We really do *not* want PL_linestr ever becoming a COW. */
4795     assert (!SvIsCOW(PL_linestr));
4796     s = PL_bufptr;
4797     PL_oldoldbufptr = PL_oldbufptr;
4798     PL_oldbufptr = s;
4799     PL_parser->saw_infix_sigil = 0;
4800
4801   retry:
4802     switch (*s) {
4803     default:
4804         if (UTF) {
4805             if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4806                 ENTER;
4807                 SAVESPTR(PL_warnhook);
4808                 PL_warnhook = PERL_WARNHOOK_FATAL;
4809                 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4810                 LEAVE;
4811             }
4812             if (isIDFIRST_utf8((U8*)s)) {
4813                 goto keylookup;
4814             }
4815         }
4816         else if (isALNUMC(*s)) {
4817             goto keylookup;
4818         }
4819     {
4820         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4821         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4822                                                     UTF8SKIP(s),
4823                                                     SVs_TEMP | SVf_UTF8),
4824                                             10, UNI_DISPLAY_ISPRINT)
4825                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4826         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4827         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4828             d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4829         } else {
4830             d = PL_linestart;
4831         }
4832         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4833                           UTF8fARG(UTF, (s - d), d),
4834                          (int) len + 1);
4835     }
4836     case 4:
4837     case 26:
4838         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4839     case 0:
4840         if ((!PL_rsfp || PL_lex_inwhat)
4841          && (!PL_parser->filtered || s+1 < PL_bufend)) {
4842             PL_last_uni = 0;
4843             PL_last_lop = 0;
4844             if (PL_lex_brackets
4845                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4846             {
4847                 yyerror((const char *)
4848                         (PL_lex_formbrack
4849                          ? "Format not terminated"
4850                          : "Missing right curly or square bracket"));
4851             }
4852             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4853                         "### Tokener got EOF\n");
4854             } );
4855             TOKEN(0);
4856         }
4857         if (s++ < PL_bufend)
4858             goto retry;                 /* ignore stray nulls */
4859         PL_last_uni = 0;
4860         PL_last_lop = 0;
4861         if (!PL_in_eval && !PL_preambled) {
4862             PL_preambled = TRUE;
4863             if (PL_perldb) {
4864                 /* Generate a string of Perl code to load the debugger.
4865                  * If PERL5DB is set, it will return the contents of that,
4866                  * otherwise a compile-time require of perl5db.pl.  */
4867
4868                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4869
4870                 if (pdb) {
4871                     sv_setpv(PL_linestr, pdb);
4872                     sv_catpvs(PL_linestr,";");
4873                 } else {
4874                     SETERRNO(0,SS_NORMAL);
4875                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4876                 }
4877                 PL_parser->preambling = CopLINE(PL_curcop);
4878             } else
4879                 sv_setpvs(PL_linestr,"");
4880             if (PL_preambleav) {
4881                 SV **svp = AvARRAY(PL_preambleav);
4882                 SV **const end = svp + AvFILLp(PL_preambleav);
4883                 while(svp <= end) {
4884                     sv_catsv(PL_linestr, *svp);
4885                     ++svp;
4886                     sv_catpvs(PL_linestr, ";");
4887                 }
4888                 sv_free(MUTABLE_SV(PL_preambleav));
4889                 PL_preambleav = NULL;
4890             }
4891             if (PL_minus_E)
4892                 sv_catpvs(PL_linestr,
4893                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4894             if (PL_minus_n || PL_minus_p) {
4895                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4896                 if (PL_minus_l)
4897                     sv_catpvs(PL_linestr,"chomp;");
4898                 if (PL_minus_a) {
4899                     if (PL_minus_F) {
4900                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4901                              || *PL_splitstr == '"')
4902                               && strchr(PL_splitstr + 1, *PL_splitstr))
4903                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4904                         else {
4905                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4906                                bytes can be used as quoting characters.  :-) */
4907                             const char *splits = PL_splitstr;
4908                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4909                             do {
4910                                 /* Need to \ \s  */
4911                                 if (*splits == '\\')
4912                                     sv_catpvn(PL_linestr, splits, 1);
4913                                 sv_catpvn(PL_linestr, splits, 1);
4914                             } while (*splits++);
4915                             /* This loop will embed the trailing NUL of
4916                                PL_linestr as the last thing it does before
4917                                terminating.  */
4918                             sv_catpvs(PL_linestr, ");");
4919                         }
4920                     }
4921                     else
4922                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4923                 }
4924             }
4925             sv_catpvs(PL_linestr, "\n");
4926             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4927             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4928             PL_last_lop = PL_last_uni = NULL;
4929             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
4930                 update_debugger_info(PL_linestr, NULL, 0);
4931             goto retry;
4932         }
4933         do {
4934             fake_eof = 0;
4935             bof = PL_rsfp ? TRUE : FALSE;
4936             if (0) {
4937               fake_eof:
4938                 fake_eof = LEX_FAKE_EOF;
4939             }
4940             PL_bufptr = PL_bufend;
4941             COPLINE_INC_WITH_HERELINES;
4942             if (!lex_next_chunk(fake_eof)) {
4943                 CopLINE_dec(PL_curcop);
4944                 s = PL_bufptr;
4945                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4946             }
4947             CopLINE_dec(PL_curcop);
4948             s = PL_bufptr;
4949             /* If it looks like the start of a BOM or raw UTF-16,
4950              * check if it in fact is. */
4951             if (bof && PL_rsfp
4952                 && (*s == 0
4953                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
4954                         || *(U8*)s >= 0xFE
4955                         || s[1] == 0))
4956             {
4957                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4958                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4959 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4960                 /* offset may include swallowed CR */
4961                 if (!bof)
4962                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4963 #endif
4964                 if (bof) {
4965                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4966                     s = swallow_bom((U8*)s);
4967                 }
4968             }
4969             if (PL_parser->in_pod) {
4970                 /* Incest with pod. */
4971                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4972                     sv_setpvs(PL_linestr, "");
4973                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4974                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4975                     PL_last_lop = PL_last_uni = NULL;
4976                     PL_parser->in_pod = 0;
4977                 }
4978             }
4979             if (PL_rsfp || PL_parser->filtered)
4980                 incline(s);
4981         } while (PL_parser->in_pod);
4982         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4983         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4984         PL_last_lop = PL_last_uni = NULL;
4985         if (CopLINE(PL_curcop) == 1) {
4986             while (s < PL_bufend && isSPACE(*s))
4987                 s++;
4988             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4989                 s++;
4990             d = NULL;
4991             if (!PL_in_eval) {
4992                 if (*s == '#' && *(s+1) == '!')
4993                     d = s + 2;
4994 #ifdef ALTERNATE_SHEBANG
4995                 else {
4996                     static char const as[] = ALTERNATE_SHEBANG;
4997                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4998                         d = s + (sizeof(as) - 1);
4999                 }
5000 #endif /* ALTERNATE_SHEBANG */
5001             }
5002             if (d) {
5003                 char *ipath;
5004                 char *ipathend;
5005
5006                 while (isSPACE(*d))
5007                     d++;
5008                 ipath = d;
5009                 while (*d && !isSPACE(*d))
5010                     d++;
5011                 ipathend = d;
5012
5013 #ifdef ARG_ZERO_IS_SCRIPT
5014                 if (ipathend > ipath) {
5015                     /*
5016                      * HP-UX (at least) sets argv[0] to the script name,
5017                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5018                      * at least, set argv[0] to the basename of the Perl
5019                      * interpreter. So, having found "#!", we'll set it right.
5020                      */
5021                     SV* copfilesv = CopFILESV(PL_curcop);
5022                     if (copfilesv) {
5023                         SV * const x =
5024                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5025                                              SVt_PV)); /* $^X */
5026                         assert(SvPOK(x) || SvGMAGICAL(x));
5027                         if (sv_eq(x, copfilesv)) {
5028                             sv_setpvn(x, ipath, ipathend - ipath);
5029                             SvSETMAGIC(x);
5030                         }
5031                         else {
5032                             STRLEN blen;
5033                             STRLEN llen;
5034                             const char *bstart = SvPV_const(copfilesv, blen);
5035                             const char * const lstart = SvPV_const(x, llen);
5036                             if (llen < blen) {
5037                                 bstart += blen - llen;
5038                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5039                                     sv_setpvn(x, ipath, ipathend - ipath);
5040                                     SvSETMAGIC(x);
5041                                 }
5042                             }
5043                         }
5044                     }
5045                     else {
5046                         /* Anything to do if no copfilesv? */
5047                     }
5048                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5049                 }
5050 #endif /* ARG_ZERO_IS_SCRIPT */
5051
5052                 /*
5053                  * Look for options.
5054                  */
5055                 d = instr(s,"perl -");
5056                 if (!d) {
5057                     d = instr(s,"perl");
5058                     if (d && d[4] == '6')
5059                         d = NULL;
5060 #if defined(DOSISH)
5061                     /* avoid getting into infinite loops when shebang
5062                      * line contains "Perl" rather than "perl" */
5063                     if (!d) {
5064                         for (d = ipathend-4; d >= ipath; --d) {
5065                             if (isALPHA_FOLD_EQ(*d, 'p')
5066                                 && !ibcmp(d, "perl", 4))
5067                             {
5068                                 break;
5069                             }
5070                         }
5071                         if (d < ipath)
5072                             d = NULL;
5073                     }
5074 #endif
5075                 }
5076 #ifdef ALTERNATE_SHEBANG
5077                 /*
5078                  * If the ALTERNATE_SHEBANG on this system starts with a
5079                  * character that can be part of a Perl expression, then if
5080                  * we see it but not "perl", we're probably looking at the
5081                  * start of Perl code, not a request to hand off to some
5082                  * other interpreter.  Similarly, if "perl" is there, but
5083                  * not in the first 'word' of the line, we assume the line
5084                  * contains the start of the Perl program.
5085                  */
5086                 if (d && *s != '#') {
5087                     const char *c = ipath;
5088                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5089                         c++;
5090                     if (c < d)
5091                         d = NULL;       /* "perl" not in first word; ignore */
5092                     else
5093                         *s = '#';       /* Don't try to parse shebang line */
5094                 }
5095 #endif /* ALTERNATE_SHEBANG */
5096                 if (!d
5097                     && *s == '#'
5098                     && ipathend > ipath
5099                     && !PL_minus_c
5100                     && !instr(s,"indir")
5101                     && instr(PL_origargv[0],"perl"))
5102                 {
5103                     dVAR;
5104                     char **newargv;
5105
5106                     *ipathend = '\0';
5107                     s = ipathend + 1;
5108                     while (s < PL_bufend && isSPACE(*s))
5109                         s++;
5110                     if (s < PL_bufend) {
5111                         Newx(newargv,PL_origargc+3,char*);
5112                         newargv[1] = s;
5113                         while (s < PL_bufend && !isSPACE(*s))
5114                             s++;
5115                         *s = '\0';
5116                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5117                     }
5118                     else
5119                         newargv = PL_origargv;
5120                     newargv[0] = ipath;
5121                     PERL_FPU_PRE_EXEC
5122                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5123                     PERL_FPU_POST_EXEC
5124                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5125                 }
5126                 if (d) {
5127                     while (*d && !isSPACE(*d))
5128                         d++;
5129                     while (SPACE_OR_TAB(*d))
5130                         d++;
5131
5132                     if (*d++ == '-') {
5133                         const bool switches_done = PL_doswitches;
5134                         const U32 oldpdb = PL_perldb;
5135                         const bool oldn = PL_minus_n;
5136                         const bool oldp = PL_minus_p;
5137                         const char *d1 = d;
5138
5139                         do {
5140                             bool baduni = FALSE;
5141                             if (*d1 == 'C') {
5142                                 const char *d2 = d1 + 1;
5143                                 if (parse_unicode_opts((const char **)&d2)
5144                                     != PL_unicode)
5145                                     baduni = TRUE;
5146                             }
5147                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5148                                 const char * const m = d1;
5149                                 while (*d1 && !isSPACE(*d1))
5150                                     d1++;
5151                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5152                                       (int)(d1 - m), m);
5153                             }
5154                             d1 = moreswitches(d1);
5155                         } while (d1);
5156                         if (PL_doswitches && !switches_done) {
5157                             int argc = PL_origargc;
5158                             char **argv = PL_origargv;
5159                             do {
5160                                 argc--,argv++;
5161                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5162                             init_argv_symbols(argc,argv);
5163                         }
5164                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5165                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5166                               /* if we have already added "LINE: while (<>) {",
5167                                  we must not do it again */
5168                         {
5169                             sv_setpvs(PL_linestr, "");
5170                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5171                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5172                             PL_last_lop = PL_last_uni = NULL;
5173                             PL_preambled = FALSE;
5174                             if (PERLDB_LINE_OR_SAVESRC)
5175                                 (void)gv_fetchfile(PL_origfilename);
5176                             goto retry;
5177                         }
5178                     }
5179                 }
5180             }
5181         }
5182         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5183             PL_lex_state = LEX_FORMLINE;
5184             force_next(FORMRBRACK);
5185             TOKEN(';');
5186         }
5187         goto retry;
5188     case '\r':
5189 #ifdef PERL_STRICT_CR
5190         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5191         Perl_croak(aTHX_
5192       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5193 #endif
5194     case ' ': case '\t': case '\f': case '\v':
5195         s++;
5196         goto retry;
5197     case '#':
5198     case '\n':
5199         if (PL_lex_state != LEX_NORMAL
5200             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5201         {
5202             const bool in_comment = *s == '#';
5203             if (*s == '#' && s == PL_linestart && PL_in_eval
5204              && !PL_rsfp && !PL_parser->filtered) {
5205                 /* handle eval qq[#line 1 "foo"\n ...] */
5206                 CopLINE_dec(PL_curcop);
5207                 incline(s);
5208             }
5209             d = s;
5210             while (d < PL_bufend && *d != '\n')
5211                 d++;
5212             if (d < PL_bufend)
5213                 d++;
5214             else if (d > PL_bufend)
5215                 /* Found by Ilya: feed random input to Perl. */
5216                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5217                            d, PL_bufend);
5218             s = d;
5219             if (in_comment && d == PL_bufend
5220                 && PL_lex_state == LEX_INTERPNORMAL
5221                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5222                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5223             else
5224                 incline(s);
5225             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5226                 PL_lex_state = LEX_FORMLINE;
5227                 force_next(FORMRBRACK);
5228                 TOKEN(';');
5229             }
5230         }
5231         else {
5232             while (s < PL_bufend && *s != '\n')
5233                 s++;
5234             if (s < PL_bufend)
5235                 {
5236                     s++;
5237                     if (s < PL_bufend)
5238                         incline(s);
5239                 }
5240             else if (s > PL_bufend)
5241                 /* Found by Ilya: feed random input to Perl. */
5242                 Perl_croak(aTHX_ "panic: input overflow");
5243         }
5244         goto retry;
5245     case '-':
5246         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5247             I32 ftst = 0;
5248             char tmp;
5249
5250             s++;
5251             PL_bufptr = s;
5252             tmp = *s++;
5253
5254             while (s < PL_bufend && SPACE_OR_TAB(*s))
5255                 s++;
5256
5257             if (strnEQ(s,"=>",2)) {
5258                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5259                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5260                 OPERATOR('-');          /* unary minus */
5261             }
5262             switch (tmp) {
5263             case 'r': ftst = OP_FTEREAD;        break;
5264             case 'w': ftst = OP_FTEWRITE;       break;
5265             case 'x': ftst = OP_FTEEXEC;        break;
5266             case 'o': ftst = OP_FTEOWNED;       break;
5267             case 'R': ftst = OP_FTRREAD;        break;
5268             case 'W': ftst = OP_FTRWRITE;       break;
5269             case 'X': ftst = OP_FTREXEC;        break;
5270             case 'O': ftst = OP_FTROWNED;       break;
5271             case 'e': ftst = OP_FTIS;           break;
5272             case 'z': ftst = OP_FTZERO;         break;
5273             case 's': ftst = OP_FTSIZE;         break;
5274             case 'f': ftst = OP_FTFILE;         break;
5275             case 'd': ftst = OP_FTDIR;          break;
5276             case 'l': ftst = OP_FTLINK;         break;
5277             case 'p': ftst = OP_FTPIPE;         break;
5278             case 'S': ftst = OP_FTSOCK;         break;
5279             case 'u': ftst = OP_FTSUID;         break;
5280             case 'g': ftst = OP_FTSGID;         break;
5281             case 'k': ftst = OP_FTSVTX;         break;
5282             case 'b': ftst = OP_FTBLK;          break;
5283             case 'c': ftst = OP_FTCHR;          break;
5284             case 't': ftst = OP_FTTTY;          break;
5285             case 'T': ftst = OP_FTTEXT;         break;
5286             case 'B': ftst = OP_FTBINARY;       break;
5287             case 'M': case 'A': case 'C':
5288                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5289                 switch (tmp) {
5290                 case 'M': ftst = OP_FTMTIME;    break;
5291                 case 'A': ftst = OP_FTATIME;    break;
5292                 case 'C': ftst = OP_FTCTIME;    break;
5293                 default:                        break;
5294                 }
5295                 break;
5296             default:
5297                 break;
5298             }
5299             if (ftst) {
5300                 PL_last_uni = PL_oldbufptr;
5301                 PL_last_lop_op = (OPCODE)ftst;
5302                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5303                         "### Saw file test %c\n", (int)tmp);
5304                 } );
5305                 FTST(ftst);
5306             }
5307             else {
5308                 /* Assume it was a minus followed by a one-letter named
5309                  * subroutine call (or a -bareword), then. */
5310                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5311                         "### '-%c' looked like a file test but was not\n",
5312                         (int) tmp);
5313                 } );
5314                 s = --PL_bufptr;
5315             }
5316         }
5317         {
5318             const char tmp = *s++;
5319             if (*s == tmp) {
5320                 s++;
5321                 if (PL_expect == XOPERATOR)
5322                     TERM(POSTDEC);
5323                 else
5324                     OPERATOR(PREDEC);
5325             }
5326             else if (*s == '>') {
5327                 s++;
5328                 s = skipspace(s);
5329                 if (((*s == '$' || *s == '&') && s[1] == '*')
5330                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5331                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5332                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5333                  )
5334                 {
5335                     PL_expect = XPOSTDEREF;
5336                     TOKEN(ARROW);
5337                 }
5338                 if (isIDFIRST_lazy_if(s,UTF)) {
5339                     s = force_word(s,METHOD,FALSE,TRUE);
5340                     TOKEN(ARROW);
5341                 }
5342                 else if (*s == '$')
5343                     OPERATOR(ARROW);
5344                 else
5345                     TERM(ARROW);
5346             }
5347             if (PL_expect == XOPERATOR) {
5348                 if (*s == '='
5349                     && !PL_lex_allbrackets
5350                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5351                 {
5352                     s--;
5353                     TOKEN(0);
5354                 }
5355                 Aop(OP_SUBTRACT);
5356             }
5357             else {
5358                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5359                     check_uni();
5360                 OPERATOR('-');          /* unary minus */
5361             }
5362         }
5363
5364     case '+':
5365         {
5366             const char tmp = *s++;
5367             if (*s == tmp) {
5368                 s++;
5369                 if (PL_expect == XOPERATOR)
5370                     TERM(POSTINC);
5371                 else
5372                     OPERATOR(PREINC);
5373             }
5374             if (PL_expect == XOPERATOR) {
5375                 if (*s == '='
5376                     && !PL_lex_allbrackets
5377                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5378                 {
5379                     s--;
5380                     TOKEN(0);
5381                 }
5382                 Aop(OP_ADD);
5383             }
5384             else {
5385                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5386                     check_uni();
5387                 OPERATOR('+');
5388             }
5389         }
5390
5391     case '*':
5392         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5393         if (PL_expect != XOPERATOR) {
5394             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5395             PL_expect = XOPERATOR;
5396             force_ident(PL_tokenbuf, '*');
5397             if (!*PL_tokenbuf)
5398                 PREREF('*');
5399             TERM('*');
5400         }
5401         s++;
5402         if (*s == '*') {
5403             s++;
5404             if (*s == '=' && !PL_lex_allbrackets
5405                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5406             {
5407                 s -= 2;
5408                 TOKEN(0);
5409             }
5410             PWop(OP_POW);
5411         }
5412         if (*s == '='
5413             && !PL_lex_allbrackets
5414             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5415         {
5416             s--;
5417             TOKEN(0);
5418         }
5419         PL_parser->saw_infix_sigil = 1;
5420         Mop(OP_MULTIPLY);
5421
5422     case '%':
5423     {
5424         if (PL_expect == XOPERATOR) {
5425             if (s[1] == '='
5426                 && !PL_lex_allbrackets
5427                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5428             {
5429                 TOKEN(0);
5430             }
5431             ++s;
5432             PL_parser->saw_infix_sigil = 1;
5433             Mop(OP_MODULO);
5434         }
5435         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5436         PL_tokenbuf[0] = '%';
5437         s = scan_ident(s, PL_tokenbuf + 1,
5438                 sizeof PL_tokenbuf - 1, FALSE);
5439         pl_yylval.ival = 0;
5440         if (!PL_tokenbuf[1]) {
5441             PREREF('%');
5442         }
5443         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5444             if (*s == '[')
5445                 PL_tokenbuf[0] = '@';
5446         }
5447         PL_expect = XOPERATOR;
5448         force_ident_maybe_lex('%');
5449         TERM('%');
5450     }
5451     case '^':
5452         d = s;
5453         bof = FEATURE_BITWISE_IS_ENABLED;
5454         if (bof && s[1] == '.')
5455             s++;
5456         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5457                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5458         {
5459             s = d;
5460             TOKEN(0);
5461         }
5462         s++;
5463         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5464     case '[':
5465         if (PL_lex_brackets > 100)
5466             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5467         PL_lex_brackstack[PL_lex_brackets++] = 0;
5468         PL_lex_allbrackets++;
5469         {
5470             const char tmp = *s++;
5471             OPERATOR(tmp);
5472         }
5473     case '~':
5474         if (s[1] == '~'
5475             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5476         {
5477             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5478                 TOKEN(0);
5479             s += 2;
5480             Perl_ck_warner_d(aTHX_
5481                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5482                 "Smartmatch is experimental");
5483             Eop(OP_SMARTMATCH);
5484         }
5485         s++;
5486         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5487             s++;
5488             BCop(OP_SCOMPLEMENT);
5489         }
5490         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5491     case ',':
5492         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5493             TOKEN(0);
5494         s++;
5495         OPERATOR(',');
5496     case ':':
5497         if (s[1] == ':') {
5498             len = 0;
5499             goto just_a_word_zero_gv;
5500         }
5501         s++;
5502         {
5503         OP *attrs;
5504
5505         switch (PL_expect) {
5506         case XOPERATOR:
5507             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5508                 break;
5509             PL_bufptr = s;      /* update in case we back off */
5510             if (*s == '=') {
5511                 Perl_croak(aTHX_
5512                            "Use of := for an empty attribute list is not allowed");
5513             }
5514             goto grabattrs;
5515         case XATTRBLOCK:
5516             PL_expect = XBLOCK;
5517             goto grabattrs;
5518         case XATTRTERM:
5519             PL_expect = XTERMBLOCK;
5520          grabattrs:
5521             s = skipspace(s);
5522             attrs = NULL;
5523             while (isIDFIRST_lazy_if(s,UTF)) {
5524                 I32 tmp;
5525                 SV *sv;
5526                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5527                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5528                     if (tmp < 0) tmp = -tmp;
5529                     switch (tmp) {
5530                     case KEY_or:
5531                     case KEY_and:
5532                     case KEY_for:
5533                     case KEY_foreach:
5534                     case KEY_unless:
5535                     case KEY_if:
5536                     case KEY_while:
5537                     case KEY_until:
5538                         goto got_attrs;
5539                     default:
5540                         break;
5541                     }
5542                 }
5543                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5544                 if (*d == '(') {
5545                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5546                     COPLINE_SET_FROM_MULTI_END;
5547                     if (!d) {
5548                         /* MUST advance bufptr here to avoid bogus
5549                            "at end of line" context messages from yyerror().
5550                          */
5551                         PL_bufptr = s + len;
5552                         yyerror("Unterminated attribute parameter in attribute list");
5553                         if (attrs)
5554                             op_free(attrs);
5555                         sv_free(sv);
5556                         return REPORT(0);       /* EOF indicator */
5557                     }
5558                 }
5559                 if (PL_lex_stuff) {
5560                     sv_catsv(sv, PL_lex_stuff);
5561                     attrs = op_append_elem(OP_LIST, attrs,
5562                                         newSVOP(OP_CONST, 0, sv));
5563                     SvREFCNT_dec_NN(PL_lex_stuff);
5564                     PL_lex_stuff = NULL;
5565                 }
5566                 else {
5567                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5568                         sv_free(sv);
5569                         if (PL_in_my == KEY_our) {
5570                             deprecate(":unique");
5571                         }
5572                         else
5573                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5574                     }
5575
5576                     /* NOTE: any CV attrs applied here need to be part of
5577                        the CVf_BUILTIN_ATTRS define in cv.h! */
5578                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5579                         sv_free(sv);
5580                         CvLVALUE_on(PL_compcv);
5581                     }
5582                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5583                         sv_free(sv);
5584                         deprecate(":locked");
5585                     }
5586                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5587                         sv_free(sv);
5588                         CvMETHOD_on(PL_compcv);
5589                     }
5590                     else if (!PL_in_my && len == 5
5591                           && strnEQ(SvPVX(sv), "const", len))
5592                     {
5593                         sv_free(sv);
5594                         Perl_ck_warner_d(aTHX_
5595                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5596                            ":const is experimental"
5597                         );
5598                         CvANONCONST_on(PL_compcv);
5599                         if (!CvANON(PL_compcv))
5600                             yyerror(":const is not permitted on named "
5601                                     "subroutines");
5602                     }
5603                     /* After we've set the flags, it could be argued that
5604                        we don't need to do the attributes.pm-based setting
5605                        process, and shouldn't bother appending recognized
5606                        flags.  To experiment with that, uncomment the
5607                        following "else".  (Note that's already been
5608                        uncommented.  That keeps the above-applied built-in
5609                        attributes from being intercepted (and possibly
5610                        rejected) by a package's attribute routines, but is
5611                        justified by the performance win for the common case
5612                        of applying only built-in attributes.) */
5613                     else
5614                         attrs = op_append_elem(OP_LIST, attrs,
5615                                             newSVOP(OP_CONST, 0,
5616                                                     sv));
5617                 }
5618                 s = skipspace(d);
5619                 if (*s == ':' && s[1] != ':')
5620                     s = skipspace(s+1);
5621                 else if (s == d)
5622                     break;      /* require real whitespace or :'s */
5623                 /* XXX losing whitespace on sequential attributes here */
5624             }
5625             {
5626                 if (*s != ';'
5627                     && *s != '}'
5628                     && !(PL_expect == XOPERATOR
5629                          ? (*s == '=' ||  *s == ')')
5630                          : (*s == '{' ||  *s == '(')))
5631                 {
5632                     const char q = ((*s == '\'') ? '"' : '\'');
5633                     /* If here for an expression, and parsed no attrs, back
5634                        off. */
5635                     if (PL_expect == XOPERATOR && !attrs) {
5636                         s = PL_bufptr;
5637                         break;
5638                     }
5639                     /* MUST advance bufptr here to avoid bogus "at end of line"
5640                        context messages from yyerror().
5641                     */
5642                     PL_bufptr = s;
5643                     yyerror( (const char *)
5644                              (*s
5645                               ? Perl_form(aTHX_ "Invalid separator character "
5646                                           "%c%c%c in attribute list", q, *s, q)
5647                               : "Unterminated attribute list" ) );
5648                     if (attrs)
5649                         op_free(attrs);
5650                     OPERATOR(':');
5651                 }
5652             }
5653         got_attrs:
5654             if (attrs) {
5655                 NEXTVAL_NEXTTOKE.opval = attrs;
5656                 force_next(THING);
5657             }
5658             TOKEN(COLONATTR);
5659         }
5660         }
5661         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5662             s--;
5663             TOKEN(0);
5664         }
5665         PL_lex_allbrackets--;
5666         OPERATOR(':');
5667     case '(':
5668         s++;
5669         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5670             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5671         else
5672             PL_expect = XTERM;
5673         s = skipspace(s);
5674         PL_lex_allbrackets++;
5675         TOKEN('(');
5676     case ';':
5677         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5678             TOKEN(0);
5679         CLINE;
5680         s++;
5681         PL_expect = XSTATE;
5682         TOKEN(';');
5683     case ')':
5684         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5685             TOKEN(0);
5686         s++;
5687         PL_lex_allbrackets--;
5688         s = skipspace(s);
5689         if (*s == '{')
5690             PREBLOCK(')');
5691         TERM(')');
5692     case ']':
5693         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5694             TOKEN(0);
5695         s++;
5696         if (PL_lex_brackets <= 0)
5697             /* diag_listed_as: Unmatched right %s bracket */
5698             yyerror("Unmatched right square bracket");
5699         else
5700             --PL_lex_brackets;
5701         PL_lex_allbrackets--;
5702         if (PL_lex_state == LEX_INTERPNORMAL) {
5703             if (PL_lex_brackets == 0) {
5704                 if (*s == '-' && s[1] == '>')
5705                     PL_lex_state = LEX_INTERPENDMAYBE;
5706                 else if (*s != '[' && *s != '{')
5707                     PL_lex_state = LEX_INTERPEND;
5708             }
5709         }
5710         TERM(']');
5711     case '{':
5712         s++;
5713       leftbracket:
5714         if (PL_lex_brackets > 100) {
5715             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5716         }
5717         switch (PL_expect) {
5718         case XTERM:
5719         case XTERMORDORDOR:
5720             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5721             PL_lex_allbrackets++;
5722             OPERATOR(HASHBRACK);
5723         case XOPERATOR:
5724             while (s < PL_bufend && SPACE_OR_TAB(*s))
5725                 s++;
5726             d = s;
5727             PL_tokenbuf[0] = '\0';
5728             if (d < PL_bufend && *d == '-') {
5729                 PL_tokenbuf[0] = '-';
5730                 d++;
5731                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5732                     d++;
5733             }
5734             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5735                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5736                               FALSE, &len);
5737                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5738                     d++;
5739                 if (*d == '}') {
5740                     const char minus = (PL_tokenbuf[0] == '-');
5741                     s = force_word(s + minus, WORD, FALSE, TRUE);
5742                     if (minus)
5743                         force_next('-');
5744                 }
5745             }
5746             /* FALLTHROUGH */
5747         case XATTRTERM:
5748         case XTERMBLOCK:
5749             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5750             PL_lex_allbrackets++;
5751             PL_expect = XSTATE;
5752             break;
5753         case XATTRBLOCK:
5754         case XBLOCK:
5755             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5756             PL_lex_allbrackets++;
5757             PL_expect = XSTATE;
5758             break;
5759         case XBLOCKTERM:
5760             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5761             PL_lex_allbrackets++;
5762             PL_expect = XSTATE;
5763             break;
5764         default: {
5765                 const char *t;
5766                 if (PL_oldoldbufptr == PL_last_lop)
5767                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5768                 else
5769                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5770                 PL_lex_allbrackets++;
5771                 s = skipspace(s);
5772                 if (*s == '}') {
5773                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5774                         PL_expect = XTERM;
5775                         /* This hack is to get the ${} in the message. */
5776                         PL_bufptr = s+1;
5777                         yyerror("syntax error");
5778                         break;
5779                     }
5780                     OPERATOR(HASHBRACK);
5781                 }
5782                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5783                     /* ${...} or @{...} etc., but not print {...}
5784                      * Skip the disambiguation and treat this as a block.
5785                      */
5786                     goto block_expectation;
5787                 }
5788                 /* This hack serves to disambiguate a pair of curlies
5789                  * as being a block or an anon hash.  Normally, expectation
5790                  * determines that, but in cases where we're not in a
5791                  * position to expect anything in particular (like inside
5792                  * eval"") we have to resolve the ambiguity.  This code
5793                  * covers the case where the first term in the curlies is a
5794                  * quoted string.  Most other cases need to be explicitly
5795                  * disambiguated by prepending a "+" before the opening
5796                  * curly in order to force resolution as an anon hash.
5797                  *
5798                  * XXX should probably propagate the outer expectation
5799                  * into eval"" to rely less on this hack, but that could
5800                  * potentially break current behavior of eval"".
5801                  * GSAR 97-07-21
5802                  */
5803                 t = s;
5804                 if (*s == '\'' || *s == '"' || *s == '`') {
5805                     /* common case: get past first string, handling escapes */
5806                     for (t++; t < PL_bufend && *t != *s;)
5807                         if (*t++ == '\\')
5808                             t++;
5809                     t++;
5810                 }
5811                 else if (*s == 'q') {
5812                     if (++t < PL_bufend
5813                         && (!isWORDCHAR(*t)
5814                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5815                                 && !isWORDCHAR(*t))))
5816                     {
5817                         /* skip q//-like construct */
5818                         const char *tmps;
5819                         char open, close, term;
5820                         I32 brackets = 1;
5821
5822                         while (t < PL_bufend && isSPACE(*t))
5823                             t++;
5824                         /* check for q => */
5825                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5826                             OPERATOR(HASHBRACK);
5827                         }
5828                         term = *t;
5829                         open = term;
5830                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5831                             term = tmps[5];
5832                         close = term;
5833                         if (open == close)
5834                             for (t++; t < PL_bufend; t++) {
5835                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5836                                     t++;
5837                                 else if (*t == open)
5838                                     break;
5839                             }
5840                         else {
5841                             for (t++; t < PL_bufend; t++) {
5842                                 if (*t == '\\' && t+1 < PL_bufend)
5843                                     t++;
5844                                 else if (*t == close && --brackets <= 0)
5845                                     break;
5846                                 else if (*t == open)
5847                                     brackets++;
5848                             }
5849                         }
5850                         t++;
5851                     }
5852                     else
5853                         /* skip plain q word */
5854                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5855                             t += UTF ? UTF8SKIP(t) : 1;
5856                 }
5857                 else if (isWORDCHAR_lazy_if(t,UTF)) {
5858                     t += UTF ? UTF8SKIP(t) : 1;
5859                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5860                         t += UTF ? UTF8SKIP(t) : 1;
5861                 }
5862                 while (t < PL_bufend && isSPACE(*t))
5863                     t++;
5864                 /* if comma follows first term, call it an anon hash */
5865                 /* XXX it could be a comma expression with loop modifiers */
5866                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5867                                    || (*t == '=' && t[1] == '>')))
5868                     OPERATOR(HASHBRACK);
5869                 if (PL_expect == XREF)
5870                 {
5871                   block_expectation:
5872                     /* If there is an opening brace or 'sub:', treat it
5873                        as a term to make ${{...}}{k} and &{sub:attr...}
5874                        dwim.  Otherwise, treat it as a statement, so
5875                        map {no strict; ...} works.
5876                      */
5877                     s = skipspace(s);
5878                     if (*s == '{') {
5879                         PL_expect = XTERM;
5880                         break;
5881                     }
5882                     if (strnEQ(s, "sub", 3)) {
5883                         d = s + 3;
5884                         d = skipspace(d);
5885                         if (*d == ':') {
5886                             PL_expect = XTERM;
5887                             break;
5888                         }
5889                     }
5890                     PL_expect = XSTATE;
5891                 }
5892                 else {
5893                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5894                     PL_expect = XSTATE;
5895                 }
5896             }
5897             break;
5898         }
5899         pl_yylval.ival = CopLINE(PL_curcop);
5900         PL_copline = NOLINE;   /* invalidate current command line number */
5901         TOKEN(formbrack ? '=' : '{');
5902     case '}':
5903         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5904             TOKEN(0);
5905       rightbracket:
5906         s++;
5907         if (PL_lex_brackets <= 0)
5908             /* diag_listed_as: Unmatched right %s bracket */
5909             yyerror("Unmatched right curly bracket");
5910         else
5911             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5912         PL_lex_allbrackets--;
5913         if (PL_lex_state == LEX_INTERPNORMAL) {
5914             if (PL_lex_brackets == 0) {
5915                 if (PL_expect & XFAKEBRACK) {
5916                     PL_expect &= XENUMMASK;
5917                     PL_lex_state = LEX_INTERPEND;
5918                     PL_bufptr = s;
5919                     return yylex();     /* ignore fake brackets */
5920                 }
5921                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5922                  && SvEVALED(PL_lex_repl))
5923                     PL_lex_state = LEX_INTERPEND;
5924                 else if (*s == '-' && s[1] == '>')
5925                     PL_lex_state = LEX_INTERPENDMAYBE;
5926                 else if (*s != '[' && *s != '{')
5927                     PL_lex_state = LEX_INTERPEND;
5928             }
5929         }
5930         if (PL_expect & XFAKEBRACK) {
5931             PL_expect &= XENUMMASK;
5932             PL_bufptr = s;
5933             return yylex();             /* ignore fake brackets */
5934         }
5935         force_next(formbrack ? '.' : '}');
5936         if (formbrack) LEAVE;
5937         if (formbrack == 2) { /* means . where arguments were expected */
5938             force_next(';');
5939             TOKEN(FORMRBRACK);
5940         }
5941         TOKEN(';');
5942     case '&':
5943         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5944         s++;
5945         if (*s++ == '&') {
5946             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5947                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5948                 s -= 2;
5949                 TOKEN(0);
5950             }
5951             AOPERATOR(ANDAND);
5952         }
5953         s--;
5954         if (PL_expect == XOPERATOR) {
5955             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5956                 && isIDFIRST_lazy_if(s,UTF))
5957             {
5958                 CopLINE_dec(PL_curcop);
5959                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5960                 CopLINE_inc(PL_curcop);
5961             }
5962             d = s;
5963             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5964                 s++;
5965             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5966                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5967                 s = d;
5968                 s--;
5969                 TOKEN(0);
5970             }
5971             if (d == s) {
5972                 PL_parser->saw_infix_sigil = 1;
5973                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5974             }
5975             else
5976                 BAop(OP_SBIT_AND);
5977         }
5978
5979         PL_tokenbuf[0] = '&';
5980         s = scan_ident(s - 1, PL_tokenbuf + 1,
5981                        sizeof PL_tokenbuf - 1, TRUE);
5982         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5983         if (PL_tokenbuf[1]) {
5984             force_ident_maybe_lex('&');
5985         }
5986         else
5987             PREREF('&');
5988         TERM('&');
5989
5990     case '|':
5991         s++;
5992         if (*s++ == '|') {
5993             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5994                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5995                 s -= 2;
5996                 TOKEN(0);
5997             }
5998             AOPERATOR(OROR);
5999         }
6000         s--;
6001         d = s;
6002         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6003             s++;
6004         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6005                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6006             s = d - 1;
6007             TOKEN(0);
6008         }
6009         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6010     case '=':
6011         s++;
6012         {
6013             const char tmp = *s++;
6014             if (tmp == '=') {
6015                 if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "=====", 5)) {
6016                     s = vcs_conflict_marker(s + 5);
6017                     goto retry;
6018                 }
6019                 if (!PL_lex_allbrackets
6020                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6021                 {
6022                     s -= 2;
6023                     TOKEN(0);
6024                 }
6025                 Eop(OP_EQ);
6026             }
6027             if (tmp == '>') {
6028                 if (!PL_lex_allbrackets
6029                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6030                 {
6031                     s -= 2;
6032                     TOKEN(0);
6033                 }
6034                 OPERATOR(',');
6035             }
6036             if (tmp == '~')
6037                 PMop(OP_MATCH);
6038             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6039                 && strchr("+-*/%.^&|<",tmp))
6040                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6041                             "Reversed %c= operator",(int)tmp);
6042             s--;
6043             if (PL_expect == XSTATE
6044                 && isALPHA(tmp)
6045                 && (s == PL_linestart+1 || s[-2] == '\n') )
6046             {
6047                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6048                     || PL_lex_state != LEX_NORMAL) {
6049                     d = PL_bufend;
6050                     while (s < d) {
6051                         if (*s++ == '\n') {
6052                             incline(s);
6053                             if (strnEQ(s,"=cut",4)) {
6054                                 s = strchr(s,'\n');
6055                                 if (s)
6056                                     s++;
6057                                 else
6058                                     s = d;
6059                                 incline(s);
6060                                 goto retry;
6061                             }
6062                         }
6063                     }
6064                     goto retry;
6065                 }
6066                 s = PL_bufend;
6067                 PL_parser->in_pod = 1;
6068                 goto retry;
6069             }
6070         }
6071         if (PL_expect == XBLOCK) {
6072             const char *t = s;
6073 #ifdef PERL_STRICT_CR
6074             while (SPACE_OR_TAB(*t))
6075 #else
6076             while (SPACE_OR_TAB(*t) || *t == '\r')
6077 #endif
6078                 t++;
6079             if (*t == '\n' || *t == '#') {
6080                 formbrack = 1;
6081                 ENTER;
6082                 SAVEI8(PL_parser->form_lex_state);
6083                 SAVEI32(PL_lex_formbrack);
6084                 PL_parser->form_lex_state = PL_lex_state;
6085                 PL_lex_formbrack = PL_lex_brackets + 1;
6086                 goto leftbracket;
6087             }
6088         }
6089         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6090             s--;
6091             TOKEN(0);
6092         }
6093         pl_yylval.ival = 0;
6094         OPERATOR(ASSIGNOP);
6095     case '!':
6096         s++;
6097         {
6098             const char tmp = *s++;
6099             if (tmp == '=') {
6100                 /* was this !=~ where !~ was meant?
6101                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6102
6103                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6104                     const char *t = s+1;
6105
6106                     while (t < PL_bufend && isSPACE(*t))
6107                         ++t;
6108
6109                     if (*t == '/' || *t == '?'
6110                         || ((*t == 'm' || *t == 's' || *t == 'y')
6111                             && !isWORDCHAR(t[1]))
6112                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6113                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6114                                     "!=~ should be !~");
6115                 }
6116                 if (!PL_lex_allbrackets
6117                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6118                 {
6119                     s -= 2;
6120                     TOKEN(0);
6121                 }
6122                 Eop(OP_NE);
6123             }
6124             if (tmp == '~')
6125                 PMop(OP_NOT);
6126         }
6127         s--;
6128         OPERATOR('!');
6129     case '<':
6130         if (PL_expect != XOPERATOR) {
6131             if (s[1] != '<' && !strchr(s,'>'))
6132                 check_uni();
6133             if (s[1] == '<' && s[2] != '>') {
6134                 if ((s == PL_linestart || s[-1] == '\n') && strnEQ(s+2, "<<<<<", 5)) {
6135                     s = vcs_conflict_marker(s + 7);
6136                     goto retry;
6137                 }
6138                 s = scan_heredoc(s);
6139             }
6140             else
6141                 s = scan_inputsymbol(s);
6142             PL_expect = XOPERATOR;
6143             TOKEN(sublex_start());
6144         }
6145         s++;
6146         {
6147             char tmp = *s++;
6148             if (tmp == '<') {
6149                 if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "<<<<<", 5)) {
6150                     s = vcs_conflict_marker(s + 5);
6151                     goto retry;
6152                 }
6153                 if (*s == '=' && !PL_lex_allbrackets
6154                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6155                 {
6156                     s -= 2;
6157                     TOKEN(0);
6158                 }
6159                 SHop(OP_LEFT_SHIFT);
6160             }
6161             if (tmp == '=') {
6162                 tmp = *s++;
6163                 if (tmp == '>') {
6164                     if (!PL_lex_allbrackets
6165                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6166                     {
6167                         s -= 3;
6168                         TOKEN(0);
6169                     }
6170                     Eop(OP_NCMP);
6171                 }
6172                 s--;
6173                 if (!PL_lex_allbrackets
6174                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6175                 {
6176                     s -= 2;
6177                     TOKEN(0);
6178                 }
6179                 Rop(OP_LE);
6180             }
6181         }
6182         s--;
6183         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6184             s--;
6185             TOKEN(0);
6186         }
6187         Rop(OP_LT);
6188     case '>':
6189         s++;
6190         {
6191             const char tmp = *s++;
6192             if (tmp == '>') {
6193                 if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, ">>>>>", 5)) {
6194                     s = vcs_conflict_marker(s + 5);
6195                     goto retry;
6196                 }
6197                 if (*s == '=' && !PL_lex_allbrackets
6198                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6199                 {
6200                     s -= 2;
6201                     TOKEN(0);
6202                 }
6203                 SHop(OP_RIGHT_SHIFT);
6204             }
6205             else if (tmp == '=') {
6206                 if (!PL_lex_allbrackets
6207                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6208                 {
6209                     s -= 2;
6210                     TOKEN(0);
6211                 }
6212                 Rop(OP_GE);
6213             }
6214         }
6215         s--;
6216         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6217             s--;
6218             TOKEN(0);
6219         }
6220         Rop(OP_GT);
6221
6222     case '$':
6223         CLINE;
6224
6225         if (PL_expect == XOPERATOR) {
6226             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6227                 return deprecate_commaless_var_list();
6228             }
6229         }
6230         else if (PL_expect == XPOSTDEREF) {
6231             if (s[1] == '#') {
6232                 s++;
6233                 POSTDEREF(DOLSHARP);
6234             }
6235             POSTDEREF('$');
6236         }
6237
6238         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6239             PL_tokenbuf[0] = '@';
6240             s = scan_ident(s + 1, PL_tokenbuf + 1,
6241                            sizeof PL_tokenbuf - 1, FALSE);
6242             if (PL_expect == XOPERATOR) {
6243                 d = s;
6244                 if (PL_bufptr > s) {
6245                     d = PL_bufptr-1;
6246                     PL_bufptr = PL_oldbufptr;
6247                 }
6248                 no_op("Array length", d);
6249             }
6250             if (!PL_tokenbuf[1])
6251                 PREREF(DOLSHARP);
6252             PL_expect = XOPERATOR;
6253             force_ident_maybe_lex('#');
6254             TOKEN(DOLSHARP);
6255         }
6256
6257         PL_tokenbuf[0] = '$';
6258         s = scan_ident(s, PL_tokenbuf + 1,
6259                        sizeof PL_tokenbuf - 1, FALSE);
6260         if (PL_expect == XOPERATOR) {
6261             d = s;
6262             if (PL_bufptr > s) {
6263                 d = PL_bufptr-1;
6264                 PL_bufptr = PL_oldbufptr;
6265             }
6266             no_op("Scalar", d);
6267         }
6268         if (!PL_tokenbuf[1]) {
6269             if (s == PL_bufend)
6270                 yyerror("Final $ should be \\$ or $name");
6271             PREREF('$');
6272         }
6273
6274         d = s;
6275         {
6276             const char tmp = *s;
6277             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6278                 s = skipspace(s);
6279
6280             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6281                 && intuit_more(s)) {
6282                 if (*s == '[') {
6283                     PL_tokenbuf[0] = '@';
6284                     if (ckWARN(WARN_SYNTAX)) {
6285                         char *t = s+1;
6286
6287                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6288                             t += UTF ? UTF8SKIP(t) : 1;
6289                         if (*t++ == ',') {
6290                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6291                             while (t < PL_bufend && *t != ']')
6292                                 t++;
6293                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6294                                         "Multidimensional syntax %"UTF8f" not supported",
6295                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6296                         }
6297                     }
6298                 }
6299                 else if (*s == '{') {
6300                     char *t;
6301                     PL_tokenbuf[0] = '%';
6302                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6303                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6304                         {
6305                             char tmpbuf[sizeof PL_tokenbuf];
6306                             do {
6307                                 t++;
6308                             } while (isSPACE(*t));
6309                             if (isIDFIRST_lazy_if(t,UTF)) {
6310                                 STRLEN len;
6311                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6312                                               &len);
6313                                 while (isSPACE(*t))
6314                                     t++;
6315                                 if (*t == ';'
6316                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6317                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6318                                         "You need to quote \"%"UTF8f"\"",
6319                                          UTF8fARG(UTF, len, tmpbuf));
6320                             }
6321                         }
6322                 }
6323             }
6324
6325             PL_expect = XOPERATOR;
6326             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6327                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6328                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6329                     PL_expect = XOPERATOR;
6330                 else if (strchr("$@\"'`q", *s))
6331                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6332                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6333                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6334                 else if (isIDFIRST_lazy_if(s,UTF)) {
6335                     char tmpbuf[sizeof PL_tokenbuf];
6336                     int t2;
6337                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6338                     if ((t2 = keyword(tmpbuf, len, 0))) {
6339                         /* binary operators exclude handle interpretations */
6340                         switch (t2) {
6341                         case -KEY_x:
6342                         case -KEY_eq:
6343                         case -KEY_ne:
6344                         case -KEY_gt:
6345                         case -KEY_lt:
6346                         case -KEY_ge:
6347                         case -KEY_le:
6348                         case -KEY_cmp:
6349                             break;
6350                         default:
6351                             PL_expect = XTERM;  /* e.g. print $fh length() */
6352                             break;
6353                         }
6354                     }
6355                     else {
6356                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6357                     }
6358                 }
6359                 else if (isDIGIT(*s))
6360                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6361                 else if (*s == '.' && isDIGIT(s[1]))
6362                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6363                 else if ((*s == '?' || *s == '-' || *s == '+')
6364                          && !isSPACE(s[1]) && s[1] != '=')
6365                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6366                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6367                          && s[1] != '/')
6368                     PL_expect = XTERM;          /* e.g. print $fh /.../
6369                                                    XXX except DORDOR operator
6370                                                 */
6371                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6372                          && s[2] != '=')
6373                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6374             }
6375         }
6376         force_ident_maybe_lex('$');
6377         TOKEN('$');
6378
6379     case '@':
6380         if (PL_expect == XPOSTDEREF)
6381             POSTDEREF('@');
6382         PL_tokenbuf[0] = '@';
6383         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6384         if (PL_expect == XOPERATOR) {
6385             d = s;
6386             if (PL_bufptr > s) {
6387                 d = PL_bufptr-1;
6388                 PL_bufptr = PL_oldbufptr;
6389             }
6390             no_op("Array", d);
6391         }
6392         pl_yylval.ival = 0;
6393         if (!PL_tokenbuf[1]) {
6394             PREREF('@');
6395         }
6396         if (PL_lex_state == LEX_NORMAL)
6397             s = skipspace(s);
6398         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6399             if (*s == '{')
6400                 PL_tokenbuf[0] = '%';
6401
6402             /* Warn about @ where they meant $. */
6403             if (*s == '[' || *s == '{') {
6404                 if (ckWARN(WARN_SYNTAX)) {
6405                     S_check_scalar_slice(aTHX_ s);
6406                 }
6407             }
6408         }
6409         PL_expect = XOPERATOR;
6410         force_ident_maybe_lex('@');
6411         TERM('@');
6412
6413      case '/':                  /* may be division, defined-or, or pattern */
6414         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6415             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6416                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6417                 TOKEN(0);
6418             s += 2;
6419             AOPERATOR(DORDOR);
6420         }
6421         else if (PL_expect == XOPERATOR) {
6422             s++;
6423             if (*s == '=' && !PL_lex_allbrackets
6424                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6425             {
6426                 s--;
6427                 TOKEN(0);
6428             }
6429             Mop(OP_DIVIDE);
6430         }
6431         else {
6432             /* Disable warning on "study /blah/" */
6433             if (PL_oldoldbufptr == PL_last_uni
6434              && (*PL_last_uni != 's' || s - PL_last_uni < 5
6435                  || memNE(PL_last_uni, "study", 5)
6436                  || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6437              ))
6438                 check_uni();
6439             s = scan_pat(s,OP_MATCH);
6440             TERM(sublex_start());
6441         }
6442
6443      case '?':                  /* conditional */
6444         s++;
6445         if (!PL_lex_allbrackets
6446             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6447         {
6448             s--;
6449             TOKEN(0);
6450         }
6451         PL_lex_allbrackets++;
6452         OPERATOR('?');
6453
6454     case '.':
6455         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6456 #ifdef PERL_STRICT_CR
6457             && s[1] == '\n'
6458 #else
6459             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6460 #endif
6461             && (s == PL_linestart || s[-1] == '\n') )
6462         {
6463             PL_expect = XSTATE;
6464             formbrack = 2; /* dot seen where arguments expected */
6465             goto rightbracket;
6466         }
6467         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6468             s += 3;
6469             OPERATOR(YADAYADA);
6470         }
6471         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6472             char tmp = *s++;
6473             if (*s == tmp) {
6474                 if (!PL_lex_allbrackets
6475                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6476                 {
6477                     s--;
6478                     TOKEN(0);
6479                 }
6480                 s++;
6481                 if (*s == tmp) {
6482                     s++;
6483                     pl_yylval.ival = OPf_SPECIAL;
6484                 }
6485                 else
6486                     pl_yylval.ival = 0;
6487                 OPERATOR(DOTDOT);
6488             }
6489             if (*s == '=' && !PL_lex_allbrackets
6490                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6491             {
6492                 s--;
6493                 TOKEN(0);
6494             }
6495             Aop(OP_CONCAT);
6496         }
6497         /* FALLTHROUGH */
6498     case '0': case '1': case '2': case '3': case '4':
6499     case '5': case '6': case '7': case '8': case '9':
6500         s = scan_num(s, &pl_yylval);
6501         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6502         if (PL_expect == XOPERATOR)
6503             no_op("Number",s);
6504         TERM(THING);
6505
6506     case '\'':
6507         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6508         if (!s)
6509             missingterm(NULL);
6510         COPLINE_SET_FROM_MULTI_END;
6511         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6512         if (PL_expect == XOPERATOR) {
6513             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6514                 return deprecate_commaless_var_list();
6515             }
6516             else
6517                 no_op("String",s);
6518         }
6519         pl_yylval.ival = OP_CONST;
6520         TERM(sublex_start());
6521
6522     case '"':
6523         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6524         DEBUG_T( {
6525             if (s)
6526                 printbuf("### Saw string before %s\n", s);
6527             else
6528                 PerlIO_printf(Perl_debug_log,
6529                              "### Saw unterminated string\n");
6530         } );
6531         if (PL_expect == XOPERATOR) {
6532             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6533                 return deprecate_commaless_var_list();
6534             }
6535             else
6536                 no_op("String",s);
6537         }
6538         if (!s)
6539             missingterm(NULL);
6540         pl_yylval.ival = OP_CONST;
6541         /* FIXME. I think that this can be const if char *d is replaced by
6542            more localised variables.  */
6543         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6544             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6545                 pl_yylval.ival = OP_STRINGIFY;
6546                 break;
6547             }
6548         }
6549         if (pl_yylval.ival == OP_CONST)
6550             COPLINE_SET_FROM_MULTI_END;
6551         TERM(sublex_start());
6552
6553     case '`':
6554         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6555         DEBUG_T( {
6556             if (s)
6557                 printbuf("### Saw backtick string before %s\n", s);
6558             else
6559                 PerlIO_printf(Perl_debug_log,
6560                              "### Saw unterminated backtick string\n");
6561         } );
6562         if (PL_expect == XOPERATOR)
6563             no_op("Backticks",s);
6564         if (!s)
6565             missingterm(NULL);
6566         pl_yylval.ival = OP_BACKTICK;
6567         TERM(sublex_start());
6568
6569     case '\\':
6570         s++;
6571         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6572          && isDIGIT(*s))
6573             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6574                            *s, *s);
6575         if (PL_expect == XOPERATOR)
6576             no_op("Backslash",s);
6577         OPERATOR(REFGEN);
6578
6579     case 'v':
6580         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6581             char *start = s + 2;
6582             while (isDIGIT(*start) || *start == '_')
6583                 start++;
6584             if (*start == '.' && isDIGIT(start[1])) {
6585                 s = scan_num(s, &pl_yylval);
6586                 TERM(THING);
6587             }
6588             else if ((*start == ':' && start[1] == ':')
6589                   || (PL_expect == XSTATE && *start == ':'))
6590                 goto keylookup;
6591             else if (PL_expect == XSTATE) {
6592                 d = start;
6593                 while (d < PL_bufend && isSPACE(*d)) d++;
6594                 if (*d == ':') goto keylookup;
6595             }
6596             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6597             if (!isALPHA(*start) && (PL_expect == XTERM
6598                         || PL_expect == XREF || PL_expect == XSTATE
6599                         || PL_expect == XTERMORDORDOR)) {
6600                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6601                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6602                 if (!gv) {
6603                     s = scan_num(s, &pl_yylval);
6604                     TERM(THING);
6605                 }
6606             }
6607         }
6608         goto keylookup;
6609     case 'x':
6610         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6611             s++;
6612             Mop(OP_REPEAT);
6613         }
6614         goto keylookup;
6615
6616     case '_':
6617     case 'a': case 'A':
6618     case 'b': case 'B':
6619     case 'c': case 'C':
6620     case 'd': case 'D':
6621     case 'e': case 'E':
6622     case 'f': case 'F':
6623     case 'g': case 'G':
6624     case 'h': case 'H':
6625     case 'i': case 'I':
6626     case 'j': case 'J':
6627     case 'k': case 'K':
6628     case 'l': case 'L':
6629     case 'm': case 'M':
6630     case 'n': case 'N':
6631     case 'o': case 'O':
6632     case 'p': case 'P':
6633     case 'q': case 'Q':
6634     case 'r': case 'R':
6635     case 's': case 'S':
6636     case 't': case 'T':
6637     case 'u': case 'U':
6638               case 'V':
6639     case 'w': case 'W':
6640               case 'X':
6641     case 'y': case 'Y':
6642     case 'z': case 'Z':
6643
6644       keylookup: {
6645         bool anydelim;
6646         bool lex;
6647         I32 tmp;
6648         SV *sv;
6649         CV *cv;
6650         PADOFFSET off;
6651         OP *rv2cv_op;
6652
6653         lex = FALSE;
6654         orig_keyword = 0;
6655         off = 0;
6656         sv = NULL;
6657         cv = NULL;
6658         gv = NULL;
6659         gvp = NULL;
6660         rv2cv_op = NULL;
6661
6662         PL_bufptr = s;
6663         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6664
6665         /* Some keywords can be followed by any delimiter, including ':' */
6666         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6667
6668         /* x::* is just a word, unless x is "CORE" */
6669         if (!anydelim && *s == ':' && s[1] == ':') {
6670             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6671             goto just_a_word;
6672         }
6673
6674         d = s;
6675         while (d < PL_bufend && isSPACE(*d))
6676                 d++;    /* no comments skipped here, or s### is misparsed */
6677
6678         /* Is this a word before a => operator? */
6679         if (*d == '=' && d[1] == '>') {
6680           fat_arrow:
6681             CLINE;
6682             pl_yylval.opval
6683                 = (OP*)newSVOP(OP_CONST, 0,
6684                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6685             pl_yylval.opval->op_private = OPpCONST_BARE;
6686             TERM(WORD);
6687         }
6688
6689         /* Check for plugged-in keyword */
6690         {
6691             OP *o;
6692             int result;
6693             char *saved_bufptr = PL_bufptr;
6694             PL_bufptr = s;
6695             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6696             s = PL_bufptr;
6697             if (result == KEYWORD_PLUGIN_DECLINE) {
6698                 /* not a plugged-in keyword */
6699                 PL_bufptr = saved_bufptr;
6700             } else if (result == KEYWORD_PLUGIN_STMT) {
6701                 pl_yylval.opval = o;
6702                 CLINE;
6703                 if (!PL_nexttoke) PL_expect = XSTATE;
6704                 return REPORT(PLUGSTMT);
6705             } else if (result == KEYWORD_PLUGIN_EXPR) {
6706                 pl_yylval.opval = o;
6707                 CLINE;
6708                 if (!PL_nexttoke) PL_expect = XOPERATOR;
6709                 return REPORT(PLUGEXPR);
6710             } else {
6711                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6712                                         PL_tokenbuf);
6713             }
6714         }
6715
6716         /* Check for built-in keyword */
6717         tmp = keyword(PL_tokenbuf, len, 0);
6718
6719         /* Is this a label? */
6720         if (!anydelim && PL_expect == XSTATE
6721               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6722             s = d + 1;
6723             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6724             pl_yylval.pval[len] = '\0';
6725             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6726             CLINE;
6727             TOKEN(LABEL);
6728         }
6729
6730         /* Check for lexical sub */
6731         if (PL_expect != XOPERATOR) {
6732             char tmpbuf[sizeof PL_tokenbuf + 1];
6733             *tmpbuf = '&';
6734             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6735             off = pad_findmy_pvn(tmpbuf, len+1, 0);
6736             if (off != NOT_IN_PAD) {
6737                 assert(off); /* we assume this is boolean-true below */
6738                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6739                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6740                     HEK * const stashname = HvNAME_HEK(stash);
6741                     sv = newSVhek(stashname);
6742                     sv_catpvs(sv, "::");
6743                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6744                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6745                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6746                                     SVt_PVCV);
6747                     off = 0;
6748                     if (!gv) {
6749                         sv_free(sv);
6750                         sv = NULL;
6751                         goto just_a_word;
6752                     }
6753                 }
6754                 else {
6755                     rv2cv_op = newOP(OP_PADANY, 0);
6756                     rv2cv_op->op_targ = off;
6757                     cv = find_lexical_cv(off);
6758                 }
6759                 lex = TRUE;
6760                 goto just_a_word;
6761             }
6762             off = 0;
6763         }
6764
6765         if (tmp < 0) {                  /* second-class keyword? */
6766             GV *ogv = NULL;     /* override (winner) */
6767             GV *hgv = NULL;     /* hidden (loser) */
6768             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6769                 CV *cv;
6770                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6771                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6772                                             SVt_PVCV))
6773                     && (cv = GvCVu(gv)))
6774                 {
6775                     if (GvIMPORTED_CV(gv))
6776                         ogv = gv;
6777                     else if (! CvMETHOD(cv))
6778                         hgv = gv;
6779                 }
6780                 if (!ogv
6781                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6782                                                               len, FALSE))
6783                     && (gv = *gvp)
6784                     && (isGV_with_GP(gv)
6785                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
6786                         :   SvPCS_IMPORTED(gv)
6787                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6788                                                                  len, 0), 1)))
6789                 {
6790                     ogv = gv;
6791                 }
6792             }
6793             if (ogv) {
6794                 orig_keyword = tmp;
6795                 tmp = 0;                /* overridden by import or by GLOBAL */
6796             }
6797             else if (gv && !gvp
6798                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6799                      && GvCVu(gv))
6800             {
6801                 tmp = 0;                /* any sub overrides "weak" keyword */
6802             }
6803             else {                      /* no override */
6804                 tmp = -tmp;
6805                 if (tmp == KEY_dump) {
6806                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6807                                    "dump() better written as CORE::dump()");
6808                 }
6809                 gv = NULL;
6810                 gvp = 0;
6811                 if (hgv && tmp != KEY_x)        /* never ambiguous */
6812                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6813                                    "Ambiguous call resolved as CORE::%s(), "
6814                                    "qualify as such or use &",
6815                                    GvENAME(hgv));
6816             }
6817         }
6818
6819         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6820          && (!anydelim || *s != '#')) {
6821             /* no override, and not s### either; skipspace is safe here
6822              * check for => on following line */
6823             bool arrow;
6824             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6825             STRLEN   soff = s         - SvPVX(PL_linestr);
6826             s = skipspace_flags(s, LEX_NO_INCLINE);
6827             arrow = *s == '=' && s[1] == '>';
6828             PL_bufptr = SvPVX(PL_linestr) + bufoff;
6829             s         = SvPVX(PL_linestr) +   soff;
6830             if (arrow)
6831                 goto fat_arrow;
6832         }
6833
6834       reserved_word:
6835         switch (tmp) {
6836
6837         default:                        /* not a keyword */
6838             /* Trade off - by using this evil construction we can pull the
6839                variable gv into the block labelled keylookup. If not, then
6840                we have to give it function scope so that the goto from the
6841                earlier ':' case doesn't bypass the initialisation.  */
6842             if (0) {
6843             just_a_word_zero_gv:
6844                 sv = NULL;
6845                 cv = NULL;
6846                 gv = NULL;
6847                 gvp = NULL;
6848                 rv2cv_op = NULL;
6849                 orig_keyword = 0;
6850                 lex = 0;
6851                 off = 0;
6852             }
6853           just_a_word: {
6854                 int pkgname = 0;
6855                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6856                 bool safebw;
6857
6858
6859                 /* Get the rest if it looks like a package qualifier */
6860
6861                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6862                     STRLEN morelen;
6863                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6864                                   TRUE, &morelen);
6865                     if (!morelen)
6866                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6867                                 UTF8fARG(UTF, len, PL_tokenbuf),
6868                                 *s == '\'' ? "'" : "::");
6869                     len += morelen;
6870                     pkgname = 1;
6871                 }
6872
6873                 if (PL_expect == XOPERATOR) {
6874                     if (PL_bufptr == PL_linestart) {
6875                         CopLINE_dec(PL_curcop);
6876                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6877                         CopLINE_inc(PL_curcop);
6878                     }
6879                     else
6880                         no_op("Bareword",s);
6881                 }
6882
6883                 /* See if the name is "Foo::",
6884                    in which case Foo is a bareword
6885                    (and a package name). */
6886
6887                 if (len > 2
6888                     && PL_tokenbuf[len - 2] == ':'
6889                     && PL_tokenbuf[len - 1] == ':')
6890                 {
6891                     if (ckWARN(WARN_BAREWORD)
6892                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6893                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6894                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
6895                            UTF8fARG(UTF, len, PL_tokenbuf));
6896                     len -= 2;
6897                     PL_tokenbuf[len] = '\0';
6898                     gv = NULL;
6899                     gvp = 0;
6900                     safebw = TRUE;
6901                 }
6902                 else {
6903                     safebw = FALSE;
6904                 }
6905
6906                 /* if we saw a global override before, get the right name */
6907
6908                 if (!sv)
6909                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6910                                                 len);
6911                 if (gvp) {
6912                     SV * const tmp_sv = sv;
6913                     sv = newSVpvs("CORE::GLOBAL::");
6914                     sv_catsv(sv, tmp_sv);
6915                     SvREFCNT_dec(tmp_sv);
6916                 }
6917
6918
6919                 /* Presume this is going to be a bareword of some sort. */
6920                 CLINE;
6921                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6922                 pl_yylval.opval->op_private = OPpCONST_BARE;
6923
6924                 /* And if "Foo::", then that's what it certainly is. */
6925                 if (safebw)
6926                     goto safe_bareword;
6927
6928                 if (!off)
6929                 {
6930                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6931                     const_op->op_private = OPpCONST_BARE;
6932                     rv2cv_op =
6933                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6934                     cv = lex
6935                         ? isGV(gv)
6936                             ? GvCV(gv)
6937                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6938                                 ? (CV *)SvRV(gv)
6939                                 : ((CV *)gv)
6940                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6941                 }
6942
6943                 /* Use this var to track whether intuit_method has been
6944                    called.  intuit_method returns 0 or > 255.  */
6945                 tmp = 1;
6946
6947                 /* See if it's the indirect object for a list operator. */
6948
6949                 if (PL_oldoldbufptr
6950                     && PL_oldoldbufptr < PL_bufptr
6951                     && (PL_oldoldbufptr == PL_last_lop
6952                         || PL_oldoldbufptr == PL_last_uni)
6953                     && /* NO SKIPSPACE BEFORE HERE! */
6954                        (PL_expect == XREF
6955                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
6956                                                                == OA_FILEREF))
6957                 {
6958                     bool immediate_paren = *s == '(';
6959
6960                     /* (Now we can afford to cross potential line boundary.) */
6961                     s = skipspace(s);
6962
6963                     /* Two barewords in a row may indicate method call. */
6964
6965                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
6966                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
6967                     {
6968                         goto method;
6969                     }
6970
6971                     /* If not a declared subroutine, it's an indirect object. */
6972                     /* (But it's an indir obj regardless for sort.) */
6973                     /* Also, if "_" follows a filetest operator, it's a bareword */
6974
6975                     if (
6976                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
6977                          || (!cv
6978                              && (PL_last_lop_op != OP_MAPSTART
6979                                  && PL_last_lop_op != OP_GREPSTART))))
6980                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6981                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
6982                                                             == OA_FILESTATOP))
6983                        )
6984                     {
6985                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6986                         goto bareword;
6987                     }
6988                 }
6989
6990                 PL_expect = XOPERATOR;
6991                 s = skipspace(s);
6992
6993                 /* Is this a word before a => operator? */
6994                 if (*s == '=' && s[1] == '>' && !pkgname) {
6995                     op_free(rv2cv_op);
6996                     CLINE;
6997                     if (gvp || (lex && !off)) {
6998                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
6999                         /* This is our own scalar, created a few lines
7000                            above, so this is safe. */
7001                         SvREADONLY_off(sv);
7002                         sv_setpv(sv, PL_tokenbuf);
7003                         if (UTF && !IN_BYTES
7004                          && is_utf8_string((U8*)PL_tokenbuf, len))
7005                               SvUTF8_on(sv);
7006                         SvREADONLY_on(sv);
7007                     }
7008                     TERM(WORD);
7009                 }
7010
7011                 /* If followed by a paren, it's certainly a subroutine. */
7012                 if (*s == '(') {
7013                     CLINE;
7014                     if (cv) {
7015                         d = s + 1;
7016                         while (SPACE_OR_TAB(*d))
7017                             d++;
7018                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7019                             s = d + 1;
7020                             goto its_constant;
7021                         }
7022                     }
7023                     NEXTVAL_NEXTTOKE.opval =
7024                         off ? rv2cv_op : pl_yylval.opval;
7025                     if (off)
7026                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7027                     else op_free(rv2cv_op),        force_next(WORD);
7028                     pl_yylval.ival = 0;
7029                     TOKEN('&');
7030                 }
7031
7032                 /* If followed by var or block, call it a method (unless sub) */
7033
7034                 if ((*s == '$' || *s == '{') && !cv) {
7035                     op_free(rv2cv_op);
7036                     PL_last_lop = PL_oldbufptr;
7037                     PL_last_lop_op = OP_METHOD;
7038                     if (!PL_lex_allbrackets
7039                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7040                     {
7041                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7042                     }
7043                     PL_expect = XBLOCKTERM;
7044                     PL_bufptr = s;
7045                     return REPORT(METHOD);
7046                 }
7047
7048                 /* If followed by a bareword, see if it looks like indir obj. */
7049
7050                 if (tmp == 1 && !orig_keyword
7051                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7052                         && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7053                   method:
7054                     if (lex && !off) {
7055                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7056                         SvREADONLY_off(sv);
7057                         sv_setpvn(sv, PL_tokenbuf, len);
7058                         if (UTF && !IN_BYTES
7059                          && is_utf8_string((U8*)PL_tokenbuf, len))
7060                             SvUTF8_on (sv);
7061                         else SvUTF8_off(sv);
7062                     }
7063                     op_free(rv2cv_op);
7064                     if (tmp == METHOD && !PL_lex_allbrackets
7065                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7066                     {
7067                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7068                     }
7069                     return REPORT(tmp);
7070                 }
7071
7072                 /* Not a method, so call it a subroutine (if defined) */
7073
7074                 if (cv) {
7075                     /* Check for a constant sub */
7076                     if ((sv = cv_const_sv_or_av(cv))) {
7077                   its_constant:
7078                         op_free(rv2cv_op);
7079                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7080                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7081                         if (SvTYPE(sv) == SVt_PVAV)
7082                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7083                                                       pl_yylval.opval);
7084                         else {
7085                             pl_yylval.opval->op_private = 0;
7086                             pl_yylval.opval->op_folded = 1;
7087                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7088                         }
7089                         TOKEN(WORD);
7090                     }
7091
7092                     op_free(pl_yylval.opval);
7093                     pl_yylval.opval =
7094                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7095                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7096                     PL_last_lop = PL_oldbufptr;
7097                     PL_last_lop_op = OP_ENTERSUB;
7098                     /* Is there a prototype? */
7099                     if (
7100                         SvPOK(cv))
7101                     {
7102                         STRLEN protolen = CvPROTOLEN(cv);
7103                         const char *proto = CvPROTO(cv);
7104                         bool optional;
7105                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7106                         if (!protolen)
7107                             TERM(FUNC0SUB);
7108                         if ((optional = *proto == ';'))
7109                           do
7110                             proto++;
7111                           while (*proto == ';');
7112                         if (
7113                             (
7114                                 (
7115                                     *proto == '$' || *proto == '_'
7116                                  || *proto == '*' || *proto == '+'
7117                                 )
7118                              && proto[1] == '\0'
7119                             )
7120                          || (
7121                              *proto == '\\' && proto[1] && proto[2] == '\0'
7122                             )
7123                         )
7124                             UNIPROTO(UNIOPSUB,optional);
7125                         if (*proto == '\\' && proto[1] == '[') {
7126                             const char *p = proto + 2;
7127                             while(*p && *p != ']')
7128                                 ++p;
7129                             if(*p == ']' && !p[1])
7130                                 UNIPROTO(UNIOPSUB,optional);
7131                         }
7132                         if (*proto == '&' && *s == '{') {
7133                             if (PL_curstash)
7134                                 sv_setpvs(PL_subname, "__ANON__");
7135                             else
7136                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7137                             if (!PL_lex_allbrackets
7138                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7139                             {
7140                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7141                             }
7142                             PREBLOCK(LSTOPSUB);
7143                         }
7144                     }
7145                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7146                     PL_expect = XTERM;
7147                     force_next(off ? PRIVATEREF : WORD);
7148                     if (!PL_lex_allbrackets
7149                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7150                     {
7151                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7152                     }
7153                     TOKEN(NOAMP);
7154                 }
7155
7156                 /* Call it a bare word */
7157
7158                 if (PL_hints & HINT_STRICT_SUBS)
7159                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7160                 else {
7161                 bareword:
7162                     /* after "print" and similar functions (corresponding to
7163                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7164                      * a filehandle should be subject to "strict subs".
7165                      * Likewise for the optional indirect-object argument to system
7166                      * or exec, which can't be a bareword */
7167                     if ((PL_last_lop_op == OP_PRINT
7168                             || PL_last_lop_op == OP_PRTF
7169                             || PL_last_lop_op == OP_SAY
7170                             || PL_last_lop_op == OP_SYSTEM
7171                             || PL_last_lop_op == OP_EXEC)
7172                             && (PL_hints & HINT_STRICT_SUBS))
7173                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7174                     if (lastchar != '-') {
7175                         if (ckWARN(WARN_RESERVED)) {
7176                             d = PL_tokenbuf;
7177                             while (isLOWER(*d))
7178                                 d++;
7179                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7180                             {
7181                                 /* PL_warn_reserved is constant */
7182                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7183                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7184                                        PL_tokenbuf);
7185                                 GCC_DIAG_RESTORE;
7186                             }
7187                         }
7188                     }
7189                 }
7190                 op_free(rv2cv_op);
7191
7192             safe_bareword:
7193                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7194                  && saw_infix_sigil) {
7195                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7196                                      "Operator or semicolon missing before %c%"UTF8f,
7197                                      lastchar,
7198                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7199                                               PL_tokenbuf));
7200                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7201                                      "Ambiguous use of %c resolved as operator %c",
7202                                      lastchar, lastchar);
7203                 }
7204                 TOKEN(WORD);
7205             }
7206
7207         case KEY___FILE__:
7208             FUN0OP(
7209                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7210             );
7211
7212         case KEY___LINE__:
7213             FUN0OP(
7214                 (OP*)newSVOP(OP_CONST, 0,
7215                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7216             );
7217
7218         case KEY___PACKAGE__:
7219             FUN0OP(
7220                 (OP*)newSVOP(OP_CONST, 0,
7221                                         (PL_curstash
7222                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7223                                          : &PL_sv_undef))
7224             );
7225
7226         case KEY___DATA__:
7227         case KEY___END__: {
7228             GV *gv;
7229             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7230                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7231                                         ? PL_curstash
7232                                         : PL_defstash;
7233                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7234                 if (!isGV(gv))
7235                     gv_init(gv,stash,"DATA",4,0);
7236                 GvMULTI_on(gv);
7237                 if (!GvIO(gv))
7238                     GvIOp(gv) = newIO();
7239                 IoIFP(GvIOp(gv)) = PL_rsfp;
7240 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7241                 {
7242                     const int fd = PerlIO_fileno(PL_rsfp);
7243                     if (fd >= 3) {
7244                         fcntl(fd,F_SETFD, FD_CLOEXEC);
7245                     }
7246                 }
7247 #endif
7248                 /* Mark this internal pseudo-handle as clean */
7249                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7250                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7251                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7252                 else
7253                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7254 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7255                 /* if the script was opened in binmode, we need to revert
7256                  * it to text mode for compatibility; but only iff it has CRs
7257                  * XXX this is a questionable hack at best. */
7258                 if (PL_bufend-PL_bufptr > 2
7259                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7260                 {
7261                     Off_t loc = 0;
7262                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7263                         loc = PerlIO_tell(PL_rsfp);
7264                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7265                     }
7266 #ifdef NETWARE
7267                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7268 #else
7269                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7270 #endif  /* NETWARE */
7271                         if (loc > 0)
7272                             PerlIO_seek(PL_rsfp, loc, 0);
7273                     }
7274                 }
7275 #endif
7276 #ifdef PERLIO_LAYERS
7277                 if (!IN_BYTES) {
7278                     if (UTF)
7279                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7280                     else if (IN_ENCODING) {
7281                         SV *name;
7282                         dSP;
7283                         ENTER;
7284                         SAVETMPS;
7285                         PUSHMARK(sp);
7286                         XPUSHs(_get_encoding());
7287                         PUTBACK;
7288                         call_method("name", G_SCALAR);
7289                         SPAGAIN;
7290                         name = POPs;
7291                         PUTBACK;
7292                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7293                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7294                                                       SVfARG(name)));
7295                         FREETMPS;
7296                         LEAVE;
7297                     }
7298                 }
7299 #endif
7300                 PL_rsfp = NULL;
7301             }
7302             goto fake_eof;
7303         }
7304
7305         case KEY___SUB__:
7306             FUN0OP(CvCLONE(PL_compcv)
7307                         ? newOP(OP_RUNCV, 0)
7308                         : newPVOP(OP_RUNCV,0,NULL));
7309
7310         case KEY_AUTOLOAD:
7311         case KEY_DESTROY:
7312         case KEY_BEGIN:
7313         case KEY_UNITCHECK:
7314         case KEY_CHECK:
7315         case KEY_INIT:
7316         case KEY_END:
7317             if (PL_expect == XSTATE) {
7318                 s = PL_bufptr;
7319                 goto really_sub;
7320             }
7321             goto just_a_word;
7322
7323         case_KEY_CORE:
7324             {
7325                 STRLEN olen = len;
7326                 d = s;
7327                 s += 2;
7328                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7329                 if ((*s == ':' && s[1] == ':')
7330                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7331                 {
7332                     s = d;
7333                     len = olen;
7334                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7335                     goto just_a_word;
7336                 }
7337                 if (!tmp)
7338                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7339                                       UTF8fARG(UTF, len, PL_tokenbuf));
7340                 if (tmp < 0)
7341                     tmp = -tmp;
7342                 else if (tmp == KEY_require || tmp == KEY_do
7343                       || tmp == KEY_glob)
7344                     /* that's a way to remember we saw "CORE::" */
7345                     orig_keyword = tmp;
7346                 goto reserved_word;
7347             }
7348
7349         case KEY_abs:
7350             UNI(OP_ABS);
7351
7352         case KEY_alarm:
7353             UNI(OP_ALARM);
7354
7355         case KEY_accept:
7356             LOP(OP_ACCEPT,XTERM);
7357
7358         case KEY_and:
7359             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7360                 return REPORT(0);
7361             OPERATOR(ANDOP);
7362
7363         case KEY_atan2:
7364             LOP(OP_ATAN2,XTERM);
7365
7366         case KEY_bind:
7367             LOP(OP_BIND,XTERM);
7368
7369         case KEY_binmode:
7370             LOP(OP_BINMODE,XTERM);
7371
7372         case KEY_bless:
7373             LOP(OP_BLESS,XTERM);
7374
7375         case KEY_break:
7376             FUN0(OP_BREAK);
7377
7378         case KEY_chop:
7379             UNI(OP_CHOP);
7380
7381         case KEY_continue:
7382                     /* We have to disambiguate the two senses of
7383                       "continue". If the next token is a '{' then
7384                       treat it as the start of a continue block;
7385                       otherwise treat it as a control operator.
7386                      */
7387                     s = skipspace(s);
7388                     if (*s == '{')
7389             PREBLOCK(CONTINUE);
7390                     else
7391                         FUN0(OP_CONTINUE);
7392
7393         case KEY_chdir:
7394             /* may use HOME */
7395             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7396             UNI(OP_CHDIR);
7397
7398         case KEY_close:
7399             UNI(OP_CLOSE);
7400
7401         case KEY_closedir:
7402             UNI(OP_CLOSEDIR);
7403
7404         case KEY_cmp:
7405             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7406                 return REPORT(0);
7407             Eop(OP_SCMP);
7408
7409         case KEY_caller:
7410             UNI(OP_CALLER);
7411
7412         case KEY_crypt:
7413 #ifdef FCRYPT
7414             if (!PL_cryptseen) {
7415                 PL_cryptseen = TRUE;
7416                 init_des();
7417             }
7418 #endif
7419             LOP(OP_CRYPT,XTERM);
7420
7421         case KEY_chmod:
7422             LOP(OP_CHMOD,XTERM);
7423
7424         case KEY_chown:
7425             LOP(OP_CHOWN,XTERM);
7426
7427         case KEY_connect:
7428             LOP(OP_CONNECT,XTERM);
7429
7430         case KEY_chr:
7431             UNI(OP_CHR);
7432
7433         case KEY_cos:
7434             UNI(OP_COS);
7435
7436         case KEY_chroot:
7437             UNI(OP_CHROOT);
7438
7439         case KEY_default:
7440             PREBLOCK(DEFAULT);
7441
7442         case KEY_do:
7443             s = skipspace(s);
7444             if (*s == '{')
7445                 PRETERMBLOCK(DO);
7446             if (*s != '\'') {
7447                 *PL_tokenbuf = '&';
7448                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7449                               1, &len);
7450                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7451                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7452                     d = skipspace(d);
7453                     if (*d == '(') {
7454                         force_ident_maybe_lex('&');
7455                         s = d;
7456                     }
7457                 }
7458             }
7459             if (orig_keyword == KEY_do) {
7460                 orig_keyword = 0;
7461                 pl_yylval.ival = 1;
7462             }
7463             else
7464                 pl_yylval.ival = 0;
7465             OPERATOR(DO);
7466
7467         case KEY_die:
7468             PL_hints |= HINT_BLOCK_SCOPE;
7469             LOP(OP_DIE,XTERM);
7470
7471         case KEY_defined:
7472             UNI(OP_DEFINED);
7473
7474         case KEY_delete:
7475             UNI(OP_DELETE);
7476
7477         case KEY_dbmopen:
7478             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7479                               STR_WITH_LEN("NDBM_File::"),
7480                               STR_WITH_LEN("DB_File::"),
7481                               STR_WITH_LEN("GDBM_File::"),
7482                               STR_WITH_LEN("SDBM_File::"),
7483                               STR_WITH_LEN("ODBM_File::"),
7484                               NULL);
7485             LOP(OP_DBMOPEN,XTERM);
7486
7487         case KEY_dbmclose:
7488             UNI(OP_DBMCLOSE);
7489
7490         case KEY_dump:
7491             LOOPX(OP_DUMP);
7492
7493         case KEY_else:
7494             PREBLOCK(ELSE);
7495
7496         case KEY_elsif:
7497             pl_yylval.ival = CopLINE(PL_curcop);
7498             OPERATOR(ELSIF);
7499
7500         case KEY_eq:
7501             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7502                 return REPORT(0);
7503             Eop(OP_SEQ);
7504
7505         case KEY_exists:
7506             UNI(OP_EXISTS);
7507
7508         case KEY_exit:
7509             UNI(OP_EXIT);
7510
7511         case KEY_eval:
7512             s = skipspace(s);
7513             if (*s == '{') { /* block eval */
7514                 PL_expect = XTERMBLOCK;
7515                 UNIBRACK(OP_ENTERTRY);
7516             }
7517             else { /* string eval */
7518                 PL_expect = XTERM;
7519                 UNIBRACK(OP_ENTEREVAL);
7520             }
7521
7522         case KEY_evalbytes:
7523             PL_expect = XTERM;
7524             UNIBRACK(-OP_ENTEREVAL);
7525
7526         case KEY_eof:
7527             UNI(OP_EOF);
7528
7529         case KEY_exp:
7530             UNI(OP_EXP);
7531
7532         case KEY_each:
7533             UNI(OP_EACH);
7534
7535         case KEY_exec:
7536             LOP(OP_EXEC,XREF);
7537
7538         case KEY_endhostent:
7539             FUN0(OP_EHOSTENT);
7540
7541         case KEY_endnetent:
7542             FUN0(OP_ENETENT);
7543
7544         case KEY_endservent:
7545             FUN0(OP_ESERVENT);
7546
7547         case KEY_endprotoent:
7548             FUN0(OP_EPROTOENT);
7549
7550         case KEY_endpwent:
7551             FUN0(OP_EPWENT);
7552
7553         case KEY_endgrent:
7554             FUN0(OP_EGRENT);
7555
7556         case KEY_for:
7557         case KEY_foreach:
7558             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7559                 return REPORT(0);
7560             pl_yylval.ival = CopLINE(PL_curcop);
7561             s = skipspace(s);
7562             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7563                 char *p = s;
7564
7565                 if ((PL_bufend - p) >= 3
7566                     && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7567                 {
7568                     p += 2;
7569                 }
7570                 else if ((PL_bufend - p) >= 4
7571                          && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7572                     p += 3;
7573                 p = skipspace(p);
7574                 /* skip optional package name, as in "for my abc $x (..)" */
7575                 if (isIDFIRST_lazy_if(p,UTF)) {
7576                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7577                     p = skipspace(p);
7578                 }
7579                 if (*p != '$')
7580                     Perl_croak(aTHX_ "Missing $ on loop variable");
7581             }
7582             OPERATOR(FOR);
7583
7584         case KEY_formline:
7585             LOP(OP_FORMLINE,XTERM);
7586
7587         case KEY_fork:
7588             FUN0(OP_FORK);
7589
7590         case KEY_fc:
7591             UNI(OP_FC);
7592
7593         case KEY_fcntl:
7594             LOP(OP_FCNTL,XTERM);
7595
7596         case KEY_fileno:
7597             UNI(OP_FILENO);
7598
7599         case KEY_flock:
7600             LOP(OP_FLOCK,XTERM);
7601
7602         case KEY_gt:
7603             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7604                 return REPORT(0);
7605             Rop(OP_SGT);
7606
7607         case KEY_ge:
7608             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7609                 return REPORT(0);
7610             Rop(OP_SGE);
7611
7612         case KEY_grep:
7613             LOP(OP_GREPSTART, XREF);
7614
7615         case KEY_goto:
7616             LOOPX(OP_GOTO);
7617
7618         case KEY_gmtime:
7619             UNI(OP_GMTIME);
7620
7621         case KEY_getc:
7622             UNIDOR(OP_GETC);
7623
7624         case KEY_getppid:
7625             FUN0(OP_GETPPID);
7626
7627         case KEY_getpgrp:
7628             UNI(OP_GETPGRP);
7629
7630         case KEY_getpriority:
7631             LOP(OP_GETPRIORITY,XTERM);
7632
7633         case KEY_getprotobyname:
7634             UNI(OP_GPBYNAME);
7635
7636         case KEY_getprotobynumber:
7637             LOP(OP_GPBYNUMBER,XTERM);
7638
7639         case KEY_getprotoent:
7640             FUN0(OP_GPROTOENT);
7641
7642         case KEY_getpwent:
7643             FUN0(OP_GPWENT);
7644
7645         case KEY_getpwnam:
7646             UNI(OP_GPWNAM);
7647
7648         case KEY_getpwuid:
7649             UNI(OP_GPWUID);
7650
7651         case KEY_getpeername:
7652             UNI(OP_GETPEERNAME);
7653
7654         case KEY_gethostbyname:
7655             UNI(OP_GHBYNAME);
7656
7657         case KEY_gethostbyaddr:
7658             LOP(OP_GHBYADDR,XTERM);
7659
7660         case KEY_gethostent:
7661             FUN0(OP_GHOSTENT);
7662
7663         case KEY_getnetbyname:
7664             UNI(OP_GNBYNAME);
7665
7666         case KEY_getnetbyaddr:
7667             LOP(OP_GNBYADDR,XTERM);
7668
7669         case KEY_getnetent:
7670             FUN0(OP_GNETENT);
7671
7672         case KEY_getservbyname:
7673             LOP(OP_GSBYNAME,XTERM);
7674
7675         case KEY_getservbyport:
7676             LOP(OP_GSBYPORT,XTERM);
7677
7678         case KEY_getservent:
7679             FUN0(OP_GSERVENT);
7680
7681         case KEY_getsockname:
7682             UNI(OP_GETSOCKNAME);
7683
7684         case KEY_getsockopt:
7685             LOP(OP_GSOCKOPT,XTERM);
7686
7687         case KEY_getgrent:
7688             FUN0(OP_GGRENT);
7689
7690         case KEY_getgrnam:
7691             UNI(OP_GGRNAM);
7692
7693         case KEY_getgrgid:
7694             UNI(OP_GGRGID);
7695
7696         case KEY_getlogin:
7697             FUN0(OP_GETLOGIN);
7698
7699         case KEY_given:
7700             pl_yylval.ival = CopLINE(PL_curcop);
7701             Perl_ck_warner_d(aTHX_
7702                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7703                 "given is experimental");
7704             OPERATOR(GIVEN);
7705
7706         case KEY_glob:
7707             LOP(
7708              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7709              XTERM
7710             );
7711
7712         case KEY_hex:
7713             UNI(OP_HEX);
7714
7715         case KEY_if:
7716             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7717                 return REPORT(0);
7718             pl_yylval.ival = CopLINE(PL_curcop);
7719             OPERATOR(IF);
7720
7721         case KEY_index:
7722             LOP(OP_INDEX,XTERM);
7723
7724         case KEY_int:
7725             UNI(OP_INT);
7726
7727         case KEY_ioctl:
7728             LOP(OP_IOCTL,XTERM);
7729
7730         case KEY_join:
7731             LOP(OP_JOIN,XTERM);
7732
7733         case KEY_keys:
7734             UNI(OP_KEYS);
7735
7736         case KEY_kill:
7737             LOP(OP_KILL,XTERM);
7738
7739         case KEY_last:
7740             LOOPX(OP_LAST);
7741
7742         case KEY_lc:
7743             UNI(OP_LC);
7744
7745         case KEY_lcfirst:
7746             UNI(OP_LCFIRST);
7747
7748         case KEY_local:
7749             OPERATOR(LOCAL);
7750
7751         case KEY_length:
7752             UNI(OP_LENGTH);
7753
7754         case KEY_lt:
7755             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7756                 return REPORT(0);
7757             Rop(OP_SLT);
7758
7759         case KEY_le:
7760             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7761                 return REPORT(0);
7762             Rop(OP_SLE);
7763
7764         case KEY_localtime:
7765             UNI(OP_LOCALTIME);
7766
7767         case KEY_log:
7768             UNI(OP_LOG);
7769
7770         case KEY_link:
7771             LOP(OP_LINK,XTERM);
7772
7773         case KEY_listen:
7774             LOP(OP_LISTEN,XTERM);
7775
7776         case KEY_lock:
7777             UNI(OP_LOCK);
7778
7779         case KEY_lstat:
7780             UNI(OP_LSTAT);
7781
7782         case KEY_m:
7783             s = scan_pat(s,OP_MATCH);
7784             TERM(sublex_start());
7785
7786         case KEY_map:
7787             LOP(OP_MAPSTART, XREF);
7788
7789         case KEY_mkdir:
7790             LOP(OP_MKDIR,XTERM);
7791
7792         case KEY_msgctl:
7793             LOP(OP_MSGCTL,XTERM);
7794
7795         case KEY_msgget:
7796             LOP(OP_MSGGET,XTERM);
7797
7798         case KEY_msgrcv:
7799             LOP(OP_MSGRCV,XTERM);
7800
7801         case KEY_msgsnd:
7802             LOP(OP_MSGSND,XTERM);
7803
7804         case KEY_our:
7805         case KEY_my:
7806         case KEY_state:
7807             if (PL_in_my) {
7808                 PL_bufptr = s;
7809                 yyerror(Perl_form(aTHX_
7810                                   "Can't redeclare \"%s\" in \"%s\"",
7811                                    tmp      == KEY_my    ? "my" :
7812                                    tmp      == KEY_state ? "state" : "our",
7813                                    PL_in_my == KEY_my    ? "my" :
7814                                    PL_in_my == KEY_state ? "state" : "our"));
7815             }
7816             PL_in_my = (U16)tmp;
7817             s = skipspace(s);
7818             if (isIDFIRST_lazy_if(s,UTF)) {
7819                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7820                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7821                     goto really_sub;
7822                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7823                 if (!PL_in_my_stash) {
7824                     char tmpbuf[1024];
7825                     int len;
7826                     PL_bufptr = s;
7827                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7828                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7829                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7830                 }
7831             }
7832             OPERATOR(MY);
7833
7834         case KEY_next:
7835             LOOPX(OP_NEXT);
7836
7837         case KEY_ne:
7838             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7839                 return REPORT(0);
7840             Eop(OP_SNE);
7841
7842         case KEY_no:
7843             s = tokenize_use(0, s);
7844             TOKEN(USE);
7845
7846         case KEY_not:
7847             if (*s == '(' || (s = skipspace(s), *s == '('))
7848                 FUN1(OP_NOT);
7849             else {
7850                 if (!PL_lex_allbrackets
7851                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7852                 {
7853                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7854                 }
7855                 OPERATOR(NOTOP);
7856             }
7857
7858         case KEY_open:
7859             s = skipspace(s);
7860             if (isIDFIRST_lazy_if(s,UTF)) {
7861           const char *t;
7862           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7863               &len);
7864                 for (t=d; isSPACE(*t);)
7865                     t++;
7866                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7867                     /* [perl #16184] */
7868                     && !(t[0] == '=' && t[1] == '>')
7869                     && !(t[0] == ':' && t[1] == ':')
7870                     && !keyword(s, d-s, 0)
7871                 ) {
7872                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7873                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7874                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7875                 }
7876             }
7877             LOP(OP_OPEN,XTERM);
7878
7879         case KEY_or:
7880             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7881                 return REPORT(0);
7882             pl_yylval.ival = OP_OR;
7883             OPERATOR(OROP);
7884
7885         case KEY_ord:
7886             UNI(OP_ORD);
7887
7888         case KEY_oct:
7889             UNI(OP_OCT);
7890
7891         case KEY_opendir:
7892             LOP(OP_OPEN_DIR,XTERM);
7893
7894         case KEY_print:
7895             checkcomma(s,PL_tokenbuf,"filehandle");
7896             LOP(OP_PRINT,XREF);
7897
7898         case KEY_printf:
7899             checkcomma(s,PL_tokenbuf,"filehandle");
7900             LOP(OP_PRTF,XREF);
7901
7902         case KEY_prototype:
7903             UNI(OP_PROTOTYPE);
7904
7905         case KEY_push:
7906             LOP(OP_PUSH,XTERM);
7907
7908         case KEY_pop:
7909             UNIDOR(OP_POP);
7910
7911         case KEY_pos:
7912             UNIDOR(OP_POS);
7913
7914         case KEY_pack:
7915             LOP(OP_PACK,XTERM);
7916
7917         case KEY_package:
7918             s = force_word(s,WORD,FALSE,TRUE);
7919             s = skipspace(s);
7920             s = force_strict_version(s);
7921             PREBLOCK(PACKAGE);
7922
7923         case KEY_pipe:
7924             LOP(OP_PIPE_OP,XTERM);
7925
7926         case KEY_q:
7927             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7928             if (!s)
7929                 missingterm(NULL);
7930             COPLINE_SET_FROM_MULTI_END;
7931             pl_yylval.ival = OP_CONST;
7932             TERM(sublex_start());
7933
7934         case KEY_quotemeta:
7935             UNI(OP_QUOTEMETA);
7936
7937         case KEY_qw: {
7938             OP *words = NULL;
7939             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7940             if (!s)
7941                 missingterm(NULL);
7942             COPLINE_SET_FROM_MULTI_END;
7943             PL_expect = XOPERATOR;
7944             if (SvCUR(PL_lex_stuff)) {
7945                 int warned_comma = !ckWARN(WARN_QW);
7946                 int warned_comment = warned_comma;
7947                 d = SvPV_force(PL_lex_stuff, len);
7948                 while (len) {
7949                     for (; isSPACE(*d) && len; --len, ++d)
7950                         /**/;
7951                     if (len) {
7952                         SV *sv;
7953                         const char *b = d;
7954                         if (!warned_comma || !warned_comment) {
7955                             for (; !isSPACE(*d) && len; --len, ++d) {
7956                                 if (!warned_comma && *d == ',') {
7957                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7958                                         "Possible attempt to separate words with commas");
7959                                     ++warned_comma;
7960                                 }
7961                                 else if (!warned_comment && *d == '#') {
7962                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7963                                         "Possible attempt to put comments in qw() list");
7964                                     ++warned_comment;
7965                                 }
7966                             }
7967                         }
7968                         else {
7969                             for (; !isSPACE(*d) && len; --len, ++d)
7970                                 /**/;
7971                         }
7972                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7973                         words = op_append_elem(OP_LIST, words,
7974                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7975                     }
7976                 }
7977             }
7978             if (!words)
7979                 words = newNULLLIST();
7980             SvREFCNT_dec_NN(PL_lex_stuff);
7981             PL_lex_stuff = NULL;
7982             PL_expect = XOPERATOR;
7983             pl_yylval.opval = sawparens(words);
7984             TOKEN(QWLIST);
7985         }
7986
7987         case KEY_qq:
7988             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7989             if (!s)
7990                 missingterm(NULL);
7991             pl_yylval.ival = OP_STRINGIFY;
7992             if (SvIVX(PL_lex_stuff) == '\'')
7993                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7994             TERM(sublex_start());
7995
7996         case KEY_qr:
7997             s = scan_pat(s,OP_QR);
7998             TERM(sublex_start());
7999
8000         case KEY_qx:
8001             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8002             if (!s)
8003                 missingterm(NULL);
8004             pl_yylval.ival = OP_BACKTICK;
8005             TERM(sublex_start());
8006
8007         case KEY_return:
8008             OLDLOP(OP_RETURN);
8009
8010         case KEY_require:
8011             s = skipspace(s);
8012             if (isDIGIT(*s)) {
8013                 s = force_version(s, FALSE);
8014             }
8015             else if (*s != 'v' || !isDIGIT(s[1])
8016                     || (s = force_version(s, TRUE), *s == 'v'))
8017             {
8018                 *PL_tokenbuf = '\0';
8019                 s = force_word(s,WORD,TRUE,TRUE);
8020                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8021                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8022                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8023                 else if (*s == '<')
8024                     yyerror("<> at require-statement should be quotes");
8025             }
8026             if (orig_keyword == KEY_require) {
8027                 orig_keyword = 0;
8028                 pl_yylval.ival = 1;
8029             }
8030             else
8031                 pl_yylval.ival = 0;
8032             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8033             PL_bufptr = s;
8034             PL_last_uni = PL_oldbufptr;
8035             PL_last_lop_op = OP_REQUIRE;
8036             s = skipspace(s);
8037             return REPORT( (int)REQUIRE );
8038
8039         case KEY_reset:
8040             UNI(OP_RESET);
8041
8042         case KEY_redo:
8043             LOOPX(OP_REDO);
8044
8045         case KEY_rename:
8046             LOP(OP_RENAME,XTERM);
8047
8048         case KEY_rand:
8049             UNI(OP_RAND);
8050
8051         case KEY_rmdir:
8052             UNI(OP_RMDIR);
8053
8054         case KEY_rindex:
8055             LOP(OP_RINDEX,XTERM);
8056
8057         case KEY_read:
8058             LOP(OP_READ,XTERM);
8059
8060         case KEY_readdir:
8061             UNI(OP_READDIR);
8062
8063         case KEY_readline:
8064             UNIDOR(OP_READLINE);
8065
8066         case KEY_readpipe:
8067             UNIDOR(OP_BACKTICK);
8068
8069         case KEY_rewinddir:
8070             UNI(OP_REWINDDIR);
8071
8072         case KEY_recv:
8073             LOP(OP_RECV,XTERM);
8074
8075         case KEY_reverse:
8076             LOP(OP_REVERSE,XTERM);
8077
8078         case KEY_readlink:
8079             UNIDOR(OP_READLINK);
8080
8081         case KEY_ref:
8082             UNI(OP_REF);
8083
8084         case KEY_s:
8085             s = scan_subst(s);
8086             if (pl_yylval.opval)
8087                 TERM(sublex_start());
8088             else
8089                 TOKEN(1);       /* force error */
8090
8091         case KEY_say:
8092             checkcomma(s,PL_tokenbuf,"filehandle");
8093             LOP(OP_SAY,XREF);
8094
8095         case KEY_chomp:
8096             UNI(OP_CHOMP);
8097
8098         case KEY_scalar:
8099             UNI(OP_SCALAR);
8100
8101         case KEY_select:
8102             LOP(OP_SELECT,XTERM);
8103
8104         case KEY_seek:
8105             LOP(OP_SEEK,XTERM);
8106
8107         case KEY_semctl:
8108             LOP(OP_SEMCTL,XTERM);
8109
8110         case KEY_semget:
8111             LOP(OP_SEMGET,XTERM);
8112
8113         case KEY_semop:
8114             LOP(OP_SEMOP,XTERM);
8115
8116         case KEY_send:
8117             LOP(OP_SEND,XTERM);
8118
8119         case KEY_setpgrp:
8120             LOP(OP_SETPGRP,XTERM);
8121
8122         case KEY_setpriority:
8123             LOP(OP_SETPRIORITY,XTERM);
8124
8125         case KEY_sethostent:
8126             UNI(OP_SHOSTENT);
8127
8128         case KEY_setnetent:
8129             UNI(OP_SNETENT);
8130
8131         case KEY_setservent:
8132             UNI(OP_SSERVENT);
8133
8134         case KEY_setprotoent:
8135             UNI(OP_SPROTOENT);
8136
8137         case KEY_setpwent:
8138             FUN0(OP_SPWENT);
8139
8140         case KEY_setgrent:
8141             FUN0(OP_SGRENT);
8142
8143         case KEY_seekdir:
8144             LOP(OP_SEEKDIR,XTERM);
8145
8146         case KEY_setsockopt:
8147             LOP(OP_SSOCKOPT,XTERM);
8148
8149         case KEY_shift:
8150             UNIDOR(OP_SHIFT);
8151
8152         case KEY_shmctl:
8153             LOP(OP_SHMCTL,XTERM);
8154
8155         case KEY_shmget:
8156             LOP(OP_SHMGET,XTERM);
8157
8158         case KEY_shmread:
8159             LOP(OP_SHMREAD,XTERM);
8160
8161         case KEY_shmwrite:
8162             LOP(OP_SHMWRITE,XTERM);
8163
8164         case KEY_shutdown:
8165             LOP(OP_SHUTDOWN,XTERM);
8166
8167         case KEY_sin:
8168             UNI(OP_SIN);
8169
8170         case KEY_sleep:
8171             UNI(OP_SLEEP);
8172
8173         case KEY_socket:
8174             LOP(OP_SOCKET,XTERM);
8175
8176         case KEY_socketpair:
8177             LOP(OP_SOCKPAIR,XTERM);
8178
8179         case KEY_sort:
8180             checkcomma(s,PL_tokenbuf,"subroutine name");
8181             s = skipspace(s);
8182             PL_expect = XTERM;
8183             s = force_word(s,WORD,TRUE,TRUE);
8184             LOP(OP_SORT,XREF);
8185
8186         case KEY_split:
8187             LOP(OP_SPLIT,XTERM);
8188
8189         case KEY_sprintf:
8190             LOP(OP_SPRINTF,XTERM);
8191
8192         case KEY_splice:
8193             LOP(OP_SPLICE,XTERM);
8194
8195         case KEY_sqrt:
8196             UNI(OP_SQRT);
8197
8198         case KEY_srand:
8199             UNI(OP_SRAND);
8200
8201         case KEY_stat:
8202             UNI(OP_STAT);
8203
8204         case KEY_study:
8205             UNI(OP_STUDY);
8206
8207         case KEY_substr:
8208             LOP(OP_SUBSTR,XTERM);
8209
8210         case KEY_format:
8211         case KEY_sub:
8212           really_sub:
8213             {
8214                 char * const tmpbuf = PL_tokenbuf + 1;
8215                 expectation attrful;
8216                 bool have_name, have_proto;
8217                 const int key = tmp;
8218                 SV *format_name = NULL;
8219
8220                 d = s;
8221                 s = skipspace(s);
8222
8223                 if (isIDFIRST_lazy_if(s,UTF)
8224                     || *s == '\''
8225                     || (*s == ':' && s[1] == ':'))
8226                 {
8227
8228                     PL_expect = XBLOCK;
8229                     attrful = XATTRBLOCK;
8230                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8231                                   &len);
8232                     if (key == KEY_format)
8233                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8234                     *PL_tokenbuf = '&';
8235                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8236                      || pad_findmy_pvn(
8237                             PL_tokenbuf, len + 1, 0
8238                         ) != NOT_IN_PAD)
8239                         sv_setpvn(PL_subname, tmpbuf, len);
8240                     else {
8241                         sv_setsv(PL_subname,PL_curstname);
8242                         sv_catpvs(PL_subname,"::");
8243                         sv_catpvn(PL_subname,tmpbuf,len);
8244                     }
8245                     if (SvUTF8(PL_linestr))
8246                         SvUTF8_on(PL_subname);
8247                     have_name = TRUE;
8248
8249
8250                     s = skipspace(d);
8251                 }
8252                 else {
8253                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8254                     {
8255                         *d = '\0';
8256                         /* diag_listed_as: Missing name in "%s sub" */
8257                         Perl_croak(aTHX_
8258                                   "Missing name in \"%s\"", PL_bufptr);
8259                     }
8260                     PL_expect = XTERMBLOCK;
8261                     attrful = XATTRTERM;
8262                     sv_setpvs(PL_subname,"?");
8263                     have_name = FALSE;
8264                 }
8265
8266                 if (key == KEY_format) {
8267                     if (format_name) {
8268                         NEXTVAL_NEXTTOKE.opval
8269                             = (OP*)newSVOP(OP_CONST,0, format_name);
8270                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8271                         force_next(WORD);
8272                     }
8273                     PREBLOCK(FORMAT);
8274                 }
8275
8276                 /* Look for a prototype */
8277                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8278                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8279                     COPLINE_SET_FROM_MULTI_END;
8280                     if (!s)
8281                         Perl_croak(aTHX_ "Prototype not terminated");
8282                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8283                     have_proto = TRUE;
8284
8285                     s = skipspace(s);
8286                 }
8287                 else
8288                     have_proto = FALSE;
8289
8290                 if (*s == ':' && s[1] != ':')
8291                     PL_expect = attrful;
8292                 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8293                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8294                            key == KEY_DESTROY || key == KEY_BEGIN ||
8295                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8296                            key == KEY_INIT || key == KEY_END ||
8297                            key == KEY_my || key == KEY_state ||
8298                            key == KEY_our);
8299                     if (!have_name)
8300                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8301                     else if (*s != ';' && *s != '}')
8302                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8303                 }
8304
8305                 if (have_proto) {
8306                     NEXTVAL_NEXTTOKE.opval =
8307                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8308                     PL_lex_stuff = NULL;
8309                     force_next(THING);
8310                 }
8311                 if (!have_name) {
8312                     if (PL_curstash)
8313                         sv_setpvs(PL_subname, "__ANON__");
8314                     else
8315                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8316                     TOKEN(ANONSUB);
8317                 }
8318                 force_ident_maybe_lex('&');
8319                 TOKEN(SUB);
8320             }
8321
8322         case KEY_system:
8323             LOP(OP_SYSTEM,XREF);
8324
8325         case KEY_symlink:
8326             LOP(OP_SYMLINK,XTERM);
8327
8328         case KEY_syscall:
8329             LOP(OP_SYSCALL,XTERM);
8330
8331         case KEY_sysopen:
8332             LOP(OP_SYSOPEN,XTERM);
8333
8334         case KEY_sysseek:
8335             LOP(OP_SYSSEEK,XTERM);
8336
8337         case KEY_sysread:
8338             LOP(OP_SYSREAD,XTERM);
8339
8340         case KEY_syswrite:
8341             LOP(OP_SYSWRITE,XTERM);
8342
8343         case KEY_tr:
8344         case KEY_y:
8345             s = scan_trans(s);
8346             TERM(sublex_start());
8347
8348         case KEY_tell:
8349             UNI(OP_TELL);
8350
8351         case KEY_telldir:
8352             UNI(OP_TELLDIR);
8353
8354         case KEY_tie:
8355             LOP(OP_TIE,XTERM);
8356
8357         case KEY_tied:
8358             UNI(OP_TIED);
8359
8360         case KEY_time:
8361             FUN0(OP_TIME);
8362
8363         case KEY_times:
8364             FUN0(OP_TMS);
8365
8366         case KEY_truncate:
8367             LOP(OP_TRUNCATE,XTERM);
8368
8369         case KEY_uc:
8370             UNI(OP_UC);
8371
8372         case KEY_ucfirst:
8373             UNI(OP_UCFIRST);
8374
8375         case KEY_untie:
8376             UNI(OP_UNTIE);
8377
8378         case KEY_until:
8379             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8380                 return REPORT(0);
8381             pl_yylval.ival = CopLINE(PL_curcop);
8382             OPERATOR(UNTIL);
8383
8384         case KEY_unless:
8385             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8386                 return REPORT(0);
8387             pl_yylval.ival = CopLINE(PL_curcop);
8388             OPERATOR(UNLESS);
8389
8390         case KEY_unlink:
8391             LOP(OP_UNLINK,XTERM);
8392
8393         case KEY_undef:
8394             UNIDOR(OP_UNDEF);
8395
8396         case KEY_unpack:
8397             LOP(OP_UNPACK,XTERM);
8398
8399         case KEY_utime:
8400             LOP(OP_UTIME,XTERM);
8401
8402         case KEY_umask:
8403             UNIDOR(OP_UMASK);
8404
8405         case KEY_unshift:
8406             LOP(OP_UNSHIFT,XTERM);
8407
8408         case KEY_use:
8409             s = tokenize_use(1, s);
8410             TOKEN(USE);
8411
8412         case KEY_values:
8413             UNI(OP_VALUES);
8414
8415         case KEY_vec:
8416             LOP(OP_VEC,XTERM);
8417
8418         case KEY_when:
8419             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8420                 return REPORT(0);
8421             pl_yylval.ival = CopLINE(PL_curcop);
8422             Perl_ck_warner_d(aTHX_
8423                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8424                 "when is experimental");
8425             OPERATOR(WHEN);
8426
8427         case KEY_while:
8428             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8429                 return REPORT(0);
8430             pl_yylval.ival = CopLINE(PL_curcop);
8431             OPERATOR(WHILE);
8432
8433         case KEY_warn:
8434             PL_hints |= HINT_BLOCK_SCOPE;
8435             LOP(OP_WARN,XTERM);
8436
8437         case KEY_wait:
8438             FUN0(OP_WAIT);
8439
8440         case KEY_waitpid:
8441             LOP(OP_WAITPID,XTERM);
8442
8443         case KEY_wantarray:
8444             FUN0(OP_WANTARRAY);
8445
8446         case KEY_write:
8447             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8448              * we use the same number on EBCDIC */
8449             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8450             UNI(OP_ENTERWRITE);
8451
8452         case KEY_x:
8453             if (PL_expect == XOPERATOR) {
8454                 if (*s == '=' && !PL_lex_allbrackets
8455                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8456                 {
8457                     return REPORT(0);
8458                 }
8459                 Mop(OP_REPEAT);
8460             }
8461             check_uni();
8462             goto just_a_word;
8463
8464         case KEY_xor:
8465             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8466                 return REPORT(0);
8467             pl_yylval.ival = OP_XOR;
8468             OPERATOR(OROP);
8469         }
8470     }}
8471 }
8472
8473 /*
8474   S_pending_ident
8475
8476   Looks up an identifier in the pad or in a package
8477
8478   Returns:
8479     PRIVATEREF if this is a lexical name.
8480     WORD       if this belongs to a package.
8481
8482   Structure:
8483       if we're in a my declaration
8484           croak if they tried to say my($foo::bar)
8485           build the ops for a my() declaration
8486       if it's an access to a my() variable
8487           build ops for access to a my() variable
8488       if in a dq string, and they've said @foo and we can't find @foo
8489           warn
8490       build ops for a bareword
8491 */
8492
8493 static int
8494 S_pending_ident(pTHX)
8495 {
8496     PADOFFSET tmp = 0;
8497     const char pit = (char)pl_yylval.ival;
8498     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8499     /* All routes through this function want to know if there is a colon.  */
8500     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8501
8502     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8503           "### Pending identifier '%s'\n", PL_tokenbuf); });
8504
8505     /* if we're in a my(), we can't allow dynamics here.
8506        $foo'bar has already been turned into $foo::bar, so
8507        just check for colons.
8508
8509        if it's a legal name, the OP is a PADANY.
8510     */
8511     if (PL_in_my) {
8512         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8513             if (has_colon)
8514                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8515                                   "variable %s in \"our\"",
8516                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8517             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8518         }
8519         else {
8520             if (has_colon) {
8521                 /* "my" variable %s can't be in a package */
8522                 /* PL_no_myglob is constant */
8523                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8524                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8525                             PL_in_my == KEY_my ? "my" : "state",
8526                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8527                             PL_tokenbuf),
8528                             UTF ? SVf_UTF8 : 0);
8529                 GCC_DIAG_RESTORE;
8530             }
8531
8532             pl_yylval.opval = newOP(OP_PADANY, 0);
8533             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8534                                                         UTF ? SVf_UTF8 : 0);
8535             return PRIVATEREF;
8536         }
8537     }
8538
8539     /*
8540        build the ops for accesses to a my() variable.
8541     */
8542
8543     if (!has_colon) {
8544         if (!PL_in_my)
8545             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8546                                  0);
8547         if (tmp != NOT_IN_PAD) {
8548             /* might be an "our" variable" */
8549             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8550                 /* build ops for a bareword */
8551                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8552                 HEK * const stashname = HvNAME_HEK(stash);
8553                 SV *  const sym = newSVhek(stashname);
8554                 sv_catpvs(sym, "::");
8555                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8556                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8557                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8558                 if (pit != '&')
8559                   gv_fetchsv(sym,
8560                     GV_ADDMULTI,
8561                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8562                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8563                      : SVt_PVHV));
8564                 return WORD;
8565             }
8566
8567             pl_yylval.opval = newOP(OP_PADANY, 0);
8568             pl_yylval.opval->op_targ = tmp;
8569             return PRIVATEREF;
8570         }
8571     }
8572
8573     /*
8574        Whine if they've said @foo in a doublequoted string,
8575        and @foo isn't a variable we can find in the symbol
8576        table.
8577     */
8578     if (ckWARN(WARN_AMBIGUOUS)
8579         && pit == '@'
8580         && PL_lex_state != LEX_NORMAL
8581         && !PL_lex_brackets)
8582     {
8583         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8584                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8585         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8586                 /* DO NOT warn for @- and @+ */
8587                 && !( PL_tokenbuf[2] == '\0'
8588                       && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8589            )
8590         {
8591             /* Downgraded from fatal to warning 20000522 mjd */
8592             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8593                         "Possible unintended interpolation of %"UTF8f
8594                         " in string",
8595                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8596         }
8597     }
8598
8599     /* build ops for a bareword */
8600     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8601                                    newSVpvn_flags(PL_tokenbuf + 1,
8602                                                       tokenbuf_len - 1,
8603                                                       UTF ? SVf_UTF8 : 0 ));
8604     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8605     if (pit != '&')
8606         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8607                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8608                      | ( UTF ? SVf_UTF8 : 0 ),
8609                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8610                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8611                       : SVt_PVHV));
8612     return WORD;
8613 }
8614
8615 STATIC void
8616 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8617 {
8618     PERL_ARGS_ASSERT_CHECKCOMMA;
8619
8620     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8621         if (ckWARN(WARN_SYNTAX)) {
8622             int level = 1;
8623             const char *w;
8624             for (w = s+2; *w && level; w++) {
8625                 if (*w == '(')
8626                     ++level;
8627                 else if (*w == ')')
8628                     --level;
8629             }
8630             while (isSPACE(*w))
8631                 ++w;
8632             /* the list of chars below is for end of statements or
8633              * block / parens, boolean operators (&&, ||, //) and branch
8634              * constructs (or, and, if, until, unless, while, err, for).
8635              * Not a very solid hack... */
8636             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8637                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8638                             "%s (...) interpreted as function",name);
8639         }
8640     }
8641     while (s < PL_bufend && isSPACE(*s))
8642         s++;
8643     if (*s == '(')
8644         s++;
8645     while (s < PL_bufend && isSPACE(*s))
8646         s++;
8647     if (isIDFIRST_lazy_if(s,UTF)) {
8648         const char * const w = s;
8649         s += UTF ? UTF8SKIP(s) : 1;
8650         while (isWORDCHAR_lazy_if(s,UTF))
8651             s += UTF ? UTF8SKIP(s) : 1;
8652         while (s < PL_bufend && isSPACE(*s))
8653             s++;
8654         if (*s == ',') {
8655             GV* gv;
8656             PADOFFSET off;
8657             if (keyword(w, s - w, 0))
8658                 return;
8659
8660             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8661             if (gv && GvCVu(gv))
8662                 return;
8663             if (s - w <= 254) {
8664                 char tmpbuf[256];
8665                 Copy(w, tmpbuf+1, s - w, char);
8666                 *tmpbuf = '&';
8667                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8668                 if (off != NOT_IN_PAD) return;
8669             }
8670             Perl_croak(aTHX_ "No comma allowed after %s", what);
8671         }
8672     }
8673 }
8674
8675 /* S_new_constant(): do any overload::constant lookup.
8676
8677    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8678    Best used as sv=new_constant(..., sv, ...).
8679    If s, pv are NULL, calls subroutine with one argument,
8680    and <type> is used with error messages only.
8681    <type> is assumed to be well formed UTF-8 */
8682
8683 STATIC SV *
8684 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8685                SV *sv, SV *pv, const char *type, STRLEN typelen)
8686 {
8687     dSP;
8688     HV * table = GvHV(PL_hintgv);                /* ^H */
8689     SV *res;
8690     SV *errsv = NULL;
8691     SV **cvp;
8692     SV *cv, *typesv;
8693     const char *why1 = "", *why2 = "", *why3 = "";
8694
8695     PERL_ARGS_ASSERT_NEW_CONSTANT;
8696     /* We assume that this is true: */
8697     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8698     assert(type || s);
8699
8700     /* charnames doesn't work well if there have been errors found */
8701     if (PL_error_count > 0 && *key == 'c')
8702     {
8703         SvREFCNT_dec_NN(sv);
8704         return &PL_sv_undef;
8705     }
8706
8707     sv_2mortal(sv);                     /* Parent created it permanently */
8708     if (!table
8709         || ! (PL_hints & HINT_LOCALIZE_HH)
8710         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8711         || ! SvOK(*cvp))
8712     {
8713         char *msg;
8714
8715         /* Here haven't found what we're looking for.  If it is charnames,
8716          * perhaps it needs to be loaded.  Try doing that before giving up */
8717         if (*key == 'c') {
8718             Perl_load_module(aTHX_
8719                             0,
8720                             newSVpvs("_charnames"),
8721                              /* version parameter; no need to specify it, as if
8722                               * we get too early a version, will fail anyway,
8723                               * not being able to find '_charnames' */
8724                             NULL,
8725                             newSVpvs(":full"),
8726                             newSVpvs(":short"),
8727                             NULL);
8728             assert(sp == PL_stack_sp);
8729             table = GvHV(PL_hintgv);
8730             if (table
8731                 && (PL_hints & HINT_LOCALIZE_HH)
8732                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8733                 && SvOK(*cvp))
8734             {
8735                 goto now_ok;
8736             }
8737         }
8738         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8739             msg = Perl_form(aTHX_
8740                                "Constant(%.*s) unknown",
8741                                 (int)(type ? typelen : len),
8742                                 (type ? type: s));
8743         }
8744         else {
8745             why1 = "$^H{";
8746             why2 = key;
8747             why3 = "} is not defined";
8748         report:
8749             if (*key == 'c') {
8750                 msg = Perl_form(aTHX_
8751                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8752                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8753                       );
8754             }
8755             else {
8756                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8757                                     (int)(type ? typelen : len),
8758                                     (type ? type: s), why1, why2, why3);
8759             }
8760         }
8761         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8762         return SvREFCNT_inc_simple_NN(sv);
8763     }
8764   now_ok:
8765     cv = *cvp;
8766     if (!pv && s)
8767         pv = newSVpvn_flags(s, len, SVs_TEMP);
8768     if (type && pv)
8769         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8770     else
8771         typesv = &PL_sv_undef;
8772
8773     PUSHSTACKi(PERLSI_OVERLOAD);
8774     ENTER ;
8775     SAVETMPS;
8776
8777     PUSHMARK(SP) ;
8778     EXTEND(sp, 3);
8779     if (pv)
8780         PUSHs(pv);
8781     PUSHs(sv);
8782     if (pv)
8783         PUSHs(typesv);
8784     PUTBACK;
8785     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8786
8787     SPAGAIN ;
8788
8789     /* Check the eval first */
8790     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8791         STRLEN errlen;
8792         const char * errstr;
8793         sv_catpvs(errsv, "Propagated");
8794         errstr = SvPV_const(errsv, errlen);
8795         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8796         (void)POPs;
8797         res = SvREFCNT_inc_simple_NN(sv);
8798     }
8799     else {
8800         res = POPs;
8801         SvREFCNT_inc_simple_void_NN(res);
8802     }
8803
8804     PUTBACK ;
8805     FREETMPS ;
8806     LEAVE ;
8807     POPSTACK;
8808
8809     if (!SvOK(res)) {
8810         why1 = "Call to &{$^H{";
8811         why2 = key;
8812         why3 = "}} did not return a defined value";
8813         sv = res;
8814         (void)sv_2mortal(sv);
8815         goto report;
8816     }
8817
8818     return res;
8819 }
8820
8821 PERL_STATIC_INLINE void
8822 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8823     PERL_ARGS_ASSERT_PARSE_IDENT;
8824
8825     for (;;) {
8826         if (*d >= e)
8827             Perl_croak(aTHX_ "%s", ident_too_long);
8828         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8829              /* The UTF-8 case must come first, otherwise things
8830              * like c\N{COMBINING TILDE} would start failing, as the
8831              * isWORDCHAR_A case below would gobble the 'c' up.
8832              */
8833
8834             char *t = *s + UTF8SKIP(*s);
8835             while (isIDCONT_utf8((U8*)t))
8836                 t += UTF8SKIP(t);
8837             if (*d + (t - *s) > e)
8838                 Perl_croak(aTHX_ "%s", ident_too_long);
8839             Copy(*s, *d, t - *s, char);
8840             *d += t - *s;
8841             *s = t;
8842         }
8843         else if ( isWORDCHAR_A(**s) ) {
8844             do {
8845                 *(*d)++ = *(*s)++;
8846             } while (isWORDCHAR_A(**s) && *d < e);
8847         }
8848         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8849             *(*d)++ = ':';
8850             *(*d)++ = ':';
8851             (*s)++;
8852         }
8853         else if (allow_package && **s == ':' && (*s)[1] == ':'
8854            /* Disallow things like Foo::$bar. For the curious, this is
8855             * the code path that triggers the "Bad name after" warning
8856             * when looking for barewords.
8857             */
8858            && (*s)[2] != '$') {
8859             *(*d)++ = *(*s)++;
8860             *(*d)++ = *(*s)++;
8861         }
8862         else
8863             break;
8864     }
8865     return;
8866 }
8867
8868 /* Returns a NUL terminated string, with the length of the string written to
8869    *slp
8870    */
8871 STATIC char *
8872 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8873 {
8874     char *d = dest;
8875     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8876     bool is_utf8 = cBOOL(UTF);
8877
8878     PERL_ARGS_ASSERT_SCAN_WORD;
8879
8880     parse_ident(&s, &d, e, allow_package, is_utf8);
8881     *d = '\0';
8882     *slp = d - dest;
8883     return s;
8884 }
8885
8886 /* Is the byte 'd' a legal single character identifier name?  'u' is true
8887  * iff Unicode semantics are to be used.  The legal ones are any of:
8888  *  a) all ASCII characters except:
8889  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
8890  *          2) '{'
8891  *     The final case currently doesn't get this far in the program, so we
8892  *     don't test for it.  If that were to change, it would be ok to allow it.
8893  *  b) When not under Unicode rules, any upper Latin1 character
8894  *  c) Otherwise, when unicode rules are used, all XIDS characters.
8895  *
8896  *      Because all ASCII characters have the same representation whether
8897  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8898  *      '{' without knowing if is UTF-8 or not. */
8899 #define VALID_LEN_ONE_IDENT(s, is_utf8)                                       \
8900     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8901                          ? isIDFIRST_utf8((U8*) (s))                          \
8902                          : (isGRAPH_L1(*s)                                    \
8903                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8904
8905 STATIC char *
8906 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8907 {
8908     I32 herelines = PL_parser->herelines;
8909     SSize_t bracket = -1;
8910     char funny = *s++;
8911     char *d = dest;
8912     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8913     bool is_utf8 = cBOOL(UTF);
8914     I32 orig_copline = 0, tmp_copline = 0;
8915
8916     PERL_ARGS_ASSERT_SCAN_IDENT;
8917
8918     if (isSPACE(*s) || !*s)
8919         s = skipspace(s);
8920     if (isDIGIT(*s)) {
8921         while (isDIGIT(*s)) {
8922             if (d >= e)
8923                 Perl_croak(aTHX_ "%s", ident_too_long);
8924             *d++ = *s++;
8925         }
8926     }
8927     else {  /* See if it is a "normal" identifier */
8928         parse_ident(&s, &d, e, 1, is_utf8);
8929     }
8930     *d = '\0';
8931     d = dest;
8932     if (*d) {
8933         /* Either a digit variable, or parse_ident() found an identifier
8934            (anything valid as a bareword), so job done and return.  */
8935         if (PL_lex_state != LEX_NORMAL)
8936             PL_lex_state = LEX_INTERPENDMAYBE;
8937         return s;
8938     }
8939
8940     /* Here, it is not a run-of-the-mill identifier name */
8941
8942     if (*s == '$' && s[1]
8943         && (isIDFIRST_lazy_if(s+1,is_utf8)
8944             || isDIGIT_A((U8)s[1])
8945             || s[1] == '$'
8946             || s[1] == '{'
8947             || strnEQ(s+1,"::",2)) )
8948     {
8949         /* Dereferencing a value in a scalar variable.
8950            The alternatives are different syntaxes for a scalar variable.
8951            Using ' as a leading package separator isn't allowed. :: is.   */
8952         return s;
8953     }
8954     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8955     if (*s == '{') {
8956         bracket = s - SvPVX(PL_linestr);
8957         s++;
8958         orig_copline = CopLINE(PL_curcop);
8959         if (s < PL_bufend && isSPACE(*s)) {
8960             s = skipspace(s);
8961         }
8962     }
8963     if ((s <= PL_bufend - (is_utf8)
8964                           ? UTF8SKIP(s)
8965                           : 1)
8966         && VALID_LEN_ONE_IDENT(s, is_utf8))
8967     {
8968         if (is_utf8) {
8969             const STRLEN skip = UTF8SKIP(s);
8970             STRLEN i;
8971             d[skip] = '\0';
8972             for ( i = 0; i < skip; i++ )
8973                 d[i] = *s++;
8974         }
8975         else {
8976             *d = *s++;
8977             d[1] = '\0';
8978         }
8979     }
8980     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8981     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8982         *d = toCTRL(*s);
8983         s++;
8984     }
8985     /* Warn about ambiguous code after unary operators if {...} notation isn't
8986        used.  There's no difference in ambiguity; it's merely a heuristic
8987        about when not to warn.  */
8988     else if (ck_uni && bracket == -1)
8989         check_uni();
8990     if (bracket != -1) {
8991         /* If we were processing {...} notation then...  */
8992         if (isIDFIRST_lazy_if(d,is_utf8)) {
8993             /* if it starts as a valid identifier, assume that it is one.
8994                (the later check for } being at the expected point will trap
8995                cases where this doesn't pan out.)  */
8996             d += is_utf8 ? UTF8SKIP(d) : 1;
8997             parse_ident(&s, &d, e, 1, is_utf8);
8998             *d = '\0';
8999             tmp_copline = CopLINE(PL_curcop);
9000             if (s < PL_bufend && isSPACE(*s)) {
9001                 s = skipspace(s);
9002             }
9003             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9004                 /* ${foo[0]} and ${foo{bar}} notation.  */
9005                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9006                     const char * const brack =
9007                         (const char *)
9008                         ((*s == '[') ? "[...]" : "{...}");
9009                     orig_copline = CopLINE(PL_curcop);
9010                     CopLINE_set(PL_curcop, tmp_copline);
9011    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9012                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9013                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9014                         funny, dest, brack, funny, dest, brack);
9015                     CopLINE_set(PL_curcop, orig_copline);
9016                 }
9017                 bracket++;
9018                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9019                 PL_lex_allbrackets++;
9020                 return s;
9021             }
9022         }
9023         /* Handle extended ${^Foo} variables
9024          * 1999-02-27 mjd-perl-patch@plover.com */
9025         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9026                  && isWORDCHAR(*s))
9027         {
9028             d++;
9029             while (isWORDCHAR(*s) && d < e) {
9030                 *d++ = *s++;
9031             }
9032             if (d >= e)
9033                 Perl_croak(aTHX_ "%s", ident_too_long);
9034             *d = '\0';
9035         }
9036
9037         if ( !tmp_copline )
9038             tmp_copline = CopLINE(PL_curcop);
9039         if (s < PL_bufend && isSPACE(*s)) {
9040             s = skipspace(s);
9041         }
9042
9043         /* Expect to find a closing } after consuming any trailing whitespace.
9044          */
9045         if (*s == '}') {
9046             s++;
9047             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9048                 PL_lex_state = LEX_INTERPEND;
9049                 PL_expect = XREF;
9050             }
9051             if (PL_lex_state == LEX_NORMAL) {
9052                 if (ckWARN(WARN_AMBIGUOUS)
9053                     && (keyword(dest, d - dest, 0)
9054                         || get_cvn_flags(dest, d - dest, is_utf8
9055                            ? SVf_UTF8
9056                            : 0)))
9057                 {
9058                     SV *tmp = newSVpvn_flags( dest, d - dest,
9059                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9060                     if (funny == '#')
9061                         funny = '@';
9062                     orig_copline = CopLINE(PL_curcop);
9063                     CopLINE_set(PL_curcop, tmp_copline);
9064                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9065                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9066                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9067                     CopLINE_set(PL_curcop, orig_copline);
9068                 }
9069             }
9070         }
9071         else {
9072             /* Didn't find the closing } at the point we expected, so restore
9073                state such that the next thing to process is the opening { and */
9074             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9075             CopLINE_set(PL_curcop, orig_copline);
9076             PL_parser->herelines = herelines;
9077             *dest = '\0';
9078         }
9079     }
9080     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9081         PL_lex_state = LEX_INTERPEND;
9082     return s;
9083 }
9084
9085 static bool
9086 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9087
9088     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9089      * found in the parse starting at 's', based on the subset that are valid
9090      * in this context input to this routine in 'valid_flags'. Advances s.
9091      * Returns TRUE if the input should be treated as a valid flag, so the next
9092      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9093      * upon first call on the current regex.  This routine will set it to any
9094      * charset modifier found.  The caller shouldn't change it.  This way,
9095      * another charset modifier encountered in the parse can be detected as an
9096      * error, as we have decided to allow only one */
9097
9098     const char c = **s;
9099     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9100
9101     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9102         if (isWORDCHAR_lazy_if(*s, UTF)) {
9103             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9104                        UTF ? SVf_UTF8 : 0);
9105             (*s) += charlen;
9106             /* Pretend that it worked, so will continue processing before
9107              * dieing */
9108             return TRUE;
9109         }
9110         return FALSE;
9111     }
9112
9113     switch (c) {
9114
9115         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9116         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9117         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9118         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9119         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9120         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9121         case LOCALE_PAT_MOD:
9122             if (*charset) {
9123                 goto multiple_charsets;
9124             }
9125             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9126             *charset = c;
9127             break;
9128         case UNICODE_PAT_MOD:
9129             if (*charset) {
9130                 goto multiple_charsets;
9131             }
9132             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9133             *charset = c;
9134             break;
9135         case ASCII_RESTRICT_PAT_MOD:
9136             if (! *charset) {
9137                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9138             }
9139             else {
9140
9141                 /* Error if previous modifier wasn't an 'a', but if it was, see
9142                  * if, and accept, a second occurrence (only) */
9143                 if (*charset != 'a'
9144                     || get_regex_charset(*pmfl)
9145                         != REGEX_ASCII_RESTRICTED_CHARSET)
9146                 {
9147                         goto multiple_charsets;
9148                 }
9149                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9150             }
9151             *charset = c;
9152             break;
9153         case DEPENDS_PAT_MOD:
9154             if (*charset) {
9155                 goto multiple_charsets;
9156             }
9157             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9158             *charset = c;
9159             break;
9160     }
9161
9162     (*s)++;
9163     return TRUE;
9164
9165     multiple_charsets:
9166         if (*charset != c) {
9167             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9168         }
9169         else if (c == 'a') {
9170   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9171             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9172         }
9173         else {
9174             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9175         }
9176
9177         /* Pretend that it worked, so will continue processing before dieing */
9178         (*s)++;
9179         return TRUE;
9180 }
9181
9182 STATIC char *
9183 S_scan_pat(pTHX_ char *start, I32 type)
9184 {
9185     PMOP *pm;
9186     char *s;
9187     const char * const valid_flags =
9188         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9189     char charset = '\0';    /* character set modifier */
9190     unsigned int x_mod_count = 0;
9191
9192     PERL_ARGS_ASSERT_SCAN_PAT;
9193
9194     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9195     if (!s)
9196         Perl_croak(aTHX_ "Search pattern not terminated");
9197
9198     pm = (PMOP*)newPMOP(type, 0);
9199     if (PL_multi_open == '?') {
9200         /* This is the only point in the code that sets PMf_ONCE:  */
9201         pm->op_pmflags |= PMf_ONCE;
9202
9203         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9204            allows us to restrict the list needed by reset to just the ??
9205            matches.  */
9206         assert(type != OP_TRANS);
9207         if (PL_curstash) {
9208             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9209             U32 elements;
9210             if (!mg) {
9211                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9212                                  0);
9213             }
9214             elements = mg->mg_len / sizeof(PMOP**);
9215             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9216             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9217             mg->mg_len = elements * sizeof(PMOP**);
9218             PmopSTASH_set(pm,PL_curstash);
9219         }
9220     }
9221
9222     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9223      * anon CV. False positives like qr/[(?{]/ are harmless */
9224
9225     if (type == OP_QR) {
9226         STRLEN len;
9227         char *e, *p = SvPV(PL_lex_stuff, len);
9228         e = p + len;
9229         for (; p < e; p++) {
9230             if (p[0] == '(' && p[1] == '?'
9231                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9232             {
9233                 pm->op_pmflags |= PMf_HAS_CV;
9234                 break;
9235             }
9236         }
9237         pm->op_pmflags |= PMf_IS_QR;
9238     }
9239
9240     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9241                                 &s, &charset, &x_mod_count))
9242     {};
9243     /* issue a warning if /c is specified,but /g is not */
9244     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9245     {
9246         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9247                        "Use of /c modifier is meaningless without /g" );
9248     }
9249
9250     if (UNLIKELY((x_mod_count) > 1)) {
9251         yyerror("Only one /x regex modifier is allowed");
9252     }
9253
9254     PL_lex_op = (OP*)pm;
9255     pl_yylval.ival = OP_MATCH;
9256     return s;
9257 }
9258
9259 STATIC char *
9260 S_scan_subst(pTHX_ char *start)
9261 {
9262     char *s;
9263     PMOP *pm;
9264     I32 first_start;
9265     line_t first_line;
9266     I32 es = 0;
9267     char charset = '\0';    /* character set modifier */
9268     unsigned int x_mod_count = 0;
9269     char *t;
9270
9271     PERL_ARGS_ASSERT_SCAN_SUBST;
9272
9273     pl_yylval.ival = OP_NULL;
9274
9275     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9276
9277     if (!s)
9278         Perl_croak(aTHX_ "Substitution pattern not terminated");
9279
9280     s = t;
9281
9282     first_start = PL_multi_start;
9283     first_line = CopLINE(PL_curcop);
9284     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9285     if (!s) {
9286         SvREFCNT_dec_NN(PL_lex_stuff);
9287         PL_lex_stuff = NULL;
9288         Perl_croak(aTHX_ "Substitution replacement not terminated");
9289     }
9290     PL_multi_start = first_start;       /* so whole substitution is taken together */
9291
9292     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9293
9294
9295     while (*s) {
9296         if (*s == EXEC_PAT_MOD) {
9297             s++;
9298             es++;
9299         }
9300         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9301                                   &s, &charset, &x_mod_count))
9302         {
9303             break;
9304         }
9305     }
9306
9307     if (UNLIKELY((x_mod_count) > 1)) {
9308         yyerror("Only one /x regex modifier is allowed");
9309     }
9310
9311     if ((pm->op_pmflags & PMf_CONTINUE)) {
9312         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9313     }
9314
9315     if (es) {
9316         SV * const repl = newSVpvs("");
9317
9318         PL_multi_end = 0;
9319         pm->op_pmflags |= PMf_EVAL;
9320         while (es-- > 0) {
9321             if (es)
9322                 sv_catpvs(repl, "eval ");
9323             else
9324                 sv_catpvs(repl, "do ");
9325         }
9326         sv_catpvs(repl, "{");
9327         sv_catsv(repl, PL_sublex_info.repl);
9328         sv_catpvs(repl, "}");
9329         SvEVALED_on(repl);
9330         SvREFCNT_dec(PL_sublex_info.repl);
9331         PL_sublex_info.repl = repl;
9332     }
9333     if (CopLINE(PL_curcop) != first_line) {
9334         sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9335         ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9336             CopLINE(PL_curcop) - first_line;
9337         CopLINE_set(PL_curcop, first_line);
9338     }
9339
9340     PL_lex_op = (OP*)pm;
9341     pl_yylval.ival = OP_SUBST;
9342     return s;
9343 }
9344
9345 STATIC char *
9346 S_scan_trans(pTHX_ char *start)
9347 {
9348     char* s;
9349     OP *o;
9350     U8 squash;
9351     U8 del;
9352     U8 complement;
9353     bool nondestruct = 0;
9354     char *t;
9355
9356     PERL_ARGS_ASSERT_SCAN_TRANS;
9357
9358     pl_yylval.ival = OP_NULL;
9359
9360     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9361     if (!s)
9362         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9363
9364     s = t;
9365
9366     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9367     if (!s) {
9368         SvREFCNT_dec_NN(PL_lex_stuff);
9369         PL_lex_stuff = NULL;
9370         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9371     }
9372
9373     complement = del = squash = 0;
9374     while (1) {
9375         switch (*s) {
9376         case 'c':
9377             complement = OPpTRANS_COMPLEMENT;
9378             break;
9379         case 'd':
9380             del = OPpTRANS_DELETE;
9381             break;
9382         case 's':
9383             squash = OPpTRANS_SQUASH;
9384             break;
9385         case 'r':
9386             nondestruct = 1;
9387             break;
9388         default:
9389             goto no_more;
9390         }
9391         s++;
9392     }
9393   no_more:
9394
9395     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9396     o->op_private &= ~OPpTRANS_ALL;
9397     o->op_private |= del|squash|complement|
9398       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9399       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9400
9401     PL_lex_op = o;
9402     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9403
9404
9405     return s;
9406 }
9407
9408 /* scan_heredoc
9409    Takes a pointer to the first < in <<FOO.
9410    Returns a pointer to the byte following <<FOO.
9411
9412    This function scans a heredoc, which involves different methods
9413    depending on whether we are in a string eval, quoted construct, etc.
9414    This is because PL_linestr could containing a single line of input, or
9415    a whole string being evalled, or the contents of the current quote-
9416    like operator.
9417
9418    The two basic methods are:
9419     - Steal lines from the input stream
9420     - Scan the heredoc in PL_linestr and remove it therefrom
9421
9422    In a file scope or filtered eval, the first method is used; in a
9423    string eval, the second.
9424
9425    In a quote-like operator, we have to choose between the two,
9426    depending on where we can find a newline.  We peek into outer lex-
9427    ing scopes until we find one with a newline in it.  If we reach the
9428    outermost lexing scope and it is a file, we use the stream method.
9429    Otherwise it is treated as an eval.
9430 */
9431
9432 STATIC char *
9433 S_scan_heredoc(pTHX_ char *s)
9434 {
9435     I32 op_type = OP_SCALAR;
9436     I32 len;
9437     SV *tmpstr;
9438     char term;
9439     char *d;
9440     char *e;
9441     char *peek;
9442     const bool infile = PL_rsfp || PL_parser->filtered;
9443     const line_t origline = CopLINE(PL_curcop);
9444     LEXSHARED *shared = PL_parser->lex_shared;
9445
9446     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9447
9448     s += 2;
9449     d = PL_tokenbuf + 1;
9450     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9451     *PL_tokenbuf = '\n';
9452     peek = s;
9453     while (SPACE_OR_TAB(*peek))
9454         peek++;
9455     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9456         s = peek;
9457         term = *s++;
9458         s = delimcpy(d, e, s, PL_bufend, term, &len);
9459         if (s == PL_bufend)
9460             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9461         d += len;
9462         s++;
9463     }
9464     else {
9465         if (*s == '\\')
9466             /* <<\FOO is equivalent to <<'FOO' */
9467             s++, term = '\'';
9468         else
9469             term = '"';
9470         if (!isWORDCHAR_lazy_if(s,UTF))
9471             deprecate("bare << to mean <<\"\"");
9472         peek = s;
9473         while (isWORDCHAR_lazy_if(peek,UTF)) {
9474             peek += UTF ? UTF8SKIP(peek) : 1;
9475         }
9476         len = (peek - s >= e - d) ? (e - d) : (peek - s);
9477         Copy(s, d, len, char);
9478         s += len;
9479         d += len;
9480     }
9481     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9482         Perl_croak(aTHX_ "Delimiter for here document is too long");
9483     *d++ = '\n';
9484     *d = '\0';
9485     len = d - PL_tokenbuf;
9486
9487 #ifndef PERL_STRICT_CR
9488     d = strchr(s, '\r');
9489     if (d) {
9490         char * const olds = s;
9491         s = d;
9492         while (s < PL_bufend) {
9493             if (*s == '\r') {
9494                 *d++ = '\n';
9495                 if (*++s == '\n')
9496                     s++;
9497             }
9498             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9499                 *d++ = *s++;
9500                 s++;
9501             }
9502             else
9503                 *d++ = *s++;
9504         }
9505         *d = '\0';
9506         PL_bufend = d;
9507         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9508         s = olds;
9509     }
9510 #endif
9511
9512     tmpstr = newSV_type(SVt_PVIV);
9513     SvGROW(tmpstr, 80);
9514     if (term == '\'') {
9515         op_type = OP_CONST;
9516         SvIV_set(tmpstr, -1);
9517     }
9518     else if (term == '`') {
9519         op_type = OP_BACKTICK;
9520         SvIV_set(tmpstr, '\\');
9521     }
9522
9523     PL_multi_start = origline + 1 + PL_parser->herelines;
9524     PL_multi_open = PL_multi_close = '<';
9525     /* inside a string eval or quote-like operator */
9526     if (!infile || PL_lex_inwhat) {
9527         SV *linestr;
9528         char *bufend;
9529         char * const olds = s;
9530         PERL_CONTEXT * const cx = CX_CUR();
9531         /* These two fields are not set until an inner lexing scope is
9532            entered.  But we need them set here. */
9533         shared->ls_bufptr  = s;
9534         shared->ls_linestr = PL_linestr;
9535         if (PL_lex_inwhat)
9536           /* Look for a newline.  If the current buffer does not have one,
9537              peek into the line buffer of the parent lexing scope, going
9538              up as many levels as necessary to find one with a newline
9539              after bufptr.
9540            */
9541           while (!(s = (char *)memchr(
9542                     (void *)shared->ls_bufptr, '\n',
9543                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9544                 ))) {
9545             shared = shared->ls_prev;
9546             /* shared is only null if we have gone beyond the outermost
9547                lexing scope.  In a file, we will have broken out of the
9548                loop in the previous iteration.  In an eval, the string buf-
9549                fer ends with "\n;", so the while condition above will have
9550                evaluated to false.  So shared can never be null.  Or so you
9551                might think.  Odd syntax errors like s;@{<<; can gobble up
9552                the implicit semicolon at the end of a flie, causing the
9553                file handle to be closed even when we are not in a string
9554                eval.  So shared may be null in that case.  */
9555             if (UNLIKELY(!shared))
9556                 goto interminable;
9557             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9558                most lexing scope.  In a file, shared->ls_linestr at that
9559                level is just one line, so there is no body to steal. */
9560             if (infile && !shared->ls_prev) {
9561                 s = olds;
9562                 goto streaming;
9563             }
9564           }
9565         else {  /* eval or we've already hit EOF */
9566             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9567             if (!s)
9568                 goto interminable;
9569         }
9570         linestr = shared->ls_linestr;
9571         bufend = SvEND(linestr);
9572         d = s;
9573         while (s < bufend - len + 1
9574                && memNE(s,PL_tokenbuf,len) )
9575         {
9576             if (*s++ == '\n')
9577                 ++PL_parser->herelines;
9578         }
9579         if (s >= bufend - len + 1) {
9580             goto interminable;
9581         }
9582         sv_setpvn(tmpstr,d+1,s-d);
9583         s += len - 1;
9584         /* the preceding stmt passes a newline */
9585         PL_parser->herelines++;
9586
9587         /* s now points to the newline after the heredoc terminator.
9588            d points to the newline before the body of the heredoc.
9589          */
9590
9591         /* We are going to modify linestr in place here, so set
9592            aside copies of the string if necessary for re-evals or
9593            (caller $n)[6]. */
9594         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9595            check shared->re_eval_str. */
9596         if (shared->re_eval_start || shared->re_eval_str) {
9597             /* Set aside the rest of the regexp */
9598             if (!shared->re_eval_str)
9599                 shared->re_eval_str =
9600                        newSVpvn(shared->re_eval_start,
9601                                 bufend - shared->re_eval_start);
9602             shared->re_eval_start -= s-d;
9603         }
9604         if (cxstack_ix >= 0
9605             && CxTYPE(cx) == CXt_EVAL
9606             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9607             && cx->blk_eval.cur_text == linestr)
9608         {
9609             cx->blk_eval.cur_text = newSVsv(linestr);
9610             SvSCREAM_on(cx->blk_eval.cur_text);
9611         }
9612         /* Copy everything from s onwards back to d. */
9613         Move(s,d,bufend-s + 1,char);
9614         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9615         /* Setting PL_bufend only applies when we have not dug deeper
9616            into other scopes, because sublex_done sets PL_bufend to
9617            SvEND(PL_linestr). */
9618         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9619         s = olds;
9620     }
9621     else
9622     {
9623       SV *linestr_save;
9624       char *oldbufptr_save;
9625      streaming:
9626       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9627       term = PL_tokenbuf[1];
9628       len--;
9629       linestr_save = PL_linestr; /* must restore this afterwards */
9630       d = s;                     /* and this */
9631       oldbufptr_save = PL_oldbufptr;
9632       PL_linestr = newSVpvs("");
9633       PL_bufend = SvPVX(PL_linestr);
9634       while (1) {
9635         PL_bufptr = PL_bufend;
9636         CopLINE_set(PL_curcop,
9637                     origline + 1 + PL_parser->herelines);
9638         if (!lex_next_chunk(LEX_NO_TERM)
9639          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9640             /* Simply freeing linestr_save might seem simpler here, as it
9641                does not matter what PL_linestr points to, since we are
9642                about to croak; but in a quote-like op, linestr_save
9643                will have been prospectively freed already, via
9644                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9645                restore PL_linestr. */
9646             SvREFCNT_dec_NN(PL_linestr);
9647             PL_linestr = linestr_save;
9648             PL_oldbufptr = oldbufptr_save;
9649             goto interminable;
9650         }
9651         CopLINE_set(PL_curcop, origline);
9652         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9653             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9654             /* ^That should be enough to avoid this needing to grow:  */
9655             sv_catpvs(PL_linestr, "\n\0");
9656             assert(s == SvPVX(PL_linestr));
9657             PL_bufend = SvEND(PL_linestr);
9658         }
9659         s = PL_bufptr;
9660         PL_parser->herelines++;
9661         PL_last_lop = PL_last_uni = NULL;
9662 #ifndef PERL_STRICT_CR
9663         if (PL_bufend - PL_linestart >= 2) {
9664             if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9665                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9666             {
9667                 PL_bufend[-2] = '\n';
9668                 PL_bufend--;
9669                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9670             }
9671             else if (PL_bufend[-1] == '\r')
9672                 PL_bufend[-1] = '\n';
9673         }
9674         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9675             PL_bufend[-1] = '\n';
9676 #endif
9677         if (*s == term && PL_bufend-s >= len
9678          && memEQ(s,PL_tokenbuf + 1,len)) {
9679             SvREFCNT_dec(PL_linestr);
9680             PL_linestr = linestr_save;
9681             PL_linestart = SvPVX(linestr_save);
9682             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9683             PL_oldbufptr = oldbufptr_save;
9684             s = d;
9685             break;
9686         }
9687         else {
9688             sv_catsv(tmpstr,PL_linestr);
9689         }
9690       }
9691     }
9692     PL_multi_end = origline + PL_parser->herelines;
9693     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9694         SvPV_shrink_to_cur(tmpstr);
9695     }
9696     if (!IN_BYTES) {
9697         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9698             SvUTF8_on(tmpstr);
9699         else if (IN_ENCODING)
9700             sv_recode_to_utf8(tmpstr, _get_encoding());
9701     }
9702     PL_lex_stuff = tmpstr;
9703     pl_yylval.ival = op_type;
9704     return s;
9705
9706   interminable:
9707     SvREFCNT_dec(tmpstr);
9708     CopLINE_set(PL_curcop, origline);
9709     missingterm(PL_tokenbuf + 1);
9710 }
9711
9712 /* scan_inputsymbol
9713    takes: current position in input buffer
9714    returns: new position in input buffer
9715    side-effects: pl_yylval and lex_op are set.
9716
9717    This code handles:
9718
9719    <>           read from ARGV
9720    <<>>         read from ARGV without magic open
9721    <FH>         read from filehandle
9722    <pkg::FH>    read from package qualified filehandle
9723    <pkg'FH>     read from package qualified filehandle
9724    <$fh>        read from filehandle in $fh
9725    <*.h>        filename glob
9726
9727 */
9728
9729 STATIC char *
9730 S_scan_inputsymbol(pTHX_ char *start)
9731 {
9732     char *s = start;            /* current position in buffer */
9733     char *end;
9734     I32 len;
9735     bool nomagicopen = FALSE;
9736     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9737     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9738
9739     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9740
9741     end = strchr(s, '\n');
9742     if (!end)
9743         end = PL_bufend;
9744     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9745         nomagicopen = TRUE;
9746         *d = '\0';
9747         len = 0;
9748         s += 3;
9749     }
9750     else
9751         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
9752
9753     /* die if we didn't have space for the contents of the <>,
9754        or if it didn't end, or if we see a newline
9755     */
9756
9757     if (len >= (I32)sizeof PL_tokenbuf)
9758         Perl_croak(aTHX_ "Excessively long <> operator");
9759     if (s >= end)
9760         Perl_croak(aTHX_ "Unterminated <> operator");
9761
9762     s++;
9763
9764     /* check for <$fh>
9765        Remember, only scalar variables are interpreted as filehandles by
9766        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9767        treated as a glob() call.
9768        This code makes use of the fact that except for the $ at the front,
9769        a scalar variable and a filehandle look the same.
9770     */
9771     if (*d == '$' && d[1]) d++;
9772
9773     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9774     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9775         d += UTF ? UTF8SKIP(d) : 1;
9776
9777     /* If we've tried to read what we allow filehandles to look like, and
9778        there's still text left, then it must be a glob() and not a getline.
9779        Use scan_str to pull out the stuff between the <> and treat it
9780        as nothing more than a string.
9781     */
9782
9783     if (d - PL_tokenbuf != len) {
9784         pl_yylval.ival = OP_GLOB;
9785         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9786         if (!s)
9787            Perl_croak(aTHX_ "Glob not terminated");
9788         return s;
9789     }
9790     else {
9791         bool readline_overriden = FALSE;
9792         GV *gv_readline;
9793         /* we're in a filehandle read situation */
9794         d = PL_tokenbuf;
9795
9796         /* turn <> into <ARGV> */
9797         if (!len)
9798             Copy("ARGV",d,5,char);
9799
9800         /* Check whether readline() is overriden */
9801         if ((gv_readline = gv_override("readline",8)))
9802             readline_overriden = TRUE;
9803
9804         /* if <$fh>, create the ops to turn the variable into a
9805            filehandle
9806         */
9807         if (*d == '$') {
9808             /* try to find it in the pad for this block, otherwise find
9809                add symbol table ops
9810             */
9811             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9812             if (tmp != NOT_IN_PAD) {
9813                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9814                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9815                     HEK * const stashname = HvNAME_HEK(stash);
9816                     SV * const sym = sv_2mortal(newSVhek(stashname));
9817                     sv_catpvs(sym, "::");
9818                     sv_catpv(sym, d+1);
9819                     d = SvPVX(sym);
9820                     goto intro_sym;
9821                 }
9822                 else {
9823                     OP * const o = newOP(OP_PADSV, 0);
9824                     o->op_targ = tmp;
9825                     PL_lex_op = readline_overriden
9826                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9827                                 op_append_elem(OP_LIST, o,
9828                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9829                         : (OP*)newUNOP(OP_READLINE, 0, o);
9830                 }
9831             }
9832             else {
9833                 GV *gv;
9834                 ++d;
9835               intro_sym:
9836                 gv = gv_fetchpv(d,
9837                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9838                                 SVt_PV);
9839                 PL_lex_op = readline_overriden
9840                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9841                             op_append_elem(OP_LIST,
9842                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9843                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9844                     : (OP*)newUNOP(OP_READLINE, 0,
9845                             newUNOP(OP_RV2SV, 0,
9846                                 newGVOP(OP_GV, 0, gv)));
9847             }
9848             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9849             pl_yylval.ival = OP_NULL;
9850         }
9851
9852         /* If it's none of the above, it must be a literal filehandle
9853            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9854         else {
9855             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9856             PL_lex_op = readline_overriden
9857                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9858                         op_append_elem(OP_LIST,
9859                             newGVOP(OP_GV, 0, gv),
9860                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9861                 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9862             pl_yylval.ival = OP_NULL;
9863         }
9864     }
9865
9866     return s;
9867 }
9868
9869
9870 /* scan_str
9871    takes:
9872         start                   position in buffer
9873         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9874                                 only if they are of the open/close form
9875         keep_delims             preserve the delimiters around the string
9876         re_reparse              compiling a run-time /(?{})/:
9877                                    collapse // to /,  and skip encoding src
9878         delimp                  if non-null, this is set to the position of
9879                                 the closing delimiter, or just after it if
9880                                 the closing and opening delimiters differ
9881                                 (i.e., the opening delimiter of a substitu-
9882                                 tion replacement)
9883    returns: position to continue reading from buffer
9884    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9885         updates the read buffer.
9886
9887    This subroutine pulls a string out of the input.  It is called for:
9888         q               single quotes           q(literal text)
9889         '               single quotes           'literal text'
9890         qq              double quotes           qq(interpolate $here please)
9891         "               double quotes           "interpolate $here please"
9892         qx              backticks               qx(/bin/ls -l)
9893         `               backticks               `/bin/ls -l`
9894         qw              quote words             @EXPORT_OK = qw( func() $spam )
9895         m//             regexp match            m/this/
9896         s///            regexp substitute       s/this/that/
9897         tr///           string transliterate    tr/this/that/
9898         y///            string transliterate    y/this/that/
9899         ($*@)           sub prototypes          sub foo ($)
9900         (stuff)         sub attr parameters     sub foo : attr(stuff)
9901         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9902
9903    In most of these cases (all but <>, patterns and transliterate)
9904    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9905    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9906    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9907    calls scan_str().
9908
9909    It skips whitespace before the string starts, and treats the first
9910    character as the delimiter.  If the delimiter is one of ([{< then
9911    the corresponding "close" character )]}> is used as the closing
9912    delimiter.  It allows quoting of delimiters, and if the string has
9913    balanced delimiters ([{<>}]) it allows nesting.
9914
9915    On success, the SV with the resulting string is put into lex_stuff or,
9916    if that is already non-NULL, into lex_repl. The second case occurs only
9917    when parsing the RHS of the special constructs s/// and tr/// (y///).
9918    For convenience, the terminating delimiter character is stuffed into
9919    SvIVX of the SV.
9920 */
9921
9922 STATIC char *
9923 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9924                  char **delimp
9925     )
9926 {
9927     SV *sv;                     /* scalar value: string */
9928     const char *tmps;           /* temp string, used for delimiter matching */
9929     char *s = start;            /* current position in the buffer */
9930     char term;                  /* terminating character */
9931     char *to;                   /* current position in the sv's data */
9932     I32 brackets = 1;           /* bracket nesting level */
9933     bool has_utf8 = FALSE;      /* is there any utf8 content? */
9934     I32 termcode;               /* terminating char. code */
9935     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
9936     STRLEN termlen;             /* length of terminating string */
9937     int last_off = 0;           /* last position for nesting bracket */
9938     line_t herelines;
9939
9940     PERL_ARGS_ASSERT_SCAN_STR;
9941
9942     /* skip space before the delimiter */
9943     if (isSPACE(*s)) {
9944         s = skipspace(s);
9945     }
9946
9947     /* mark where we are, in case we need to report errors */
9948     CLINE;
9949
9950     /* after skipping whitespace, the next character is the terminator */
9951     term = *s;
9952     if (!UTF) {
9953         termcode = termstr[0] = term;
9954         termlen = 1;
9955     }
9956     else {
9957         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9958         Copy(s, termstr, termlen, U8);
9959         if (!UTF8_IS_INVARIANT(term))
9960             has_utf8 = TRUE;
9961     }
9962
9963     /* mark where we are */
9964     PL_multi_start = CopLINE(PL_curcop);
9965     PL_multi_open = term;
9966     herelines = PL_parser->herelines;
9967
9968     /* find corresponding closing delimiter */
9969     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9970         termcode = termstr[0] = term = tmps[5];
9971
9972     PL_multi_close = term;
9973
9974     if (PL_multi_open == PL_multi_close) {
9975         keep_bracketed_quoted = FALSE;
9976     }
9977
9978     /* create a new SV to hold the contents.  79 is the SV's initial length.
9979        What a random number. */
9980     sv = newSV_type(SVt_PVIV);
9981     SvGROW(sv, 80);
9982     SvIV_set(sv, termcode);
9983     (void)SvPOK_only(sv);               /* validate pointer */
9984
9985     /* move past delimiter and try to read a complete string */
9986     if (keep_delims)
9987         sv_catpvn(sv, s, termlen);
9988     s += termlen;
9989     for (;;) {
9990         if (IN_ENCODING && !UTF && !re_reparse) {
9991             bool cont = TRUE;
9992
9993             while (cont) {
9994                 int offset = s - SvPVX_const(PL_linestr);
9995                 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
9996                                            &offset, (char*)termstr, termlen);
9997                 const char *ns;
9998                 char *svlast;
9999
10000                 if (SvIsCOW(PL_linestr)) {
10001                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10002                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10003                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10004                     char *buf = SvPVX(PL_linestr);
10005                     bufend_pos = PL_parser->bufend - buf;
10006                     bufptr_pos = PL_parser->bufptr - buf;
10007                     oldbufptr_pos = PL_parser->oldbufptr - buf;
10008                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10009                     linestart_pos = PL_parser->linestart - buf;
10010                     last_uni_pos = PL_parser->last_uni
10011                         ? PL_parser->last_uni - buf
10012                         : 0;
10013                     last_lop_pos = PL_parser->last_lop
10014                         ? PL_parser->last_lop - buf
10015                         : 0;
10016                     re_eval_start_pos =
10017                         PL_parser->lex_shared->re_eval_start ?
10018                             PL_parser->lex_shared->re_eval_start - buf : 0;
10019                     s_pos = s - buf;
10020
10021                     sv_force_normal(PL_linestr);
10022
10023                     buf = SvPVX(PL_linestr);
10024                     PL_parser->bufend = buf + bufend_pos;
10025                     PL_parser->bufptr = buf + bufptr_pos;
10026                     PL_parser->oldbufptr = buf + oldbufptr_pos;
10027                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10028                     PL_parser->linestart = buf + linestart_pos;
10029                     if (PL_parser->last_uni)
10030                         PL_parser->last_uni = buf + last_uni_pos;
10031                     if (PL_parser->last_lop)
10032                         PL_parser->last_lop = buf + last_lop_pos;
10033                     if (PL_parser->lex_shared->re_eval_start)
10034                         PL_parser->lex_shared->re_eval_start  =
10035                             buf + re_eval_start_pos;
10036                     s = buf + s_pos;
10037                 }
10038                 ns = SvPVX_const(PL_linestr) + offset;
10039                 svlast = SvEND(sv) - 1;
10040
10041                 for (; s < ns; s++) {
10042                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10043                         COPLINE_INC_WITH_HERELINES;
10044                 }
10045                 if (!found)
10046                     goto read_more_line;
10047                 else {
10048                     /* handle quoted delimiters */
10049                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10050                         const char *t;
10051                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10052                             t--;
10053                         if ((svlast-1 - t) % 2) {
10054                             if (!keep_bracketed_quoted) {
10055                                 *(svlast-1) = term;
10056                                 *svlast = '\0';
10057                                 SvCUR_set(sv, SvCUR(sv) - 1);
10058                             }
10059                             continue;
10060                         }
10061                     }
10062                     if (PL_multi_open == PL_multi_close) {
10063                         cont = FALSE;
10064                     }
10065                     else {
10066                         const char *t;
10067                         char *w;
10068                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10069                             /* At here, all closes are "was quoted" one,
10070                                so we don't check PL_multi_close. */
10071                             if (*t == '\\') {
10072                                 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
10073                                     t++;
10074                                 else
10075                                     *w++ = *t++;
10076                             }
10077                             else if (*t == PL_multi_open)
10078                                 brackets++;
10079
10080                             *w = *t;
10081                         }
10082                         if (w < t) {
10083                             *w++ = term;
10084                             *w = '\0';
10085                             SvCUR_set(sv, w - SvPVX_const(sv));
10086                         }
10087                         last_off = w - SvPVX(sv);
10088                         if (--brackets <= 0)
10089                             cont = FALSE;
10090                     }
10091                 }
10092             }
10093             if (!keep_delims) {
10094                 SvCUR_set(sv, SvCUR(sv) - 1);
10095                 *SvEND(sv) = '\0';
10096             }
10097             break;
10098         }
10099
10100         /* extend sv if need be */
10101         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10102         /* set 'to' to the next character in the sv's string */
10103         to = SvPVX(sv)+SvCUR(sv);
10104
10105         /* if open delimiter is the close delimiter read unbridle */
10106         if (PL_multi_open == PL_multi_close) {
10107             for (; s < PL_bufend; s++,to++) {
10108                 /* embedded newlines increment the current line number */
10109                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10110                     COPLINE_INC_WITH_HERELINES;
10111                 /* handle quoted delimiters */
10112                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10113                     if (!keep_bracketed_quoted
10114                         && (s[1] == term
10115                             || (re_reparse && s[1] == '\\'))
10116                     )
10117                         s++;
10118                     else /* any other quotes are simply copied straight through */
10119                         *to++ = *s++;
10120                 }
10121                 /* terminate when run out of buffer (the for() condition), or
10122                    have found the terminator */
10123                 else if (*s == term) {
10124                     if (termlen == 1)
10125                         break;
10126                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10127                         break;
10128                 }
10129                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10130                     has_utf8 = TRUE;
10131                 *to = *s;
10132             }
10133         }
10134
10135         /* if the terminator isn't the same as the start character (e.g.,
10136            matched brackets), we have to allow more in the quoting, and
10137            be prepared for nested brackets.
10138         */
10139         else {
10140             /* read until we run out of string, or we find the terminator */
10141             for (; s < PL_bufend; s++,to++) {
10142                 /* embedded newlines increment the line count */
10143                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10144                     COPLINE_INC_WITH_HERELINES;
10145                 /* backslashes can escape the open or closing characters */
10146                 if (*s == '\\' && s+1 < PL_bufend) {
10147                     if (!keep_bracketed_quoted
10148                        && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10149                     {
10150                         s++;
10151                     }
10152                     else
10153                         *to++ = *s++;
10154                 }
10155                 /* allow nested opens and closes */
10156                 else if (*s == PL_multi_close && --brackets <= 0)
10157                     break;
10158                 else if (*s == PL_multi_open)
10159                     brackets++;
10160                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10161                     has_utf8 = TRUE;
10162                 *to = *s;
10163             }
10164         }
10165         /* terminate the copied string and update the sv's end-of-string */
10166         *to = '\0';
10167         SvCUR_set(sv, to - SvPVX_const(sv));
10168
10169         /*
10170          * this next chunk reads more into the buffer if we're not done yet
10171          */
10172
10173         if (s < PL_bufend)
10174             break;              /* handle case where we are done yet :-) */
10175
10176 #ifndef PERL_STRICT_CR
10177         if (to - SvPVX_const(sv) >= 2) {
10178             if (   (to[-2] == '\r' && to[-1] == '\n')
10179                 || (to[-2] == '\n' && to[-1] == '\r'))
10180             {
10181                 to[-2] = '\n';
10182                 to--;
10183                 SvCUR_set(sv, to - SvPVX_const(sv));
10184             }
10185             else if (to[-1] == '\r')
10186                 to[-1] = '\n';
10187         }
10188         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10189             to[-1] = '\n';
10190 #endif
10191
10192      read_more_line:
10193         /* if we're out of file, or a read fails, bail and reset the current
10194            line marker so we can report where the unterminated string began
10195         */
10196         COPLINE_INC_WITH_HERELINES;
10197         PL_bufptr = PL_bufend;
10198         if (!lex_next_chunk(0)) {
10199             sv_free(sv);
10200             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10201             return NULL;
10202         }
10203         s = PL_bufptr;
10204     }
10205
10206     /* at this point, we have successfully read the delimited string */
10207
10208     if (!IN_ENCODING || UTF || re_reparse) {
10209
10210         if (keep_delims)
10211             sv_catpvn(sv, s, termlen);
10212         s += termlen;
10213     }
10214     if (has_utf8 || (IN_ENCODING && !re_reparse))
10215         SvUTF8_on(sv);
10216
10217     PL_multi_end = CopLINE(PL_curcop);
10218     CopLINE_set(PL_curcop, PL_multi_start);
10219     PL_parser->herelines = herelines;
10220
10221     /* if we allocated too much space, give some back */
10222     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10223         SvLEN_set(sv, SvCUR(sv) + 1);
10224         SvPV_renew(sv, SvLEN(sv));
10225     }
10226
10227     /* decide whether this is the first or second quoted string we've read
10228        for this op
10229     */
10230
10231     if (PL_lex_stuff)
10232         PL_sublex_info.repl = sv;
10233     else
10234         PL_lex_stuff = sv;
10235     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10236     return s;
10237 }
10238
10239 /*
10240   scan_num
10241   takes: pointer to position in buffer
10242   returns: pointer to new position in buffer
10243   side-effects: builds ops for the constant in pl_yylval.op
10244
10245   Read a number in any of the formats that Perl accepts:
10246
10247   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10248   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10249   0b[01](_?[01])*                                       binary integers
10250   0[0-7](_?[0-7])*                                      octal integers
10251   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10252   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10253
10254   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10255   thing it reads.
10256
10257   If it reads a number without a decimal point or an exponent, it will
10258   try converting the number to an integer and see if it can do so
10259   without loss of precision.
10260 */
10261
10262 char *
10263 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10264 {
10265     const char *s = start;      /* current position in buffer */
10266     char *d;                    /* destination in temp buffer */
10267     char *e;                    /* end of temp buffer */
10268     NV nv;                              /* number read, as a double */
10269     SV *sv = NULL;                      /* place to put the converted number */
10270     bool floatit;                       /* boolean: int or float? */
10271     const char *lastub = NULL;          /* position of last underbar */
10272     static const char* const number_too_long = "Number too long";
10273     /* Hexadecimal floating point.
10274      *
10275      * In many places (where we have quads and NV is IEEE 754 double)
10276      * we can fit the mantissa bits of a NV into an unsigned quad.
10277      * (Note that UVs might not be quads even when we have quads.)
10278      * This will not work everywhere, though (either no quads, or
10279      * using long doubles), in which case we have to resort to NV,
10280      * which will probably mean horrible loss of precision due to
10281      * multiple fp operations. */
10282     bool hexfp = FALSE;
10283     int total_bits = 0;
10284     int significant_bits = 0;
10285 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10286 #  define HEXFP_UQUAD
10287     Uquad_t hexfp_uquad = 0;
10288     int hexfp_frac_bits = 0;
10289 #else
10290 #  define HEXFP_NV
10291     NV hexfp_nv = 0.0;
10292 #endif
10293     NV hexfp_mult = 1.0;
10294     UV high_non_zero = 0; /* highest digit */
10295     int non_zero_integer_digits = 0;
10296
10297     PERL_ARGS_ASSERT_SCAN_NUM;
10298
10299     /* We use the first character to decide what type of number this is */
10300
10301     switch (*s) {
10302     default:
10303         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10304
10305     /* if it starts with a 0, it could be an octal number, a decimal in
10306        0.13 disguise, or a hexadecimal number, or a binary number. */
10307     case '0':
10308         {
10309           /* variables:
10310              u          holds the "number so far"
10311              shift      the power of 2 of the base
10312                         (hex == 4, octal == 3, binary == 1)
10313              overflowed was the number more than we can hold?
10314
10315              Shift is used when we add a digit.  It also serves as an "are
10316              we in octal/hex/binary?" indicator to disallow hex characters
10317              when in octal mode.
10318            */
10319             NV n = 0.0;
10320             UV u = 0;
10321             I32 shift;
10322             bool overflowed = FALSE;
10323             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10324             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10325             static const char* const bases[5] =
10326               { "", "binary", "", "octal", "hexadecimal" };
10327             static const char* const Bases[5] =
10328               { "", "Binary", "", "Octal", "Hexadecimal" };
10329             static const char* const maxima[5] =
10330               { "",
10331                 "0b11111111111111111111111111111111",
10332                 "",
10333                 "037777777777",
10334                 "0xffffffff" };
10335             const char *base, *Base, *max;
10336
10337             /* check for hex */
10338             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10339                 shift = 4;
10340                 s += 2;
10341                 just_zero = FALSE;
10342             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10343                 shift = 1;
10344                 s += 2;
10345                 just_zero = FALSE;
10346             }
10347             /* check for a decimal in disguise */
10348             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10349                 goto decimal;
10350             /* so it must be octal */
10351             else {
10352                 shift = 3;
10353                 s++;
10354             }
10355
10356             if (*s == '_') {
10357                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10358                                "Misplaced _ in number");
10359                lastub = s++;
10360             }
10361
10362             base = bases[shift];
10363             Base = Bases[shift];
10364             max  = maxima[shift];
10365
10366             /* read the rest of the number */
10367             for (;;) {
10368                 /* x is used in the overflow test,
10369                    b is the digit we're adding on. */
10370                 UV x, b;
10371
10372                 switch (*s) {
10373
10374                 /* if we don't mention it, we're done */
10375                 default:
10376                     goto out;
10377
10378                 /* _ are ignored -- but warned about if consecutive */
10379                 case '_':
10380                     if (lastub && s == lastub + 1)
10381                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10382                                        "Misplaced _ in number");
10383                     lastub = s++;
10384                     break;
10385
10386                 /* 8 and 9 are not octal */
10387                 case '8': case '9':
10388                     if (shift == 3)
10389                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10390                     /* FALLTHROUGH */
10391
10392                 /* octal digits */
10393                 case '2': case '3': case '4':
10394                 case '5': case '6': case '7':
10395                     if (shift == 1)
10396                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10397                     /* FALLTHROUGH */
10398
10399                 case '0': case '1':
10400                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10401                     goto digit;
10402
10403                 /* hex digits */
10404                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10405                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10406                     /* make sure they said 0x */
10407                     if (shift != 4)
10408                         goto out;
10409                     b = (*s++ & 7) + 9;
10410
10411                     /* Prepare to put the digit we have onto the end
10412                        of the number so far.  We check for overflows.
10413                     */
10414
10415                   digit:
10416                     just_zero = FALSE;
10417                     if (!overflowed) {
10418                         x = u << shift; /* make room for the digit */
10419
10420                         total_bits += shift;
10421
10422                         if ((x >> shift) != u
10423                             && !(PL_hints & HINT_NEW_BINARY)) {
10424                             overflowed = TRUE;
10425                             n = (NV) u;
10426                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10427                                              "Integer overflow in %s number",
10428                                              base);
10429                         } else
10430                             u = x | b;          /* add the digit to the end */
10431                     }
10432                     if (overflowed) {
10433                         n *= nvshift[shift];
10434                         /* If an NV has not enough bits in its
10435                          * mantissa to represent an UV this summing of
10436                          * small low-order numbers is a waste of time
10437                          * (because the NV cannot preserve the
10438                          * low-order bits anyway): we could just
10439                          * remember when did we overflow and in the
10440                          * end just multiply n by the right
10441                          * amount. */
10442                         n += (NV) b;
10443                     }
10444
10445                     if (high_non_zero == 0 && b > 0)
10446                         high_non_zero = b;
10447
10448                     if (high_non_zero)
10449                         non_zero_integer_digits++;
10450
10451                     /* this could be hexfp, but peek ahead
10452                      * to avoid matching ".." */
10453                     if (UNLIKELY(HEXFP_PEEK(s))) {
10454                         goto out;
10455                     }
10456
10457                     break;
10458                 }
10459             }
10460
10461           /* if we get here, we had success: make a scalar value from
10462              the number.
10463           */
10464           out:
10465
10466             /* final misplaced underbar check */
10467             if (s[-1] == '_') {
10468                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10469             }
10470
10471             if (UNLIKELY(HEXFP_PEEK(s))) {
10472                 /* Do sloppy (on the underbars) but quick detection
10473                  * (and value construction) for hexfp, the decimal
10474                  * detection will shortly be more thorough with the
10475                  * underbar checks. */
10476                 const char* h = s;
10477                 significant_bits = non_zero_integer_digits * shift;
10478 #ifdef HEXFP_UQUAD
10479                 hexfp_uquad = u;
10480 #else /* HEXFP_NV */
10481                 hexfp_nv = u;
10482 #endif
10483                 /* Ignore the leading zero bits of
10484                  * the high (first) non-zero digit. */
10485                 if (high_non_zero) {
10486                     if (high_non_zero < 0x8)
10487                         significant_bits--;
10488                     if (high_non_zero < 0x4)
10489                         significant_bits--;
10490                     if (high_non_zero < 0x2)
10491                         significant_bits--;
10492                 }
10493
10494                 if (*h == '.') {
10495 #ifdef HEXFP_NV
10496                     NV nv_mult = 1.0;
10497 #endif
10498                     bool accumulate = TRUE;
10499                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10500                         if (isXDIGIT(*h)) {
10501                             U8 b = XDIGIT_VALUE(*h);
10502                             significant_bits += shift;
10503 #ifdef HEXFP_UQUAD
10504                             if (accumulate) {
10505                                 if (significant_bits < NV_MANT_DIG) {
10506                                     /* We are in the long "run" of xdigits,
10507                                      * accumulate the full four bits. */
10508                                     hexfp_uquad <<= shift;
10509                                     hexfp_uquad |= b;
10510                                     hexfp_frac_bits += shift;
10511                                 } else {
10512                                     /* We are at a hexdigit either at,
10513                                      * or straddling, the edge of mantissa.
10514                                      * We will try grabbing as many as
10515                                      * possible bits. */
10516                                     int tail =
10517                                       significant_bits - NV_MANT_DIG;
10518                                     if (tail <= 0)
10519                                        tail += shift;
10520                                     hexfp_uquad <<= tail;
10521                                     hexfp_uquad |= b >> (shift - tail);
10522                                     hexfp_frac_bits += tail;
10523
10524                                     /* Ignore the trailing zero bits
10525                                      * of the last non-zero xdigit.
10526                                      *
10527                                      * The assumption here is that if
10528                                      * one has input of e.g. the xdigit
10529                                      * eight (0x8), there is only one
10530                                      * bit being input, not the full
10531                                      * four bits.  Conversely, if one
10532                                      * specifies a zero xdigit, the
10533                                      * assumption is that one really
10534                                      * wants all those bits to be zero. */
10535                                     if (b) {
10536                                         if ((b & 0x1) == 0x0) {
10537                                             significant_bits--;
10538                                             if ((b & 0x2) == 0x0) {
10539                                                 significant_bits--;
10540                                                 if ((b & 0x4) == 0x0) {
10541                                                     significant_bits--;
10542                                                 }
10543                                             }
10544                                         }
10545                                     }
10546
10547                                     accumulate = FALSE;
10548                                 }
10549                             } else {
10550                                 /* Keep skipping the xdigits, and
10551                                  * accumulating the significant bits,
10552                                  * but do not shift the uquad
10553                                  * (which would catastrophically drop
10554                                  * high-order bits) or accumulate the
10555                                  * xdigits anymore. */
10556                             }
10557 #else /* HEXFP_NV */
10558                             if (accumulate) {
10559                                 nv_mult /= 16.0;
10560                                 if (nv_mult > 0.0)
10561                                     hexfp_nv += b * nv_mult;
10562                                 else
10563                                     accumulate = FALSE;
10564                             }
10565 #endif
10566                         }
10567                         if (significant_bits >= NV_MANT_DIG)
10568                             accumulate = FALSE;
10569                     }
10570                 }
10571
10572                 if ((total_bits > 0 || significant_bits > 0) &&
10573                     isALPHA_FOLD_EQ(*h, 'p')) {
10574                     bool negexp = FALSE;
10575                     h++;
10576                     if (*h == '+')
10577                         h++;
10578                     else if (*h == '-') {
10579                         negexp = TRUE;
10580                         h++;
10581                     }
10582                     if (isDIGIT(*h)) {
10583                         I32 hexfp_exp = 0;
10584                         while (isDIGIT(*h) || *h == '_') {
10585                             if (isDIGIT(*h)) {
10586                                 hexfp_exp *= 10;
10587                                 hexfp_exp += *h - '0';
10588 #ifdef NV_MIN_EXP
10589                                 if (negexp
10590                                     && -hexfp_exp < NV_MIN_EXP - 1) {
10591                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10592                                                    "Hexadecimal float: exponent underflow");
10593                                     break;
10594                                 }
10595 #endif
10596 #ifdef NV_MAX_EXP
10597                                 if (!negexp
10598                                     && hexfp_exp > NV_MAX_EXP - 1) {
10599                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10600                                                    "Hexadecimal float: exponent overflow");
10601                                     break;
10602                                 }
10603 #endif
10604                             }
10605                             h++;
10606                         }
10607                         if (negexp)
10608                             hexfp_exp = -hexfp_exp;
10609 #ifdef HEXFP_UQUAD
10610                         hexfp_exp -= hexfp_frac_bits;
10611 #endif
10612                         hexfp_mult = pow(2.0, hexfp_exp);
10613                         hexfp = TRUE;
10614                         goto decimal;
10615                     }
10616                 }
10617             }
10618
10619             if (overflowed) {
10620                 if (n > 4294967295.0)
10621                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10622                                    "%s number > %s non-portable",
10623                                    Base, max);
10624                 sv = newSVnv(n);
10625             }
10626             else {
10627 #if UVSIZE > 4
10628                 if (u > 0xffffffff)
10629                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10630                                    "%s number > %s non-portable",
10631                                    Base, max);
10632 #endif
10633                 sv = newSVuv(u);
10634             }
10635             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10636                 sv = new_constant(start, s - start, "integer",
10637                                   sv, NULL, NULL, 0);
10638             else if (PL_hints & HINT_NEW_BINARY)
10639                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10640         }
10641         break;
10642
10643     /*
10644       handle decimal numbers.
10645       we're also sent here when we read a 0 as the first digit
10646     */
10647     case '1': case '2': case '3': case '4': case '5':
10648     case '6': case '7': case '8': case '9': case '.':
10649       decimal:
10650         d = PL_tokenbuf;
10651         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10652         floatit = FALSE;
10653         if (hexfp) {
10654             floatit = TRUE;
10655             *d++ = '0';
10656             *d++ = 'x';
10657             s = start + 2;
10658         }
10659
10660         /* read next group of digits and _ and copy into d */
10661         while (isDIGIT(*s)
10662                || *s == '_'
10663                || UNLIKELY(hexfp && isXDIGIT(*s)))
10664         {
10665             /* skip underscores, checking for misplaced ones
10666                if -w is on
10667             */
10668             if (*s == '_') {
10669                 if (lastub && s == lastub + 1)
10670                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10671                                    "Misplaced _ in number");
10672                 lastub = s++;
10673             }
10674             else {
10675                 /* check for end of fixed-length buffer */
10676                 if (d >= e)
10677                     Perl_croak(aTHX_ "%s", number_too_long);
10678                 /* if we're ok, copy the character */
10679                 *d++ = *s++;
10680             }
10681         }
10682
10683         /* final misplaced underbar check */
10684         if (lastub && s == lastub + 1) {
10685             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10686         }
10687
10688         /* read a decimal portion if there is one.  avoid
10689            3..5 being interpreted as the number 3. followed
10690            by .5
10691         */
10692         if (*s == '.' && s[1] != '.') {
10693             floatit = TRUE;
10694             *d++ = *s++;
10695
10696             if (*s == '_') {
10697                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10698                                "Misplaced _ in number");
10699                 lastub = s;
10700             }
10701
10702             /* copy, ignoring underbars, until we run out of digits.
10703             */
10704             for (; isDIGIT(*s)
10705                    || *s == '_'
10706                    || UNLIKELY(hexfp && isXDIGIT(*s));
10707                  s++)
10708             {
10709                 /* fixed length buffer check */
10710                 if (d >= e)
10711                     Perl_croak(aTHX_ "%s", number_too_long);
10712                 if (*s == '_') {
10713                    if (lastub && s == lastub + 1)
10714                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10715                                       "Misplaced _ in number");
10716                    lastub = s;
10717                 }
10718                 else
10719                     *d++ = *s;
10720             }
10721             /* fractional part ending in underbar? */
10722             if (s[-1] == '_') {
10723                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10724                                "Misplaced _ in number");
10725             }
10726             if (*s == '.' && isDIGIT(s[1])) {
10727                 /* oops, it's really a v-string, but without the "v" */
10728                 s = start;
10729                 goto vstring;
10730             }
10731         }
10732
10733         /* read exponent part, if present */
10734         if ((isALPHA_FOLD_EQ(*s, 'e')
10735               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10736             && strchr("+-0123456789_", s[1]))
10737         {
10738             floatit = TRUE;
10739
10740             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10741                ditto for p (hexfloats) */
10742             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10743                 /* At least some Mach atof()s don't grok 'E' */
10744                 *d++ = 'e';
10745             }
10746             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10747                 *d++ = 'p';
10748             }
10749
10750             s++;
10751
10752
10753             /* stray preinitial _ */
10754             if (*s == '_') {
10755                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10756                                "Misplaced _ in number");
10757                 lastub = s++;
10758             }
10759
10760             /* allow positive or negative exponent */
10761             if (*s == '+' || *s == '-')
10762                 *d++ = *s++;
10763
10764             /* stray initial _ */
10765             if (*s == '_') {
10766                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10767                                "Misplaced _ in number");
10768                 lastub = s++;
10769             }
10770
10771             /* read digits of exponent */
10772             while (isDIGIT(*s) || *s == '_') {
10773                 if (isDIGIT(*s)) {
10774                     if (d >= e)
10775                         Perl_croak(aTHX_ "%s", number_too_long);
10776                     *d++ = *s++;
10777                 }
10778                 else {
10779                    if (((lastub && s == lastub + 1)
10780                         || (!isDIGIT(s[1]) && s[1] != '_')))
10781                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10782                                       "Misplaced _ in number");
10783                    lastub = s++;
10784                 }
10785             }
10786         }
10787
10788
10789         /*
10790            We try to do an integer conversion first if no characters
10791            indicating "float" have been found.
10792          */
10793
10794         if (!floatit) {
10795             UV uv;
10796             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10797
10798             if (flags == IS_NUMBER_IN_UV) {
10799               if (uv <= IV_MAX)
10800                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10801               else
10802                 sv = newSVuv(uv);
10803             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10804               if (uv <= (UV) IV_MIN)
10805                 sv = newSViv(-(IV)uv);
10806               else
10807                 floatit = TRUE;
10808             } else
10809               floatit = TRUE;
10810         }
10811         if (floatit) {
10812             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10813             /* terminate the string */
10814             *d = '\0';
10815             if (UNLIKELY(hexfp)) {
10816 #  ifdef NV_MANT_DIG
10817                 if (significant_bits > NV_MANT_DIG)
10818                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10819                                    "Hexadecimal float: mantissa overflow");
10820 #  endif
10821 #ifdef HEXFP_UQUAD
10822                 nv = hexfp_uquad * hexfp_mult;
10823 #else /* HEXFP_NV */
10824                 nv = hexfp_nv * hexfp_mult;
10825 #endif
10826             } else {
10827                 nv = Atof(PL_tokenbuf);
10828             }
10829             RESTORE_LC_NUMERIC_UNDERLYING();
10830             sv = newSVnv(nv);
10831         }
10832
10833         if ( floatit
10834              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10835             const char *const key = floatit ? "float" : "integer";
10836             const STRLEN keylen = floatit ? 5 : 7;
10837             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10838                                 key, keylen, sv, NULL, NULL, 0);
10839         }
10840         break;
10841
10842     /* if it starts with a v, it could be a v-string */
10843     case 'v':
10844     vstring:
10845                 sv = newSV(5); /* preallocate storage space */
10846                 ENTER_with_name("scan_vstring");
10847                 SAVEFREESV(sv);
10848                 s = scan_vstring(s, PL_bufend, sv);
10849                 SvREFCNT_inc_simple_void_NN(sv);
10850                 LEAVE_with_name("scan_vstring");
10851         break;
10852     }
10853
10854     /* make the op for the constant and return */
10855
10856     if (sv)
10857         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10858     else
10859         lvalp->opval = NULL;
10860
10861     return (char *)s;
10862 }
10863
10864 STATIC char *
10865 S_scan_formline(pTHX_ char *s)
10866 {
10867     char *eol;
10868     char *t;
10869     SV * const stuff = newSVpvs("");
10870     bool needargs = FALSE;
10871     bool eofmt = FALSE;
10872
10873     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10874
10875     while (!needargs) {
10876         if (*s == '.') {
10877             t = s+1;
10878 #ifdef PERL_STRICT_CR
10879             while (SPACE_OR_TAB(*t))
10880                 t++;
10881 #else
10882             while (SPACE_OR_TAB(*t) || *t == '\r')
10883                 t++;
10884 #endif
10885             if (*t == '\n' || t == PL_bufend) {
10886                 eofmt = TRUE;
10887                 break;
10888             }
10889         }
10890         eol = (char *) memchr(s,'\n',PL_bufend-s);
10891         if (!eol++)
10892                 eol = PL_bufend;
10893         if (*s != '#') {
10894             for (t = s; t < eol; t++) {
10895                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10896                     needargs = FALSE;
10897                     goto enough;        /* ~~ must be first line in formline */
10898                 }
10899                 if (*t == '@' || *t == '^')
10900                     needargs = TRUE;
10901             }
10902             if (eol > s) {
10903                 sv_catpvn(stuff, s, eol-s);
10904 #ifndef PERL_STRICT_CR
10905                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10906                     char *end = SvPVX(stuff) + SvCUR(stuff);
10907                     end[-2] = '\n';
10908                     end[-1] = '\0';
10909                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10910                 }
10911 #endif
10912             }
10913             else
10914               break;
10915         }
10916         s = (char*)eol;
10917         if ((PL_rsfp || PL_parser->filtered)
10918          && PL_parser->form_lex_state == LEX_NORMAL) {
10919             bool got_some;
10920             PL_bufptr = PL_bufend;
10921             COPLINE_INC_WITH_HERELINES;
10922             got_some = lex_next_chunk(0);
10923             CopLINE_dec(PL_curcop);
10924             s = PL_bufptr;
10925             if (!got_some)
10926                 break;
10927         }
10928         incline(s);
10929     }
10930   enough:
10931     if (!SvCUR(stuff) || needargs)
10932         PL_lex_state = PL_parser->form_lex_state;
10933     if (SvCUR(stuff)) {
10934         PL_expect = XSTATE;
10935         if (needargs) {
10936             const char *s2 = s;
10937             while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10938                 || *s2 == '\v')
10939                 s2++;
10940             if (*s2 == '{') {
10941                 PL_expect = XTERMBLOCK;
10942                 NEXTVAL_NEXTTOKE.ival = 0;
10943                 force_next(DO);
10944             }
10945             NEXTVAL_NEXTTOKE.ival = 0;
10946             force_next(FORMLBRACK);
10947         }
10948         if (!IN_BYTES) {
10949             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10950                 SvUTF8_on(stuff);
10951             else if (IN_ENCODING)
10952                 sv_recode_to_utf8(stuff, _get_encoding());
10953         }
10954         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10955         force_next(THING);
10956     }
10957     else {
10958         SvREFCNT_dec(stuff);
10959         if (eofmt)
10960             PL_lex_formbrack = 0;
10961     }
10962     return s;
10963 }
10964
10965 I32
10966 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10967 {
10968     const I32 oldsavestack_ix = PL_savestack_ix;
10969     CV* const outsidecv = PL_compcv;
10970
10971     SAVEI32(PL_subline);
10972     save_item(PL_subname);
10973     SAVESPTR(PL_compcv);
10974
10975     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10976     CvFLAGS(PL_compcv) |= flags;
10977
10978     PL_subline = CopLINE(PL_curcop);
10979     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10980     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10981     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10982     if (outsidecv && CvPADLIST(outsidecv))
10983         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10984
10985     return oldsavestack_ix;
10986 }
10987
10988 static int
10989 S_yywarn(pTHX_ const char *const s, U32 flags)
10990 {
10991     PERL_ARGS_ASSERT_YYWARN;
10992
10993     PL_in_eval |= EVAL_WARNONLY;
10994     yyerror_pv(s, flags);
10995     return 0;
10996 }
10997
10998 int
10999 Perl_yyerror(pTHX_ const char *const s)
11000 {
11001     PERL_ARGS_ASSERT_YYERROR;
11002     return yyerror_pvn(s, strlen(s), 0);
11003 }
11004
11005 int
11006 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11007 {
11008     PERL_ARGS_ASSERT_YYERROR_PV;
11009     return yyerror_pvn(s, strlen(s), flags);
11010 }
11011
11012 int
11013 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11014 {
11015     const char *context = NULL;
11016     int contlen = -1;
11017     SV *msg;
11018     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11019     int yychar  = PL_parser->yychar;
11020
11021     PERL_ARGS_ASSERT_YYERROR_PVN;
11022
11023     if (!yychar || (yychar == ';' && !PL_rsfp))
11024         sv_catpvs(where_sv, "at EOF");
11025     else if (   PL_oldoldbufptr
11026              && PL_bufptr > PL_oldoldbufptr
11027              && PL_bufptr - PL_oldoldbufptr < 200
11028              && PL_oldoldbufptr != PL_oldbufptr
11029              && PL_oldbufptr != PL_bufptr)
11030     {
11031         /*
11032                 Only for NetWare:
11033                 The code below is removed for NetWare because it abends/crashes on NetWare
11034                 when the script has error such as not having the closing quotes like:
11035                     if ($var eq "value)
11036                 Checking of white spaces is anyway done in NetWare code.
11037         */
11038 #ifndef NETWARE
11039         while (isSPACE(*PL_oldoldbufptr))
11040             PL_oldoldbufptr++;
11041 #endif
11042         context = PL_oldoldbufptr;
11043         contlen = PL_bufptr - PL_oldoldbufptr;
11044     }
11045     else if (  PL_oldbufptr
11046             && PL_bufptr > PL_oldbufptr
11047             && PL_bufptr - PL_oldbufptr < 200
11048             && PL_oldbufptr != PL_bufptr) {
11049         /*
11050                 Only for NetWare:
11051                 The code below is removed for NetWare because it abends/crashes on NetWare
11052                 when the script has error such as not having the closing quotes like:
11053                     if ($var eq "value)
11054                 Checking of white spaces is anyway done in NetWare code.
11055         */
11056 #ifndef NETWARE
11057         while (isSPACE(*PL_oldbufptr))
11058             PL_oldbufptr++;
11059 #endif
11060         context = PL_oldbufptr;
11061         contlen = PL_bufptr - PL_oldbufptr;
11062     }
11063     else if (yychar > 255)
11064         sv_catpvs(where_sv, "next token ???");
11065     else if (yychar == YYEMPTY) {
11066         if (PL_lex_state == LEX_NORMAL)
11067             sv_catpvs(where_sv, "at end of line");
11068         else if (PL_lex_inpat)
11069             sv_catpvs(where_sv, "within pattern");
11070         else
11071             sv_catpvs(where_sv, "within string");
11072     }
11073     else {
11074         sv_catpvs(where_sv, "next char ");
11075         if (yychar < 32)
11076             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11077         else if (isPRINT_LC(yychar)) {
11078             const char string = yychar;
11079             sv_catpvn(where_sv, &string, 1);
11080         }
11081         else
11082             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11083     }
11084     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11085     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11086         OutCopFILE(PL_curcop),
11087         (IV)(PL_parser->preambling == NOLINE
11088                ? CopLINE(PL_curcop)
11089                : PL_parser->preambling));
11090     if (context)
11091         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11092                              UTF8fARG(UTF, contlen, context));
11093     else
11094         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11095     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11096         Perl_sv_catpvf(aTHX_ msg,
11097         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11098                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11099         PL_multi_end = 0;
11100     }
11101     if (PL_in_eval & EVAL_WARNONLY) {
11102         PL_in_eval &= ~EVAL_WARNONLY;
11103         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11104     }
11105     else
11106         qerror(msg);
11107     if (PL_error_count >= 10) {
11108         SV * errsv;
11109         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11110             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11111                        SVfARG(errsv), OutCopFILE(PL_curcop));
11112         else
11113             Perl_croak(aTHX_ "%s has too many errors.\n",
11114             OutCopFILE(PL_curcop));
11115     }
11116     PL_in_my = 0;
11117     PL_in_my_stash = NULL;
11118     return 0;
11119 }
11120
11121 STATIC char*
11122 S_swallow_bom(pTHX_ U8 *s)
11123 {
11124     const STRLEN slen = SvCUR(PL_linestr);
11125
11126     PERL_ARGS_ASSERT_SWALLOW_BOM;
11127
11128     switch (s[0]) {
11129     case 0xFF:
11130         if (s[1] == 0xFE) {
11131             /* UTF-16 little-endian? (or UTF-32LE?) */
11132             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11133                 /* diag_listed_as: Unsupported script encoding %s */
11134                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11135 #ifndef PERL_NO_UTF16_FILTER
11136             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11137             s += 2;
11138             if (PL_bufend > (char*)s) {
11139                 s = add_utf16_textfilter(s, TRUE);
11140             }
11141 #else
11142             /* diag_listed_as: Unsupported script encoding %s */
11143             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11144 #endif
11145         }
11146         break;
11147     case 0xFE:
11148         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11149 #ifndef PERL_NO_UTF16_FILTER
11150             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11151             s += 2;
11152             if (PL_bufend > (char *)s) {
11153                 s = add_utf16_textfilter(s, FALSE);
11154             }
11155 #else
11156             /* diag_listed_as: Unsupported script encoding %s */
11157             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11158 #endif
11159         }
11160         break;
11161     case BOM_UTF8_FIRST_BYTE: {
11162         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11163         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11164             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11165             s += len + 1;                      /* UTF-8 */
11166         }
11167         break;
11168     }
11169     case 0:
11170         if (slen > 3) {
11171              if (s[1] == 0) {
11172                   if (s[2] == 0xFE && s[3] == 0xFF) {
11173                        /* UTF-32 big-endian */
11174                        /* diag_listed_as: Unsupported script encoding %s */
11175                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11176                   }
11177              }
11178              else if (s[2] == 0 && s[3] != 0) {
11179                   /* Leading bytes
11180                    * 00 xx 00 xx
11181                    * are a good indicator of UTF-16BE. */
11182 #ifndef PERL_NO_UTF16_FILTER
11183                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11184                   s = add_utf16_textfilter(s, FALSE);
11185 #else
11186                   /* diag_listed_as: Unsupported script encoding %s */
11187                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11188 #endif
11189              }
11190         }
11191         break;
11192
11193     default:
11194          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11195                   /* Leading bytes
11196                    * xx 00 xx 00
11197                    * are a good indicator of UTF-16LE. */
11198 #ifndef PERL_NO_UTF16_FILTER
11199               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11200               s = add_utf16_textfilter(s, TRUE);
11201 #else
11202               /* diag_listed_as: Unsupported script encoding %s */
11203               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11204 #endif
11205          }
11206     }
11207     return (char*)s;
11208 }
11209
11210
11211 #ifndef PERL_NO_UTF16_FILTER
11212 static I32
11213 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11214 {
11215     SV *const filter = FILTER_DATA(idx);
11216     /* We re-use this each time round, throwing the contents away before we
11217        return.  */
11218     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11219     SV *const utf8_buffer = filter;
11220     IV status = IoPAGE(filter);
11221     const bool reverse = cBOOL(IoLINES(filter));
11222     I32 retval;
11223
11224     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11225
11226     /* As we're automatically added, at the lowest level, and hence only called
11227        from this file, we can be sure that we're not called in block mode. Hence
11228        don't bother writing code to deal with block mode.  */
11229     if (maxlen) {
11230         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11231     }
11232     if (status < 0) {
11233         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11234     }
11235     DEBUG_P(PerlIO_printf(Perl_debug_log,
11236                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11237                           FPTR2DPTR(void *, S_utf16_textfilter),
11238                           reverse ? 'l' : 'b', idx, maxlen, status,
11239                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11240
11241     while (1) {
11242         STRLEN chars;
11243         STRLEN have;
11244         I32 newlen;
11245         U8 *end;
11246         /* First, look in our buffer of existing UTF-8 data:  */
11247         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11248
11249         if (nl) {
11250             ++nl;
11251         } else if (status == 0) {
11252             /* EOF */
11253             IoPAGE(filter) = 0;
11254             nl = SvEND(utf8_buffer);
11255         }
11256         if (nl) {
11257             STRLEN got = nl - SvPVX(utf8_buffer);
11258             /* Did we have anything to append?  */
11259             retval = got != 0;
11260             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11261             /* Everything else in this code works just fine if SVp_POK isn't
11262                set.  This, however, needs it, and we need it to work, else
11263                we loop infinitely because the buffer is never consumed.  */
11264             sv_chop(utf8_buffer, nl);
11265             break;
11266         }
11267
11268         /* OK, not a complete line there, so need to read some more UTF-16.
11269            Read an extra octect if the buffer currently has an odd number. */
11270         while (1) {
11271             if (status <= 0)
11272                 break;
11273             if (SvCUR(utf16_buffer) >= 2) {
11274                 /* Location of the high octet of the last complete code point.
11275                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11276                    *coupled* with all the benefits of partial reads and
11277                    endianness.  */
11278                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11279                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11280
11281                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11282                     break;
11283                 }
11284
11285                 /* We have the first half of a surrogate. Read more.  */
11286                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11287             }
11288
11289             status = FILTER_READ(idx + 1, utf16_buffer,
11290                                  160 + (SvCUR(utf16_buffer) & 1));
11291             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11292             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11293             if (status < 0) {
11294                 /* Error */
11295                 IoPAGE(filter) = status;
11296                 return status;
11297             }
11298         }
11299
11300         chars = SvCUR(utf16_buffer) >> 1;
11301         have = SvCUR(utf8_buffer);
11302         SvGROW(utf8_buffer, have + chars * 3 + 1);
11303
11304         if (reverse) {
11305             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11306                                          (U8*)SvPVX_const(utf8_buffer) + have,
11307                                          chars * 2, &newlen);
11308         } else {
11309             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11310                                 (U8*)SvPVX_const(utf8_buffer) + have,
11311                                 chars * 2, &newlen);
11312         }
11313         SvCUR_set(utf8_buffer, have + newlen);
11314         *end = '\0';
11315
11316         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11317            it's private to us, and utf16_to_utf8{,reversed} take a
11318            (pointer,length) pair, rather than a NUL-terminated string.  */
11319         if(SvCUR(utf16_buffer) & 1) {
11320             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11321             SvCUR_set(utf16_buffer, 1);
11322         } else {
11323             SvCUR_set(utf16_buffer, 0);
11324         }
11325     }
11326     DEBUG_P(PerlIO_printf(Perl_debug_log,
11327                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11328                           status,
11329                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11330     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11331     return retval;
11332 }
11333
11334 static U8 *
11335 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11336 {
11337     SV *filter = filter_add(S_utf16_textfilter, NULL);
11338
11339     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11340
11341     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11342     sv_setpvs(filter, "");
11343     IoLINES(filter) = reversed;
11344     IoPAGE(filter) = 1; /* Not EOF */
11345
11346     /* Sadly, we have to return a valid pointer, come what may, so we have to
11347        ignore any error return from this.  */
11348     SvCUR_set(PL_linestr, 0);
11349     if (FILTER_READ(0, PL_linestr, 0)) {
11350         SvUTF8_on(PL_linestr);
11351     } else {
11352         SvUTF8_on(PL_linestr);
11353     }
11354     PL_bufend = SvEND(PL_linestr);
11355     return (U8*)SvPVX(PL_linestr);
11356 }
11357 #endif
11358
11359 /*
11360 Returns a pointer to the next character after the parsed
11361 vstring, as well as updating the passed in sv.
11362
11363 Function must be called like
11364
11365         sv = sv_2mortal(newSV(5));
11366         s = scan_vstring(s,e,sv);
11367
11368 where s and e are the start and end of the string.
11369 The sv should already be large enough to store the vstring
11370 passed in, for performance reasons.
11371
11372 This function may croak if fatal warnings are enabled in the
11373 calling scope, hence the sv_2mortal in the example (to prevent
11374 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11375 sv_2mortal.
11376
11377 */
11378
11379 char *
11380 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11381 {
11382     const char *pos = s;
11383     const char *start = s;
11384
11385     PERL_ARGS_ASSERT_SCAN_VSTRING;
11386
11387     if (*pos == 'v') pos++;  /* get past 'v' */
11388     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11389         pos++;
11390     if ( *pos != '.') {
11391         /* this may not be a v-string if followed by => */
11392         const char *next = pos;
11393         while (next < e && isSPACE(*next))
11394             ++next;
11395         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11396             /* return string not v-string */
11397             sv_setpvn(sv,(char *)s,pos-s);
11398             return (char *)pos;
11399         }
11400     }
11401
11402     if (!isALPHA(*pos)) {
11403         U8 tmpbuf[UTF8_MAXBYTES+1];
11404
11405         if (*s == 'v')
11406             s++;  /* get past 'v' */
11407
11408         sv_setpvs(sv, "");
11409
11410         for (;;) {
11411             /* this is atoi() that tolerates underscores */
11412             U8 *tmpend;
11413             UV rev = 0;
11414             const char *end = pos;
11415             UV mult = 1;
11416             while (--end >= s) {
11417                 if (*end != '_') {
11418                     const UV orev = rev;
11419                     rev += (*end - '0') * mult;
11420                     mult *= 10;
11421                     if (orev > rev)
11422                         /* diag_listed_as: Integer overflow in %s number */
11423                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11424                                          "Integer overflow in decimal number");
11425                 }
11426             }
11427
11428             /* Append native character for the rev point */
11429             tmpend = uvchr_to_utf8(tmpbuf, rev);
11430             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11431             if (!UVCHR_IS_INVARIANT(rev))
11432                  SvUTF8_on(sv);
11433             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11434                  s = ++pos;
11435             else {
11436                  s = pos;
11437                  break;
11438             }
11439             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11440                  pos++;
11441         }
11442         SvPOK_on(sv);
11443         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11444         SvRMAGICAL_on(sv);
11445     }
11446     return (char *)s;
11447 }
11448
11449 int
11450 Perl_keyword_plugin_standard(pTHX_
11451         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11452 {
11453     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11454     PERL_UNUSED_CONTEXT;
11455     PERL_UNUSED_ARG(keyword_ptr);
11456     PERL_UNUSED_ARG(keyword_len);
11457     PERL_UNUSED_ARG(op_ptr);
11458     return KEYWORD_PLUGIN_DECLINE;
11459 }
11460
11461 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11462 static void
11463 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11464 {
11465     SAVEI32(PL_lex_brackets);
11466     if (PL_lex_brackets > 100)
11467         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11468     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11469     SAVEI32(PL_lex_allbrackets);
11470     PL_lex_allbrackets = 0;
11471     SAVEI8(PL_lex_fakeeof);
11472     PL_lex_fakeeof = (U8)fakeeof;
11473     if(yyparse(gramtype) && !PL_parser->error_count)
11474         qerror(Perl_mess(aTHX_ "Parse error"));
11475 }
11476
11477 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11478 static OP *
11479 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11480 {
11481     OP *o;
11482     ENTER;
11483     SAVEVPTR(PL_eval_root);
11484     PL_eval_root = NULL;
11485     parse_recdescent(gramtype, fakeeof);
11486     o = PL_eval_root;
11487     LEAVE;
11488     return o;
11489 }
11490
11491 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11492 static OP *
11493 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11494 {
11495     OP *exprop;
11496     if (flags & ~PARSE_OPTIONAL)
11497         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11498     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11499     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11500         if (!PL_parser->error_count)
11501             qerror(Perl_mess(aTHX_ "Parse error"));
11502         exprop = newOP(OP_NULL, 0);
11503     }
11504     return exprop;
11505 }
11506
11507 /*
11508 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11509
11510 Parse a Perl arithmetic expression.  This may contain operators of precedence
11511 down to the bit shift operators.  The expression must be followed (and thus
11512 terminated) either by a comparison or lower-precedence operator or by
11513 something that would normally terminate an expression such as semicolon.
11514 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11515 otherwise it is mandatory.  It is up to the caller to ensure that the
11516 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11517 the source of the code to be parsed and the lexical context for the
11518 expression.
11519
11520 The op tree representing the expression is returned.  If an optional
11521 expression is absent, a null pointer is returned, otherwise the pointer
11522 will be non-null.
11523
11524 If an error occurs in parsing or compilation, in most cases a valid op
11525 tree is returned anyway.  The error is reflected in the parser state,
11526 normally resulting in a single exception at the top level of parsing
11527 which covers all the compilation errors that occurred.  Some compilation
11528 errors, however, will throw an exception immediately.
11529
11530 =cut
11531 */
11532
11533 OP *
11534 Perl_parse_arithexpr(pTHX_ U32 flags)
11535 {
11536     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11537 }
11538
11539 /*
11540 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11541
11542 Parse a Perl term expression.  This may contain operators of precedence
11543 down to the assignment operators.  The expression must be followed (and thus
11544 terminated) either by a comma or lower-precedence operator or by
11545 something that would normally terminate an expression such as semicolon.
11546 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11547 otherwise it is mandatory.  It is up to the caller to ensure that the
11548 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11549 the source of the code to be parsed and the lexical context for the
11550 expression.
11551
11552 The op tree representing the expression is returned.  If an optional
11553 expression is absent, a null pointer is returned, otherwise the pointer
11554 will be non-null.
11555
11556 If an error occurs in parsing or compilation, in most cases a valid op
11557 tree is returned anyway.  The error is reflected in the parser state,
11558 normally resulting in a single exception at the top level of parsing
11559 which covers all the compilation errors that occurred.  Some compilation
11560 errors, however, will throw an exception immediately.
11561
11562 =cut
11563 */
11564
11565 OP *
11566 Perl_parse_termexpr(pTHX_ U32 flags)
11567 {
11568     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11569 }
11570
11571 /*
11572 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11573
11574 Parse a Perl list expression.  This may contain operators of precedence
11575 down to the comma operator.  The expression must be followed (and thus
11576 terminated) either by a low-precedence logic operator such as C<or> or by
11577 something that would normally terminate an expression such as semicolon.
11578 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11579 otherwise it is mandatory.  It is up to the caller to ensure that the
11580 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11581 the source of the code to be parsed and the lexical context for the
11582 expression.
11583
11584 The op tree representing the expression is returned.  If an optional
11585 expression is absent, a null pointer is returned, otherwise the pointer
11586 will be non-null.
11587
11588 If an error occurs in parsing or compilation, in most cases a valid op
11589 tree is returned anyway.  The error is reflected in the parser state,
11590 normally resulting in a single exception at the top level of parsing
11591 which covers all the compilation errors that occurred.  Some compilation
11592 errors, however, will throw an exception immediately.
11593
11594 =cut
11595 */
11596
11597 OP *
11598 Perl_parse_listexpr(pTHX_ U32 flags)
11599 {
11600     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11601 }
11602
11603 /*
11604 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11605
11606 Parse a single complete Perl expression.  This allows the full
11607 expression grammar, including the lowest-precedence operators such
11608 as C<or>.  The expression must be followed (and thus terminated) by a
11609 token that an expression would normally be terminated by: end-of-file,
11610 closing bracketing punctuation, semicolon, or one of the keywords that
11611 signals a postfix expression-statement modifier.  If C<flags> has the
11612 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11613 mandatory.  It is up to the caller to ensure that the dynamic parser
11614 state (L</PL_parser> et al) is correctly set to reflect the source of
11615 the code to be parsed and the lexical context for the expression.
11616
11617 The op tree representing the expression is returned.  If an optional
11618 expression is absent, a null pointer is returned, otherwise the pointer
11619 will be non-null.
11620
11621 If an error occurs in parsing or compilation, in most cases a valid op
11622 tree is returned anyway.  The error is reflected in the parser state,
11623 normally resulting in a single exception at the top level of parsing
11624 which covers all the compilation errors that occurred.  Some compilation
11625 errors, however, will throw an exception immediately.
11626
11627 =cut
11628 */
11629
11630 OP *
11631 Perl_parse_fullexpr(pTHX_ U32 flags)
11632 {
11633     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11634 }
11635
11636 /*
11637 =for apidoc Amx|OP *|parse_block|U32 flags
11638
11639 Parse a single complete Perl code block.  This consists of an opening
11640 brace, a sequence of statements, and a closing brace.  The block
11641 constitutes a lexical scope, so C<my> variables and various compile-time
11642 effects can be contained within it.  It is up to the caller to ensure
11643 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11644 reflect the source of the code to be parsed and the lexical context for
11645 the statement.
11646
11647 The op tree representing the code block is returned.  This is always a
11648 real op, never a null pointer.  It will normally be a C<lineseq> list,
11649 including C<nextstate> or equivalent ops.  No ops to construct any kind
11650 of runtime scope are included by virtue of it being a block.
11651
11652 If an error occurs in parsing or compilation, in most cases a valid op
11653 tree (most likely null) is returned anyway.  The error is reflected in
11654 the parser state, normally resulting in a single exception at the top
11655 level of parsing which covers all the compilation errors that occurred.
11656 Some compilation errors, however, will throw an exception immediately.
11657
11658 The C<flags> parameter is reserved for future use, and must always
11659 be zero.
11660
11661 =cut
11662 */
11663
11664 OP *
11665 Perl_parse_block(pTHX_ U32 flags)
11666 {
11667     if (flags)
11668         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11669     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11670 }
11671
11672 /*
11673 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11674
11675 Parse a single unadorned Perl statement.  This may be a normal imperative
11676 statement or a declaration that has compile-time effect.  It does not
11677 include any label or other affixture.  It is up to the caller to ensure
11678 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11679 reflect the source of the code to be parsed and the lexical context for
11680 the statement.
11681
11682 The op tree representing the statement is returned.  This may be a
11683 null pointer if the statement is null, for example if it was actually
11684 a subroutine definition (which has compile-time side effects).  If not
11685 null, it will be ops directly implementing the statement, suitable to
11686 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11687 equivalent op (except for those embedded in a scope contained entirely
11688 within the statement).
11689
11690 If an error occurs in parsing or compilation, in most cases a valid op
11691 tree (most likely null) is returned anyway.  The error is reflected in
11692 the parser state, normally resulting in a single exception at the top
11693 level of parsing which covers all the compilation errors that occurred.
11694 Some compilation errors, however, will throw an exception immediately.
11695
11696 The C<flags> parameter is reserved for future use, and must always
11697 be zero.
11698
11699 =cut
11700 */
11701
11702 OP *
11703 Perl_parse_barestmt(pTHX_ U32 flags)
11704 {
11705     if (flags)
11706         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11707     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11708 }
11709
11710 /*
11711 =for apidoc Amx|SV *|parse_label|U32 flags
11712
11713 Parse a single label, possibly optional, of the type that may prefix a
11714 Perl statement.  It is up to the caller to ensure that the dynamic parser
11715 state (L</PL_parser> et al) is correctly set to reflect the source of
11716 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11717 label is optional, otherwise it is mandatory.
11718
11719 The name of the label is returned in the form of a fresh scalar.  If an
11720 optional label is absent, a null pointer is returned.
11721
11722 If an error occurs in parsing, which can only occur if the label is
11723 mandatory, a valid label is returned anyway.  The error is reflected in
11724 the parser state, normally resulting in a single exception at the top
11725 level of parsing which covers all the compilation errors that occurred.
11726
11727 =cut
11728 */
11729
11730 SV *
11731 Perl_parse_label(pTHX_ U32 flags)
11732 {
11733     if (flags & ~PARSE_OPTIONAL)
11734         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11735     if (PL_nexttoke) {
11736         PL_parser->yychar = yylex();
11737         if (PL_parser->yychar == LABEL) {
11738             char * const lpv = pl_yylval.pval;
11739             STRLEN llen = strlen(lpv);
11740             PL_parser->yychar = YYEMPTY;
11741             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11742         } else {
11743             yyunlex();
11744             goto no_label;
11745         }
11746     } else {
11747         char *s, *t;
11748         STRLEN wlen, bufptr_pos;
11749         lex_read_space(0);
11750         t = s = PL_bufptr;
11751         if (!isIDFIRST_lazy_if(s, UTF))
11752             goto no_label;
11753         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11754         if (word_takes_any_delimiter(s, wlen))
11755             goto no_label;
11756         bufptr_pos = s - SvPVX(PL_linestr);
11757         PL_bufptr = t;
11758         lex_read_space(LEX_KEEP_PREVIOUS);
11759         t = PL_bufptr;
11760         s = SvPVX(PL_linestr) + bufptr_pos;
11761         if (t[0] == ':' && t[1] != ':') {
11762             PL_oldoldbufptr = PL_oldbufptr;
11763             PL_oldbufptr = s;
11764             PL_bufptr = t+1;
11765             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11766         } else {
11767             PL_bufptr = s;
11768             no_label:
11769             if (flags & PARSE_OPTIONAL) {
11770                 return NULL;
11771             } else {
11772                 qerror(Perl_mess(aTHX_ "Parse error"));
11773                 return newSVpvs("x");
11774             }
11775         }
11776     }
11777 }
11778
11779 /*
11780 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11781
11782 Parse a single complete Perl statement.  This may be a normal imperative
11783 statement or a declaration that has compile-time effect, and may include
11784 optional labels.  It is up to the caller to ensure that the dynamic
11785 parser state (L</PL_parser> et al) is correctly set to reflect the source
11786 of the code to be parsed and the lexical context for the statement.
11787
11788 The op tree representing the statement is returned.  This may be a
11789 null pointer if the statement is null, for example if it was actually
11790 a subroutine definition (which has compile-time side effects).  If not
11791 null, it will be the result of a L</newSTATEOP> call, normally including
11792 a C<nextstate> or equivalent op.
11793
11794 If an error occurs in parsing or compilation, in most cases a valid op
11795 tree (most likely null) is returned anyway.  The error is reflected in
11796 the parser state, normally resulting in a single exception at the top
11797 level of parsing which covers all the compilation errors that occurred.
11798 Some compilation errors, however, will throw an exception immediately.
11799
11800 The C<flags> parameter is reserved for future use, and must always
11801 be zero.
11802
11803 =cut
11804 */
11805
11806 OP *
11807 Perl_parse_fullstmt(pTHX_ U32 flags)
11808 {
11809     if (flags)
11810         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11811     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11812 }
11813
11814 /*
11815 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11816
11817 Parse a sequence of zero or more Perl statements.  These may be normal
11818 imperative statements, including optional labels, or declarations
11819 that have compile-time effect, or any mixture thereof.  The statement
11820 sequence ends when a closing brace or end-of-file is encountered in a
11821 place where a new statement could have validly started.  It is up to
11822 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11823 is correctly set to reflect the source of the code to be parsed and the
11824 lexical context for the statements.
11825
11826 The op tree representing the statement sequence is returned.  This may
11827 be a null pointer if the statements were all null, for example if there
11828 were no statements or if there were only subroutine definitions (which
11829 have compile-time side effects).  If not null, it will be a C<lineseq>
11830 list, normally including C<nextstate> or equivalent ops.
11831
11832 If an error occurs in parsing or compilation, in most cases a valid op
11833 tree is returned anyway.  The error is reflected in the parser state,
11834 normally resulting in a single exception at the top level of parsing
11835 which covers all the compilation errors that occurred.  Some compilation
11836 errors, however, will throw an exception immediately.
11837
11838 The C<flags> parameter is reserved for future use, and must always
11839 be zero.
11840
11841 =cut
11842 */
11843
11844 OP *
11845 Perl_parse_stmtseq(pTHX_ U32 flags)
11846 {
11847     OP *stmtseqop;
11848     I32 c;
11849     if (flags)
11850         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11851     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11852     c = lex_peek_unichar(0);
11853     if (c != -1 && c != /*{*/'}')
11854         qerror(Perl_mess(aTHX_ "Parse error"));
11855     return stmtseqop;
11856 }
11857
11858 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11859 static OP *
11860 S_parse_opt_lexvar(pTHX)
11861 {
11862     I32 sigil, c;
11863     char *s, *d;
11864     OP *var;
11865     lex_token_boundary();
11866     sigil = lex_read_unichar(0);
11867     if (lex_peek_unichar(0) == '#') {
11868         qerror(Perl_mess(aTHX_ "Parse error"));
11869         return NULL;
11870     }
11871     lex_read_space(0);
11872     c = lex_peek_unichar(0);
11873     if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11874         return NULL;
11875     s = PL_bufptr;
11876     d = PL_tokenbuf + 1;
11877     PL_tokenbuf[0] = (char)sigil;
11878     parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11879     PL_bufptr = s;
11880     if (d == PL_tokenbuf+1)
11881         return NULL;
11882     var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11883                 OPf_MOD | (OPpLVAL_INTRO<<8));
11884     var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11885     return var;
11886 }
11887
11888 OP *
11889 Perl_parse_subsignature(pTHX)
11890 {
11891     I32 c;
11892     int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11893     OP *initops = NULL;
11894     lex_read_space(0);
11895     c = lex_peek_unichar(0);
11896     while (c != /*(*/')') {
11897         switch (c) {
11898             case '$': {
11899                 OP *var, *expr;
11900                 if (prev_type == 2)
11901                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11902                 var = parse_opt_lexvar();
11903                 expr = var ?
11904                     newBINOP(OP_AELEM, 0,
11905                         ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11906                             OP_RV2AV),
11907                         newSVOP(OP_CONST, 0, newSViv(pos))) :
11908                     NULL;
11909                 lex_read_space(0);
11910                 c = lex_peek_unichar(0);
11911                 if (c == '=') {
11912                     lex_token_boundary();
11913                     lex_read_unichar(0);
11914                     lex_read_space(0);
11915                     c = lex_peek_unichar(0);
11916                     if (c == ',' || c == /*(*/')') {
11917                         if (var)
11918                             qerror(Perl_mess(aTHX_ "Optional parameter "
11919                                     "lacks default expression"));
11920                     } else {
11921                         OP *defexpr = parse_termexpr(0);
11922                         if (defexpr->op_type == OP_UNDEF
11923                             && !(defexpr->op_flags & OPf_KIDS))
11924                         {
11925                             op_free(defexpr);
11926                         } else {
11927                             OP *ifop =
11928                                 newBINOP(OP_GE, 0,
11929                                     scalar(newUNOP(OP_RV2AV, 0,
11930                                             newGVOP(OP_GV, 0, PL_defgv))),
11931                                     newSVOP(OP_CONST, 0, newSViv(pos+1)));
11932                             expr = var ?
11933                                 newCONDOP(0, ifop, expr, defexpr) :
11934                                 newLOGOP(OP_OR, 0, ifop, defexpr);
11935                         }
11936                     }
11937                     prev_type = 1;
11938                 } else {
11939                     if (prev_type == 1)
11940                         qerror(Perl_mess(aTHX_ "Mandatory parameter "
11941                                 "follows optional parameter"));
11942                     prev_type = 0;
11943                     min_arity = pos + 1;
11944                 }
11945                 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11946                 if (expr)
11947                     initops = op_append_list(OP_LINESEQ, initops,
11948                                 newSTATEOP(0, NULL, expr));
11949                 max_arity = ++pos;
11950             } break;
11951             case '@':
11952             case '%': {
11953                 OP *var;
11954                 if (prev_type == 2)
11955                     qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11956                 var = parse_opt_lexvar();
11957                 if (c == '%') {
11958                     OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11959                             newBINOP(OP_BIT_AND, 0,
11960                                 scalar(newUNOP(OP_RV2AV, 0,
11961                                     newGVOP(OP_GV, 0, PL_defgv))),
11962                                 newSVOP(OP_CONST, 0, newSViv(1))),
11963                             op_convert_list(OP_DIE, 0,
11964                                 op_convert_list(OP_SPRINTF, 0,
11965                                     op_append_list(OP_LIST,
11966                                         newSVOP(OP_CONST, 0,
11967                                             newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11968                                         newSLICEOP(0,
11969                                             op_append_list(OP_LIST,
11970                                                 newSVOP(OP_CONST, 0, newSViv(1)),
11971                                                 newSVOP(OP_CONST, 0, newSViv(2))),
11972                                             newOP(OP_CALLER, 0))))));
11973                     if (pos != min_arity)
11974                         chkop = newLOGOP(OP_AND, 0,
11975                                     newBINOP(OP_GT, 0,
11976                                         scalar(newUNOP(OP_RV2AV, 0,
11977                                             newGVOP(OP_GV, 0, PL_defgv))),
11978                                         newSVOP(OP_CONST, 0, newSViv(pos))),
11979                                     chkop);
11980                     initops = op_append_list(OP_LINESEQ,
11981                                 newSTATEOP(0, NULL, chkop),
11982                                 initops);
11983                 }
11984                 if (var) {
11985                     OP *slice = pos ?
11986                         op_prepend_elem(OP_ASLICE,
11987                             newOP(OP_PUSHMARK, 0),
11988                             newLISTOP(OP_ASLICE, 0,
11989                                 list(newRANGE(0,
11990                                     newSVOP(OP_CONST, 0, newSViv(pos)),
11991                                     newUNOP(OP_AV2ARYLEN, 0,
11992                                         ref(newUNOP(OP_RV2AV, 0,
11993                                                 newGVOP(OP_GV, 0, PL_defgv)),
11994                                             OP_AV2ARYLEN)))),
11995                                 ref(newUNOP(OP_RV2AV, 0,
11996                                         newGVOP(OP_GV, 0, PL_defgv)),
11997                                     OP_ASLICE))) :
11998                         newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11999                     initops = op_append_list(OP_LINESEQ, initops,
12000                         newSTATEOP(0, NULL,
12001                             newASSIGNOP(OPf_STACKED, var, 0, slice)));
12002                 }
12003                 prev_type = 2;
12004                 max_arity = -1;
12005             } break;
12006             default:
12007                 parse_error:
12008                 qerror(Perl_mess(aTHX_ "Parse error"));
12009                 return NULL;
12010         }
12011         lex_read_space(0);
12012         c = lex_peek_unichar(0);
12013         switch (c) {
12014             case /*(*/')': break;
12015             case ',':
12016                 do {
12017                     lex_token_boundary();
12018                     lex_read_unichar(0);
12019                     lex_read_space(0);
12020                     c = lex_peek_unichar(0);
12021                 } while (c == ',');
12022                 break;
12023             default:
12024                 goto parse_error;
12025         }
12026     }
12027     if (min_arity != 0) {
12028         initops = op_append_list(OP_LINESEQ,
12029             newSTATEOP(0, NULL,
12030                 newLOGOP(OP_OR, 0,
12031                     newBINOP(OP_GE, 0,
12032                         scalar(newUNOP(OP_RV2AV, 0,
12033                             newGVOP(OP_GV, 0, PL_defgv))),
12034                         newSVOP(OP_CONST, 0, newSViv(min_arity))),
12035                     op_convert_list(OP_DIE, 0,
12036                         op_convert_list(OP_SPRINTF, 0,
12037                             op_append_list(OP_LIST,
12038                                 newSVOP(OP_CONST, 0,
12039                                     newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
12040                                 newSLICEOP(0,
12041                                     op_append_list(OP_LIST,
12042                                         newSVOP(OP_CONST, 0, newSViv(1)),
12043                                         newSVOP(OP_CONST, 0, newSViv(2))),
12044                                     newOP(OP_CALLER, 0))))))),
12045             initops);
12046     }
12047     if (max_arity != -1) {
12048         initops = op_append_list(OP_LINESEQ,
12049             newSTATEOP(0, NULL,
12050                 newLOGOP(OP_OR, 0,
12051                     newBINOP(OP_LE, 0,
12052                         scalar(newUNOP(OP_RV2AV, 0,
12053                             newGVOP(OP_GV, 0, PL_defgv))),
12054                         newSVOP(OP_CONST, 0, newSViv(max_arity))),
12055                     op_convert_list(OP_DIE, 0,
12056                         op_convert_list(OP_SPRINTF, 0,
12057                             op_append_list(OP_LIST,
12058                                 newSVOP(OP_CONST, 0,
12059                                     newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
12060                                 newSLICEOP(0,
12061                                     op_append_list(OP_LIST,
12062                                         newSVOP(OP_CONST, 0, newSViv(1)),
12063                                         newSVOP(OP_CONST, 0, newSViv(2))),
12064                                     newOP(OP_CALLER, 0))))))),
12065             initops);
12066     }
12067     return initops;
12068 }
12069
12070 /*
12071  * ex: set ts=8 sts=4 sw=4 et:
12072  */